summaryrefslogtreecommitdiff
path: root/lib/prism/translation
diff options
context:
space:
mode:
Diffstat (limited to 'lib/prism/translation')
-rw-r--r--lib/prism/translation/parser.rb31
-rw-r--r--lib/prism/translation/parser/builder.rb13
-rw-r--r--lib/prism/translation/parser/compiler.rb336
-rw-r--r--lib/prism/translation/parser/lexer.rb260
-rw-r--r--lib/prism/translation/parser33.rb12
-rw-r--r--lib/prism/translation/parser34.rb12
-rw-r--r--lib/prism/translation/parser35.rb12
-rw-r--r--lib/prism/translation/parser_current.rb26
-rw-r--r--lib/prism/translation/parser_versions.rb36
-rw-r--r--lib/prism/translation/ripper.rb1256
-rw-r--r--lib/prism/translation/ripper/filter.rb53
-rw-r--r--lib/prism/translation/ripper/lexer.rb133
-rw-r--r--lib/prism/translation/ripper/sexp.rb13
-rw-r--r--lib/prism/translation/ripper/shim.rb2
-rw-r--r--lib/prism/translation/ruby_parser.rb106
15 files changed, 1768 insertions, 533 deletions
diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb
index 5f2f01dbda..70031f133a 100644
--- a/lib/prism/translation/parser.rb
+++ b/lib/prism/translation/parser.rb
@@ -1,9 +1,15 @@
# frozen_string_literal: true
+# :markup: markdown
begin
+ required_version = ">= 3.3.7.2"
+ gem "parser", required_version
require "parser"
rescue LoadError
- warn(%q{Error: Unable to load parser. Add `gem "parser"` to your Gemfile.})
+ warn(<<~MSG)
+ Error: Unable to load parser #{required_version}. \
+ Add `gem "parser"` to your Gemfile or run `bundle update parser`.
+ MSG
exit(1)
end
@@ -13,6 +19,13 @@ module Prism
# whitequark/parser gem's syntax tree. It inherits from the base parser for
# the parser gem, and overrides the parse* methods to parse with prism and
# then translate.
+ #
+ # Note that this version of the parser always parses using the latest
+ # version of Ruby syntax supported by Prism. If you want specific version
+ # support, use one of the version-specific subclasses, such as
+ # `Prism::Translation::Parser34`. If you want to parse using the same
+ # version of Ruby syntax as the currently running version of Ruby, use
+ # `Prism::Translation::ParserCurrent`.
class Parser < ::Parser::Base
Diagnostic = ::Parser::Diagnostic # :nodoc:
private_constant :Diagnostic
@@ -20,7 +33,7 @@ module Prism
# The parser gem has a list of diagnostics with a hard-coded set of error
# messages. We create our own diagnostic class in order to set our own
# error messages.
- class PrismDiagnostic < Diagnostic
+ class PrismDiagnostic < Diagnostic # :nodoc:
# This is the cached message coming from prism.
attr_reader :message
@@ -59,13 +72,19 @@ module Prism
# should be implemented as needed.
#
def initialize(builder = Prism::Translation::Parser::Builder.new, parser: Prism)
+ if !builder.is_a?(Prism::Translation::Parser::Builder)
+ warn(<<~MSG, uplevel: 1, category: :deprecated)
+ [deprecation]: The builder passed to `Prism::Translation::Parser.new` is not a \
+ `Prism::Translation::Parser::Builder` subclass. This will raise in the next major version.
+ MSG
+ end
@parser = parser
super(builder)
end
def version # :nodoc:
- 34
+ 41
end
# The default encoding for Ruby files is UTF-8.
@@ -337,8 +356,10 @@ module Prism
"3.3.1"
when 34
"3.4.0"
- when 35
- "3.5.0"
+ when 35, 40
+ "4.0.0"
+ when 41
+ "4.1.0"
else
"latest"
end
diff --git a/lib/prism/translation/parser/builder.rb b/lib/prism/translation/parser/builder.rb
index d3b51f4275..7fc3bba6b7 100644
--- a/lib/prism/translation/parser/builder.rb
+++ b/lib/prism/translation/parser/builder.rb
@@ -1,4 +1,5 @@
# frozen_string_literal: true
+# :markup: markdown
module Prism
module Translation
@@ -6,12 +7,14 @@ module Prism
# A builder that knows how to convert more modern Ruby syntax
# into whitequark/parser gem's syntax tree.
class Builder < ::Parser::Builders::Default
- # It represents the `it` block argument, which is not yet implemented in the Parser gem.
+ # It represents the `it` block argument, which is not yet implemented in
+ # the Parser gem.
def itarg
n(:itarg, [:it], nil)
end
- # The following three lines have been added to support the `it` block parameter syntax in the source code below.
+ # The following three lines have been added to support the `it` block
+ # parameter syntax in the source code below.
#
# if args.type == :itarg
# block_type = :itblock
@@ -55,6 +58,12 @@ module Prism
method_call.loc.with_expression(join_exprs(method_call, block)))
end
end
+
+ # def foo(&nil); end
+ # ^^^^
+ def blocknilarg(amper_t, nil_t)
+ n0(:blocknilarg, arg_prefix_map(amper_t, nil_t))
+ end
end
end
end
diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb
index 8453c9383a..d11db12ae6 100644
--- a/lib/prism/translation/parser/compiler.rb
+++ b/lib/prism/translation/parser/compiler.rb
@@ -1,13 +1,14 @@
# frozen_string_literal: true
+# :markup: markdown
module Prism
module Translation
class Parser
# A visitor that knows how to convert a prism syntax tree into the
# whitequark/parser gem's syntax tree.
- class Compiler < ::Prism::Compiler
+ class Compiler < ::Prism::Compiler # :nodoc:
# Raised when the tree is malformed or there is a bug in the compiler.
- class CompilationError < StandardError
+ class CompilationError < StandardError # :nodoc:
end
# The Parser::Base instance that is being used to build the AST.
@@ -74,7 +75,29 @@ module Prism
# []
# ^^
def visit_array_node(node)
- builder.array(token(node.opening_loc), visit_all(node.elements), token(node.closing_loc))
+ if node.opening&.start_with?("%w", "%W", "%i", "%I")
+ elements = node.elements.flat_map do |element|
+ if element.is_a?(StringNode)
+ if element.content.include?("\n")
+ string_nodes_from_line_continuations(element.unescaped, element.content, element.content_loc.start_offset, node.opening)
+ else
+ [builder.string_internal([element.unescaped, srange(element.content_loc)])]
+ end
+ elsif element.is_a?(InterpolatedStringNode)
+ builder.string_compose(
+ token(element.opening_loc),
+ string_nodes_from_interpolation(element, node.opening),
+ token(element.closing_loc)
+ )
+ else
+ [visit(element)]
+ end
+ end
+ else
+ elements = visit_all(node.elements)
+ end
+
+ builder.array(token(node.opening_loc), elements, token(node.closing_loc))
end
# foo => [bar]
@@ -111,8 +134,8 @@ module Prism
def visit_assoc_node(node)
key = node.key
- if in_pattern
- if node.value.is_a?(ImplicitNode)
+ if node.value.is_a?(ImplicitNode)
+ if in_pattern
if key.is_a?(SymbolNode)
if key.opening.nil?
builder.match_hash_var([key.unescaped, srange(key.location)])
@@ -122,23 +145,19 @@ module Prism
else
builder.match_hash_var_from_str(token(key.opening_loc), visit_all(key.parts), token(key.closing_loc))
end
- elsif key.opening.nil?
- builder.pair_keyword([key.unescaped, srange(key.location)], visit(node.value))
else
- builder.pair_quoted(token(key.opening_loc), [builder.string_internal([key.unescaped, srange(key.value_loc)])], token(key.closing_loc), visit(node.value))
- end
- elsif node.value.is_a?(ImplicitNode)
- value = node.value.value
+ value = node.value.value
- implicit_value = if value.is_a?(CallNode)
- builder.call_method(nil, nil, [value.name, srange(value.message_loc)])
- elsif value.is_a?(ConstantReadNode)
- builder.const([value.name, srange(key.value_loc)])
- else
- builder.ident([value.name, srange(key.value_loc)]).updated(:lvar)
- end
+ implicit_value = if value.is_a?(CallNode)
+ builder.call_method(nil, nil, [value.name, srange(value.message_loc)])
+ elsif value.is_a?(ConstantReadNode)
+ builder.const([value.name, srange(key.value_loc)])
+ else
+ builder.ident([value.name, srange(key.value_loc)]).updated(:lvar)
+ end
- builder.pair_keyword([key.unescaped, srange(key)], implicit_value)
+ builder.pair_keyword([key.unescaped, srange(key)], implicit_value)
+ end
elsif node.operator_loc
builder.pair(visit(key), token(node.operator_loc), visit(node.value))
elsif key.is_a?(SymbolNode) && key.opening_loc.nil?
@@ -184,14 +203,21 @@ module Prism
if (rescue_clause = node.rescue_clause)
begin
find_start_offset = (rescue_clause.reference&.location || rescue_clause.exceptions.last&.location || rescue_clause.keyword_loc).end_offset
- find_end_offset = (rescue_clause.statements&.location&.start_offset || rescue_clause.subsequent&.location&.start_offset || (find_start_offset + 1))
+ find_end_offset = (
+ rescue_clause.statements&.location&.start_offset ||
+ rescue_clause.subsequent&.location&.start_offset ||
+ node.else_clause&.location&.start_offset ||
+ node.ensure_clause&.location&.start_offset ||
+ node.end_keyword_loc&.start_offset ||
+ find_start_offset + 1
+ )
rescue_bodies << builder.rescue_body(
token(rescue_clause.keyword_loc),
rescue_clause.exceptions.any? ? builder.array(nil, visit_all(rescue_clause.exceptions), nil) : nil,
token(rescue_clause.operator_loc),
visit(rescue_clause.reference),
- srange_find(find_start_offset, find_end_offset, ";"),
+ srange_semicolon(find_start_offset, find_end_offset),
visit(rescue_clause.statements)
)
end until (rescue_clause = rescue_clause.subsequent).nil?
@@ -271,11 +297,6 @@ module Prism
if node.call_operator_loc.nil?
case name
- when :-@
- case (receiver = node.receiver).type
- when :integer_node, :float_node, :rational_node, :imaginary_node
- return visit(numeric_negate(node.message_loc, receiver))
- end
when :!
return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block)
when :=~
@@ -297,7 +318,7 @@ module Prism
visit_all(arguments),
token(node.closing_loc),
),
- srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, "="),
+ token(node.equal_loc),
visit(node.arguments.arguments.last)
),
block
@@ -314,7 +335,7 @@ module Prism
if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
builder.assign(
builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
- srange_find(message_loc.end_offset, node.arguments.location.start_offset, "="),
+ token(node.equal_loc),
visit(node.arguments.arguments.last)
)
else
@@ -667,13 +688,37 @@ module Prism
# defined?(a)
# ^^^^^^^^^^^
def visit_defined_node(node)
- builder.keyword_cmd(
- :defined?,
- token(node.keyword_loc),
- token(node.lparen_loc),
- [visit(node.value)],
- token(node.rparen_loc)
- )
+ # Very weird circumstances here where something like:
+ #
+ # defined?
+ # (1)
+ #
+ # gets parsed in Ruby as having only the `1` expression but in parser
+ # it gets parsed as having a begin. In this case we need to synthesize
+ # that begin to match parser's behavior.
+ if node.lparen_loc && node.keyword_loc.join(node.lparen_loc).slice.include?("\n")
+ builder.keyword_cmd(
+ :defined?,
+ token(node.keyword_loc),
+ nil,
+ [
+ builder.begin(
+ token(node.lparen_loc),
+ visit(node.value),
+ token(node.rparen_loc)
+ )
+ ],
+ nil
+ )
+ else
+ builder.keyword_cmd(
+ :defined?,
+ token(node.keyword_loc),
+ token(node.lparen_loc),
+ [visit(node.value)],
+ token(node.rparen_loc)
+ )
+ end
end
# if foo then bar else baz end
@@ -739,7 +784,7 @@ module Prism
if (do_keyword_loc = node.do_keyword_loc)
token(do_keyword_loc)
else
- srange_find(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset, ";")
+ srange_semicolon(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset)
end,
visit(node.statements),
token(node.end_keyword_loc)
@@ -871,7 +916,7 @@ module Prism
if (then_keyword_loc = node.then_keyword_loc)
token(then_keyword_loc)
else
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset, ";")
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset)
end,
visit(node.statements),
case node.subsequent
@@ -937,7 +982,7 @@ module Prism
if (then_loc = node.then_loc)
token(then_loc)
else
- srange_find(node.pattern.location.end_offset, node.statements&.location&.start_offset, ";")
+ srange_semicolon(node.pattern.location.end_offset, node.statements&.location&.start_offset)
end,
visit(node.statements)
)
@@ -1003,7 +1048,7 @@ module Prism
builder.index_asgn(
visit(node.receiver),
token(node.opening_loc),
- visit_all(node.arguments.arguments),
+ visit_all(node.arguments&.arguments || []),
token(node.closing_loc),
)
end
@@ -1071,7 +1116,7 @@ module Prism
def visit_interpolated_regular_expression_node(node)
builder.regexp_compose(
token(node.opening_loc),
- visit_all(node.parts),
+ string_nodes_from_interpolation(node, node.opening),
[node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)],
builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)])
)
@@ -1088,19 +1133,9 @@ module Prism
return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
end
- parts = node.parts.flat_map do |part|
- # When the content of a string node is split across multiple lines, the
- # parser gem creates individual string nodes for each line the content is part of.
- if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
- string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, node.opening)
- else
- visit(part)
- end
- end
-
builder.string_compose(
token(node.opening_loc),
- parts,
+ string_nodes_from_interpolation(node, node.opening),
token(node.closing_loc)
)
end
@@ -1110,7 +1145,7 @@ module Prism
def visit_interpolated_symbol_node(node)
builder.symbol_compose(
token(node.opening_loc),
- visit_all(node.parts),
+ string_nodes_from_interpolation(node, node.opening),
token(node.closing_loc)
)
end
@@ -1119,14 +1154,14 @@ module Prism
# ^^^^^^^^^^^^
def visit_interpolated_x_string_node(node)
if node.heredoc?
- visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
- else
- builder.xstring_compose(
- token(node.opening_loc),
- visit_all(node.parts),
- token(node.closing_loc)
- )
+ return visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
end
+
+ builder.xstring_compose(
+ token(node.opening_loc),
+ string_nodes_from_interpolation(node, node.opening),
+ token(node.closing_loc)
+ )
end
# -> { it }
@@ -1138,7 +1173,17 @@ module Prism
# -> { it }
# ^^^^^^^^^
def visit_it_parameters_node(node)
- builder.itarg
+ # FIXME: The builder _should_ always be a subclass of the prism builder.
+ # Currently RuboCop passes in its own builder that always inherits from the
+ # parser builder (which is lacking the `itarg` method). Once rubocop-ast
+ # opts in to use the custom prism builder a warning can be emitted when
+ # it is not the expected class, and eventually raise.
+ # https://github.com/rubocop/rubocop-ast/pull/354
+ if builder.is_a?(Translation::Parser::Builder)
+ builder.itarg
+ else
+ builder.args(nil, [], nil, false)
+ end
end
# foo(bar: baz)
@@ -1274,7 +1319,7 @@ module Prism
# A node that is missing from the syntax tree. This is only used in the
# case of a syntax error. The parser gem doesn't have such a concept, so
# we invent our own here.
- def visit_missing_node(node)
+ def visit_error_recovery_node(node)
::AST::Node.new(:missing, [], location: ::Parser::Source::Map.new(srange(node.location)))
end
@@ -1304,7 +1349,7 @@ module Prism
def visit_multi_write_node(node)
elements = multi_target_elements(node)
- if elements.length == 1 && elements.first.is_a?(MultiTargetNode)
+ if elements.length == 1 && elements.first.is_a?(MultiTargetNode) && !node.rest
elements = multi_target_elements(elements.first)
end
@@ -1340,6 +1385,12 @@ module Prism
builder.nil(token(node.location))
end
+ # def foo(&nil); end
+ # ^^^^
+ def visit_no_block_parameter_node(node)
+ builder.blocknilarg(token(node.operator_loc), token(node.keyword_loc))
+ end
+
# def foo(**nil); end
# ^^^^^
def visit_no_keywords_parameter_node(node)
@@ -1432,7 +1483,8 @@ module Prism
# foo => ^(bar)
# ^^^^^^
def visit_pinned_expression_node(node)
- expression = builder.begin(token(node.lparen_loc), visit(node.expression), token(node.rparen_loc))
+ parts = node.expression.accept(copy_compiler(in_pattern: false)) # Don't treat * and similar as match_rest
+ expression = builder.begin(token(node.lparen_loc), parts, token(node.rparen_loc))
builder.pin(token(node.operator_loc), expression)
end
@@ -1716,7 +1768,7 @@ module Prism
end
else
parts =
- if node.value == ""
+ if node.value_loc.nil?
[]
elsif node.value.include?("\n")
string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)
@@ -1757,7 +1809,7 @@ module Prism
if (then_keyword_loc = node.then_keyword_loc)
token(then_keyword_loc)
else
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset, ";")
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset)
end,
visit(node.else_clause),
token(node.else_clause&.else_keyword_loc),
@@ -1788,7 +1840,7 @@ module Prism
if (do_keyword_loc = node.do_keyword_loc)
token(do_keyword_loc)
else
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, ";")
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset)
end,
visit(node.statements),
token(node.closing_loc)
@@ -1812,7 +1864,7 @@ module Prism
if (then_keyword_loc = node.then_keyword_loc)
token(then_keyword_loc)
else
- srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset, ";")
+ srange_semicolon(node.conditions.last.location.end_offset, node.statements&.location&.start_offset)
end,
visit(node.statements)
)
@@ -1832,7 +1884,7 @@ module Prism
if (do_keyword_loc = node.do_keyword_loc)
token(do_keyword_loc)
else
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, ";")
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset)
end,
visit(node.statements),
token(node.closing_loc)
@@ -1916,22 +1968,6 @@ module Prism
elements
end
- # Negate the value of a numeric node. This is a special case where you
- # have a negative sign on one line and then a number on the next line.
- # In normal Ruby, this will always be a method call. The parser gem,
- # however, marks this as a numeric literal. We have to massage the tree
- # here to get it into the correct form.
- def numeric_negate(message_loc, receiver)
- case receiver.type
- when :integer_node, :float_node
- receiver.copy(value: -receiver.value, location: message_loc.join(receiver.location))
- when :rational_node
- receiver.copy(numerator: -receiver.numerator, location: message_loc.join(receiver.location))
- when :imaginary_node
- receiver.copy(numeric: numeric_negate(message_loc, receiver.numeric), location: message_loc.join(receiver.location))
- end
- end
-
# Blocks can have a special set of parameters that automatically expand
# when given arrays if they have a single required parameter and no
# other parameters.
@@ -1961,16 +1997,16 @@ module Prism
Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
end
- # Constructs a new source range by finding the given character between
- # the given start offset and end offset. If the needle is not found, it
- # returns nil. Importantly it does not search past newlines or comments.
+ # Constructs a new source range by finding a semicolon between the given
+ # start offset and end offset. If the semicolon is not found, it returns
+ # nil. Importantly it does not search past newlines or comments.
#
# Note that end_offset is allowed to be nil, in which case this will
# search until the end of the string.
- def srange_find(start_offset, end_offset, character)
- if (match = source_buffer.source.byteslice(start_offset...end_offset)[/\A\s*#{character}/])
+ def srange_semicolon(start_offset, end_offset)
+ if (match = source_buffer.source.byteslice(start_offset...end_offset)[/\A\s*;/])
final_offset = start_offset + match.bytesize
- [character, Range.new(source_buffer, offset_cache[final_offset - character.bytesize], offset_cache[final_offset])]
+ [";", Range.new(source_buffer, offset_cache[final_offset - 1], offset_cache[final_offset])]
end
end
@@ -2014,13 +2050,6 @@ module Prism
end
end
- # The parser gem automatically converts \r\n to \n, meaning our offsets
- # need to be adjusted to always subtract 1 from the length.
- def chomped_bytesize(line)
- chomped = line.chomp
- chomped.bytesize + (chomped == line ? 0 : 1)
- end
-
# Visit a heredoc that can be either a string or an xstring.
def visit_heredoc(node)
children = Array.new
@@ -2089,55 +2118,98 @@ module Prism
end
end
+ # When the content of a string node is split across multiple lines, the
+ # parser gem creates individual string nodes for each line the content is part of.
+ def string_nodes_from_interpolation(node, opening)
+ node.parts.flat_map do |part|
+ if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
+ string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, opening)
+ else
+ visit(part)
+ end
+ end
+ end
+
# Create parser string nodes from a single prism node. The parser gem
# "glues" strings together when a line continuation is encountered.
def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening)
unescaped = unescaped.lines
escaped = escaped.lines
-
- escaped_lengths = []
- normalized_lengths = []
- # Keeps track of where an unescaped line should start a new token. An unescaped
- # \n would otherwise be indistinguishable from the actual newline at the end of
- # of the line. The parser gem only emits a new string node at "real" newlines,
- # line continuations don't start a new node as well.
- do_next_tokens = []
-
- if opening&.end_with?("'")
- escaped.each do |line|
- escaped_lengths << line.bytesize
- normalized_lengths << chomped_bytesize(line)
- do_next_tokens << true
+ percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
+ regex = opening == "/" || opening&.start_with?("%r")
+
+ # Non-interpolating strings
+ if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i")
+ current_length = 0
+ current_line = +""
+
+ escaped.filter_map.with_index do |escaped_line, index|
+ unescaped_line = unescaped.fetch(index, "")
+ current_length += escaped_line.bytesize
+ current_line << unescaped_line
+
+ # Glue line continuations together. Only %w and %i arrays can contain these.
+ if percent_array && escaped_line[/(\\)*\n$/, 1]&.length&.odd?
+ next unless index == escaped.count - 1
+ end
+ s = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_length)])
+ start_offset += escaped_line.bytesize
+ current_line = +""
+ current_length = 0
+ s
end
else
+ escaped_lengths = []
+ normalized_lengths = []
+ # Keeps track of where an unescaped line should start a new token. An unescaped
+ # \n would otherwise be indistinguishable from the actual newline at the end of
+ # of the line. The parser gem only emits a new string node at "real" newlines,
+ # line continuations don't start a new node as well.
+ do_next_tokens = []
+
escaped
.chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
.each do |lines|
escaped_lengths << lines.sum(&:bytesize)
- normalized_lengths << lines.sum { |line| chomped_bytesize(line) }
- unescaped_lines_count = lines.sum do |line|
- line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? || false }
- end
- do_next_tokens.concat(Array.new(unescaped_lines_count + 1, false))
+
+ unescaped_lines_count =
+ if regex
+ 0 # Will always be preserved as is
+ else
+ lines.sum do |line|
+ count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
+ count -= 1 if line.match?(/(?:\A|[^\\])(?:\\\\)*\\n\z/) && count > 0
+ count
+ end
+ end
+
+ extra = 1
+ extra = lines.count if percent_array # Account for line continuations in percent arrays
+
+ normalized_lengths.concat(Array.new(unescaped_lines_count + extra, 0))
+ normalized_lengths[-1] = lines.sum { |line| line.bytesize }
+ do_next_tokens.concat(Array.new(unescaped_lines_count + extra, false))
do_next_tokens[-1] = true
end
- end
-
- current_line = +""
- current_normalized_length = 0
-
- unescaped.filter_map.with_index do |unescaped_line, index|
- current_line << unescaped_line
- current_normalized_length += normalized_lengths.fetch(index, 0)
- if do_next_tokens[index]
- inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
- start_offset += escaped_lengths.fetch(index, 0)
- current_line = +""
- current_normalized_length = 0
- inner_part
- else
- nil
+ current_line = +""
+ current_normalized_length = 0
+
+ emitted_count = 0
+ unescaped.filter_map.with_index do |unescaped_line, index|
+ current_line << unescaped_line
+ current_normalized_length += normalized_lengths.fetch(index, 0)
+
+ if do_next_tokens[index]
+ inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
+ start_offset += escaped_lengths.fetch(emitted_count, 0)
+ current_line = +""
+ current_normalized_length = 0
+ emitted_count += 1
+ inner_part
+ else
+ nil
+ end
end
end
end
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
index 1fa2723f03..e82042867f 100644
--- a/lib/prism/translation/parser/lexer.rb
+++ b/lib/prism/translation/parser/lexer.rb
@@ -1,23 +1,25 @@
# frozen_string_literal: true
+# :markup: markdown
require "strscan"
+require_relative "../../polyfill/append_as_bytes"
+require_relative "../../polyfill/scan_byte"
module Prism
module Translation
class Parser
# Accepts a list of prism tokens and converts them into the expected
# format for the parser gem.
- class Lexer
+ class Lexer # :nodoc:
+ # These tokens are always skipped
+ TYPES_ALWAYS_SKIP = Set.new(%i[IGNORED_NEWLINE __END__ EOF])
+ private_constant :TYPES_ALWAYS_SKIP
+
# The direct translating of types between the two lexers.
TYPES = {
# These tokens should never appear in the output of the lexer.
- EOF: nil,
- MISSING: nil,
- NOT_PROVIDED: nil,
- IGNORED_NEWLINE: nil,
EMBDOC_END: nil,
EMBDOC_LINE: nil,
- __END__: nil,
# These tokens have more or less direct mappings.
AMPERSAND: :tAMPER2,
@@ -85,6 +87,7 @@ module Prism
KEYWORD_DEF: :kDEF,
KEYWORD_DEFINED: :kDEFINED,
KEYWORD_DO: :kDO,
+ KEYWORD_DO_BLOCK: :kDO_BLOCK,
KEYWORD_DO_LOOP: :kDO_COND,
KEYWORD_END: :kEND,
KEYWORD_END_UPCASE: :klEND,
@@ -186,25 +189,25 @@ module Prism
# without them. We should find another way to do this, but in the
# meantime we'll hide them from the documentation and mark them as
# private constants.
- EXPR_BEG = 0x1 # :nodoc:
- EXPR_LABEL = 0x400 # :nodoc:
+ EXPR_BEG = 0x1
+ EXPR_LABEL = 0x400
# It is used to determine whether `do` is of the token type `kDO` or `kDO_LAMBDA`.
#
# NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, please note that `kDO` is still returned
# instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046
- LAMBDA_TOKEN_TYPES = [:kDO_LAMBDA, :tLAMBDA, :tLAMBEG]
+ LAMBDA_TOKEN_TYPES = Set.new([:kDO_LAMBDA, :tLAMBDA, :tLAMBEG])
# The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem.
# The following token types are listed as those classified as `tLPAREN`.
- LPAREN_CONVERSION_TOKEN_TYPES = [
- :kBREAK, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
- :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS
- ]
+ LPAREN_CONVERSION_TOKEN_TYPES = Set.new([
+ :kBREAK, :tCARET, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
+ :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS, :tLCURLY
+ ])
# Types of tokens that are allowed to continue a method call with comments in-between.
# For these, the parser gem doesn't emit a newline token after the last comment.
- COMMENT_CONTINUATION_TYPES = [:COMMENT, :AMPERSAND_DOT, :DOT]
+ COMMENT_CONTINUATION_TYPES = Set.new([:COMMENT, :AMPERSAND_DOT, :DOT])
private_constant :COMMENT_CONTINUATION_TYPES
# Heredocs are complex and require us to keep track of a bit of info to refer to later
@@ -230,7 +233,7 @@ module Prism
@offset_cache = offset_cache
end
- Range = ::Parser::Source::Range # :nodoc:
+ Range = ::Parser::Source::Range
private_constant :Range
# Convert the prism tokens into the expected format for the parser gem.
@@ -251,7 +254,7 @@ module Prism
while index < length
token, state = lexed[index]
index += 1
- next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)
+ next if TYPES_ALWAYS_SKIP.include?(token.type)
type = TYPES.fetch(token.type)
value = token.value
@@ -259,10 +262,11 @@ module Prism
case type
when :kDO
- types = tokens.map(&:first)
- nearest_lambda_token_type = types.reverse.find { |type| LAMBDA_TOKEN_TYPES.include?(type) }
+ nearest_lambda_token = tokens.reverse_each.find do |token|
+ LAMBDA_TOKEN_TYPES.include?(token.first)
+ end
- if nearest_lambda_token_type == :tLAMBDA
+ if nearest_lambda_token&.first == :tLAMBDA
type = :kDO_LAMBDA
end
when :tCHARACTER
@@ -272,20 +276,20 @@ module Prism
when :tCOMMENT
if token.type == :EMBDOC_BEGIN
- while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
+ while !((next_token = lexed[index]&.first) && next_token.type == :EMBDOC_END) && (index < length - 1)
value += next_token.value
index += 1
end
value += next_token.value
- location = range(token.location.start_offset, lexed[index][0].location.end_offset)
+ location = range(token.location.start_offset, next_token.location.end_offset)
index += 1
else
is_at_eol = value.chomp!.nil?
location = range(token.location.start_offset, token.location.end_offset + (is_at_eol ? 0 : -1))
- prev_token = lexed[index - 2][0] if index - 2 >= 0
- next_token = lexed[index][0]
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
+ next_token, _ = lexed[index]
is_inline_comment = prev_token&.location&.start_line == token.location.start_line
if is_inline_comment && !is_at_eol && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
@@ -304,7 +308,7 @@ module Prism
end
end
when :tNL
- next_token = next_token = lexed[index][0]
+ next_token, _ = lexed[index]
# Newlines after comments are emitted out of order.
if next_token&.type == :COMMENT
comment_newline_location = location
@@ -338,11 +342,12 @@ module Prism
when :tRATIONAL
value = parse_rational(value)
when :tSPACE
+ location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value))
value = nil
when :tSTRING_BEG
- next_token = lexed[index][0]
- next_next_token = lexed[index + 1][0]
- basic_quotes = ["\"", "'"].include?(value)
+ next_token, _ = lexed[index]
+ next_next_token, _ = lexed[index + 1]
+ basic_quotes = value == '"' || value == "'"
if basic_quotes && next_token&.type == :STRING_END
next_location = token.location.join(next_token.location)
@@ -351,11 +356,15 @@ module Prism
location = range(next_location.start_offset, next_location.end_offset)
index += 1
elsif value.start_with?("'", '"', "%")
- if next_token&.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && next_next_token&.type == :STRING_END
- # the parser gem doesn't simplify strings when its value ends in a newline
- if !(string_value = next_token.value).end_with?("\n") && basic_quotes
+ if next_token&.type == :STRING_CONTENT && next_next_token&.type == :STRING_END
+ string_value = next_token.value
+ if simplify_string?(string_value, value)
next_location = token.location.join(next_next_token.location)
- value = unescape_string(string_value, value)
+ if percent_array?(value)
+ value = percent_array_unescape(string_value)
+ else
+ value = unescape_string(string_value, value)
+ end
type = :tSTRING
location = range(next_location.start_offset, next_location.end_offset)
index += 2
@@ -394,16 +403,40 @@ module Prism
quote_stack.push(value)
end
when :tSTRING_CONTENT
+ is_percent_array = percent_array?(quote_stack.last)
+
if (lines = token.value.lines).one?
- # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
- is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
- # The parser gem only removes indentation when the heredoc is not nested
- not_nested = heredoc_stack.size == 1
- if is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
- value = trim_heredoc_whitespace(value, current_heredoc)
- end
+ # Prism usually emits a single token for strings with line continuations.
+ # For squiggly heredocs they are not joined so we do that manually here.
+ current_string = +""
+ current_length = 0
+ start_offset = token.location.start_offset
+ while token.type == :STRING_CONTENT
+ current_length += token.value.bytesize
+ # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
+ is_first_token_on_line = prev_token && token.location.start_line != prev_token.location.start_line
+ # The parser gem only removes indentation when the heredoc is not nested
+ not_nested = heredoc_stack.size == 1
+ if is_percent_array
+ value = percent_array_unescape(token.value)
+ elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
+ value = trim_heredoc_whitespace(token.value, current_heredoc)
+ end
- value = unescape_string(value, quote_stack.last)
+ current_string << unescape_string(value, quote_stack.last)
+ relevant_backslash_count = if quote_stack.last.start_with?("%W", "%I")
+ 0 # the last backslash escapes the newline
+ else
+ token.value[/(\\{1,})\n/, 1]&.length || 0
+ end
+ if relevant_backslash_count.even? || !interpolation?(quote_stack.last)
+ tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
+ break
+ end
+ token, _ = lexed[index]
+ index += 1
+ end
else
# When the parser gem encounters a line continuation inside of a multiline string,
# it emits a single string node. The backslash (and remaining newline) is removed.
@@ -416,12 +449,10 @@ module Prism
chomped_line = line.chomp
backslash_count = chomped_line[/\\{1,}\z/]&.length || 0
is_interpolation = interpolation?(quote_stack.last)
- is_percent_array = percent_array?(quote_stack.last)
if backslash_count.odd? && (is_interpolation || is_percent_array)
if is_percent_array
- # Remove the last backslash, keep potential newlines
- current_line << line.sub(/(\\)(\r?\n)\z/, '\2')
+ current_line << percent_array_unescape(line)
adjustment += 1
else
chomped_line.delete_suffix!("\\")
@@ -443,8 +474,8 @@ module Prism
adjustment = 0
end
end
- next
end
+ next
when :tSTRING_DVAR
value = nil
when :tSTRING_END
@@ -458,7 +489,7 @@ module Prism
end
if percent_array?(quote_stack.pop)
- prev_token = lexed[index - 2][0] if index - 2 >= 0
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type)
ends_with_whitespace = prev_token&.type == :WORDS_SEP
# parser always emits a space token after content in a percent array, even if no actual whitespace is present.
@@ -467,7 +498,7 @@ module Prism
end
end
when :tSYMBEG
- if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
+ if (next_token = lexed[index]&.first) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
next_location = token.location.join(next_token.location)
type = :tSYMBOL
value = next_token.value
@@ -482,13 +513,13 @@ module Prism
type = :tIDENTIFIER
end
when :tXSTRING_BEG
- if (next_token = lexed[index][0]) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
+ if (next_token = lexed[index]&.first) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
# self.`()
type = :tBACK_REF2
end
quote_stack.push(value)
when :tSYMBOLS_BEG, :tQSYMBOLS_BEG, :tWORDS_BEG, :tQWORDS_BEG
- if (next_token = lexed[index][0]) && next_token.type == :WORDS_SEP
+ if (next_token = lexed[index]&.first) && next_token.type == :WORDS_SEP
index += 1
end
@@ -564,15 +595,16 @@ module Prism
previous_line = -1
result = Float::MAX
- while (lexed[next_token_index] && next_token = lexed[next_token_index][0])
+ while (next_token = lexed[next_token_index]&.first)
next_token_index += 1
- next_next_token = lexed[next_token_index] && lexed[next_token_index][0]
+ next_next_token, _ = lexed[next_token_index]
+ first_token_on_line = next_token.location.start_column == 0
# String content inside nested heredocs and interpolation is ignored
if next_token.type == :HEREDOC_START || next_token.type == :EMBEXPR_BEGIN
# When interpolation is the first token of a line there is no string
# content to check against. There will be no common whitespace.
- if nesting_level == 0 && next_token.location.start_column == 0
+ if nesting_level == 0 && first_token_on_line
result = 0
end
nesting_level += 1
@@ -580,7 +612,7 @@ module Prism
nesting_level -= 1
# When we encountered the matching heredoc end, we can exit
break if nesting_level == -1
- elsif next_token.type == :STRING_CONTENT && nesting_level == 0
+ elsif next_token.type == :STRING_CONTENT && nesting_level == 0 && first_token_on_line
common_whitespace = 0
next_token.value[/^\s*/].each_char do |char|
if char == "\t"
@@ -668,52 +700,108 @@ module Prism
scanner = StringScanner.new(string)
while (skipped = scanner.skip_until(/\\/))
# Append what was just skipped over, excluding the found backslash.
- result << string.byteslice(scanner.pos - skipped, skipped - 1)
-
- # Simple single-character escape sequences like \n
- if (replacement = ESCAPES[scanner.peek(1)])
- result << replacement
- scanner.pos += 1
- elsif (octal = scanner.check(/[0-7]{1,3}/))
- # \nnn
- # NOTE: When Ruby 3.4 is required, this can become result.append_as_bytes(chr)
- result << octal.to_i(8).chr.b
- scanner.pos += octal.bytesize
- elsif (hex = scanner.check(/x([0-9a-fA-F]{1,2})/))
- # \xnn
- result << hex[1..].to_i(16).chr.b
- scanner.pos += hex.bytesize
- elsif (unicode = scanner.check(/u([0-9a-fA-F]{4})/))
- # \unnnn
- result << unicode[1..].hex.chr(Encoding::UTF_8).b
- scanner.pos += unicode.bytesize
- elsif scanner.peek(3) == "u{}"
- # https://github.com/whitequark/parser/issues/856
- scanner.pos += 3
- elsif (unicode_parts = scanner.check(/u{.*}/))
- # \u{nnnn ...}
- unicode_parts[2..-2].split.each do |unicode|
- result << unicode.hex.chr(Encoding::UTF_8).b
- end
- scanner.pos += unicode_parts.bytesize
- end
+ result.append_as_bytes(string.byteslice(scanner.pos - skipped, skipped - 1))
+ escape_read(result, scanner, false, false)
end
- # Add remainging chars
- result << string.byteslice(scanner.pos..)
-
+ # Add remaining chars
+ result.append_as_bytes(string.byteslice(scanner.pos..))
result.force_encoding(source_buffer.source.encoding)
-
- result
else
delimiters = Regexp.escape("#{delimiter}#{DELIMITER_SYMETRY[delimiter]}")
string.gsub(/\\([\\#{delimiters}])/, '\1')
end
end
+ # Certain strings are merged into a single string token.
+ def simplify_string?(value, quote)
+ case quote
+ when "'"
+ # Only simplify 'foo'
+ !value.include?("\n")
+ when '"'
+ # Simplify when every line ends with a line continuation, or it is the last line
+ value.lines.all? do |line|
+ !line.end_with?("\n") || line[/(\\*)$/, 1]&.length&.odd?
+ end
+ else
+ # %q and similar are never simplified
+ false
+ end
+ end
+
+ # Escape a byte value, given the control and meta flags.
+ def escape_build(value, control, meta)
+ value &= 0x9f if control
+ value |= 0x80 if meta
+ value
+ end
+
+ # Read an escape out of the string scanner, given the control and meta
+ # flags, and push the unescaped value into the result.
+ def escape_read(result, scanner, control, meta)
+ if scanner.skip("\n")
+ # Line continuation
+ elsif (value = ESCAPES[scanner.peek(1)])
+ # Simple single-character escape sequences like \n
+ result.append_as_bytes(value)
+ scanner.pos += 1
+ elsif (value = scanner.scan(/[0-7]{1,3}/))
+ # \nnn
+ result.append_as_bytes(escape_build(value.to_i(8), control, meta))
+ elsif (value = scanner.scan(/x[0-9a-fA-F]{1,2}/))
+ # \xnn
+ result.append_as_bytes(escape_build(value[1..].to_i(16), control, meta))
+ elsif (value = scanner.scan(/u[0-9a-fA-F]{4}/))
+ # \unnnn
+ result.append_as_bytes(value[1..].hex.chr(Encoding::UTF_8))
+ elsif scanner.skip("u{}")
+ # https://github.com/whitequark/parser/issues/856
+ elsif (value = scanner.scan(/u{.*?}/))
+ # \u{nnnn ...}
+ value[2..-2].split.each do |unicode|
+ result.append_as_bytes(unicode.hex.chr(Encoding::UTF_8))
+ end
+ elsif (value = scanner.scan(/c\\?(?=[[:print:]])|C-\\?(?=[[:print:]])/))
+ # \cx or \C-x where x is an ASCII printable character
+ escape_read(result, scanner, true, meta)
+ elsif (value = scanner.scan(/M-\\?(?=[[:print:]])/))
+ # \M-x where x is an ASCII printable character
+ escape_read(result, scanner, control, true)
+ elsif (byte = scanner.scan_byte)
+ # Something else after an escape.
+ if control && byte == 0x3f # ASCII '?'
+ result.append_as_bytes(escape_build(0x7f, false, meta))
+ else
+ result.append_as_bytes(escape_build(byte, control, meta))
+ end
+ end
+ end
+
+ # In a percent array, certain whitespace can be preceeded with a backslash,
+ # causing the following characters to be part of the previous element.
+ def percent_array_unescape(string)
+ string.gsub(/(\\)+[ \f\n\r\t\v]/) do |full_match|
+ full_match.delete_prefix!("\\") if Regexp.last_match[1].length.odd?
+ full_match
+ end
+ end
+
+ # For %-arrays whitespace, the parser gem only considers whitespace before the newline.
+ def percent_array_leading_whitespace(string)
+ return 1 if string.start_with?("\n")
+
+ leading_whitespace = 0
+ string.each_char do |c|
+ break if c == "\n"
+ leading_whitespace += 1
+ end
+ leading_whitespace
+ end
+
# Determine if characters preceeded by a backslash should be escaped or not
def interpolation?(quote)
- quote != "'" && !quote.start_with?("%q", "%w", "%i")
+ !quote.end_with?("'") && !quote.start_with?("%q", "%w", "%i", "%s")
end
# Regexp allow interpolation but are handled differently during unescaping
diff --git a/lib/prism/translation/parser33.rb b/lib/prism/translation/parser33.rb
deleted file mode 100644
index b09266e06a..0000000000
--- a/lib/prism/translation/parser33.rb
+++ /dev/null
@@ -1,12 +0,0 @@
-# frozen_string_literal: true
-
-module Prism
- module Translation
- # This class is the entry-point for Ruby 3.3 of `Prism::Translation::Parser`.
- class Parser33 < Parser
- def version # :nodoc:
- 33
- end
- end
- end
-end
diff --git a/lib/prism/translation/parser34.rb b/lib/prism/translation/parser34.rb
deleted file mode 100644
index 0ead70ad3c..0000000000
--- a/lib/prism/translation/parser34.rb
+++ /dev/null
@@ -1,12 +0,0 @@
-# frozen_string_literal: true
-
-module Prism
- module Translation
- # This class is the entry-point for Ruby 3.4 of `Prism::Translation::Parser`.
- class Parser34 < Parser
- def version # :nodoc:
- 34
- end
- end
- end
-end
diff --git a/lib/prism/translation/parser35.rb b/lib/prism/translation/parser35.rb
deleted file mode 100644
index a6abc12589..0000000000
--- a/lib/prism/translation/parser35.rb
+++ /dev/null
@@ -1,12 +0,0 @@
-# frozen_string_literal: true
-
-module Prism
- module Translation
- # This class is the entry-point for Ruby 3.5 of `Prism::Translation::Parser`.
- class Parser35 < Parser
- def version # :nodoc:
- 35
- end
- end
- end
-end
diff --git a/lib/prism/translation/parser_current.rb b/lib/prism/translation/parser_current.rb
new file mode 100644
index 0000000000..f7c1070e30
--- /dev/null
+++ b/lib/prism/translation/parser_current.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+# :markup: markdown
+#--
+# typed: ignore
+
+module Prism
+ module Translation
+ case RUBY_VERSION
+ when /^3\.3\./
+ ParserCurrent = Parser33
+ when /^3\.4\./
+ ParserCurrent = Parser34
+ when /^3\.5\./, /^4\.0\./
+ ParserCurrent = Parser40
+ when /^4\.1\./
+ ParserCurrent = Parser41
+ else
+ # Keep this in sync with released Ruby.
+ parser = Parser40
+ major, minor, _patch = Gem::Version.new(RUBY_VERSION).segments
+ warn "warning: `Prism::Translation::Current` is loading #{parser.name}, " \
+ "but you are running #{major}.#{minor}."
+ ParserCurrent = parser
+ end
+ end
+end
diff --git a/lib/prism/translation/parser_versions.rb b/lib/prism/translation/parser_versions.rb
new file mode 100644
index 0000000000..720c7d548c
--- /dev/null
+++ b/lib/prism/translation/parser_versions.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+# :markup: markdown
+
+module Prism
+ module Translation
+ # This class is the entry-point for Ruby 3.3 of `Prism::Translation::Parser`.
+ class Parser33 < Parser
+ def version # :nodoc:
+ 33
+ end
+ end
+
+ # This class is the entry-point for Ruby 3.4 of `Prism::Translation::Parser`.
+ class Parser34 < Parser
+ def version # :nodoc:
+ 34
+ end
+ end
+
+ # This class is the entry-point for Ruby 4.0 of `Prism::Translation::Parser`.
+ class Parser40 < Parser
+ def version # :nodoc:
+ 40
+ end
+ end
+
+ Parser35 = Parser40 # :nodoc:
+
+ # This class is the entry-point for Ruby 4.1 of `Prism::Translation::Parser`.
+ class Parser41 < Parser
+ def version # :nodoc:
+ 41
+ end
+ end
+ end
+end
diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb
index dce96e01ab..f179a149a1 100644
--- a/lib/prism/translation/ripper.rb
+++ b/lib/prism/translation/ripper.rb
@@ -1,6 +1,5 @@
# frozen_string_literal: true
-
-require "ripper"
+# :markup: markdown
module Prism
module Translation
@@ -23,22 +22,10 @@ module Prism
# - on_comma
# - on_ignored_nl
# - on_ignored_sp
- # - on_kw
- # - on_label_end
- # - on_lbrace
- # - on_lbracket
- # - on_lparen
# - on_nl
- # - on_op
# - on_operator_ambiguous
- # - on_rbrace
- # - on_rbracket
- # - on_rparen
# - on_semicolon
# - on_sp
- # - on_symbeg
- # - on_tstring_beg
- # - on_tstring_end
#
class Ripper < Compiler
# Parses the given Ruby program read from +src+.
@@ -70,7 +57,8 @@ module Prism
# [[1, 13], :on_kw, "end", END ]]
#
def self.lex(src, filename = "-", lineno = 1, raise_errors: false)
- result = Prism.lex_compat(src, filepath: filename, line: lineno)
+ coerced = coerce_source(src)
+ result = Prism.lex_compat(coerced, filepath: filename, line: lineno, version: "current", encoding: coerced.encoding)
if result.failure? && raise_errors
raise SyntaxError, result.errors.first.message
@@ -79,6 +67,34 @@ module Prism
end
end
+ # Tokenizes the Ruby program and returns an array of strings.
+ # The +filename+ and +lineno+ arguments are mostly ignored, since the
+ # return value is just the tokenized input.
+ # By default, this method does not handle syntax errors in +src+,
+ # use the +raise_errors+ keyword to raise a SyntaxError for an error in +src+.
+ #
+ # p Ripper.tokenize("def m(a) nil end")
+ # # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"]
+ #
+ def self.tokenize(...)
+ lex(...).map { |token| token[2] }
+ end
+
+ # Mirros the various lex_types that ripper supports
+ def self.coerce_source(source) # :nodoc:
+ if source.is_a?(IO)
+ source.read
+ elsif source.respond_to?(:gets)
+ src = +""
+ while line = source.gets
+ src << line
+ end
+ src
+ else
+ source.to_str
+ end
+ end
+
# This contains a table of all of the parser events and their
# corresponding arity.
PARSER_EVENT_TABLE = {
@@ -331,7 +347,7 @@ module Prism
"__ENCODING__",
"__FILE__",
"__LINE__"
- ]
+ ].to_set
# A list of all of the Ruby binary operators.
BINARY_OPERATORS = [
@@ -356,7 +372,7 @@ module Prism
:/,
:*,
:**
- ]
+ ].to_set
private_constant :KEYWORDS, :BINARY_OPERATORS
@@ -425,9 +441,93 @@ module Prism
end
end
+ autoload :Filter, "prism/translation/ripper/filter"
+ autoload :Lexer, "prism/translation/ripper/lexer"
autoload :SexpBuilder, "prism/translation/ripper/sexp"
autoload :SexpBuilderPP, "prism/translation/ripper/sexp"
+ # Provides optimized access to line and column information.
+ # Ripper bounds are mostly accessed in a linear fashion, so
+ # we can try a linear scan first and fall back to binary search.
+ class LineAndColumnCache # :nodoc:
+ # How many should it look ahead/behind before falling back to binary searching.
+ WINDOW = 8
+ private_constant :WINDOW
+
+ #: (Source source) -> void
+ def initialize(source)
+ @source = source
+ @offsets = source.offsets
+ @hint = 0
+ end
+
+ #: (Integer byte_offset) -> [Integer, Integer]
+ def line_and_column(byte_offset)
+ @hint = new_hint(byte_offset) || @source.find_line(byte_offset)
+ return [@hint + @source.start_line, byte_offset - @offsets[@hint]]
+ end
+
+ private
+
+ def new_hint(byte_offset)
+ if @offsets[@hint] <= byte_offset
+ # Same line?
+ if (@hint + 1 >= @offsets.size || @offsets[@hint + 1] > byte_offset)
+ return @hint
+ end
+
+ # Scan forwards
+ limit = [@hint + WINDOW + 1, @offsets.size].min
+ idx = @hint + 1
+ while idx < limit
+ if @offsets[idx] > byte_offset
+ return idx - 1
+ end
+ if @offsets[idx] == byte_offset
+ return idx
+ end
+ idx += 1
+ end
+ else
+ # Scan backwards
+ limit = @hint > WINDOW ? @hint - WINDOW : 0
+ idx = @hint
+ while idx >= limit + 1
+ if @offsets[idx - 1] <= byte_offset
+ return idx - 1
+ end
+ idx -= 1
+ end
+ end
+
+ nil
+ end
+ end
+
+ # :stopdoc:
+ # This is not part of the public API but used by some gems.
+
+ # Ripper-internal bitflags.
+ LEX_STATE_NAMES = %i[
+ BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM
+ ].map.with_index.to_h { |name, i| [2 ** i, name] }.freeze
+ private_constant :LEX_STATE_NAMES
+
+ LEX_STATE_NAMES.each do |value, key|
+ const_set("EXPR_#{key}", value)
+ end
+ EXPR_NONE = 0
+ EXPR_VALUE = EXPR_BEG
+ EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
+ EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
+ EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
+
+ def self.lex_state_name(state)
+ LEX_STATE_NAMES.filter_map { |flag, name| name if state & flag != 0 }.join("|")
+ end
+
+ # :startdoc:
+
# The source that is being parsed.
attr_reader :source
@@ -437,16 +537,17 @@ module Prism
# The current line number of the parser.
attr_reader :lineno
- # The current column number of the parser.
+ # The current column in bytes of the parser.
attr_reader :column
# Create a new Translation::Ripper object with the given source.
def initialize(source, filename = "(ripper)", lineno = 1)
- @source = source
+ @source = Ripper.coerce_source(source)
@filename = filename
@lineno = lineno
@column = 0
@result = nil
+ @line_and_column_cache = nil
end
##########################################################################
@@ -465,7 +566,12 @@ module Prism
bounds(location)
if comment.is_a?(InlineComment)
- on_comment(comment.slice)
+ # Inline comments always contain a newline if the line itself contains it
+ if result.source.source.bytesize > comment.location.end_offset
+ on_comment("#{comment.slice}\n")
+ else
+ on_comment(comment.slice)
+ end
else
offset = location.start_offset
lines = comment.slice.lines
@@ -546,9 +652,14 @@ module Prism
# Visitor methods
##########################################################################
+ # :stopdoc:
+
# alias foo bar
# ^^^^^^^^^^^^^
def visit_alias_method_node(node)
+ bounds(node.keyword_loc)
+ on_kw("alias")
+
new_name = visit(node.new_name)
old_name = visit(node.old_name)
@@ -559,6 +670,9 @@ module Prism
# alias $foo $bar
# ^^^^^^^^^^^^^^^
def visit_alias_global_variable_node(node)
+ bounds(node.keyword_loc)
+ on_kw("alias")
+
new_name = visit_alias_global_variable_node_value(node.new_name)
old_name = visit_alias_global_variable_node_value(node.old_name)
@@ -584,6 +698,10 @@ module Prism
# ^^^^^^^^^
def visit_alternation_pattern_node(node)
left = visit_pattern_node(node.left)
+
+ bounds(node.operator_loc)
+ on_op("|")
+
right = visit_pattern_node(node.right)
bounds(node.location)
@@ -594,7 +712,13 @@ module Prism
# parenthesis node that can be used to wrap patterns.
private def visit_pattern_node(node)
if node.is_a?(ParenthesesNode)
- visit(node.body)
+ bounds(node.opening_loc)
+ on_lparen("(")
+ result = visit(node.body)
+ bounds(node.closing_loc)
+ on_rparen(")")
+
+ result
else
visit(node)
end
@@ -604,6 +728,14 @@ module Prism
# ^^^^^^^
def visit_and_node(node)
left = visit(node.left)
+
+ bounds(node.operator_loc)
+ if node.operator == "and"
+ on_kw("and")
+ else
+ on_op("&&")
+ end
+
right = visit(node.right)
bounds(node.location)
@@ -631,6 +763,8 @@ module Prism
previous = element
end
+ visit_words_sep(opening_loc, node.elements.last, node.closing_loc)
+
bounds(node.closing_loc)
on_tstring_end(node.closing)
when /^%i/
@@ -650,6 +784,8 @@ module Prism
previous = element
end
+ visit_words_sep(opening_loc, node.elements.last, node.closing_loc)
+
bounds(node.closing_loc)
on_tstring_end(node.closing)
when /^%W/
@@ -687,6 +823,8 @@ module Prism
previous = element
end
+ visit_words_sep(opening_loc, node.elements.last, node.closing_loc)
+
bounds(node.closing_loc)
on_tstring_end(node.closing)
when /^%I/
@@ -724,6 +862,8 @@ module Prism
previous = element
end
+ visit_words_sep(opening_loc, node.elements.last, node.closing_loc)
+
bounds(node.closing_loc)
on_tstring_end(node.closing)
else
@@ -740,15 +880,21 @@ module Prism
on_array(elements)
end
- # Dispatch a words_sep event that contains the space between the elements
+ # Dispatch words_sep events that contains the whitespace between the elements
# of list literals.
private def visit_words_sep(opening_loc, previous, current)
- end_offset = (previous.nil? ? opening_loc : previous.location).end_offset
- start_offset = current.location.start_offset
-
- if end_offset != start_offset
- bounds(current.location.copy(start_offset: end_offset))
- on_words_sep(source.byteslice(end_offset...start_offset))
+ start_offset = (previous.nil? ? opening_loc : previous.location).end_offset
+ end_offset = current.start_offset
+ length = end_offset - start_offset
+
+ if length > 0
+ whitespace = source.byteslice(start_offset, length)
+ current_offset = start_offset
+ whitespace.each_line do |part|
+ bounds(opening_loc.copy(start_offset: current_offset, length: part.bytesize))
+ on_words_sep(part)
+ current_offset += part.bytesize
+ end
end
end
@@ -774,9 +920,18 @@ module Prism
# ^^^^^
def visit_array_pattern_node(node)
constant = visit(node.constant)
+
+ if node.opening_loc
+ bounds(node.opening_loc)
+ node.opening == "[" ? on_lbracket("[") : on_lparen("(")
+ end
+
requireds = visit_all(node.requireds) if node.requireds.any?
rest =
if (rest_node = node.rest).is_a?(SplatNode)
+ bounds(rest_node.operator_loc)
+ on_op("*")
+
if rest_node.expression.nil?
bounds(rest_node.location)
on_var_field(nil)
@@ -787,6 +942,10 @@ module Prism
posts = visit_all(node.posts) if node.posts.any?
+ if node.closing_loc
+ bounds(node.closing_loc)
+ node.closing == "]" ? on_rbracket("]") : on_rparen(")")
+ end
bounds(node.location)
on_aryptn(constant, requireds, rest, posts)
end
@@ -802,6 +961,12 @@ module Prism
# ^^^^
def visit_assoc_node(node)
key = visit(node.key)
+
+ if node.operator_loc
+ bounds(node.operator_loc)
+ on_op("=>")
+ end
+
value = visit(node.value)
bounds(node.location)
@@ -814,6 +979,9 @@ module Prism
# { **foo }
# ^^^^^
def visit_assoc_splat_node(node)
+ bounds(node.operator_loc)
+ on_op("**")
+
value = visit(node.value)
bounds(node.location)
@@ -830,8 +998,18 @@ module Prism
# begin end
# ^^^^^^^^^
def visit_begin_node(node)
+ if node.begin_keyword_loc
+ bounds(node.begin_keyword_loc)
+ on_kw("begin")
+ end
+
clauses = visit_begin_node_clauses(node.begin_keyword_loc, node, false)
+ if node.end_keyword_loc
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
on_begin(clauses)
end
@@ -843,7 +1021,7 @@ module Prism
on_stmts_add(on_stmts_new, on_void_stmt)
else
body = node.statements.body
- body.unshift(nil) if void_stmt?(location, node.statements.body[0].location, allow_newline)
+ body = [nil, *body] if void_stmt?(location, node.statements.body[0].location, allow_newline)
bounds(node.statements.location)
visit_statements_node_body(body)
@@ -852,12 +1030,15 @@ module Prism
rescue_clause = visit(node.rescue_clause)
else_clause =
unless (else_clause_node = node.else_clause).nil?
+ bounds(else_clause_node.else_keyword_loc)
+ on_kw("else")
+
else_statements =
if else_clause_node.statements.nil?
[nil]
else
body = else_clause_node.statements.body
- body.unshift(nil) if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline)
+ body = [nil, *body] if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline)
body
end
@@ -879,7 +1060,7 @@ module Prism
on_bodystmt(visit_statements_node_body([nil]), nil, nil, nil)
when StatementsNode
body = [*node.body]
- body.unshift(nil) if void_stmt?(location, body[0].location, allow_newline)
+ body = [nil, *body] if void_stmt?(location, body[0].location, allow_newline)
stmts = visit_statements_node_body(body)
bounds(node.body.first.location)
@@ -894,6 +1075,8 @@ module Prism
# foo(&bar)
# ^^^^
def visit_block_argument_node(node)
+ bounds(node.operator_loc)
+ on_op("&")
visit(node.expression)
end
@@ -907,6 +1090,13 @@ module Prism
# Visit a BlockNode.
def visit_block_node(node)
braces = node.opening == "{"
+ bounds(node.opening_loc)
+ if braces
+ on_lbrace("{")
+ else
+ on_kw("do")
+ end
+
parameters = visit(node.parameters)
body =
@@ -919,7 +1109,7 @@ module Prism
braces ? stmts : on_bodystmt(stmts, nil, nil, nil)
when StatementsNode
stmts = node.body.body
- stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
+ stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
stmts = visit_statements_node_body(stmts)
bounds(node.body.location)
@@ -931,6 +1121,14 @@ module Prism
end
if braces
+ bounds(node.closing_loc)
+ on_rbrace("}")
+ else
+ bounds(node.closing_loc)
+ on_kw("end")
+ end
+
+ if braces
bounds(node.location)
on_brace_block(parameters, body)
else
@@ -942,12 +1140,15 @@ module Prism
# def foo(&bar); end
# ^^^^
def visit_block_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("&")
+
if node.name_loc.nil?
bounds(node.location)
on_blockarg(nil)
else
bounds(node.name_loc)
- name = visit_token(node.name.to_s)
+ name = on_ident(node.name.to_s)
bounds(node.location)
on_blockarg(name)
@@ -956,6 +1157,9 @@ module Prism
# A block's parameters.
def visit_block_parameters_node(node)
+ bounds(node.opening_loc)
+ on_op("|")
+
parameters =
if node.parameters.nil?
on_params(nil, nil, nil, nil, nil, nil, nil)
@@ -970,6 +1174,9 @@ module Prism
false
end
+ bounds(node.closing_loc)
+ on_op("|")
+
bounds(node.location)
on_block_var(parameters, locals)
end
@@ -980,6 +1187,9 @@ module Prism
# break foo
# ^^^^^^^^^
def visit_break_node(node)
+ bounds(node.keyword_loc)
+ on_kw("break")
+
if node.arguments.nil?
bounds(node.location)
on_break(on_args_new)
@@ -1004,20 +1214,32 @@ module Prism
case node.name
when :[]
receiver = visit(node.receiver)
- arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
+ arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
+ block = visit(block_node)
bounds(node.location)
call = on_aref(receiver, arguments)
- if block.nil?
- call
- else
+ if block_node
bounds(node.location)
on_method_add_block(call, block)
+ else
+ call
end
when :[]=
receiver = visit(node.receiver)
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
*arguments, last_argument = node.arguments.arguments
arguments << node.block if !node.block.nil?
@@ -1033,6 +1255,11 @@ module Prism
end
end
+ bounds(node.closing_loc)
+ on_rbracket("]")
+ bounds(node.equal_loc)
+ on_op("=")
+
bounds(node.location)
call = on_aref_field(receiver, arguments)
value = visit_write_value(last_argument)
@@ -1040,27 +1267,54 @@ module Prism
bounds(last_argument.location)
on_assign(call, value)
when :-@, :+@, :~
- receiver = visit(node.receiver)
+ bounds(node.message_loc)
+ on_op(node.message)
+ receiver = visit(node.receiver)
bounds(node.location)
on_unary(node.name, receiver)
when :!
+ bounds(node.message_loc)
if node.message == "not"
+ on_kw("not")
+
+ if node.opening_loc
+ bounds(node.opening_loc)
+ on_lparen("(")
+ end
+
receiver =
- if !node.receiver.is_a?(ParenthesesNode) || !node.receiver.body.nil?
+ if node.receiver.is_a?(ParenthesesNode) && node.receiver.body.nil?
+ # The parens in `not()` just emit parens and nothing else.
+ bounds(node.receiver.opening_loc)
+ on_lparen("(")
+ bounds(node.receiver.closing_loc)
+ on_rparen(")")
+ nil
+ else
visit(node.receiver)
end
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_rparen(")")
+ end
bounds(node.location)
on_unary(:not, receiver)
else
+ on_op("!")
+
receiver = visit(node.receiver)
bounds(node.location)
on_unary(:!, receiver)
end
- when *BINARY_OPERATORS
+ when BINARY_OPERATORS
receiver = visit(node.receiver)
+
+ bounds(node.message_loc)
+ on_op(node.message)
+
value = visit(node.arguments.arguments.first)
bounds(node.location)
@@ -1072,9 +1326,21 @@ module Prism
if node.variable_call?
on_vcall(message)
else
- arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location))
+ if node.opening_loc
+ bounds(node.opening_loc)
+ on_lparen("(")
+ end
+
+ arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location))
+
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_rparen(")")
+ end
+
+ block = visit(block_node)
call =
- if node.opening_loc.nil? && arguments&.any?
+ if node.opening_loc.nil? && get_arguments_and_block(node.arguments, node.block).first.any?
bounds(node.location)
on_command(message, arguments)
elsif !node.opening_loc.nil?
@@ -1085,11 +1351,11 @@ module Prism
on_method_add_arg(on_fcall(message), on_args_new)
end
- if block.nil?
- call
- else
+ if block_node
bounds(node.block.location)
on_method_add_block(call, block)
+ else
+ call
end
end
end
@@ -1097,7 +1363,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
message =
if node.message_loc.nil?
@@ -1107,13 +1373,30 @@ module Prism
visit_token(node.message, false)
end
+ if node.equal_loc
+ bounds(node.equal_loc)
+ on_op("=")
+ end
+
if node.name.end_with?("=") && !node.message.end_with?("=") && !node.arguments.nil? && node.block.nil?
value = visit_write_value(node.arguments.arguments.first)
bounds(node.location)
on_assign(on_field(receiver, call_operator, message), value)
else
- arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location))
+ if node.opening_loc
+ bounds(node.opening_loc)
+ on_lparen("(")
+ end
+
+ arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location))
+
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_rparen(")")
+ end
+
+ block = visit(block_node)
call =
if node.opening_loc.nil?
bounds(node.location)
@@ -1131,27 +1414,35 @@ module Prism
on_method_add_arg(on_call(receiver, call_operator, message), arguments)
end
- if block.nil?
- call
- else
+ if block_node
bounds(node.block.location)
on_method_add_block(call, block)
+ else
+ call
end
end
end
end
- # Visit the arguments and block of a call node and return the arguments
- # and block as they should be used.
- private def visit_call_node_arguments(arguments_node, block_node, trailing_comma)
+ # Extract the arguments and block Ripper-style, which means if the block
+ # is like `&b` then it's moved to arguments.
+ private def get_arguments_and_block(arguments_node, block_node)
arguments = arguments_node&.arguments || []
block = block_node
if block.is_a?(BlockArgumentNode)
- arguments << block
+ arguments += [block]
block = nil
end
+ [arguments, block]
+ end
+
+ # Visit the arguments and block of a call node and return the arguments
+ # and block as they should be used.
+ private def visit_call_node_arguments(arguments_node, block_node, trailing_comma)
+ arguments, block = get_arguments_and_block(arguments_node, block_node)
+
[
if arguments.length == 1 && arguments.first.is_a?(ForwardingArgumentsNode)
visit(arguments.first)
@@ -1165,7 +1456,7 @@ module Prism
on_args_add_block(args, false)
end
end,
- visit(block)
+ block,
]
end
@@ -1183,7 +1474,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1205,7 +1496,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1227,7 +1518,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1249,6 +1540,9 @@ module Prism
if node.call_operator == "::"
receiver = visit(node.receiver)
+ bounds(node.call_operator_loc)
+ on_op("::")
+
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1258,7 +1552,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1272,6 +1566,10 @@ module Prism
# ^^^^^^^^^^
def visit_capture_pattern_node(node)
value = visit(node.value)
+
+ bounds(node.operator_loc)
+ on_op("=>")
+
target = visit(node.target)
bounds(node.location)
@@ -1281,10 +1579,21 @@ module Prism
# case foo; when bar; end
# ^^^^^^^^^^^^^^^^^^^^^^^
def visit_case_node(node)
+ bounds(node.case_keyword_loc)
+ on_kw("case")
+
predicate = visit(node.predicate)
+ visited_conditions = node.conditions.map { |condition| visit(condition) }
+ visited_else_clause = visit(node.else_clause)
+
+ if !node.else_clause
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
clauses =
- node.conditions.reverse_each.inject(visit(node.else_clause)) do |current, condition|
- on_when(*visit(condition), current)
+ visited_conditions.reverse_each.inject(visited_else_clause) do |current, condition|
+ on_when(*condition, current)
end
bounds(node.location)
@@ -1294,10 +1603,23 @@ module Prism
# case foo; in bar; end
# ^^^^^^^^^^^^^^^^^^^^^
def visit_case_match_node(node)
+ bounds(node.case_keyword_loc)
+ on_kw("case")
+
predicate = visit(node.predicate)
+ visited_conditions = node.conditions.map do | condition|
+ visit(condition)
+ end
+ visited_else_clause = visit(node.else_clause)
+
+ if !node.else_clause
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
clauses =
- node.conditions.reverse_each.inject(visit(node.else_clause)) do |current, condition|
- on_in(*visit(condition), current)
+ visited_conditions.reverse_each.inject(visited_else_clause) do |current, condition|
+ on_in(*condition, current)
end
bounds(node.location)
@@ -1307,6 +1629,9 @@ module Prism
# class Foo; end
# ^^^^^^^^^^^^^^
def visit_class_node(node)
+ bounds(node.class_keyword_loc)
+ on_kw("class")
+
constant_path =
if node.constant_path.is_a?(ConstantReadNode)
bounds(node.constant_path.location)
@@ -1315,9 +1640,17 @@ module Prism
visit(node.constant_path)
end
+ if node.inheritance_operator_loc
+ bounds(node.inheritance_operator_loc)
+ on_op("<")
+ end
+
superclass = visit(node.superclass)
bodystmt = visit_body_node(node.superclass&.location || node.constant_path.location, node.body, node.superclass.nil?)
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+
bounds(node.location)
on_class(constant_path, superclass, bodystmt)
end
@@ -1331,12 +1664,13 @@ module Prism
# @@foo = 1
# ^^^^^^^^^
- #
- # @@foo, @@bar = 1
- # ^^^^^ ^^^^^
def visit_class_variable_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_cvar(node.name.to_s))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -1401,12 +1735,13 @@ module Prism
# Foo = 1
# ^^^^^^^
- #
- # Foo, Bar = 1
- # ^^^ ^^^
def visit_constant_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_const(node.name.to_s))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -1466,6 +1801,11 @@ module Prism
# ^^^^^^^^
def visit_constant_path_node(node)
if node.parent.nil?
+ if node.delimiter_loc
+ bounds(node.delimiter_loc)
+ on_op("::")
+ end
+
bounds(node.name_loc)
child = on_const(node.name.to_s)
@@ -1474,6 +1814,9 @@ module Prism
else
parent = visit(node.parent)
+ bounds(node.delimiter_loc)
+ on_op("::")
+
bounds(node.name_loc)
child = on_const(node.name.to_s)
@@ -1484,11 +1827,12 @@ module Prism
# Foo::Bar = 1
# ^^^^^^^^^^^^
- #
- # Foo::Foo, Bar::Bar = 1
- # ^^^^^^^^ ^^^^^^^^
def visit_constant_path_write_node(node)
target = visit_constant_path_write_node_target(node.target)
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -1498,6 +1842,11 @@ module Prism
# Visit a constant path that is part of a write node.
private def visit_constant_path_write_node_target(node)
if node.parent.nil?
+ if node.delimiter_loc
+ bounds(node.delimiter_loc)
+ on_op("::")
+ end
+
bounds(node.name_loc)
child = on_const(node.name.to_s)
@@ -1506,6 +1855,9 @@ module Prism
else
parent = visit(node.parent)
+ bounds(node.delimiter_loc)
+ on_op("::")
+
bounds(node.name_loc)
child = on_const(node.name.to_s)
@@ -1518,7 +1870,6 @@ module Prism
# ^^^^^^^^^^^^^^^
def visit_constant_path_operator_write_node(node)
target = visit_constant_path_write_node_target(node.target)
- value = visit(node.value)
bounds(node.binary_operator_loc)
operator = on_op("#{node.binary_operator}=")
@@ -1532,7 +1883,6 @@ module Prism
# ^^^^^^^^^^^^^^^^
def visit_constant_path_and_write_node(node)
target = visit_constant_path_write_node_target(node.target)
- value = visit(node.value)
bounds(node.operator_loc)
operator = on_op("&&=")
@@ -1546,7 +1896,6 @@ module Prism
# ^^^^^^^^^^^^^^^^
def visit_constant_path_or_write_node(node)
target = visit_constant_path_write_node_target(node.target)
- value = visit(node.value)
bounds(node.operator_loc)
operator = on_op("||=")
@@ -1568,16 +1917,24 @@ module Prism
# def self.foo; end
# ^^^^^^^^^^^^^^^^^
def visit_def_node(node)
+ bounds(node.def_keyword_loc)
+ on_kw("def")
+
receiver = visit(node.receiver)
operator =
if !node.operator_loc.nil?
bounds(node.operator_loc)
- visit_token(node.operator)
+ node.operator == "." ? on_period(".") : on_op("::")
end
bounds(node.name_loc)
name = visit_token(node.name_loc.slice)
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
parameters =
if node.parameters.nil?
bounds(node.location)
@@ -1587,10 +1944,17 @@ module Prism
end
if !node.lparen_loc.nil?
+ bounds(node.rparen_loc)
+ on_rparen(")")
bounds(node.lparen_loc)
parameters = on_paren(parameters)
end
+ if node.equal_loc
+ bounds(node.equal_loc)
+ on_op("=")
+ end
+
bodystmt =
if node.equal_loc.nil?
visit_body_node(node.rparen_loc || node.end_keyword_loc, node.body)
@@ -1601,11 +1965,16 @@ module Prism
on_bodystmt(body, nil, nil, nil)
end
+ if node.end_keyword_loc
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
- if receiver.nil?
- on_def(name, parameters, bodystmt)
- else
+ if receiver
on_defs(receiver, operator, name, parameters, bodystmt)
+ else
+ on_def(name, parameters, bodystmt)
end
end
@@ -1615,24 +1984,59 @@ module Prism
# defined?(a)
# ^^^^^^^^^^^
def visit_defined_node(node)
+ bounds(node.keyword_loc)
+ on_kw("defined?")
+
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
+ expression = visit(node.value)
+
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
+ # Very weird circumstances here where something like:
+ #
+ # defined?
+ # (1)
+ #
+ # gets parsed in Ruby as having only the `1` expression but in Ripper it
+ # gets parsed as having a parentheses node. In this case we need to
+ # synthesize that node to match Ripper's behavior.
+ if node.lparen_loc && node.keyword_loc.join(node.lparen_loc).slice.include?("\n")
+ bounds(node.lparen_loc.join(node.rparen_loc))
+ expression = on_paren(on_stmts_add(on_stmts_new, expression))
+ end
+
bounds(node.location)
- on_defined(visit(node.value))
+ on_defined(expression)
end
# if foo then bar else baz end
# ^^^^^^^^^^^^
def visit_else_node(node)
+ bounds(node.else_keyword_loc)
+ on_kw("else")
+
statements =
if node.statements.nil?
[nil]
else
body = node.statements.body
- body.unshift(nil) if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false)
+ body = [nil, *body] if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false)
body
end
+ else_statements = visit_statements_node_body(statements)
+
+ bounds(node.end_keyword_loc)
+ on_kw("end")
bounds(node.location)
- on_else(visit_statements_node_body(statements))
+ on_else(else_statements)
end
# "foo #{bar}"
@@ -1670,12 +2074,15 @@ module Prism
# Visit an EnsureNode node.
def visit_ensure_node(node)
+ bounds(node.ensure_keyword_loc)
+ on_kw("ensure")
+
statements =
if node.statements.nil?
[nil]
else
body = node.statements.body
- body.unshift(nil) if void_stmt?(node.ensure_keyword_loc, body[0].location, false)
+ body = [nil, *body] if void_stmt?(node.ensure_keyword_loc, body[0].location, false)
body
end
@@ -1696,6 +2103,14 @@ module Prism
# ^^^^^^^^^^^
def visit_find_pattern_node(node)
constant = visit(node.constant)
+
+ if node.opening_loc
+ bounds(node.opening_loc)
+ node.opening == "[" ? on_lbracket("[") : on_lparen("(")
+ end
+ bounds(node.left.operator_loc)
+ on_op("*")
+
left =
if node.left.expression.nil?
bounds(node.left.location)
@@ -1705,6 +2120,10 @@ module Prism
end
requireds = visit_all(node.requireds) if node.requireds.any?
+
+ bounds(node.right.operator_loc)
+ on_op("*")
+
right =
if node.right.expression.nil?
bounds(node.right.location)
@@ -1713,6 +2132,10 @@ module Prism
visit(node.right.expression)
end
+ if node.closing_loc
+ bounds(node.closing_loc)
+ node.closing == "]" ? on_rbracket("]") : on_rparen(")")
+ end
bounds(node.location)
on_fndptn(constant, left, requireds, right)
end
@@ -1721,6 +2144,10 @@ module Prism
# ^^^^^^^^^^
def visit_flip_flop_node(node)
left = visit(node.left)
+
+ bounds(node.operator_loc)
+ on_op(node.operator)
+
right = visit(node.right)
bounds(node.location)
@@ -1740,8 +2167,18 @@ module Prism
# for foo in bar do end
# ^^^^^^^^^^^^^^^^^^^^^
def visit_for_node(node)
+ bounds(node.for_keyword_loc)
+ on_kw("for")
+
index = visit(node.index)
+ bounds(node.in_keyword_loc)
+ on_kw("in")
+
collection = visit(node.collection)
+ if node.do_keyword_loc
+ bounds(node.do_keyword_loc)
+ on_kw("do")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -1750,6 +2187,9 @@ module Prism
visit(node.statements)
end
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+
bounds(node.location)
on_for(index, collection, statements)
end
@@ -1758,6 +2198,7 @@ module Prism
# ^^^
def visit_forwarding_arguments_node(node)
bounds(node.location)
+ on_op("...")
on_args_forward
end
@@ -1765,6 +2206,7 @@ module Prism
# ^^^
def visit_forwarding_parameter_node(node)
bounds(node.location)
+ on_op("...")
on_args_forward
end
@@ -1774,6 +2216,9 @@ module Prism
# super {}
# ^^^^^^^^
def visit_forwarding_super_node(node)
+ bounds(node.keyword_loc)
+ on_kw("super")
+
if node.block.nil?
bounds(node.location)
on_zsuper
@@ -1794,12 +2239,13 @@ module Prism
# $foo = 1
# ^^^^^^^^
- #
- # $foo, $bar = 1
- # ^^^^ ^^^^
def visit_global_variable_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_gvar(node.name.to_s))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -1858,6 +2304,9 @@ module Prism
# {}
# ^^
def visit_hash_node(node)
+ bounds(node.opening_loc)
+ on_lbrace("{")
+
elements =
if node.elements.any?
args = visit_all(node.elements)
@@ -1866,6 +2315,8 @@ module Prism
on_assoclist_from_args(args)
end
+ bounds(node.closing_loc)
+ on_rbrace("}")
bounds(node.location)
on_hash(elements)
end
@@ -1874,6 +2325,15 @@ module Prism
# ^^
def visit_hash_pattern_node(node)
constant = visit(node.constant)
+
+ if node.constant
+ bounds(node.opening_loc)
+ node.opening == "[" ? on_lbracket("[") : on_lparen("(")
+ elsif node.opening_loc
+ bounds(node.opening_loc)
+ on_lbrace("{")
+ end
+
elements =
if node.elements.any? || !node.rest.nil?
node.elements.map do |element|
@@ -1896,12 +2356,21 @@ module Prism
rest =
case node.rest
when AssocSplatNode
+ bounds(node.rest.operator_loc)
+ on_op("**")
visit(node.rest.value)
when NoKeywordsParameterNode
bounds(node.rest.location)
on_var_field(visit(node.rest))
end
+ if node.constant
+ bounds(node.closing_loc)
+ node.closing == "]" ? on_rbracket("]") : on_rparen(")")
+ elsif node.closing_loc
+ bounds(node.closing_loc)
+ on_rbrace("}")
+ end
bounds(node.location)
on_hshptn(constant, elements, rest)
end
@@ -1917,13 +2386,27 @@ module Prism
def visit_if_node(node)
if node.then_keyword == "?"
predicate = visit(node.predicate)
+
+ bounds(node.then_keyword_loc)
+ on_op("?")
+
truthy = visit(node.statements.body.first)
+
+ bounds(node.subsequent.else_keyword_loc)
+ on_op(":")
+
falsy = visit(node.subsequent.statements.body.first)
bounds(node.location)
on_ifop(predicate, truthy, falsy)
elsif node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset)
+ bounds(node.if_keyword_loc)
+ on_kw(node.if_keyword)
predicate = visit(node.predicate)
+ if node.then_keyword_loc && node.then_keyword != "?"
+ bounds(node.then_keyword_loc)
+ on_kw("then")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -1933,6 +2416,11 @@ module Prism
end
subsequent = visit(node.subsequent)
+ if node.end_keyword_loc && !node.subsequent
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
if node.if_keyword == "if"
on_if(predicate, statements, subsequent)
@@ -1941,6 +2429,8 @@ module Prism
end
else
statements = visit(node.statements.body.first)
+ bounds(node.if_keyword_loc)
+ on_kw(node.if_keyword)
predicate = visit(node.predicate)
bounds(node.location)
@@ -1972,7 +2462,14 @@ module Prism
# This is a special case where we're not going to call on_in directly
# because we don't have access to the subsequent. Instead, we'll return
# the component parts and let the parent node handle it.
+ bounds(node.in_loc)
+ on_kw("in")
+
pattern = visit_pattern_node(node.pattern)
+ if node.then_loc
+ bounds(node.then_loc)
+ on_kw("then")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -1988,8 +2485,15 @@ module Prism
# ^^^^^^^^^^^^^^^
def visit_index_operator_write_node(node)
receiver = visit(node.receiver)
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
bounds(node.location)
target = on_aref_field(receiver, arguments)
@@ -2005,8 +2509,15 @@ module Prism
# ^^^^^^^^^^^^^^^^
def visit_index_and_write_node(node)
receiver = visit(node.receiver)
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
bounds(node.location)
target = on_aref_field(receiver, arguments)
@@ -2022,8 +2533,15 @@ module Prism
# ^^^^^^^^^^^^^^^^
def visit_index_or_write_node(node)
receiver = visit(node.receiver)
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
bounds(node.location)
target = on_aref_field(receiver, arguments)
@@ -2039,8 +2557,15 @@ module Prism
# ^^^^^^^^
def visit_index_target_node(node)
receiver = visit(node.receiver)
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
bounds(node.location)
on_aref_field(receiver, arguments)
end
@@ -2057,6 +2582,10 @@ module Prism
def visit_instance_variable_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_ivar(node.name.to_s))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -2159,20 +2688,37 @@ module Prism
# "foo #{bar}"
# ^^^^^^^^^^^^
def visit_interpolated_string_node(node)
- if node.opening&.start_with?("<<~")
- heredoc = visit_heredoc_string_node(node)
+ with_string_bounds(node) do
+ if node.opening&.start_with?("<<~")
+ heredoc = visit_heredoc_string_node(node)
- bounds(node.location)
- on_string_literal(heredoc)
- elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? }
- first, *rest = node.parts
- rest.inject(visit(first)) do |content, part|
- concat = visit(part)
+ bounds(node.location)
+ on_string_literal(heredoc)
+ elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? }
+ first, *rest = node.parts
+ rest.inject(visit(first)) do |content, part|
+ concat = visit(part)
+
+ bounds(part.location)
+ on_string_concat(content, concat)
+ end
+ else
+ bounds(node.parts.first.location)
+ parts =
+ node.parts.inject(on_string_content) do |content, part|
+ on_string_add(content, visit_string_content(part))
+ end
- bounds(part.location)
- on_string_concat(content, concat)
+ bounds(node.location)
+ on_string_literal(parts)
end
- else
+ end
+ end
+
+ # :"foo #{bar}"
+ # ^^^^^^^^^^^^^
+ def visit_interpolated_symbol_node(node)
+ with_string_bounds(node) do
bounds(node.parts.first.location)
parts =
node.parts.inject(on_string_content) do |content, part|
@@ -2180,40 +2726,29 @@ module Prism
end
bounds(node.location)
- on_string_literal(parts)
+ on_dyna_symbol(parts)
end
end
- # :"foo #{bar}"
- # ^^^^^^^^^^^^^
- def visit_interpolated_symbol_node(node)
- bounds(node.parts.first.location)
- parts =
- node.parts.inject(on_string_content) do |content, part|
- on_string_add(content, visit_string_content(part))
- end
-
- bounds(node.location)
- on_dyna_symbol(parts)
- end
-
# `foo #{bar}`
# ^^^^^^^^^^^^
def visit_interpolated_x_string_node(node)
- if node.opening.start_with?("<<~")
- heredoc = visit_heredoc_x_string_node(node)
+ with_string_bounds(node) do
+ if node.opening.start_with?("<<~")
+ heredoc = visit_heredoc_x_string_node(node)
- bounds(node.location)
- on_xstring_literal(heredoc)
- else
- bounds(node.parts.first.location)
- parts =
- node.parts.inject(on_xstring_new) do |content, part|
- on_xstring_add(content, visit_string_content(part))
- end
+ bounds(node.location)
+ on_xstring_literal(heredoc)
+ else
+ bounds(node.parts.first.location)
+ parts =
+ node.parts.inject(on_xstring_new) do |content, part|
+ on_xstring_add(content, visit_string_content(part))
+ end
- bounds(node.location)
- on_xstring_literal(parts)
+ bounds(node.location)
+ on_xstring_literal(parts)
+ end
end
end
@@ -2254,6 +2789,9 @@ module Prism
# def foo(**); end
# ^^
def visit_keyword_rest_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("**")
+
if node.name_loc.nil?
bounds(node.location)
on_kwrest_param(nil)
@@ -2273,6 +2811,11 @@ module Prism
parameters =
if node.parameters.is_a?(BlockParametersNode)
+ if node.parameters.opening_loc
+ bounds(node.parameters.opening_loc)
+ on_lparen("(")
+ end
+
# Ripper does not track block-locals within lambdas, so we skip
# directly to the parameters here.
params =
@@ -2283,6 +2826,13 @@ module Prism
visit(node.parameters.parameters)
end
+ visit_all(node.parameters.locals)
+
+ if node.parameters.closing_loc
+ bounds(node.parameters.closing_loc)
+ on_rparen(")")
+ end
+
if node.parameters.opening_loc.nil?
params
else
@@ -2295,9 +2845,11 @@ module Prism
end
braces = node.opening == "{"
+ bounds(node.opening_loc)
if braces
- bounds(node.opening_loc)
on_tlambeg(node.opening)
+ else
+ on_kw("do")
end
body =
@@ -2310,7 +2862,7 @@ module Prism
braces ? stmts : on_bodystmt(stmts, nil, nil, nil)
when StatementsNode
stmts = node.body.body
- stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
+ stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
stmts = visit_statements_node_body(stmts)
bounds(node.body.location)
@@ -2321,6 +2873,13 @@ module Prism
raise
end
+ bounds(node.closing_loc)
+ if braces
+ on_rbrace("}")
+ else
+ on_kw("end")
+ end
+
bounds(node.location)
on_lambda(parameters, body)
end
@@ -2337,6 +2896,10 @@ module Prism
def visit_local_variable_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_ident(node.name_loc.slice))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -2411,6 +2974,8 @@ module Prism
# ^^^^^^^^^^
def visit_match_predicate_node(node)
value = visit(node.value)
+ bounds(node.operator_loc)
+ on_kw("in")
pattern = on_in(visit_pattern_node(node.pattern), nil, nil)
on_case(value, pattern)
@@ -2420,6 +2985,10 @@ module Prism
# ^^^^^^^^^^
def visit_match_required_node(node)
value = visit(node.value)
+
+ bounds(node.operator_loc)
+ on_op("=>")
+
pattern = on_in(visit_pattern_node(node.pattern), nil, nil)
on_case(value, pattern)
@@ -2433,13 +3002,16 @@ module Prism
# A node that is missing from the syntax tree. This is only used in the
# case of a syntax error.
- def visit_missing_node(node)
- raise "Cannot visit missing nodes directly."
+ def visit_error_recovery_node(node)
+ raise "Cannot visit error recovery nodes directly."
end
# module Foo; end
# ^^^^^^^^^^^^^^^
def visit_module_node(node)
+ bounds(node.module_keyword_loc)
+ on_kw("module")
+
constant_path =
if node.constant_path.is_a?(ConstantReadNode)
bounds(node.constant_path.location)
@@ -2450,6 +3022,9 @@ module Prism
bodystmt = visit_body_node(node.constant_path.location, node.body, true)
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+
bounds(node.location)
on_module(constant_path, bodystmt)
end
@@ -2457,9 +3032,19 @@ module Prism
# (foo, bar), bar = qux
# ^^^^^^^^^^
def visit_multi_target_node(node)
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
bounds(node.location)
targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, true)
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
if node.lparen_loc.nil?
targets
else
@@ -2511,9 +3096,22 @@ module Prism
# foo, bar = baz
# ^^^^^^^^^^^^^^
def visit_multi_write_node(node)
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
bounds(node.location)
targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, true)
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
+ bounds(node.operator_loc)
+ on_op("=")
+
unless node.lparen_loc.nil?
bounds(node.lparen_loc)
targets = on_mlhs_paren(targets)
@@ -2531,6 +3129,9 @@ module Prism
# next foo
# ^^^^^^^^
def visit_next_node(node)
+ bounds(node.keyword_loc)
+ on_kw("next")
+
if node.arguments.nil?
bounds(node.location)
on_next(on_args_new)
@@ -2549,9 +3150,24 @@ module Prism
on_var_ref(on_kw("nil"))
end
+ # def foo(&nil); end
+ # ^^^^
+ def visit_no_block_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("&")
+ bounds(node.keyword_loc)
+ on_kw("nil")
+ bounds(node.location)
+ on_blockarg(:nil)
+ end
+
# def foo(**nil); end
# ^^^^^
def visit_no_keywords_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("**")
+ bounds(node.keyword_loc)
+ on_kw("nil")
bounds(node.location)
on_nokw_param(nil)
@@ -2584,7 +3200,11 @@ module Prism
# ^^^^^^^
def visit_optional_parameter_node(node)
bounds(node.name_loc)
- name = visit_token(node.name.to_s)
+ name = on_ident(node.name.to_s)
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit(node.value)
[name, value]
@@ -2594,6 +3214,14 @@ module Prism
# ^^^^^^
def visit_or_node(node)
left = visit(node.left)
+
+ bounds(node.operator_loc)
+ if node.operator == "or"
+ on_kw("or")
+ else
+ on_op("||")
+ end
+
right = visit(node.right)
bounds(node.location)
@@ -2617,9 +3245,19 @@ module Prism
# Visit a destructured positional parameter node.
private def visit_destructured_parameter_node(node)
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
bounds(node.location)
targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, false)
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
bounds(node.lparen_loc)
on_mlhs_paren(targets)
end
@@ -2630,6 +3268,9 @@ module Prism
# (1)
# ^^^
def visit_parentheses_node(node)
+ bounds(node.opening_loc)
+ on_lparen("(")
+
body =
if node.body.nil?
on_stmts_add(on_stmts_new, on_void_stmt)
@@ -2637,6 +3278,8 @@ module Prism
visit(node.body)
end
+ bounds(node.closing_loc)
+ on_rparen(")")
bounds(node.location)
on_paren(body)
end
@@ -2644,8 +3287,15 @@ module Prism
# foo => ^(bar)
# ^^^^^^
def visit_pinned_expression_node(node)
+ bounds(node.operator_loc)
+ on_op("^")
+ bounds(node.lparen_loc)
+ on_lparen("(")
+
expression = visit(node.expression)
+ bounds(node.rparen_loc)
+ on_rparen(")")
bounds(node.location)
on_begin(expression)
end
@@ -2653,12 +3303,20 @@ module Prism
# foo = 1 and bar => ^foo
# ^^^^
def visit_pinned_variable_node(node)
+ bounds(node.operator_loc)
+ on_op("^")
+
visit(node.variable)
end
# END {}
# ^^^^^^
def visit_post_execution_node(node)
+ bounds(node.keyword_loc)
+ on_kw("END")
+ bounds(node.opening_loc)
+ on_lbrace("{")
+
statements =
if node.statements.nil?
bounds(node.location)
@@ -2667,6 +3325,8 @@ module Prism
visit(node.statements)
end
+ bounds(node.closing_loc)
+ on_rbrace("}")
bounds(node.location)
on_END(statements)
end
@@ -2674,6 +3334,11 @@ module Prism
# BEGIN {}
# ^^^^^^^^
def visit_pre_execution_node(node)
+ bounds(node.keyword_loc)
+ on_kw("BEGIN")
+ bounds(node.opening_loc)
+ on_lbrace("{")
+
statements =
if node.statements.nil?
bounds(node.location)
@@ -2682,6 +3347,8 @@ module Prism
visit(node.statements)
end
+ bounds(node.closing_loc)
+ on_rbrace("}")
bounds(node.location)
on_BEGIN(statements)
end
@@ -2689,7 +3356,7 @@ module Prism
# The top-level program node.
def visit_program_node(node)
body = node.statements.body
- body << nil if body.empty?
+ body = [nil] if body.empty?
statements = visit_statements_node_body(body)
bounds(node.location)
@@ -2700,6 +3367,10 @@ module Prism
# ^^^^
def visit_range_node(node)
left = visit(node.left)
+
+ bounds(node.operator_loc)
+ on_op(node.operator)
+
right = visit(node.right)
bounds(node.location)
@@ -2720,6 +3391,7 @@ module Prism
# ^^^^
def visit_redo_node(node)
bounds(node.location)
+ on_kw("redo")
on_redo
end
@@ -2762,6 +3434,9 @@ module Prism
# foo rescue bar
# ^^^^^^^^^^^^^^
def visit_rescue_modifier_node(node)
+ bounds(node.keyword_loc)
+ on_kw("rescue")
+
expression = visit_write_value(node.expression)
rescue_expression = visit(node.rescue_expression)
@@ -2772,6 +3447,9 @@ module Prism
# begin; rescue; end
# ^^^^^^^
def visit_rescue_node(node)
+ bounds(node.keyword_loc)
+ on_kw("rescue")
+
exceptions =
case node.exceptions.length
when 0
@@ -2809,6 +3487,11 @@ module Prism
end
end
+ if node.operator_loc
+ bounds(node.operator_loc)
+ on_op("=>")
+ end
+
reference = visit(node.reference)
statements =
if node.statements.nil?
@@ -2830,12 +3513,15 @@ module Prism
# def foo(*); end
# ^
def visit_rest_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("*")
+
if node.name_loc.nil?
bounds(node.location)
on_rest_param(nil)
else
bounds(node.name_loc)
- on_rest_param(visit_token(node.name.to_s))
+ on_rest_param(on_ident(node.name.to_s))
end
end
@@ -2843,6 +3529,7 @@ module Prism
# ^^^^^
def visit_retry_node(node)
bounds(node.location)
+ on_kw("retry")
on_retry
end
@@ -2852,6 +3539,9 @@ module Prism
# return 1
# ^^^^^^^^
def visit_return_node(node)
+ bounds(node.keyword_loc)
+ on_kw("return")
+
if node.arguments.nil?
bounds(node.location)
on_return0
@@ -2878,9 +3568,17 @@ module Prism
# class << self; end
# ^^^^^^^^^^^^^^^^^^
def visit_singleton_class_node(node)
+ bounds(node.class_keyword_loc)
+ on_kw("class")
+ bounds(node.operator_loc)
+ on_op("<<")
+
expression = visit(node.expression)
bodystmt = visit_body_node(node.body&.location || node.end_keyword_loc, node.body)
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+
bounds(node.location)
on_sclass(expression, bodystmt)
end
@@ -2915,6 +3613,8 @@ module Prism
# def foo(*); bar(*); end
# ^
def visit_splat_node(node)
+ bounds(node.operator_loc)
+ on_op("*")
visit(node.expression)
end
@@ -2937,26 +3637,68 @@ module Prism
# "foo"
# ^^^^^
def visit_string_node(node)
- if (content = node.content).empty?
- bounds(node.location)
- on_string_literal(on_string_content)
- elsif (opening = node.opening) == "?"
- bounds(node.location)
- on_CHAR("?#{node.content}")
- elsif opening.start_with?("<<~")
- heredoc = visit_heredoc_string_node(node.to_interpolated)
+ with_string_bounds(node) do
+ if (content = node.content).empty?
+ bounds(node.location)
+ on_string_literal(on_string_content)
+ elsif (opening = node.opening) == "?"
+ bounds(node.location)
+ on_CHAR("?#{node.content}")
+ elsif opening.start_with?("<<~")
+ heredoc = visit_heredoc_string_node(node.to_interpolated)
- bounds(node.location)
- on_string_literal(heredoc)
- else
- bounds(node.content_loc)
- tstring_content = on_tstring_content(content)
+ bounds(node.location)
+ on_string_literal(heredoc)
+ else
+ bounds(node.content_loc)
+ tstring_content = on_tstring_content(content)
- bounds(node.location)
- on_string_literal(on_string_add(on_string_content, tstring_content))
+ bounds(node.location)
+ on_string_literal(on_string_add(on_string_content, tstring_content))
+ end
end
end
+ # Responsible for emitting the various string-like begin/end events
+ private def with_string_bounds(node)
+ # `foo "bar": baz` doesn't emit the closing location
+ assoc = !(opening = node.opening)&.include?(":") && node.closing&.end_with?(":")
+
+ is_heredoc = opening&.start_with?("<<")
+ if is_heredoc
+ bounds(node.opening_loc)
+ on_heredoc_beg(node.opening)
+ elsif opening&.start_with?(":", "%s")
+ bounds(node.opening_loc)
+ on_symbeg(node.opening)
+ elsif opening&.start_with?("`", "%x")
+ bounds(node.opening_loc)
+ on_backtick(node.opening)
+ elsif opening && !opening.start_with?("?")
+ bounds(node.opening_loc)
+ on_tstring_beg(opening)
+ end
+
+ result = yield
+ if assoc
+ if node.closing != ":"
+ bounds(node.closing_loc)
+ on_label_end(node.closing)
+ end
+ return result
+ end
+
+ if is_heredoc
+ bounds(node.closing_loc)
+ on_heredoc_end(node.closing)
+ elsif node.closing_loc
+ bounds(node.closing_loc)
+ on_tstring_end(node.closing)
+ end
+
+ result
+ end
+
# Ripper gives back the escaped string content but strips out the common
# leading whitespace. Prism gives back the unescaped string content and
# a location for the escaped string content. Unfortunately these don't
@@ -3034,42 +3776,39 @@ module Prism
# Visit a heredoc node that is representing a string.
private def visit_heredoc_string_node(node)
- bounds(node.opening_loc)
- on_heredoc_beg(node.opening)
-
bounds(node.location)
- result =
- visit_heredoc_node(node.parts, on_string_content) do |parts, part|
- on_string_add(parts, part)
- end
-
- bounds(node.closing_loc)
- on_heredoc_end(node.closing)
-
- result
+ visit_heredoc_node(node.parts, on_string_content) do |parts, part|
+ on_string_add(parts, part)
+ end
end
# Visit a heredoc node that is representing an xstring.
private def visit_heredoc_x_string_node(node)
- bounds(node.opening_loc)
- on_heredoc_beg(node.opening)
-
bounds(node.location)
- result =
- visit_heredoc_node(node.parts, on_xstring_new) do |parts, part|
- on_xstring_add(parts, part)
- end
-
- bounds(node.closing_loc)
- on_heredoc_end(node.closing)
-
- result
+ visit_heredoc_node(node.parts, on_xstring_new) do |parts, part|
+ on_xstring_add(parts, part)
+ end
end
# super(foo)
# ^^^^^^^^^^
def visit_super_node(node)
- arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.rparen_loc || node.location))
+ bounds(node.keyword_loc)
+ on_kw("super")
+
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
+ arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.rparen_loc || node.location))
+
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
+ block = visit(block_node)
if !node.lparen_loc.nil?
bounds(node.lparen_loc)
@@ -3079,35 +3818,36 @@ module Prism
bounds(node.location)
call = on_super(arguments)
- if block.nil?
- call
- else
+ if block_node
bounds(node.block.location)
on_method_add_block(call, block)
+ else
+ call
end
end
# :foo
# ^^^^
def visit_symbol_node(node)
- if (opening = node.opening)&.match?(/^%s|['"]:?$/)
- bounds(node.value_loc)
- content = on_string_content
-
- if !(value = node.value).empty?
- content = on_string_add(content, on_tstring_content(value))
+ with_string_bounds(node) do
+ if node.value_loc.nil?
+ bounds(node.location)
+ on_dyna_symbol(on_string_content)
+ elsif (opening = node.opening)&.match?(/^%s|['"]:?$/)
+ bounds(node.value_loc)
+ content = on_string_add(on_string_content, on_tstring_content(node.value))
+ bounds(node.location)
+ on_dyna_symbol(content)
+ elsif (closing = node.closing) == ":"
+ bounds(node.location)
+ on_label("#{node.value}:")
+ elsif opening.nil? && node.closing_loc.nil?
+ bounds(node.value_loc)
+ on_symbol_literal(visit_token(node.value))
+ else
+ bounds(node.value_loc)
+ on_symbol_literal(on_symbol(visit_token(node.value)))
end
-
- on_dyna_symbol(content)
- elsif (closing = node.closing) == ":"
- bounds(node.location)
- on_label("#{node.value}:")
- elsif opening.nil? && node.closing_loc.nil?
- bounds(node.value_loc)
- on_symbol_literal(visit_token(node.value))
- else
- bounds(node.value_loc)
- on_symbol_literal(on_symbol(visit_token(node.value)))
end
end
@@ -3121,6 +3861,9 @@ module Prism
# undef foo
# ^^^^^^^^^
def visit_undef_node(node)
+ bounds(node.keyword_loc)
+ on_kw("undef")
+
names = visit_all(node.names)
bounds(node.location)
@@ -3134,7 +3877,13 @@ module Prism
# ^^^^^^^^^^^^^^
def visit_unless_node(node)
if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset)
+ bounds(node.keyword_loc)
+ on_kw("unless")
predicate = visit(node.predicate)
+ if node.then_keyword_loc
+ bounds(node.then_keyword_loc)
+ on_kw("then")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -3144,10 +3893,17 @@ module Prism
end
else_clause = visit(node.else_clause)
+ if node.end_keyword_loc && !node.else_clause
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
on_unless(predicate, statements, else_clause)
else
statements = visit(node.statements.body.first)
+ bounds(node.keyword_loc)
+ on_kw("unless")
predicate = visit(node.predicate)
bounds(node.location)
@@ -3161,7 +3917,14 @@ module Prism
# bar until foo
# ^^^^^^^^^^^^^
def visit_until_node(node)
+ bounds(node.keyword_loc)
+ on_kw("until")
+
if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset)
+ if node.do_keyword_loc
+ bounds(node.do_keyword_loc)
+ on_kw("do")
+ end
predicate = visit(node.predicate)
statements =
if node.statements.nil?
@@ -3171,6 +3934,11 @@ module Prism
visit(node.statements)
end
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
on_until(predicate, statements)
else
@@ -3188,7 +3956,14 @@ module Prism
# This is a special case where we're not going to call on_when directly
# because we don't have access to the subsequent. Instead, we'll return
# the component parts and let the parent node handle it.
+ bounds(node.keyword_loc)
+ on_kw("when")
+
conditions = visit_arguments(node.conditions)
+ if node.then_keyword_loc
+ bounds(node.then_keyword_loc)
+ on_kw("then")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -3207,7 +3982,17 @@ module Prism
# ^^^^^^^^^^^^^
def visit_while_node(node)
if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset)
+ bounds(node.keyword_loc)
+ on_kw("while")
+ if node.do_keyword_loc
+ bounds(node.do_keyword_loc)
+ on_kw("do")
+ end
predicate = visit(node.predicate)
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_kw("end")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -3220,6 +4005,8 @@ module Prism
on_while(predicate, statements)
else
statements = visit(node.statements.body.first)
+ bounds(node.keyword_loc)
+ on_kw("while")
predicate = visit(node.predicate)
bounds(node.location)
@@ -3230,20 +4017,22 @@ module Prism
# `foo`
# ^^^^^
def visit_x_string_node(node)
- if node.unescaped.empty?
- bounds(node.location)
- on_xstring_literal(on_xstring_new)
- elsif node.opening.start_with?("<<~")
- heredoc = visit_heredoc_x_string_node(node.to_interpolated)
+ with_string_bounds(node) do
+ if node.unescaped.empty?
+ bounds(node.location)
+ on_xstring_literal(on_xstring_new)
+ elsif node.opening.start_with?("<<~")
+ heredoc = visit_heredoc_x_string_node(node.to_interpolated)
- bounds(node.location)
- on_xstring_literal(heredoc)
- else
- bounds(node.content_loc)
- content = on_tstring_content(node.content)
+ bounds(node.location)
+ on_xstring_literal(heredoc)
+ else
+ bounds(node.content_loc)
+ content = on_tstring_content(node.content)
- bounds(node.location)
- on_xstring_literal(on_xstring_add(on_xstring_new, content))
+ bounds(node.location)
+ on_xstring_literal(on_xstring_add(on_xstring_new, content))
+ end
end
end
@@ -3253,10 +4042,18 @@ module Prism
# yield 1
# ^^^^^^^
def visit_yield_node(node)
+ bounds(node.keyword_loc)
+ on_kw("yield")
+
if node.arguments.nil? && node.lparen_loc.nil?
bounds(node.location)
on_yield0
else
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
arguments =
if node.arguments.nil?
bounds(node.location)
@@ -3266,6 +4063,8 @@ module Prism
end
unless node.lparen_loc.nil?
+ bounds(node.rparen_loc)
+ on_rparen(")")
bounds(node.lparen_loc)
arguments = on_paren(arguments)
end
@@ -3279,7 +4078,11 @@ module Prism
# Lazily initialize the parse result.
def result
- @result ||= Prism.parse(source, partial_script: true)
+ @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true, encoding: source.encoding)
+ end
+
+ def line_and_column_cache
+ @line_and_column_cache ||= LineAndColumnCache.new(result.source)
end
##########################################################################
@@ -3300,30 +4103,34 @@ module Prism
# Visit the string content of a particular node. This method is used to
# split into the various token types.
def visit_token(token, allow_keywords = true)
- case token
- when "."
+ if token == "."
on_period(token)
- when "`"
+ elsif token == "`"
on_backtick(token)
- when *(allow_keywords ? KEYWORDS : [])
+ elsif allow_keywords && KEYWORDS.include?(token)
on_kw(token)
- when /^_/
+ elsif token.start_with?("_")
on_ident(token)
- when /^[[:upper:]]\w*$/
+ elsif token.match?(/^[[:upper:]]\w*$/)
on_const(token)
- when /^@@/
+ elsif token.start_with?("@@")
on_cvar(token)
- when /^@/
+ elsif token.start_with?("@")
on_ivar(token)
- when /^\$/
+ elsif token.start_with?("$")
on_gvar(token)
- when /^[[:punct:]]/
+ elsif token.match?(/^[[:punct:]]/)
on_op(token)
else
on_ident(token)
end
end
+ # Visit either `.`, `&.`, or `::`.
+ def visit_call_operator(token)
+ token == "." ? on_period(token) : on_op(token)
+ end
+
# Visit a node that represents a number. We need to explicitly handle the
# unary - operator.
def visit_number_node(node)
@@ -3331,6 +4138,9 @@ module Prism
location = node.location
if slice[0] == "-"
+ bounds(location.copy(length: 1))
+ on_op("-")
+
bounds(location.copy(start_offset: location.start_offset + 1))
value = yield slice[1..-1]
@@ -3379,26 +4189,24 @@ module Prism
# This method is responsible for updating lineno and column information
# to reflect the current node.
- #
- # This method could be drastically improved with some caching on the start
- # of every line, but for now it's good enough.
def bounds(location)
- @lineno = location.start_line
- @column = location.start_column
+ @lineno, @column = line_and_column_cache.line_and_column(location.start_offset)
end
+ # :startdoc:
+
##########################################################################
# Ripper interface
##########################################################################
# :stopdoc:
def _dispatch_0; end
- def _dispatch_1(_); end
- def _dispatch_2(_, _); end
- def _dispatch_3(_, _, _); end
- def _dispatch_4(_, _, _, _); end
- def _dispatch_5(_, _, _, _, _); end
- def _dispatch_7(_, _, _, _, _, _, _); end
+ def _dispatch_1(arg); arg end
+ def _dispatch_2(arg, _); arg end
+ def _dispatch_3(arg, _, _); arg end
+ def _dispatch_4(arg, _, _, _); arg end
+ def _dispatch_5(arg, _, _, _, _); arg end
+ def _dispatch_7(arg, _, _, _, _, _, _); arg end
# :startdoc:
#
diff --git a/lib/prism/translation/ripper/filter.rb b/lib/prism/translation/ripper/filter.rb
new file mode 100644
index 0000000000..19deef2d37
--- /dev/null
+++ b/lib/prism/translation/ripper/filter.rb
@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+
+module Prism
+ module Translation
+ class Ripper
+ class Filter # :nodoc:
+ # :stopdoc:
+ def initialize(src, filename = '-', lineno = 1)
+ @__lexer = Lexer.new(src, filename, lineno)
+ @__line = nil
+ @__col = nil
+ @__state = nil
+ end
+
+ def filename
+ @__lexer.filename
+ end
+
+ def lineno
+ @__line
+ end
+
+ def column
+ @__col
+ end
+
+ def state
+ @__state
+ end
+
+ def parse(init = nil)
+ data = init
+ @__lexer.lex.each do |pos, event, tok, state|
+ @__line, @__col = *pos
+ @__state = state
+ data = if respond_to?(event, true)
+ then __send__(event, tok, data)
+ else on_default(event, tok, data)
+ end
+ end
+ data
+ end
+
+ private
+
+ def on_default(event, token, data)
+ data
+ end
+ # :startdoc:
+ end
+ end
+ end
+end
diff --git a/lib/prism/translation/ripper/lexer.rb b/lib/prism/translation/ripper/lexer.rb
new file mode 100644
index 0000000000..c6aeae4bd7
--- /dev/null
+++ b/lib/prism/translation/ripper/lexer.rb
@@ -0,0 +1,133 @@
+# frozen_string_literal: true
+# :markup: markdown
+
+require_relative "../ripper"
+
+module Prism
+ module Translation
+ class Ripper
+ class Lexer < Ripper # :nodoc:
+ class State # :nodoc:
+ attr_reader :to_int, :to_s
+
+ def initialize(i)
+ @to_int = i
+ @to_s = Ripper.lex_state_name(i)
+ freeze
+ end
+
+ def [](index)
+ case index
+ when 0, :to_int
+ @to_int
+ when 1, :to_s
+ @to_s
+ else
+ nil
+ end
+ end
+
+ alias to_i to_int
+ alias inspect to_s
+ def pretty_print(q) q.text(to_s) end
+ def ==(i) super or to_int == i end
+ def &(i) self.class.new(to_int & i) end
+ def |(i) self.class.new(to_int | i) end
+ def allbits?(i) to_int.allbits?(i) end
+ def anybits?(i) to_int.anybits?(i) end
+ def nobits?(i) to_int.nobits?(i) end
+
+ # Instances are frozen and there are only a handful of them so we
+ # cache them here.
+ STATES = Hash.new { |hash, key| hash[key] = State.new(key) }
+ private_constant :STATES
+
+ def self.[](i)
+ STATES[i]
+ end
+ end
+
+ class Elem # :nodoc:
+ attr_accessor :pos, :event, :tok, :state, :message
+
+ def initialize(pos, event, tok, state, message = nil)
+ @pos = pos
+ @event = event
+ @tok = tok
+ @state = State[state]
+ @message = message
+ end
+
+ def [](index)
+ case index
+ when 0, :pos
+ @pos
+ when 1, :event
+ @event
+ when 2, :tok
+ @tok
+ when 3, :state
+ @state
+ when 4, :message
+ @message
+ else
+ nil
+ end
+ end
+
+ def inspect
+ "#<#{self.class}: #{event}@#{pos[0]}:#{pos[1]}:#{state}: #{tok.inspect}#{": " if message}#{message}>"
+ end
+
+ alias to_s inspect
+
+ def pretty_print(q)
+ q.group(2, "#<#{self.class}:", ">") {
+ q.breakable
+ q.text("#{event}@#{pos[0]}:#{pos[1]}")
+ q.breakable
+ state.pretty_print(q)
+ q.breakable
+ q.text("token: ")
+ tok.pretty_print(q)
+ if message
+ q.breakable
+ q.text("message: ")
+ q.text(message)
+ end
+ }
+ end
+
+ def to_a
+ if @message
+ [@pos, @event, @tok, @state, @message]
+ else
+ [@pos, @event, @tok, @state]
+ end
+ end
+ end
+
+ # Pretty much just the same as Prism.lex_compat.
+ def lex(raise_errors: false)
+ Ripper.lex(@source, filename, lineno, raise_errors: raise_errors)
+ end
+
+ # Returns the lex_compat result wrapped in `Elem`. Errors are omitted.
+ # Since ripper is a streaming parser, tokens are expected to be emitted in the order
+ # that the parser encounters them. This is not implemented.
+ def parse(...)
+ lex(...).map do |position, event, token, state|
+ Elem.new(position, event, token, state.to_int)
+ end
+ end
+
+ # Similar to parse but ripper sorts the elements by position in the source. Also
+ # includes errors. Since prism does error recovery, in cases of syntax errors
+ # the result may differ greatly compared to ripper.
+ def scan(...)
+ parse(...)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/prism/translation/ripper/sexp.rb b/lib/prism/translation/ripper/sexp.rb
index dc26a639a3..46c0333544 100644
--- a/lib/prism/translation/ripper/sexp.rb
+++ b/lib/prism/translation/ripper/sexp.rb
@@ -1,4 +1,5 @@
# frozen_string_literal: true
+# :markup: markdown
require_relative "../ripper"
@@ -7,9 +8,7 @@ module Prism
class Ripper
# This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that
# returns the arrays of [type, *children].
- class SexpBuilder < Ripper
- # :stopdoc:
-
+ class SexpBuilder < Ripper # :nodoc:
attr_reader :error
private
@@ -64,16 +63,12 @@ module Prism
remove_method :on_parse_error
alias on_parse_error on_error
alias compile_error on_error
-
- # :startdoc:
end
# This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that
# returns the same values as ::Ripper::SexpBuilder except with a couple of
# niceties that flatten linked lists into arrays.
- class SexpBuilderPP < SexpBuilder
- # :stopdoc:
-
+ class SexpBuilderPP < SexpBuilder # :nodoc:
private
def on_heredoc_dedent(val, width)
@@ -117,8 +112,6 @@ module Prism
alias_method "on_#{event}", :_dispatch_event_push
end
end
-
- # :startdoc:
end
end
end
diff --git a/lib/prism/translation/ripper/shim.rb b/lib/prism/translation/ripper/shim.rb
index 10e21cd16a..00ed625da3 100644
--- a/lib/prism/translation/ripper/shim.rb
+++ b/lib/prism/translation/ripper/shim.rb
@@ -2,4 +2,6 @@
# This writes the prism ripper translation into the Ripper constant so that
# users can transparently use Ripper without any changes.
+# :stopdoc:
Ripper = Prism::Translation::Ripper
+# :startdoc:
diff --git a/lib/prism/translation/ruby_parser.rb b/lib/prism/translation/ruby_parser.rb
index 8784e22d10..42bc5ee658 100644
--- a/lib/prism/translation/ruby_parser.rb
+++ b/lib/prism/translation/ruby_parser.rb
@@ -1,21 +1,27 @@
# frozen_string_literal: true
+# :markup: markdown
begin
- require "ruby_parser"
+ require "sexp"
rescue LoadError
- warn(%q{Error: Unable to load ruby_parser. Add `gem "ruby_parser"` to your Gemfile.})
+ warn(%q{Error: Unable to load sexp. Add `gem "sexp_processor"` to your Gemfile.})
exit(1)
end
+class RubyParser # :nodoc:
+ class SyntaxError < RuntimeError # :nodoc:
+ end
+end
+
module Prism
module Translation
# This module is the entry-point for converting a prism syntax tree into the
# seattlerb/ruby_parser gem's syntax tree.
class RubyParser
# A prism visitor that builds Sexp objects.
- class Compiler < ::Prism::Compiler
+ class Compiler < ::Prism::Compiler # :nodoc:
# This is the name of the file that we are compiling. We set it on every
- # Sexp object that is generated, and also use it to compile __FILE__
+ # Sexp object that is generated, and also use it to compile `__FILE__`
# nodes.
attr_reader :file
@@ -131,7 +137,7 @@ module Prism
# $+
# ^^
def visit_back_reference_read_node(node)
- s(node, :back_ref, node.name.name.delete_prefix("$").to_sym)
+ s(node, :back_ref, node.name.to_s.delete_prefix("$").to_sym)
end
# begin end
@@ -366,14 +372,18 @@ module Prism
visit(node.constant_path)
end
- if node.body.nil?
- s(node, :class, name, visit(node.superclass))
- elsif node.body.is_a?(StatementsNode)
- compiler = copy_compiler(in_def: false)
- s(node, :class, name, visit(node.superclass)).concat(node.body.body.map { |child| child.accept(compiler) })
- else
- s(node, :class, name, visit(node.superclass), node.body.accept(copy_compiler(in_def: false)))
- end
+ result =
+ if node.body.nil?
+ s(node, :class, name, visit(node.superclass))
+ elsif node.body.is_a?(StatementsNode)
+ compiler = copy_compiler(in_def: false)
+ s(node, :class, name, visit(node.superclass)).concat(node.body.body.map { |child| child.accept(compiler) })
+ else
+ s(node, :class, name, visit(node.superclass), node.body.accept(copy_compiler(in_def: false)))
+ end
+
+ attach_comments(result, node)
+ result
end
# @@foo
@@ -384,9 +394,6 @@ module Prism
# @@foo = 1
# ^^^^^^^^^
- #
- # @@foo, @@bar = 1
- # ^^^^^ ^^^^^
def visit_class_variable_write_node(node)
s(node, class_variable_write_type, node.name, visit_write_value(node.value))
end
@@ -524,7 +531,9 @@ module Prism
s(node, :defs, visit(node.receiver), name)
end
+ attach_comments(result, node)
result.line(node.name_loc.start_line)
+
if node.parameters.nil?
result << s(node, :args).line(node.name_loc.start_line)
else
@@ -639,9 +648,6 @@ module Prism
# $foo = 1
# ^^^^^^^^
- #
- # $foo, $bar = 1
- # ^^^^ ^^^^
def visit_global_variable_write_node(node)
s(node, :gasgn, node.name, visit_write_value(node.value))
end
@@ -787,9 +793,6 @@ module Prism
# @foo = 1
# ^^^^^^^^
- #
- # @foo, @bar = 1
- # ^^^^ ^^^^
def visit_instance_variable_write_node(node)
s(node, :iasgn, node.name, visit_write_value(node.value))
end
@@ -976,8 +979,8 @@ module Prism
def visit_lambda_node(node)
parameters =
case node.parameters
- when nil, NumberedParametersNode
- s(node, :args)
+ when nil, ItParametersNode, NumberedParametersNode
+ 0
else
visit(node.parameters)
end
@@ -1001,9 +1004,6 @@ module Prism
# foo = 1
# ^^^^^^^
- #
- # foo, bar = 1
- # ^^^ ^^^
def visit_local_variable_write_node(node)
s(node, :lasgn, node.name, visit_write_value(node.value))
end
@@ -1059,8 +1059,8 @@ module Prism
# A node that is missing from the syntax tree. This is only used in the
# case of a syntax error. The parser gem doesn't have such a concept, so
# we invent our own here.
- def visit_missing_node(node)
- raise "Cannot visit missing node directly"
+ def visit_error_recovery_node(node)
+ raise "Cannot visit error recovery node directly"
end
# module Foo; end
@@ -1073,14 +1073,18 @@ module Prism
visit(node.constant_path)
end
- if node.body.nil?
- s(node, :module, name)
- elsif node.body.is_a?(StatementsNode)
- compiler = copy_compiler(in_def: false)
- s(node, :module, name).concat(node.body.body.map { |child| child.accept(compiler) })
- else
- s(node, :module, name, node.body.accept(copy_compiler(in_def: false)))
- end
+ result =
+ if node.body.nil?
+ s(node, :module, name)
+ elsif node.body.is_a?(StatementsNode)
+ compiler = copy_compiler(in_def: false)
+ s(node, :module, name).concat(node.body.body.map { |child| child.accept(compiler) })
+ else
+ s(node, :module, name, node.body.accept(copy_compiler(in_def: false)))
+ end
+
+ attach_comments(result, node)
+ result
end
# foo, bar = baz
@@ -1136,6 +1140,12 @@ module Prism
s(node, :nil)
end
+ # def foo(&nil); end
+ # ^^^^
+ def visit_no_block_parameter_node(node)
+ :"&nil"
+ end
+
# def foo(**nil); end
# ^^^^^
def visit_no_keywords_parameter_node(node)
@@ -1188,7 +1198,7 @@ module Prism
# ^^^^^^^^^
def visit_parameters_node(node)
children =
- node.compact_child_nodes.map do |element|
+ node.each_child_node.map do |element|
if element.is_a?(MultiTargetNode)
visit_destructured_parameter(element)
else
@@ -1537,6 +1547,17 @@ module Prism
private
+ # Attach prism comments to the given sexp.
+ def attach_comments(sexp, node)
+ return unless node.comments
+ return if node.comments.empty?
+
+ extra = node.location.start_line - node.comments.last.location.start_line
+ comments = node.comments.map(&:slice)
+ comments.concat([nil] * [0, extra].max)
+ sexp.comments = comments.join("\n")
+ end
+
# Create a new compiler with the given options.
def copy_compiler(in_def: self.in_def, in_pattern: self.in_pattern)
Compiler.new(file, in_def: in_def, in_pattern: in_pattern)
@@ -1615,6 +1636,14 @@ module Prism
translate(Prism.parse_file(filepath, partial_script: true), filepath)
end
+ # Parse the give file and translate it into the
+ # seattlerb/ruby_parser gem's Sexp format. This method is
+ # provided for API compatibility to RubyParser and takes an
+ # optional +timeout+ argument.
+ def process(ruby, file = "(string)", timeout = nil)
+ Timeout.timeout(timeout) { parse(ruby, file) }
+ end
+
class << self
# Parse the given source and translate it into the seattlerb/ruby_parser
# gem's Sexp format.
@@ -1639,6 +1668,7 @@ module Prism
raise ::RubyParser::SyntaxError, "#{filepath}:#{error.location.start_line} :: #{error.message}"
end
+ result.attach_comments!
result.value.accept(Compiler.new(filepath))
end
end