summaryrefslogtreecommitdiff
path: root/lib/prism/translation/parser
diff options
context:
space:
mode:
Diffstat (limited to 'lib/prism/translation/parser')
-rw-r--r--lib/prism/translation/parser/builder.rb70
-rw-r--r--lib/prism/translation/parser/compiler.rb2219
-rw-r--r--lib/prism/translation/parser/lexer.rb819
3 files changed, 3108 insertions, 0 deletions
diff --git a/lib/prism/translation/parser/builder.rb b/lib/prism/translation/parser/builder.rb
new file mode 100644
index 0000000000..7fc3bba6b7
--- /dev/null
+++ b/lib/prism/translation/parser/builder.rb
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+# :markup: markdown
+
+module Prism
+ module Translation
+ class Parser
+ # A builder that knows how to convert more modern Ruby syntax
+ # into whitequark/parser gem's syntax tree.
+ class Builder < ::Parser::Builders::Default
+ # It represents the `it` block argument, which is not yet implemented in
+ # the Parser gem.
+ def itarg
+ n(:itarg, [:it], nil)
+ end
+
+ # The following three lines have been added to support the `it` block
+ # parameter syntax in the source code below.
+ #
+ # if args.type == :itarg
+ # block_type = :itblock
+ # args = :it
+ #
+ # https://github.com/whitequark/parser/blob/v3.3.7.1/lib/parser/builders/default.rb#L1122-L1155
+ def block(method_call, begin_t, args, body, end_t)
+ _receiver, _selector, *call_args = *method_call
+
+ if method_call.type == :yield
+ diagnostic :error, :block_given_to_yield, nil, method_call.loc.keyword, [loc(begin_t)]
+ end
+
+ last_arg = call_args.last
+ if last_arg && (last_arg.type == :block_pass || last_arg.type == :forwarded_args)
+ diagnostic :error, :block_and_blockarg, nil, last_arg.loc.expression, [loc(begin_t)]
+ end
+
+ if args.type == :itarg
+ block_type = :itblock
+ args = :it
+ elsif args.type == :numargs
+ block_type = :numblock
+ args = args.children[0]
+ else
+ block_type = :block
+ end
+
+ if [:send, :csend, :index, :super, :zsuper, :lambda].include?(method_call.type)
+ n(block_type, [ method_call, args, body ],
+ block_map(method_call.loc.expression, begin_t, end_t))
+ else
+ # Code like "return foo 1 do end" is reduced in a weird sequence.
+ # Here, method_call is actually (return).
+ actual_send, = *method_call
+ block =
+ n(block_type, [ actual_send, args, body ],
+ block_map(actual_send.loc.expression, begin_t, end_t))
+
+ n(method_call.type, [ block ],
+ method_call.loc.with_expression(join_exprs(method_call, block)))
+ end
+ end
+
+ # def foo(&nil); end
+ # ^^^^
+ def blocknilarg(amper_t, nil_t)
+ n0(:blocknilarg, arg_prefix_map(amper_t, nil_t))
+ end
+ end
+ end
+ end
+end
diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb
new file mode 100644
index 0000000000..d11db12ae6
--- /dev/null
+++ b/lib/prism/translation/parser/compiler.rb
@@ -0,0 +1,2219 @@
+# frozen_string_literal: true
+# :markup: markdown
+
+module Prism
+ module Translation
+ class Parser
+ # A visitor that knows how to convert a prism syntax tree into the
+ # whitequark/parser gem's syntax tree.
+ class Compiler < ::Prism::Compiler # :nodoc:
+ # Raised when the tree is malformed or there is a bug in the compiler.
+ class CompilationError < StandardError # :nodoc:
+ end
+
+ # The Parser::Base instance that is being used to build the AST.
+ attr_reader :parser
+
+ # The Parser::Builders::Default instance that is being used to build the
+ # AST.
+ attr_reader :builder
+
+ # The Parser::Source::Buffer instance that is holding a reference to the
+ # source code.
+ attr_reader :source_buffer
+
+ # The offset cache that is used to map between byte and character
+ # offsets in the file.
+ attr_reader :offset_cache
+
+ # The types of values that can be forwarded in the current scope.
+ attr_reader :forwarding
+
+ # Whether or not the current node is in a destructure.
+ attr_reader :in_destructure
+
+ # Whether or not the current node is in a pattern.
+ attr_reader :in_pattern
+
+ # Initialize a new compiler with the given parser, offset cache, and
+ # options.
+ def initialize(parser, offset_cache, forwarding: [], in_destructure: false, in_pattern: false)
+ @parser = parser
+ @builder = parser.builder
+ @source_buffer = parser.source_buffer
+ @offset_cache = offset_cache
+
+ @forwarding = forwarding
+ @in_destructure = in_destructure
+ @in_pattern = in_pattern
+ end
+
+ # alias foo bar
+ # ^^^^^^^^^^^^^
+ def visit_alias_method_node(node)
+ builder.alias(token(node.keyword_loc), visit(node.new_name), visit(node.old_name))
+ end
+
+ # alias $foo $bar
+ # ^^^^^^^^^^^^^^^
+ def visit_alias_global_variable_node(node)
+ builder.alias(token(node.keyword_loc), visit(node.new_name), visit(node.old_name))
+ end
+
+ # foo => bar | baz
+ # ^^^^^^^^^
+ def visit_alternation_pattern_node(node)
+ builder.match_alt(visit(node.left), token(node.operator_loc), visit(node.right))
+ end
+
+ # a and b
+ # ^^^^^^^
+ def visit_and_node(node)
+ builder.logical_op(:and, visit(node.left), token(node.operator_loc), visit(node.right))
+ end
+
+ # []
+ # ^^
+ def visit_array_node(node)
+ if node.opening&.start_with?("%w", "%W", "%i", "%I")
+ elements = node.elements.flat_map do |element|
+ if element.is_a?(StringNode)
+ if element.content.include?("\n")
+ string_nodes_from_line_continuations(element.unescaped, element.content, element.content_loc.start_offset, node.opening)
+ else
+ [builder.string_internal([element.unescaped, srange(element.content_loc)])]
+ end
+ elsif element.is_a?(InterpolatedStringNode)
+ builder.string_compose(
+ token(element.opening_loc),
+ string_nodes_from_interpolation(element, node.opening),
+ token(element.closing_loc)
+ )
+ else
+ [visit(element)]
+ end
+ end
+ else
+ elements = visit_all(node.elements)
+ end
+
+ builder.array(token(node.opening_loc), elements, token(node.closing_loc))
+ end
+
+ # foo => [bar]
+ # ^^^^^
+ def visit_array_pattern_node(node)
+ elements = [*node.requireds]
+ elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode)
+ elements.concat(node.posts)
+ visited = visit_all(elements)
+
+ if node.rest.is_a?(ImplicitRestNode)
+ visited[-1] = builder.match_with_trailing_comma(visited[-1], token(node.rest.location))
+ end
+
+ if node.constant
+ if visited.empty?
+ builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.array_pattern(token(node.opening_loc), visited, token(node.closing_loc)), token(node.closing_loc))
+ else
+ builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.array_pattern(nil, visited, nil), token(node.closing_loc))
+ end
+ else
+ builder.array_pattern(token(node.opening_loc), visited, token(node.closing_loc))
+ end
+ end
+
+ # foo(bar)
+ # ^^^
+ def visit_arguments_node(node)
+ visit_all(node.arguments)
+ end
+
+ # { a: 1 }
+ # ^^^^
+ def visit_assoc_node(node)
+ key = node.key
+
+ if node.value.is_a?(ImplicitNode)
+ if in_pattern
+ if key.is_a?(SymbolNode)
+ if key.opening.nil?
+ builder.match_hash_var([key.unescaped, srange(key.location)])
+ else
+ builder.match_hash_var_from_str(token(key.opening_loc), [builder.string_internal([key.unescaped, srange(key.value_loc)])], token(key.closing_loc))
+ end
+ else
+ builder.match_hash_var_from_str(token(key.opening_loc), visit_all(key.parts), token(key.closing_loc))
+ end
+ else
+ value = node.value.value
+
+ implicit_value = if value.is_a?(CallNode)
+ builder.call_method(nil, nil, [value.name, srange(value.message_loc)])
+ elsif value.is_a?(ConstantReadNode)
+ builder.const([value.name, srange(key.value_loc)])
+ else
+ builder.ident([value.name, srange(key.value_loc)]).updated(:lvar)
+ end
+
+ builder.pair_keyword([key.unescaped, srange(key)], implicit_value)
+ end
+ elsif node.operator_loc
+ builder.pair(visit(key), token(node.operator_loc), visit(node.value))
+ elsif key.is_a?(SymbolNode) && key.opening_loc.nil?
+ builder.pair_keyword([key.unescaped, srange(key.location)], visit(node.value))
+ else
+ parts =
+ if key.is_a?(SymbolNode)
+ [builder.string_internal([key.unescaped, srange(key.value_loc)])]
+ else
+ visit_all(key.parts)
+ end
+
+ builder.pair_quoted(token(key.opening_loc), parts, token(key.closing_loc), visit(node.value))
+ end
+ end
+
+ # def foo(**); bar(**); end
+ # ^^
+ #
+ # { **foo }
+ # ^^^^^
+ def visit_assoc_splat_node(node)
+ if in_pattern
+ builder.match_rest(token(node.operator_loc), token(node.value&.location))
+ elsif node.value.nil? && forwarding.include?(:**)
+ builder.forwarded_kwrestarg(token(node.operator_loc))
+ else
+ builder.kwsplat(token(node.operator_loc), visit(node.value))
+ end
+ end
+
+ # $+
+ # ^^
+ def visit_back_reference_read_node(node)
+ builder.back_ref(token(node.location))
+ end
+
+ # begin end
+ # ^^^^^^^^^
+ def visit_begin_node(node)
+ rescue_bodies = []
+
+ if (rescue_clause = node.rescue_clause)
+ begin
+ find_start_offset = (rescue_clause.reference&.location || rescue_clause.exceptions.last&.location || rescue_clause.keyword_loc).end_offset
+ find_end_offset = (
+ rescue_clause.statements&.location&.start_offset ||
+ rescue_clause.subsequent&.location&.start_offset ||
+ node.else_clause&.location&.start_offset ||
+ node.ensure_clause&.location&.start_offset ||
+ node.end_keyword_loc&.start_offset ||
+ find_start_offset + 1
+ )
+
+ rescue_bodies << builder.rescue_body(
+ token(rescue_clause.keyword_loc),
+ rescue_clause.exceptions.any? ? builder.array(nil, visit_all(rescue_clause.exceptions), nil) : nil,
+ token(rescue_clause.operator_loc),
+ visit(rescue_clause.reference),
+ srange_semicolon(find_start_offset, find_end_offset),
+ visit(rescue_clause.statements)
+ )
+ end until (rescue_clause = rescue_clause.subsequent).nil?
+ end
+
+ begin_body =
+ builder.begin_body(
+ visit(node.statements),
+ rescue_bodies,
+ token(node.else_clause&.else_keyword_loc),
+ visit(node.else_clause),
+ token(node.ensure_clause&.ensure_keyword_loc),
+ visit(node.ensure_clause&.statements)
+ )
+
+ if node.begin_keyword_loc
+ builder.begin_keyword(token(node.begin_keyword_loc), begin_body, token(node.end_keyword_loc))
+ else
+ begin_body
+ end
+ end
+
+ # foo(&bar)
+ # ^^^^
+ def visit_block_argument_node(node)
+ builder.block_pass(token(node.operator_loc), visit(node.expression))
+ end
+
+ # foo { |; bar| }
+ # ^^^
+ def visit_block_local_variable_node(node)
+ builder.shadowarg(token(node.location))
+ end
+
+ # A block on a keyword or method call.
+ def visit_block_node(node)
+ raise CompilationError, "Cannot directly compile block nodes"
+ end
+
+ # def foo(&bar); end
+ # ^^^^
+ def visit_block_parameter_node(node)
+ builder.blockarg(token(node.operator_loc), token(node.name_loc))
+ end
+
+ # A block's parameters.
+ def visit_block_parameters_node(node)
+ [*visit(node.parameters)].concat(visit_all(node.locals))
+ end
+
+ # break
+ # ^^^^^
+ #
+ # break foo
+ # ^^^^^^^^^
+ def visit_break_node(node)
+ builder.keyword_cmd(:break, token(node.keyword_loc), nil, visit(node.arguments) || [], nil)
+ end
+
+ # foo
+ # ^^^
+ #
+ # foo.bar
+ # ^^^^^^^
+ #
+ # foo.bar() {}
+ # ^^^^^^^^^^^^
+ def visit_call_node(node)
+ name = node.name
+ arguments = node.arguments&.arguments || []
+ block = node.block
+
+ if block.is_a?(BlockArgumentNode)
+ arguments = [*arguments, block]
+ block = nil
+ end
+
+ if node.call_operator_loc.nil?
+ case name
+ when :!
+ return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block)
+ when :=~
+ if (receiver = node.receiver).is_a?(RegularExpressionNode)
+ return builder.match_op(visit(receiver), token(node.message_loc), visit(node.arguments.arguments.first))
+ end
+ when :[]
+ return visit_block(builder.index(visit(node.receiver), token(node.opening_loc), visit_all(arguments), token(node.closing_loc)), block)
+ when :[]=
+ if node.message != "[]=" && node.arguments && block.nil? && !node.safe_navigation?
+ arguments = node.arguments.arguments[...-1]
+ arguments << node.block if node.block
+
+ return visit_block(
+ builder.assign(
+ builder.index_asgn(
+ visit(node.receiver),
+ token(node.opening_loc),
+ visit_all(arguments),
+ token(node.closing_loc),
+ ),
+ token(node.equal_loc),
+ visit(node.arguments.arguments.last)
+ ),
+ block
+ )
+ end
+ end
+ end
+
+ message_loc = node.message_loc
+ call_operator_loc = node.call_operator_loc
+ call_operator = [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)] if call_operator_loc
+
+ visit_block(
+ if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
+ builder.assign(
+ builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
+ token(node.equal_loc),
+ visit(node.arguments.arguments.last)
+ )
+ else
+ builder.call_method(
+ visit(node.receiver),
+ call_operator,
+ message_loc ? [node.name, srange(message_loc)] : nil,
+ token(node.opening_loc),
+ visit_all(arguments),
+ token(node.closing_loc)
+ )
+ end,
+ block
+ )
+ end
+
+ # foo.bar += baz
+ # ^^^^^^^^^^^^^^^
+ def visit_call_operator_write_node(node)
+ call_operator_loc = node.call_operator_loc
+
+ builder.op_assign(
+ builder.call_method(
+ visit(node.receiver),
+ call_operator_loc.nil? ? nil : [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)],
+ node.message_loc ? [node.read_name, srange(node.message_loc)] : nil,
+ nil,
+ [],
+ nil
+ ),
+ [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # foo.bar &&= baz
+ # ^^^^^^^^^^^^^^^
+ def visit_call_and_write_node(node)
+ call_operator_loc = node.call_operator_loc
+
+ builder.op_assign(
+ builder.call_method(
+ visit(node.receiver),
+ call_operator_loc.nil? ? nil : [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)],
+ node.message_loc ? [node.read_name, srange(node.message_loc)] : nil,
+ nil,
+ [],
+ nil
+ ),
+ [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # foo.bar ||= baz
+ # ^^^^^^^^^^^^^^^
+ def visit_call_or_write_node(node)
+ call_operator_loc = node.call_operator_loc
+
+ builder.op_assign(
+ builder.call_method(
+ visit(node.receiver),
+ call_operator_loc.nil? ? nil : [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)],
+ node.message_loc ? [node.read_name, srange(node.message_loc)] : nil,
+ nil,
+ [],
+ nil
+ ),
+ [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # foo.bar, = 1
+ # ^^^^^^^
+ def visit_call_target_node(node)
+ call_operator_loc = node.call_operator_loc
+
+ builder.attr_asgn(
+ visit(node.receiver),
+ call_operator_loc.nil? ? nil : [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)],
+ token(node.message_loc)
+ )
+ end
+
+ # foo => bar => baz
+ # ^^^^^^^^^^
+ def visit_capture_pattern_node(node)
+ builder.match_as(visit(node.value), token(node.operator_loc), visit(node.target))
+ end
+
+ # case foo; when bar; end
+ # ^^^^^^^^^^^^^^^^^^^^^^^
+ def visit_case_node(node)
+ builder.case(
+ token(node.case_keyword_loc),
+ visit(node.predicate),
+ visit_all(node.conditions),
+ token(node.else_clause&.else_keyword_loc),
+ visit(node.else_clause),
+ token(node.end_keyword_loc)
+ )
+ end
+
+ # case foo; in bar; end
+ # ^^^^^^^^^^^^^^^^^^^^^
+ def visit_case_match_node(node)
+ builder.case_match(
+ token(node.case_keyword_loc),
+ visit(node.predicate),
+ visit_all(node.conditions),
+ token(node.else_clause&.else_keyword_loc),
+ visit(node.else_clause),
+ token(node.end_keyword_loc)
+ )
+ end
+
+ # class Foo; end
+ # ^^^^^^^^^^^^^^
+ def visit_class_node(node)
+ builder.def_class(
+ token(node.class_keyword_loc),
+ visit(node.constant_path),
+ token(node.inheritance_operator_loc),
+ visit(node.superclass),
+ node.body&.accept(copy_compiler(forwarding: [])),
+ token(node.end_keyword_loc)
+ )
+ end
+
+ # @@foo
+ # ^^^^^
+ def visit_class_variable_read_node(node)
+ builder.cvar(token(node.location))
+ end
+
+ # @@foo = 1
+ # ^^^^^^^^^
+ def visit_class_variable_write_node(node)
+ builder.assign(
+ builder.assignable(builder.cvar(token(node.name_loc))),
+ token(node.operator_loc),
+ visit(node.value)
+ )
+ end
+
+ # @@foo += bar
+ # ^^^^^^^^^^^^
+ def visit_class_variable_operator_write_node(node)
+ builder.op_assign(
+ builder.assignable(builder.cvar(token(node.name_loc))),
+ [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # @@foo &&= bar
+ # ^^^^^^^^^^^^^
+ def visit_class_variable_and_write_node(node)
+ builder.op_assign(
+ builder.assignable(builder.cvar(token(node.name_loc))),
+ [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # @@foo ||= bar
+ # ^^^^^^^^^^^^^
+ def visit_class_variable_or_write_node(node)
+ builder.op_assign(
+ builder.assignable(builder.cvar(token(node.name_loc))),
+ [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # @@foo, = bar
+ # ^^^^^
+ def visit_class_variable_target_node(node)
+ builder.assignable(builder.cvar(token(node.location)))
+ end
+
+ # Foo
+ # ^^^
+ def visit_constant_read_node(node)
+ builder.const([node.name, srange(node.location)])
+ end
+
+ # Foo = 1
+ # ^^^^^^^
+ #
+ # Foo, Bar = 1
+ # ^^^ ^^^
+ def visit_constant_write_node(node)
+ builder.assign(builder.assignable(builder.const([node.name, srange(node.name_loc)])), token(node.operator_loc), visit(node.value))
+ end
+
+ # Foo += bar
+ # ^^^^^^^^^^^
+ def visit_constant_operator_write_node(node)
+ builder.op_assign(
+ builder.assignable(builder.const([node.name, srange(node.name_loc)])),
+ [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # Foo &&= bar
+ # ^^^^^^^^^^^^
+ def visit_constant_and_write_node(node)
+ builder.op_assign(
+ builder.assignable(builder.const([node.name, srange(node.name_loc)])),
+ [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # Foo ||= bar
+ # ^^^^^^^^^^^^
+ def visit_constant_or_write_node(node)
+ builder.op_assign(
+ builder.assignable(builder.const([node.name, srange(node.name_loc)])),
+ [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # Foo, = bar
+ # ^^^
+ def visit_constant_target_node(node)
+ builder.assignable(builder.const([node.name, srange(node.location)]))
+ end
+
+ # Foo::Bar
+ # ^^^^^^^^
+ def visit_constant_path_node(node)
+ if node.parent.nil?
+ builder.const_global(
+ token(node.delimiter_loc),
+ [node.name, srange(node.name_loc)]
+ )
+ else
+ builder.const_fetch(
+ visit(node.parent),
+ token(node.delimiter_loc),
+ [node.name, srange(node.name_loc)]
+ )
+ end
+ end
+
+ # Foo::Bar = 1
+ # ^^^^^^^^^^^^
+ #
+ # Foo::Foo, Bar::Bar = 1
+ # ^^^^^^^^ ^^^^^^^^
+ def visit_constant_path_write_node(node)
+ builder.assign(
+ builder.assignable(visit(node.target)),
+ token(node.operator_loc),
+ visit(node.value)
+ )
+ end
+
+ # Foo::Bar += baz
+ # ^^^^^^^^^^^^^^^
+ def visit_constant_path_operator_write_node(node)
+ builder.op_assign(
+ builder.assignable(visit(node.target)),
+ [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # Foo::Bar &&= baz
+ # ^^^^^^^^^^^^^^^^
+ def visit_constant_path_and_write_node(node)
+ builder.op_assign(
+ builder.assignable(visit(node.target)),
+ [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # Foo::Bar ||= baz
+ # ^^^^^^^^^^^^^^^^
+ def visit_constant_path_or_write_node(node)
+ builder.op_assign(
+ builder.assignable(visit(node.target)),
+ [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # Foo::Bar, = baz
+ # ^^^^^^^^
+ def visit_constant_path_target_node(node)
+ builder.assignable(visit_constant_path_node(node))
+ end
+
+ # def foo; end
+ # ^^^^^^^^^^^^
+ #
+ # def self.foo; end
+ # ^^^^^^^^^^^^^^^^^
+ def visit_def_node(node)
+ if node.equal_loc
+ if node.receiver
+ builder.def_endless_singleton(
+ token(node.def_keyword_loc),
+ visit(node.receiver.is_a?(ParenthesesNode) ? node.receiver.body : node.receiver),
+ token(node.operator_loc),
+ token(node.name_loc),
+ builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
+ token(node.equal_loc),
+ node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters)))
+ )
+ else
+ builder.def_endless_method(
+ token(node.def_keyword_loc),
+ token(node.name_loc),
+ builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
+ token(node.equal_loc),
+ node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters)))
+ )
+ end
+ elsif node.receiver
+ builder.def_singleton(
+ token(node.def_keyword_loc),
+ visit(node.receiver.is_a?(ParenthesesNode) ? node.receiver.body : node.receiver),
+ token(node.operator_loc),
+ token(node.name_loc),
+ builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
+ node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters))),
+ token(node.end_keyword_loc)
+ )
+ else
+ builder.def_method(
+ token(node.def_keyword_loc),
+ token(node.name_loc),
+ builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
+ node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters))),
+ token(node.end_keyword_loc)
+ )
+ end
+ end
+
+ # defined? a
+ # ^^^^^^^^^^
+ #
+ # defined?(a)
+ # ^^^^^^^^^^^
+ def visit_defined_node(node)
+ # Very weird circumstances here where something like:
+ #
+ # defined?
+ # (1)
+ #
+ # gets parsed in Ruby as having only the `1` expression but in parser
+ # it gets parsed as having a begin. In this case we need to synthesize
+ # that begin to match parser's behavior.
+ if node.lparen_loc && node.keyword_loc.join(node.lparen_loc).slice.include?("\n")
+ builder.keyword_cmd(
+ :defined?,
+ token(node.keyword_loc),
+ nil,
+ [
+ builder.begin(
+ token(node.lparen_loc),
+ visit(node.value),
+ token(node.rparen_loc)
+ )
+ ],
+ nil
+ )
+ else
+ builder.keyword_cmd(
+ :defined?,
+ token(node.keyword_loc),
+ token(node.lparen_loc),
+ [visit(node.value)],
+ token(node.rparen_loc)
+ )
+ end
+ end
+
+ # if foo then bar else baz end
+ # ^^^^^^^^^^^^
+ def visit_else_node(node)
+ visit(node.statements)
+ end
+
+ # "foo #{bar}"
+ # ^^^^^^
+ def visit_embedded_statements_node(node)
+ builder.begin(
+ token(node.opening_loc),
+ visit(node.statements),
+ token(node.closing_loc)
+ )
+ end
+
+ # "foo #@bar"
+ # ^^^^^
+ def visit_embedded_variable_node(node)
+ visit(node.variable)
+ end
+
+ # begin; foo; ensure; bar; end
+ # ^^^^^^^^^^^^
+ def visit_ensure_node(node)
+ raise CompilationError, "Cannot directly compile ensure nodes"
+ end
+
+ # false
+ # ^^^^^
+ def visit_false_node(node)
+ builder.false(token(node.location))
+ end
+
+ # foo => [*, bar, *]
+ # ^^^^^^^^^^^
+ def visit_find_pattern_node(node)
+ elements = [node.left, *node.requireds, node.right]
+
+ if node.constant
+ builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.find_pattern(nil, visit_all(elements), nil), token(node.closing_loc))
+ else
+ builder.find_pattern(token(node.opening_loc), visit_all(elements), token(node.closing_loc))
+ end
+ end
+
+ # 1.0
+ # ^^^
+ def visit_float_node(node)
+ visit_numeric(node, builder.float([node.value, srange(node.location)]))
+ end
+
+ # for foo in bar do end
+ # ^^^^^^^^^^^^^^^^^^^^^
+ def visit_for_node(node)
+ builder.for(
+ token(node.for_keyword_loc),
+ visit(node.index),
+ token(node.in_keyword_loc),
+ visit(node.collection),
+ if (do_keyword_loc = node.do_keyword_loc)
+ token(do_keyword_loc)
+ else
+ srange_semicolon(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset)
+ end,
+ visit(node.statements),
+ token(node.end_keyword_loc)
+ )
+ end
+
+ # def foo(...); bar(...); end
+ # ^^^
+ def visit_forwarding_arguments_node(node)
+ builder.forwarded_args(token(node.location))
+ end
+
+ # def foo(...); end
+ # ^^^
+ def visit_forwarding_parameter_node(node)
+ builder.forward_arg(token(node.location))
+ end
+
+ # super
+ # ^^^^^
+ #
+ # super {}
+ # ^^^^^^^^
+ def visit_forwarding_super_node(node)
+ visit_block(
+ builder.keyword_cmd(
+ :zsuper,
+ ["super", srange_offsets(node.location.start_offset, node.location.start_offset + 5)]
+ ),
+ node.block
+ )
+ end
+
+ # $foo
+ # ^^^^
+ def visit_global_variable_read_node(node)
+ builder.gvar(token(node.location))
+ end
+
+ # $foo = 1
+ # ^^^^^^^^
+ def visit_global_variable_write_node(node)
+ builder.assign(
+ builder.assignable(builder.gvar(token(node.name_loc))),
+ token(node.operator_loc),
+ visit(node.value)
+ )
+ end
+
+ # $foo += bar
+ # ^^^^^^^^^^^
+ def visit_global_variable_operator_write_node(node)
+ builder.op_assign(
+ builder.assignable(builder.gvar(token(node.name_loc))),
+ [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # $foo &&= bar
+ # ^^^^^^^^^^^^
+ def visit_global_variable_and_write_node(node)
+ builder.op_assign(
+ builder.assignable(builder.gvar(token(node.name_loc))),
+ [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # $foo ||= bar
+ # ^^^^^^^^^^^^
+ def visit_global_variable_or_write_node(node)
+ builder.op_assign(
+ builder.assignable(builder.gvar(token(node.name_loc))),
+ [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # $foo, = bar
+ # ^^^^
+ def visit_global_variable_target_node(node)
+ builder.assignable(builder.gvar([node.slice, srange(node.location)]))
+ end
+
+ # {}
+ # ^^
+ def visit_hash_node(node)
+ builder.associate(
+ token(node.opening_loc),
+ visit_all(node.elements),
+ token(node.closing_loc)
+ )
+ end
+
+ # foo => {}
+ # ^^
+ def visit_hash_pattern_node(node)
+ elements = [*node.elements, *node.rest]
+
+ if node.constant
+ builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.hash_pattern(nil, visit_all(elements), nil), token(node.closing_loc))
+ else
+ builder.hash_pattern(token(node.opening_loc), visit_all(elements), token(node.closing_loc))
+ end
+ end
+
+ # if foo then bar end
+ # ^^^^^^^^^^^^^^^^^^^
+ #
+ # bar if foo
+ # ^^^^^^^^^^
+ #
+ # foo ? bar : baz
+ # ^^^^^^^^^^^^^^^
+ def visit_if_node(node)
+ if !node.if_keyword_loc
+ builder.ternary(
+ visit(node.predicate),
+ token(node.then_keyword_loc),
+ visit(node.statements),
+ token(node.subsequent.else_keyword_loc),
+ visit(node.subsequent)
+ )
+ elsif node.if_keyword_loc.start_offset == node.location.start_offset
+ builder.condition(
+ token(node.if_keyword_loc),
+ visit(node.predicate),
+ if (then_keyword_loc = node.then_keyword_loc)
+ token(then_keyword_loc)
+ else
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset)
+ end,
+ visit(node.statements),
+ case node.subsequent
+ when IfNode
+ token(node.subsequent.if_keyword_loc)
+ when ElseNode
+ token(node.subsequent.else_keyword_loc)
+ end,
+ visit(node.subsequent),
+ if node.if_keyword != "elsif"
+ token(node.end_keyword_loc)
+ end
+ )
+ else
+ builder.condition_mod(
+ visit(node.statements),
+ visit(node.subsequent),
+ token(node.if_keyword_loc),
+ visit(node.predicate)
+ )
+ end
+ end
+
+ # 1i
+ # ^^
+ def visit_imaginary_node(node)
+ visit_numeric(node, builder.complex([Complex(0, node.numeric.value), srange(node.location)]))
+ end
+
+ # { foo: }
+ # ^^^^
+ def visit_implicit_node(node)
+ raise CompilationError, "Cannot directly compile implicit nodes"
+ end
+
+ # foo { |bar,| }
+ # ^
+ def visit_implicit_rest_node(node)
+ raise CompilationError, "Cannot compile implicit rest nodes"
+ end
+
+ # case foo; in bar; end
+ # ^^^^^^^^^^^^^^^^^^^^^
+ def visit_in_node(node)
+ pattern = nil
+ guard = nil
+
+ case node.pattern
+ when IfNode
+ pattern = within_pattern { |compiler| node.pattern.statements.accept(compiler) }
+ guard = builder.if_guard(token(node.pattern.if_keyword_loc), visit(node.pattern.predicate))
+ when UnlessNode
+ pattern = within_pattern { |compiler| node.pattern.statements.accept(compiler) }
+ guard = builder.unless_guard(token(node.pattern.keyword_loc), visit(node.pattern.predicate))
+ else
+ pattern = within_pattern { |compiler| node.pattern.accept(compiler) }
+ end
+
+ builder.in_pattern(
+ token(node.in_loc),
+ pattern,
+ guard,
+ if (then_loc = node.then_loc)
+ token(then_loc)
+ else
+ srange_semicolon(node.pattern.location.end_offset, node.statements&.location&.start_offset)
+ end,
+ visit(node.statements)
+ )
+ end
+
+ # foo[bar] += baz
+ # ^^^^^^^^^^^^^^^
+ def visit_index_operator_write_node(node)
+ arguments = node.arguments&.arguments || []
+ arguments << node.block if node.block
+
+ builder.op_assign(
+ builder.index(
+ visit(node.receiver),
+ token(node.opening_loc),
+ visit_all(arguments),
+ token(node.closing_loc)
+ ),
+ [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # foo[bar] &&= baz
+ # ^^^^^^^^^^^^^^^^
+ def visit_index_and_write_node(node)
+ arguments = node.arguments&.arguments || []
+ arguments << node.block if node.block
+
+ builder.op_assign(
+ builder.index(
+ visit(node.receiver),
+ token(node.opening_loc),
+ visit_all(arguments),
+ token(node.closing_loc)
+ ),
+ [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # foo[bar] ||= baz
+ # ^^^^^^^^^^^^^^^^
+ def visit_index_or_write_node(node)
+ arguments = node.arguments&.arguments || []
+ arguments << node.block if node.block
+
+ builder.op_assign(
+ builder.index(
+ visit(node.receiver),
+ token(node.opening_loc),
+ visit_all(arguments),
+ token(node.closing_loc)
+ ),
+ [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # foo[bar], = 1
+ # ^^^^^^^^
+ def visit_index_target_node(node)
+ builder.index_asgn(
+ visit(node.receiver),
+ token(node.opening_loc),
+ visit_all(node.arguments&.arguments || []),
+ token(node.closing_loc),
+ )
+ end
+
+ # @foo
+ # ^^^^
+ def visit_instance_variable_read_node(node)
+ builder.ivar(token(node.location))
+ end
+
+ # @foo = 1
+ # ^^^^^^^^
+ def visit_instance_variable_write_node(node)
+ builder.assign(
+ builder.assignable(builder.ivar(token(node.name_loc))),
+ token(node.operator_loc),
+ visit(node.value)
+ )
+ end
+
+ # @foo += bar
+ # ^^^^^^^^^^^
+ def visit_instance_variable_operator_write_node(node)
+ builder.op_assign(
+ builder.assignable(builder.ivar(token(node.name_loc))),
+ [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # @foo &&= bar
+ # ^^^^^^^^^^^^
+ def visit_instance_variable_and_write_node(node)
+ builder.op_assign(
+ builder.assignable(builder.ivar(token(node.name_loc))),
+ [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # @foo ||= bar
+ # ^^^^^^^^^^^^
+ def visit_instance_variable_or_write_node(node)
+ builder.op_assign(
+ builder.assignable(builder.ivar(token(node.name_loc))),
+ [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # @foo, = bar
+ # ^^^^
+ def visit_instance_variable_target_node(node)
+ builder.assignable(builder.ivar(token(node.location)))
+ end
+
+ # 1
+ # ^
+ def visit_integer_node(node)
+ visit_numeric(node, builder.integer([node.value, srange(node.location)]))
+ end
+
+ # /foo #{bar}/
+ # ^^^^^^^^^^^^
+ def visit_interpolated_regular_expression_node(node)
+ builder.regexp_compose(
+ token(node.opening_loc),
+ string_nodes_from_interpolation(node, node.opening),
+ [node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)],
+ builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)])
+ )
+ end
+
+ # if /foo #{bar}/ then end
+ # ^^^^^^^^^^^^
+ alias visit_interpolated_match_last_line_node visit_interpolated_regular_expression_node
+
+ # "foo #{bar}"
+ # ^^^^^^^^^^^^
+ def visit_interpolated_string_node(node)
+ if node.heredoc?
+ return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
+ end
+
+ builder.string_compose(
+ token(node.opening_loc),
+ string_nodes_from_interpolation(node, node.opening),
+ token(node.closing_loc)
+ )
+ end
+
+ # :"foo #{bar}"
+ # ^^^^^^^^^^^^^
+ def visit_interpolated_symbol_node(node)
+ builder.symbol_compose(
+ token(node.opening_loc),
+ string_nodes_from_interpolation(node, node.opening),
+ token(node.closing_loc)
+ )
+ end
+
+ # `foo #{bar}`
+ # ^^^^^^^^^^^^
+ def visit_interpolated_x_string_node(node)
+ if node.heredoc?
+ return visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
+ end
+
+ builder.xstring_compose(
+ token(node.opening_loc),
+ string_nodes_from_interpolation(node, node.opening),
+ token(node.closing_loc)
+ )
+ end
+
+ # -> { it }
+ # ^^
+ def visit_it_local_variable_read_node(node)
+ builder.ident([:it, srange(node.location)]).updated(:lvar)
+ end
+
+ # -> { it }
+ # ^^^^^^^^^
+ def visit_it_parameters_node(node)
+ # FIXME: The builder _should_ always be a subclass of the prism builder.
+ # Currently RuboCop passes in its own builder that always inherits from the
+ # parser builder (which is lacking the `itarg` method). Once rubocop-ast
+ # opts in to use the custom prism builder a warning can be emitted when
+ # it is not the expected class, and eventually raise.
+ # https://github.com/rubocop/rubocop-ast/pull/354
+ if builder.is_a?(Translation::Parser::Builder)
+ builder.itarg
+ else
+ builder.args(nil, [], nil, false)
+ end
+ end
+
+ # foo(bar: baz)
+ # ^^^^^^^^
+ def visit_keyword_hash_node(node)
+ builder.associate(nil, visit_all(node.elements), nil)
+ end
+
+ # def foo(**bar); end
+ # ^^^^^
+ #
+ # def foo(**); end
+ # ^^
+ def visit_keyword_rest_parameter_node(node)
+ builder.kwrestarg(
+ token(node.operator_loc),
+ node.name ? [node.name, srange(node.name_loc)] : nil
+ )
+ end
+
+ # -> {}
+ # ^^^^^
+ def visit_lambda_node(node)
+ parameters = node.parameters
+ implicit_parameters = parameters.is_a?(NumberedParametersNode) || parameters.is_a?(ItParametersNode)
+
+ builder.block(
+ builder.call_lambda(token(node.operator_loc)),
+ [node.opening, srange(node.opening_loc)],
+ if parameters.nil?
+ builder.args(nil, [], nil, false)
+ elsif implicit_parameters
+ visit(node.parameters)
+ else
+ builder.args(
+ token(node.parameters.opening_loc),
+ visit(node.parameters),
+ token(node.parameters.closing_loc),
+ false
+ )
+ end,
+ visit(node.body),
+ [node.closing, srange(node.closing_loc)]
+ )
+ end
+
+ # foo
+ # ^^^
+ def visit_local_variable_read_node(node)
+ builder.ident([node.name, srange(node.location)]).updated(:lvar)
+ end
+
+ # foo = 1
+ # ^^^^^^^
+ def visit_local_variable_write_node(node)
+ builder.assign(
+ builder.assignable(builder.ident(token(node.name_loc))),
+ token(node.operator_loc),
+ visit(node.value)
+ )
+ end
+
+ # foo += bar
+ # ^^^^^^^^^^
+ def visit_local_variable_operator_write_node(node)
+ builder.op_assign(
+ builder.assignable(builder.ident(token(node.name_loc))),
+ [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # foo &&= bar
+ # ^^^^^^^^^^^
+ def visit_local_variable_and_write_node(node)
+ builder.op_assign(
+ builder.assignable(builder.ident(token(node.name_loc))),
+ [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # foo ||= bar
+ # ^^^^^^^^^^^
+ def visit_local_variable_or_write_node(node)
+ builder.op_assign(
+ builder.assignable(builder.ident(token(node.name_loc))),
+ [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ visit(node.value)
+ )
+ end
+
+ # foo, = bar
+ # ^^^
+ def visit_local_variable_target_node(node)
+ if in_pattern
+ builder.assignable(builder.match_var([node.name, srange(node.location)]))
+ else
+ builder.assignable(builder.ident(token(node.location)))
+ end
+ end
+
+ # foo in bar
+ # ^^^^^^^^^^
+ def visit_match_predicate_node(node)
+ builder.match_pattern_p(
+ visit(node.value),
+ token(node.operator_loc),
+ within_pattern { |compiler| node.pattern.accept(compiler) }
+ )
+ end
+
+ # foo => bar
+ # ^^^^^^^^^^
+ def visit_match_required_node(node)
+ builder.match_pattern(
+ visit(node.value),
+ token(node.operator_loc),
+ within_pattern { |compiler| node.pattern.accept(compiler) }
+ )
+ end
+
+ # /(?<foo>foo)/ =~ bar
+ # ^^^^^^^^^^^^^^^^^^^^
+ def visit_match_write_node(node)
+ builder.match_op(
+ visit(node.call.receiver),
+ token(node.call.message_loc),
+ visit(node.call.arguments.arguments.first)
+ )
+ end
+
+ # A node that is missing from the syntax tree. This is only used in the
+ # case of a syntax error. The parser gem doesn't have such a concept, so
+ # we invent our own here.
+ def visit_error_recovery_node(node)
+ ::AST::Node.new(:missing, [], location: ::Parser::Source::Map.new(srange(node.location)))
+ end
+
+ # module Foo; end
+ # ^^^^^^^^^^^^^^^
+ def visit_module_node(node)
+ builder.def_module(
+ token(node.module_keyword_loc),
+ visit(node.constant_path),
+ node.body&.accept(copy_compiler(forwarding: [])),
+ token(node.end_keyword_loc)
+ )
+ end
+
+ # foo, bar = baz
+ # ^^^^^^^^
+ def visit_multi_target_node(node)
+ builder.multi_lhs(
+ token(node.lparen_loc),
+ visit_all(multi_target_elements(node)),
+ token(node.rparen_loc)
+ )
+ end
+
+ # foo, bar = baz
+ # ^^^^^^^^^^^^^^
+ def visit_multi_write_node(node)
+ elements = multi_target_elements(node)
+
+ if elements.length == 1 && elements.first.is_a?(MultiTargetNode) && !node.rest
+ elements = multi_target_elements(elements.first)
+ end
+
+ builder.multi_assign(
+ builder.multi_lhs(
+ token(node.lparen_loc),
+ visit_all(elements),
+ token(node.rparen_loc)
+ ),
+ token(node.operator_loc),
+ visit(node.value)
+ )
+ end
+
+ # next
+ # ^^^^
+ #
+ # next foo
+ # ^^^^^^^^
+ def visit_next_node(node)
+ builder.keyword_cmd(
+ :next,
+ token(node.keyword_loc),
+ nil,
+ visit(node.arguments) || [],
+ nil
+ )
+ end
+
+ # nil
+ # ^^^
+ def visit_nil_node(node)
+ builder.nil(token(node.location))
+ end
+
+ # def foo(&nil); end
+ # ^^^^
+ def visit_no_block_parameter_node(node)
+ builder.blocknilarg(token(node.operator_loc), token(node.keyword_loc))
+ end
+
+ # def foo(**nil); end
+ # ^^^^^
+ def visit_no_keywords_parameter_node(node)
+ if in_pattern
+ builder.match_nil_pattern(token(node.operator_loc), token(node.keyword_loc))
+ else
+ builder.kwnilarg(token(node.operator_loc), token(node.keyword_loc))
+ end
+ end
+
+ # -> { _1 + _2 }
+ # ^^^^^^^^^^^^^^
+ def visit_numbered_parameters_node(node)
+ builder.numargs(node.maximum)
+ end
+
+ # $1
+ # ^^
+ def visit_numbered_reference_read_node(node)
+ builder.nth_ref([node.number, srange(node.location)])
+ end
+
+ # def foo(bar: baz); end
+ # ^^^^^^^^
+ def visit_optional_keyword_parameter_node(node)
+ builder.kwoptarg([node.name, srange(node.name_loc)], visit(node.value))
+ end
+
+ # def foo(bar = 1); end
+ # ^^^^^^^
+ def visit_optional_parameter_node(node)
+ builder.optarg(token(node.name_loc), token(node.operator_loc), visit(node.value))
+ end
+
+ # a or b
+ # ^^^^^^
+ def visit_or_node(node)
+ builder.logical_op(:or, visit(node.left), token(node.operator_loc), visit(node.right))
+ end
+
+ # def foo(bar, *baz); end
+ # ^^^^^^^^^
+ def visit_parameters_node(node)
+ params = []
+
+ if node.requireds.any?
+ node.requireds.each do |required|
+ params <<
+ if required.is_a?(RequiredParameterNode)
+ visit(required)
+ else
+ required.accept(copy_compiler(in_destructure: true))
+ end
+ end
+ end
+
+ params.concat(visit_all(node.optionals)) if node.optionals.any?
+ params << visit(node.rest) if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode)
+
+ if node.posts.any?
+ node.posts.each do |post|
+ params <<
+ if post.is_a?(RequiredParameterNode)
+ visit(post)
+ else
+ post.accept(copy_compiler(in_destructure: true))
+ end
+ end
+ end
+
+ params.concat(visit_all(node.keywords)) if node.keywords.any?
+ params << visit(node.keyword_rest) if !node.keyword_rest.nil?
+ params << visit(node.block) if !node.block.nil?
+ params
+ end
+
+ # ()
+ # ^^
+ #
+ # (1)
+ # ^^^
+ def visit_parentheses_node(node)
+ builder.begin(
+ token(node.opening_loc),
+ visit(node.body),
+ token(node.closing_loc)
+ )
+ end
+
+ # foo => ^(bar)
+ # ^^^^^^
+ def visit_pinned_expression_node(node)
+ parts = node.expression.accept(copy_compiler(in_pattern: false)) # Don't treat * and similar as match_rest
+ expression = builder.begin(token(node.lparen_loc), parts, token(node.rparen_loc))
+ builder.pin(token(node.operator_loc), expression)
+ end
+
+ # foo = 1 and bar => ^foo
+ # ^^^^
+ def visit_pinned_variable_node(node)
+ builder.pin(token(node.operator_loc), visit(node.variable))
+ end
+
+ # END {}
+ def visit_post_execution_node(node)
+ builder.postexe(
+ token(node.keyword_loc),
+ token(node.opening_loc),
+ visit(node.statements),
+ token(node.closing_loc)
+ )
+ end
+
+ # BEGIN {}
+ def visit_pre_execution_node(node)
+ builder.preexe(
+ token(node.keyword_loc),
+ token(node.opening_loc),
+ visit(node.statements),
+ token(node.closing_loc)
+ )
+ end
+
+ # The top-level program node.
+ def visit_program_node(node)
+ visit(node.statements)
+ end
+
+ # 0..5
+ # ^^^^
+ def visit_range_node(node)
+ if node.exclude_end?
+ builder.range_exclusive(
+ visit(node.left),
+ token(node.operator_loc),
+ visit(node.right)
+ )
+ else
+ builder.range_inclusive(
+ visit(node.left),
+ token(node.operator_loc),
+ visit(node.right)
+ )
+ end
+ end
+
+ # if foo .. bar; end
+ # ^^^^^^^^^^
+ alias visit_flip_flop_node visit_range_node
+
+ # 1r
+ # ^^
+ def visit_rational_node(node)
+ visit_numeric(node, builder.rational([node.value, srange(node.location)]))
+ end
+
+ # redo
+ # ^^^^
+ def visit_redo_node(node)
+ builder.keyword_cmd(:redo, token(node.location))
+ end
+
+ # /foo/
+ # ^^^^^
+ def visit_regular_expression_node(node)
+ parts =
+ if node.content == ""
+ []
+ elsif node.content.include?("\n")
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
+ else
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
+ end
+
+ builder.regexp_compose(
+ token(node.opening_loc),
+ parts,
+ [node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)],
+ builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)])
+ )
+ end
+
+ # if /foo/ then end
+ # ^^^^^
+ alias visit_match_last_line_node visit_regular_expression_node
+
+ # def foo(bar:); end
+ # ^^^^
+ def visit_required_keyword_parameter_node(node)
+ builder.kwarg([node.name, srange(node.name_loc)])
+ end
+
+ # def foo(bar); end
+ # ^^^
+ def visit_required_parameter_node(node)
+ builder.arg(token(node.location))
+ end
+
+ # foo rescue bar
+ # ^^^^^^^^^^^^^^
+ def visit_rescue_modifier_node(node)
+ builder.begin_body(
+ visit(node.expression),
+ [
+ builder.rescue_body(
+ token(node.keyword_loc),
+ nil,
+ nil,
+ nil,
+ nil,
+ visit(node.rescue_expression)
+ )
+ ]
+ )
+ end
+
+ # begin; rescue; end
+ # ^^^^^^^
+ def visit_rescue_node(node)
+ raise CompilationError, "Cannot directly compile rescue nodes"
+ end
+
+ # def foo(*bar); end
+ # ^^^^
+ #
+ # def foo(*); end
+ # ^
+ def visit_rest_parameter_node(node)
+ builder.restarg(token(node.operator_loc), token(node.name_loc))
+ end
+
+ # retry
+ # ^^^^^
+ def visit_retry_node(node)
+ builder.keyword_cmd(:retry, token(node.location))
+ end
+
+ # return
+ # ^^^^^^
+ #
+ # return 1
+ # ^^^^^^^^
+ def visit_return_node(node)
+ builder.keyword_cmd(
+ :return,
+ token(node.keyword_loc),
+ nil,
+ visit(node.arguments) || [],
+ nil
+ )
+ end
+
+ # self
+ # ^^^^
+ def visit_self_node(node)
+ builder.self(token(node.location))
+ end
+
+ # A shareable constant.
+ def visit_shareable_constant_node(node)
+ visit(node.write)
+ end
+
+ # class << self; end
+ # ^^^^^^^^^^^^^^^^^^
+ def visit_singleton_class_node(node)
+ builder.def_sclass(
+ token(node.class_keyword_loc),
+ token(node.operator_loc),
+ visit(node.expression),
+ node.body&.accept(copy_compiler(forwarding: [])),
+ token(node.end_keyword_loc)
+ )
+ end
+
+ # __ENCODING__
+ # ^^^^^^^^^^^^
+ def visit_source_encoding_node(node)
+ builder.accessible(builder.__ENCODING__(token(node.location)))
+ end
+
+ # __FILE__
+ # ^^^^^^^^
+ def visit_source_file_node(node)
+ builder.accessible(builder.__FILE__(token(node.location)))
+ end
+
+ # __LINE__
+ # ^^^^^^^^
+ def visit_source_line_node(node)
+ builder.accessible(builder.__LINE__(token(node.location)))
+ end
+
+ # foo(*bar)
+ # ^^^^
+ #
+ # def foo((bar, *baz)); end
+ # ^^^^
+ #
+ # def foo(*); bar(*); end
+ # ^
+ def visit_splat_node(node)
+ if node.expression.nil? && forwarding.include?(:*)
+ builder.forwarded_restarg(token(node.operator_loc))
+ elsif in_destructure
+ builder.restarg(token(node.operator_loc), token(node.expression&.location))
+ elsif in_pattern
+ builder.match_rest(token(node.operator_loc), token(node.expression&.location))
+ else
+ builder.splat(token(node.operator_loc), visit(node.expression))
+ end
+ end
+
+ # A list of statements.
+ def visit_statements_node(node)
+ builder.compstmt(visit_all(node.body))
+ end
+
+ # "foo"
+ # ^^^^^
+ def visit_string_node(node)
+ if node.heredoc?
+ visit_heredoc(node.to_interpolated) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
+ elsif node.opening == "?"
+ builder.character([node.unescaped, srange(node.location)])
+ elsif node.opening&.start_with?("%") && node.unescaped.empty?
+ builder.string_compose(token(node.opening_loc), [], token(node.closing_loc))
+ else
+ parts =
+ if node.content.include?("\n")
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
+ else
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
+ end
+
+ builder.string_compose(
+ token(node.opening_loc),
+ parts,
+ token(node.closing_loc)
+ )
+ end
+ end
+
+ # super(foo)
+ # ^^^^^^^^^^
+ def visit_super_node(node)
+ arguments = node.arguments&.arguments || []
+ block = node.block
+
+ if block.is_a?(BlockArgumentNode)
+ arguments = [*arguments, block]
+ block = nil
+ end
+
+ visit_block(
+ builder.keyword_cmd(
+ :super,
+ token(node.keyword_loc),
+ token(node.lparen_loc),
+ visit_all(arguments),
+ token(node.rparen_loc)
+ ),
+ block
+ )
+ end
+
+ # :foo
+ # ^^^^
+ def visit_symbol_node(node)
+ if node.closing_loc.nil?
+ if node.opening_loc.nil?
+ builder.symbol_internal([node.unescaped, srange(node.location)])
+ else
+ builder.symbol([node.unescaped, srange(node.location)])
+ end
+ else
+ parts =
+ if node.value_loc.nil?
+ []
+ elsif node.value.include?("\n")
+ string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)
+ else
+ [builder.string_internal([node.unescaped, srange(node.value_loc)])]
+ end
+
+ builder.symbol_compose(
+ token(node.opening_loc),
+ parts,
+ token(node.closing_loc)
+ )
+ end
+ end
+
+ # true
+ # ^^^^
+ def visit_true_node(node)
+ builder.true(token(node.location))
+ end
+
+ # undef foo
+ # ^^^^^^^^^
+ def visit_undef_node(node)
+ builder.undef_method(token(node.keyword_loc), visit_all(node.names))
+ end
+
+ # unless foo; bar end
+ # ^^^^^^^^^^^^^^^^^^^
+ #
+ # bar unless foo
+ # ^^^^^^^^^^^^^^
+ def visit_unless_node(node)
+ if node.keyword_loc.start_offset == node.location.start_offset
+ builder.condition(
+ token(node.keyword_loc),
+ visit(node.predicate),
+ if (then_keyword_loc = node.then_keyword_loc)
+ token(then_keyword_loc)
+ else
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset)
+ end,
+ visit(node.else_clause),
+ token(node.else_clause&.else_keyword_loc),
+ visit(node.statements),
+ token(node.end_keyword_loc)
+ )
+ else
+ builder.condition_mod(
+ visit(node.else_clause),
+ visit(node.statements),
+ token(node.keyword_loc),
+ visit(node.predicate)
+ )
+ end
+ end
+
+ # until foo; bar end
+ # ^^^^^^^^^^^^^^^^^^
+ #
+ # bar until foo
+ # ^^^^^^^^^^^^^
+ def visit_until_node(node)
+ if node.location.start_offset == node.keyword_loc.start_offset
+ builder.loop(
+ :until,
+ token(node.keyword_loc),
+ visit(node.predicate),
+ if (do_keyword_loc = node.do_keyword_loc)
+ token(do_keyword_loc)
+ else
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset)
+ end,
+ visit(node.statements),
+ token(node.closing_loc)
+ )
+ else
+ builder.loop_mod(
+ :until,
+ visit(node.statements),
+ token(node.keyword_loc),
+ visit(node.predicate)
+ )
+ end
+ end
+
+ # case foo; when bar; end
+ # ^^^^^^^^^^^^^
+ def visit_when_node(node)
+ builder.when(
+ token(node.keyword_loc),
+ visit_all(node.conditions),
+ if (then_keyword_loc = node.then_keyword_loc)
+ token(then_keyword_loc)
+ else
+ srange_semicolon(node.conditions.last.location.end_offset, node.statements&.location&.start_offset)
+ end,
+ visit(node.statements)
+ )
+ end
+
+ # while foo; bar end
+ # ^^^^^^^^^^^^^^^^^^
+ #
+ # bar while foo
+ # ^^^^^^^^^^^^^
+ def visit_while_node(node)
+ if node.location.start_offset == node.keyword_loc.start_offset
+ builder.loop(
+ :while,
+ token(node.keyword_loc),
+ visit(node.predicate),
+ if (do_keyword_loc = node.do_keyword_loc)
+ token(do_keyword_loc)
+ else
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset)
+ end,
+ visit(node.statements),
+ token(node.closing_loc)
+ )
+ else
+ builder.loop_mod(
+ :while,
+ visit(node.statements),
+ token(node.keyword_loc),
+ visit(node.predicate)
+ )
+ end
+ end
+
+ # `foo`
+ # ^^^^^
+ def visit_x_string_node(node)
+ if node.heredoc?
+ return visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
+ end
+
+ parts =
+ if node.content == ""
+ []
+ elsif node.content.include?("\n")
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
+ else
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
+ end
+
+ builder.xstring_compose(
+ token(node.opening_loc),
+ parts,
+ token(node.closing_loc)
+ )
+ end
+
+ # yield
+ # ^^^^^
+ #
+ # yield 1
+ # ^^^^^^^
+ def visit_yield_node(node)
+ builder.keyword_cmd(
+ :yield,
+ token(node.keyword_loc),
+ token(node.lparen_loc),
+ visit(node.arguments) || [],
+ token(node.rparen_loc)
+ )
+ end
+
+ private
+
+ # Initialize a new compiler with the given option overrides, used to
+ # visit a subtree with the given options.
+ def copy_compiler(forwarding: self.forwarding, in_destructure: self.in_destructure, in_pattern: self.in_pattern)
+ Compiler.new(parser, offset_cache, forwarding: forwarding, in_destructure: in_destructure, in_pattern: in_pattern)
+ end
+
+ # When *, **, &, or ... are used as an argument in a method call, we
+ # check if they were allowed by the current context. To determine that
+ # we build this lookup table.
+ def find_forwarding(node)
+ return [] if node.nil?
+
+ forwarding = []
+ forwarding << :* if node.rest.is_a?(RestParameterNode) && node.rest.name.nil?
+ forwarding << :** if node.keyword_rest.is_a?(KeywordRestParameterNode) && node.keyword_rest.name.nil?
+ forwarding << :& if !node.block.nil? && node.block.name.nil?
+ forwarding |= [:&, :"..."] if node.keyword_rest.is_a?(ForwardingParameterNode)
+
+ forwarding
+ end
+
+ # Returns the set of targets for a MultiTargetNode or a MultiWriteNode.
+ def multi_target_elements(node)
+ elements = [*node.lefts]
+ elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode)
+ elements.concat(node.rights)
+ elements
+ end
+
+ # Blocks can have a special set of parameters that automatically expand
+ # when given arrays if they have a single required parameter and no
+ # other parameters.
+ def procarg0?(parameters)
+ parameters &&
+ parameters.requireds.length == 1 &&
+ parameters.optionals.empty? &&
+ parameters.rest.nil? &&
+ parameters.posts.empty? &&
+ parameters.keywords.empty? &&
+ parameters.keyword_rest.nil? &&
+ parameters.block.nil?
+ end
+
+ # Locations in the parser gem AST are generated using this class. We
+ # store a reference to its constant to make it slightly faster to look
+ # up.
+ Range = ::Parser::Source::Range
+
+ # Constructs a new source range from the given start and end offsets.
+ def srange(location)
+ Range.new(source_buffer, offset_cache[location.start_offset], offset_cache[location.end_offset]) if location
+ end
+
+ # Constructs a new source range from the given start and end offsets.
+ def srange_offsets(start_offset, end_offset)
+ Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
+ end
+
+ # Constructs a new source range by finding a semicolon between the given
+ # start offset and end offset. If the semicolon is not found, it returns
+ # nil. Importantly it does not search past newlines or comments.
+ #
+ # Note that end_offset is allowed to be nil, in which case this will
+ # search until the end of the string.
+ def srange_semicolon(start_offset, end_offset)
+ if (match = source_buffer.source.byteslice(start_offset...end_offset)[/\A\s*;/])
+ final_offset = start_offset + match.bytesize
+ [";", Range.new(source_buffer, offset_cache[final_offset - 1], offset_cache[final_offset])]
+ end
+ end
+
+ # Transform a location into a token that the parser gem expects.
+ def token(location)
+ [location.slice, Range.new(source_buffer, offset_cache[location.start_offset], offset_cache[location.end_offset])] if location
+ end
+
+ # Visit a block node on a call.
+ def visit_block(call, block)
+ if block
+ parameters = block.parameters
+ implicit_parameters = parameters.is_a?(NumberedParametersNode) || parameters.is_a?(ItParametersNode)
+
+ builder.block(
+ call,
+ token(block.opening_loc),
+ if parameters.nil?
+ builder.args(nil, [], nil, false)
+ elsif implicit_parameters
+ visit(parameters)
+ else
+ builder.args(
+ token(parameters.opening_loc),
+ if procarg0?(parameters.parameters)
+ parameter = parameters.parameters.requireds.first
+ visited = parameter.is_a?(RequiredParameterNode) ? visit(parameter) : parameter.accept(copy_compiler(in_destructure: true))
+ [builder.procarg0(visited)].concat(visit_all(parameters.locals))
+ else
+ visit(parameters)
+ end,
+ token(parameters.closing_loc),
+ false
+ )
+ end,
+ visit(block.body),
+ token(block.closing_loc)
+ )
+ else
+ call
+ end
+ end
+
+ # Visit a heredoc that can be either a string or an xstring.
+ def visit_heredoc(node)
+ children = Array.new
+ indented = false
+
+ # If this is a dedenting heredoc, then we need to insert the opening
+ # content into the children as well.
+ if node.opening.start_with?("<<~") && node.parts.length > 0 && !node.parts.first.is_a?(StringNode)
+ location = node.parts.first.location
+ location = location.copy(start_offset: location.start_offset - location.start_line_slice.bytesize)
+ children << builder.string_internal(token(location))
+ indented = true
+ end
+
+ node.parts.each do |part|
+ pushing =
+ if part.is_a?(StringNode) && part.content.include?("\n")
+ string_nodes_from_line_continuations(part.unescaped, part.content, part.location.start_offset, node.opening)
+ else
+ [visit(part)]
+ end
+
+ pushing.each do |child|
+ if child.type == :str && child.children.last == ""
+ # nothing
+ elsif child.type == :str && children.last && children.last.type == :str && !children.last.children.first.end_with?("\n")
+ appendee = children[-1]
+
+ location = appendee.loc
+ location = location.with_expression(location.expression.join(child.loc.expression))
+
+ children[-1] = appendee.updated(:str, ["#{appendee.children.first}#{child.children.first}"], location: location)
+ else
+ children << child
+ end
+ end
+ end
+
+ closing = node.closing
+ closing_t = [closing.chomp, srange_offsets(node.closing_loc.start_offset, node.closing_loc.end_offset - (closing[/\s+$/]&.length || 0))]
+ composed = yield children, closing_t
+
+ composed = composed.updated(nil, children[1..-1]) if indented
+ composed
+ end
+
+ # Visit a numeric node and account for the optional sign.
+ def visit_numeric(node, value)
+ if (slice = node.slice).match?(/^[+-]/)
+ builder.unary_num(
+ [slice[0].to_sym, srange_offsets(node.location.start_offset, node.location.start_offset + 1)],
+ value
+ )
+ else
+ value
+ end
+ end
+
+ # Within the given block, track that we're within a pattern.
+ def within_pattern
+ begin
+ parser.pattern_variables.push
+ yield copy_compiler(in_pattern: true)
+ ensure
+ parser.pattern_variables.pop
+ end
+ end
+
+ # When the content of a string node is split across multiple lines, the
+ # parser gem creates individual string nodes for each line the content is part of.
+ def string_nodes_from_interpolation(node, opening)
+ node.parts.flat_map do |part|
+ if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
+ string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, opening)
+ else
+ visit(part)
+ end
+ end
+ end
+
+ # Create parser string nodes from a single prism node. The parser gem
+ # "glues" strings together when a line continuation is encountered.
+ def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening)
+ unescaped = unescaped.lines
+ escaped = escaped.lines
+ percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
+ regex = opening == "/" || opening&.start_with?("%r")
+
+ # Non-interpolating strings
+ if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i")
+ current_length = 0
+ current_line = +""
+
+ escaped.filter_map.with_index do |escaped_line, index|
+ unescaped_line = unescaped.fetch(index, "")
+ current_length += escaped_line.bytesize
+ current_line << unescaped_line
+
+ # Glue line continuations together. Only %w and %i arrays can contain these.
+ if percent_array && escaped_line[/(\\)*\n$/, 1]&.length&.odd?
+ next unless index == escaped.count - 1
+ end
+ s = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_length)])
+ start_offset += escaped_line.bytesize
+ current_line = +""
+ current_length = 0
+ s
+ end
+ else
+ escaped_lengths = []
+ normalized_lengths = []
+ # Keeps track of where an unescaped line should start a new token. An unescaped
+ # \n would otherwise be indistinguishable from the actual newline at the end of
+ # of the line. The parser gem only emits a new string node at "real" newlines,
+ # line continuations don't start a new node as well.
+ do_next_tokens = []
+
+ escaped
+ .chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
+ .each do |lines|
+ escaped_lengths << lines.sum(&:bytesize)
+
+ unescaped_lines_count =
+ if regex
+ 0 # Will always be preserved as is
+ else
+ lines.sum do |line|
+ count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
+ count -= 1 if line.match?(/(?:\A|[^\\])(?:\\\\)*\\n\z/) && count > 0
+ count
+ end
+ end
+
+ extra = 1
+ extra = lines.count if percent_array # Account for line continuations in percent arrays
+
+ normalized_lengths.concat(Array.new(unescaped_lines_count + extra, 0))
+ normalized_lengths[-1] = lines.sum { |line| line.bytesize }
+ do_next_tokens.concat(Array.new(unescaped_lines_count + extra, false))
+ do_next_tokens[-1] = true
+ end
+
+ current_line = +""
+ current_normalized_length = 0
+
+ emitted_count = 0
+ unescaped.filter_map.with_index do |unescaped_line, index|
+ current_line << unescaped_line
+ current_normalized_length += normalized_lengths.fetch(index, 0)
+
+ if do_next_tokens[index]
+ inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
+ start_offset += escaped_lengths.fetch(emitted_count, 0)
+ current_line = +""
+ current_normalized_length = 0
+ emitted_count += 1
+ inner_part
+ else
+ nil
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
new file mode 100644
index 0000000000..e82042867f
--- /dev/null
+++ b/lib/prism/translation/parser/lexer.rb
@@ -0,0 +1,819 @@
+# frozen_string_literal: true
+# :markup: markdown
+
+require "strscan"
+require_relative "../../polyfill/append_as_bytes"
+require_relative "../../polyfill/scan_byte"
+
+module Prism
+ module Translation
+ class Parser
+ # Accepts a list of prism tokens and converts them into the expected
+ # format for the parser gem.
+ class Lexer # :nodoc:
+ # These tokens are always skipped
+ TYPES_ALWAYS_SKIP = Set.new(%i[IGNORED_NEWLINE __END__ EOF])
+ private_constant :TYPES_ALWAYS_SKIP
+
+ # The direct translating of types between the two lexers.
+ TYPES = {
+ # These tokens should never appear in the output of the lexer.
+ EMBDOC_END: nil,
+ EMBDOC_LINE: nil,
+
+ # These tokens have more or less direct mappings.
+ AMPERSAND: :tAMPER2,
+ AMPERSAND_AMPERSAND: :tANDOP,
+ AMPERSAND_AMPERSAND_EQUAL: :tOP_ASGN,
+ AMPERSAND_DOT: :tANDDOT,
+ AMPERSAND_EQUAL: :tOP_ASGN,
+ BACK_REFERENCE: :tBACK_REF,
+ BACKTICK: :tXSTRING_BEG,
+ BANG: :tBANG,
+ BANG_EQUAL: :tNEQ,
+ BANG_TILDE: :tNMATCH,
+ BRACE_LEFT: :tLCURLY,
+ BRACE_RIGHT: :tRCURLY,
+ BRACKET_LEFT: :tLBRACK2,
+ BRACKET_LEFT_ARRAY: :tLBRACK,
+ BRACKET_LEFT_RIGHT: :tAREF,
+ BRACKET_LEFT_RIGHT_EQUAL: :tASET,
+ BRACKET_RIGHT: :tRBRACK,
+ CARET: :tCARET,
+ CARET_EQUAL: :tOP_ASGN,
+ CHARACTER_LITERAL: :tCHARACTER,
+ CLASS_VARIABLE: :tCVAR,
+ COLON: :tCOLON,
+ COLON_COLON: :tCOLON2,
+ COMMA: :tCOMMA,
+ COMMENT: :tCOMMENT,
+ CONSTANT: :tCONSTANT,
+ DOT: :tDOT,
+ DOT_DOT: :tDOT2,
+ DOT_DOT_DOT: :tDOT3,
+ EMBDOC_BEGIN: :tCOMMENT,
+ EMBEXPR_BEGIN: :tSTRING_DBEG,
+ EMBEXPR_END: :tSTRING_DEND,
+ EMBVAR: :tSTRING_DVAR,
+ EQUAL: :tEQL,
+ EQUAL_EQUAL: :tEQ,
+ EQUAL_EQUAL_EQUAL: :tEQQ,
+ EQUAL_GREATER: :tASSOC,
+ EQUAL_TILDE: :tMATCH,
+ FLOAT: :tFLOAT,
+ FLOAT_IMAGINARY: :tIMAGINARY,
+ FLOAT_RATIONAL: :tRATIONAL,
+ FLOAT_RATIONAL_IMAGINARY: :tIMAGINARY,
+ GLOBAL_VARIABLE: :tGVAR,
+ GREATER: :tGT,
+ GREATER_EQUAL: :tGEQ,
+ GREATER_GREATER: :tRSHFT,
+ GREATER_GREATER_EQUAL: :tOP_ASGN,
+ HEREDOC_START: :tSTRING_BEG,
+ HEREDOC_END: :tSTRING_END,
+ IDENTIFIER: :tIDENTIFIER,
+ INSTANCE_VARIABLE: :tIVAR,
+ INTEGER: :tINTEGER,
+ INTEGER_IMAGINARY: :tIMAGINARY,
+ INTEGER_RATIONAL: :tRATIONAL,
+ INTEGER_RATIONAL_IMAGINARY: :tIMAGINARY,
+ KEYWORD_ALIAS: :kALIAS,
+ KEYWORD_AND: :kAND,
+ KEYWORD_BEGIN: :kBEGIN,
+ KEYWORD_BEGIN_UPCASE: :klBEGIN,
+ KEYWORD_BREAK: :kBREAK,
+ KEYWORD_CASE: :kCASE,
+ KEYWORD_CLASS: :kCLASS,
+ KEYWORD_DEF: :kDEF,
+ KEYWORD_DEFINED: :kDEFINED,
+ KEYWORD_DO: :kDO,
+ KEYWORD_DO_BLOCK: :kDO_BLOCK,
+ KEYWORD_DO_LOOP: :kDO_COND,
+ KEYWORD_END: :kEND,
+ KEYWORD_END_UPCASE: :klEND,
+ KEYWORD_ENSURE: :kENSURE,
+ KEYWORD_ELSE: :kELSE,
+ KEYWORD_ELSIF: :kELSIF,
+ KEYWORD_FALSE: :kFALSE,
+ KEYWORD_FOR: :kFOR,
+ KEYWORD_IF: :kIF,
+ KEYWORD_IF_MODIFIER: :kIF_MOD,
+ KEYWORD_IN: :kIN,
+ KEYWORD_MODULE: :kMODULE,
+ KEYWORD_NEXT: :kNEXT,
+ KEYWORD_NIL: :kNIL,
+ KEYWORD_NOT: :kNOT,
+ KEYWORD_OR: :kOR,
+ KEYWORD_REDO: :kREDO,
+ KEYWORD_RESCUE: :kRESCUE,
+ KEYWORD_RESCUE_MODIFIER: :kRESCUE_MOD,
+ KEYWORD_RETRY: :kRETRY,
+ KEYWORD_RETURN: :kRETURN,
+ KEYWORD_SELF: :kSELF,
+ KEYWORD_SUPER: :kSUPER,
+ KEYWORD_THEN: :kTHEN,
+ KEYWORD_TRUE: :kTRUE,
+ KEYWORD_UNDEF: :kUNDEF,
+ KEYWORD_UNLESS: :kUNLESS,
+ KEYWORD_UNLESS_MODIFIER: :kUNLESS_MOD,
+ KEYWORD_UNTIL: :kUNTIL,
+ KEYWORD_UNTIL_MODIFIER: :kUNTIL_MOD,
+ KEYWORD_WHEN: :kWHEN,
+ KEYWORD_WHILE: :kWHILE,
+ KEYWORD_WHILE_MODIFIER: :kWHILE_MOD,
+ KEYWORD_YIELD: :kYIELD,
+ KEYWORD___ENCODING__: :k__ENCODING__,
+ KEYWORD___FILE__: :k__FILE__,
+ KEYWORD___LINE__: :k__LINE__,
+ LABEL: :tLABEL,
+ LABEL_END: :tLABEL_END,
+ LAMBDA_BEGIN: :tLAMBEG,
+ LESS: :tLT,
+ LESS_EQUAL: :tLEQ,
+ LESS_EQUAL_GREATER: :tCMP,
+ LESS_LESS: :tLSHFT,
+ LESS_LESS_EQUAL: :tOP_ASGN,
+ METHOD_NAME: :tFID,
+ MINUS: :tMINUS,
+ MINUS_EQUAL: :tOP_ASGN,
+ MINUS_GREATER: :tLAMBDA,
+ NEWLINE: :tNL,
+ NUMBERED_REFERENCE: :tNTH_REF,
+ PARENTHESIS_LEFT: :tLPAREN2,
+ PARENTHESIS_LEFT_PARENTHESES: :tLPAREN_ARG,
+ PARENTHESIS_RIGHT: :tRPAREN,
+ PERCENT: :tPERCENT,
+ PERCENT_EQUAL: :tOP_ASGN,
+ PERCENT_LOWER_I: :tQSYMBOLS_BEG,
+ PERCENT_LOWER_W: :tQWORDS_BEG,
+ PERCENT_UPPER_I: :tSYMBOLS_BEG,
+ PERCENT_UPPER_W: :tWORDS_BEG,
+ PERCENT_LOWER_X: :tXSTRING_BEG,
+ PLUS: :tPLUS,
+ PLUS_EQUAL: :tOP_ASGN,
+ PIPE_EQUAL: :tOP_ASGN,
+ PIPE: :tPIPE,
+ PIPE_PIPE: :tOROP,
+ PIPE_PIPE_EQUAL: :tOP_ASGN,
+ QUESTION_MARK: :tEH,
+ REGEXP_BEGIN: :tREGEXP_BEG,
+ REGEXP_END: :tSTRING_END,
+ SEMICOLON: :tSEMI,
+ SLASH: :tDIVIDE,
+ SLASH_EQUAL: :tOP_ASGN,
+ STAR: :tSTAR2,
+ STAR_EQUAL: :tOP_ASGN,
+ STAR_STAR: :tPOW,
+ STAR_STAR_EQUAL: :tOP_ASGN,
+ STRING_BEGIN: :tSTRING_BEG,
+ STRING_CONTENT: :tSTRING_CONTENT,
+ STRING_END: :tSTRING_END,
+ SYMBOL_BEGIN: :tSYMBEG,
+ TILDE: :tTILDE,
+ UAMPERSAND: :tAMPER,
+ UCOLON_COLON: :tCOLON3,
+ UDOT_DOT: :tBDOT2,
+ UDOT_DOT_DOT: :tBDOT3,
+ UMINUS: :tUMINUS,
+ UMINUS_NUM: :tUNARY_NUM,
+ UPLUS: :tUPLUS,
+ USTAR: :tSTAR,
+ USTAR_STAR: :tDSTAR,
+ WORDS_SEP: :tSPACE
+ }
+
+ # These constants represent flags in our lex state. We really, really
+ # don't want to be using them and we really, really don't want to be
+ # exposing them as part of our public API. Unfortunately, we don't have
+ # another way of matching the exact tokens that the parser gem expects
+ # without them. We should find another way to do this, but in the
+ # meantime we'll hide them from the documentation and mark them as
+ # private constants.
+ EXPR_BEG = 0x1
+ EXPR_LABEL = 0x400
+
+ # It is used to determine whether `do` is of the token type `kDO` or `kDO_LAMBDA`.
+ #
+ # NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, please note that `kDO` is still returned
+ # instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046
+ LAMBDA_TOKEN_TYPES = Set.new([:kDO_LAMBDA, :tLAMBDA, :tLAMBEG])
+
+ # The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem.
+ # The following token types are listed as those classified as `tLPAREN`.
+ LPAREN_CONVERSION_TOKEN_TYPES = Set.new([
+ :kBREAK, :tCARET, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
+ :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS, :tLCURLY
+ ])
+
+ # Types of tokens that are allowed to continue a method call with comments in-between.
+ # For these, the parser gem doesn't emit a newline token after the last comment.
+ COMMENT_CONTINUATION_TYPES = Set.new([:COMMENT, :AMPERSAND_DOT, :DOT])
+ private_constant :COMMENT_CONTINUATION_TYPES
+
+ # Heredocs are complex and require us to keep track of a bit of info to refer to later
+ HeredocData = Struct.new(:identifier, :common_whitespace, keyword_init: true)
+
+ private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL, :LAMBDA_TOKEN_TYPES, :LPAREN_CONVERSION_TOKEN_TYPES, :HeredocData
+
+ # The Parser::Source::Buffer that the tokens were lexed from.
+ attr_reader :source_buffer
+
+ # An array of tuples that contain prism tokens and their associated lex
+ # state when they were lexed.
+ attr_reader :lexed
+
+ # A hash that maps offsets in bytes to offsets in characters.
+ attr_reader :offset_cache
+
+ # Initialize the lexer with the given source buffer, prism tokens, and
+ # offset cache.
+ def initialize(source_buffer, lexed, offset_cache)
+ @source_buffer = source_buffer
+ @lexed = lexed
+ @offset_cache = offset_cache
+ end
+
+ Range = ::Parser::Source::Range
+ private_constant :Range
+
+ # Convert the prism tokens into the expected format for the parser gem.
+ def to_a
+ tokens = []
+
+ index = 0
+ length = lexed.length
+
+ heredoc_stack = []
+ quote_stack = []
+
+ # The parser gem emits the newline tokens for comments out of order. This saves
+ # that token location to emit at a later time to properly line everything up.
+ # https://github.com/whitequark/parser/issues/1025
+ comment_newline_location = nil
+
+ while index < length
+ token, state = lexed[index]
+ index += 1
+ next if TYPES_ALWAYS_SKIP.include?(token.type)
+
+ type = TYPES.fetch(token.type)
+ value = token.value
+ location = range(token.location.start_offset, token.location.end_offset)
+
+ case type
+ when :kDO
+ nearest_lambda_token = tokens.reverse_each.find do |token|
+ LAMBDA_TOKEN_TYPES.include?(token.first)
+ end
+
+ if nearest_lambda_token&.first == :tLAMBDA
+ type = :kDO_LAMBDA
+ end
+ when :tCHARACTER
+ value.delete_prefix!("?")
+ # Character literals behave similar to double-quoted strings. We can use the same escaping mechanism.
+ value = unescape_string(value, "?")
+ when :tCOMMENT
+ if token.type == :EMBDOC_BEGIN
+
+ while !((next_token = lexed[index]&.first) && next_token.type == :EMBDOC_END) && (index < length - 1)
+ value += next_token.value
+ index += 1
+ end
+
+ value += next_token.value
+ location = range(token.location.start_offset, next_token.location.end_offset)
+ index += 1
+ else
+ is_at_eol = value.chomp!.nil?
+ location = range(token.location.start_offset, token.location.end_offset + (is_at_eol ? 0 : -1))
+
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
+ next_token, _ = lexed[index]
+
+ is_inline_comment = prev_token&.location&.start_line == token.location.start_line
+ if is_inline_comment && !is_at_eol && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
+ tokens << [:tCOMMENT, [value, location]]
+
+ nl_location = range(token.location.end_offset - 1, token.location.end_offset)
+ tokens << [:tNL, [nil, nl_location]]
+ next
+ elsif is_inline_comment && next_token&.type == :COMMENT
+ comment_newline_location = range(token.location.end_offset - 1, token.location.end_offset)
+ elsif comment_newline_location && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
+ tokens << [:tCOMMENT, [value, location]]
+ tokens << [:tNL, [nil, comment_newline_location]]
+ comment_newline_location = nil
+ next
+ end
+ end
+ when :tNL
+ next_token, _ = lexed[index]
+ # Newlines after comments are emitted out of order.
+ if next_token&.type == :COMMENT
+ comment_newline_location = location
+ next
+ end
+
+ value = nil
+ when :tFLOAT
+ value = parse_float(value)
+ when :tIMAGINARY
+ value = parse_complex(value)
+ when :tINTEGER
+ if value.start_with?("+")
+ tokens << [:tUNARY_NUM, ["+", range(token.location.start_offset, token.location.start_offset + 1)]]
+ location = range(token.location.start_offset + 1, token.location.end_offset)
+ end
+
+ value = parse_integer(value)
+ when :tLABEL
+ value.chomp!(":")
+ when :tLABEL_END
+ value.chomp!(":")
+ when :tLCURLY
+ type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL
+ when :tLPAREN2
+ type = :tLPAREN if tokens.empty? || LPAREN_CONVERSION_TOKEN_TYPES.include?(tokens.dig(-1, 0))
+ when :tNTH_REF
+ value = parse_integer(value.delete_prefix("$"))
+ when :tOP_ASGN
+ value.chomp!("=")
+ when :tRATIONAL
+ value = parse_rational(value)
+ when :tSPACE
+ location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value))
+ value = nil
+ when :tSTRING_BEG
+ next_token, _ = lexed[index]
+ next_next_token, _ = lexed[index + 1]
+ basic_quotes = value == '"' || value == "'"
+
+ if basic_quotes && next_token&.type == :STRING_END
+ next_location = token.location.join(next_token.location)
+ type = :tSTRING
+ value = ""
+ location = range(next_location.start_offset, next_location.end_offset)
+ index += 1
+ elsif value.start_with?("'", '"', "%")
+ if next_token&.type == :STRING_CONTENT && next_next_token&.type == :STRING_END
+ string_value = next_token.value
+ if simplify_string?(string_value, value)
+ next_location = token.location.join(next_next_token.location)
+ if percent_array?(value)
+ value = percent_array_unescape(string_value)
+ else
+ value = unescape_string(string_value, value)
+ end
+ type = :tSTRING
+ location = range(next_location.start_offset, next_location.end_offset)
+ index += 2
+ tokens << [type, [value, location]]
+
+ next
+ end
+ end
+
+ quote_stack.push(value)
+ elsif token.type == :HEREDOC_START
+ quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
+ heredoc_type = value[2] == "-" || value[2] == "~" ? value[2] : ""
+ heredoc = HeredocData.new(
+ identifier: value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier],
+ common_whitespace: 0,
+ )
+
+ if quote == "`"
+ type = :tXSTRING_BEG
+ end
+
+ # The parser gem trims whitespace from squiggly heredocs. We must record
+ # the most common whitespace to later remove.
+ if heredoc_type == "~" || heredoc_type == "`"
+ heredoc.common_whitespace = calculate_heredoc_whitespace(index)
+ end
+
+ if quote == "'" || quote == '"' || quote == "`"
+ value = "<<#{quote}"
+ else
+ value = '<<"'
+ end
+
+ heredoc_stack.push(heredoc)
+ quote_stack.push(value)
+ end
+ when :tSTRING_CONTENT
+ is_percent_array = percent_array?(quote_stack.last)
+
+ if (lines = token.value.lines).one?
+ # Prism usually emits a single token for strings with line continuations.
+ # For squiggly heredocs they are not joined so we do that manually here.
+ current_string = +""
+ current_length = 0
+ start_offset = token.location.start_offset
+ while token.type == :STRING_CONTENT
+ current_length += token.value.bytesize
+ # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
+ is_first_token_on_line = prev_token && token.location.start_line != prev_token.location.start_line
+ # The parser gem only removes indentation when the heredoc is not nested
+ not_nested = heredoc_stack.size == 1
+ if is_percent_array
+ value = percent_array_unescape(token.value)
+ elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
+ value = trim_heredoc_whitespace(token.value, current_heredoc)
+ end
+
+ current_string << unescape_string(value, quote_stack.last)
+ relevant_backslash_count = if quote_stack.last.start_with?("%W", "%I")
+ 0 # the last backslash escapes the newline
+ else
+ token.value[/(\\{1,})\n/, 1]&.length || 0
+ end
+ if relevant_backslash_count.even? || !interpolation?(quote_stack.last)
+ tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
+ break
+ end
+ token, _ = lexed[index]
+ index += 1
+ end
+ else
+ # When the parser gem encounters a line continuation inside of a multiline string,
+ # it emits a single string node. The backslash (and remaining newline) is removed.
+ current_line = +""
+ adjustment = 0
+ start_offset = token.location.start_offset
+ emit = false
+
+ lines.each.with_index do |line, index|
+ chomped_line = line.chomp
+ backslash_count = chomped_line[/\\{1,}\z/]&.length || 0
+ is_interpolation = interpolation?(quote_stack.last)
+
+ if backslash_count.odd? && (is_interpolation || is_percent_array)
+ if is_percent_array
+ current_line << percent_array_unescape(line)
+ adjustment += 1
+ else
+ chomped_line.delete_suffix!("\\")
+ current_line << chomped_line
+ adjustment += 2
+ end
+ # If the string ends with a line continuation emit the remainder
+ emit = index == lines.count - 1
+ else
+ current_line << line
+ emit = true
+ end
+
+ if emit
+ end_offset = start_offset + current_line.bytesize + adjustment
+ tokens << [:tSTRING_CONTENT, [unescape_string(current_line, quote_stack.last), range(start_offset, end_offset)]]
+ start_offset = end_offset
+ current_line = +""
+ adjustment = 0
+ end
+ end
+ end
+ next
+ when :tSTRING_DVAR
+ value = nil
+ when :tSTRING_END
+ if token.type == :HEREDOC_END && value.end_with?("\n")
+ newline_length = value.end_with?("\r\n") ? 2 : 1
+ value = heredoc_stack.pop.identifier
+ location = range(token.location.start_offset, token.location.end_offset - newline_length)
+ elsif token.type == :REGEXP_END
+ value = value[0]
+ location = range(token.location.start_offset, token.location.start_offset + 1)
+ end
+
+ if percent_array?(quote_stack.pop)
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
+ empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type)
+ ends_with_whitespace = prev_token&.type == :WORDS_SEP
+ # parser always emits a space token after content in a percent array, even if no actual whitespace is present.
+ if !empty && !ends_with_whitespace
+ tokens << [:tSPACE, [nil, range(token.location.start_offset, token.location.start_offset)]]
+ end
+ end
+ when :tSYMBEG
+ if (next_token = lexed[index]&.first) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
+ next_location = token.location.join(next_token.location)
+ type = :tSYMBOL
+ value = next_token.value
+ value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
+ location = range(next_location.start_offset, next_location.end_offset)
+ index += 1
+ else
+ quote_stack.push(value)
+ end
+ when :tFID
+ if !tokens.empty? && tokens.dig(-1, 0) == :kDEF
+ type = :tIDENTIFIER
+ end
+ when :tXSTRING_BEG
+ if (next_token = lexed[index]&.first) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
+ # self.`()
+ type = :tBACK_REF2
+ end
+ quote_stack.push(value)
+ when :tSYMBOLS_BEG, :tQSYMBOLS_BEG, :tWORDS_BEG, :tQWORDS_BEG
+ if (next_token = lexed[index]&.first) && next_token.type == :WORDS_SEP
+ index += 1
+ end
+
+ quote_stack.push(value)
+ when :tREGEXP_BEG
+ quote_stack.push(value)
+ end
+
+ tokens << [type, [value, location]]
+
+ if token.type == :REGEXP_END
+ tokens << [:tREGEXP_OPT, [token.value[1..], range(token.location.start_offset + 1, token.location.end_offset)]]
+ end
+ end
+
+ tokens
+ end
+
+ private
+
+ # Creates a new parser range, taking prisms byte offsets into account
+ def range(start_offset, end_offset)
+ Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
+ end
+
+ # Parse an integer from the string representation.
+ def parse_integer(value)
+ Integer(value)
+ rescue ArgumentError
+ 0
+ end
+
+ # Parse a float from the string representation.
+ def parse_float(value)
+ Float(value)
+ rescue ArgumentError
+ 0.0
+ end
+
+ # Parse a complex from the string representation.
+ def parse_complex(value)
+ value.chomp!("i")
+
+ if value.end_with?("r")
+ Complex(0, parse_rational(value))
+ elsif value.start_with?(/0[BbOoDdXx]/)
+ Complex(0, parse_integer(value))
+ else
+ Complex(0, value)
+ end
+ rescue ArgumentError
+ 0i
+ end
+
+ # Parse a rational from the string representation.
+ def parse_rational(value)
+ value.chomp!("r")
+
+ if value.start_with?(/0[BbOoDdXx]/)
+ Rational(parse_integer(value))
+ else
+ Rational(value)
+ end
+ rescue ArgumentError
+ 0r
+ end
+
+ # Wonky heredoc tab/spaces rules.
+ # https://github.com/ruby/prism/blob/v1.3.0/src/prism.c#L10548-L10558
+ def calculate_heredoc_whitespace(heredoc_token_index)
+ next_token_index = heredoc_token_index
+ nesting_level = 0
+ previous_line = -1
+ result = Float::MAX
+
+ while (next_token = lexed[next_token_index]&.first)
+ next_token_index += 1
+ next_next_token, _ = lexed[next_token_index]
+ first_token_on_line = next_token.location.start_column == 0
+
+ # String content inside nested heredocs and interpolation is ignored
+ if next_token.type == :HEREDOC_START || next_token.type == :EMBEXPR_BEGIN
+ # When interpolation is the first token of a line there is no string
+ # content to check against. There will be no common whitespace.
+ if nesting_level == 0 && first_token_on_line
+ result = 0
+ end
+ nesting_level += 1
+ elsif next_token.type == :HEREDOC_END || next_token.type == :EMBEXPR_END
+ nesting_level -= 1
+ # When we encountered the matching heredoc end, we can exit
+ break if nesting_level == -1
+ elsif next_token.type == :STRING_CONTENT && nesting_level == 0 && first_token_on_line
+ common_whitespace = 0
+ next_token.value[/^\s*/].each_char do |char|
+ if char == "\t"
+ common_whitespace = (common_whitespace / 8 + 1) * 8;
+ else
+ common_whitespace += 1
+ end
+ end
+
+ is_first_token_on_line = next_token.location.start_line != previous_line
+ # Whitespace is significant if followed by interpolation
+ whitespace_only = common_whitespace == next_token.value.length && next_next_token&.location&.start_line != next_token.location.start_line
+ if is_first_token_on_line && !whitespace_only && common_whitespace < result
+ result = common_whitespace
+ previous_line = next_token.location.start_line
+ end
+ end
+ end
+ result
+ end
+
+ # Wonky heredoc tab/spaces rules.
+ # https://github.com/ruby/prism/blob/v1.3.0/src/prism.c#L16528-L16545
+ def trim_heredoc_whitespace(string, heredoc)
+ trimmed_whitespace = 0
+ trimmed_characters = 0
+ while (string[trimmed_characters] == "\t" || string[trimmed_characters] == " ") && trimmed_whitespace < heredoc.common_whitespace
+ if string[trimmed_characters] == "\t"
+ trimmed_whitespace = (trimmed_whitespace / 8 + 1) * 8;
+ break if trimmed_whitespace > heredoc.common_whitespace
+ else
+ trimmed_whitespace += 1
+ end
+ trimmed_characters += 1
+ end
+
+ string[trimmed_characters..]
+ end
+
+ # Escape sequences that have special and should appear unescaped in the resulting string.
+ ESCAPES = {
+ "a" => "\a", "b" => "\b", "e" => "\e", "f" => "\f",
+ "n" => "\n", "r" => "\r", "s" => "\s", "t" => "\t",
+ "v" => "\v", "\\" => "\\"
+ }.freeze
+ private_constant :ESCAPES
+
+ # When one of these delimiters is encountered, then the other
+ # one is allowed to be escaped as well.
+ DELIMITER_SYMETRY = { "[" => "]", "(" => ")", "{" => "}", "<" => ">" }.freeze
+ private_constant :DELIMITER_SYMETRY
+
+
+ # https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/lexer-strings.rl#L14
+ REGEXP_META_CHARACTERS = ["\\", "$", "(", ")", "*", "+", ".", "<", ">", "?", "[", "]", "^", "{", "|", "}"]
+ private_constant :REGEXP_META_CHARACTERS
+
+ # Apply Ruby string escaping rules
+ def unescape_string(string, quote)
+ # In single-quoted heredocs, everything is taken literally.
+ return string if quote == "<<'"
+
+ # OPTIMIZATION: Assume that few strings need escaping to speed up the common case.
+ return string unless string.include?("\\")
+
+ # Enclosing character for the string. `"` for `"foo"`, `{` for `%w{foo}`, etc.
+ delimiter = quote[-1]
+
+ if regexp?(quote)
+ # Should be escaped handled to single-quoted heredocs. The only character that is
+ # allowed to be escaped is the delimiter, except when that also has special meaning
+ # in the regexp. Since all the symetry delimiters have special meaning, they don't need
+ # to be considered separately.
+ if REGEXP_META_CHARACTERS.include?(delimiter)
+ string
+ else
+ # There can never be an even amount of backslashes. It would be a syntax error.
+ string.gsub(/\\(#{Regexp.escape(delimiter)})/, '\1')
+ end
+ elsif interpolation?(quote)
+ # Appending individual escape sequences may force the string out of its intended
+ # encoding. Start out with binary and force it back later.
+ result = "".b
+
+ scanner = StringScanner.new(string)
+ while (skipped = scanner.skip_until(/\\/))
+ # Append what was just skipped over, excluding the found backslash.
+ result.append_as_bytes(string.byteslice(scanner.pos - skipped, skipped - 1))
+ escape_read(result, scanner, false, false)
+ end
+
+ # Add remaining chars
+ result.append_as_bytes(string.byteslice(scanner.pos..))
+ result.force_encoding(source_buffer.source.encoding)
+ else
+ delimiters = Regexp.escape("#{delimiter}#{DELIMITER_SYMETRY[delimiter]}")
+ string.gsub(/\\([\\#{delimiters}])/, '\1')
+ end
+ end
+
+ # Certain strings are merged into a single string token.
+ def simplify_string?(value, quote)
+ case quote
+ when "'"
+ # Only simplify 'foo'
+ !value.include?("\n")
+ when '"'
+ # Simplify when every line ends with a line continuation, or it is the last line
+ value.lines.all? do |line|
+ !line.end_with?("\n") || line[/(\\*)$/, 1]&.length&.odd?
+ end
+ else
+ # %q and similar are never simplified
+ false
+ end
+ end
+
+ # Escape a byte value, given the control and meta flags.
+ def escape_build(value, control, meta)
+ value &= 0x9f if control
+ value |= 0x80 if meta
+ value
+ end
+
+ # Read an escape out of the string scanner, given the control and meta
+ # flags, and push the unescaped value into the result.
+ def escape_read(result, scanner, control, meta)
+ if scanner.skip("\n")
+ # Line continuation
+ elsif (value = ESCAPES[scanner.peek(1)])
+ # Simple single-character escape sequences like \n
+ result.append_as_bytes(value)
+ scanner.pos += 1
+ elsif (value = scanner.scan(/[0-7]{1,3}/))
+ # \nnn
+ result.append_as_bytes(escape_build(value.to_i(8), control, meta))
+ elsif (value = scanner.scan(/x[0-9a-fA-F]{1,2}/))
+ # \xnn
+ result.append_as_bytes(escape_build(value[1..].to_i(16), control, meta))
+ elsif (value = scanner.scan(/u[0-9a-fA-F]{4}/))
+ # \unnnn
+ result.append_as_bytes(value[1..].hex.chr(Encoding::UTF_8))
+ elsif scanner.skip("u{}")
+ # https://github.com/whitequark/parser/issues/856
+ elsif (value = scanner.scan(/u{.*?}/))
+ # \u{nnnn ...}
+ value[2..-2].split.each do |unicode|
+ result.append_as_bytes(unicode.hex.chr(Encoding::UTF_8))
+ end
+ elsif (value = scanner.scan(/c\\?(?=[[:print:]])|C-\\?(?=[[:print:]])/))
+ # \cx or \C-x where x is an ASCII printable character
+ escape_read(result, scanner, true, meta)
+ elsif (value = scanner.scan(/M-\\?(?=[[:print:]])/))
+ # \M-x where x is an ASCII printable character
+ escape_read(result, scanner, control, true)
+ elsif (byte = scanner.scan_byte)
+ # Something else after an escape.
+ if control && byte == 0x3f # ASCII '?'
+ result.append_as_bytes(escape_build(0x7f, false, meta))
+ else
+ result.append_as_bytes(escape_build(byte, control, meta))
+ end
+ end
+ end
+
+ # In a percent array, certain whitespace can be preceeded with a backslash,
+ # causing the following characters to be part of the previous element.
+ def percent_array_unescape(string)
+ string.gsub(/(\\)+[ \f\n\r\t\v]/) do |full_match|
+ full_match.delete_prefix!("\\") if Regexp.last_match[1].length.odd?
+ full_match
+ end
+ end
+
+ # For %-arrays whitespace, the parser gem only considers whitespace before the newline.
+ def percent_array_leading_whitespace(string)
+ return 1 if string.start_with?("\n")
+
+ leading_whitespace = 0
+ string.each_char do |c|
+ break if c == "\n"
+ leading_whitespace += 1
+ end
+ leading_whitespace
+ end
+
+ # Determine if characters preceeded by a backslash should be escaped or not
+ def interpolation?(quote)
+ !quote.end_with?("'") && !quote.start_with?("%q", "%w", "%i", "%s")
+ end
+
+ # Regexp allow interpolation but are handled differently during unescaping
+ def regexp?(quote)
+ quote == "/" || quote.start_with?("%r")
+ end
+
+ # Determine if the string is part of a %-style array.
+ def percent_array?(quote)
+ quote.start_with?("%w", "%W", "%i", "%I")
+ end
+ end
+ end
+ end
+end