diff options
Diffstat (limited to 'lib/prism/translation')
| -rw-r--r-- | lib/prism/translation/parser.rb | 31 | ||||
| -rw-r--r-- | lib/prism/translation/parser/builder.rb | 13 | ||||
| -rw-r--r-- | lib/prism/translation/parser/compiler.rb | 336 | ||||
| -rw-r--r-- | lib/prism/translation/parser/lexer.rb | 260 | ||||
| -rw-r--r-- | lib/prism/translation/parser33.rb | 12 | ||||
| -rw-r--r-- | lib/prism/translation/parser34.rb | 12 | ||||
| -rw-r--r-- | lib/prism/translation/parser35.rb | 12 | ||||
| -rw-r--r-- | lib/prism/translation/parser_current.rb | 26 | ||||
| -rw-r--r-- | lib/prism/translation/parser_versions.rb | 36 | ||||
| -rw-r--r-- | lib/prism/translation/ripper.rb | 1256 | ||||
| -rw-r--r-- | lib/prism/translation/ripper/filter.rb | 53 | ||||
| -rw-r--r-- | lib/prism/translation/ripper/lexer.rb | 133 | ||||
| -rw-r--r-- | lib/prism/translation/ripper/sexp.rb | 13 | ||||
| -rw-r--r-- | lib/prism/translation/ripper/shim.rb | 2 | ||||
| -rw-r--r-- | lib/prism/translation/ruby_parser.rb | 106 |
15 files changed, 1768 insertions, 533 deletions
diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb index 5f2f01dbda..70031f133a 100644 --- a/lib/prism/translation/parser.rb +++ b/lib/prism/translation/parser.rb @@ -1,9 +1,15 @@ # frozen_string_literal: true +# :markup: markdown begin + required_version = ">= 3.3.7.2" + gem "parser", required_version require "parser" rescue LoadError - warn(%q{Error: Unable to load parser. Add `gem "parser"` to your Gemfile.}) + warn(<<~MSG) + Error: Unable to load parser #{required_version}. \ + Add `gem "parser"` to your Gemfile or run `bundle update parser`. + MSG exit(1) end @@ -13,6 +19,13 @@ module Prism # whitequark/parser gem's syntax tree. It inherits from the base parser for # the parser gem, and overrides the parse* methods to parse with prism and # then translate. + # + # Note that this version of the parser always parses using the latest + # version of Ruby syntax supported by Prism. If you want specific version + # support, use one of the version-specific subclasses, such as + # `Prism::Translation::Parser34`. If you want to parse using the same + # version of Ruby syntax as the currently running version of Ruby, use + # `Prism::Translation::ParserCurrent`. class Parser < ::Parser::Base Diagnostic = ::Parser::Diagnostic # :nodoc: private_constant :Diagnostic @@ -20,7 +33,7 @@ module Prism # The parser gem has a list of diagnostics with a hard-coded set of error # messages. We create our own diagnostic class in order to set our own # error messages. - class PrismDiagnostic < Diagnostic + class PrismDiagnostic < Diagnostic # :nodoc: # This is the cached message coming from prism. attr_reader :message @@ -59,13 +72,19 @@ module Prism # should be implemented as needed. # def initialize(builder = Prism::Translation::Parser::Builder.new, parser: Prism) + if !builder.is_a?(Prism::Translation::Parser::Builder) + warn(<<~MSG, uplevel: 1, category: :deprecated) + [deprecation]: The builder passed to `Prism::Translation::Parser.new` is not a \ + `Prism::Translation::Parser::Builder` subclass. This will raise in the next major version. + MSG + end @parser = parser super(builder) end def version # :nodoc: - 34 + 41 end # The default encoding for Ruby files is UTF-8. @@ -337,8 +356,10 @@ module Prism "3.3.1" when 34 "3.4.0" - when 35 - "3.5.0" + when 35, 40 + "4.0.0" + when 41 + "4.1.0" else "latest" end diff --git a/lib/prism/translation/parser/builder.rb b/lib/prism/translation/parser/builder.rb index d3b51f4275..7fc3bba6b7 100644 --- a/lib/prism/translation/parser/builder.rb +++ b/lib/prism/translation/parser/builder.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true +# :markup: markdown module Prism module Translation @@ -6,12 +7,14 @@ module Prism # A builder that knows how to convert more modern Ruby syntax # into whitequark/parser gem's syntax tree. class Builder < ::Parser::Builders::Default - # It represents the `it` block argument, which is not yet implemented in the Parser gem. + # It represents the `it` block argument, which is not yet implemented in + # the Parser gem. def itarg n(:itarg, [:it], nil) end - # The following three lines have been added to support the `it` block parameter syntax in the source code below. + # The following three lines have been added to support the `it` block + # parameter syntax in the source code below. # # if args.type == :itarg # block_type = :itblock @@ -55,6 +58,12 @@ module Prism method_call.loc.with_expression(join_exprs(method_call, block))) end end + + # def foo(&nil); end + # ^^^^ + def blocknilarg(amper_t, nil_t) + n0(:blocknilarg, arg_prefix_map(amper_t, nil_t)) + end end end end diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb index 8453c9383a..d11db12ae6 100644 --- a/lib/prism/translation/parser/compiler.rb +++ b/lib/prism/translation/parser/compiler.rb @@ -1,13 +1,14 @@ # frozen_string_literal: true +# :markup: markdown module Prism module Translation class Parser # A visitor that knows how to convert a prism syntax tree into the # whitequark/parser gem's syntax tree. - class Compiler < ::Prism::Compiler + class Compiler < ::Prism::Compiler # :nodoc: # Raised when the tree is malformed or there is a bug in the compiler. - class CompilationError < StandardError + class CompilationError < StandardError # :nodoc: end # The Parser::Base instance that is being used to build the AST. @@ -74,7 +75,29 @@ module Prism # [] # ^^ def visit_array_node(node) - builder.array(token(node.opening_loc), visit_all(node.elements), token(node.closing_loc)) + if node.opening&.start_with?("%w", "%W", "%i", "%I") + elements = node.elements.flat_map do |element| + if element.is_a?(StringNode) + if element.content.include?("\n") + string_nodes_from_line_continuations(element.unescaped, element.content, element.content_loc.start_offset, node.opening) + else + [builder.string_internal([element.unescaped, srange(element.content_loc)])] + end + elsif element.is_a?(InterpolatedStringNode) + builder.string_compose( + token(element.opening_loc), + string_nodes_from_interpolation(element, node.opening), + token(element.closing_loc) + ) + else + [visit(element)] + end + end + else + elements = visit_all(node.elements) + end + + builder.array(token(node.opening_loc), elements, token(node.closing_loc)) end # foo => [bar] @@ -111,8 +134,8 @@ module Prism def visit_assoc_node(node) key = node.key - if in_pattern - if node.value.is_a?(ImplicitNode) + if node.value.is_a?(ImplicitNode) + if in_pattern if key.is_a?(SymbolNode) if key.opening.nil? builder.match_hash_var([key.unescaped, srange(key.location)]) @@ -122,23 +145,19 @@ module Prism else builder.match_hash_var_from_str(token(key.opening_loc), visit_all(key.parts), token(key.closing_loc)) end - elsif key.opening.nil? - builder.pair_keyword([key.unescaped, srange(key.location)], visit(node.value)) else - builder.pair_quoted(token(key.opening_loc), [builder.string_internal([key.unescaped, srange(key.value_loc)])], token(key.closing_loc), visit(node.value)) - end - elsif node.value.is_a?(ImplicitNode) - value = node.value.value + value = node.value.value - implicit_value = if value.is_a?(CallNode) - builder.call_method(nil, nil, [value.name, srange(value.message_loc)]) - elsif value.is_a?(ConstantReadNode) - builder.const([value.name, srange(key.value_loc)]) - else - builder.ident([value.name, srange(key.value_loc)]).updated(:lvar) - end + implicit_value = if value.is_a?(CallNode) + builder.call_method(nil, nil, [value.name, srange(value.message_loc)]) + elsif value.is_a?(ConstantReadNode) + builder.const([value.name, srange(key.value_loc)]) + else + builder.ident([value.name, srange(key.value_loc)]).updated(:lvar) + end - builder.pair_keyword([key.unescaped, srange(key)], implicit_value) + builder.pair_keyword([key.unescaped, srange(key)], implicit_value) + end elsif node.operator_loc builder.pair(visit(key), token(node.operator_loc), visit(node.value)) elsif key.is_a?(SymbolNode) && key.opening_loc.nil? @@ -184,14 +203,21 @@ module Prism if (rescue_clause = node.rescue_clause) begin find_start_offset = (rescue_clause.reference&.location || rescue_clause.exceptions.last&.location || rescue_clause.keyword_loc).end_offset - find_end_offset = (rescue_clause.statements&.location&.start_offset || rescue_clause.subsequent&.location&.start_offset || (find_start_offset + 1)) + find_end_offset = ( + rescue_clause.statements&.location&.start_offset || + rescue_clause.subsequent&.location&.start_offset || + node.else_clause&.location&.start_offset || + node.ensure_clause&.location&.start_offset || + node.end_keyword_loc&.start_offset || + find_start_offset + 1 + ) rescue_bodies << builder.rescue_body( token(rescue_clause.keyword_loc), rescue_clause.exceptions.any? ? builder.array(nil, visit_all(rescue_clause.exceptions), nil) : nil, token(rescue_clause.operator_loc), visit(rescue_clause.reference), - srange_find(find_start_offset, find_end_offset, ";"), + srange_semicolon(find_start_offset, find_end_offset), visit(rescue_clause.statements) ) end until (rescue_clause = rescue_clause.subsequent).nil? @@ -271,11 +297,6 @@ module Prism if node.call_operator_loc.nil? case name - when :-@ - case (receiver = node.receiver).type - when :integer_node, :float_node, :rational_node, :imaginary_node - return visit(numeric_negate(node.message_loc, receiver)) - end when :! return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block) when :=~ @@ -297,7 +318,7 @@ module Prism visit_all(arguments), token(node.closing_loc), ), - srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, "="), + token(node.equal_loc), visit(node.arguments.arguments.last) ), block @@ -314,7 +335,7 @@ module Prism if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil? builder.assign( builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)), - srange_find(message_loc.end_offset, node.arguments.location.start_offset, "="), + token(node.equal_loc), visit(node.arguments.arguments.last) ) else @@ -667,13 +688,37 @@ module Prism # defined?(a) # ^^^^^^^^^^^ def visit_defined_node(node) - builder.keyword_cmd( - :defined?, - token(node.keyword_loc), - token(node.lparen_loc), - [visit(node.value)], - token(node.rparen_loc) - ) + # Very weird circumstances here where something like: + # + # defined? + # (1) + # + # gets parsed in Ruby as having only the `1` expression but in parser + # it gets parsed as having a begin. In this case we need to synthesize + # that begin to match parser's behavior. + if node.lparen_loc && node.keyword_loc.join(node.lparen_loc).slice.include?("\n") + builder.keyword_cmd( + :defined?, + token(node.keyword_loc), + nil, + [ + builder.begin( + token(node.lparen_loc), + visit(node.value), + token(node.rparen_loc) + ) + ], + nil + ) + else + builder.keyword_cmd( + :defined?, + token(node.keyword_loc), + token(node.lparen_loc), + [visit(node.value)], + token(node.rparen_loc) + ) + end end # if foo then bar else baz end @@ -739,7 +784,7 @@ module Prism if (do_keyword_loc = node.do_keyword_loc) token(do_keyword_loc) else - srange_find(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset, ";") + srange_semicolon(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset) end, visit(node.statements), token(node.end_keyword_loc) @@ -871,7 +916,7 @@ module Prism if (then_keyword_loc = node.then_keyword_loc) token(then_keyword_loc) else - srange_find(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset, ";") + srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset) end, visit(node.statements), case node.subsequent @@ -937,7 +982,7 @@ module Prism if (then_loc = node.then_loc) token(then_loc) else - srange_find(node.pattern.location.end_offset, node.statements&.location&.start_offset, ";") + srange_semicolon(node.pattern.location.end_offset, node.statements&.location&.start_offset) end, visit(node.statements) ) @@ -1003,7 +1048,7 @@ module Prism builder.index_asgn( visit(node.receiver), token(node.opening_loc), - visit_all(node.arguments.arguments), + visit_all(node.arguments&.arguments || []), token(node.closing_loc), ) end @@ -1071,7 +1116,7 @@ module Prism def visit_interpolated_regular_expression_node(node) builder.regexp_compose( token(node.opening_loc), - visit_all(node.parts), + string_nodes_from_interpolation(node, node.opening), [node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)], builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)]) ) @@ -1088,19 +1133,9 @@ module Prism return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) } end - parts = node.parts.flat_map do |part| - # When the content of a string node is split across multiple lines, the - # parser gem creates individual string nodes for each line the content is part of. - if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil? - string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, node.opening) - else - visit(part) - end - end - builder.string_compose( token(node.opening_loc), - parts, + string_nodes_from_interpolation(node, node.opening), token(node.closing_loc) ) end @@ -1110,7 +1145,7 @@ module Prism def visit_interpolated_symbol_node(node) builder.symbol_compose( token(node.opening_loc), - visit_all(node.parts), + string_nodes_from_interpolation(node, node.opening), token(node.closing_loc) ) end @@ -1119,14 +1154,14 @@ module Prism # ^^^^^^^^^^^^ def visit_interpolated_x_string_node(node) if node.heredoc? - visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) } - else - builder.xstring_compose( - token(node.opening_loc), - visit_all(node.parts), - token(node.closing_loc) - ) + return visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) } end + + builder.xstring_compose( + token(node.opening_loc), + string_nodes_from_interpolation(node, node.opening), + token(node.closing_loc) + ) end # -> { it } @@ -1138,7 +1173,17 @@ module Prism # -> { it } # ^^^^^^^^^ def visit_it_parameters_node(node) - builder.itarg + # FIXME: The builder _should_ always be a subclass of the prism builder. + # Currently RuboCop passes in its own builder that always inherits from the + # parser builder (which is lacking the `itarg` method). Once rubocop-ast + # opts in to use the custom prism builder a warning can be emitted when + # it is not the expected class, and eventually raise. + # https://github.com/rubocop/rubocop-ast/pull/354 + if builder.is_a?(Translation::Parser::Builder) + builder.itarg + else + builder.args(nil, [], nil, false) + end end # foo(bar: baz) @@ -1274,7 +1319,7 @@ module Prism # A node that is missing from the syntax tree. This is only used in the # case of a syntax error. The parser gem doesn't have such a concept, so # we invent our own here. - def visit_missing_node(node) + def visit_error_recovery_node(node) ::AST::Node.new(:missing, [], location: ::Parser::Source::Map.new(srange(node.location))) end @@ -1304,7 +1349,7 @@ module Prism def visit_multi_write_node(node) elements = multi_target_elements(node) - if elements.length == 1 && elements.first.is_a?(MultiTargetNode) + if elements.length == 1 && elements.first.is_a?(MultiTargetNode) && !node.rest elements = multi_target_elements(elements.first) end @@ -1340,6 +1385,12 @@ module Prism builder.nil(token(node.location)) end + # def foo(&nil); end + # ^^^^ + def visit_no_block_parameter_node(node) + builder.blocknilarg(token(node.operator_loc), token(node.keyword_loc)) + end + # def foo(**nil); end # ^^^^^ def visit_no_keywords_parameter_node(node) @@ -1432,7 +1483,8 @@ module Prism # foo => ^(bar) # ^^^^^^ def visit_pinned_expression_node(node) - expression = builder.begin(token(node.lparen_loc), visit(node.expression), token(node.rparen_loc)) + parts = node.expression.accept(copy_compiler(in_pattern: false)) # Don't treat * and similar as match_rest + expression = builder.begin(token(node.lparen_loc), parts, token(node.rparen_loc)) builder.pin(token(node.operator_loc), expression) end @@ -1716,7 +1768,7 @@ module Prism end else parts = - if node.value == "" + if node.value_loc.nil? [] elsif node.value.include?("\n") string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening) @@ -1757,7 +1809,7 @@ module Prism if (then_keyword_loc = node.then_keyword_loc) token(then_keyword_loc) else - srange_find(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset, ";") + srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset) end, visit(node.else_clause), token(node.else_clause&.else_keyword_loc), @@ -1788,7 +1840,7 @@ module Prism if (do_keyword_loc = node.do_keyword_loc) token(do_keyword_loc) else - srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, ";") + srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset) end, visit(node.statements), token(node.closing_loc) @@ -1812,7 +1864,7 @@ module Prism if (then_keyword_loc = node.then_keyword_loc) token(then_keyword_loc) else - srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset, ";") + srange_semicolon(node.conditions.last.location.end_offset, node.statements&.location&.start_offset) end, visit(node.statements) ) @@ -1832,7 +1884,7 @@ module Prism if (do_keyword_loc = node.do_keyword_loc) token(do_keyword_loc) else - srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, ";") + srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset) end, visit(node.statements), token(node.closing_loc) @@ -1916,22 +1968,6 @@ module Prism elements end - # Negate the value of a numeric node. This is a special case where you - # have a negative sign on one line and then a number on the next line. - # In normal Ruby, this will always be a method call. The parser gem, - # however, marks this as a numeric literal. We have to massage the tree - # here to get it into the correct form. - def numeric_negate(message_loc, receiver) - case receiver.type - when :integer_node, :float_node - receiver.copy(value: -receiver.value, location: message_loc.join(receiver.location)) - when :rational_node - receiver.copy(numerator: -receiver.numerator, location: message_loc.join(receiver.location)) - when :imaginary_node - receiver.copy(numeric: numeric_negate(message_loc, receiver.numeric), location: message_loc.join(receiver.location)) - end - end - # Blocks can have a special set of parameters that automatically expand # when given arrays if they have a single required parameter and no # other parameters. @@ -1961,16 +1997,16 @@ module Prism Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset]) end - # Constructs a new source range by finding the given character between - # the given start offset and end offset. If the needle is not found, it - # returns nil. Importantly it does not search past newlines or comments. + # Constructs a new source range by finding a semicolon between the given + # start offset and end offset. If the semicolon is not found, it returns + # nil. Importantly it does not search past newlines or comments. # # Note that end_offset is allowed to be nil, in which case this will # search until the end of the string. - def srange_find(start_offset, end_offset, character) - if (match = source_buffer.source.byteslice(start_offset...end_offset)[/\A\s*#{character}/]) + def srange_semicolon(start_offset, end_offset) + if (match = source_buffer.source.byteslice(start_offset...end_offset)[/\A\s*;/]) final_offset = start_offset + match.bytesize - [character, Range.new(source_buffer, offset_cache[final_offset - character.bytesize], offset_cache[final_offset])] + [";", Range.new(source_buffer, offset_cache[final_offset - 1], offset_cache[final_offset])] end end @@ -2014,13 +2050,6 @@ module Prism end end - # The parser gem automatically converts \r\n to \n, meaning our offsets - # need to be adjusted to always subtract 1 from the length. - def chomped_bytesize(line) - chomped = line.chomp - chomped.bytesize + (chomped == line ? 0 : 1) - end - # Visit a heredoc that can be either a string or an xstring. def visit_heredoc(node) children = Array.new @@ -2089,55 +2118,98 @@ module Prism end end + # When the content of a string node is split across multiple lines, the + # parser gem creates individual string nodes for each line the content is part of. + def string_nodes_from_interpolation(node, opening) + node.parts.flat_map do |part| + if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil? + string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, opening) + else + visit(part) + end + end + end + # Create parser string nodes from a single prism node. The parser gem # "glues" strings together when a line continuation is encountered. def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening) unescaped = unescaped.lines escaped = escaped.lines - - escaped_lengths = [] - normalized_lengths = [] - # Keeps track of where an unescaped line should start a new token. An unescaped - # \n would otherwise be indistinguishable from the actual newline at the end of - # of the line. The parser gem only emits a new string node at "real" newlines, - # line continuations don't start a new node as well. - do_next_tokens = [] - - if opening&.end_with?("'") - escaped.each do |line| - escaped_lengths << line.bytesize - normalized_lengths << chomped_bytesize(line) - do_next_tokens << true + percent_array = opening&.start_with?("%w", "%W", "%i", "%I") + regex = opening == "/" || opening&.start_with?("%r") + + # Non-interpolating strings + if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i") + current_length = 0 + current_line = +"" + + escaped.filter_map.with_index do |escaped_line, index| + unescaped_line = unescaped.fetch(index, "") + current_length += escaped_line.bytesize + current_line << unescaped_line + + # Glue line continuations together. Only %w and %i arrays can contain these. + if percent_array && escaped_line[/(\\)*\n$/, 1]&.length&.odd? + next unless index == escaped.count - 1 + end + s = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_length)]) + start_offset += escaped_line.bytesize + current_line = +"" + current_length = 0 + s end else + escaped_lengths = [] + normalized_lengths = [] + # Keeps track of where an unescaped line should start a new token. An unescaped + # \n would otherwise be indistinguishable from the actual newline at the end of + # of the line. The parser gem only emits a new string node at "real" newlines, + # line continuations don't start a new node as well. + do_next_tokens = [] + escaped .chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false } .each do |lines| escaped_lengths << lines.sum(&:bytesize) - normalized_lengths << lines.sum { |line| chomped_bytesize(line) } - unescaped_lines_count = lines.sum do |line| - line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? || false } - end - do_next_tokens.concat(Array.new(unescaped_lines_count + 1, false)) + + unescaped_lines_count = + if regex + 0 # Will always be preserved as is + else + lines.sum do |line| + count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? } + count -= 1 if line.match?(/(?:\A|[^\\])(?:\\\\)*\\n\z/) && count > 0 + count + end + end + + extra = 1 + extra = lines.count if percent_array # Account for line continuations in percent arrays + + normalized_lengths.concat(Array.new(unescaped_lines_count + extra, 0)) + normalized_lengths[-1] = lines.sum { |line| line.bytesize } + do_next_tokens.concat(Array.new(unescaped_lines_count + extra, false)) do_next_tokens[-1] = true end - end - - current_line = +"" - current_normalized_length = 0 - - unescaped.filter_map.with_index do |unescaped_line, index| - current_line << unescaped_line - current_normalized_length += normalized_lengths.fetch(index, 0) - if do_next_tokens[index] - inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)]) - start_offset += escaped_lengths.fetch(index, 0) - current_line = +"" - current_normalized_length = 0 - inner_part - else - nil + current_line = +"" + current_normalized_length = 0 + + emitted_count = 0 + unescaped.filter_map.with_index do |unescaped_line, index| + current_line << unescaped_line + current_normalized_length += normalized_lengths.fetch(index, 0) + + if do_next_tokens[index] + inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)]) + start_offset += escaped_lengths.fetch(emitted_count, 0) + current_line = +"" + current_normalized_length = 0 + emitted_count += 1 + inner_part + else + nil + end end end end diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb index 1fa2723f03..e82042867f 100644 --- a/lib/prism/translation/parser/lexer.rb +++ b/lib/prism/translation/parser/lexer.rb @@ -1,23 +1,25 @@ # frozen_string_literal: true +# :markup: markdown require "strscan" +require_relative "../../polyfill/append_as_bytes" +require_relative "../../polyfill/scan_byte" module Prism module Translation class Parser # Accepts a list of prism tokens and converts them into the expected # format for the parser gem. - class Lexer + class Lexer # :nodoc: + # These tokens are always skipped + TYPES_ALWAYS_SKIP = Set.new(%i[IGNORED_NEWLINE __END__ EOF]) + private_constant :TYPES_ALWAYS_SKIP + # The direct translating of types between the two lexers. TYPES = { # These tokens should never appear in the output of the lexer. - EOF: nil, - MISSING: nil, - NOT_PROVIDED: nil, - IGNORED_NEWLINE: nil, EMBDOC_END: nil, EMBDOC_LINE: nil, - __END__: nil, # These tokens have more or less direct mappings. AMPERSAND: :tAMPER2, @@ -85,6 +87,7 @@ module Prism KEYWORD_DEF: :kDEF, KEYWORD_DEFINED: :kDEFINED, KEYWORD_DO: :kDO, + KEYWORD_DO_BLOCK: :kDO_BLOCK, KEYWORD_DO_LOOP: :kDO_COND, KEYWORD_END: :kEND, KEYWORD_END_UPCASE: :klEND, @@ -186,25 +189,25 @@ module Prism # without them. We should find another way to do this, but in the # meantime we'll hide them from the documentation and mark them as # private constants. - EXPR_BEG = 0x1 # :nodoc: - EXPR_LABEL = 0x400 # :nodoc: + EXPR_BEG = 0x1 + EXPR_LABEL = 0x400 # It is used to determine whether `do` is of the token type `kDO` or `kDO_LAMBDA`. # # NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, please note that `kDO` is still returned # instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046 - LAMBDA_TOKEN_TYPES = [:kDO_LAMBDA, :tLAMBDA, :tLAMBEG] + LAMBDA_TOKEN_TYPES = Set.new([:kDO_LAMBDA, :tLAMBDA, :tLAMBEG]) # The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem. # The following token types are listed as those classified as `tLPAREN`. - LPAREN_CONVERSION_TOKEN_TYPES = [ - :kBREAK, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3, - :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS - ] + LPAREN_CONVERSION_TOKEN_TYPES = Set.new([ + :kBREAK, :tCARET, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3, + :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS, :tLCURLY + ]) # Types of tokens that are allowed to continue a method call with comments in-between. # For these, the parser gem doesn't emit a newline token after the last comment. - COMMENT_CONTINUATION_TYPES = [:COMMENT, :AMPERSAND_DOT, :DOT] + COMMENT_CONTINUATION_TYPES = Set.new([:COMMENT, :AMPERSAND_DOT, :DOT]) private_constant :COMMENT_CONTINUATION_TYPES # Heredocs are complex and require us to keep track of a bit of info to refer to later @@ -230,7 +233,7 @@ module Prism @offset_cache = offset_cache end - Range = ::Parser::Source::Range # :nodoc: + Range = ::Parser::Source::Range private_constant :Range # Convert the prism tokens into the expected format for the parser gem. @@ -251,7 +254,7 @@ module Prism while index < length token, state = lexed[index] index += 1 - next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type) + next if TYPES_ALWAYS_SKIP.include?(token.type) type = TYPES.fetch(token.type) value = token.value @@ -259,10 +262,11 @@ module Prism case type when :kDO - types = tokens.map(&:first) - nearest_lambda_token_type = types.reverse.find { |type| LAMBDA_TOKEN_TYPES.include?(type) } + nearest_lambda_token = tokens.reverse_each.find do |token| + LAMBDA_TOKEN_TYPES.include?(token.first) + end - if nearest_lambda_token_type == :tLAMBDA + if nearest_lambda_token&.first == :tLAMBDA type = :kDO_LAMBDA end when :tCHARACTER @@ -272,20 +276,20 @@ module Prism when :tCOMMENT if token.type == :EMBDOC_BEGIN - while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1) + while !((next_token = lexed[index]&.first) && next_token.type == :EMBDOC_END) && (index < length - 1) value += next_token.value index += 1 end value += next_token.value - location = range(token.location.start_offset, lexed[index][0].location.end_offset) + location = range(token.location.start_offset, next_token.location.end_offset) index += 1 else is_at_eol = value.chomp!.nil? location = range(token.location.start_offset, token.location.end_offset + (is_at_eol ? 0 : -1)) - prev_token = lexed[index - 2][0] if index - 2 >= 0 - next_token = lexed[index][0] + prev_token, _ = lexed[index - 2] if index - 2 >= 0 + next_token, _ = lexed[index] is_inline_comment = prev_token&.location&.start_line == token.location.start_line if is_inline_comment && !is_at_eol && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type) @@ -304,7 +308,7 @@ module Prism end end when :tNL - next_token = next_token = lexed[index][0] + next_token, _ = lexed[index] # Newlines after comments are emitted out of order. if next_token&.type == :COMMENT comment_newline_location = location @@ -338,11 +342,12 @@ module Prism when :tRATIONAL value = parse_rational(value) when :tSPACE + location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value)) value = nil when :tSTRING_BEG - next_token = lexed[index][0] - next_next_token = lexed[index + 1][0] - basic_quotes = ["\"", "'"].include?(value) + next_token, _ = lexed[index] + next_next_token, _ = lexed[index + 1] + basic_quotes = value == '"' || value == "'" if basic_quotes && next_token&.type == :STRING_END next_location = token.location.join(next_token.location) @@ -351,11 +356,15 @@ module Prism location = range(next_location.start_offset, next_location.end_offset) index += 1 elsif value.start_with?("'", '"', "%") - if next_token&.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && next_next_token&.type == :STRING_END - # the parser gem doesn't simplify strings when its value ends in a newline - if !(string_value = next_token.value).end_with?("\n") && basic_quotes + if next_token&.type == :STRING_CONTENT && next_next_token&.type == :STRING_END + string_value = next_token.value + if simplify_string?(string_value, value) next_location = token.location.join(next_next_token.location) - value = unescape_string(string_value, value) + if percent_array?(value) + value = percent_array_unescape(string_value) + else + value = unescape_string(string_value, value) + end type = :tSTRING location = range(next_location.start_offset, next_location.end_offset) index += 2 @@ -394,16 +403,40 @@ module Prism quote_stack.push(value) end when :tSTRING_CONTENT + is_percent_array = percent_array?(quote_stack.last) + if (lines = token.value.lines).one? - # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line. - is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line - # The parser gem only removes indentation when the heredoc is not nested - not_nested = heredoc_stack.size == 1 - if is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0 - value = trim_heredoc_whitespace(value, current_heredoc) - end + # Prism usually emits a single token for strings with line continuations. + # For squiggly heredocs they are not joined so we do that manually here. + current_string = +"" + current_length = 0 + start_offset = token.location.start_offset + while token.type == :STRING_CONTENT + current_length += token.value.bytesize + # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line. + prev_token, _ = lexed[index - 2] if index - 2 >= 0 + is_first_token_on_line = prev_token && token.location.start_line != prev_token.location.start_line + # The parser gem only removes indentation when the heredoc is not nested + not_nested = heredoc_stack.size == 1 + if is_percent_array + value = percent_array_unescape(token.value) + elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0 + value = trim_heredoc_whitespace(token.value, current_heredoc) + end - value = unescape_string(value, quote_stack.last) + current_string << unescape_string(value, quote_stack.last) + relevant_backslash_count = if quote_stack.last.start_with?("%W", "%I") + 0 # the last backslash escapes the newline + else + token.value[/(\\{1,})\n/, 1]&.length || 0 + end + if relevant_backslash_count.even? || !interpolation?(quote_stack.last) + tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]] + break + end + token, _ = lexed[index] + index += 1 + end else # When the parser gem encounters a line continuation inside of a multiline string, # it emits a single string node. The backslash (and remaining newline) is removed. @@ -416,12 +449,10 @@ module Prism chomped_line = line.chomp backslash_count = chomped_line[/\\{1,}\z/]&.length || 0 is_interpolation = interpolation?(quote_stack.last) - is_percent_array = percent_array?(quote_stack.last) if backslash_count.odd? && (is_interpolation || is_percent_array) if is_percent_array - # Remove the last backslash, keep potential newlines - current_line << line.sub(/(\\)(\r?\n)\z/, '\2') + current_line << percent_array_unescape(line) adjustment += 1 else chomped_line.delete_suffix!("\\") @@ -443,8 +474,8 @@ module Prism adjustment = 0 end end - next end + next when :tSTRING_DVAR value = nil when :tSTRING_END @@ -458,7 +489,7 @@ module Prism end if percent_array?(quote_stack.pop) - prev_token = lexed[index - 2][0] if index - 2 >= 0 + prev_token, _ = lexed[index - 2] if index - 2 >= 0 empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type) ends_with_whitespace = prev_token&.type == :WORDS_SEP # parser always emits a space token after content in a percent array, even if no actual whitespace is present. @@ -467,7 +498,7 @@ module Prism end end when :tSYMBEG - if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END + if (next_token = lexed[index]&.first) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END next_location = token.location.join(next_token.location) type = :tSYMBOL value = next_token.value @@ -482,13 +513,13 @@ module Prism type = :tIDENTIFIER end when :tXSTRING_BEG - if (next_token = lexed[index][0]) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type) + if (next_token = lexed[index]&.first) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type) # self.`() type = :tBACK_REF2 end quote_stack.push(value) when :tSYMBOLS_BEG, :tQSYMBOLS_BEG, :tWORDS_BEG, :tQWORDS_BEG - if (next_token = lexed[index][0]) && next_token.type == :WORDS_SEP + if (next_token = lexed[index]&.first) && next_token.type == :WORDS_SEP index += 1 end @@ -564,15 +595,16 @@ module Prism previous_line = -1 result = Float::MAX - while (lexed[next_token_index] && next_token = lexed[next_token_index][0]) + while (next_token = lexed[next_token_index]&.first) next_token_index += 1 - next_next_token = lexed[next_token_index] && lexed[next_token_index][0] + next_next_token, _ = lexed[next_token_index] + first_token_on_line = next_token.location.start_column == 0 # String content inside nested heredocs and interpolation is ignored if next_token.type == :HEREDOC_START || next_token.type == :EMBEXPR_BEGIN # When interpolation is the first token of a line there is no string # content to check against. There will be no common whitespace. - if nesting_level == 0 && next_token.location.start_column == 0 + if nesting_level == 0 && first_token_on_line result = 0 end nesting_level += 1 @@ -580,7 +612,7 @@ module Prism nesting_level -= 1 # When we encountered the matching heredoc end, we can exit break if nesting_level == -1 - elsif next_token.type == :STRING_CONTENT && nesting_level == 0 + elsif next_token.type == :STRING_CONTENT && nesting_level == 0 && first_token_on_line common_whitespace = 0 next_token.value[/^\s*/].each_char do |char| if char == "\t" @@ -668,52 +700,108 @@ module Prism scanner = StringScanner.new(string) while (skipped = scanner.skip_until(/\\/)) # Append what was just skipped over, excluding the found backslash. - result << string.byteslice(scanner.pos - skipped, skipped - 1) - - # Simple single-character escape sequences like \n - if (replacement = ESCAPES[scanner.peek(1)]) - result << replacement - scanner.pos += 1 - elsif (octal = scanner.check(/[0-7]{1,3}/)) - # \nnn - # NOTE: When Ruby 3.4 is required, this can become result.append_as_bytes(chr) - result << octal.to_i(8).chr.b - scanner.pos += octal.bytesize - elsif (hex = scanner.check(/x([0-9a-fA-F]{1,2})/)) - # \xnn - result << hex[1..].to_i(16).chr.b - scanner.pos += hex.bytesize - elsif (unicode = scanner.check(/u([0-9a-fA-F]{4})/)) - # \unnnn - result << unicode[1..].hex.chr(Encoding::UTF_8).b - scanner.pos += unicode.bytesize - elsif scanner.peek(3) == "u{}" - # https://github.com/whitequark/parser/issues/856 - scanner.pos += 3 - elsif (unicode_parts = scanner.check(/u{.*}/)) - # \u{nnnn ...} - unicode_parts[2..-2].split.each do |unicode| - result << unicode.hex.chr(Encoding::UTF_8).b - end - scanner.pos += unicode_parts.bytesize - end + result.append_as_bytes(string.byteslice(scanner.pos - skipped, skipped - 1)) + escape_read(result, scanner, false, false) end - # Add remainging chars - result << string.byteslice(scanner.pos..) - + # Add remaining chars + result.append_as_bytes(string.byteslice(scanner.pos..)) result.force_encoding(source_buffer.source.encoding) - - result else delimiters = Regexp.escape("#{delimiter}#{DELIMITER_SYMETRY[delimiter]}") string.gsub(/\\([\\#{delimiters}])/, '\1') end end + # Certain strings are merged into a single string token. + def simplify_string?(value, quote) + case quote + when "'" + # Only simplify 'foo' + !value.include?("\n") + when '"' + # Simplify when every line ends with a line continuation, or it is the last line + value.lines.all? do |line| + !line.end_with?("\n") || line[/(\\*)$/, 1]&.length&.odd? + end + else + # %q and similar are never simplified + false + end + end + + # Escape a byte value, given the control and meta flags. + def escape_build(value, control, meta) + value &= 0x9f if control + value |= 0x80 if meta + value + end + + # Read an escape out of the string scanner, given the control and meta + # flags, and push the unescaped value into the result. + def escape_read(result, scanner, control, meta) + if scanner.skip("\n") + # Line continuation + elsif (value = ESCAPES[scanner.peek(1)]) + # Simple single-character escape sequences like \n + result.append_as_bytes(value) + scanner.pos += 1 + elsif (value = scanner.scan(/[0-7]{1,3}/)) + # \nnn + result.append_as_bytes(escape_build(value.to_i(8), control, meta)) + elsif (value = scanner.scan(/x[0-9a-fA-F]{1,2}/)) + # \xnn + result.append_as_bytes(escape_build(value[1..].to_i(16), control, meta)) + elsif (value = scanner.scan(/u[0-9a-fA-F]{4}/)) + # \unnnn + result.append_as_bytes(value[1..].hex.chr(Encoding::UTF_8)) + elsif scanner.skip("u{}") + # https://github.com/whitequark/parser/issues/856 + elsif (value = scanner.scan(/u{.*?}/)) + # \u{nnnn ...} + value[2..-2].split.each do |unicode| + result.append_as_bytes(unicode.hex.chr(Encoding::UTF_8)) + end + elsif (value = scanner.scan(/c\\?(?=[[:print:]])|C-\\?(?=[[:print:]])/)) + # \cx or \C-x where x is an ASCII printable character + escape_read(result, scanner, true, meta) + elsif (value = scanner.scan(/M-\\?(?=[[:print:]])/)) + # \M-x where x is an ASCII printable character + escape_read(result, scanner, control, true) + elsif (byte = scanner.scan_byte) + # Something else after an escape. + if control && byte == 0x3f # ASCII '?' + result.append_as_bytes(escape_build(0x7f, false, meta)) + else + result.append_as_bytes(escape_build(byte, control, meta)) + end + end + end + + # In a percent array, certain whitespace can be preceeded with a backslash, + # causing the following characters to be part of the previous element. + def percent_array_unescape(string) + string.gsub(/(\\)+[ \f\n\r\t\v]/) do |full_match| + full_match.delete_prefix!("\\") if Regexp.last_match[1].length.odd? + full_match + end + end + + # For %-arrays whitespace, the parser gem only considers whitespace before the newline. + def percent_array_leading_whitespace(string) + return 1 if string.start_with?("\n") + + leading_whitespace = 0 + string.each_char do |c| + break if c == "\n" + leading_whitespace += 1 + end + leading_whitespace + end + # Determine if characters preceeded by a backslash should be escaped or not def interpolation?(quote) - quote != "'" && !quote.start_with?("%q", "%w", "%i") + !quote.end_with?("'") && !quote.start_with?("%q", "%w", "%i", "%s") end # Regexp allow interpolation but are handled differently during unescaping diff --git a/lib/prism/translation/parser33.rb b/lib/prism/translation/parser33.rb deleted file mode 100644 index b09266e06a..0000000000 --- a/lib/prism/translation/parser33.rb +++ /dev/null @@ -1,12 +0,0 @@ -# frozen_string_literal: true - -module Prism - module Translation - # This class is the entry-point for Ruby 3.3 of `Prism::Translation::Parser`. - class Parser33 < Parser - def version # :nodoc: - 33 - end - end - end -end diff --git a/lib/prism/translation/parser34.rb b/lib/prism/translation/parser34.rb deleted file mode 100644 index 0ead70ad3c..0000000000 --- a/lib/prism/translation/parser34.rb +++ /dev/null @@ -1,12 +0,0 @@ -# frozen_string_literal: true - -module Prism - module Translation - # This class is the entry-point for Ruby 3.4 of `Prism::Translation::Parser`. - class Parser34 < Parser - def version # :nodoc: - 34 - end - end - end -end diff --git a/lib/prism/translation/parser35.rb b/lib/prism/translation/parser35.rb deleted file mode 100644 index a6abc12589..0000000000 --- a/lib/prism/translation/parser35.rb +++ /dev/null @@ -1,12 +0,0 @@ -# frozen_string_literal: true - -module Prism - module Translation - # This class is the entry-point for Ruby 3.5 of `Prism::Translation::Parser`. - class Parser35 < Parser - def version # :nodoc: - 35 - end - end - end -end diff --git a/lib/prism/translation/parser_current.rb b/lib/prism/translation/parser_current.rb new file mode 100644 index 0000000000..f7c1070e30 --- /dev/null +++ b/lib/prism/translation/parser_current.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true +# :markup: markdown +#-- +# typed: ignore + +module Prism + module Translation + case RUBY_VERSION + when /^3\.3\./ + ParserCurrent = Parser33 + when /^3\.4\./ + ParserCurrent = Parser34 + when /^3\.5\./, /^4\.0\./ + ParserCurrent = Parser40 + when /^4\.1\./ + ParserCurrent = Parser41 + else + # Keep this in sync with released Ruby. + parser = Parser40 + major, minor, _patch = Gem::Version.new(RUBY_VERSION).segments + warn "warning: `Prism::Translation::Current` is loading #{parser.name}, " \ + "but you are running #{major}.#{minor}." + ParserCurrent = parser + end + end +end diff --git a/lib/prism/translation/parser_versions.rb b/lib/prism/translation/parser_versions.rb new file mode 100644 index 0000000000..720c7d548c --- /dev/null +++ b/lib/prism/translation/parser_versions.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true +# :markup: markdown + +module Prism + module Translation + # This class is the entry-point for Ruby 3.3 of `Prism::Translation::Parser`. + class Parser33 < Parser + def version # :nodoc: + 33 + end + end + + # This class is the entry-point for Ruby 3.4 of `Prism::Translation::Parser`. + class Parser34 < Parser + def version # :nodoc: + 34 + end + end + + # This class is the entry-point for Ruby 4.0 of `Prism::Translation::Parser`. + class Parser40 < Parser + def version # :nodoc: + 40 + end + end + + Parser35 = Parser40 # :nodoc: + + # This class is the entry-point for Ruby 4.1 of `Prism::Translation::Parser`. + class Parser41 < Parser + def version # :nodoc: + 41 + end + end + end +end diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index dce96e01ab..f179a149a1 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true - -require "ripper" +# :markup: markdown module Prism module Translation @@ -23,22 +22,10 @@ module Prism # - on_comma # - on_ignored_nl # - on_ignored_sp - # - on_kw - # - on_label_end - # - on_lbrace - # - on_lbracket - # - on_lparen # - on_nl - # - on_op # - on_operator_ambiguous - # - on_rbrace - # - on_rbracket - # - on_rparen # - on_semicolon # - on_sp - # - on_symbeg - # - on_tstring_beg - # - on_tstring_end # class Ripper < Compiler # Parses the given Ruby program read from +src+. @@ -70,7 +57,8 @@ module Prism # [[1, 13], :on_kw, "end", END ]] # def self.lex(src, filename = "-", lineno = 1, raise_errors: false) - result = Prism.lex_compat(src, filepath: filename, line: lineno) + coerced = coerce_source(src) + result = Prism.lex_compat(coerced, filepath: filename, line: lineno, version: "current", encoding: coerced.encoding) if result.failure? && raise_errors raise SyntaxError, result.errors.first.message @@ -79,6 +67,34 @@ module Prism end end + # Tokenizes the Ruby program and returns an array of strings. + # The +filename+ and +lineno+ arguments are mostly ignored, since the + # return value is just the tokenized input. + # By default, this method does not handle syntax errors in +src+, + # use the +raise_errors+ keyword to raise a SyntaxError for an error in +src+. + # + # p Ripper.tokenize("def m(a) nil end") + # # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"] + # + def self.tokenize(...) + lex(...).map { |token| token[2] } + end + + # Mirros the various lex_types that ripper supports + def self.coerce_source(source) # :nodoc: + if source.is_a?(IO) + source.read + elsif source.respond_to?(:gets) + src = +"" + while line = source.gets + src << line + end + src + else + source.to_str + end + end + # This contains a table of all of the parser events and their # corresponding arity. PARSER_EVENT_TABLE = { @@ -331,7 +347,7 @@ module Prism "__ENCODING__", "__FILE__", "__LINE__" - ] + ].to_set # A list of all of the Ruby binary operators. BINARY_OPERATORS = [ @@ -356,7 +372,7 @@ module Prism :/, :*, :** - ] + ].to_set private_constant :KEYWORDS, :BINARY_OPERATORS @@ -425,9 +441,93 @@ module Prism end end + autoload :Filter, "prism/translation/ripper/filter" + autoload :Lexer, "prism/translation/ripper/lexer" autoload :SexpBuilder, "prism/translation/ripper/sexp" autoload :SexpBuilderPP, "prism/translation/ripper/sexp" + # Provides optimized access to line and column information. + # Ripper bounds are mostly accessed in a linear fashion, so + # we can try a linear scan first and fall back to binary search. + class LineAndColumnCache # :nodoc: + # How many should it look ahead/behind before falling back to binary searching. + WINDOW = 8 + private_constant :WINDOW + + #: (Source source) -> void + def initialize(source) + @source = source + @offsets = source.offsets + @hint = 0 + end + + #: (Integer byte_offset) -> [Integer, Integer] + def line_and_column(byte_offset) + @hint = new_hint(byte_offset) || @source.find_line(byte_offset) + return [@hint + @source.start_line, byte_offset - @offsets[@hint]] + end + + private + + def new_hint(byte_offset) + if @offsets[@hint] <= byte_offset + # Same line? + if (@hint + 1 >= @offsets.size || @offsets[@hint + 1] > byte_offset) + return @hint + end + + # Scan forwards + limit = [@hint + WINDOW + 1, @offsets.size].min + idx = @hint + 1 + while idx < limit + if @offsets[idx] > byte_offset + return idx - 1 + end + if @offsets[idx] == byte_offset + return idx + end + idx += 1 + end + else + # Scan backwards + limit = @hint > WINDOW ? @hint - WINDOW : 0 + idx = @hint + while idx >= limit + 1 + if @offsets[idx - 1] <= byte_offset + return idx - 1 + end + idx -= 1 + end + end + + nil + end + end + + # :stopdoc: + # This is not part of the public API but used by some gems. + + # Ripper-internal bitflags. + LEX_STATE_NAMES = %i[ + BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM + ].map.with_index.to_h { |name, i| [2 ** i, name] }.freeze + private_constant :LEX_STATE_NAMES + + LEX_STATE_NAMES.each do |value, key| + const_set("EXPR_#{key}", value) + end + EXPR_NONE = 0 + EXPR_VALUE = EXPR_BEG + EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS + EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG + EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN + + def self.lex_state_name(state) + LEX_STATE_NAMES.filter_map { |flag, name| name if state & flag != 0 }.join("|") + end + + # :startdoc: + # The source that is being parsed. attr_reader :source @@ -437,16 +537,17 @@ module Prism # The current line number of the parser. attr_reader :lineno - # The current column number of the parser. + # The current column in bytes of the parser. attr_reader :column # Create a new Translation::Ripper object with the given source. def initialize(source, filename = "(ripper)", lineno = 1) - @source = source + @source = Ripper.coerce_source(source) @filename = filename @lineno = lineno @column = 0 @result = nil + @line_and_column_cache = nil end ########################################################################## @@ -465,7 +566,12 @@ module Prism bounds(location) if comment.is_a?(InlineComment) - on_comment(comment.slice) + # Inline comments always contain a newline if the line itself contains it + if result.source.source.bytesize > comment.location.end_offset + on_comment("#{comment.slice}\n") + else + on_comment(comment.slice) + end else offset = location.start_offset lines = comment.slice.lines @@ -546,9 +652,14 @@ module Prism # Visitor methods ########################################################################## + # :stopdoc: + # alias foo bar # ^^^^^^^^^^^^^ def visit_alias_method_node(node) + bounds(node.keyword_loc) + on_kw("alias") + new_name = visit(node.new_name) old_name = visit(node.old_name) @@ -559,6 +670,9 @@ module Prism # alias $foo $bar # ^^^^^^^^^^^^^^^ def visit_alias_global_variable_node(node) + bounds(node.keyword_loc) + on_kw("alias") + new_name = visit_alias_global_variable_node_value(node.new_name) old_name = visit_alias_global_variable_node_value(node.old_name) @@ -584,6 +698,10 @@ module Prism # ^^^^^^^^^ def visit_alternation_pattern_node(node) left = visit_pattern_node(node.left) + + bounds(node.operator_loc) + on_op("|") + right = visit_pattern_node(node.right) bounds(node.location) @@ -594,7 +712,13 @@ module Prism # parenthesis node that can be used to wrap patterns. private def visit_pattern_node(node) if node.is_a?(ParenthesesNode) - visit(node.body) + bounds(node.opening_loc) + on_lparen("(") + result = visit(node.body) + bounds(node.closing_loc) + on_rparen(")") + + result else visit(node) end @@ -604,6 +728,14 @@ module Prism # ^^^^^^^ def visit_and_node(node) left = visit(node.left) + + bounds(node.operator_loc) + if node.operator == "and" + on_kw("and") + else + on_op("&&") + end + right = visit(node.right) bounds(node.location) @@ -631,6 +763,8 @@ module Prism previous = element end + visit_words_sep(opening_loc, node.elements.last, node.closing_loc) + bounds(node.closing_loc) on_tstring_end(node.closing) when /^%i/ @@ -650,6 +784,8 @@ module Prism previous = element end + visit_words_sep(opening_loc, node.elements.last, node.closing_loc) + bounds(node.closing_loc) on_tstring_end(node.closing) when /^%W/ @@ -687,6 +823,8 @@ module Prism previous = element end + visit_words_sep(opening_loc, node.elements.last, node.closing_loc) + bounds(node.closing_loc) on_tstring_end(node.closing) when /^%I/ @@ -724,6 +862,8 @@ module Prism previous = element end + visit_words_sep(opening_loc, node.elements.last, node.closing_loc) + bounds(node.closing_loc) on_tstring_end(node.closing) else @@ -740,15 +880,21 @@ module Prism on_array(elements) end - # Dispatch a words_sep event that contains the space between the elements + # Dispatch words_sep events that contains the whitespace between the elements # of list literals. private def visit_words_sep(opening_loc, previous, current) - end_offset = (previous.nil? ? opening_loc : previous.location).end_offset - start_offset = current.location.start_offset - - if end_offset != start_offset - bounds(current.location.copy(start_offset: end_offset)) - on_words_sep(source.byteslice(end_offset...start_offset)) + start_offset = (previous.nil? ? opening_loc : previous.location).end_offset + end_offset = current.start_offset + length = end_offset - start_offset + + if length > 0 + whitespace = source.byteslice(start_offset, length) + current_offset = start_offset + whitespace.each_line do |part| + bounds(opening_loc.copy(start_offset: current_offset, length: part.bytesize)) + on_words_sep(part) + current_offset += part.bytesize + end end end @@ -774,9 +920,18 @@ module Prism # ^^^^^ def visit_array_pattern_node(node) constant = visit(node.constant) + + if node.opening_loc + bounds(node.opening_loc) + node.opening == "[" ? on_lbracket("[") : on_lparen("(") + end + requireds = visit_all(node.requireds) if node.requireds.any? rest = if (rest_node = node.rest).is_a?(SplatNode) + bounds(rest_node.operator_loc) + on_op("*") + if rest_node.expression.nil? bounds(rest_node.location) on_var_field(nil) @@ -787,6 +942,10 @@ module Prism posts = visit_all(node.posts) if node.posts.any? + if node.closing_loc + bounds(node.closing_loc) + node.closing == "]" ? on_rbracket("]") : on_rparen(")") + end bounds(node.location) on_aryptn(constant, requireds, rest, posts) end @@ -802,6 +961,12 @@ module Prism # ^^^^ def visit_assoc_node(node) key = visit(node.key) + + if node.operator_loc + bounds(node.operator_loc) + on_op("=>") + end + value = visit(node.value) bounds(node.location) @@ -814,6 +979,9 @@ module Prism # { **foo } # ^^^^^ def visit_assoc_splat_node(node) + bounds(node.operator_loc) + on_op("**") + value = visit(node.value) bounds(node.location) @@ -830,8 +998,18 @@ module Prism # begin end # ^^^^^^^^^ def visit_begin_node(node) + if node.begin_keyword_loc + bounds(node.begin_keyword_loc) + on_kw("begin") + end + clauses = visit_begin_node_clauses(node.begin_keyword_loc, node, false) + if node.end_keyword_loc + bounds(node.end_keyword_loc) + on_kw("end") + end + bounds(node.location) on_begin(clauses) end @@ -843,7 +1021,7 @@ module Prism on_stmts_add(on_stmts_new, on_void_stmt) else body = node.statements.body - body.unshift(nil) if void_stmt?(location, node.statements.body[0].location, allow_newline) + body = [nil, *body] if void_stmt?(location, node.statements.body[0].location, allow_newline) bounds(node.statements.location) visit_statements_node_body(body) @@ -852,12 +1030,15 @@ module Prism rescue_clause = visit(node.rescue_clause) else_clause = unless (else_clause_node = node.else_clause).nil? + bounds(else_clause_node.else_keyword_loc) + on_kw("else") + else_statements = if else_clause_node.statements.nil? [nil] else body = else_clause_node.statements.body - body.unshift(nil) if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline) + body = [nil, *body] if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline) body end @@ -879,7 +1060,7 @@ module Prism on_bodystmt(visit_statements_node_body([nil]), nil, nil, nil) when StatementsNode body = [*node.body] - body.unshift(nil) if void_stmt?(location, body[0].location, allow_newline) + body = [nil, *body] if void_stmt?(location, body[0].location, allow_newline) stmts = visit_statements_node_body(body) bounds(node.body.first.location) @@ -894,6 +1075,8 @@ module Prism # foo(&bar) # ^^^^ def visit_block_argument_node(node) + bounds(node.operator_loc) + on_op("&") visit(node.expression) end @@ -907,6 +1090,13 @@ module Prism # Visit a BlockNode. def visit_block_node(node) braces = node.opening == "{" + bounds(node.opening_loc) + if braces + on_lbrace("{") + else + on_kw("do") + end + parameters = visit(node.parameters) body = @@ -919,7 +1109,7 @@ module Prism braces ? stmts : on_bodystmt(stmts, nil, nil, nil) when StatementsNode stmts = node.body.body - stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) + stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) stmts = visit_statements_node_body(stmts) bounds(node.body.location) @@ -931,6 +1121,14 @@ module Prism end if braces + bounds(node.closing_loc) + on_rbrace("}") + else + bounds(node.closing_loc) + on_kw("end") + end + + if braces bounds(node.location) on_brace_block(parameters, body) else @@ -942,12 +1140,15 @@ module Prism # def foo(&bar); end # ^^^^ def visit_block_parameter_node(node) + bounds(node.operator_loc) + on_op("&") + if node.name_loc.nil? bounds(node.location) on_blockarg(nil) else bounds(node.name_loc) - name = visit_token(node.name.to_s) + name = on_ident(node.name.to_s) bounds(node.location) on_blockarg(name) @@ -956,6 +1157,9 @@ module Prism # A block's parameters. def visit_block_parameters_node(node) + bounds(node.opening_loc) + on_op("|") + parameters = if node.parameters.nil? on_params(nil, nil, nil, nil, nil, nil, nil) @@ -970,6 +1174,9 @@ module Prism false end + bounds(node.closing_loc) + on_op("|") + bounds(node.location) on_block_var(parameters, locals) end @@ -980,6 +1187,9 @@ module Prism # break foo # ^^^^^^^^^ def visit_break_node(node) + bounds(node.keyword_loc) + on_kw("break") + if node.arguments.nil? bounds(node.location) on_break(on_args_new) @@ -1004,20 +1214,32 @@ module Prism case node.name when :[] receiver = visit(node.receiver) - arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + + bounds(node.opening_loc) + on_lbracket("[") + + arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + + bounds(node.closing_loc) + on_rbracket("]") + + block = visit(block_node) bounds(node.location) call = on_aref(receiver, arguments) - if block.nil? - call - else + if block_node bounds(node.location) on_method_add_block(call, block) + else + call end when :[]= receiver = visit(node.receiver) + bounds(node.opening_loc) + on_lbracket("[") + *arguments, last_argument = node.arguments.arguments arguments << node.block if !node.block.nil? @@ -1033,6 +1255,11 @@ module Prism end end + bounds(node.closing_loc) + on_rbracket("]") + bounds(node.equal_loc) + on_op("=") + bounds(node.location) call = on_aref_field(receiver, arguments) value = visit_write_value(last_argument) @@ -1040,27 +1267,54 @@ module Prism bounds(last_argument.location) on_assign(call, value) when :-@, :+@, :~ - receiver = visit(node.receiver) + bounds(node.message_loc) + on_op(node.message) + receiver = visit(node.receiver) bounds(node.location) on_unary(node.name, receiver) when :! + bounds(node.message_loc) if node.message == "not" + on_kw("not") + + if node.opening_loc + bounds(node.opening_loc) + on_lparen("(") + end + receiver = - if !node.receiver.is_a?(ParenthesesNode) || !node.receiver.body.nil? + if node.receiver.is_a?(ParenthesesNode) && node.receiver.body.nil? + # The parens in `not()` just emit parens and nothing else. + bounds(node.receiver.opening_loc) + on_lparen("(") + bounds(node.receiver.closing_loc) + on_rparen(")") + nil + else visit(node.receiver) end + if node.closing_loc + bounds(node.closing_loc) + on_rparen(")") + end bounds(node.location) on_unary(:not, receiver) else + on_op("!") + receiver = visit(node.receiver) bounds(node.location) on_unary(:!, receiver) end - when *BINARY_OPERATORS + when BINARY_OPERATORS receiver = visit(node.receiver) + + bounds(node.message_loc) + on_op(node.message) + value = visit(node.arguments.arguments.first) bounds(node.location) @@ -1072,9 +1326,21 @@ module Prism if node.variable_call? on_vcall(message) else - arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location)) + if node.opening_loc + bounds(node.opening_loc) + on_lparen("(") + end + + arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location)) + + if node.closing_loc + bounds(node.closing_loc) + on_rparen(")") + end + + block = visit(block_node) call = - if node.opening_loc.nil? && arguments&.any? + if node.opening_loc.nil? && get_arguments_and_block(node.arguments, node.block).first.any? bounds(node.location) on_command(message, arguments) elsif !node.opening_loc.nil? @@ -1085,11 +1351,11 @@ module Prism on_method_add_arg(on_fcall(message), on_args_new) end - if block.nil? - call - else + if block_node bounds(node.block.location) on_method_add_block(call, block) + else + call end end end @@ -1097,7 +1363,7 @@ module Prism receiver = visit(node.receiver) bounds(node.call_operator_loc) - call_operator = visit_token(node.call_operator) + call_operator = visit_call_operator(node.call_operator) message = if node.message_loc.nil? @@ -1107,13 +1373,30 @@ module Prism visit_token(node.message, false) end + if node.equal_loc + bounds(node.equal_loc) + on_op("=") + end + if node.name.end_with?("=") && !node.message.end_with?("=") && !node.arguments.nil? && node.block.nil? value = visit_write_value(node.arguments.arguments.first) bounds(node.location) on_assign(on_field(receiver, call_operator, message), value) else - arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location)) + if node.opening_loc + bounds(node.opening_loc) + on_lparen("(") + end + + arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location)) + + if node.closing_loc + bounds(node.closing_loc) + on_rparen(")") + end + + block = visit(block_node) call = if node.opening_loc.nil? bounds(node.location) @@ -1131,27 +1414,35 @@ module Prism on_method_add_arg(on_call(receiver, call_operator, message), arguments) end - if block.nil? - call - else + if block_node bounds(node.block.location) on_method_add_block(call, block) + else + call end end end end - # Visit the arguments and block of a call node and return the arguments - # and block as they should be used. - private def visit_call_node_arguments(arguments_node, block_node, trailing_comma) + # Extract the arguments and block Ripper-style, which means if the block + # is like `&b` then it's moved to arguments. + private def get_arguments_and_block(arguments_node, block_node) arguments = arguments_node&.arguments || [] block = block_node if block.is_a?(BlockArgumentNode) - arguments << block + arguments += [block] block = nil end + [arguments, block] + end + + # Visit the arguments and block of a call node and return the arguments + # and block as they should be used. + private def visit_call_node_arguments(arguments_node, block_node, trailing_comma) + arguments, block = get_arguments_and_block(arguments_node, block_node) + [ if arguments.length == 1 && arguments.first.is_a?(ForwardingArgumentsNode) visit(arguments.first) @@ -1165,7 +1456,7 @@ module Prism on_args_add_block(args, false) end end, - visit(block) + block, ] end @@ -1183,7 +1474,7 @@ module Prism receiver = visit(node.receiver) bounds(node.call_operator_loc) - call_operator = visit_token(node.call_operator) + call_operator = visit_call_operator(node.call_operator) bounds(node.message_loc) message = visit_token(node.message) @@ -1205,7 +1496,7 @@ module Prism receiver = visit(node.receiver) bounds(node.call_operator_loc) - call_operator = visit_token(node.call_operator) + call_operator = visit_call_operator(node.call_operator) bounds(node.message_loc) message = visit_token(node.message) @@ -1227,7 +1518,7 @@ module Prism receiver = visit(node.receiver) bounds(node.call_operator_loc) - call_operator = visit_token(node.call_operator) + call_operator = visit_call_operator(node.call_operator) bounds(node.message_loc) message = visit_token(node.message) @@ -1249,6 +1540,9 @@ module Prism if node.call_operator == "::" receiver = visit(node.receiver) + bounds(node.call_operator_loc) + on_op("::") + bounds(node.message_loc) message = visit_token(node.message) @@ -1258,7 +1552,7 @@ module Prism receiver = visit(node.receiver) bounds(node.call_operator_loc) - call_operator = visit_token(node.call_operator) + call_operator = visit_call_operator(node.call_operator) bounds(node.message_loc) message = visit_token(node.message) @@ -1272,6 +1566,10 @@ module Prism # ^^^^^^^^^^ def visit_capture_pattern_node(node) value = visit(node.value) + + bounds(node.operator_loc) + on_op("=>") + target = visit(node.target) bounds(node.location) @@ -1281,10 +1579,21 @@ module Prism # case foo; when bar; end # ^^^^^^^^^^^^^^^^^^^^^^^ def visit_case_node(node) + bounds(node.case_keyword_loc) + on_kw("case") + predicate = visit(node.predicate) + visited_conditions = node.conditions.map { |condition| visit(condition) } + visited_else_clause = visit(node.else_clause) + + if !node.else_clause + bounds(node.end_keyword_loc) + on_kw("end") + end + clauses = - node.conditions.reverse_each.inject(visit(node.else_clause)) do |current, condition| - on_when(*visit(condition), current) + visited_conditions.reverse_each.inject(visited_else_clause) do |current, condition| + on_when(*condition, current) end bounds(node.location) @@ -1294,10 +1603,23 @@ module Prism # case foo; in bar; end # ^^^^^^^^^^^^^^^^^^^^^ def visit_case_match_node(node) + bounds(node.case_keyword_loc) + on_kw("case") + predicate = visit(node.predicate) + visited_conditions = node.conditions.map do | condition| + visit(condition) + end + visited_else_clause = visit(node.else_clause) + + if !node.else_clause + bounds(node.end_keyword_loc) + on_kw("end") + end + clauses = - node.conditions.reverse_each.inject(visit(node.else_clause)) do |current, condition| - on_in(*visit(condition), current) + visited_conditions.reverse_each.inject(visited_else_clause) do |current, condition| + on_in(*condition, current) end bounds(node.location) @@ -1307,6 +1629,9 @@ module Prism # class Foo; end # ^^^^^^^^^^^^^^ def visit_class_node(node) + bounds(node.class_keyword_loc) + on_kw("class") + constant_path = if node.constant_path.is_a?(ConstantReadNode) bounds(node.constant_path.location) @@ -1315,9 +1640,17 @@ module Prism visit(node.constant_path) end + if node.inheritance_operator_loc + bounds(node.inheritance_operator_loc) + on_op("<") + end + superclass = visit(node.superclass) bodystmt = visit_body_node(node.superclass&.location || node.constant_path.location, node.body, node.superclass.nil?) + bounds(node.end_keyword_loc) + on_kw("end") + bounds(node.location) on_class(constant_path, superclass, bodystmt) end @@ -1331,12 +1664,13 @@ module Prism # @@foo = 1 # ^^^^^^^^^ - # - # @@foo, @@bar = 1 - # ^^^^^ ^^^^^ def visit_class_variable_write_node(node) bounds(node.name_loc) target = on_var_field(on_cvar(node.name.to_s)) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -1401,12 +1735,13 @@ module Prism # Foo = 1 # ^^^^^^^ - # - # Foo, Bar = 1 - # ^^^ ^^^ def visit_constant_write_node(node) bounds(node.name_loc) target = on_var_field(on_const(node.name.to_s)) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -1466,6 +1801,11 @@ module Prism # ^^^^^^^^ def visit_constant_path_node(node) if node.parent.nil? + if node.delimiter_loc + bounds(node.delimiter_loc) + on_op("::") + end + bounds(node.name_loc) child = on_const(node.name.to_s) @@ -1474,6 +1814,9 @@ module Prism else parent = visit(node.parent) + bounds(node.delimiter_loc) + on_op("::") + bounds(node.name_loc) child = on_const(node.name.to_s) @@ -1484,11 +1827,12 @@ module Prism # Foo::Bar = 1 # ^^^^^^^^^^^^ - # - # Foo::Foo, Bar::Bar = 1 - # ^^^^^^^^ ^^^^^^^^ def visit_constant_path_write_node(node) target = visit_constant_path_write_node_target(node.target) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -1498,6 +1842,11 @@ module Prism # Visit a constant path that is part of a write node. private def visit_constant_path_write_node_target(node) if node.parent.nil? + if node.delimiter_loc + bounds(node.delimiter_loc) + on_op("::") + end + bounds(node.name_loc) child = on_const(node.name.to_s) @@ -1506,6 +1855,9 @@ module Prism else parent = visit(node.parent) + bounds(node.delimiter_loc) + on_op("::") + bounds(node.name_loc) child = on_const(node.name.to_s) @@ -1518,7 +1870,6 @@ module Prism # ^^^^^^^^^^^^^^^ def visit_constant_path_operator_write_node(node) target = visit_constant_path_write_node_target(node.target) - value = visit(node.value) bounds(node.binary_operator_loc) operator = on_op("#{node.binary_operator}=") @@ -1532,7 +1883,6 @@ module Prism # ^^^^^^^^^^^^^^^^ def visit_constant_path_and_write_node(node) target = visit_constant_path_write_node_target(node.target) - value = visit(node.value) bounds(node.operator_loc) operator = on_op("&&=") @@ -1546,7 +1896,6 @@ module Prism # ^^^^^^^^^^^^^^^^ def visit_constant_path_or_write_node(node) target = visit_constant_path_write_node_target(node.target) - value = visit(node.value) bounds(node.operator_loc) operator = on_op("||=") @@ -1568,16 +1917,24 @@ module Prism # def self.foo; end # ^^^^^^^^^^^^^^^^^ def visit_def_node(node) + bounds(node.def_keyword_loc) + on_kw("def") + receiver = visit(node.receiver) operator = if !node.operator_loc.nil? bounds(node.operator_loc) - visit_token(node.operator) + node.operator == "." ? on_period(".") : on_op("::") end bounds(node.name_loc) name = visit_token(node.name_loc.slice) + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + parameters = if node.parameters.nil? bounds(node.location) @@ -1587,10 +1944,17 @@ module Prism end if !node.lparen_loc.nil? + bounds(node.rparen_loc) + on_rparen(")") bounds(node.lparen_loc) parameters = on_paren(parameters) end + if node.equal_loc + bounds(node.equal_loc) + on_op("=") + end + bodystmt = if node.equal_loc.nil? visit_body_node(node.rparen_loc || node.end_keyword_loc, node.body) @@ -1601,11 +1965,16 @@ module Prism on_bodystmt(body, nil, nil, nil) end + if node.end_keyword_loc + bounds(node.end_keyword_loc) + on_kw("end") + end + bounds(node.location) - if receiver.nil? - on_def(name, parameters, bodystmt) - else + if receiver on_defs(receiver, operator, name, parameters, bodystmt) + else + on_def(name, parameters, bodystmt) end end @@ -1615,24 +1984,59 @@ module Prism # defined?(a) # ^^^^^^^^^^^ def visit_defined_node(node) + bounds(node.keyword_loc) + on_kw("defined?") + + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + + expression = visit(node.value) + + if node.rparen_loc + bounds(node.rparen_loc) + on_rparen(")") + end + + # Very weird circumstances here where something like: + # + # defined? + # (1) + # + # gets parsed in Ruby as having only the `1` expression but in Ripper it + # gets parsed as having a parentheses node. In this case we need to + # synthesize that node to match Ripper's behavior. + if node.lparen_loc && node.keyword_loc.join(node.lparen_loc).slice.include?("\n") + bounds(node.lparen_loc.join(node.rparen_loc)) + expression = on_paren(on_stmts_add(on_stmts_new, expression)) + end + bounds(node.location) - on_defined(visit(node.value)) + on_defined(expression) end # if foo then bar else baz end # ^^^^^^^^^^^^ def visit_else_node(node) + bounds(node.else_keyword_loc) + on_kw("else") + statements = if node.statements.nil? [nil] else body = node.statements.body - body.unshift(nil) if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false) + body = [nil, *body] if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false) body end + else_statements = visit_statements_node_body(statements) + + bounds(node.end_keyword_loc) + on_kw("end") bounds(node.location) - on_else(visit_statements_node_body(statements)) + on_else(else_statements) end # "foo #{bar}" @@ -1670,12 +2074,15 @@ module Prism # Visit an EnsureNode node. def visit_ensure_node(node) + bounds(node.ensure_keyword_loc) + on_kw("ensure") + statements = if node.statements.nil? [nil] else body = node.statements.body - body.unshift(nil) if void_stmt?(node.ensure_keyword_loc, body[0].location, false) + body = [nil, *body] if void_stmt?(node.ensure_keyword_loc, body[0].location, false) body end @@ -1696,6 +2103,14 @@ module Prism # ^^^^^^^^^^^ def visit_find_pattern_node(node) constant = visit(node.constant) + + if node.opening_loc + bounds(node.opening_loc) + node.opening == "[" ? on_lbracket("[") : on_lparen("(") + end + bounds(node.left.operator_loc) + on_op("*") + left = if node.left.expression.nil? bounds(node.left.location) @@ -1705,6 +2120,10 @@ module Prism end requireds = visit_all(node.requireds) if node.requireds.any? + + bounds(node.right.operator_loc) + on_op("*") + right = if node.right.expression.nil? bounds(node.right.location) @@ -1713,6 +2132,10 @@ module Prism visit(node.right.expression) end + if node.closing_loc + bounds(node.closing_loc) + node.closing == "]" ? on_rbracket("]") : on_rparen(")") + end bounds(node.location) on_fndptn(constant, left, requireds, right) end @@ -1721,6 +2144,10 @@ module Prism # ^^^^^^^^^^ def visit_flip_flop_node(node) left = visit(node.left) + + bounds(node.operator_loc) + on_op(node.operator) + right = visit(node.right) bounds(node.location) @@ -1740,8 +2167,18 @@ module Prism # for foo in bar do end # ^^^^^^^^^^^^^^^^^^^^^ def visit_for_node(node) + bounds(node.for_keyword_loc) + on_kw("for") + index = visit(node.index) + bounds(node.in_keyword_loc) + on_kw("in") + collection = visit(node.collection) + if node.do_keyword_loc + bounds(node.do_keyword_loc) + on_kw("do") + end statements = if node.statements.nil? bounds(node.location) @@ -1750,6 +2187,9 @@ module Prism visit(node.statements) end + bounds(node.end_keyword_loc) + on_kw("end") + bounds(node.location) on_for(index, collection, statements) end @@ -1758,6 +2198,7 @@ module Prism # ^^^ def visit_forwarding_arguments_node(node) bounds(node.location) + on_op("...") on_args_forward end @@ -1765,6 +2206,7 @@ module Prism # ^^^ def visit_forwarding_parameter_node(node) bounds(node.location) + on_op("...") on_args_forward end @@ -1774,6 +2216,9 @@ module Prism # super {} # ^^^^^^^^ def visit_forwarding_super_node(node) + bounds(node.keyword_loc) + on_kw("super") + if node.block.nil? bounds(node.location) on_zsuper @@ -1794,12 +2239,13 @@ module Prism # $foo = 1 # ^^^^^^^^ - # - # $foo, $bar = 1 - # ^^^^ ^^^^ def visit_global_variable_write_node(node) bounds(node.name_loc) target = on_var_field(on_gvar(node.name.to_s)) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -1858,6 +2304,9 @@ module Prism # {} # ^^ def visit_hash_node(node) + bounds(node.opening_loc) + on_lbrace("{") + elements = if node.elements.any? args = visit_all(node.elements) @@ -1866,6 +2315,8 @@ module Prism on_assoclist_from_args(args) end + bounds(node.closing_loc) + on_rbrace("}") bounds(node.location) on_hash(elements) end @@ -1874,6 +2325,15 @@ module Prism # ^^ def visit_hash_pattern_node(node) constant = visit(node.constant) + + if node.constant + bounds(node.opening_loc) + node.opening == "[" ? on_lbracket("[") : on_lparen("(") + elsif node.opening_loc + bounds(node.opening_loc) + on_lbrace("{") + end + elements = if node.elements.any? || !node.rest.nil? node.elements.map do |element| @@ -1896,12 +2356,21 @@ module Prism rest = case node.rest when AssocSplatNode + bounds(node.rest.operator_loc) + on_op("**") visit(node.rest.value) when NoKeywordsParameterNode bounds(node.rest.location) on_var_field(visit(node.rest)) end + if node.constant + bounds(node.closing_loc) + node.closing == "]" ? on_rbracket("]") : on_rparen(")") + elsif node.closing_loc + bounds(node.closing_loc) + on_rbrace("}") + end bounds(node.location) on_hshptn(constant, elements, rest) end @@ -1917,13 +2386,27 @@ module Prism def visit_if_node(node) if node.then_keyword == "?" predicate = visit(node.predicate) + + bounds(node.then_keyword_loc) + on_op("?") + truthy = visit(node.statements.body.first) + + bounds(node.subsequent.else_keyword_loc) + on_op(":") + falsy = visit(node.subsequent.statements.body.first) bounds(node.location) on_ifop(predicate, truthy, falsy) elsif node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset) + bounds(node.if_keyword_loc) + on_kw(node.if_keyword) predicate = visit(node.predicate) + if node.then_keyword_loc && node.then_keyword != "?" + bounds(node.then_keyword_loc) + on_kw("then") + end statements = if node.statements.nil? bounds(node.location) @@ -1933,6 +2416,11 @@ module Prism end subsequent = visit(node.subsequent) + if node.end_keyword_loc && !node.subsequent + bounds(node.end_keyword_loc) + on_kw("end") + end + bounds(node.location) if node.if_keyword == "if" on_if(predicate, statements, subsequent) @@ -1941,6 +2429,8 @@ module Prism end else statements = visit(node.statements.body.first) + bounds(node.if_keyword_loc) + on_kw(node.if_keyword) predicate = visit(node.predicate) bounds(node.location) @@ -1972,7 +2462,14 @@ module Prism # This is a special case where we're not going to call on_in directly # because we don't have access to the subsequent. Instead, we'll return # the component parts and let the parent node handle it. + bounds(node.in_loc) + on_kw("in") + pattern = visit_pattern_node(node.pattern) + if node.then_loc + bounds(node.then_loc) + on_kw("then") + end statements = if node.statements.nil? bounds(node.location) @@ -1988,8 +2485,15 @@ module Prism # ^^^^^^^^^^^^^^^ def visit_index_operator_write_node(node) receiver = visit(node.receiver) + + bounds(node.opening_loc) + on_lbracket("[") + arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + bounds(node.closing_loc) + on_rbracket("]") + bounds(node.location) target = on_aref_field(receiver, arguments) @@ -2005,8 +2509,15 @@ module Prism # ^^^^^^^^^^^^^^^^ def visit_index_and_write_node(node) receiver = visit(node.receiver) + + bounds(node.opening_loc) + on_lbracket("[") + arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + bounds(node.closing_loc) + on_rbracket("]") + bounds(node.location) target = on_aref_field(receiver, arguments) @@ -2022,8 +2533,15 @@ module Prism # ^^^^^^^^^^^^^^^^ def visit_index_or_write_node(node) receiver = visit(node.receiver) + + bounds(node.opening_loc) + on_lbracket("[") + arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + bounds(node.closing_loc) + on_rbracket("]") + bounds(node.location) target = on_aref_field(receiver, arguments) @@ -2039,8 +2557,15 @@ module Prism # ^^^^^^^^ def visit_index_target_node(node) receiver = visit(node.receiver) + + bounds(node.opening_loc) + on_lbracket("[") + arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + bounds(node.closing_loc) + on_rbracket("]") + bounds(node.location) on_aref_field(receiver, arguments) end @@ -2057,6 +2582,10 @@ module Prism def visit_instance_variable_write_node(node) bounds(node.name_loc) target = on_var_field(on_ivar(node.name.to_s)) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -2159,20 +2688,37 @@ module Prism # "foo #{bar}" # ^^^^^^^^^^^^ def visit_interpolated_string_node(node) - if node.opening&.start_with?("<<~") - heredoc = visit_heredoc_string_node(node) + with_string_bounds(node) do + if node.opening&.start_with?("<<~") + heredoc = visit_heredoc_string_node(node) - bounds(node.location) - on_string_literal(heredoc) - elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? } - first, *rest = node.parts - rest.inject(visit(first)) do |content, part| - concat = visit(part) + bounds(node.location) + on_string_literal(heredoc) + elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? } + first, *rest = node.parts + rest.inject(visit(first)) do |content, part| + concat = visit(part) + + bounds(part.location) + on_string_concat(content, concat) + end + else + bounds(node.parts.first.location) + parts = + node.parts.inject(on_string_content) do |content, part| + on_string_add(content, visit_string_content(part)) + end - bounds(part.location) - on_string_concat(content, concat) + bounds(node.location) + on_string_literal(parts) end - else + end + end + + # :"foo #{bar}" + # ^^^^^^^^^^^^^ + def visit_interpolated_symbol_node(node) + with_string_bounds(node) do bounds(node.parts.first.location) parts = node.parts.inject(on_string_content) do |content, part| @@ -2180,40 +2726,29 @@ module Prism end bounds(node.location) - on_string_literal(parts) + on_dyna_symbol(parts) end end - # :"foo #{bar}" - # ^^^^^^^^^^^^^ - def visit_interpolated_symbol_node(node) - bounds(node.parts.first.location) - parts = - node.parts.inject(on_string_content) do |content, part| - on_string_add(content, visit_string_content(part)) - end - - bounds(node.location) - on_dyna_symbol(parts) - end - # `foo #{bar}` # ^^^^^^^^^^^^ def visit_interpolated_x_string_node(node) - if node.opening.start_with?("<<~") - heredoc = visit_heredoc_x_string_node(node) + with_string_bounds(node) do + if node.opening.start_with?("<<~") + heredoc = visit_heredoc_x_string_node(node) - bounds(node.location) - on_xstring_literal(heredoc) - else - bounds(node.parts.first.location) - parts = - node.parts.inject(on_xstring_new) do |content, part| - on_xstring_add(content, visit_string_content(part)) - end + bounds(node.location) + on_xstring_literal(heredoc) + else + bounds(node.parts.first.location) + parts = + node.parts.inject(on_xstring_new) do |content, part| + on_xstring_add(content, visit_string_content(part)) + end - bounds(node.location) - on_xstring_literal(parts) + bounds(node.location) + on_xstring_literal(parts) + end end end @@ -2254,6 +2789,9 @@ module Prism # def foo(**); end # ^^ def visit_keyword_rest_parameter_node(node) + bounds(node.operator_loc) + on_op("**") + if node.name_loc.nil? bounds(node.location) on_kwrest_param(nil) @@ -2273,6 +2811,11 @@ module Prism parameters = if node.parameters.is_a?(BlockParametersNode) + if node.parameters.opening_loc + bounds(node.parameters.opening_loc) + on_lparen("(") + end + # Ripper does not track block-locals within lambdas, so we skip # directly to the parameters here. params = @@ -2283,6 +2826,13 @@ module Prism visit(node.parameters.parameters) end + visit_all(node.parameters.locals) + + if node.parameters.closing_loc + bounds(node.parameters.closing_loc) + on_rparen(")") + end + if node.parameters.opening_loc.nil? params else @@ -2295,9 +2845,11 @@ module Prism end braces = node.opening == "{" + bounds(node.opening_loc) if braces - bounds(node.opening_loc) on_tlambeg(node.opening) + else + on_kw("do") end body = @@ -2310,7 +2862,7 @@ module Prism braces ? stmts : on_bodystmt(stmts, nil, nil, nil) when StatementsNode stmts = node.body.body - stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) + stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) stmts = visit_statements_node_body(stmts) bounds(node.body.location) @@ -2321,6 +2873,13 @@ module Prism raise end + bounds(node.closing_loc) + if braces + on_rbrace("}") + else + on_kw("end") + end + bounds(node.location) on_lambda(parameters, body) end @@ -2337,6 +2896,10 @@ module Prism def visit_local_variable_write_node(node) bounds(node.name_loc) target = on_var_field(on_ident(node.name_loc.slice)) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -2411,6 +2974,8 @@ module Prism # ^^^^^^^^^^ def visit_match_predicate_node(node) value = visit(node.value) + bounds(node.operator_loc) + on_kw("in") pattern = on_in(visit_pattern_node(node.pattern), nil, nil) on_case(value, pattern) @@ -2420,6 +2985,10 @@ module Prism # ^^^^^^^^^^ def visit_match_required_node(node) value = visit(node.value) + + bounds(node.operator_loc) + on_op("=>") + pattern = on_in(visit_pattern_node(node.pattern), nil, nil) on_case(value, pattern) @@ -2433,13 +3002,16 @@ module Prism # A node that is missing from the syntax tree. This is only used in the # case of a syntax error. - def visit_missing_node(node) - raise "Cannot visit missing nodes directly." + def visit_error_recovery_node(node) + raise "Cannot visit error recovery nodes directly." end # module Foo; end # ^^^^^^^^^^^^^^^ def visit_module_node(node) + bounds(node.module_keyword_loc) + on_kw("module") + constant_path = if node.constant_path.is_a?(ConstantReadNode) bounds(node.constant_path.location) @@ -2450,6 +3022,9 @@ module Prism bodystmt = visit_body_node(node.constant_path.location, node.body, true) + bounds(node.end_keyword_loc) + on_kw("end") + bounds(node.location) on_module(constant_path, bodystmt) end @@ -2457,9 +3032,19 @@ module Prism # (foo, bar), bar = qux # ^^^^^^^^^^ def visit_multi_target_node(node) + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + bounds(node.location) targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, true) + if node.rparen_loc + bounds(node.rparen_loc) + on_rparen(")") + end + if node.lparen_loc.nil? targets else @@ -2511,9 +3096,22 @@ module Prism # foo, bar = baz # ^^^^^^^^^^^^^^ def visit_multi_write_node(node) + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + bounds(node.location) targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, true) + if node.rparen_loc + bounds(node.rparen_loc) + on_rparen(")") + end + + bounds(node.operator_loc) + on_op("=") + unless node.lparen_loc.nil? bounds(node.lparen_loc) targets = on_mlhs_paren(targets) @@ -2531,6 +3129,9 @@ module Prism # next foo # ^^^^^^^^ def visit_next_node(node) + bounds(node.keyword_loc) + on_kw("next") + if node.arguments.nil? bounds(node.location) on_next(on_args_new) @@ -2549,9 +3150,24 @@ module Prism on_var_ref(on_kw("nil")) end + # def foo(&nil); end + # ^^^^ + def visit_no_block_parameter_node(node) + bounds(node.operator_loc) + on_op("&") + bounds(node.keyword_loc) + on_kw("nil") + bounds(node.location) + on_blockarg(:nil) + end + # def foo(**nil); end # ^^^^^ def visit_no_keywords_parameter_node(node) + bounds(node.operator_loc) + on_op("**") + bounds(node.keyword_loc) + on_kw("nil") bounds(node.location) on_nokw_param(nil) @@ -2584,7 +3200,11 @@ module Prism # ^^^^^^^ def visit_optional_parameter_node(node) bounds(node.name_loc) - name = visit_token(node.name.to_s) + name = on_ident(node.name.to_s) + + bounds(node.operator_loc) + on_op("=") + value = visit(node.value) [name, value] @@ -2594,6 +3214,14 @@ module Prism # ^^^^^^ def visit_or_node(node) left = visit(node.left) + + bounds(node.operator_loc) + if node.operator == "or" + on_kw("or") + else + on_op("||") + end + right = visit(node.right) bounds(node.location) @@ -2617,9 +3245,19 @@ module Prism # Visit a destructured positional parameter node. private def visit_destructured_parameter_node(node) + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + bounds(node.location) targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, false) + if node.rparen_loc + bounds(node.rparen_loc) + on_rparen(")") + end + bounds(node.lparen_loc) on_mlhs_paren(targets) end @@ -2630,6 +3268,9 @@ module Prism # (1) # ^^^ def visit_parentheses_node(node) + bounds(node.opening_loc) + on_lparen("(") + body = if node.body.nil? on_stmts_add(on_stmts_new, on_void_stmt) @@ -2637,6 +3278,8 @@ module Prism visit(node.body) end + bounds(node.closing_loc) + on_rparen(")") bounds(node.location) on_paren(body) end @@ -2644,8 +3287,15 @@ module Prism # foo => ^(bar) # ^^^^^^ def visit_pinned_expression_node(node) + bounds(node.operator_loc) + on_op("^") + bounds(node.lparen_loc) + on_lparen("(") + expression = visit(node.expression) + bounds(node.rparen_loc) + on_rparen(")") bounds(node.location) on_begin(expression) end @@ -2653,12 +3303,20 @@ module Prism # foo = 1 and bar => ^foo # ^^^^ def visit_pinned_variable_node(node) + bounds(node.operator_loc) + on_op("^") + visit(node.variable) end # END {} # ^^^^^^ def visit_post_execution_node(node) + bounds(node.keyword_loc) + on_kw("END") + bounds(node.opening_loc) + on_lbrace("{") + statements = if node.statements.nil? bounds(node.location) @@ -2667,6 +3325,8 @@ module Prism visit(node.statements) end + bounds(node.closing_loc) + on_rbrace("}") bounds(node.location) on_END(statements) end @@ -2674,6 +3334,11 @@ module Prism # BEGIN {} # ^^^^^^^^ def visit_pre_execution_node(node) + bounds(node.keyword_loc) + on_kw("BEGIN") + bounds(node.opening_loc) + on_lbrace("{") + statements = if node.statements.nil? bounds(node.location) @@ -2682,6 +3347,8 @@ module Prism visit(node.statements) end + bounds(node.closing_loc) + on_rbrace("}") bounds(node.location) on_BEGIN(statements) end @@ -2689,7 +3356,7 @@ module Prism # The top-level program node. def visit_program_node(node) body = node.statements.body - body << nil if body.empty? + body = [nil] if body.empty? statements = visit_statements_node_body(body) bounds(node.location) @@ -2700,6 +3367,10 @@ module Prism # ^^^^ def visit_range_node(node) left = visit(node.left) + + bounds(node.operator_loc) + on_op(node.operator) + right = visit(node.right) bounds(node.location) @@ -2720,6 +3391,7 @@ module Prism # ^^^^ def visit_redo_node(node) bounds(node.location) + on_kw("redo") on_redo end @@ -2762,6 +3434,9 @@ module Prism # foo rescue bar # ^^^^^^^^^^^^^^ def visit_rescue_modifier_node(node) + bounds(node.keyword_loc) + on_kw("rescue") + expression = visit_write_value(node.expression) rescue_expression = visit(node.rescue_expression) @@ -2772,6 +3447,9 @@ module Prism # begin; rescue; end # ^^^^^^^ def visit_rescue_node(node) + bounds(node.keyword_loc) + on_kw("rescue") + exceptions = case node.exceptions.length when 0 @@ -2809,6 +3487,11 @@ module Prism end end + if node.operator_loc + bounds(node.operator_loc) + on_op("=>") + end + reference = visit(node.reference) statements = if node.statements.nil? @@ -2830,12 +3513,15 @@ module Prism # def foo(*); end # ^ def visit_rest_parameter_node(node) + bounds(node.operator_loc) + on_op("*") + if node.name_loc.nil? bounds(node.location) on_rest_param(nil) else bounds(node.name_loc) - on_rest_param(visit_token(node.name.to_s)) + on_rest_param(on_ident(node.name.to_s)) end end @@ -2843,6 +3529,7 @@ module Prism # ^^^^^ def visit_retry_node(node) bounds(node.location) + on_kw("retry") on_retry end @@ -2852,6 +3539,9 @@ module Prism # return 1 # ^^^^^^^^ def visit_return_node(node) + bounds(node.keyword_loc) + on_kw("return") + if node.arguments.nil? bounds(node.location) on_return0 @@ -2878,9 +3568,17 @@ module Prism # class << self; end # ^^^^^^^^^^^^^^^^^^ def visit_singleton_class_node(node) + bounds(node.class_keyword_loc) + on_kw("class") + bounds(node.operator_loc) + on_op("<<") + expression = visit(node.expression) bodystmt = visit_body_node(node.body&.location || node.end_keyword_loc, node.body) + bounds(node.end_keyword_loc) + on_kw("end") + bounds(node.location) on_sclass(expression, bodystmt) end @@ -2915,6 +3613,8 @@ module Prism # def foo(*); bar(*); end # ^ def visit_splat_node(node) + bounds(node.operator_loc) + on_op("*") visit(node.expression) end @@ -2937,26 +3637,68 @@ module Prism # "foo" # ^^^^^ def visit_string_node(node) - if (content = node.content).empty? - bounds(node.location) - on_string_literal(on_string_content) - elsif (opening = node.opening) == "?" - bounds(node.location) - on_CHAR("?#{node.content}") - elsif opening.start_with?("<<~") - heredoc = visit_heredoc_string_node(node.to_interpolated) + with_string_bounds(node) do + if (content = node.content).empty? + bounds(node.location) + on_string_literal(on_string_content) + elsif (opening = node.opening) == "?" + bounds(node.location) + on_CHAR("?#{node.content}") + elsif opening.start_with?("<<~") + heredoc = visit_heredoc_string_node(node.to_interpolated) - bounds(node.location) - on_string_literal(heredoc) - else - bounds(node.content_loc) - tstring_content = on_tstring_content(content) + bounds(node.location) + on_string_literal(heredoc) + else + bounds(node.content_loc) + tstring_content = on_tstring_content(content) - bounds(node.location) - on_string_literal(on_string_add(on_string_content, tstring_content)) + bounds(node.location) + on_string_literal(on_string_add(on_string_content, tstring_content)) + end end end + # Responsible for emitting the various string-like begin/end events + private def with_string_bounds(node) + # `foo "bar": baz` doesn't emit the closing location + assoc = !(opening = node.opening)&.include?(":") && node.closing&.end_with?(":") + + is_heredoc = opening&.start_with?("<<") + if is_heredoc + bounds(node.opening_loc) + on_heredoc_beg(node.opening) + elsif opening&.start_with?(":", "%s") + bounds(node.opening_loc) + on_symbeg(node.opening) + elsif opening&.start_with?("`", "%x") + bounds(node.opening_loc) + on_backtick(node.opening) + elsif opening && !opening.start_with?("?") + bounds(node.opening_loc) + on_tstring_beg(opening) + end + + result = yield + if assoc + if node.closing != ":" + bounds(node.closing_loc) + on_label_end(node.closing) + end + return result + end + + if is_heredoc + bounds(node.closing_loc) + on_heredoc_end(node.closing) + elsif node.closing_loc + bounds(node.closing_loc) + on_tstring_end(node.closing) + end + + result + end + # Ripper gives back the escaped string content but strips out the common # leading whitespace. Prism gives back the unescaped string content and # a location for the escaped string content. Unfortunately these don't @@ -3034,42 +3776,39 @@ module Prism # Visit a heredoc node that is representing a string. private def visit_heredoc_string_node(node) - bounds(node.opening_loc) - on_heredoc_beg(node.opening) - bounds(node.location) - result = - visit_heredoc_node(node.parts, on_string_content) do |parts, part| - on_string_add(parts, part) - end - - bounds(node.closing_loc) - on_heredoc_end(node.closing) - - result + visit_heredoc_node(node.parts, on_string_content) do |parts, part| + on_string_add(parts, part) + end end # Visit a heredoc node that is representing an xstring. private def visit_heredoc_x_string_node(node) - bounds(node.opening_loc) - on_heredoc_beg(node.opening) - bounds(node.location) - result = - visit_heredoc_node(node.parts, on_xstring_new) do |parts, part| - on_xstring_add(parts, part) - end - - bounds(node.closing_loc) - on_heredoc_end(node.closing) - - result + visit_heredoc_node(node.parts, on_xstring_new) do |parts, part| + on_xstring_add(parts, part) + end end # super(foo) # ^^^^^^^^^^ def visit_super_node(node) - arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.rparen_loc || node.location)) + bounds(node.keyword_loc) + on_kw("super") + + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + + arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.rparen_loc || node.location)) + + if node.rparen_loc + bounds(node.rparen_loc) + on_rparen(")") + end + + block = visit(block_node) if !node.lparen_loc.nil? bounds(node.lparen_loc) @@ -3079,35 +3818,36 @@ module Prism bounds(node.location) call = on_super(arguments) - if block.nil? - call - else + if block_node bounds(node.block.location) on_method_add_block(call, block) + else + call end end # :foo # ^^^^ def visit_symbol_node(node) - if (opening = node.opening)&.match?(/^%s|['"]:?$/) - bounds(node.value_loc) - content = on_string_content - - if !(value = node.value).empty? - content = on_string_add(content, on_tstring_content(value)) + with_string_bounds(node) do + if node.value_loc.nil? + bounds(node.location) + on_dyna_symbol(on_string_content) + elsif (opening = node.opening)&.match?(/^%s|['"]:?$/) + bounds(node.value_loc) + content = on_string_add(on_string_content, on_tstring_content(node.value)) + bounds(node.location) + on_dyna_symbol(content) + elsif (closing = node.closing) == ":" + bounds(node.location) + on_label("#{node.value}:") + elsif opening.nil? && node.closing_loc.nil? + bounds(node.value_loc) + on_symbol_literal(visit_token(node.value)) + else + bounds(node.value_loc) + on_symbol_literal(on_symbol(visit_token(node.value))) end - - on_dyna_symbol(content) - elsif (closing = node.closing) == ":" - bounds(node.location) - on_label("#{node.value}:") - elsif opening.nil? && node.closing_loc.nil? - bounds(node.value_loc) - on_symbol_literal(visit_token(node.value)) - else - bounds(node.value_loc) - on_symbol_literal(on_symbol(visit_token(node.value))) end end @@ -3121,6 +3861,9 @@ module Prism # undef foo # ^^^^^^^^^ def visit_undef_node(node) + bounds(node.keyword_loc) + on_kw("undef") + names = visit_all(node.names) bounds(node.location) @@ -3134,7 +3877,13 @@ module Prism # ^^^^^^^^^^^^^^ def visit_unless_node(node) if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset) + bounds(node.keyword_loc) + on_kw("unless") predicate = visit(node.predicate) + if node.then_keyword_loc + bounds(node.then_keyword_loc) + on_kw("then") + end statements = if node.statements.nil? bounds(node.location) @@ -3144,10 +3893,17 @@ module Prism end else_clause = visit(node.else_clause) + if node.end_keyword_loc && !node.else_clause + bounds(node.end_keyword_loc) + on_kw("end") + end + bounds(node.location) on_unless(predicate, statements, else_clause) else statements = visit(node.statements.body.first) + bounds(node.keyword_loc) + on_kw("unless") predicate = visit(node.predicate) bounds(node.location) @@ -3161,7 +3917,14 @@ module Prism # bar until foo # ^^^^^^^^^^^^^ def visit_until_node(node) + bounds(node.keyword_loc) + on_kw("until") + if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset) + if node.do_keyword_loc + bounds(node.do_keyword_loc) + on_kw("do") + end predicate = visit(node.predicate) statements = if node.statements.nil? @@ -3171,6 +3934,11 @@ module Prism visit(node.statements) end + if node.closing_loc + bounds(node.closing_loc) + on_kw("end") + end + bounds(node.location) on_until(predicate, statements) else @@ -3188,7 +3956,14 @@ module Prism # This is a special case where we're not going to call on_when directly # because we don't have access to the subsequent. Instead, we'll return # the component parts and let the parent node handle it. + bounds(node.keyword_loc) + on_kw("when") + conditions = visit_arguments(node.conditions) + if node.then_keyword_loc + bounds(node.then_keyword_loc) + on_kw("then") + end statements = if node.statements.nil? bounds(node.location) @@ -3207,7 +3982,17 @@ module Prism # ^^^^^^^^^^^^^ def visit_while_node(node) if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset) + bounds(node.keyword_loc) + on_kw("while") + if node.do_keyword_loc + bounds(node.do_keyword_loc) + on_kw("do") + end predicate = visit(node.predicate) + if node.closing_loc + bounds(node.closing_loc) + on_kw("end") + end statements = if node.statements.nil? bounds(node.location) @@ -3220,6 +4005,8 @@ module Prism on_while(predicate, statements) else statements = visit(node.statements.body.first) + bounds(node.keyword_loc) + on_kw("while") predicate = visit(node.predicate) bounds(node.location) @@ -3230,20 +4017,22 @@ module Prism # `foo` # ^^^^^ def visit_x_string_node(node) - if node.unescaped.empty? - bounds(node.location) - on_xstring_literal(on_xstring_new) - elsif node.opening.start_with?("<<~") - heredoc = visit_heredoc_x_string_node(node.to_interpolated) + with_string_bounds(node) do + if node.unescaped.empty? + bounds(node.location) + on_xstring_literal(on_xstring_new) + elsif node.opening.start_with?("<<~") + heredoc = visit_heredoc_x_string_node(node.to_interpolated) - bounds(node.location) - on_xstring_literal(heredoc) - else - bounds(node.content_loc) - content = on_tstring_content(node.content) + bounds(node.location) + on_xstring_literal(heredoc) + else + bounds(node.content_loc) + content = on_tstring_content(node.content) - bounds(node.location) - on_xstring_literal(on_xstring_add(on_xstring_new, content)) + bounds(node.location) + on_xstring_literal(on_xstring_add(on_xstring_new, content)) + end end end @@ -3253,10 +4042,18 @@ module Prism # yield 1 # ^^^^^^^ def visit_yield_node(node) + bounds(node.keyword_loc) + on_kw("yield") + if node.arguments.nil? && node.lparen_loc.nil? bounds(node.location) on_yield0 else + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + arguments = if node.arguments.nil? bounds(node.location) @@ -3266,6 +4063,8 @@ module Prism end unless node.lparen_loc.nil? + bounds(node.rparen_loc) + on_rparen(")") bounds(node.lparen_loc) arguments = on_paren(arguments) end @@ -3279,7 +4078,11 @@ module Prism # Lazily initialize the parse result. def result - @result ||= Prism.parse(source, partial_script: true) + @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true, encoding: source.encoding) + end + + def line_and_column_cache + @line_and_column_cache ||= LineAndColumnCache.new(result.source) end ########################################################################## @@ -3300,30 +4103,34 @@ module Prism # Visit the string content of a particular node. This method is used to # split into the various token types. def visit_token(token, allow_keywords = true) - case token - when "." + if token == "." on_period(token) - when "`" + elsif token == "`" on_backtick(token) - when *(allow_keywords ? KEYWORDS : []) + elsif allow_keywords && KEYWORDS.include?(token) on_kw(token) - when /^_/ + elsif token.start_with?("_") on_ident(token) - when /^[[:upper:]]\w*$/ + elsif token.match?(/^[[:upper:]]\w*$/) on_const(token) - when /^@@/ + elsif token.start_with?("@@") on_cvar(token) - when /^@/ + elsif token.start_with?("@") on_ivar(token) - when /^\$/ + elsif token.start_with?("$") on_gvar(token) - when /^[[:punct:]]/ + elsif token.match?(/^[[:punct:]]/) on_op(token) else on_ident(token) end end + # Visit either `.`, `&.`, or `::`. + def visit_call_operator(token) + token == "." ? on_period(token) : on_op(token) + end + # Visit a node that represents a number. We need to explicitly handle the # unary - operator. def visit_number_node(node) @@ -3331,6 +4138,9 @@ module Prism location = node.location if slice[0] == "-" + bounds(location.copy(length: 1)) + on_op("-") + bounds(location.copy(start_offset: location.start_offset + 1)) value = yield slice[1..-1] @@ -3379,26 +4189,24 @@ module Prism # This method is responsible for updating lineno and column information # to reflect the current node. - # - # This method could be drastically improved with some caching on the start - # of every line, but for now it's good enough. def bounds(location) - @lineno = location.start_line - @column = location.start_column + @lineno, @column = line_and_column_cache.line_and_column(location.start_offset) end + # :startdoc: + ########################################################################## # Ripper interface ########################################################################## # :stopdoc: def _dispatch_0; end - def _dispatch_1(_); end - def _dispatch_2(_, _); end - def _dispatch_3(_, _, _); end - def _dispatch_4(_, _, _, _); end - def _dispatch_5(_, _, _, _, _); end - def _dispatch_7(_, _, _, _, _, _, _); end + def _dispatch_1(arg); arg end + def _dispatch_2(arg, _); arg end + def _dispatch_3(arg, _, _); arg end + def _dispatch_4(arg, _, _, _); arg end + def _dispatch_5(arg, _, _, _, _); arg end + def _dispatch_7(arg, _, _, _, _, _, _); arg end # :startdoc: # diff --git a/lib/prism/translation/ripper/filter.rb b/lib/prism/translation/ripper/filter.rb new file mode 100644 index 0000000000..19deef2d37 --- /dev/null +++ b/lib/prism/translation/ripper/filter.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module Prism + module Translation + class Ripper + class Filter # :nodoc: + # :stopdoc: + def initialize(src, filename = '-', lineno = 1) + @__lexer = Lexer.new(src, filename, lineno) + @__line = nil + @__col = nil + @__state = nil + end + + def filename + @__lexer.filename + end + + def lineno + @__line + end + + def column + @__col + end + + def state + @__state + end + + def parse(init = nil) + data = init + @__lexer.lex.each do |pos, event, tok, state| + @__line, @__col = *pos + @__state = state + data = if respond_to?(event, true) + then __send__(event, tok, data) + else on_default(event, tok, data) + end + end + data + end + + private + + def on_default(event, token, data) + data + end + # :startdoc: + end + end + end +end diff --git a/lib/prism/translation/ripper/lexer.rb b/lib/prism/translation/ripper/lexer.rb new file mode 100644 index 0000000000..c6aeae4bd7 --- /dev/null +++ b/lib/prism/translation/ripper/lexer.rb @@ -0,0 +1,133 @@ +# frozen_string_literal: true +# :markup: markdown + +require_relative "../ripper" + +module Prism + module Translation + class Ripper + class Lexer < Ripper # :nodoc: + class State # :nodoc: + attr_reader :to_int, :to_s + + def initialize(i) + @to_int = i + @to_s = Ripper.lex_state_name(i) + freeze + end + + def [](index) + case index + when 0, :to_int + @to_int + when 1, :to_s + @to_s + else + nil + end + end + + alias to_i to_int + alias inspect to_s + def pretty_print(q) q.text(to_s) end + def ==(i) super or to_int == i end + def &(i) self.class.new(to_int & i) end + def |(i) self.class.new(to_int | i) end + def allbits?(i) to_int.allbits?(i) end + def anybits?(i) to_int.anybits?(i) end + def nobits?(i) to_int.nobits?(i) end + + # Instances are frozen and there are only a handful of them so we + # cache them here. + STATES = Hash.new { |hash, key| hash[key] = State.new(key) } + private_constant :STATES + + def self.[](i) + STATES[i] + end + end + + class Elem # :nodoc: + attr_accessor :pos, :event, :tok, :state, :message + + def initialize(pos, event, tok, state, message = nil) + @pos = pos + @event = event + @tok = tok + @state = State[state] + @message = message + end + + def [](index) + case index + when 0, :pos + @pos + when 1, :event + @event + when 2, :tok + @tok + when 3, :state + @state + when 4, :message + @message + else + nil + end + end + + def inspect + "#<#{self.class}: #{event}@#{pos[0]}:#{pos[1]}:#{state}: #{tok.inspect}#{": " if message}#{message}>" + end + + alias to_s inspect + + def pretty_print(q) + q.group(2, "#<#{self.class}:", ">") { + q.breakable + q.text("#{event}@#{pos[0]}:#{pos[1]}") + q.breakable + state.pretty_print(q) + q.breakable + q.text("token: ") + tok.pretty_print(q) + if message + q.breakable + q.text("message: ") + q.text(message) + end + } + end + + def to_a + if @message + [@pos, @event, @tok, @state, @message] + else + [@pos, @event, @tok, @state] + end + end + end + + # Pretty much just the same as Prism.lex_compat. + def lex(raise_errors: false) + Ripper.lex(@source, filename, lineno, raise_errors: raise_errors) + end + + # Returns the lex_compat result wrapped in `Elem`. Errors are omitted. + # Since ripper is a streaming parser, tokens are expected to be emitted in the order + # that the parser encounters them. This is not implemented. + def parse(...) + lex(...).map do |position, event, token, state| + Elem.new(position, event, token, state.to_int) + end + end + + # Similar to parse but ripper sorts the elements by position in the source. Also + # includes errors. Since prism does error recovery, in cases of syntax errors + # the result may differ greatly compared to ripper. + def scan(...) + parse(...) + end + end + end + end +end diff --git a/lib/prism/translation/ripper/sexp.rb b/lib/prism/translation/ripper/sexp.rb index dc26a639a3..46c0333544 100644 --- a/lib/prism/translation/ripper/sexp.rb +++ b/lib/prism/translation/ripper/sexp.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true +# :markup: markdown require_relative "../ripper" @@ -7,9 +8,7 @@ module Prism class Ripper # This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that # returns the arrays of [type, *children]. - class SexpBuilder < Ripper - # :stopdoc: - + class SexpBuilder < Ripper # :nodoc: attr_reader :error private @@ -64,16 +63,12 @@ module Prism remove_method :on_parse_error alias on_parse_error on_error alias compile_error on_error - - # :startdoc: end # This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that # returns the same values as ::Ripper::SexpBuilder except with a couple of # niceties that flatten linked lists into arrays. - class SexpBuilderPP < SexpBuilder - # :stopdoc: - + class SexpBuilderPP < SexpBuilder # :nodoc: private def on_heredoc_dedent(val, width) @@ -117,8 +112,6 @@ module Prism alias_method "on_#{event}", :_dispatch_event_push end end - - # :startdoc: end end end diff --git a/lib/prism/translation/ripper/shim.rb b/lib/prism/translation/ripper/shim.rb index 10e21cd16a..00ed625da3 100644 --- a/lib/prism/translation/ripper/shim.rb +++ b/lib/prism/translation/ripper/shim.rb @@ -2,4 +2,6 @@ # This writes the prism ripper translation into the Ripper constant so that # users can transparently use Ripper without any changes. +# :stopdoc: Ripper = Prism::Translation::Ripper +# :startdoc: diff --git a/lib/prism/translation/ruby_parser.rb b/lib/prism/translation/ruby_parser.rb index 8784e22d10..42bc5ee658 100644 --- a/lib/prism/translation/ruby_parser.rb +++ b/lib/prism/translation/ruby_parser.rb @@ -1,21 +1,27 @@ # frozen_string_literal: true +# :markup: markdown begin - require "ruby_parser" + require "sexp" rescue LoadError - warn(%q{Error: Unable to load ruby_parser. Add `gem "ruby_parser"` to your Gemfile.}) + warn(%q{Error: Unable to load sexp. Add `gem "sexp_processor"` to your Gemfile.}) exit(1) end +class RubyParser # :nodoc: + class SyntaxError < RuntimeError # :nodoc: + end +end + module Prism module Translation # This module is the entry-point for converting a prism syntax tree into the # seattlerb/ruby_parser gem's syntax tree. class RubyParser # A prism visitor that builds Sexp objects. - class Compiler < ::Prism::Compiler + class Compiler < ::Prism::Compiler # :nodoc: # This is the name of the file that we are compiling. We set it on every - # Sexp object that is generated, and also use it to compile __FILE__ + # Sexp object that is generated, and also use it to compile `__FILE__` # nodes. attr_reader :file @@ -131,7 +137,7 @@ module Prism # $+ # ^^ def visit_back_reference_read_node(node) - s(node, :back_ref, node.name.name.delete_prefix("$").to_sym) + s(node, :back_ref, node.name.to_s.delete_prefix("$").to_sym) end # begin end @@ -366,14 +372,18 @@ module Prism visit(node.constant_path) end - if node.body.nil? - s(node, :class, name, visit(node.superclass)) - elsif node.body.is_a?(StatementsNode) - compiler = copy_compiler(in_def: false) - s(node, :class, name, visit(node.superclass)).concat(node.body.body.map { |child| child.accept(compiler) }) - else - s(node, :class, name, visit(node.superclass), node.body.accept(copy_compiler(in_def: false))) - end + result = + if node.body.nil? + s(node, :class, name, visit(node.superclass)) + elsif node.body.is_a?(StatementsNode) + compiler = copy_compiler(in_def: false) + s(node, :class, name, visit(node.superclass)).concat(node.body.body.map { |child| child.accept(compiler) }) + else + s(node, :class, name, visit(node.superclass), node.body.accept(copy_compiler(in_def: false))) + end + + attach_comments(result, node) + result end # @@foo @@ -384,9 +394,6 @@ module Prism # @@foo = 1 # ^^^^^^^^^ - # - # @@foo, @@bar = 1 - # ^^^^^ ^^^^^ def visit_class_variable_write_node(node) s(node, class_variable_write_type, node.name, visit_write_value(node.value)) end @@ -524,7 +531,9 @@ module Prism s(node, :defs, visit(node.receiver), name) end + attach_comments(result, node) result.line(node.name_loc.start_line) + if node.parameters.nil? result << s(node, :args).line(node.name_loc.start_line) else @@ -639,9 +648,6 @@ module Prism # $foo = 1 # ^^^^^^^^ - # - # $foo, $bar = 1 - # ^^^^ ^^^^ def visit_global_variable_write_node(node) s(node, :gasgn, node.name, visit_write_value(node.value)) end @@ -787,9 +793,6 @@ module Prism # @foo = 1 # ^^^^^^^^ - # - # @foo, @bar = 1 - # ^^^^ ^^^^ def visit_instance_variable_write_node(node) s(node, :iasgn, node.name, visit_write_value(node.value)) end @@ -976,8 +979,8 @@ module Prism def visit_lambda_node(node) parameters = case node.parameters - when nil, NumberedParametersNode - s(node, :args) + when nil, ItParametersNode, NumberedParametersNode + 0 else visit(node.parameters) end @@ -1001,9 +1004,6 @@ module Prism # foo = 1 # ^^^^^^^ - # - # foo, bar = 1 - # ^^^ ^^^ def visit_local_variable_write_node(node) s(node, :lasgn, node.name, visit_write_value(node.value)) end @@ -1059,8 +1059,8 @@ module Prism # A node that is missing from the syntax tree. This is only used in the # case of a syntax error. The parser gem doesn't have such a concept, so # we invent our own here. - def visit_missing_node(node) - raise "Cannot visit missing node directly" + def visit_error_recovery_node(node) + raise "Cannot visit error recovery node directly" end # module Foo; end @@ -1073,14 +1073,18 @@ module Prism visit(node.constant_path) end - if node.body.nil? - s(node, :module, name) - elsif node.body.is_a?(StatementsNode) - compiler = copy_compiler(in_def: false) - s(node, :module, name).concat(node.body.body.map { |child| child.accept(compiler) }) - else - s(node, :module, name, node.body.accept(copy_compiler(in_def: false))) - end + result = + if node.body.nil? + s(node, :module, name) + elsif node.body.is_a?(StatementsNode) + compiler = copy_compiler(in_def: false) + s(node, :module, name).concat(node.body.body.map { |child| child.accept(compiler) }) + else + s(node, :module, name, node.body.accept(copy_compiler(in_def: false))) + end + + attach_comments(result, node) + result end # foo, bar = baz @@ -1136,6 +1140,12 @@ module Prism s(node, :nil) end + # def foo(&nil); end + # ^^^^ + def visit_no_block_parameter_node(node) + :"&nil" + end + # def foo(**nil); end # ^^^^^ def visit_no_keywords_parameter_node(node) @@ -1188,7 +1198,7 @@ module Prism # ^^^^^^^^^ def visit_parameters_node(node) children = - node.compact_child_nodes.map do |element| + node.each_child_node.map do |element| if element.is_a?(MultiTargetNode) visit_destructured_parameter(element) else @@ -1537,6 +1547,17 @@ module Prism private + # Attach prism comments to the given sexp. + def attach_comments(sexp, node) + return unless node.comments + return if node.comments.empty? + + extra = node.location.start_line - node.comments.last.location.start_line + comments = node.comments.map(&:slice) + comments.concat([nil] * [0, extra].max) + sexp.comments = comments.join("\n") + end + # Create a new compiler with the given options. def copy_compiler(in_def: self.in_def, in_pattern: self.in_pattern) Compiler.new(file, in_def: in_def, in_pattern: in_pattern) @@ -1615,6 +1636,14 @@ module Prism translate(Prism.parse_file(filepath, partial_script: true), filepath) end + # Parse the give file and translate it into the + # seattlerb/ruby_parser gem's Sexp format. This method is + # provided for API compatibility to RubyParser and takes an + # optional +timeout+ argument. + def process(ruby, file = "(string)", timeout = nil) + Timeout.timeout(timeout) { parse(ruby, file) } + end + class << self # Parse the given source and translate it into the seattlerb/ruby_parser # gem's Sexp format. @@ -1639,6 +1668,7 @@ module Prism raise ::RubyParser::SyntaxError, "#{filepath}:#{error.location.start_line} :: #{error.message}" end + result.attach_comments! result.value.accept(Compiler.new(filepath)) end end |
