summaryrefslogtreecommitdiff
path: root/test/prism/ruby/parser_test.rb
diff options
context:
space:
mode:
Diffstat (limited to 'test/prism/ruby/parser_test.rb')
-rw-r--r--test/prism/ruby/parser_test.rb308
1 files changed, 308 insertions, 0 deletions
diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb
new file mode 100644
index 0000000000..55c12cab6f
--- /dev/null
+++ b/test/prism/ruby/parser_test.rb
@@ -0,0 +1,308 @@
+# frozen_string_literal: true
+
+require_relative "../test_helper"
+
+begin
+ verbose, $VERBOSE = $VERBOSE, nil
+ require "parser/ruby33"
+rescue LoadError
+ # In CRuby's CI, we're not going to test against the parser gem because we
+ # don't want to have to install it. So in this case we'll just skip this test.
+ return
+ensure
+ $VERBOSE = verbose
+end
+
+# First, opt in to every AST feature.
+Parser::Builders::Default.modernize
+Prism::Translation::Parser::Builder.modernize
+
+# The parser gem rejects some strings that would most likely lead to errors
+# in consumers due to encoding problems. RuboCop however monkey-patches this
+# method out in order to accept such code.
+# https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/builders/default.rb#L2289-L2295
+Parser::Builders::Default.prepend(
+ Module.new {
+ def string_value(token)
+ value(token)
+ end
+ }
+)
+
+# Modify the source map == check so that it doesn't check against the node
+# itself so we don't get into a recursive loop.
+Parser::Source::Map.prepend(
+ Module.new {
+ def ==(other)
+ self.class == other.class &&
+ (instance_variables - %i[@node]).map do |ivar|
+ instance_variable_get(ivar) == other.instance_variable_get(ivar)
+ end.reduce(:&)
+ end
+ }
+)
+
+# Next, ensure that we're comparing the nodes and also comparing the source
+# ranges so that we're getting all of the necessary information.
+Parser::AST::Node.prepend(
+ Module.new {
+ def ==(other)
+ super && (location == other.location)
+ end
+ }
+)
+
+module Prism
+ class ParserTest < TestCase
+ # These files contain code with valid syntax that can't be parsed.
+ skip_syntax_error = [
+ # alias/undef with %s(abc) symbol literal
+ "alias.txt",
+ "seattlerb/bug_215.txt",
+
+ # %Q with newline delimiter and heredoc interpolation
+ "heredoc_percent_q_newline_delimiter.txt",
+
+ # 1.. && 2
+ "ranges.txt",
+
+ # https://bugs.ruby-lang.org/issues/21168#note-5
+ "command_method_call_2.txt",
+ ]
+
+ # These files contain code that is being parsed incorrectly by the parser
+ # gem, and therefore we don't want to compare against our translation.
+ skip_incorrect = [
+ # https://github.com/whitequark/parser/issues/1017
+ "spanning_heredoc.txt",
+ "spanning_heredoc_newlines.txt",
+
+ # https://github.com/whitequark/parser/issues/1021
+ "seattlerb/heredoc_nested.txt",
+
+ # https://github.com/whitequark/parser/issues/1016
+ "whitequark/unary_num_pow_precedence.txt",
+
+ # https://github.com/whitequark/parser/issues/950
+ "whitequark/dedenting_interpolating_heredoc_fake_line_continuation.txt",
+
+ # Contains an escaped multibyte character. This is supposed to drop to backslash
+ "seattlerb/regexp_escape_extended.txt",
+
+ # https://github.com/whitequark/parser/issues/1020
+ # These contain consecutive \r characters, followed by \n. Prism only receives
+ # the already modified source buffer which dropped one \r but must know the
+ # original code to parse it correctly.
+ "seattlerb/heredoc_with_extra_carriage_returns_windows.txt",
+ "seattlerb/heredoc_with_only_carriage_returns_windows.txt",
+ "seattlerb/heredoc_with_only_carriage_returns.txt",
+
+ # https://github.com/whitequark/parser/issues/1026
+ # Regex with \c escape
+ "unescaping.txt",
+ "seattlerb/regexp_esc_C_slash.txt",
+
+ # https://github.com/whitequark/parser/issues/1084
+ "unary_method_calls.txt",
+ ]
+
+ # These files are failing to translate their lexer output into the lexer
+ # output expected by the parser gem, so we'll skip them for now.
+ skip_tokens = [
+ "dash_heredocs.txt",
+ "embdoc_no_newline_at_end.txt",
+ "methods.txt",
+ "seattlerb/bug169.txt",
+ "seattlerb/case_in.txt",
+ "seattlerb/difficult4__leading_dots2.txt",
+ "seattlerb/difficult6__7.txt",
+ "seattlerb/difficult6__8.txt",
+ "seattlerb/heredoc_unicode.txt",
+ "seattlerb/parse_line_heredoc.txt",
+ "seattlerb/pct_w_heredoc_interp_nested.txt",
+ "seattlerb/required_kwarg_no_value.txt",
+ "seattlerb/TestRubyParserShared.txt",
+ "unparser/corpus/literal/assignment.txt",
+ "unparser/corpus/literal/literal.txt",
+ "whitequark/args.txt",
+ "whitequark/beginless_erange_after_newline.txt",
+ "whitequark/beginless_irange_after_newline.txt",
+ "whitequark/forward_arg_with_open_args.txt",
+ "whitequark/kwarg_no_paren.txt",
+ "whitequark/lbrace_arg_after_command_args.txt",
+ "whitequark/multiple_pattern_matches.txt",
+ "whitequark/newline_in_hash_argument.txt",
+ "whitequark/pattern_matching_expr_in_paren.txt",
+ "whitequark/pattern_matching_hash.txt",
+ "whitequark/ruby_bug_14690.txt",
+ "whitequark/ruby_bug_9669.txt",
+ "whitequark/space_args_arg_block.txt",
+ "whitequark/space_args_block.txt"
+ ]
+
+ Fixture.each_for_version(except: skip_syntax_error, version: "3.3") do |fixture|
+ define_method(fixture.test_name) do
+ assert_equal_parses(
+ fixture,
+ compare_asts: !skip_incorrect.include?(fixture.path),
+ compare_tokens: !skip_tokens.include?(fixture.path),
+ compare_comments: fixture.path != "embdoc_no_newline_at_end.txt"
+ )
+ end
+ end
+
+ def test_non_prism_builder_class_deprecated
+ warnings = capture_warnings { Prism::Translation::Parser33.new(Parser::Builders::Default.new) }
+
+ assert_include(warnings, "#{__FILE__}:#{__LINE__ - 2}")
+ assert_include(warnings, "is not a `Prism::Translation::Parser::Builder` subclass")
+
+ warnings = capture_warnings { Prism::Translation::Parser33.new }
+ assert_empty(warnings)
+ end
+
+ if RUBY_VERSION >= "3.3"
+ def test_current_parser_for_current_ruby
+ major, minor = CURRENT_MAJOR_MINOR.split(".")
+ # Let's just hope there never is a Ruby 3.10 or similar
+ expected = major.to_i * 10 + minor.to_i
+ assert_equal(expected, Translation::ParserCurrent.new.version)
+ end
+ end
+
+ def test_invalid_syntax
+ code = <<~RUBY
+ foo do
+ case bar
+ when
+ end
+ end
+ RUBY
+ buffer = Parser::Source::Buffer.new("(string)")
+ buffer.source = code
+
+ parser = Prism::Translation::Parser33.new
+ parser.diagnostics.all_errors_are_fatal = true
+ assert_raise(Parser::SyntaxError) { parser.tokenize(buffer) }
+ end
+
+ def test_it_block_parameter_syntax
+ it_fixture_path = Pathname(__dir__).join("../../../test/prism/fixtures/3.4/it.txt")
+
+ buffer = Parser::Source::Buffer.new(it_fixture_path)
+ buffer.source = it_fixture_path.read
+ actual_ast = Prism::Translation::Parser34.new.tokenize(buffer)[0]
+
+ it_block_parameter_sexp = parse_sexp {
+ s(:begin,
+ s(:itblock,
+ s(:send, nil, :x), :it,
+ s(:lvar, :it)),
+ s(:itblock,
+ s(:lambda), :it,
+ s(:lvar, :it)))
+ }
+
+ assert_equal(it_block_parameter_sexp, actual_ast.to_sexp)
+ end
+
+ private
+
+ def assert_equal_parses(fixture, compare_asts: true, compare_tokens: true, compare_comments: true)
+ buffer = Parser::Source::Buffer.new(fixture.path, 1)
+ buffer.source = fixture.read
+
+ parser = Parser::Ruby33.new
+ parser.diagnostics.consumer = ->(*) {}
+ parser.diagnostics.all_errors_are_fatal = true
+
+ expected_ast, expected_comments, expected_tokens =
+ ignore_warnings { parser.tokenize(buffer) }
+
+ actual_ast, actual_comments, actual_tokens =
+ ignore_warnings { Prism::Translation::Parser33.new.tokenize(buffer) }
+
+ if expected_ast == actual_ast
+ if !compare_asts && !Fixture.custom_base_path?
+ puts "#{fixture.path} is now passing"
+ end
+
+ assert_equal expected_ast, actual_ast, -> { assert_equal_asts_message(expected_ast, actual_ast) }
+
+ begin
+ assert_equal_tokens(expected_tokens, actual_tokens)
+ rescue Test::Unit::AssertionFailedError
+ raise if compare_tokens
+ else
+ puts "#{fixture.path} is now passing" if !compare_tokens && !Fixture.custom_base_path?
+ end
+
+ assert_equal_comments(expected_comments, actual_comments) if compare_comments
+ elsif compare_asts
+ assert_equal expected_ast, actual_ast, -> { assert_equal_asts_message(expected_ast, actual_ast) }
+ end
+ end
+
+ def assert_equal_asts_message(expected_ast, actual_ast)
+ queue = [[expected_ast, actual_ast]]
+
+ while (left, right = queue.shift)
+ if left.type != right.type
+ return "expected: #{left.type}\nactual: #{right.type}"
+ end
+
+ if left.location != right.location
+ return "expected:\n#{left.inspect}\n#{left.location.inspect}\nactual:\n#{right.inspect}\n#{right.location.inspect}"
+ end
+
+ if left.type == :str && left.children[0] != right.children[0]
+ return "expected: #{left.inspect}\nactual: #{right.inspect}"
+ end
+
+ left.children.zip(right.children).each do |left_child, right_child|
+ queue << [left_child, right_child] if left_child.is_a?(Parser::AST::Node)
+ end
+ end
+
+ "expected: #{expected_ast.inspect}\nactual: #{actual_ast.inspect}"
+ end
+
+ def assert_equal_tokens(expected_tokens, actual_tokens)
+ if expected_tokens != actual_tokens
+ index = 0
+ max_index = [expected_tokens, actual_tokens].map(&:size).max
+
+ while index <= max_index
+ expected_token = expected_tokens.fetch(index, [])
+ actual_token = actual_tokens.fetch(index, [])
+
+ index += 1
+
+ # There are a lot of tokens that have very specific meaning according
+ # to the context of the parser. We don't expose that information in
+ # prism, so we need to normalize these tokens a bit.
+ if expected_token[0] == :kDO_BLOCK && actual_token[0] == :kDO
+ actual_token[0] = expected_token[0]
+ end
+
+ # Now we can assert that the tokens are actually equal.
+ assert_equal expected_token, actual_token, -> {
+ "expected: #{expected_token.inspect}\n" \
+ "actual: #{actual_token.inspect}"
+ }
+ end
+ end
+ end
+
+ def assert_equal_comments(expected_comments, actual_comments)
+ assert_equal expected_comments, actual_comments, -> {
+ "expected: #{expected_comments.inspect}\n" \
+ "actual: #{actual_comments.inspect}"
+ }
+ end
+
+ def parse_sexp(&block)
+ Class.new { extend AST::Sexp }.instance_eval(&block).to_sexp
+ end
+ end
+end