1 files changed, 308 insertions, 0 deletions
diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb
new file mode 100644
index 0000000000..55c12cab6f
--- /dev/null
+++ b/test/prism/ruby/parser_test.rb
@@ -0,0 +1,308 @@
+# frozen_string_literal: true
+
+require_relative "../test_helper"
+
+# Load the parser gem with $VERBOSE set to nil for the duration of the
+# require; the previous value is saved first and restored in the ensure clause.
+begin
+  verbose, $VERBOSE = $VERBOSE, nil
+  require "parser/ruby33"
+rescue LoadError
+  # In CRuby's CI, we're not going to test against the parser gem because we
+  # don't want to have to install it. So in this case we'll just skip this test.
+  return
+ensure
+  $VERBOSE = verbose
+end
+
+# First, opt in to every AST feature, on the parser gem's default builder and
+# then on prism's translation builder.
+[Parser::Builders::Default, Prism::Translation::Parser::Builder].each(&:modernize)
+
+# The parser gem rejects some strings that would most likely lead to errors
+# in consumers due to encoding problems. RuboCop however monkey-patches this
+# method out in order to accept such code.
+# https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/builders/default.rb#L2289-L2295
+Parser::Builders::Default.prepend(
+  Module.new {
+    def string_value(token)
+      value(token)
+    end
+  }
+)
+
+# Modify the source map == check so that it doesn't check against the node
+# itself so we don't get into a recursive loop.
+Parser::Source::Map.prepend(
+  Module.new {
+    def ==(other)
+      self.class == other.class &&
+        (instance_variables - %i[@node]).map do |ivar|
+          instance_variable_get(ivar) == other.instance_variable_get(ivar)
+        end.reduce(:&)
+    end
+  }
+)
+
+# Next, ensure that we're comparing the nodes and also comparing the source
+# ranges so that we're getting all of the necessary information.
+Parser::AST::Node.prepend(
+  Module.new {
+    def ==(other)
+      super && (location == other.location)
+    end
+  }
+)
+
+module Prism
+  class ParserTest < TestCase
+    # These files contain code with valid syntax that can't be parsed.
+    # The list is passed as `except:` when iterating over fixtures below
+    # (presumably so that no test is generated for them at all).
+    skip_syntax_error = [
+      # alias/undef with %s(abc) symbol literal
+      "alias.txt",
+      "seattlerb/bug_215.txt",
+
+      # %Q with newline delimiter and heredoc interpolation
+      "heredoc_percent_q_newline_delimiter.txt",
+
+      # 1.. && 2
+      "ranges.txt",
+
+      # https://bugs.ruby-lang.org/issues/21168#note-5
+      "command_method_call_2.txt",
+    ]
+
+    # These files contain code that is being parsed incorrectly by the parser
+    # gem, and therefore we don't want to compare against our translation.
+    # Fixtures listed here are run with `compare_asts: false`.
+    skip_incorrect = [
+      # https://github.com/whitequark/parser/issues/1017
+      "spanning_heredoc.txt",
+      "spanning_heredoc_newlines.txt",
+
+      # https://github.com/whitequark/parser/issues/1021
+      "seattlerb/heredoc_nested.txt",
+
+      # https://github.com/whitequark/parser/issues/1016
+      "whitequark/unary_num_pow_precedence.txt",
+
+      # https://github.com/whitequark/parser/issues/950
+      "whitequark/dedenting_interpolating_heredoc_fake_line_continuation.txt",
+
+      # Contains an escaped multibyte character. This is supposed to drop to backslash
+      "seattlerb/regexp_escape_extended.txt",
+
+      # https://github.com/whitequark/parser/issues/1020
+      # These contain consecutive \r characters, followed by \n. Prism only receives
+      # the already modified source buffer which dropped one \r but must know the
+      # original code to parse it correctly.
+      "seattlerb/heredoc_with_extra_carriage_returns_windows.txt",
+      "seattlerb/heredoc_with_only_carriage_returns_windows.txt",
+      "seattlerb/heredoc_with_only_carriage_returns.txt",
+
+      # https://github.com/whitequark/parser/issues/1026
+      # Regex with \c escape
+      "unescaping.txt",
+      "seattlerb/regexp_esc_C_slash.txt",
+
+      # https://github.com/whitequark/parser/issues/1084
+      "unary_method_calls.txt",
+    ]
+
+    # These files are failing to translate their lexer output into the lexer
+    # output expected by the parser gem, so we'll skip them for now.
+    # Fixtures listed here are run with `compare_tokens: false`.
+    skip_tokens = [
+      "dash_heredocs.txt",
+      "embdoc_no_newline_at_end.txt",
+      "methods.txt",
+      "seattlerb/bug169.txt",
+      "seattlerb/case_in.txt",
+      "seattlerb/difficult4__leading_dots2.txt",
+      "seattlerb/difficult6__7.txt",
+      "seattlerb/difficult6__8.txt",
+      "seattlerb/heredoc_unicode.txt",
+      "seattlerb/parse_line_heredoc.txt",
+      "seattlerb/pct_w_heredoc_interp_nested.txt",
+      "seattlerb/required_kwarg_no_value.txt",
+      "seattlerb/TestRubyParserShared.txt",
+      "unparser/corpus/literal/assignment.txt",
+      "unparser/corpus/literal/literal.txt",
+      "whitequark/args.txt",
+      "whitequark/beginless_erange_after_newline.txt",
+      "whitequark/beginless_irange_after_newline.txt",
+      "whitequark/forward_arg_with_open_args.txt",
+      "whitequark/kwarg_no_paren.txt",
+      "whitequark/lbrace_arg_after_command_args.txt",
+      "whitequark/multiple_pattern_matches.txt",
+      "whitequark/newline_in_hash_argument.txt",
+      "whitequark/pattern_matching_expr_in_paren.txt",
+      "whitequark/pattern_matching_hash.txt",
+      "whitequark/ruby_bug_14690.txt",
+      "whitequark/ruby_bug_9669.txt",
+      "whitequark/space_args_arg_block.txt",
+      "whitequark/space_args_block.txt"
+    ]
+
+    Fixture.each_for_version(except: skip_syntax_error, version: "3.3") do |fixture|
+      define_method(fixture.test_name) do
+        assert_equal_parses(
+          fixture,
+          compare_asts: !skip_incorrect.include?(fixture.path),
+          compare_tokens: !skip_tokens.include?(fixture.path),
+          compare_comments: fixture.path != "embdoc_no_newline_at_end.txt"
+        )
+      end
+    end
+
+    def test_non_prism_builder_class_deprecated
+      warnings = capture_warnings { Prism::Translation::Parser33.new(Parser::Builders::Default.new) }
+
+      assert_include(warnings, "#{__FILE__}:#{__LINE__ - 2}")
+      assert_include(warnings, "is not a `Prism::Translation::Parser::Builder` subclass")
+
+      warnings = capture_warnings { Prism::Translation::Parser33.new }
+      assert_empty(warnings)
+    end
+
+    if RUBY_VERSION >= "3.3"
+      def test_current_parser_for_current_ruby
+        major, minor = CURRENT_MAJOR_MINOR.split(".")
+        # Let's just hope there never is a Ruby 3.10 or similar
+        expected = major.to_i * 10 + minor.to_i
+        assert_equal(expected, Translation::ParserCurrent.new.version)
+      end
+    end
+
+    def test_invalid_syntax
+      code = <<~RUBY
+        foo do
+          case bar
+          when
+          end
+        end
+      RUBY
+      buffer = Parser::Source::Buffer.new("(string)")
+      buffer.source = code
+
+      parser = Prism::Translation::Parser33.new
+      parser.diagnostics.all_errors_are_fatal = true
+      assert_raise(Parser::SyntaxError) { parser.tokenize(buffer) }
+    end
+
+    def test_it_block_parameter_syntax
+      it_fixture_path = Pathname(__dir__).join("../../../test/prism/fixtures/3.4/it.txt")
+
+      buffer = Parser::Source::Buffer.new(it_fixture_path)
+      buffer.source = it_fixture_path.read
+      actual_ast = Prism::Translation::Parser34.new.tokenize(buffer)[0]
+
+      it_block_parameter_sexp = parse_sexp {
+        s(:begin,
+        s(:itblock,
+          s(:send, nil, :x), :it,
+          s(:lvar, :it)),
+        s(:itblock,
+          s(:lambda), :it,
+          s(:lvar, :it)))
+      }
+
+      assert_equal(it_block_parameter_sexp, actual_ast.to_sexp)
+    end
+
+    private
+
+    def assert_equal_parses(fixture, compare_asts: true, compare_tokens: true, compare_comments: true)
+      buffer = Parser::Source::Buffer.new(fixture.path, 1)
+      buffer.source = fixture.read
+
+      parser = Parser::Ruby33.new
+      parser.diagnostics.consumer = ->(*) {}
+      parser.diagnostics.all_errors_are_fatal = true
+
+      expected_ast, expected_comments, expected_tokens =
+        ignore_warnings { parser.tokenize(buffer) }
+
+      actual_ast, actual_comments, actual_tokens =
+        ignore_warnings { Prism::Translation::Parser33.new.tokenize(buffer) }
+
+      if expected_ast == actual_ast
+        if !compare_asts && !Fixture.custom_base_path?
+          puts "#{fixture.path} is now passing"
+        end
+
+        assert_equal expected_ast, actual_ast, -> { assert_equal_asts_message(expected_ast, actual_ast) }
+
+        begin
+          assert_equal_tokens(expected_tokens, actual_tokens)
+        rescue Test::Unit::AssertionFailedError
+          raise if compare_tokens
+        else
+          puts "#{fixture.path} is now passing" if !compare_tokens && !Fixture.custom_base_path?
+        end
+
+        assert_equal_comments(expected_comments, actual_comments) if compare_comments
+      elsif compare_asts
+        assert_equal expected_ast, actual_ast, -> { assert_equal_asts_message(expected_ast, actual_ast) }
+      end
+    end
+
+    def assert_equal_asts_message(expected_ast, actual_ast)
+      queue = [[expected_ast, actual_ast]]
+
+      while (left, right = queue.shift)
+        if left.type != right.type
+          return "expected: #{left.type}\nactual: #{right.type}"
+        end
+
+        if left.location != right.location
+          return "expected:\n#{left.inspect}\n#{left.location.inspect}\nactual:\n#{right.inspect}\n#{right.location.inspect}"
+        end
+
+        if left.type == :str && left.children[0] != right.children[0]
+          return "expected: #{left.inspect}\nactual: #{right.inspect}"
+        end
+
+        left.children.zip(right.children).each do |left_child, right_child|
+          queue << [left_child, right_child] if left_child.is_a?(Parser::AST::Node)
+        end
+      end
+
+      "expected: #{expected_ast.inspect}\nactual: #{actual_ast.inspect}"
+    end
+
+    def assert_equal_tokens(expected_tokens, actual_tokens)
+      if expected_tokens != actual_tokens
+        index = 0
+        max_index = [expected_tokens, actual_tokens].map(&:size).max
+
+        while index <= max_index
+          expected_token = expected_tokens.fetch(index, [])
+          actual_token = actual_tokens.fetch(index, [])
+
+          index += 1
+
+          # There are a lot of tokens that have very specific meaning according
+          # to the context of the parser. We don't expose that information in
+          # prism, so we need to normalize these tokens a bit.
+          if expected_token[0] == :kDO_BLOCK && actual_token[0] == :kDO
+            actual_token[0] = expected_token[0]
+          end
+
+          # Now we can assert that the tokens are actually equal.
+          assert_equal expected_token, actual_token, -> {
+            "expected: #{expected_token.inspect}\n" \
+            "actual: #{actual_token.inspect}"
+          }
+        end
+      end
+    end
+
+    def assert_equal_comments(expected_comments, actual_comments)
+      assert_equal expected_comments, actual_comments, -> {
+        "expected: #{expected_comments.inspect}\n" \
+        "actual: #{actual_comments.inspect}"
+      }
+    end
+
+    def parse_sexp(&block)
+      Class.new { extend AST::Sexp }.instance_eval(&block).to_sexp
+    end
+  end
+end