1 files changed, 121 insertions, 104 deletions
diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb
index 606a0e54f6..55c12cab6f 100644
--- a/test/prism/ruby/parser_test.rb
+++ b/test/prism/ruby/parser_test.rb
@@ -5,7 +5,6 @@ require_relative "../test_helper"
 begin
   verbose, $VERBOSE = $VERBOSE, nil
   require "parser/ruby33"
-  require "prism/translation/parser33"
 rescue LoadError
   # In CRuby's CI, we're not going to test against the parser gem because we
   # don't want to have to install it. So in this case we'll just skip this test.
@@ -16,6 +15,19 @@ end
 
 # First, opt in to every AST feature.
 Parser::Builders::Default.modernize
+Prism::Translation::Parser::Builder.modernize
+
+# The parser gem rejects some strings that would most likely lead to errors
+# in consumers due to encoding problems. RuboCop however monkey-patches this
+# method out in order to accept such code.
+# https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/builders/default.rb#L2289-L2295
+Parser::Builders::Default.prepend(
+  Module.new {
+    def string_value(token)
+      value(token)
+    end
+  }
+)
 
 # Modify the source map == check so that it doesn't check against the node
 # itself so we don't get into a recursive loop.
@@ -42,6 +54,22 @@ Parser::AST::Node.prepend(
 
 module Prism
   class ParserTest < TestCase
+    # These files contain code with valid syntax that can't be parsed.
+    skip_syntax_error = [
+      # alias/undef with %s(abc) symbol literal
+      "alias.txt",
+      "seattlerb/bug_215.txt",
+
+      # %Q with newline delimiter and heredoc interpolation
+      "heredoc_percent_q_newline_delimiter.txt",
+
+      # 1.. && 2
+      "ranges.txt",
+
+      # https://bugs.ruby-lang.org/issues/21168#note-5
+      "command_method_call_2.txt",
+    ]
+
     # These files contain code that is being parsed incorrectly by the parser
     # gem, and therefore we don't want to compare against our translation.
     skip_incorrect = [
@@ -53,134 +81,131 @@ module Prism
       "seattlerb/heredoc_nested.txt",
 
       # https://github.com/whitequark/parser/issues/1016
-      "whitequark/unary_num_pow_precedence.txt"
-    ]
+      "whitequark/unary_num_pow_precedence.txt",
 
-    # These files are either failing to parse or failing to translate, so we'll
-    # skip them for now.
-    skip_all = skip_incorrect | [
-      "regex.txt",
-      "unescaping.txt",
-      "seattlerb/bug190.txt",
+      # https://github.com/whitequark/parser/issues/950
+      "whitequark/dedenting_interpolating_heredoc_fake_line_continuation.txt",
+
+      # Contains an escaped multibyte character. This is supposed to drop the backslash
+      "seattlerb/regexp_escape_extended.txt",
+
+      # https://github.com/whitequark/parser/issues/1020
+      # These contain consecutive \r characters, followed by \n. Prism only receives
+      # the already modified source buffer which dropped one \r but must know the
+      # original code to parse it correctly.
       "seattlerb/heredoc_with_extra_carriage_returns_windows.txt",
       "seattlerb/heredoc_with_only_carriage_returns_windows.txt",
       "seattlerb/heredoc_with_only_carriage_returns.txt",
-      "seattlerb/parse_line_heredoc_hardnewline.txt",
-      "seattlerb/pctW_lineno.txt",
+
+      # https://github.com/whitequark/parser/issues/1026
+      # Regex with \c escape
+      "unescaping.txt",
       "seattlerb/regexp_esc_C_slash.txt",
-      "unparser/corpus/literal/literal.txt",
-      "unparser/corpus/semantic/dstr.txt",
-      "whitequark/dedenting_interpolating_heredoc_fake_line_continuation.txt",
-      "whitequark/parser_slash_slash_n_escaping_in_literals.txt",
-      "whitequark/ruby_bug_11989.txt"
-    ]
 
-    # Not sure why these files are failing on JRuby, but skipping them for now.
-    if RUBY_ENGINE == "jruby"
-      skip_all.push("emoji_method_calls.txt", "symbols.txt")
-    end
+      # https://github.com/whitequark/parser/issues/1084
+      "unary_method_calls.txt",
+    ]
 
     # These files are failing to translate their lexer output into the lexer
     # output expected by the parser gem, so we'll skip them for now.
     skip_tokens = [
-      "comments.txt",
       "dash_heredocs.txt",
-      "dos_endings.txt",
       "embdoc_no_newline_at_end.txt",
-      "heredoc_with_comment.txt",
-      "heredocs_with_ignored_newlines.txt",
-      "indented_file_end.txt",
       "methods.txt",
-      "strings.txt",
-      "tilde_heredocs.txt",
-      "xstring_with_backslash.txt",
-      "seattlerb/backticks_interpolation_line.txt",
       "seattlerb/bug169.txt",
       "seattlerb/case_in.txt",
-      "seattlerb/class_comments.txt",
       "seattlerb/difficult4__leading_dots2.txt",
       "seattlerb/difficult6__7.txt",
       "seattlerb/difficult6__8.txt",
-      "seattlerb/dsym_esc_to_sym.txt",
-      "seattlerb/heredoc__backslash_dos_format.txt",
-      "seattlerb/heredoc_backslash_nl.txt",
-      "seattlerb/heredoc_comma_arg.txt",
-      "seattlerb/heredoc_squiggly_blank_line_plus_interpolation.txt",
-      "seattlerb/heredoc_squiggly_blank_lines.txt",
-      "seattlerb/heredoc_squiggly_interp.txt",
-      "seattlerb/heredoc_squiggly_tabs_extra.txt",
-      "seattlerb/heredoc_squiggly_tabs.txt",
-      "seattlerb/heredoc_squiggly_visually_blank_lines.txt",
-      "seattlerb/heredoc_squiggly.txt",
       "seattlerb/heredoc_unicode.txt",
-      "seattlerb/heredoc_with_carriage_return_escapes_windows.txt",
-      "seattlerb/heredoc_with_carriage_return_escapes.txt",
-      "seattlerb/heredoc_with_interpolation_and_carriage_return_escapes_windows.txt",
-      "seattlerb/heredoc_with_interpolation_and_carriage_return_escapes.txt",
-      "seattlerb/interpolated_symbol_array_line_breaks.txt",
-      "seattlerb/interpolated_word_array_line_breaks.txt",
-      "seattlerb/label_vs_string.txt",
-      "seattlerb/module_comments.txt",
-      "seattlerb/non_interpolated_symbol_array_line_breaks.txt",
-      "seattlerb/non_interpolated_word_array_line_breaks.txt",
-      "seattlerb/parse_line_block_inline_comment_leading_newlines.txt",
-      "seattlerb/parse_line_block_inline_comment.txt",
-      "seattlerb/parse_line_block_inline_multiline_comment.txt",
-      "seattlerb/parse_line_dstr_escaped_newline.txt",
       "seattlerb/parse_line_heredoc.txt",
-      "seattlerb/parse_line_multiline_str_literal_n.txt",
-      "seattlerb/parse_line_str_with_newline_escape.txt",
       "seattlerb/pct_w_heredoc_interp_nested.txt",
-      "seattlerb/qsymbols_empty_space.txt",
-      "seattlerb/qw_escape_term.txt",
-      "seattlerb/qWords_space.txt",
-      "seattlerb/read_escape_unicode_curlies.txt",
-      "seattlerb/read_escape_unicode_h4.txt",
       "seattlerb/required_kwarg_no_value.txt",
-      "seattlerb/slashy_newlines_within_string.txt",
-      "seattlerb/str_double_escaped_newline.txt",
-      "seattlerb/str_double_newline.txt",
-      "seattlerb/str_evstr_escape.txt",
-      "seattlerb/str_newline_hash_line_number.txt",
-      "seattlerb/str_single_newline.txt",
-      "seattlerb/symbols_empty_space.txt",
       "seattlerb/TestRubyParserShared.txt",
       "unparser/corpus/literal/assignment.txt",
-      "unparser/corpus/literal/dstr.txt",
-      "unparser/corpus/semantic/opasgn.txt",
+      "unparser/corpus/literal/literal.txt",
       "whitequark/args.txt",
       "whitequark/beginless_erange_after_newline.txt",
       "whitequark/beginless_irange_after_newline.txt",
-      "whitequark/bug_ascii_8bit_in_literal.txt",
-      "whitequark/bug_def_no_paren_eql_begin.txt",
-      "whitequark/dedenting_heredoc.txt",
-      "whitequark/dedenting_non_interpolating_heredoc_line_continuation.txt",
       "whitequark/forward_arg_with_open_args.txt",
-      "whitequark/interp_digit_var.txt",
+      "whitequark/kwarg_no_paren.txt",
       "whitequark/lbrace_arg_after_command_args.txt",
       "whitequark/multiple_pattern_matches.txt",
       "whitequark/newline_in_hash_argument.txt",
-      "whitequark/parser_bug_640.txt",
-      "whitequark/parser_drops_truncated_parts_of_squiggly_heredoc.txt",
-      "whitequark/ruby_bug_11990.txt",
+      "whitequark/pattern_matching_expr_in_paren.txt",
+      "whitequark/pattern_matching_hash.txt",
       "whitequark/ruby_bug_14690.txt",
       "whitequark/ruby_bug_9669.txt",
-      "whitequark/slash_newline_in_heredocs.txt",
       "whitequark/space_args_arg_block.txt",
       "whitequark/space_args_block.txt"
     ]
 
-    Fixture.each do |fixture|
+    Fixture.each_for_version(except: skip_syntax_error, version: "3.3") do |fixture|
       define_method(fixture.test_name) do
         assert_equal_parses(
           fixture,
-          compare_asts: !skip_all.include?(fixture.path),
+          compare_asts: !skip_incorrect.include?(fixture.path),
           compare_tokens: !skip_tokens.include?(fixture.path),
           compare_comments: fixture.path != "embdoc_no_newline_at_end.txt"
         )
       end
     end
 
+    def test_non_prism_builder_class_deprecated
+      warnings = capture_warnings { Prism::Translation::Parser33.new(Parser::Builders::Default.new) }
+
+      assert_include(warnings, "#{__FILE__}:#{__LINE__ - 2}") # file:line of the Parser33.new call two lines up
+      assert_include(warnings, "is not a `Prism::Translation::Parser::Builder` subclass")
+
+      warnings = capture_warnings { Prism::Translation::Parser33.new } # no builder argument: no warning expected
+      assert_empty(warnings)
+    end
+
+    if RUBY_VERSION >= "3.3"
+      def test_current_parser_for_current_ruby
+        major, minor = CURRENT_MAJOR_MINOR.split(".")
+        # Expected version is major * 10 + minor (e.g. "3.3" => 33); let's just hope there never is a Ruby 3.10 or similar
+        expected = major.to_i * 10 + minor.to_i
+        assert_equal(expected, Translation::ParserCurrent.new.version)
+      end
+    end
+
+    def test_invalid_syntax
+      code = <<~RUBY
+        foo do
+          case bar
+          when
+          end
+        end
+      RUBY
+      buffer = Parser::Source::Buffer.new("(string)")
+      buffer.source = code
+
+      parser = Prism::Translation::Parser33.new
+      parser.diagnostics.all_errors_are_fatal = true
+      assert_raise(Parser::SyntaxError) { parser.tokenize(buffer) } # the condition-less `when` above must be rejected
+    end
+
+    def test_it_block_parameter_syntax
+      it_fixture_path = Pathname(__dir__).join("../../../test/prism/fixtures/3.4/it.txt")
+
+      buffer = Parser::Source::Buffer.new(it_fixture_path)
+      buffer.source = it_fixture_path.read
+      actual_ast = Prism::Translation::Parser34.new.tokenize(buffer)[0] # tokenize yields [ast, comments, tokens]; keep the AST
+
+      it_block_parameter_sexp = parse_sexp {
+        s(:begin,
+        s(:itblock,
+          s(:send, nil, :x), :it,
+          s(:lvar, :it)),
+        s(:itblock,
+          s(:lambda), :it,
+          s(:lvar, :it)))
+      }
+
+      assert_equal(it_block_parameter_sexp, actual_ast.to_sexp)
+    end
+
     private
 
     def assert_equal_parses(fixture, compare_asts: true, compare_tokens: true, compare_comments: true)
@@ -192,17 +217,13 @@ module Prism
       parser.diagnostics.all_errors_are_fatal = true
 
       expected_ast, expected_comments, expected_tokens =
-        begin
-          ignore_warnings { parser.tokenize(buffer) }
-        rescue ArgumentError, Parser::SyntaxError
-          return
-        end
+        ignore_warnings { parser.tokenize(buffer) }
 
       actual_ast, actual_comments, actual_tokens =
         ignore_warnings { Prism::Translation::Parser33.new.tokenize(buffer) }
 
       if expected_ast == actual_ast
-        if !compare_asts
+        if !compare_asts && !Fixture.custom_base_path?
           puts "#{fixture.path} is now passing"
         end
 
@@ -213,7 +234,7 @@ module Prism
         rescue Test::Unit::AssertionFailedError
           raise if compare_tokens
         else
-          puts "#{fixture.path} is now passing" if !compare_tokens
+          puts "#{fixture.path} is now passing" if !compare_tokens && !Fixture.custom_base_path?
         end
 
         assert_equal_comments(expected_comments, actual_comments) if compare_comments
@@ -248,22 +269,14 @@ module Prism
 
     def assert_equal_tokens(expected_tokens, actual_tokens)
       if expected_tokens != actual_tokens
-        expected_index = 0
-        actual_index = 0
-
-        while expected_index < expected_tokens.length
-          expected_token = expected_tokens[expected_index]
-          actual_token = actual_tokens.fetch(actual_index, [])
+        index = 0
+        max_index = [expected_tokens, actual_tokens].map(&:size).max
 
-          expected_index += 1
-          actual_index += 1
+        while index <= max_index
+          expected_token = expected_tokens.fetch(index, [])
+          actual_token = actual_tokens.fetch(index, [])
 
-          # The parser gem always has a space before a string end in list
-          # literals, but we don't. So we'll skip over the space.
-          if expected_token[0] == :tSPACE && actual_token[0] == :tSTRING_END
-            expected_index += 1
-            next
-          end
+          index += 1
 
           # There are a lot of tokens that have very specific meaning according
           # to the context of the parser. We don't expose that information in
@@ -287,5 +300,9 @@ module Prism
         "actual: #{actual_comments.inspect}"
       }
     end
+
+    def parse_sexp(&block) # evaluates the block on a throwaway class extended with AST::Sexp so `s(...)` resolves; returns the sexp form
+      Class.new { extend AST::Sexp }.instance_eval(&block).to_sexp
+    end
   end
 end