1 files changed, 371 insertions, 0 deletions
diff --git a/test/prism/parse_test.rb b/test/prism/parse_test.rb
new file mode 100644
index 0000000000..afb53e0668
--- /dev/null
+++ b/test/prism/parse_test.rb
@@ -0,0 +1,371 @@
+# frozen_string_literal: true
+
+require_relative "test_helper"
+
+module Prism
+  class ParseTest < TestCase
+    # A subclass of Ripper that extracts out magic comments.
+    class MagicCommentRipper < Ripper
+      attr_reader :magic_comments
+
+      def initialize(*)
+        super
+        @magic_comments = []
+      end
+
+      def on_magic_comment(key, value)
+        @magic_comments << [key, value]
+        super
+      end
+    end
+
+    # The trees are pretty-printed before being compared against the snapshots,
+    # and methods like Symbol#inspect take the default external encoding into
+    # account, so the output could differ between environments. Settings such
+    # as LANG=C or -Eascii-8bit change that default. To keep the snapshots
+    # consistent, remember the current value (teardown restores it) and force
+    # UTF-8, with warnings silenced while the default is reassigned.
+    def setup
+      @previous_default_external = Encoding.default_external
+      ignore_warnings { Encoding.default_external = Encoding::UTF_8 }
+    end
+
+    def teardown # undo setup: restore the saved default external encoding
+      ignore_warnings { Encoding.default_external = @previous_default_external }
+    end
+
+    def test_empty_string
+      result = Prism.parse("")
+      assert_equal [], result.value.statements.body
+    end
+
+    def test_parse_takes_file_path
+      filepath = "filepath.rb"
+      result = Prism.parse("def foo; __FILE__; end", filepath: filepath)
+
+      assert_equal filepath, find_source_file_node(result.value).filepath
+    end
+
+    def test_parse_takes_line
+      line = 4
+      result = Prism.parse("def foo\n __FILE__\nend", line: line)
+
+      assert_equal line, result.value.location.start_line
+      assert_equal line + 1, find_source_file_node(result.value).location.start_line
+
+      result = Prism.parse_lex("def foo\n __FILE__\nend", line: line)
+      assert_equal line, result.value.first.location.start_line
+    end
+
+    def test_parse_takes_negative_lines
+      line = -2
+      result = Prism.parse("def foo\n __FILE__\nend", line: line)
+
+      assert_equal line, result.value.location.start_line
+      assert_equal line + 1, find_source_file_node(result.value).location.start_line
+
+      result = Prism.parse_lex("def foo\n __FILE__\nend", line: line)
+      assert_equal line, result.value.first.location.start_line
+    end
+
+    def test_parse_lex
+      node, tokens = Prism.parse_lex("def foo; end").value
+
+      assert_kind_of ProgramNode, node
+      assert_equal 5, tokens.length
+    end
+
+    if !ENV["PRISM_BUILD_MINIMAL"]
+      def test_dump_file
+        assert_nothing_raised do
+          Prism.dump_file(__FILE__)
+        end
+
+        error = assert_raise Errno::ENOENT do
+          Prism.dump_file("idontexist.rb")
+        end
+
+        assert_equal "No such file or directory - idontexist.rb", error.message
+
+        assert_raise TypeError do
+          Prism.dump_file(nil)
+        end
+      end
+    end
+
+    def test_lex_file
+      assert_nothing_raised do
+        Prism.lex_file(__FILE__)
+      end
+
+      error = assert_raise Errno::ENOENT do
+        Prism.lex_file("idontexist.rb")
+      end
+
+      assert_equal "No such file or directory - idontexist.rb", error.message
+
+      assert_raise TypeError do
+        Prism.lex_file(nil)
+      end
+    end
+
+    def test_parse_lex_file
+      node, tokens = Prism.parse_lex_file(__FILE__).value
+
+      assert_kind_of ProgramNode, node
+      refute_empty tokens
+
+      error = assert_raise Errno::ENOENT do
+        Prism.parse_lex_file("idontexist.rb")
+      end
+
+      assert_equal "No such file or directory - idontexist.rb", error.message
+
+      assert_raise TypeError do
+        Prism.parse_lex_file(nil)
+      end
+    end
+
+    def test_parse_file
+      node = Prism.parse_file(__FILE__).value
+      assert_kind_of ProgramNode, node
+
+      error = assert_raise Errno::ENOENT do
+        Prism.parse_file("idontexist.rb")
+      end
+
+      assert_equal "No such file or directory - idontexist.rb", error.message
+
+      assert_raise TypeError do
+        Prism.parse_file(nil)
+      end
+    end
+
+    def test_parse_file_success
+      assert_predicate Prism.parse_file_comments(__FILE__), :any?
+
+      error = assert_raise Errno::ENOENT do
+        Prism.parse_file_comments("idontexist.rb")
+      end
+
+      assert_equal "No such file or directory - idontexist.rb", error.message
+
+      assert_raise TypeError do
+        Prism.parse_file_comments(nil)
+      end
+    end
+
+    def test_parse_file_comments
+      assert_predicate Prism.parse_file_comments(__FILE__), :any?
+
+      error = assert_raise Errno::ENOENT do
+        Prism.parse_file_comments("idontexist.rb")
+      end
+
+      assert_equal "No such file or directory - idontexist.rb", error.message
+
+      assert_raise TypeError do
+        Prism.parse_file_comments(nil)
+      end
+    end
+
+    # To accurately compare against Ripper, we need to make sure that we're
+    # running on CRuby 3.2+.
+    ripper_enabled = RUBY_ENGINE == "ruby" && RUBY_VERSION >= "3.2.0"
+
+    # The FOCUS environment variable allows you to specify one particular fixture
+    # to test, instead of all of them.
+    base = File.join(__dir__, "fixtures")
+    relatives = ENV["FOCUS"] ? [ENV["FOCUS"]] : Dir["**/*.txt", base: base]
+
+    relatives.each do |relative|
+      # These fail on TruffleRuby due to a difference in Symbol#inspect: :测试 vs :"测试"
+      next if RUBY_ENGINE == "truffleruby" and %w[emoji_method_calls.txt seattlerb/bug202.txt seattlerb/magic_encoding_comment.txt].include?(relative)
+
+      filepath = File.join(base, relative)
+      snapshot = File.expand_path(File.join("snapshots", relative), __dir__)
+
+      directory = File.dirname(snapshot)
+      FileUtils.mkdir_p(directory) unless File.directory?(directory)
+
+      ripper_should_match = ripper_enabled
+      check_valid_syntax = RUBY_VERSION >= "3.2.0"
+
+      case relative
+      when "seattlerb/pct_w_heredoc_interp_nested.txt"
+        # This file has changed behavior in Ripper in Ruby 3.3, so we skip it if
+        # we're on an earlier version.
+        ripper_should_match = false if RUBY_VERSION < "3.3.0"
+      when "seattlerb/heredoc_nested.txt", "whitequark/dedenting_heredoc.txt"
+        # It seems like there are some oddities with nested heredocs and ripper.
+        # Waiting for feedback on https://bugs.ruby-lang.org/issues/19838.
+        ripper_should_match = false
+      when "spanning_heredoc.txt", "spanning_heredoc_newlines.txt"
+        # Ripper seems to have a bug that the regex portions before and after
+        # the heredoc are combined into a single token. See
+        # https://bugs.ruby-lang.org/issues/19838.
+        ripper_should_match = false
+      when "heredocs_leading_whitespace.txt"
+        # Ruby < 3.3.0 cannot parse heredocs where there are leading whitespace
+        # characters in the heredoc start.
+        # Example: <<~'   EOF' or <<-'  EOF'
+        # https://bugs.ruby-lang.org/issues/19539
+        if RUBY_VERSION < "3.3.0"
+          ripper_should_match = false
+          check_valid_syntax = false
+        end
+      end
+
+      define_method "test_filepath_#{relative}" do
+        # First, read the source from the filepath. Use binmode to avoid
+        # converting CRLF on Windows, and explicitly set the external encoding
+        # to UTF-8 to override the binmode default.
+        source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8)
+
+        # Make sure that the given source is valid syntax, otherwise we have an
+        # invalid fixture.
+        assert_valid_syntax(source) if check_valid_syntax
+
+        # Next, assert that there were no errors during parsing.
+        result = Prism.parse(source, filepath: relative)
+        assert_empty result.errors
+
+        # Next, pretty print the source.
+        printed = PP.pp(result.value, +"", 79)
+
+        if File.exist?(snapshot)
+          saved = File.read(snapshot)
+
+          # If the snapshot file exists, but the printed value does not match the
+          # snapshot, then update the snapshot file.
+          if printed != saved
+            File.write(snapshot, printed)
+            warn("Updated snapshot at #{snapshot}.")
+          end
+
+          # If the snapshot file exists, then assert that the printed value
+          # matches the snapshot.
+          assert_equal(saved, printed)
+        else
+          # If the snapshot file does not yet exist, then write it out now.
+          File.write(snapshot, printed)
+          warn("Created snapshot at #{snapshot}.")
+        end
+
+        if !ENV["PRISM_BUILD_MINIMAL"]
+          # Next, assert that the value can be serialized and deserialized
+          # without changing the shape of the tree.
+          assert_equal_nodes(result.value, Prism.load(source, Prism.dump(source, filepath: relative)).value)
+        end
+
+        # Next, check that the location ranges of each node in the tree are a
+        # superset of their respective child nodes.
+        assert_non_overlapping_locations(result.value)
+
+        # Next, assert that the newlines are in the expected places.
+        expected_newlines = [0]
+        source.b.scan("\n") { expected_newlines << $~.offset(0)[0] + 1 }
+        assert_equal expected_newlines, Debug.newlines(source)
+
+        if ripper_should_match
+          # Finally, assert that we can lex the source and get the same tokens as
+          # Ripper.
+          lex_result = Prism.lex_compat(source)
+          assert_equal [], lex_result.errors
+          tokens = lex_result.value
+
+          begin
+            Prism.lex_ripper(source).zip(tokens).each do |(ripper, prism)|
+              assert_equal ripper, prism
+            end
+          rescue SyntaxError
+            raise ArgumentError, "Test file has invalid syntax #{filepath}"
+          end
+
+          # Next, check that we get the correct number of magic comments when
+          # lexing with ripper.
+          expected = MagicCommentRipper.new(source).tap(&:parse).magic_comments
+          actual = result.magic_comments
+
+          assert_equal expected.length, actual.length
+          expected.zip(actual).each do |(expected_key, expected_value), magic_comment|
+            assert_equal expected_key, magic_comment.key
+            assert_equal expected_value, magic_comment.value
+          end
+        end
+      end
+    end
+
+    Dir["*.txt", base: base].each do |relative|
+      next if relative == "newline_terminated.txt" || relative == "spanning_heredoc_newlines.txt"
+
+      # We test every snippet (separated by \n\n) in isolation
+      # to ensure the parser does not try to read bytes further than the end of each snippet
+      define_method "test_individual_snippets_#{relative}" do
+        filepath = File.join(base, relative)
+
+        # First, read the source from the filepath. Use binmode to avoid converting CRLF on Windows,
+        # and explicitly set the external encoding to UTF-8 to override the binmode default.
+        file_contents = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8)
+
+        file_contents.split(/(?<=\S)\n\n(?=\S)/).each do |snippet|
+          snippet = snippet.rstrip
+          result = Prism.parse(snippet, filepath: relative)
+          assert_empty result.errors
+
+          if !ENV["PRISM_BUILD_MINIMAL"]
+            assert_equal_nodes(result.value, Prism.load(snippet, Prism.dump(snippet, filepath: relative)).value)
+          end
+        end
+      end
+    end
+
+    private
+
+    # Check that the location ranges of each node in the tree are a superset of
+    # their respective child nodes.
+    def assert_non_overlapping_locations(node)
+      queue = [node]
+
+      while (current = queue.shift)
+        # We only want to compare parent/child location overlap in the case that
+        # we are not looking at a heredoc. That's because heredoc locations are
+        # special in that they only use the declaration of the heredoc.
+        compare = !(current.is_a?(StringNode) ||
+                    current.is_a?(XStringNode) ||
+                    current.is_a?(InterpolatedStringNode) ||
+                    current.is_a?(InterpolatedXStringNode)) ||
+        !current.opening&.start_with?("<<")
+
+        current.child_nodes.each do |child|
+          # child_nodes can return nil values, so we need to skip those.
+          next unless child
+
+          # Now that we know we have a child node, add that to the queue.
+          queue << child
+
+          if compare
+            assert_operator current.location.start_offset, :<=, child.location.start_offset
+            assert_operator current.location.end_offset, :>=, child.location.end_offset
+          end
+        end
+      end
+    end
+
+    def find_source_file_node(program)
+      queue = [program]
+      while (node = queue.shift)
+        return node if node.is_a?(SourceFileNode)
+        queue.concat(node.compact_child_nodes)
+      end
+    end
+
+    def ignore_warnings
+      previous_verbosity = $VERBOSE
+      $VERBOSE = nil
+      yield
+    ensure
+      $VERBOSE = previous_verbosity
+    end
+  end
+end