summaryrefslogtreecommitdiff
path: root/test/prism/parse_test.rb
blob: afb53e06685c110fa5295fe0679acf25fde6b8ce (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
# frozen_string_literal: true

require_relative "test_helper"

module Prism
  class ParseTest < TestCase
    # Ripper subclass that records every magic comment event it receives while
    # parsing, as [key, value] pairs in the order they were dispatched.
    class MagicCommentRipper < Ripper
      attr_reader :magic_comments

      # Forwards all arguments to Ripper unchanged and starts with an empty
      # list of collected magic comments.
      def initialize(*)
        super
        @magic_comments = []
      end

      # Parser event hook: remember the pair, then defer to Ripper's own
      # handler so the return value of the event is unchanged.
      def on_magic_comment(key, value)
        magic_comments.push([key, value])
        super
      end
    end

    # Snapshots are produced by pretty-printing trees, and methods such as
    # Symbol#inspect consult the default external encoding when deciding how to
    # render a value. Environments like LANG=C or -Eascii-8bit change that
    # default, which would change the generated snapshots. To keep them stable,
    # remember the current default and force UTF-8 for the duration of a test.
    def setup
      @previous_default_external = Encoding.default_external

      ignore_warnings do
        Encoding.default_external = Encoding::UTF_8
      end
    end

    # Put back the default external encoding that setup stashed away.
    def teardown
      ignore_warnings do
        Encoding.default_external = @previous_default_external
      end
    end

    # Parsing an empty source yields a program with no statements.
    def test_empty_string
      statements = Prism.parse("").value.statements
      assert_equal [], statements.body
    end

    # The filepath: option is reflected on the node produced for __FILE__.
    def test_parse_takes_file_path
      filepath = "filepath.rb"
      program = Prism.parse("def foo; __FILE__; end", filepath: filepath).value
      source_file = find_source_file_node(program)

      assert_equal filepath, source_file.filepath
    end

    # The line: option shifts the reported start lines for both parse and
    # parse_lex.
    def test_parse_takes_line
      line = 4

      program = Prism.parse("def foo\n __FILE__\nend", line: line).value
      assert_equal line, program.location.start_line

      # __FILE__ sits on the second line of the source.
      source_file = find_source_file_node(program)
      assert_equal line + 1, source_file.location.start_line

      lexed = Prism.parse_lex("def foo\n __FILE__\nend", line: line)
      assert_equal line, lexed.value.first.location.start_line
    end

    # Same as test_parse_takes_line, but with a negative starting line.
    def test_parse_takes_negative_lines
      line = -2

      program = Prism.parse("def foo\n __FILE__\nend", line: line).value
      assert_equal line, program.location.start_line

      # __FILE__ sits on the second line of the source.
      source_file = find_source_file_node(program)
      assert_equal line + 1, source_file.location.start_line

      lexed = Prism.parse_lex("def foo\n __FILE__\nend", line: line)
      assert_equal line, lexed.value.first.location.start_line
    end

    # parse_lex returns the tree together with the token list.
    def test_parse_lex
      program, tokens = Prism.parse_lex("def foo; end").value

      assert_kind_of ProgramNode, program
      assert_equal 5, tokens.length
    end

    # This test is only defined when PRISM_BUILD_MINIMAL is not set in the
    # environment.
    unless ENV["PRISM_BUILD_MINIMAL"]
      # dump_file works on an existing file, raises ENOENT for a missing one,
      # and raises TypeError when given nil.
      def test_dump_file
        assert_nothing_raised { Prism.dump_file(__FILE__) }

        missing = assert_raise(Errno::ENOENT) { Prism.dump_file("idontexist.rb") }
        assert_equal "No such file or directory - idontexist.rb", missing.message

        assert_raise(TypeError) { Prism.dump_file(nil) }
      end
    end

    # lex_file works on an existing file, raises ENOENT for a missing one, and
    # raises TypeError when given nil.
    def test_lex_file
      assert_nothing_raised { Prism.lex_file(__FILE__) }

      missing = assert_raise(Errno::ENOENT) { Prism.lex_file("idontexist.rb") }
      assert_equal "No such file or directory - idontexist.rb", missing.message

      assert_raise(TypeError) { Prism.lex_file(nil) }
    end

    # parse_lex_file returns a tree and a non-empty token list for an existing
    # file, raises ENOENT for a missing one, and raises TypeError for nil.
    def test_parse_lex_file
      program, tokens = Prism.parse_lex_file(__FILE__).value
      assert_kind_of ProgramNode, program
      refute_empty tokens

      missing = assert_raise(Errno::ENOENT) { Prism.parse_lex_file("idontexist.rb") }
      assert_equal "No such file or directory - idontexist.rb", missing.message

      assert_raise(TypeError) { Prism.parse_lex_file(nil) }
    end

    # parse_file returns a program node for an existing file, raises ENOENT
    # for a missing one, and raises TypeError for nil.
    def test_parse_file
      assert_kind_of ProgramNode, Prism.parse_file(__FILE__).value

      missing = assert_raise(Errno::ENOENT) { Prism.parse_file("idontexist.rb") }
      assert_equal "No such file or directory - idontexist.rb", missing.message

      assert_raise(TypeError) { Prism.parse_file(nil) }
    end

    # parse_file_success? reports true for a file that parses cleanly (this
    # test file itself), raises ENOENT for a missing file, and raises
    # TypeError for nil.
    #
    # The previous body was a verbatim copy of test_parse_file_comments and
    # never called parse_file_success?, leaving that API untested.
    def test_parse_file_success
      assert Prism.parse_file_success?(__FILE__)

      error = assert_raise Errno::ENOENT do
        Prism.parse_file_success?("idontexist.rb")
      end

      assert_equal "No such file or directory - idontexist.rb", error.message

      assert_raise TypeError do
        Prism.parse_file_success?(nil)
      end
    end

    # parse_file_comments finds at least one comment in this file, raises
    # ENOENT for a missing file, and raises TypeError for nil.
    def test_parse_file_comments
      comments = Prism.parse_file_comments(__FILE__)
      assert_predicate comments, :any?

      missing = assert_raise(Errno::ENOENT) { Prism.parse_file_comments("idontexist.rb") }
      assert_equal "No such file or directory - idontexist.rb", missing.message

      assert_raise(TypeError) { Prism.parse_file_comments(nil) }
    end

    # To accurately compare against Ripper, we need to make sure that we're
    # running on CRuby 3.2+.
    ripper_enabled = RUBY_ENGINE == "ruby" && RUBY_VERSION >= "3.2.0"

    # The FOCUS environment variable allows you to specify one particular fixture
    # to test, instead of all of them.
    base = File.join(__dir__, "fixtures")
    relatives = ENV["FOCUS"] ? [ENV["FOCUS"]] : Dir["**/*.txt", base: base]

    # Define one test method per fixture. Each generated test parses the
    # fixture, compares the pretty-printed tree against a stored snapshot, and
    # then cross-checks the result against several invariants (serialization
    # round trip, location nesting, newline offsets and, where enabled, Ripper).
    relatives.each do |relative|
      # These fail on TruffleRuby due to a difference in Symbol#inspect: :测试 vs :"测试"
      next if RUBY_ENGINE == "truffleruby" and %w[emoji_method_calls.txt seattlerb/bug202.txt seattlerb/magic_encoding_comment.txt].include?(relative)

      filepath = File.join(base, relative)
      snapshot = File.expand_path(File.join("snapshots", relative), __dir__)

      # The snapshot directory is created while the class body is evaluated,
      # i.e. before any of the generated tests run.
      directory = File.dirname(snapshot)
      FileUtils.mkdir_p(directory) unless File.directory?(directory)

      # Per-fixture switches. The case statement below turns them off for
      # fixtures with known divergences on the running Ruby.
      ripper_should_match = ripper_enabled
      check_valid_syntax = RUBY_VERSION >= "3.2.0"

      case relative
      when "seattlerb/pct_w_heredoc_interp_nested.txt"
        # This file has changed behavior in Ripper in Ruby 3.3, so we skip it if
        # we're on an earlier version.
        ripper_should_match = false if RUBY_VERSION < "3.3.0"
      when "seattlerb/heredoc_nested.txt", "whitequark/dedenting_heredoc.txt"
        # It seems like there are some oddities with nested heredocs and ripper.
        # Waiting for feedback on https://bugs.ruby-lang.org/issues/19838.
        ripper_should_match = false
      when "spanning_heredoc.txt", "spanning_heredoc_newlines.txt"
        # Ripper seems to have a bug that the regex portions before and after
        # the heredoc are combined into a single token. See
        # https://bugs.ruby-lang.org/issues/19838.
        ripper_should_match = false
      when "heredocs_leading_whitespace.txt"
        # Ruby < 3.3.0 cannot parse heredocs where there are leading whitespace
        # characters in the heredoc start.
        # Example: <<~'   EOF' or <<-'  EOF'
        # https://bugs.ruby-lang.org/issues/19539
        if RUBY_VERSION < "3.3.0"
          ripper_should_match = false
          check_valid_syntax = false
        end
      end

      define_method "test_filepath_#{relative}" do
        # First, read the source from the filepath. Use binmode to avoid
        # converting CRLF on Windows, and explicitly set the external encoding
        # to UTF-8 to override the binmode default.
        source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8)

        # Make sure that the given source is valid syntax, otherwise we have an
        # invalid fixture.
        assert_valid_syntax(source) if check_valid_syntax

        # Next, assert that there were no errors during parsing.
        result = Prism.parse(source, filepath: relative)
        assert_empty result.errors

        # Next, pretty print the source.
        printed = PP.pp(result.value, +"", 79)

        if File.exist?(snapshot)
          saved = File.read(snapshot)

          # If the snapshot file exists, but the printed value does not match the
          # snapshot, then update the snapshot file.
          if printed != saved
            File.write(snapshot, printed)
            warn("Updated snapshot at #{snapshot}.")
          end

          # If the snapshot file exists, then assert that the printed value
          # matches the snapshot. Note that `saved` still holds the contents
          # read before any update above, so a mismatch fails this run even
          # though the file on disk has already been rewritten.
          assert_equal(saved, printed)
        else
          # If the snapshot file does not yet exist, then write it out now.
          File.write(snapshot, printed)
          warn("Created snapshot at #{snapshot}.")
        end

        if !ENV["PRISM_BUILD_MINIMAL"]
          # Next, assert that the value can be serialized and deserialized
          # without changing the shape of the tree.
          assert_equal_nodes(result.value, Prism.load(source, Prism.dump(source, filepath: relative)).value)
        end

        # Next, check that the location ranges of each node in the tree are a
        # superset of their respective child nodes.
        assert_non_overlapping_locations(result.value)

        # Next, assert that the newlines are in the expected places. Offset 0
        # is always expected, followed by the offset just past each "\n". The
        # scan runs over a binary copy of the source, so the offsets are byte
        # offsets.
        expected_newlines = [0]
        source.b.scan("\n") { expected_newlines << $~.offset(0)[0] + 1 }
        assert_equal expected_newlines, Debug.newlines(source)

        if ripper_should_match
          # Finally, assert that we can lex the source and get the same tokens as
          # Ripper.
          lex_result = Prism.lex_compat(source)
          assert_equal [], lex_result.errors
          tokens = lex_result.value

          begin
            # NOTE(review): zip is driven by the Ripper token list, so any
            # extra trailing tokens produced by Prism are not compared here.
            Prism.lex_ripper(source).zip(tokens).each do |(ripper, prism)|
              assert_equal ripper, prism
            end
          rescue SyntaxError
            raise ArgumentError, "Test file has invalid syntax #{filepath}"
          end

          # Next, check that we get the correct number of magic comments when
          # lexing with ripper.
          expected = MagicCommentRipper.new(source).tap(&:parse).magic_comments
          actual = result.magic_comments

          # Compare the counts first, then each key/value pair in order.
          assert_equal expected.length, actual.length
          expected.zip(actual).each do |(expected_key, expected_value), magic_comment|
            assert_equal expected_key, magic_comment.key
            assert_equal expected_value, magic_comment.value
          end
        end
      end
    end

    # Define one extra test per top-level fixture that parses every snippet of
    # the fixture on its own. Two fixtures are excluded from this check.
    Dir["*.txt", base: base].each do |relative|
      next if %w[newline_terminated.txt spanning_heredoc_newlines.txt].include?(relative)

      # We test every snippet (separated by \n\n) in isolation to ensure the
      # parser does not try to read bytes further than the end of each snippet.
      define_method "test_individual_snippets_#{relative}" do
        # Use binmode to avoid converting CRLF on Windows, and explicitly set
        # the external encoding to UTF-8 to override the binmode default.
        contents = File.read(File.join(base, relative), binmode: true, external_encoding: Encoding::UTF_8)

        # Split on a blank line that has non-whitespace on both sides, and
        # strip trailing whitespace from every piece before parsing it.
        snippets = contents.split(/(?<=\S)\n\n(?=\S)/).map(&:rstrip)

        snippets.each do |snippet|
          parsed = Prism.parse(snippet, filepath: relative)
          assert_empty parsed.errors

          # The serialization round trip is skipped when PRISM_BUILD_MINIMAL
          # is set.
          next if ENV["PRISM_BUILD_MINIMAL"]

          reloaded = Prism.load(snippet, Prism.dump(snippet, filepath: relative))
          assert_equal_nodes(parsed.value, reloaded.value)
        end
      end
    end

    private

    # Walk the tree breadth-first and assert that the location range of every
    # node encloses the location range of each of its children.
    def assert_non_overlapping_locations(node)
      pending = [node]

      while (parent = pending.shift)
        # Heredocs are exempt from the containment check because a heredoc's
        # location only covers its declaration. Only the string-like node
        # types are asked for their opening; for every other node (and for
        # strings without an opening) this is nil.
        heredoc =
          case parent
          when StringNode, XStringNode, InterpolatedStringNode, InterpolatedXStringNode
            parent.opening&.start_with?("<<")
          end

        # child_nodes can return nil values, so drop those before descending.
        # Children are always queued, even when their parent is a heredoc.
        children = parent.child_nodes.compact
        pending.concat(children)
        next if heredoc

        children.each do |child|
          assert_operator parent.location.start_offset, :<=, child.location.start_offset
          assert_operator parent.location.end_offset, :>=, child.location.end_offset
        end
      end
    end

    # Breadth-first search for the first SourceFileNode at or below the given
    # node. Returns nil when the tree does not contain one.
    def find_source_file_node(program)
      pending = [program]

      loop do
        candidate = pending.shift
        break unless candidate
        return candidate if candidate.is_a?(SourceFileNode)

        pending.concat(candidate.compact_child_nodes)
      end
    end

    # Run the given block with $VERBOSE set to nil and return the block's
    # value. The previous verbosity is restored afterwards, even when the
    # block raises.
    def ignore_warnings
      saved_verbosity, $VERBOSE = $VERBOSE, nil
      yield
    ensure
      $VERBOSE = saved_verbosity
    end
  end
end