test/prism/lex_test.rb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123

# frozen_string_literal: true

# Stop evaluating this file unless the interpreter is CRuby reporting a
# version string of at least "3.2.0" (compared as strings).
return if RUBY_ENGINE != "ruby" || RUBY_VERSION < "3.2.0"

require_relative "test_helper"
require "ripper"

module Prism
  # Tests for Prism's lexing entry points: lex, lex_file, lex_compat,
  # parse_lex, and parse_lex_file.
  class LexTest < TestCase
    # Lexing this file succeeds; a missing path raises Errno::ENOENT with the
    # message asserted below, and a nil path raises TypeError.
    def test_lex_file
      assert_nothing_raised { Prism.lex_file(__FILE__) }

      missing = assert_raise(Errno::ENOENT) { Prism.lex_file("idontexist.rb") }
      assert_equal "No such file or directory - idontexist.rb", missing.message

      assert_raise(TypeError) { Prism.lex_file(nil) }
    end

    # parse_lex returns a pair: the program node and the token list.
    def test_parse_lex
      result = Prism.parse_lex("def foo; end")
      ast, lexed = result.value

      assert_kind_of ProgramNode, ast
      assert_equal 5, lexed.length
    end

    # parse_lex_file behaves like parse_lex for a path on disk and reports
    # missing or nil paths the same way lex_file does.
    def test_parse_lex_file
      ast, lexed = Prism.parse_lex_file(__FILE__).value

      assert_kind_of ProgramNode, ast
      refute_empty lexed

      missing = assert_raise(Errno::ENOENT) { Prism.parse_lex_file("idontexist.rb") }
      assert_equal "No such file or directory - idontexist.rb", missing.message

      assert_raise(TypeError) { Prism.parse_lex_file(nil) }
    end

    # Token values carry the encoding passed to lex, unless the source itself
    # declares a different one.
    def test_lex_encoding
      requested = Encoding::Windows_31J

      lexed = Prism.lex('"わたし"', encoding: requested).value
      lexed.each do |token, _state|
        assert_equal(requested, token.value.encoding)
      end

      # A shebang has to sit on the first line, so in that case the encoding
      # comment may come second; it must still change the encoding.
      source = <<~RUBY
        #! /usr/bin/env ruby
        # encoding: utf-8
        "わたし"
      RUBY

      lexed = Prism.lex(source, encoding: requested).value
      lexed.each do |token, _state|
        assert_equal(Encoding::UTF_8, token.value.encoding)
      end
    end

    if RUBY_VERSION >= "3.3"
      # lex_compat output must be equal to what Ripper.lex produces for the
      # same source.
      def test_lex_compat
        source = "foo bar"
        actual = Prism.lex_compat(source, version: "current").value
        expected = Ripper.lex(source)
        assert_equal(expected, actual)
      end
    end

    # An interpolation that is opened but never closed, with nothing inside.
    def test_lex_interpolation_unterminated
      source = '"#{'

      assert_equal(%i[STRING_BEGIN EMBEXPR_BEGIN EOF], token_types(source))

      with_newline = %i[STRING_BEGIN EMBEXPR_BEGIN IGNORED_NEWLINE EOF]
      assert_equal(with_newline, token_types(source + "\n"))
    end

    # An interpolation that is opened but never closed, containing a constant.
    def test_lex_interpolation_unterminated_with_content
      source = '"#{C'

      # FIXME: the EOF token is emitted twice here.
      without_newline = %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT EOF EOF]
      assert_equal(without_newline, token_types(source))

      with_newline = %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT NEWLINE EOF]
      assert_equal(with_newline, token_types(source + "\n"))
    end

    # A heredoc whose body opens an interpolation and then ends without ever
    # reaching its terminator.
    def test_lex_heredoc_unterminated
      source = <<~'RUBY'.strip
        <<A+B
        #{C
      RUBY

      without_newline = %i[HEREDOC_START EMBEXPR_BEGIN CONSTANT HEREDOC_END PLUS CONSTANT NEWLINE EOF]
      assert_equal(without_newline, token_types(source))

      with_newline = %i[HEREDOC_START EMBEXPR_BEGIN CONSTANT NEWLINE HEREDOC_END PLUS CONSTANT NEWLINE EOF]
      assert_equal(with_newline, token_types(source + "\n"))
    end

    # Lexes +code+ and returns just the type of every token, in order.
    def token_types(code)
      lexed = Prism.lex(code).value
      lexed.map { |(token, _state)| token.type }
    end
  end
end