summaryrefslogtreecommitdiff
path: root/lib/prism/translation/parser.rb
blob: 0d11b8f5668cca0067a3eb2bbe67123dd3346880 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
# frozen_string_literal: true

require "parser"

module Prism
  module Translation
    # This class is the entry-point for converting a prism syntax tree into the
    # whitequark/parser gem's syntax tree. It inherits from the base parser for
    # the parser gem, and overrides the parse* methods to parse with prism and
    # then translate.
    class Parser < ::Parser::Base
      # Short local name for the parser gem's diagnostic class. It is marked
      # private so that it is only reachable from inside this class.
      Diagnostic = ::Parser::Diagnostic # :nodoc:
      private_constant :Diagnostic

      # The parser gem has a list of diagnostics with a hard-coded set of error
      # messages. We create our own diagnostic class in order to set our own
      # error messages.
      #
      # Instances keep the message text they were built with and expose it
      # through #message.
      class PrismDiagnostic < Diagnostic
        # This is the cached message coming from prism.
        attr_reader :message

        # Initialize a new diagnostic with the given message and location.
        #
        # message::  the text returned by #message
        # level::    forwarded to the parent class (callers in this file pass
        #            :error or :warning)
        # reason::   forwarded to the parent class (callers in this file pass
        #            the prism error or warning type)
        # location:: forwarded to the parent class (callers in this file pass
        #            the range built by build_range)
        def initialize(message, level, reason, location)
          # NOTE(review): the message is stored before calling super,
          # presumably because the parent initializer freezes the instance.
          # Confirm against the parser gem before reordering these lines.
          @message = message
          # The third and fifth parent arguments are always passed empty here.
          super(level, reason, {}, location, [])
        end
      end

      # NOTE(review): presumably read by the racc runtime underneath the parser
      # gem's base class, with false leaving parser debugging off -- confirm.
      Racc_debug_parser = false # :nodoc:

      # The version number of this parser, as an integer. It is passed through
      # convert_for_prism, which maps 34 to the prism version string "3.4.0".
      def version # :nodoc:
        34
      end

      # The default encoding for Ruby files is UTF-8.
      #
      # Always returns Encoding::UTF_8.
      def default_encoding
        Encoding::UTF_8
      end

      # Intentionally does nothing and returns nil.
      #
      # NOTE(review): presumably this overrides the error hook of the inherited
      # racc-based parser, since errors in this class are reported through
      # unwrap instead -- confirm against the parser gem.
      def yyerror # :nodoc:
      end

      # Parse the given source buffer with prism and translate the resulting
      # tree into the parser gem's AST, which is returned.
      #
      # Errors and warnings found by prism are reported through unwrap before
      # the tree is translated. The buffer is only held in @source_buffer for
      # the duration of the call; it is cleared again even if an exception is
      # raised.
      def parse(source_buffer)
        @source_buffer = source_buffer
        code = source_buffer.source
        offsets = build_offset_cache(code)

        prism_result = Prism.parse(code, filepath: source_buffer.name, version: convert_for_prism(version))
        build_ast(unwrap(prism_result, offsets).value, offsets)
      ensure
        @source_buffer = nil
      end

      # Parse the given source buffer with prism and return a two-element
      # array: the translated parser gem AST, followed by the list of source
      # code comments.
      #
      # Errors and warnings found by prism are reported through unwrap first.
      # @source_buffer is reset to nil when the call finishes, whether or not
      # it raised.
      def parse_with_comments(source_buffer)
        @source_buffer = source_buffer
        code = source_buffer.source
        offsets = build_offset_cache(code)

        prism_result = Prism.parse(code, filepath: source_buffer.name, version: convert_for_prism(version))
        checked = unwrap(prism_result, offsets)

        ast = build_ast(checked.value, offsets)
        comments = build_comments(checked.comments, offsets)
        [ast, comments]
      ensure
        @source_buffer = nil
      end

      # Parses a source buffer and returns the AST, the source code comments,
      # and the tokens emitted by the lexer, as a three-element array.
      #
      # When +recover+ is false (the default), a ::Parser::SyntaxError raised
      # while reporting diagnostics propagates to the caller. When it is true,
      # that error is swallowed and the comments and tokens are still
      # returned. In either mode the AST is nil whenever prism reports the
      # parse as unsuccessful.
      #
      # @source_buffer is reset to nil when the call finishes, whether or not
      # it raised.
      def tokenize(source_buffer, recover = false)
        @source_buffer = source_buffer
        source = source_buffer.source

        offset_cache = build_offset_cache(source)

        # Hold on to the raw prism result before reporting diagnostics. If
        # unwrap raises and we are recovering, the comments and tokens are
        # still needed below; taking the result from unwrap's return value
        # would leave it nil on that path.
        result = Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version))

        begin
          unwrap(result, offset_cache)
        rescue ::Parser::SyntaxError
          raise unless recover
        end

        program, tokens = result.value
        ast = build_ast(program, offset_cache) if result.success?

        [
          ast,
          build_comments(result.comments, offset_cache),
          build_tokens(tokens, offset_cache)
        ]
      ensure
        @source_buffer = nil
      end

      # Prism already resolves numbered parameters, so nothing is declared
      # here. This only reports whether the node's first child is a numbered
      # parameter name, i.e. an underscore followed by a single digit from 1
      # to 9.
      def try_declare_numparam(node)
        name = node.children[0]
        name.match?(/\A_[1-9]\z/)
      end

      private

      # This is a hook to allow consumers to disable some errors if they don't
      # want them to block creating the syntax tree.
      #
      # unwrap calls this once per prism error and only reports the error when
      # the return value is truthy. This default accepts every error.
      def valid_error?(error)
        true
      end

      # This is a hook to allow consumers to disable some warnings if they don't
      # want them to block creating the syntax tree.
      #
      # unwrap calls this once per prism warning and only reports the warning
      # when the return value is truthy. This default accepts every warning.
      def valid_warning?(warning)
        true
      end

      # Translate a prism parse error into a diagnostic.
      #
      # Error types that have a matching reason in the parser gem become a
      # Diagnostic with that reason (plus any arguments that reason takes).
      # Every other error type becomes a PrismDiagnostic that carries prism's
      # own message.
      def error_diagnostic(error, offset_cache)
        location = error.location
        range = build_range(location, offset_cache)

        reason, arguments =
          case error.type
          when :argument_block_multi then [:block_and_blockarg, {}]
          when :argument_formal_constant then [:argument_const, {}]
          when :argument_formal_class then [:argument_cvar, {}]
          when :argument_formal_global then [:argument_gvar, {}]
          when :argument_formal_ivar then [:argument_ivar, {}]
          when :argument_no_forwarding_amp then [:no_anonymous_blockarg, {}]
          when :argument_no_forwarding_star then [:no_anonymous_restarg, {}]
          when :argument_no_forwarding_star_star then [:no_anonymous_kwrestarg, {}]
          when :begin_lonely_else
            # Narrow the reported range to four characters (the length of the
            # "else" keyword).
            location = location.copy(length: 4)
            range = build_range(location, offset_cache)
            [:useless_else, {}]
          when :class_name, :module_name then [:module_name_const, {}]
          when :class_in_method then [:class_in_def, {}]
          when :def_endless_setter then [:endless_setter, {}]
          when :embdoc_term then [:embedded_document, {}]
          when :incomplete_variable_class, :incomplete_variable_class_3_3_0
            # Widen the range by one so the reported name includes the
            # character following the location prism gave us.
            location = location.copy(length: location.length + 1)
            range = build_range(location, offset_cache)
            [:cvar_name, { name: location.slice }]
          when :incomplete_variable_instance, :incomplete_variable_instance_3_3_0
            # Same widening as for class variables.
            location = location.copy(length: location.length + 1)
            range = build_range(location, offset_cache)
            [:ivar_name, { name: location.slice }]
          when :invalid_variable_global, :invalid_variable_global_3_3_0
            [:gvar_name, { name: location.slice }]
          when :module_in_method then [:module_in_def, {}]
          when :numbered_parameter_ordinary then [:ordinary_param_defined, {}]
          when :numbered_parameter_outer_scope then [:numparam_used_in_outer_scope, {}]
          when :parameter_circular
            [:circular_argument_reference, { var_name: location.slice }]
          when :parameter_name_repeat then [:duplicate_argument, {}]
          when :parameter_numbered_reserved
            [:reserved_for_numparam, { name: location.slice }]
          when :regexp_unknown_options
            # Drop the first character of the slice before reporting the options.
            [:regexp_options, { options: location.slice[1..] }]
          when :singleton_for_literals then [:singleton_literal, {}]
          when :string_literal_eof then [:string_eof, {}]
          when :unexpected_token_ignore
            [:unexpected_token, { token: location.slice }]
          when :write_target_in_method then [:dynamic_const, {}]
          else
            # No equivalent reason is mapped here: keep prism's message.
            return PrismDiagnostic.new(error.message, :error, error.type, range)
          end

        Diagnostic.new(:error, reason, arguments, range, [])
      end

      # Translate a prism parse warning into a diagnostic.
      #
      # Returns a Diagnostic for warning types that have a matching reason in
      # the parser gem, nil for duplicated hash keys, and a PrismDiagnostic
      # carrying prism's own message for every other type.
      def warning_diagnostic(warning, offset_cache)
        range = build_range(warning.location, offset_cache)

        reason, arguments =
          case warning.type
          when :ambiguous_first_argument_plus then [:ambiguous_prefix, { prefix: "+" }]
          when :ambiguous_first_argument_minus then [:ambiguous_prefix, { prefix: "-" }]
          when :ambiguous_prefix_ampersand then [:ambiguous_prefix, { prefix: "&" }]
          when :ambiguous_prefix_star then [:ambiguous_prefix, { prefix: "*" }]
          when :ambiguous_prefix_star_star then [:ambiguous_prefix, { prefix: "**" }]
          when :ambiguous_slash then [:ambiguous_regexp, {}]
          when :dot_dot_dot_eol then [:triple_dot_at_eol, {}]
          when :duplicated_hash_key
            # Skipped on purpose: the parser gem produces this one on its own.
            return nil
          else
            return PrismDiagnostic.new(warning.message, :warning, warning.type, range)
          end

        Diagnostic.new(:warning, reason, arguments, range, [])
      end

      # Report every accepted error and warning from the prism result to the
      # diagnostics engine, then hand the result back unchanged.
      #
      # Errors and warnings rejected by valid_error? / valid_warning? are not
      # reported, and neither are warnings for which warning_diagnostic
      # returns nil. Any exception raised by the diagnostics engine while
      # processing propagates to the caller.
      def unwrap(result, offset_cache)
        result.errors.each do |prism_error|
          diagnostics.process(error_diagnostic(prism_error, offset_cache)) if valid_error?(prism_error)
        end

        result.warnings.each do |prism_warning|
          if valid_warning?(prism_warning)
            translated = warning_diagnostic(prism_warning, offset_cache)
            diagnostics.process(translated) if translated
          end
        end

        result
      end

      # Prism reports offsets in bytes, whereas the parser gem expects offsets
      # in characters. This builds the object used to convert from the former
      # to the latter; it is always queried with [byte_offset].
      #
      # When the source has as many bytes as characters the two kinds of
      # offset coincide, so an identity lambda is returned. Otherwise an array
      # is returned whose index is a byte offset and whose value is the
      # matching character offset.
      def build_offset_cache(source)
        return -> (offset) { offset } if source.bytesize == source.length

        table = []
        source.each_char.with_index do |char, char_offset|
          # Every byte of a character maps back to that character's offset.
          table.concat(Array.new(char.bytesize, char_offset))
        end

        # The offset just past the final byte maps to the offset just past the
        # final character.
        table << (table.last + 1)
      end

      # Translate the prism program node into the parser gem AST by having it
      # accept a Compiler bound to this parser and the given offset cache.
      def build_ast(program, offset_cache)
        compiler = Compiler.new(self, offset_cache)
        program.accept(compiler)
      end

      # Convert each prism comment into a parser gem comment that covers the
      # same source range.
      def build_comments(comments, offset_cache)
        comments.map do |prism_comment|
          range = build_range(prism_comment.location, offset_cache)
          ::Parser::Source::Comment.new(range)
        end
      end

      # Convert the prism tokens into parser gem tokens by running them
      # through a Lexer for the current source buffer and collecting the
      # result into an array.
      def build_tokens(tokens, offset_cache)
        lexer = Lexer.new(source_buffer, tokens, offset_cache)
        lexer.to_a
      end

      # Turn a prism location into a parser gem source range on the current
      # source buffer, converting both of its offsets through the offset
      # cache.
      def build_range(location, offset_cache)
        buffer = source_buffer
        begin_pos = offset_cache[location.start_offset]
        end_pos = offset_cache[location.end_offset]

        ::Parser::Source::Range.new(buffer, begin_pos, end_pos)
      end

      # Map a parser gem version number onto the version string passed to
      # prism: 33 becomes "3.3.0" and 34 becomes "3.4.0". Any other value
      # falls back to "latest".
      def convert_for_prism(version)
        case version
        when 33 then "3.3.0"
        when 34 then "3.4.0"
        else "latest"
        end
      end

      # NOTE(review): these are required at the end of the class body,
      # presumably because the files define Compiler and Lexer inside this
      # class and so need it to exist first -- confirm before moving them.
      require_relative "parser/compiler"
      require_relative "parser/lexer"

      # Compiler and Lexer are only used internally (see build_ast and
      # build_tokens), so they are hidden from code outside this class.
      private_constant :Compiler
      private_constant :Lexer
    end
  end
end