summaryrefslogtreecommitdiff
path: root/lib/prism/translation/parser.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/prism/translation/parser.rb')
-rw-r--r--lib/prism/translation/parser.rb302
1 files changed, 302 insertions, 0 deletions
diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb
new file mode 100644
index 0000000000..0d11b8f566
--- /dev/null
+++ b/lib/prism/translation/parser.rb
@@ -0,0 +1,302 @@
+# frozen_string_literal: true
+
+require "parser"
+
+module Prism
+ module Translation
+ # This class is the entry-point for converting a prism syntax tree into the
+ # whitequark/parser gem's syntax tree. It inherits from the base parser for
+ # the parser gem, and overrides the parse* methods to parse with prism and
+ # then translate.
+ class Parser < ::Parser::Base
+ Diagnostic = ::Parser::Diagnostic # :nodoc:
+ private_constant :Diagnostic
+
+ # The parser gem has a list of diagnostics with a hard-coded set of error
+ # messages. We create our own diagnostic class in order to set our own
+ # error messages.
+ class PrismDiagnostic < Diagnostic
+ # This is the cached message coming from prism.
+ attr_reader :message
+
+ # Initialize a new diagnostic with the given message and location.
+ def initialize(message, level, reason, location)
+ @message = message
+ super(level, reason, {}, location, [])
+ end
+ end
+
+ Racc_debug_parser = false # :nodoc:
+
+ def version # :nodoc:
+ 34
+ end
+
+ # The default encoding for Ruby files is UTF-8.
+ def default_encoding
+ Encoding::UTF_8
+ end
+
+ def yyerror # :nodoc:
+ end
+
+ # Parses a source buffer and returns the AST.
+ def parse(source_buffer)
+ @source_buffer = source_buffer
+ source = source_buffer.source
+
+ offset_cache = build_offset_cache(source)
+ result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version)), offset_cache)
+
+ build_ast(result.value, offset_cache)
+ ensure
+ @source_buffer = nil
+ end
+
+ # Parses a source buffer and returns the AST and the source code comments.
+ def parse_with_comments(source_buffer)
+ @source_buffer = source_buffer
+ source = source_buffer.source
+
+ offset_cache = build_offset_cache(source)
+ result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version)), offset_cache)
+
+ [
+ build_ast(result.value, offset_cache),
+ build_comments(result.comments, offset_cache)
+ ]
+ ensure
+ @source_buffer = nil
+ end
+
+ # Parses a source buffer and returns the AST, the source code comments,
+ # and the tokens emitted by the lexer.
+ def tokenize(source_buffer, recover = false)
+ @source_buffer = source_buffer
+ source = source_buffer.source
+
+ offset_cache = build_offset_cache(source)
+ result =
+ begin
+ unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version)), offset_cache)
+ rescue ::Parser::SyntaxError
+ raise if !recover
+ end
+
+ program, tokens = result.value
+ ast = build_ast(program, offset_cache) if result.success?
+
+ [
+ ast,
+ build_comments(result.comments, offset_cache),
+ build_tokens(tokens, offset_cache)
+ ]
+ ensure
+ @source_buffer = nil
+ end
+
+ # Since prism resolves num params for us, we don't need to support this
+ # kind of logic here.
+ def try_declare_numparam(node)
+ node.children[0].match?(/\A_[1-9]\z/)
+ end
+
+ private
+
+ # This is a hook to allow consumers to disable some errors if they don't
+ # want them to block creating the syntax tree.
+ def valid_error?(error)
+ true
+ end
+
+ # This is a hook to allow consumers to disable some warnings if they don't
+ # want them to block creating the syntax tree.
+ def valid_warning?(warning)
+ true
+ end
+
+ # Build a diagnostic from the given prism parse error.
+ def error_diagnostic(error, offset_cache)
+ location = error.location
+ diagnostic_location = build_range(location, offset_cache)
+
+ case error.type
+ when :argument_block_multi
+ Diagnostic.new(:error, :block_and_blockarg, {}, diagnostic_location, [])
+ when :argument_formal_constant
+ Diagnostic.new(:error, :argument_const, {}, diagnostic_location, [])
+ when :argument_formal_class
+ Diagnostic.new(:error, :argument_cvar, {}, diagnostic_location, [])
+ when :argument_formal_global
+ Diagnostic.new(:error, :argument_gvar, {}, diagnostic_location, [])
+ when :argument_formal_ivar
+ Diagnostic.new(:error, :argument_ivar, {}, diagnostic_location, [])
+ when :argument_no_forwarding_amp
+ Diagnostic.new(:error, :no_anonymous_blockarg, {}, diagnostic_location, [])
+ when :argument_no_forwarding_star
+ Diagnostic.new(:error, :no_anonymous_restarg, {}, diagnostic_location, [])
+ when :argument_no_forwarding_star_star
+ Diagnostic.new(:error, :no_anonymous_kwrestarg, {}, diagnostic_location, [])
+ when :begin_lonely_else
+ location = location.copy(length: 4)
+ diagnostic_location = build_range(location, offset_cache)
+ Diagnostic.new(:error, :useless_else, {}, diagnostic_location, [])
+ when :class_name, :module_name
+ Diagnostic.new(:error, :module_name_const, {}, diagnostic_location, [])
+ when :class_in_method
+ Diagnostic.new(:error, :class_in_def, {}, diagnostic_location, [])
+ when :def_endless_setter
+ Diagnostic.new(:error, :endless_setter, {}, diagnostic_location, [])
+ when :embdoc_term
+ Diagnostic.new(:error, :embedded_document, {}, diagnostic_location, [])
+ when :incomplete_variable_class, :incomplete_variable_class_3_3_0
+ location = location.copy(length: location.length + 1)
+ diagnostic_location = build_range(location, offset_cache)
+
+ Diagnostic.new(:error, :cvar_name, { name: location.slice }, diagnostic_location, [])
+ when :incomplete_variable_instance, :incomplete_variable_instance_3_3_0
+ location = location.copy(length: location.length + 1)
+ diagnostic_location = build_range(location, offset_cache)
+
+ Diagnostic.new(:error, :ivar_name, { name: location.slice }, diagnostic_location, [])
+ when :invalid_variable_global, :invalid_variable_global_3_3_0
+ Diagnostic.new(:error, :gvar_name, { name: location.slice }, diagnostic_location, [])
+ when :module_in_method
+ Diagnostic.new(:error, :module_in_def, {}, diagnostic_location, [])
+ when :numbered_parameter_ordinary
+ Diagnostic.new(:error, :ordinary_param_defined, {}, diagnostic_location, [])
+ when :numbered_parameter_outer_scope
+ Diagnostic.new(:error, :numparam_used_in_outer_scope, {}, diagnostic_location, [])
+ when :parameter_circular
+ Diagnostic.new(:error, :circular_argument_reference, { var_name: location.slice }, diagnostic_location, [])
+ when :parameter_name_repeat
+ Diagnostic.new(:error, :duplicate_argument, {}, diagnostic_location, [])
+ when :parameter_numbered_reserved
+ Diagnostic.new(:error, :reserved_for_numparam, { name: location.slice }, diagnostic_location, [])
+ when :regexp_unknown_options
+ Diagnostic.new(:error, :regexp_options, { options: location.slice[1..] }, diagnostic_location, [])
+ when :singleton_for_literals
+ Diagnostic.new(:error, :singleton_literal, {}, diagnostic_location, [])
+ when :string_literal_eof
+ Diagnostic.new(:error, :string_eof, {}, diagnostic_location, [])
+ when :unexpected_token_ignore
+ Diagnostic.new(:error, :unexpected_token, { token: location.slice }, diagnostic_location, [])
+ when :write_target_in_method
+ Diagnostic.new(:error, :dynamic_const, {}, diagnostic_location, [])
+ else
+ PrismDiagnostic.new(error.message, :error, error.type, diagnostic_location)
+ end
+ end
+
+ # Build a diagnostic from the given prism parse warning.
+ def warning_diagnostic(warning, offset_cache)
+ diagnostic_location = build_range(warning.location, offset_cache)
+
+ case warning.type
+ when :ambiguous_first_argument_plus
+ Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "+" }, diagnostic_location, [])
+ when :ambiguous_first_argument_minus
+ Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "-" }, diagnostic_location, [])
+ when :ambiguous_prefix_ampersand
+ Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "&" }, diagnostic_location, [])
+ when :ambiguous_prefix_star
+ Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "*" }, diagnostic_location, [])
+ when :ambiguous_prefix_star_star
+ Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "**" }, diagnostic_location, [])
+ when :ambiguous_slash
+ Diagnostic.new(:warning, :ambiguous_regexp, {}, diagnostic_location, [])
+ when :dot_dot_dot_eol
+ Diagnostic.new(:warning, :triple_dot_at_eol, {}, diagnostic_location, [])
+ when :duplicated_hash_key
+ # skip, parser does this on its own
+ else
+ PrismDiagnostic.new(warning.message, :warning, warning.type, diagnostic_location)
+ end
+ end
+
+ # If there was a error generated during the parse, then raise an
+ # appropriate syntax error. Otherwise return the result.
+ def unwrap(result, offset_cache)
+ result.errors.each do |error|
+ next unless valid_error?(error)
+ diagnostics.process(error_diagnostic(error, offset_cache))
+ end
+
+ result.warnings.each do |warning|
+ next unless valid_warning?(warning)
+ diagnostic = warning_diagnostic(warning, offset_cache)
+ diagnostics.process(diagnostic) if diagnostic
+ end
+
+ result
+ end
+
+ # Prism deals with offsets in bytes, while the parser gem deals with
+ # offsets in characters. We need to handle this conversion in order to
+ # build the parser gem AST.
+ #
+ # If the bytesize of the source is the same as the length, then we can
+ # just use the offset directly. Otherwise, we build an array where the
+ # index is the byte offset and the value is the character offset.
+ def build_offset_cache(source)
+ if source.bytesize == source.length
+ -> (offset) { offset }
+ else
+ offset_cache = []
+ offset = 0
+
+ source.each_char do |char|
+ char.bytesize.times { offset_cache << offset }
+ offset += 1
+ end
+
+ offset_cache << offset
+ end
+ end
+
+ # Build the parser gem AST from the prism AST.
+ def build_ast(program, offset_cache)
+ program.accept(Compiler.new(self, offset_cache))
+ end
+
+ # Build the parser gem comments from the prism comments.
+ def build_comments(comments, offset_cache)
+ comments.map do |comment|
+ ::Parser::Source::Comment.new(build_range(comment.location, offset_cache))
+ end
+ end
+
+ # Build the parser gem tokens from the prism tokens.
+ def build_tokens(tokens, offset_cache)
+ Lexer.new(source_buffer, tokens, offset_cache).to_a
+ end
+
+ # Build a range from a prism location.
+ def build_range(location, offset_cache)
+ ::Parser::Source::Range.new(
+ source_buffer,
+ offset_cache[location.start_offset],
+ offset_cache[location.end_offset]
+ )
+ end
+
+ # Converts the version format handled by Parser to the format handled by Prism.
+ def convert_for_prism(version)
+ case version
+ when 33
+ "3.3.0"
+ when 34
+ "3.4.0"
+ else
+ "latest"
+ end
+ end
+
+ require_relative "parser/compiler"
+ require_relative "parser/lexer"
+
+ private_constant :Compiler
+ private_constant :Lexer
+ end
+ end
+end