diff options
Diffstat (limited to 'lib/prism/translation/parser.rb')
-rw-r--r-- | lib/prism/translation/parser.rb | 302 |
1 files changed, 302 insertions, 0 deletions
diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb new file mode 100644 index 0000000000..0d11b8f566 --- /dev/null +++ b/lib/prism/translation/parser.rb @@ -0,0 +1,302 @@ +# frozen_string_literal: true + +require "parser" + +module Prism + module Translation + # This class is the entry-point for converting a prism syntax tree into the + # whitequark/parser gem's syntax tree. It inherits from the base parser for + # the parser gem, and overrides the parse* methods to parse with prism and + # then translate. + class Parser < ::Parser::Base + Diagnostic = ::Parser::Diagnostic # :nodoc: + private_constant :Diagnostic + + # The parser gem has a list of diagnostics with a hard-coded set of error + # messages. We create our own diagnostic class in order to set our own + # error messages. + class PrismDiagnostic < Diagnostic + # This is the cached message coming from prism. + attr_reader :message + + # Initialize a new diagnostic with the given message and location. + def initialize(message, level, reason, location) + @message = message + super(level, reason, {}, location, []) + end + end + + Racc_debug_parser = false # :nodoc: + + def version # :nodoc: + 34 + end + + # The default encoding for Ruby files is UTF-8. + def default_encoding + Encoding::UTF_8 + end + + def yyerror # :nodoc: + end + + # Parses a source buffer and returns the AST. + def parse(source_buffer) + @source_buffer = source_buffer + source = source_buffer.source + + offset_cache = build_offset_cache(source) + result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version)), offset_cache) + + build_ast(result.value, offset_cache) + ensure + @source_buffer = nil + end + + # Parses a source buffer and returns the AST and the source code comments. + def parse_with_comments(source_buffer) + @source_buffer = source_buffer + source = source_buffer.source + + offset_cache = build_offset_cache(source) + result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version)), offset_cache) + + [ + build_ast(result.value, offset_cache), + build_comments(result.comments, offset_cache) + ] + ensure + @source_buffer = nil + end + + # Parses a source buffer and returns the AST, the source code comments, + # and the tokens emitted by the lexer. + def tokenize(source_buffer, recover = false) + @source_buffer = source_buffer + source = source_buffer.source + + offset_cache = build_offset_cache(source) + result = + begin + unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version)), offset_cache) + rescue ::Parser::SyntaxError + raise if !recover + end + + program, tokens = result.value + ast = build_ast(program, offset_cache) if result.success? + + [ + ast, + build_comments(result.comments, offset_cache), + build_tokens(tokens, offset_cache) + ] + ensure + @source_buffer = nil + end + + # Since prism resolves num params for us, we don't need to support this + # kind of logic here. + def try_declare_numparam(node) + node.children[0].match?(/\A_[1-9]\z/) + end + + private + + # This is a hook to allow consumers to disable some errors if they don't + # want them to block creating the syntax tree. + def valid_error?(error) + true + end + + # This is a hook to allow consumers to disable some warnings if they don't + # want them to block creating the syntax tree. + def valid_warning?(warning) + true + end + + # Build a diagnostic from the given prism parse error. + def error_diagnostic(error, offset_cache) + location = error.location + diagnostic_location = build_range(location, offset_cache) + + case error.type + when :argument_block_multi + Diagnostic.new(:error, :block_and_blockarg, {}, diagnostic_location, []) + when :argument_formal_constant + Diagnostic.new(:error, :argument_const, {}, diagnostic_location, []) + when :argument_formal_class + Diagnostic.new(:error, :argument_cvar, {}, diagnostic_location, []) + when :argument_formal_global + Diagnostic.new(:error, :argument_gvar, {}, diagnostic_location, []) + when :argument_formal_ivar + Diagnostic.new(:error, :argument_ivar, {}, diagnostic_location, []) + when :argument_no_forwarding_amp + Diagnostic.new(:error, :no_anonymous_blockarg, {}, diagnostic_location, []) + when :argument_no_forwarding_star + Diagnostic.new(:error, :no_anonymous_restarg, {}, diagnostic_location, []) + when :argument_no_forwarding_star_star + Diagnostic.new(:error, :no_anonymous_kwrestarg, {}, diagnostic_location, []) + when :begin_lonely_else + location = location.copy(length: 4) + diagnostic_location = build_range(location, offset_cache) + Diagnostic.new(:error, :useless_else, {}, diagnostic_location, []) + when :class_name, :module_name + Diagnostic.new(:error, :module_name_const, {}, diagnostic_location, []) + when :class_in_method + Diagnostic.new(:error, :class_in_def, {}, diagnostic_location, []) + when :def_endless_setter + Diagnostic.new(:error, :endless_setter, {}, diagnostic_location, []) + when :embdoc_term + Diagnostic.new(:error, :embedded_document, {}, diagnostic_location, []) + when :incomplete_variable_class, :incomplete_variable_class_3_3_0 + location = location.copy(length: location.length + 1) + diagnostic_location = build_range(location, offset_cache) + + Diagnostic.new(:error, :cvar_name, { name: location.slice }, diagnostic_location, []) + when :incomplete_variable_instance, :incomplete_variable_instance_3_3_0 + location = location.copy(length: location.length + 1) + diagnostic_location = build_range(location, offset_cache) + + Diagnostic.new(:error, :ivar_name, { name: location.slice }, diagnostic_location, []) + when :invalid_variable_global, :invalid_variable_global_3_3_0 + Diagnostic.new(:error, :gvar_name, { name: location.slice }, diagnostic_location, []) + when :module_in_method + Diagnostic.new(:error, :module_in_def, {}, diagnostic_location, []) + when :numbered_parameter_ordinary + Diagnostic.new(:error, :ordinary_param_defined, {}, diagnostic_location, []) + when :numbered_parameter_outer_scope + Diagnostic.new(:error, :numparam_used_in_outer_scope, {}, diagnostic_location, []) + when :parameter_circular + Diagnostic.new(:error, :circular_argument_reference, { var_name: location.slice }, diagnostic_location, []) + when :parameter_name_repeat + Diagnostic.new(:error, :duplicate_argument, {}, diagnostic_location, []) + when :parameter_numbered_reserved + Diagnostic.new(:error, :reserved_for_numparam, { name: location.slice }, diagnostic_location, []) + when :regexp_unknown_options + Diagnostic.new(:error, :regexp_options, { options: location.slice[1..] }, diagnostic_location, []) + when :singleton_for_literals + Diagnostic.new(:error, :singleton_literal, {}, diagnostic_location, []) + when :string_literal_eof + Diagnostic.new(:error, :string_eof, {}, diagnostic_location, []) + when :unexpected_token_ignore + Diagnostic.new(:error, :unexpected_token, { token: location.slice }, diagnostic_location, []) + when :write_target_in_method + Diagnostic.new(:error, :dynamic_const, {}, diagnostic_location, []) + else + PrismDiagnostic.new(error.message, :error, error.type, diagnostic_location) + end + end + + # Build a diagnostic from the given prism parse warning. + def warning_diagnostic(warning, offset_cache) + diagnostic_location = build_range(warning.location, offset_cache) + + case warning.type + when :ambiguous_first_argument_plus + Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "+" }, diagnostic_location, []) + when :ambiguous_first_argument_minus + Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "-" }, diagnostic_location, []) + when :ambiguous_prefix_ampersand + Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "&" }, diagnostic_location, []) + when :ambiguous_prefix_star + Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "*" }, diagnostic_location, []) + when :ambiguous_prefix_star_star + Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "**" }, diagnostic_location, []) + when :ambiguous_slash + Diagnostic.new(:warning, :ambiguous_regexp, {}, diagnostic_location, []) + when :dot_dot_dot_eol + Diagnostic.new(:warning, :triple_dot_at_eol, {}, diagnostic_location, []) + when :duplicated_hash_key + # skip, parser does this on its own + else + PrismDiagnostic.new(warning.message, :warning, warning.type, diagnostic_location) + end + end + + # If there was a error generated during the parse, then raise an + # appropriate syntax error. Otherwise return the result. + def unwrap(result, offset_cache) + result.errors.each do |error| + next unless valid_error?(error) + diagnostics.process(error_diagnostic(error, offset_cache)) + end + + result.warnings.each do |warning| + next unless valid_warning?(warning) + diagnostic = warning_diagnostic(warning, offset_cache) + diagnostics.process(diagnostic) if diagnostic + end + + result + end + + # Prism deals with offsets in bytes, while the parser gem deals with + # offsets in characters. We need to handle this conversion in order to + # build the parser gem AST. + # + # If the bytesize of the source is the same as the length, then we can + # just use the offset directly. Otherwise, we build an array where the + # index is the byte offset and the value is the character offset. + def build_offset_cache(source) + if source.bytesize == source.length + -> (offset) { offset } + else + offset_cache = [] + offset = 0 + + source.each_char do |char| + char.bytesize.times { offset_cache << offset } + offset += 1 + end + + offset_cache << offset + end + end + + # Build the parser gem AST from the prism AST. + def build_ast(program, offset_cache) + program.accept(Compiler.new(self, offset_cache)) + end + + # Build the parser gem comments from the prism comments. + def build_comments(comments, offset_cache) + comments.map do |comment| + ::Parser::Source::Comment.new(build_range(comment.location, offset_cache)) + end + end + + # Build the parser gem tokens from the prism tokens. + def build_tokens(tokens, offset_cache) + Lexer.new(source_buffer, tokens, offset_cache).to_a + end + + # Build a range from a prism location. + def build_range(location, offset_cache) + ::Parser::Source::Range.new( + source_buffer, + offset_cache[location.start_offset], + offset_cache[location.end_offset] + ) + end + + # Converts the version format handled by Parser to the format handled by Prism. + def convert_for_prism(version) + case version + when 33 + "3.3.0" + when 34 + "3.4.0" + else + "latest" + end + end + + require_relative "parser/compiler" + require_relative "parser/lexer" + + private_constant :Compiler + private_constant :Lexer + end + end +end |