diff options
Diffstat (limited to 'lib/prism/translation/parser.rb')
| -rw-r--r-- | lib/prism/translation/parser.rb | 376 |
1 files changed, 376 insertions, 0 deletions
diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb new file mode 100644 index 0000000000..70031f133a --- /dev/null +++ b/lib/prism/translation/parser.rb @@ -0,0 +1,376 @@ +# frozen_string_literal: true +# :markup: markdown + +begin + required_version = ">= 3.3.7.2" + gem "parser", required_version + require "parser" +rescue LoadError + warn(<<~MSG) + Error: Unable to load parser #{required_version}. \ + Add `gem "parser"` to your Gemfile or run `bundle update parser`. + MSG + exit(1) +end + +module Prism + module Translation + # This class is the entry-point for converting a prism syntax tree into the + # whitequark/parser gem's syntax tree. It inherits from the base parser for + # the parser gem, and overrides the parse* methods to parse with prism and + # then translate. + # + # Note that this version of the parser always parses using the latest + # version of Ruby syntax supported by Prism. If you want specific version + # support, use one of the version-specific subclasses, such as + # `Prism::Translation::Parser34`. If you want to parse using the same + # version of Ruby syntax as the currently running version of Ruby, use + # `Prism::Translation::ParserCurrent`. + class Parser < ::Parser::Base + Diagnostic = ::Parser::Diagnostic # :nodoc: + private_constant :Diagnostic + + # The parser gem has a list of diagnostics with a hard-coded set of error + # messages. We create our own diagnostic class in order to set our own + # error messages. + class PrismDiagnostic < Diagnostic # :nodoc: + # This is the cached message coming from prism. + attr_reader :message + + # Initialize a new diagnostic with the given message and location. + def initialize(message, level, reason, location) + @message = message + super(level, reason, {}, location, []) + end + end + + Racc_debug_parser = false # :nodoc: + + # The `builder` argument is used to create the parser using our custom builder class by default. + # + # By using the `:parser` keyword argument, you can translate in a way that is compatible with + # the Parser gem using any parser. + # + # For example, in RuboCop for Ruby LSP, the following approach can be used to improve performance + # by reusing a pre-parsed `Prism::ParseLexResult`: + # + # class PrismPreparsed + # def initialize(prism_result) + # @prism_result = prism_result + # end + # + # def parse_lex(source, **options) + # @prism_result + # end + # end + # + # prism_preparsed = PrismPreparsed.new(prism_result) + # + # Prism::Translation::Ruby34.new(builder, parser: prism_preparsed) + # + # In an object passed to the `:parser` keyword argument, the `parse` and `parse_lex` methods + # should be implemented as needed. + # + def initialize(builder = Prism::Translation::Parser::Builder.new, parser: Prism) + if !builder.is_a?(Prism::Translation::Parser::Builder) + warn(<<~MSG, uplevel: 1, category: :deprecated) + [deprecation]: The builder passed to `Prism::Translation::Parser.new` is not a \ + `Prism::Translation::Parser::Builder` subclass. This will raise in the next major version. + MSG + end + @parser = parser + + super(builder) + end + + def version # :nodoc: + 41 + end + + # The default encoding for Ruby files is UTF-8. + def default_encoding + Encoding::UTF_8 + end + + def yyerror # :nodoc: + end + + # Parses a source buffer and returns the AST. + def parse(source_buffer) + @source_buffer = source_buffer + source = source_buffer.source + + offset_cache = build_offset_cache(source) + result = unwrap(@parser.parse(source, **prism_options), offset_cache) + + build_ast(result.value, offset_cache) + ensure + @source_buffer = nil + end + + # Parses a source buffer and returns the AST and the source code comments. + def parse_with_comments(source_buffer) + @source_buffer = source_buffer + source = source_buffer.source + + offset_cache = build_offset_cache(source) + result = unwrap(@parser.parse(source, **prism_options), offset_cache) + + [ + build_ast(result.value, offset_cache), + build_comments(result.comments, offset_cache) + ] + ensure + @source_buffer = nil + end + + # Parses a source buffer and returns the AST, the source code comments, + # and the tokens emitted by the lexer. + def tokenize(source_buffer, recover = false) + @source_buffer = source_buffer + source = source_buffer.source + + offset_cache = build_offset_cache(source) + result = + begin + unwrap(@parser.parse_lex(source, **prism_options), offset_cache) + rescue ::Parser::SyntaxError + raise if !recover + end + + program, tokens = result.value + ast = build_ast(program, offset_cache) if result.success? + + [ + ast, + build_comments(result.comments, offset_cache), + build_tokens(tokens, offset_cache) + ] + ensure + @source_buffer = nil + end + + # Since prism resolves num params for us, we don't need to support this + # kind of logic here. + def try_declare_numparam(node) + node.children[0].match?(/\A_[1-9]\z/) + end + + private + + # This is a hook to allow consumers to disable some errors if they don't + # want them to block creating the syntax tree. + def valid_error?(error) + true + end + + # This is a hook to allow consumers to disable some warnings if they don't + # want them to block creating the syntax tree. + def valid_warning?(warning) + true + end + + # Build a diagnostic from the given prism parse error. + def error_diagnostic(error, offset_cache) + location = error.location + diagnostic_location = build_range(location, offset_cache) + + case error.type + when :argument_block_multi + Diagnostic.new(:error, :block_and_blockarg, {}, diagnostic_location, []) + when :argument_formal_constant + Diagnostic.new(:error, :argument_const, {}, diagnostic_location, []) + when :argument_formal_class + Diagnostic.new(:error, :argument_cvar, {}, diagnostic_location, []) + when :argument_formal_global + Diagnostic.new(:error, :argument_gvar, {}, diagnostic_location, []) + when :argument_formal_ivar + Diagnostic.new(:error, :argument_ivar, {}, diagnostic_location, []) + when :argument_no_forwarding_amp + Diagnostic.new(:error, :no_anonymous_blockarg, {}, diagnostic_location, []) + when :argument_no_forwarding_star + Diagnostic.new(:error, :no_anonymous_restarg, {}, diagnostic_location, []) + when :argument_no_forwarding_star_star + Diagnostic.new(:error, :no_anonymous_kwrestarg, {}, diagnostic_location, []) + when :begin_lonely_else + location = location.copy(length: 4) + diagnostic_location = build_range(location, offset_cache) + Diagnostic.new(:error, :useless_else, {}, diagnostic_location, []) + when :class_name, :module_name + Diagnostic.new(:error, :module_name_const, {}, diagnostic_location, []) + when :class_in_method + Diagnostic.new(:error, :class_in_def, {}, diagnostic_location, []) + when :def_endless_setter + Diagnostic.new(:error, :endless_setter, {}, diagnostic_location, []) + when :embdoc_term + Diagnostic.new(:error, :embedded_document, {}, diagnostic_location, []) + when :incomplete_variable_class, :incomplete_variable_class_3_3 + location = location.copy(length: location.length + 1) + diagnostic_location = build_range(location, offset_cache) + + Diagnostic.new(:error, :cvar_name, { name: location.slice }, diagnostic_location, []) + when :incomplete_variable_instance, :incomplete_variable_instance_3_3 + location = location.copy(length: location.length + 1) + diagnostic_location = build_range(location, offset_cache) + + Diagnostic.new(:error, :ivar_name, { name: location.slice }, diagnostic_location, []) + when :invalid_variable_global, :invalid_variable_global_3_3 + Diagnostic.new(:error, :gvar_name, { name: location.slice }, diagnostic_location, []) + when :module_in_method + Diagnostic.new(:error, :module_in_def, {}, diagnostic_location, []) + when :numbered_parameter_ordinary + Diagnostic.new(:error, :ordinary_param_defined, {}, diagnostic_location, []) + when :numbered_parameter_outer_scope + Diagnostic.new(:error, :numparam_used_in_outer_scope, {}, diagnostic_location, []) + when :parameter_circular + Diagnostic.new(:error, :circular_argument_reference, { var_name: location.slice }, diagnostic_location, []) + when :parameter_name_repeat + Diagnostic.new(:error, :duplicate_argument, {}, diagnostic_location, []) + when :parameter_numbered_reserved + Diagnostic.new(:error, :reserved_for_numparam, { name: location.slice }, diagnostic_location, []) + when :regexp_unknown_options + Diagnostic.new(:error, :regexp_options, { options: location.slice[1..] }, diagnostic_location, []) + when :singleton_for_literals + Diagnostic.new(:error, :singleton_literal, {}, diagnostic_location, []) + when :string_literal_eof + Diagnostic.new(:error, :string_eof, {}, diagnostic_location, []) + when :unexpected_token_ignore + Diagnostic.new(:error, :unexpected_token, { token: location.slice }, diagnostic_location, []) + when :write_target_in_method + Diagnostic.new(:error, :dynamic_const, {}, diagnostic_location, []) + else + PrismDiagnostic.new(error.message, :error, error.type, diagnostic_location) + end + end + + # Build a diagnostic from the given prism parse warning. + def warning_diagnostic(warning, offset_cache) + diagnostic_location = build_range(warning.location, offset_cache) + + case warning.type + when :ambiguous_first_argument_plus + Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "+" }, diagnostic_location, []) + when :ambiguous_first_argument_minus + Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "-" }, diagnostic_location, []) + when :ambiguous_prefix_ampersand + Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "&" }, diagnostic_location, []) + when :ambiguous_prefix_star + Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "*" }, diagnostic_location, []) + when :ambiguous_prefix_star_star + Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "**" }, diagnostic_location, []) + when :ambiguous_slash + Diagnostic.new(:warning, :ambiguous_regexp, {}, diagnostic_location, []) + when :dot_dot_dot_eol + Diagnostic.new(:warning, :triple_dot_at_eol, {}, diagnostic_location, []) + when :duplicated_hash_key + # skip, parser does this on its own + else + PrismDiagnostic.new(warning.message, :warning, warning.type, diagnostic_location) + end + end + + # If there was a error generated during the parse, then raise an + # appropriate syntax error. Otherwise return the result. + def unwrap(result, offset_cache) + result.errors.each do |error| + next unless valid_error?(error) + diagnostics.process(error_diagnostic(error, offset_cache)) + end + + result.warnings.each do |warning| + next unless valid_warning?(warning) + diagnostic = warning_diagnostic(warning, offset_cache) + diagnostics.process(diagnostic) if diagnostic + end + + result + end + + # Prism deals with offsets in bytes, while the parser gem deals with + # offsets in characters. We need to handle this conversion in order to + # build the parser gem AST. + # + # If the bytesize of the source is the same as the length, then we can + # just use the offset directly. Otherwise, we build an array where the + # index is the byte offset and the value is the character offset. + def build_offset_cache(source) + if source.bytesize == source.length + -> (offset) { offset } + else + offset_cache = [] + offset = 0 + + source.each_char do |char| + char.bytesize.times { offset_cache << offset } + offset += 1 + end + + offset_cache << offset + end + end + + # Build the parser gem AST from the prism AST. + def build_ast(program, offset_cache) + program.accept(Compiler.new(self, offset_cache)) + end + + # Build the parser gem comments from the prism comments. + def build_comments(comments, offset_cache) + comments.map do |comment| + ::Parser::Source::Comment.new(build_range(comment.location, offset_cache)) + end + end + + # Build the parser gem tokens from the prism tokens. + def build_tokens(tokens, offset_cache) + Lexer.new(source_buffer, tokens, offset_cache).to_a + end + + # Build a range from a prism location. + def build_range(location, offset_cache) + ::Parser::Source::Range.new( + source_buffer, + offset_cache[location.start_offset], + offset_cache[location.end_offset] + ) + end + + # Options for how prism should parse/lex the source. + def prism_options + options = { + filepath: @source_buffer.name, + version: convert_for_prism(version), + partial_script: true, + } + # The parser gem always encodes to UTF-8, unless it is binary. + # https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/source/buffer.rb#L80-L107 + options[:encoding] = false if @source_buffer.source.encoding != Encoding::BINARY + + options + end + + # Converts the version format handled by Parser to the format handled by Prism. + def convert_for_prism(version) + case version + when 33 + "3.3.1" + when 34 + "3.4.0" + when 35, 40 + "4.0.0" + when 41 + "4.1.0" + else + "latest" + end + end + + require_relative "parser/builder" + require_relative "parser/compiler" + require_relative "parser/lexer" + + private_constant :Compiler + private_constant :Lexer + end + end +end |
