1 files changed, 277 insertions, 0 deletions
diff --git a/lib/rdoc/parser.rb b/lib/rdoc/parser.rb
new file mode 100644
index 0000000000..2b826d9284
--- /dev/null
+++ b/lib/rdoc/parser.rb
@@ -0,0 +1,277 @@
+# -*- coding: us-ascii -*-
+# frozen_string_literal: true
+
+##
+# A parser is simply a class that subclasses RDoc::Parser and implements #scan
+# to fill in an RDoc::TopLevel with parsed data.
+#
+# The initialize method takes an RDoc::TopLevel to fill with parsed content,
+# the name of the file to be parsed, the content of the file, an RDoc::Options
+# object and an RDoc::Stats object to inform the user of parsed items.  The
+# scan method is then called to parse the file and must return the
+# RDoc::TopLevel object.  By calling super these items will be set for you.
+#
+# In order to be used by RDoc the parser needs to register the file extensions
+# it can parse.  Use ::parse_files_matching to register extensions.
+#
+#   require 'rdoc'
+#
+#   class RDoc::Parser::Xyz < RDoc::Parser
+#     parse_files_matching /\.xyz$/
+#
+#     def initialize top_level, file_name, content, options, stats
+#       super
+#
+#       # extra initialization if needed
+#     end
+#
+#     def scan
+#       # parse file and fill in @top_level
+#     end
+#   end
+
+class RDoc::Parser
+
+  @parsers = [] # [regexp, parser class] pairs, searched front to back
+
+  class << self
+
+    ##
+    # An Array of <tt>[regexp, parser_class]</tt> pairs mapping file extension
+    # (or name) regular expressions to the parser classes for matching names.
+    # Entries are added at the front, so later registrations are found first.
+    # Use parse_files_matching to register a parser's file extensions.
+
+    attr_reader :parsers
+
+  end
+
+  ##
+  # The name of the file being parsed, as given to ::new
+
+  attr_reader :file_name
+
+  ##
+  # Alias an extension to another extension.  After this call, files ending
+  # in +new_ext+ (matched literally, so "c++" is safe) use the same parser as
+  # +old_ext+.  Returns false if no parser handles +old_ext+, true otherwise.
+
+  def self.alias_extension(old_ext, new_ext)
+    old_ext = old_ext.sub(/^\.(.*)/, '\1')
+    new_ext = new_ext.sub(/^\.(.*)/, '\1')
+
+    parser = can_parse_by_name "xxx.#{old_ext}"
+    return false unless parser
+
+    RDoc::Parser.parsers.unshift [/\.#{Regexp.escape new_ext}$/, parser]
+    true
+  end
+
+  ##
+  # Determines if the file is a "binary" file which basically means it has
+  # content that an RDoc parser shouldn't try to consume.
+
+  def self.binary?(file)
+    return false if file =~ /\.(rdoc|txt)$/
+
+    head = File.read(file, 1024)
+    return false unless head # nothing could be read
+    return true if head[0, 2] == Marshal.dump('')[0, 2] || head.index("\x00")
+
+    head.sub!(/\A#!.*\n/, '') # assume shebang line isn't longer than 1024.
+    declared = head[/^\s*\#\s*(?:-\*-\s*)?(?:en)?coding:\s*([^\s;]+?)(?:-\*-|[\s;])/, 1]
+    # source encoding defaults to utf-8 unless a coding comment overrides it
+    open_mode = declared ? "rb:#{declared}" : 'r:utf-8'
+    sample = File.open(file, open_mode) { |io| io.gets(nil, 1024) }
+
+    !sample.valid_encoding?
+  end
+
+  ##
+  # Checks if +file+ is a zip file in disguise.  Signatures from
+  # http://www.garykessler.net/library/file_sigs.html
+
+  def self.zip? file
+    magic = File.read file, 4
+    signatures = ["PK\x03\x04", "PK\x05\x06", "PK\x07\x08"]
+
+    signatures.include? magic
+  rescue
+    # any StandardError while reading means "not a zip"
+    false
+  end
+
+  ##
+  # Return a parser that can handle a particular extension
+
+  def self.can_parse file_name
+    candidate = can_parse_by_name file_name
+
+    # HACK Selenium hides a jar file using a .txt extension
+    plain_text = candidate == RDoc::Parser::Simple
+
+    (plain_text && zip?(file_name)) ? nil : candidate
+  end
+
+  ##
+  # Returns a parser that can handle the extension for +file_name+.  This does
+  # not depend upon the file being readable.
+
+  def self.can_parse_by_name file_name
+    _, handler = RDoc::Parser.parsers.find { |pattern,| pattern =~ file_name }
+
+    # Without an extension there is nothing further to check
+    extension = File.extname file_name
+    return handler if extension.empty?
+
+    # The default parser must not parse binary files: reject the file when
+    # its modeline names a type other than rdoc
+    if handler == RDoc::Parser::Simple and extension !~ /txt|rdoc/ then
+      modeline = check_modeline file_name
+      return nil unless modeline.nil? || modeline == 'rdoc'
+    end
+
+    handler
+  rescue Errno::EACCES
+  end
+
+  ##
+  # Returns the file type from the modeline in +file_name+
+
+  def self.check_modeline file_name
+    # File.open rather than Kernel#open, which would run a file name that
+    # starts with "|" as a command
+    line = File.open(file_name) { |io| io.gets }
+
+    /-\*-\s*(.*?\S)\s*-\*-/ =~ line
+    type = $1
+    return nil unless type
+
+    if /;/ =~ type then
+      return nil unless /(?:\s|\A)mode:\s*([^\s;]+)/i =~ type
+      type = $1
+    end
+
+    return nil if /coding:/i =~ type
+
+    type.downcase
+  rescue ArgumentError, Encoding::InvalidByteSequenceError
+    # e.g. an invalid byte sequence in the first line: report no modeline
+    nil
+  end
+
+  ##
+  # Finds and instantiates the correct parser for the given +file_name+ and
+  # +content+.
+
+  def self.for top_level, file_name, content, options, stats
+    return if binary? file_name
+
+    # A "markup:" comment in the content wins over the file name
+    parser_class = use_markup content
+
+    unless parser_class then
+      lookup_name = file_name
+
+      # If no extension, look for shebang
+      if file_name !~ /\.\w+$/ && content =~ %r{\A#!(.+)} then
+        interpreter = $1
+        # a ruby shebang makes the file count as a .rb file
+        if interpreter =~ %r{env\s+ruby} || interpreter =~ %r{/ruby}
+          lookup_name = 'dummy.rb'
+        end
+      end
+
+      parser_class = can_parse lookup_name
+    end
+
+    return unless parser_class
+
+    stripped = remove_modeline content
+    parser_class.new top_level, file_name, stripped, options, stats
+  rescue SystemCallError
+    nil
+  end
+
+  ##
+  # Registers this parser class for file names matching +regexp+.
+  #
+  # May be called several times; each call takes priority over earlier ones.
+
+  def self.parse_files_matching(regexp)
+    RDoc::Parser.parsers.insert 0, [regexp, self]
+  end
+
+  ##
+  # Strips the first line of +content+ when it holds an emacs-style modeline
+
+  def self.remove_modeline content
+    content.sub(/\A.*-\*-\s*(.*?\S)\s*-\*-.*\r?\n/) { '' }
+  end
+
+  ##
+  # If there is a <tt>markup: parser_name</tt> comment at the front of the
+  # file, use it to determine the parser.  For example:
+  #
+  #   # markup: rdoc
+  #   # Class comment can go here
+  #
+  #   class C
+  #   end
+  #
+  # The comment should appear as the first line of the +content+.
+  #
+  # If the content contains a shebang or editor modeline the comment may
+  # appear on the second or third line.
+  #
+  # Any comment style may be used to hide the markup comment.
+
+  def self.use_markup content
+    requested = content.lines.first(3).grep(/markup:\s+(\w+)/) { $1 }.first
+    return unless requested
+
+    # TODO Ruby should be returned only when the filename is correct
+    return RDoc::Parser::Ruby if %w[tomdoc markdown].include? requested
+
+    # compare against the last component of each parser's class name
+    name_pattern = /^#{Regexp.escape requested}$/i
+    match = RDoc::Parser.parsers.find do |_, parser|
+      name_pattern =~ parser.name.sub(/.*:/, '')
+    end
+
+    _, selected = match
+    selected
+  end
+
+  ##
+  # Creates a new Parser storing +top_level+, +file_name+, +content+,
+  # +options+ and +stats+ in instance variables.  In +@preprocess+ an
+  # RDoc::Markup::PreProcess object is created which allows processing of
+  # directives.
+
+  def initialize top_level, file_name, content, options, stats
+    @file_name, @content = file_name, content
+    @options, @stats = options, stats
+
+    @top_level = top_level
+    @top_level.parser = self.class # note which parser class handles the file
+    @store = @top_level.store
+
+    # the PreProcess object allows processing of directives
+    include_paths = @options.rdoc_include
+    @preprocess = RDoc::Markup::PreProcess.new @file_name, include_paths
+    @preprocess.options = @options
+  end
+
+  autoload :RubyTools, 'rdoc/parser/ruby_tools'
+  autoload :Text,      'rdoc/parser/text'
+
+end
+
+# simple must come first in order to show up last in the parsers list
+require 'rdoc/parser/simple'
+require 'rdoc/parser/c'
+require 'rdoc/parser/changelog'
+require 'rdoc/parser/markdown'
+require 'rdoc/parser/rd'
+require 'rdoc/parser/ruby'