diff options
Diffstat (limited to 'lib/rdoc/parser.rb')
| -rw-r--r-- | lib/rdoc/parser.rb | 277 |
1 files changed, 277 insertions, 0 deletions
# -*- coding: us-ascii -*-
# frozen_string_literal: true

##
# A parser is simply a class that subclasses RDoc::Parser and implements #scan
# to fill in an RDoc::TopLevel with parsed data.
#
# The initialize method takes an RDoc::TopLevel to fill with parsed content,
# the name of the file to be parsed, the content of the file, an RDoc::Options
# object and an RDoc::Stats object to inform the user of parsed items.  The
# scan method is then called to parse the file and must return the
# RDoc::TopLevel object.  By calling super these items will be set for you.
#
# In order to be used by RDoc the parser needs to register the file extensions
# it can parse.  Use ::parse_files_matching to register extensions.
#
#   require 'rdoc'
#
#   class RDoc::Parser::Xyz < RDoc::Parser
#     parse_files_matching /\.xyz$/
#
#     def initialize top_level, file_name, content, options, stats
#       super
#
#       # extra initialization if needed
#     end
#
#     def scan
#       # parse file and fill in @top_level
#     end
#   end

class RDoc::Parser

  @parsers = []

  class << self

    ##
    # An Array of arrays that maps file extension (or name) regular
    # expressions to parser classes that will parse matching filenames.
    #
    # Use parse_files_matching to register a parser's file extensions.

    attr_reader :parsers

  end

  ##
  # The name of the file being parsed

  attr_reader :file_name

  ##
  # Alias an extension to another extension.  After this call, files ending
  # "new_ext" will be parsed using the same parser as "old_ext".
  #
  # A leading dot on either extension is ignored.  +new_ext+ is matched
  # literally, so extensions containing regular expression metacharacters
  # (such as "c++") work as expected.
  #
  # Returns true when the alias was registered and false when no parser is
  # registered for +old_ext+.

  def self.alias_extension(old_ext, new_ext)
    old_ext = old_ext.sub(/^\.(.*)/, '\1')
    new_ext = new_ext.sub(/^\.(.*)/, '\1')

    parser = can_parse_by_name "xxx.#{old_ext}"
    return false unless parser

    # Escape the extension so it is matched as text, not as a pattern.
    RDoc::Parser.parsers.unshift [/\.#{Regexp.escape new_ext}$/, parser]

    true
  end

  ##
  # Determines if the file is a "binary" file which basically means it has
  # content that an RDoc parser shouldn't try to consume.
  #
  # Files named *.rdoc or *.txt are never considered binary.  Otherwise the
  # first 1024 bytes are inspected: a Marshal header or a NUL byte means
  # binary, as does content that is not valid in the encoding declared by a
  # magic comment (UTF-8 when none is declared).

  def self.binary?(file)
    return false if file =~ /\.(rdoc|txt)$/

    # File.read with a length returns nil for an empty file.
    s = File.read(file, 1024)
    return false unless s

    return true if s[0, 2] == Marshal.dump('')[0, 2] || s.index("\x00")

    mode = 'r:utf-8' # default source encoding has been changed to utf-8
    s.sub!(/\A#!.*\n/, '') # assume shebang line isn't longer than 1024.
    encoding = s[/^\s*\#\s*(?:-\*-\s*)?(?:en)?coding:\s*([^\s;]+?)(?:-\*-|[\s;])/, 1]
    mode = "rb:#{encoding}" if encoding

    # Re-read the head of the file in the selected encoding to validate it.
    s = File.open(file, mode) {|f| f.gets(nil, 1024)}

    !s.valid_encoding?
  end

  ##
  # Checks if +file+ is a zip file in disguise.  Signatures from
  # http://www.garykessler.net/library/file_sigs.html
  #
  # Returns false when the file cannot be read.

  def self.zip? file
    zip_signature = File.read file, 4

    zip_signature == "PK\x03\x04" ||
      zip_signature == "PK\x05\x06" ||
      zip_signature == "PK\x07\x08"
  rescue
    false
  end

  ##
  # Return a parser that can handle a particular extension
  #
  # Returns nil when no parser applies or when the plain-text parser was
  # selected for a file that is really a zip archive.

  def self.can_parse file_name
    parser = can_parse_by_name file_name

    # HACK Selenium hides a jar file using a .txt extension
    return if parser == RDoc::Parser::Simple && zip?(file_name)

    parser
  end

  ##
  # Returns a parser that can handle the extension for +file_name+.  This does
  # not depend upon the file being readable.
  #
  # When only the default RDoc::Parser::Simple matches and the extension is
  # not txt or rdoc, the file's modeline is consulted and nil is returned if
  # it names a type other than rdoc.  Returns nil on Errno::EACCES.

  def self.can_parse_by_name file_name
    _, parser = RDoc::Parser.parsers.find { |regexp,| regexp =~ file_name }

    # The default parser must not parse binary files
    ext_name = File.extname file_name
    return parser if ext_name.empty?

    if parser == RDoc::Parser::Simple && ext_name !~ /txt|rdoc/ then
      case check_modeline file_name
      when nil, 'rdoc' then # continue
      else return nil
      end
    end

    parser
  rescue Errno::EACCES
    # unreadable file: report that no parser applies
    nil
  end

  ##
  # Returns the file type from the modeline in +file_name+
  #
  # Only the first line of the file is examined.  Returns the downcased type,
  # or nil when there is no modeline, when the modeline only declares a
  # coding, or when the line contains bytes invalid for its encoding.

  def self.check_modeline file_name
    # File.open, not Kernel#open: a file name beginning with "|" must be
    # treated as a path and never run as a command.
    line = File.open file_name do |io|
      io.gets
    end

    /-\*-\s*(.*?\S)\s*-\*-/ =~ line

    type = $1
    return nil unless type

    if /;/ =~ type then
      # multiple settings: only an explicit "mode:" entry names the type
      return nil unless /(?:\s|\A)mode:\s*([^\s;]+)/i =~ type
      type = $1
    end

    return nil if /coding:/i =~ type

    type.downcase
  rescue ArgumentError
    nil
  rescue Encoding::InvalidByteSequenceError # invalid byte sequence
    nil
  end

  ##
  # Finds and instantiates the correct parser for the given +file_name+ and
  # +content+.
  #
  # Returns nil for binary files, when no parser can handle the file, or when
  # a SystemCallError is raised while examining it.

  def self.for top_level, file_name, content, options, stats
    return if binary? file_name

    parser = use_markup content

    unless parser then
      parse_name = file_name

      # If no extension, look for shebang
      if file_name !~ /\.\w+$/ && content =~ %r{\A#!(.+)} then
        shebang = $1
        case shebang
        when %r{env\s+ruby}, %r{/ruby}
          parse_name = 'dummy.rb'
        end
      end

      parser = can_parse parse_name
    end

    return unless parser

    content = remove_modeline content

    parser.new top_level, file_name, content, options, stats
  rescue SystemCallError
    nil
  end

  ##
  # Record which file types this parser can understand.
  #
  # It is ok to call this multiple times.
  #
  # The most recently registered +regexp+ is consulted first.

  def self.parse_files_matching(regexp)
    RDoc::Parser.parsers.unshift [regexp, self]
  end

  ##
  # Removes an emacs-style modeline from the first line of the document
  #
  # Returns a new String; +content+ is not modified.

  def self.remove_modeline content
    content.sub(/\A.*-\*-\s*(.*?\S)\s*-\*-.*\r?\n/, '')
  end

  ##
  # If there is a <tt>markup: parser_name</tt> comment at the front of the
  # file, use it to determine the parser.  For example:
  #
  #   # markup: rdoc
  #   # Class comment can go here
  #
  #   class C
  #   end
  #
  # The comment should appear as the first line of the +content+.
  #
  # If the content contains a shebang or editor modeline the comment may
  # appear on the second or third line.
  #
  # Any comment style may be used to hide the markup comment.
  #
  # Returns nil when there is no markup comment or no registered parser
  # matches the requested name.

  def self.use_markup content
    markup = content.lines.first(3).grep(/markup:\s+(\w+)/) { $1 }.first

    return unless markup

    # TODO Ruby should be returned only when the filename is correct
    return RDoc::Parser::Ruby if %w[tomdoc markdown].include? markup

    markup = Regexp.escape markup

    # Compare against the last component of each parser's class name.
    _, selected = RDoc::Parser.parsers.find do |_, parser|
      /^#{markup}$/i =~ parser.name.sub(/.*:/, '')
    end

    selected
  end

  ##
  # Creates a new Parser storing +top_level+, +file_name+, +content+,
  # +options+ and +stats+ in instance variables.  In +@preprocess+ an
  # RDoc::Markup::PreProcess object is created which allows processing of
  # directives.
  #
  # The parser's class is recorded on +top_level+ and the store of
  # +top_level+ is kept in +@store+.

  def initialize top_level, file_name, content, options, stats
    @top_level = top_level
    @top_level.parser = self.class
    @store = @top_level.store

    @file_name = file_name
    @content = content
    @options = options
    @stats = stats

    @preprocess = RDoc::Markup::PreProcess.new @file_name, @options.rdoc_include
    @preprocess.options = @options
  end

  autoload :RubyTools, 'rdoc/parser/ruby_tools'
  autoload :Text,      'rdoc/parser/text'

end

# simple must come first in order to show up last in the parsers list
require 'rdoc/parser/simple'
require 'rdoc/parser/c'
require 'rdoc/parser/changelog'
require 'rdoc/parser/markdown'
require 'rdoc/parser/rd'
require 'rdoc/parser/ruby'
