summaryrefslogtreecommitdiff
path: root/lib/rdoc/parser/c.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rdoc/parser/c.rb')
-rw-r--r--lib/rdoc/parser/c.rb666
1 files changed, 666 insertions, 0 deletions
diff --git a/lib/rdoc/parser/c.rb b/lib/rdoc/parser/c.rb
new file mode 100644
index 0000000000..1db1e442c7
--- /dev/null
+++ b/lib/rdoc/parser/c.rb
@@ -0,0 +1,666 @@
+require 'rdoc/parser'
+require 'rdoc/known_classes'
+
+##
+# We attempt to parse C extension files. Basically we look for
+# the standard patterns that you find in extensions: <tt>rb_define_class,
+# rb_define_method</tt> and so on. We also try to find the corresponding
+# C source for the methods and extract comments, but if we fail
+# we don't worry too much.
+#
+# The comments associated with a Ruby method are extracted from the C
+# comment block associated with the routine that _implements_ that
+# method, that is to say the method whose name is given in the
+# <tt>rb_define_method</tt> call. For example, you might write:
+#
+# /*
+# * Returns a new array that is a one-dimensional flattening of this
+# * array (recursively). That is, for every element that is an array,
+# * extract its elements into the new array.
+# *
+# * s = [ 1, 2, 3 ] #=> [1, 2, 3]
+# * t = [ 4, 5, 6, [7, 8] ] #=> [4, 5, 6, [7, 8]]
+# * a = [ s, t, 9, 10 ] #=> [[1, 2, 3], [4, 5, 6, [7, 8]], 9, 10]
+# * a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+# */
+# static VALUE
+# rb_ary_flatten(ary)
+# VALUE ary;
+# {
+# ary = rb_obj_dup(ary);
+# rb_ary_flatten_bang(ary);
+# return ary;
+# }
+#
+# ...
+#
+# void
+# Init_Array()
+# {
+# ...
+# rb_define_method(rb_cArray, "flatten", rb_ary_flatten, 0);
+#
+# Here RDoc will determine from the rb_define_method line that there's a
+# method called "flatten" in class Array, and will look for the implementation
+# in the method rb_ary_flatten. It will then use the comment from that
+# method in the HTML output. This method must be in the same source file
+# as the rb_define_method.
+#
+# C classes can be diagrammed (see /tc/dl/ruby/ruby/error.c), and RDoc
+# integrates C and Ruby source into one tree
+#
+# The comment blocks may include special directives:
+#
+# [Document-class: <i>name</i>]
+# This comment block is documentation for the given class. Use this
+# when the <tt>Init_xxx</tt> method is not named after the class.
+#
+# [Document-method: <i>name</i>]
+# This comment documents the named method. Use when RDoc cannot
+# automatically find the method from it's declaration
+#
+# [call-seq: <i>text up to an empty line</i>]
+# Because C source doesn't give descripive names to Ruby-level parameters,
+# you need to document the calling sequence explicitly
+#
+# In addition, RDoc assumes by default that the C method implementing a
+# Ruby function is in the same source file as the rb_define_method call.
+# If this isn't the case, add the comment:
+#
+# rb_define_method(....); // in: filename
+#
+# As an example, we might have an extension that defines multiple classes
+# in its Init_xxx method. We could document them using
+#
+# /*
+# * Document-class: MyClass
+# *
+# * Encapsulate the writing and reading of the configuration
+# * file. ...
+# */
+#
+# /*
+# * Document-method: read_value
+# *
+# * call-seq:
+# * cfg.read_value(key) -> value
+# * cfg.read_value(key} { |key| } -> value
+# *
+# * Return the value corresponding to +key+ from the configuration.
+# * In the second form, if the key isn't found, invoke the
+# * block and return its value.
+# */
+
+class RDoc::Parser::C < RDoc::Parser
+
+ parse_files_matching(/\.(?:([CcHh])\1?|c([+xp])\2|y)\z/)
+
+ attr_writer :progress
+
+ @@enclosure_classes = {}
+ @@known_bodies = {}
+
+ ##
+ # Prepare to parse a C file
+
+ def initialize(top_level, file_name, content, options, stats)
+ super
+
+ @known_classes = RDoc::KNOWN_CLASSES.dup
+ @content = handle_tab_width handle_ifdefs_in(@content)
+ @classes = Hash.new
+ @file_dir = File.dirname(@file_name)
+ end
+
+ def do_aliases
+ @content.scan(%r{rb_define_alias\s*\(\s*(\w+),\s*"([^"]+)",\s*"([^"]+)"\s*\)}m) do
+ |var_name, new_name, old_name|
+ @stats.num_methods += 1
+ class_name = @known_classes[var_name] || var_name
+ class_obj = find_class(var_name, class_name)
+
+ class_obj.add_alias RDoc::Alias.new("", old_name, new_name, "")
+ end
+ end
+
+ def do_classes
+ @content.scan(/(\w+)\s* = \s*rb_define_module\s*\(\s*"(\w+)"\s*\)/mx) do
+ |var_name, class_name|
+ handle_class_module(var_name, "module", class_name, nil, nil)
+ end
+
+ # The '.' lets us handle SWIG-generated files
+ @content.scan(/([\w\.]+)\s* = \s*rb_define_class\s*
+ \(
+ \s*"(\w+)",
+ \s*(\w+)\s*
+ \)/mx) do |var_name, class_name, parent|
+ handle_class_module(var_name, "class", class_name, parent, nil)
+ end
+
+ @content.scan(/(\w+)\s*=\s*boot_defclass\s*\(\s*"(\w+?)",\s*(\w+?)\s*\)/) do
+ |var_name, class_name, parent|
+ parent = nil if parent == "0"
+ handle_class_module(var_name, "class", class_name, parent, nil)
+ end
+
+ @content.scan(/(\w+)\s* = \s*rb_define_module_under\s*
+ \(
+ \s*(\w+),
+ \s*"(\w+)"
+ \s*\)/mx) do |var_name, in_module, class_name|
+ handle_class_module(var_name, "module", class_name, nil, in_module)
+ end
+
+ @content.scan(/([\w\.]+)\s* = \s*rb_define_class_under\s*
+ \(
+ \s*(\w+),
+ \s*"(\w+)",
+ \s*(\w+)\s*
+ \s*\)/mx) do |var_name, in_module, class_name, parent|
+ handle_class_module(var_name, "class", class_name, parent, in_module)
+ end
+ end
+
+ def do_constants
+ @content.scan(%r{\Wrb_define_
+ (
+ variable |
+ readonly_variable |
+ const |
+ global_const |
+ )
+ \s*\(
+ (?:\s*(\w+),)?
+ \s*"(\w+)",
+ \s*(.*?)\s*\)\s*;
+ }xm) do |type, var_name, const_name, definition|
+ var_name = "rb_cObject" if !var_name or var_name == "rb_mKernel"
+ handle_constants(type, var_name, const_name, definition)
+ end
+ end
+
+ ##
+ # Look for includes of the form:
+ #
+ # rb_include_module(rb_cArray, rb_mEnumerable);
+
+ def do_includes
+ @content.scan(/rb_include_module\s*\(\s*(\w+?),\s*(\w+?)\s*\)/) do |c,m|
+ if cls = @classes[c]
+ m = @known_classes[m] || m
+ cls.add_include RDoc::Include.new(m, "")
+ end
+ end
+ end
+
+ def do_methods
+ @content.scan(%r{rb_define_
+ (
+ singleton_method |
+ method |
+ module_function |
+ private_method
+ )
+ \s*\(\s*([\w\.]+),
+ \s*"([^"]+)",
+ \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
+ \s*(-?\w+)\s*\)
+ (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
+ }xm) do
+ |type, var_name, meth_name, meth_body, param_count, source_file|
+
+ # Ignore top-object and weird struct.c dynamic stuff
+ next if var_name == "ruby_top_self"
+ next if var_name == "nstr"
+ next if var_name == "envtbl"
+ next if var_name == "argf" # it'd be nice to handle this one
+
+ var_name = "rb_cObject" if var_name == "rb_mKernel"
+ handle_method(type, var_name, meth_name,
+ meth_body, param_count, source_file)
+ end
+
+ @content.scan(%r{rb_define_attr\(
+ \s*([\w\.]+),
+ \s*"([^"]+)",
+ \s*(\d+),
+ \s*(\d+)\s*\);
+ }xm) do |var_name, attr_name, attr_reader, attr_writer|
+ #var_name = "rb_cObject" if var_name == "rb_mKernel"
+ handle_attr(var_name, attr_name,
+ attr_reader.to_i != 0,
+ attr_writer.to_i != 0)
+ end
+
+ @content.scan(%r{rb_define_global_function\s*\(
+ \s*"([^"]+)",
+ \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
+ \s*(-?\w+)\s*\)
+ (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
+ }xm) do |meth_name, meth_body, param_count, source_file|
+ handle_method("method", "rb_mKernel", meth_name,
+ meth_body, param_count, source_file)
+ end
+
+ @content.scan(/define_filetest_function\s*\(
+ \s*"([^"]+)",
+ \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
+ \s*(-?\w+)\s*\)/xm) do
+ |meth_name, meth_body, param_count|
+
+ handle_method("method", "rb_mFileTest", meth_name, meth_body, param_count)
+ handle_method("singleton_method", "rb_cFile", meth_name, meth_body, param_count)
+ end
+ end
+
+ def find_attr_comment(attr_name)
+ if @content =~ %r{((?>/\*.*?\*/\s+))
+ rb_define_attr\((?:\s*(\w+),)?\s*"#{attr_name}"\s*,.*?\)\s*;}xmi
+ $1
+ elsif @content =~ %r{Document-attr:\s#{attr_name}\s*?\n((?>.*?\*/))}m
+ $1
+ else
+ ''
+ end
+ end
+
+ ##
+ # Find the C code corresponding to a Ruby method
+
+ def find_body(meth_name, meth_obj, body, quiet = false)
+ case body
+ when %r"((?>/\*.*?\*/\s*))(?:static\s+)?VALUE\s+#{meth_name}
+ \s*(\([^)]*\))\s*\{.*?^\}"xm
+ comment, params = $1, $2
+ body_text = $&
+
+ remove_private_comments(comment) if comment
+
+ # see if we can find the whole body
+
+ re = Regexp.escape(body_text) + '[^(]*^\{.*?^\}'
+ if Regexp.new(re, Regexp::MULTILINE).match(body)
+ body_text = $&
+ end
+
+ # The comment block may have been overridden with a 'Document-method'
+ # block. This happens in the interpreter when multiple methods are
+ # vectored through to the same C method but those methods are logically
+ # distinct (for example Kernel.hash and Kernel.object_id share the same
+ # implementation
+
+ override_comment = find_override_comment(meth_obj.name)
+ comment = override_comment if override_comment
+
+ find_modifiers(comment, meth_obj) if comment
+
+# meth_obj.params = params
+ meth_obj.start_collecting_tokens
+ meth_obj.add_token(RDoc::RubyToken::Token.new(1,1).set_text(body_text))
+ meth_obj.comment = mangle_comment(comment)
+ when %r{((?>/\*.*?\*/\s*))^\s*\#\s*define\s+#{meth_name}\s+(\w+)}m
+ comment = $1
+ find_body($2, meth_obj, body, true)
+ find_modifiers(comment, meth_obj)
+ meth_obj.comment = mangle_comment(comment) + meth_obj.comment
+ when %r{^\s*\#\s*define\s+#{meth_name}\s+(\w+)}m
+ unless find_body($1, meth_obj, body, true)
+ warn "No definition for #{meth_name}" unless quiet
+ return false
+ end
+ else
+
+ # No body, but might still have an override comment
+ comment = find_override_comment(meth_obj.name)
+
+ if comment
+ find_modifiers(comment, meth_obj)
+ meth_obj.comment = mangle_comment(comment)
+ else
+ warn "No definition for #{meth_name}" unless quiet
+ return false
+ end
+ end
+ true
+ end
+
+ def find_class(raw_name, name)
+ unless @classes[raw_name]
+ if raw_name =~ /^rb_m/
+ container = @top_level.add_module RDoc::NormalModule, name
+ else
+ container = @top_level.add_class RDoc::NormalClass, name, nil
+ end
+
+ container.record_location @top_level
+ @classes[raw_name] = container
+ end
+ @classes[raw_name]
+ end
+
+ ##
+ # Look for class or module documentation above Init_+class_name+(void),
+ # in a Document-class +class_name+ (or module) comment or above an
+ # rb_define_class (or module). If a comment is supplied above a matching
+ # Init_ and a rb_define_class the Init_ comment is used.
+ #
+ # /*
+ # * This is a comment for Foo
+ # */
+ # Init_Foo(void) {
+ # VALUE cFoo = rb_define_class("Foo", rb_cObject);
+ # }
+ #
+ # /*
+ # * Document-class: Foo
+ # * This is a comment for Foo
+ # */
+ # Init_foo(void) {
+ # VALUE cFoo = rb_define_class("Foo", rb_cObject);
+ # }
+ #
+ # /*
+ # * This is a comment for Foo
+ # */
+ # VALUE cFoo = rb_define_class("Foo", rb_cObject);
+
+ def find_class_comment(class_name, class_meth)
+ comment = nil
+ if @content =~ %r{((?>/\*.*?\*/\s+))
+ (static\s+)?void\s+Init_#{class_name}\s*(?:_\(\s*)?\(\s*(?:void\s*)\)}xmi
+ comment = $1
+ elsif @content =~ %r{Document-(class|module):\s#{class_name}\s*?\n((?>.*?\*/))}m
+ comment = $2
+ else
+ if @content =~ /rb_define_(class|module)/m then
+ class_name = class_name.split("::").last
+ comments = []
+ @content.split(/(\/\*.*?\*\/)\s*?\n/m).each_with_index do |chunk, index|
+ comments[index] = chunk
+ if chunk =~ /rb_define_(class|module).*?"(#{class_name})"/m then
+ comment = comments[index-1]
+ break
+ end
+ end
+ end
+ end
+ class_meth.comment = mangle_comment(comment) if comment
+ end
+
+ ##
+ # Finds a comment matching +type+ and +const_name+ either above the
+ # comment or in the matching Document- section.
+
+ def find_const_comment(type, const_name)
+ if @content =~ %r{((?>^\s*/\*.*?\*/\s+))
+ rb_define_#{type}\((?:\s*(\w+),)?\s*"#{const_name}"\s*,.*?\)\s*;}xmi
+ $1
+ elsif @content =~ %r{Document-(?:const|global|variable):\s#{const_name}\s*?\n((?>.*?\*/))}m
+ $1
+ else
+ ''
+ end
+ end
+
+ ##
+ # If the comment block contains a section that looks like:
+ #
+ # call-seq:
+ # Array.new
+ # Array.new(10)
+ #
+ # use it for the parameters.
+
+ def find_modifiers(comment, meth_obj)
+ if comment.sub!(/:nodoc:\s*^\s*\*?\s*$/m, '') or
+ comment.sub!(/\A\/\*\s*:nodoc:\s*\*\/\Z/, '')
+ meth_obj.document_self = false
+ end
+ if comment.sub!(/call-seq:(.*?)^\s*\*?\s*$/m, '') or
+ comment.sub!(/\A\/\*\s*call-seq:(.*?)\*\/\Z/, '')
+ seq = $1
+ seq.gsub!(/^\s*\*\s*/, '')
+ meth_obj.call_seq = seq
+ end
+ end
+
+ def find_override_comment(meth_name)
+ name = Regexp.escape(meth_name)
+ if @content =~ %r{Document-method:\s#{name}\s*?\n((?>.*?\*/))}m
+ $1
+ end
+ end
+
+ def handle_attr(var_name, attr_name, reader, writer)
+ rw = ''
+ if reader
+ #@stats.num_methods += 1
+ rw << 'R'
+ end
+ if writer
+ #@stats.num_methods += 1
+ rw << 'W'
+ end
+
+ class_name = @known_classes[var_name]
+
+ return unless class_name
+
+ class_obj = find_class(var_name, class_name)
+
+ if class_obj
+ comment = find_attr_comment(attr_name)
+ unless comment.empty?
+ comment = mangle_comment(comment)
+ end
+ att = RDoc::Attr.new '', attr_name, rw, comment
+ class_obj.add_attribute(att)
+ end
+ end
+
+ def handle_class_module(var_name, class_mod, class_name, parent, in_module)
+ progress(class_mod[0, 1])
+
+ parent_name = @known_classes[parent] || parent
+
+ if in_module
+ enclosure = @classes[in_module] || @@enclosure_classes[in_module]
+ unless enclosure
+ if enclosure = @known_classes[in_module]
+ handle_class_module(in_module, (/^rb_m/ =~ in_module ? "module" : "class"),
+ enclosure, nil, nil)
+ enclosure = @classes[in_module]
+ end
+ end
+ unless enclosure
+ warn("Enclosing class/module '#{in_module}' for " +
+ "#{class_mod} #{class_name} not known")
+ return
+ end
+ else
+ enclosure = @top_level
+ end
+
+ if class_mod == "class"
+ cm = enclosure.add_class RDoc::NormalClass, class_name, parent_name
+ @stats.num_classes += 1
+ else
+ cm = enclosure.add_module RDoc::NormalModule, class_name
+ @stats.num_modules += 1
+ end
+ cm.record_location(enclosure.toplevel)
+
+ find_class_comment(cm.full_name, cm)
+ @classes[var_name] = cm
+ @@enclosure_classes[var_name] = cm
+ @known_classes[var_name] = cm.full_name
+ end
+
+ ##
+ # Adds constant comments. By providing some_value: at the start ofthe
+ # comment you can override the C value of the comment to give a friendly
+ # definition.
+ #
+ # /* 300: The perfect score in bowling */
+ # rb_define_const(cFoo, "PERFECT", INT2FIX(300);
+ #
+ # Will override +INT2FIX(300)+ with the value +300+ in the output RDoc.
+ # Values may include quotes and escaped colons (\:).
+
+ def handle_constants(type, var_name, const_name, definition)
+ #@stats.num_constants += 1
+ class_name = @known_classes[var_name]
+
+ return unless class_name
+
+ class_obj = find_class(var_name, class_name)
+
+ unless class_obj
+ warn("Enclosing class/module '#{const_name}' for not known")
+ return
+ end
+
+ comment = find_const_comment(type, const_name)
+
+ # In the case of rb_define_const, the definition and comment are in
+ # "/* definition: comment */" form. The literal ':' and '\' characters
+ # can be escaped with a backslash.
+ if type.downcase == 'const' then
+ elements = mangle_comment(comment).split(':')
+ if elements.nil? or elements.empty? then
+ con = RDoc::Constant.new(const_name, definition,
+ mangle_comment(comment))
+ else
+ new_definition = elements[0..-2].join(':')
+ if new_definition.empty? then # Default to literal C definition
+ new_definition = definition
+ else
+ new_definition.gsub!("\:", ":")
+ new_definition.gsub!("\\", '\\')
+ end
+ new_definition.sub!(/\A(\s+)/, '')
+ new_comment = $1.nil? ? elements.last : "#{$1}#{elements.last.lstrip}"
+ con = RDoc::Constant.new(const_name, new_definition,
+ mangle_comment(new_comment))
+ end
+ else
+ con = RDoc::Constant.new const_name, definition, mangle_comment(comment)
+ end
+
+ class_obj.add_constant(con)
+ end
+
+ ##
+ # Removes #ifdefs that would otherwise confuse us
+
+ def handle_ifdefs_in(body)
+ body.gsub(/^#ifdef HAVE_PROTOTYPES.*?#else.*?\n(.*?)#endif.*?\n/m, '\1')
+ end
+
+ def handle_method(type, var_name, meth_name, meth_body, param_count,
+ source_file = nil)
+ progress(".")
+
+ @stats.num_methods += 1
+ class_name = @known_classes[var_name]
+
+ return unless class_name
+
+ class_obj = find_class(var_name, class_name)
+
+ if class_obj
+ if meth_name == "initialize"
+ meth_name = "new"
+ type = "singleton_method"
+ end
+ meth_obj = RDoc::AnyMethod.new("", meth_name)
+ meth_obj.singleton =
+ %w{singleton_method module_function}.include?(type)
+
+ p_count = (Integer(param_count) rescue -1)
+
+ if p_count < 0
+ meth_obj.params = "(...)"
+ elsif p_count == 0
+ meth_obj.params = "()"
+ else
+ meth_obj.params = "(" + (1..p_count).map{|i| "p#{i}"}.join(", ") + ")"
+ end
+
+ if source_file
+ file_name = File.join(@file_dir, source_file)
+ body = (@@known_bodies[source_file] ||= File.read(file_name))
+ else
+ body = @content
+ end
+ if find_body(meth_body, meth_obj, body) and meth_obj.document_self
+ class_obj.add_method(meth_obj)
+ end
+ end
+ end
+
+ def handle_tab_width(body)
+ if /\t/ =~ body
+ tab_width = @options.tab_width
+ body.split(/\n/).map do |line|
+ 1 while line.gsub!(/\t+/) { ' ' * (tab_width*$&.length - $`.length % tab_width)} && $~ #`
+ line
+ end .join("\n")
+ else
+ body
+ end
+ end
+
+ ##
+ # Remove the /*'s and leading asterisks from C comments
+
+ def mangle_comment(comment)
+ comment.sub!(%r{/\*+}) { " " * $&.length }
+ comment.sub!(%r{\*+/}) { " " * $&.length }
+ comment.gsub!(/^[ \t]*\*/m) { " " * $&.length }
+ comment
+ end
+
+ def progress(char)
+ unless @options.quiet
+ @progress.print(char)
+ @progress.flush
+ end
+ end
+
+ ##
+ # Removes lines that are commented out that might otherwise get picked up
+ # when scanning for classes and methods
+
+ def remove_commented_out_lines
+ @content.gsub!(%r{//.*rb_define_}, '//')
+ end
+
+ def remove_private_comments(comment)
+ comment.gsub!(/\/?\*--(.*?)\/?\*\+\+/m, '')
+ comment.sub!(/\/?\*--.*/m, '')
+ end
+
+ ##
+ # Extract the classes/modules and methods from a C file and return the
+ # corresponding top-level object
+
+ def scan
+ remove_commented_out_lines
+ do_classes
+ do_constants
+ do_methods
+ do_includes
+ do_aliases
+ @top_level
+ end
+
+ def warn(msg)
+ $stderr.puts
+ $stderr.puts msg
+ $stderr.flush
+ end
+
+end
+