# We attempt to parse C extension files. Basically we look for # the standard patterns that you find in extensions: rb_define_class, # rb_define_method and so on. We also try to find the corresponding # C source for the methods and extract comments, but if we fail # we don't worry too much. # # The comments associated with a Ruby method are extracted from the C # comment block associated with the routine that _implements_ that # method, that is to say the method whose name is given in the # rb_define_method call. For example, you might write: # # /* # * Returns a new array that is a one-dimensional flattening of this # * array (recursively). That is, for every element that is an array, # * extract its elements into the new array. # * # * s = [ 1, 2, 3 ] #=> [1, 2, 3] # * t = [ 4, 5, 6, [7, 8] ] #=> [4, 5, 6, [7, 8]] # * a = [ s, t, 9, 10 ] #=> [[1, 2, 3], [4, 5, 6, [7, 8]], 9, 10] # * a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] # */ # static VALUE # rb_ary_flatten(ary) # VALUE ary; # { # ary = rb_obj_dup(ary); # rb_ary_flatten_bang(ary); # return ary; # } # # ... # # void # Init_Array() # { # ... # rb_define_method(rb_cArray, "flatten", rb_ary_flatten, 0); # # Here RDoc will determine from the rb_define_method line that there's a # method called "flatten" in class Array, and will look for the implementation # in the method rb_ary_flatten. It will then use the comment from that # method in the HTML output. This method must be in the same source file # as the rb_define_method. # # C classes can be diagramed (see /tc/dl/ruby/ruby/error.c), and RDoc # integrates C and Ruby source into one tree # # The comment blocks may include special direcives: # # [Document-class: name] # This comment block is documentation for the given class. Use this # when the Init_xxx method is not named after the class. # # [Document-method: name] # This comment documents the named method. Use when RDoc cannot outomatically # find the method from it's declaration # # [call-seq: text up to an empty line] # Because C source doesn't give descripive names to Ruby-level parameters, # you need to document the calling sequence explicitly # # In additon, RDoc assumes by default that the C method implementing a # Ruby function is in the same source file as the rb_define_method call. # If this isn't the case, add the comment # # rb_define_method(....); // in: filename # # As an example, we might have an extension that defines multiple classes # in its Init_xxx method. We could document them using # # # /* # * Document-class: MyClass # * # * Encapsulate the writing and reading of the configuration # * file. ... # */ # # /* # * Document-method: read_value # * # * call-seq: # * cfg.read_value(key) -> value # * cfg.read_value(key} { |key| } -> value # * # * Return the value corresponding to +key+ from the configuration. # * In the second form, if the key isn't found, invoke the # * block and return its value. # */ # # Classes and modules built in to the interpreter. We need # these to define superclasses of user objects require "rdoc/code_objects" require "rdoc/parsers/parserfactory" module RDoc KNOWN_CLASSES = { "rb_cObject" => "Object", "rb_cArray" => "Array", "rb_cBignum" => "Bignum", "rb_cClass" => "Class", "rb_cDir" => "Dir", "rb_cData" => "Data", "rb_cFalseClass" => "FalseClass", "rb_cFile" => "File", "rb_cFixnum" => "Fixnum", "rb_cFloat" => "Float", "rb_cHash" => "Hash", "rb_cInteger" => "Integer", "rb_cIO" => "IO", "rb_cModule" => "Module", "rb_cNilClass" => "NilClass", "rb_cNumeric" => "Numeric", "rb_cProc" => "Proc", "rb_cRange" => "Range", "rb_cRegexp" => "Regexp", "rb_cString" => "String", "rb_cSymbol" => "Symbol", "rb_cThread" => "Thread", "rb_cTime" => "Time", "rb_cTrueClass" => "TrueClass", "rb_cStruct" => "Struct", "rb_eException" => "Exception", "rb_eStandardError" => "StandardError", "rb_eSystemExit" => "SystemExit", "rb_eInterrupt" => "Interrupt", "rb_eSignal" => "Signal", "rb_eFatal" => "Fatal", "rb_eArgError" => "ArgError", "rb_eEOFError" => "EOFError", "rb_eIndexError" => "IndexError", "rb_eRangeError" => "RangeError", "rb_eIOError" => "IOError", "rb_eRuntimeError" => "RuntimeError", "rb_eSecurityError" => "SecurityError", "rb_eSystemCallError" => "SystemCallError", "rb_eTypeError" => "TypeError", "rb_eZeroDivError" => "ZeroDivError", "rb_eNotImpError" => "NotImpError", "rb_eNoMemError" => "NoMemError", "rb_eFloatDomainError" => "FloatDomainError", "rb_eScriptError" => "ScriptError", "rb_eNameError" => "NameError", "rb_eSyntaxError" => "SyntaxError", "rb_eLoadError" => "LoadError", "rb_mKernel" => "Kernel", "rb_mComparable" => "Comparable", "rb_mEnumerable" => "Enumerable", "rb_mPrecision" => "Precision", "rb_mErrno" => "Errno", "rb_mFileTest" => "FileTest", "rb_mGC" => "GC", "rb_mMath" => "Math", "rb_mProcess" => "Process" } # See rdoc/c_parse.rb class C_Parser extend ParserFactory parse_files_matching(/\.(c|cc|cpp|CC)$/) @@known_bodies = {} # prepare to parse a C file def initialize(top_level, file_name, body, options, stats) @known_classes = KNOWN_CLASSES.dup @body = handle_tab_width(handle_ifdefs_in(body)) @options = options @stats = stats @top_level = top_level @classes = Hash.new @file_dir = File.dirname(file_name) end # Extract the classes/modules and methods from a C file # and return the corresponding top-level object def scan remove_commented_out_lines do_classes do_constants do_methods do_includes do_aliases @top_level end ####### private ####### def progress(char) unless @options.quiet $stderr.print(char) $stderr.flush end end def warn(msg) $stderr.puts $stderr.puts msg $stderr.flush end # remove lines that are commented out that might otherwise get # picked up when scanning for classes and methods def remove_commented_out_lines @body.gsub!(%r{//.*rb_define_}, '//') end def handle_class_module(var_name, class_mod, class_name, parent, in_module) progress(class_mod[0, 1]) parent_name = @known_classes[parent] || parent if in_module enclosure = @classes[in_module] unless enclosure warn("Enclosing class/module '#{in_module}' for " + "#{class_mod} #{class_name} not known") return end else enclosure = @top_level end if class_mod == "class" cm = enclosure.add_class(NormalClass, class_name, parent_name) @stats.num_classes += 1 else cm = enclosure.add_module(NormalModule, class_name) @stats.num_modules += 1 end cm.record_location(enclosure.toplevel) find_class_comment(cm.full_name, cm) @classes[var_name] = cm @known_classes[var_name] = cm.full_name end ############################################################ def find_class_comment(class_name, class_meth) comment = nil if @body =~ %r{((?>/\*.*?\*/\s+)) (static\s+)?void\s+Init_#{class_name}\s*\(\)}xmi comment = $1 elsif @body =~ %r{Document-(class|module):\s#{class_name}\s*?\n((?>.*?\*/))}m comment = $2 end class_meth.comment = mangle_comment(comment) if comment end ############################################################ def do_classes @body.scan(/(\w+)\s* = \s*rb_define_module\s*\(\s*"(\w+)"\s*\)/mx) do |var_name, class_name| handle_class_module(var_name, "module", class_name, nil, nil) end # The '.' lets us handle SWIG-generated files @body.scan(/([\w\.]+)\s* = \s*rb_define_class\s* \( \s*"(\w+)", \s*(\w+)\s* \)/mx) do |var_name, class_name, parent| handle_class_module(var_name, "class", class_name, parent, nil) end @body.scan(/(\w+)\s*=\s*boot_defclass\s*\(\s*"(\w+?)",\s*(\w+?)\s*\)/) do |var_name, class_name, parent| parent = nil if parent == "0" handle_class_module(var_name, "class", class_name, parent, nil) end @body.scan(/(\w+)\s* = \s*rb_define_module_under\s* \( \s*(\w+), \s*"(\w+)" \s*\)/mx) do |var_name, in_module, class_name| handle_class_module(var_name, "module", class_name, nil, in_module) end @body.scan(/([\w\.]+)\s* = \s*rb_define_class_under\s* \( \s*(\w+), \s*"(\w+)", \s*(\w+)\s* \s*\)/mx) do |var_name, in_module, class_name, parent| handle_class_module(var_name, "class", class_name, parent, in_module) end end ########################################################### def do_constants @body.scan(%r{\Wrb_define_ ( variable | readonly_variable | const | global_const | ) \s*\( (?:\s*(\w+),)? \s*"(\w+)", \s*(.*?)\s*\)\s*; }xm) do |type, var_name, const_name, definition| var_name = "rb_cObject" if !var_name or var_name == "rb_mKernel" handle_constants(type, var_name, const_name, definition) end end ############################################################ def do_methods @body.scan(%r{rb_define_ ( singleton_method | method | module_function | private_method ) \s*\(\s*([\w\.]+), \s*"([^"]+)", \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?, \s*(-?\w+)\s*\) (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))? }xm) do |type, var_name, meth_name, meth_body, param_count, source_file| #" # Ignore top-object and weird struct.c dynamic stuff next if var_name == "ruby_top_self" next if var_name == "nstr" next if var_name == "envtbl" next if var_name == "argf" # it'd be nice to handle this one var_name = "rb_cObject" if var_name == "rb_mKernel" handle_method(type, var_name, meth_name, meth_body, param_count, source_file) end @body.scan(%r{rb_define_attr\( \s*([\w\.]+), \s*"([^"]+)", \s*(\d+), \s*(\d+)\s*\); }xm) do #" |var_name, attr_name, attr_reader, attr_writer| #var_name = "rb_cObject" if var_name == "rb_mKernel" handle_attr(var_name, attr_name, attr_reader.to_i != 0, attr_writer.to_i != 0) end @body.scan(%r{rb_define_global_function\s*\( \s*"([^"]+)", \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?, \s*(-?\w+)\s*\) (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))? }xm) do #" |meth_name, meth_body, param_count, source_file| handle_method("method", "rb_mKernel", meth_name, meth_body, param_count, source_file) end @body.scan(/define_filetest_function\s*\( \s*"([^"]+)", \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?, \s*(-?\w+)\s*\)/xm) do #" |meth_name, meth_body, param_count| handle_method("method", "rb_mFileTest", meth_name, meth_body, param_count) handle_method("singleton_method", "rb_cFile", meth_name, meth_body, param_count) end end ############################################################ def do_aliases @body.scan(%r{rb_define_alias\s*\(\s*(\w+),\s*"([^"]+)",\s*"([^"]+)"\s*\)}m) do |var_name, new_name, old_name| @stats.num_methods += 1 class_name = @known_classes[var_name] || var_name class_obj = find_class(var_name, class_name) class_obj.add_alias(Alias.new("", old_name, new_name, "")) end end ############################################################ def handle_constants(type, var_name, const_name, definition) #@stats.num_constants += 1 class_name = @known_classes[var_name] return unless class_name class_obj = find_class(var_name, class_name) unless class_obj warn("Enclosing class/module '#{const_name}' for not known") return end comment = find_const_comment(type, const_name) con = Constant.new(const_name, definition, mangle_comment(comment)) class_obj.add_constant(con) end ########################################################### def find_const_comment(type, const_name) if @body =~ %r{((?>/\*.*?\*/\s+)) rb_define_#{type}\((?:\s*(\w+),)?\s*"#{const_name}"\s*,.*?\)\s*;}xmi $1 elsif @body =~ %r{Document-(?:const|global|variable):\s#{const_name}\s*?\n((?>.*?\*/))}m $1 else '' end end ########################################################### def handle_attr(var_name, attr_name, reader, writer) rw = '' if reader #@stats.num_methods += 1 rw << 'R' end if writer #@stats.num_methods += 1 rw << 'W' end class_name = @known_classes[var_name] return unless class_name class_obj = find_class(var_name, class_name) if class_obj comment = find_attr_comment(attr_name) unless comment.empty? comment = mangle_comment(comment) end att = Attr.new('', attr_name, rw, comment) class_obj.add_attribute(att) end end ########################################################### def find_attr_comment(attr_name) if @body =~ %r{((?>/\*.*?\*/\s+)) rb_define_attr\((?:\s*(\w+),)?\s*"#{attr_name}"\s*,.*?\)\s*;}xmi $1 elsif @body =~ %r{Document-attr:\s#{attr_name}\s*?\n((?>.*?\*/))}m $1 else '' end end ########################################################### def handle_method(type, var_name, meth_name, meth_body, param_count, source_file = nil) progress(".") @stats.num_methods += 1 class_name = @known_classes[var_name] return unless class_name class_obj = find_class(var_name, class_name) if class_obj if meth_name == "initialize" meth_name = "new" type = "singleton_method" end meth_obj = AnyMethod.new("", meth_name) meth_obj.singleton = type == "singleton_method" p_count = (Integer(param_count) rescue -1) if p_count < 0 meth_obj.params = "(...)" elsif p_count == 0 meth_obj.params = "()" else meth_obj.params = "(" + (1..p_count).map{|i| "p#{i}"}.join(", ") + ")" end if source_file file_name = File.join(@file_dir, source_file) body = (@@known_bodies[source_file] ||= File.read(file_name)) else body = @body end find_body(meth_body, meth_obj, body) class_obj.add_method(meth_obj) end end ############################################################ # Find the C code corresponding to a Ruby method def find_body(meth_name, meth_obj, body) if body =~ %r{((?>/\*.*?\*/\s*))(static\s+)?VALUE\s+#{meth_name} \s*(\(.*?\)).*?^}xm comment, params = $1, $3 body_text = $& # see if we can find the whole body re = Regexp.escape(body_text) + '[^(]*^\{.*?^\}' if Regexp.new(re, Regexp::MULTILINE).match(body) body_text = $& end # The comment block may have been overridden with a # 'Document-method' block. This happens in the interpreter # when multiple methods are vectored through to the same # C method but those methods are logically distinct (for # example Kernel.hash and Kernel.object_id share the same # implementation override_comment = find_override_comment(meth_obj.name) comment = override_comment if override_comment find_call_seq(comment, meth_obj) if comment # meth_obj.params = params meth_obj.start_collecting_tokens meth_obj.add_token(RubyToken::Token.new(1,1).set_text(body_text)) meth_obj.comment = mangle_comment(comment) else # No body, but might still have an override comment comment = find_override_comment(meth_obj.name) if comment find_call_seq(comment, meth_obj) meth_obj.comment = mangle_comment(comment) else warn "No definition for #{meth_name}" end end end ################################################## # # If the comment block contains a section that looks like # call-seq: # Array.new # Array.new(10) # use it for the parameters def find_call_seq(comment, meth_obj) if comment.sub!(/call-seq:(.*?)^\s*\*?\s*$/m, '') seq = $1 seq.gsub!(/^\s*\*\s*/, '') meth_obj.call_seq = seq end end ############################################################ def find_override_comment(meth_name) name = Regexp.escape(meth_name) if @body =~ %r{Document-method:\s#{name}\s*?\n((?>.*?\*/))}m $1 end end ############################################################ # Look for includes of the form # rb_include_module(rb_cArray, rb_mEnumerable); def do_includes @body.scan(/rb_include_module\s*\(\s*(\w+?),\s*(\w+?)\s*\)/) do |c,m| if cls = @classes[c] m = @known_classes[m] || m cls.add_include(Include.new(m, "")) end end end ############################################################ # Remove the /*'s and leading asterisks from C comments def mangle_comment(comment) comment.sub!(%r{/\*+}) { " " * $&.length } comment.sub!(%r{\*+/}) { " " * $&.length } comment.gsub!(/^[ \t]*\*/m) { " " * $&.length } comment end def find_class(raw_name, name) unless @classes[raw_name] if raw_name =~ /^rb_m/ @classes[raw_name] = @top_level.add_module(NormalModule, name) else @classes[raw_name] = @top_level.add_class(NormalClass, name, nil) end end @classes[raw_name] end def handle_tab_width(body) if /\t/ =~ body tab_width = Options.instance.tab_width body.split(/\n/).map do |line| 1 while line.gsub!(/\t+/) { ' ' * (tab_width*$&.length - $`.length % tab_width)} && $~ #` line end .join("\n") else body end end # Remove #ifdefs that would otherwise confuse us def handle_ifdefs_in(body) body.gsub(/^#ifdef HAVE_PROTOTYPES.*?#else.*?\n(.*?)#endif.*?\n/m) { $1 } end end end