summaryrefslogtreecommitdiff
path: root/lib/rdoc/parsers/parse_c.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rdoc/parsers/parse_c.rb')
-rw-r--r--lib/rdoc/parsers/parse_c.rb287
1 files changed, 287 insertions, 0 deletions
diff --git a/lib/rdoc/parsers/parse_c.rb b/lib/rdoc/parsers/parse_c.rb
new file mode 100644
index 0000000000..3d9d7a247f
--- /dev/null
+++ b/lib/rdoc/parsers/parse_c.rb
@@ -0,0 +1,287 @@
+ # We attempt to parse C extension files. Basically we look for
+ # the standard patterns that you find in extensions: <tt>rb_define_class,
+ # rb_define_method</tt> and so on. We also try to find the corresponding
+ # C source for the methods and extract comments, but if we fail
+ # we don't worry too much.
+ #
+ # The comments associated with a Ruby method are extracted from the C
+ # comment block associated with the routine that _implements_ that
+ # method, that is to say the method whose name is given in the
+ # <tt>rb_define_method</tt> call. For example, you might write:
+ #
+ # /*
+ # * Returns a new array that is a one-dimensional flattening of this
+ # * array (recursively). That is, for every element that is an array,
+ # * extract its elements into the new array.
+ # *
+ # * s = [ 1, 2, 3 ] #=> [1, 2, 3]
+ # * t = [ 4, 5, 6, [7, 8] ] #=> [4, 5, 6, [7, 8]]
+ # * a = [ s, t, 9, 10 ] #=> [[1, 2, 3], [4, 5, 6, [7, 8]], 9, 10]
+ # * a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ # */
+ # static VALUE
+ # rb_ary_flatten(ary)
+ # VALUE ary;
+ # {
+ # ary = rb_obj_dup(ary);
+ # rb_ary_flatten_bang(ary);
+ # return ary;
+ # }
+ #
+ # ...
+ #
+ # void
+ # Init_Array()
+ # {
+ # ...
+ # rb_define_method(rb_cArray, "flatten", rb_ary_flatten, 0);
+ #
+ # Here RDoc will determine from the rb_define_method line that there's a
+ # method called "flatten" in class Array, and will look for the implementation
+ # in the method rb_ary_flatten. It will then use the comment from that
+ # method in the HTML output. This method must be in the same source file
+ # as the rb_define_method.
+ #
+ # C classes can be diagramed (see /tc/dl/ruby/ruby/error.c), and RDoc
+ # integrates C and Ruby source into one tree
+
+ # Classes and modules built in to the interpreter. We need
+ # these to define superclasses of user objects
+
+require "rdoc/code_objects"
+require "rdoc/parsers/parserfactory"
+
+
+module RDoc
+
+ KNOWN_CLASSES = {
+ "rb_cObject" => "Object",
+ "rb_cArray" => "Array",
+ "rb_cBignum" => "Bignum",
+ "rb_cClass" => "Class",
+ "rb_cDir" => "Dir",
+ "rb_cData" => "Data",
+ "rb_cFalseClass" => "FalseClass",
+ "rb_cFile" => "File",
+ "rb_cFixnum" => "Fixnum",
+ "rb_cFloat" => "Float",
+ "rb_cHash" => "Hash",
+ "rb_cInteger" => "Integer",
+ "rb_cIO" => "IO",
+ "rb_cModule" => "Module",
+ "rb_cNilClass" => "NilClass",
+ "rb_cNumeric" => "Numeric",
+ "rb_cProc" => "Proc",
+ "rb_cRange" => "Range",
+ "rb_cRegexp" => "Regexp",
+ "rb_cString" => "String",
+ "rb_cSymbol" => "Symbol",
+ "rb_cThread" => "Thread",
+ "rb_cTime" => "Time",
+ "rb_cTrueClass" => "TrueClass",
+ "rb_cStruct" => "Struct",
+ "rb_eException" => "Exception",
+ "rb_eStandardError" => "StandardError",
+ "rb_eSystemExit" => "SystemExit",
+ "rb_eInterrupt" => "Interrupt",
+ "rb_eSignal" => "Signal",
+ "rb_eFatal" => "Fatal",
+ "rb_eArgError" => "ArgError",
+ "rb_eEOFError" => "EOFError",
+ "rb_eIndexError" => "IndexError",
+ "rb_eRangeError" => "RangeError",
+ "rb_eIOError" => "IOError",
+ "rb_eRuntimeError" => "RuntimeError",
+ "rb_eSecurityError" => "SecurityError",
+ "rb_eSystemCallError" => "SystemCallError",
+ "rb_eTypeError" => "TypeError",
+ "rb_eZeroDivError" => "ZeroDivError",
+ "rb_eNotImpError" => "NotImpError",
+ "rb_eNoMemError" => "NoMemError",
+ "rb_eFloatDomainError" => "FloatDomainError",
+ "rb_eScriptError" => "ScriptError",
+ "rb_eNameError" => "NameError",
+ "rb_eSyntaxError" => "SyntaxError",
+ "rb_eLoadError" => "LoadError",
+
+ "rb_mKernel" => "Kernel",
+ "rb_mComparable" => "Comparable",
+ "rb_mEnumerable" => "Enumerable",
+ "rb_mPrecision" => "Precision",
+ "rb_mErrno" => "Errno",
+ "rb_mFileTest" => "FileTest",
+ "rb_mGC" => "GC",
+ "rb_mMath" => "Math",
+ "rb_mProcess" => "Process"
+
+ }
+
+ # See rdoc/c_parse.rb
+
+ class C_Parser
+
+
+ extend ParserFactory
+ parse_files_matching(/\.(c|cc|cpp|CC)$/)
+
+
+ # prepare to parse a C file
+ def initialize(top_level, file_name, body, options)
+ @known_classes = KNOWN_CLASSES.dup
+ @body = body
+ @options = options
+ @top_level = top_level
+ @classes = Hash.new
+ end
+
+ # Extract the classes/modules and methods from a C file
+ # and return the corresponding top-level object
+ def scan
+ remove_commented_out_lines
+ do_classes
+ do_methods
+ @top_level
+ end
+
+ #######
+ private
+ #######
+
+ # remove lines that are commented out that might otherwise get
+ # picked up when scanning for classes and methods
+
+ def remove_commented_out_lines
+ @body.gsub!(%r{//.*rb_define_}, '//')
+ end
+
+ def handle_class_module(var_name, class_mod, class_name, parent, in_module)
+ @known_classes[var_name] = class_name
+ parent_name = @known_classes[parent] || parent
+
+ if in_module
+ enclosure = @classes[in_module]
+ unless enclosure
+ $stderr.puts("Enclosing class/module '#{in_module}' for " +
+ class_mod + " #{class_name} not known")
+ return
+ end
+ else
+ enclosure = @top_level
+ end
+
+ if class_mod == "class"
+ cm = enclosure.add_class(NormalClass, class_name, parent_name)
+ else
+ cm = enclosure.add_module(NormalModule, class_name)
+ end
+ cm.record_location(enclosure.toplevel)
+ @classes[var_name] = cm
+ end
+
+
+
+ def do_classes
+ @body.scan(/(\w+)\s* = \s*rb_define_module\(\s*"(\w+)"\s*\)/mx) do
+ |var_name, class_name|
+ handle_class_module(var_name, "module", class_name, nil, nil)
+ end
+
+ @body.scan(/(\w+)\s* = \s*rb_define_module_under
+ \(
+ \s*(\w+),
+ \s*"(\w+)"
+ \)/mx) do
+
+ |var_name, in_module, class_name|
+ handle_class_module(var_name, "module", class_name, nil, in_module)
+ end
+
+ @body.scan(/(\w+)\s* = \s*rb_define_class
+ \(
+ \s*"(\w+)",
+ \s*(\w+)\s*
+ \)/mx) do
+
+ |var_name, class_name, parent|
+ handle_class_module(var_name, "class", class_name, parent, nil)
+ end
+
+ @body.scan(/(\w+)\s* = \s*rb_define_class_under
+ \(
+ \s*(\w+),
+ \s*"(\w+)",
+ \s*(\w+)\s*
+ \)/mx) do
+
+ |var_name, in_module, class_name, parent|
+ handle_class_module(var_name, "class", class_name, parent, in_module)
+ end
+
+ end
+
+
+ def do_methods
+ @body.scan(/rb_define_(singleton_method|method|module_function)\(\s*(\w+),
+ \s*"([^"]+)",
+ \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
+ \s*(-?\w+)\s*\)/xm) do
+ |type, var_name, meth_name, meth_body, param_count|
+
+ class_name = @known_classes[var_name] || var_name
+ class_obj = @classes[var_name]
+ if class_obj
+ meth_obj = AnyMethod.new("", meth_name)
+ meth_obj.singleton = type == "singleton_method"
+
+ p_count = (Integer(param_count) rescue -1)
+
+ if p_count < 0
+ meth_obj.params = "(...)"
+ elsif p_count == 0
+ meth_obj.params = "()"
+ else
+ meth_obj.params = "(" +
+ (1..p_count).map{|i| "p#{i}"}.join(", ") +
+ ")"
+ end
+
+ find_body(meth_body, meth_obj)
+ class_obj.add_method(meth_obj)
+ end
+ end
+ end
+
+ # Find the C code corresponding to a c method
+ def find_body(meth_name, meth_obj)
+ if @body =~ %r{((?>/\*.*?\*/\s+))(static\s+)?VALUE\s+#{meth_name}
+ \s*(\(.*?\)).*?^}xm
+ comment, params = $1, $3
+ body_text = $&
+
+ # see if we can find the whole body
+
+ re = Regexp.escape(body_text) + "[^(]*^{.*?^}"
+ if Regexp.new(re, Regexp::MULTILINE).match(@body)
+ body_text = $&
+ end
+
+ meth_obj.params = params
+ meth_obj.start_collecting_tokens
+ meth_obj.add_token(RubyToken::Token.new(1,1).set_text(body_text))
+ meth_obj.comment = mangle_comment(comment)
+
+ end
+ end
+
+ # Remove the /*'s and leading asterisks from C comments
+
+ def mangle_comment(comment)
+ comment.sub!(%r{/\*+}) { " " * $&.length }
+ comment.sub!(%r{\*+/}) { " " * $&.length }
+ comment.gsub!(/^[ \t]*\*/m) { " " * $&.length }
+ comment
+ end
+
+ end
+
+end