diff options
Diffstat (limited to 'ruby_1_8_6/lib/rdoc/parsers')
-rw-r--r-- | ruby_1_8_6/lib/rdoc/parsers/parse_c.rb | 773 | ||||
-rw-r--r-- | ruby_1_8_6/lib/rdoc/parsers/parse_f95.rb | 1841 | ||||
-rw-r--r-- | ruby_1_8_6/lib/rdoc/parsers/parse_rb.rb | 2609 | ||||
-rw-r--r-- | ruby_1_8_6/lib/rdoc/parsers/parse_simple.rb | 41 | ||||
-rw-r--r-- | ruby_1_8_6/lib/rdoc/parsers/parserfactory.rb | 99 |
5 files changed, 5363 insertions, 0 deletions
diff --git a/ruby_1_8_6/lib/rdoc/parsers/parse_c.rb b/ruby_1_8_6/lib/rdoc/parsers/parse_c.rb new file mode 100644 index 0000000000..25fc66af3f --- /dev/null +++ b/ruby_1_8_6/lib/rdoc/parsers/parse_c.rb @@ -0,0 +1,773 @@ +# Classes and modules built in to the interpreter. We need +# these to define superclasses of user objects + +require "rdoc/code_objects" +require "rdoc/parsers/parserfactory" +require "rdoc/options" +require "rdoc/rdoc" + +module RDoc + + ## + # Ruby's built-in classes. + + KNOWN_CLASSES = { + "rb_cObject" => "Object", + "rb_cArray" => "Array", + "rb_cBignum" => "Bignum", + "rb_cClass" => "Class", + "rb_cDir" => "Dir", + "rb_cData" => "Data", + "rb_cFalseClass" => "FalseClass", + "rb_cFile" => "File", + "rb_cFixnum" => "Fixnum", + "rb_cFloat" => "Float", + "rb_cHash" => "Hash", + "rb_cInteger" => "Integer", + "rb_cIO" => "IO", + "rb_cModule" => "Module", + "rb_cNilClass" => "NilClass", + "rb_cNumeric" => "Numeric", + "rb_cProc" => "Proc", + "rb_cRange" => "Range", + "rb_cRegexp" => "Regexp", + "rb_cString" => "String", + "rb_cSymbol" => "Symbol", + "rb_cThread" => "Thread", + "rb_cTime" => "Time", + "rb_cTrueClass" => "TrueClass", + "rb_cStruct" => "Struct", + "rb_eException" => "Exception", + "rb_eStandardError" => "StandardError", + "rb_eSystemExit" => "SystemExit", + "rb_eInterrupt" => "Interrupt", + "rb_eSignal" => "Signal", + "rb_eFatal" => "Fatal", + "rb_eArgError" => "ArgError", + "rb_eEOFError" => "EOFError", + "rb_eIndexError" => "IndexError", + "rb_eRangeError" => "RangeError", + "rb_eIOError" => "IOError", + "rb_eRuntimeError" => "RuntimeError", + "rb_eSecurityError" => "SecurityError", + "rb_eSystemCallError" => "SystemCallError", + "rb_eTypeError" => "TypeError", + "rb_eZeroDivError" => "ZeroDivError", + "rb_eNotImpError" => "NotImpError", + "rb_eNoMemError" => "NoMemError", + "rb_eFloatDomainError" => "FloatDomainError", + "rb_eScriptError" => "ScriptError", + "rb_eNameError" => "NameError", + "rb_eSyntaxError" => "SyntaxError", + "rb_eLoadError" => "LoadError", + + "rb_mKernel" => "Kernel", + "rb_mComparable" => "Comparable", + "rb_mEnumerable" => "Enumerable", + "rb_mPrecision" => "Precision", + "rb_mErrno" => "Errno", + "rb_mFileTest" => "FileTest", + "rb_mGC" => "GC", + "rb_mMath" => "Math", + "rb_mProcess" => "Process" + } + + ## + # We attempt to parse C extension files. Basically we look for + # the standard patterns that you find in extensions: <tt>rb_define_class, + # rb_define_method</tt> and so on. We also try to find the corresponding + # C source for the methods and extract comments, but if we fail + # we don't worry too much. + # + # The comments associated with a Ruby method are extracted from the C + # comment block associated with the routine that _implements_ that + # method, that is to say the method whose name is given in the + # <tt>rb_define_method</tt> call. For example, you might write: + # + # /* + # * Returns a new array that is a one-dimensional flattening of this + # * array (recursively). That is, for every element that is an array, + # * extract its elements into the new array. + # * + # * s = [ 1, 2, 3 ] #=> [1, 2, 3] + # * t = [ 4, 5, 6, [7, 8] ] #=> [4, 5, 6, [7, 8]] + # * a = [ s, t, 9, 10 ] #=> [[1, 2, 3], [4, 5, 6, [7, 8]], 9, 10] + # * a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + # */ + # static VALUE + # rb_ary_flatten(ary) + # VALUE ary; + # { + # ary = rb_obj_dup(ary); + # rb_ary_flatten_bang(ary); + # return ary; + # } + # + # ... + # + # void + # Init_Array() + # { + # ... + # rb_define_method(rb_cArray, "flatten", rb_ary_flatten, 0); + # + # Here RDoc will determine from the rb_define_method line that there's a + # method called "flatten" in class Array, and will look for the implementation + # in the method rb_ary_flatten. It will then use the comment from that + # method in the HTML output. This method must be in the same source file + # as the rb_define_method. + # + # C classes can be diagramed (see /tc/dl/ruby/ruby/error.c), and RDoc + # integrates C and Ruby source into one tree + # + # The comment blocks may include special direcives: + # + # [Document-class: <i>name</i>] + # This comment block is documentation for the given class. Use this + # when the <tt>Init_xxx</tt> method is not named after the class. + # + # [Document-method: <i>name</i>] + # This comment documents the named method. Use when RDoc cannot + # automatically find the method from it's declaration + # + # [call-seq: <i>text up to an empty line</i>] + # Because C source doesn't give descripive names to Ruby-level parameters, + # you need to document the calling sequence explicitly + # + # In additon, RDoc assumes by default that the C method implementing a + # Ruby function is in the same source file as the rb_define_method call. + # If this isn't the case, add the comment + # + # rb_define_method(....); // in: filename + # + # As an example, we might have an extension that defines multiple classes + # in its Init_xxx method. We could document them using + # + # + # /* + # * Document-class: MyClass + # * + # * Encapsulate the writing and reading of the configuration + # * file. ... + # */ + # + # /* + # * Document-method: read_value + # * + # * call-seq: + # * cfg.read_value(key) -> value + # * cfg.read_value(key} { |key| } -> value + # * + # * Return the value corresponding to +key+ from the configuration. + # * In the second form, if the key isn't found, invoke the + # * block and return its value. + # */ + # + + class C_Parser + + attr_accessor :progress + + extend ParserFactory + parse_files_matching(/\.(?:([CcHh])\1?|c([+xp])\2|y)\z/) + + @@known_bodies = {} + + # prepare to parse a C file + def initialize(top_level, file_name, body, options, stats) + @known_classes = KNOWN_CLASSES.dup + @body = handle_tab_width(handle_ifdefs_in(body)) + @options = options + @stats = stats + @top_level = top_level + @classes = Hash.new + @file_dir = File.dirname(file_name) + @progress = $stderr unless options.quiet + end + + # Extract the classes/modules and methods from a C file + # and return the corresponding top-level object + def scan + remove_commented_out_lines + do_classes + do_constants + do_methods + do_includes + do_aliases + @top_level + end + + ####### + private + ####### + + def progress(char) + unless @options.quiet + @progress.print(char) + @progress.flush + end + end + + def warn(msg) + $stderr.puts + $stderr.puts msg + $stderr.flush + end + + def remove_private_comments(comment) + comment.gsub!(/\/?\*--(.*?)\/?\*\+\+/m, '') + comment.sub!(/\/?\*--.*/m, '') + end + + ## + # removes lines that are commented out that might otherwise get picked up + # when scanning for classes and methods + + def remove_commented_out_lines + @body.gsub!(%r{//.*rb_define_}, '//') + end + + def handle_class_module(var_name, class_mod, class_name, parent, in_module) + progress(class_mod[0, 1]) + + parent_name = @known_classes[parent] || parent + + if in_module + enclosure = @classes[in_module] + unless enclosure + if enclosure = @known_classes[in_module] + handle_class_module(in_module, (/^rb_m/ =~ in_module ? "module" : "class"), + enclosure, nil, nil) + enclosure = @classes[in_module] + end + end + unless enclosure + warn("Enclosing class/module '#{in_module}' for " + + "#{class_mod} #{class_name} not known") + return + end + else + enclosure = @top_level + end + + if class_mod == "class" + cm = enclosure.add_class(NormalClass, class_name, parent_name) + @stats.num_classes += 1 + else + cm = enclosure.add_module(NormalModule, class_name) + @stats.num_modules += 1 + end + cm.record_location(enclosure.toplevel) + + find_class_comment(cm.full_name, cm) + @classes[var_name] = cm + @known_classes[var_name] = cm.full_name + end + + ## + # Look for class or module documentation above Init_+class_name+(void), + # in a Document-class +class_name+ (or module) comment or above an + # rb_define_class (or module). If a comment is supplied above a matching + # Init_ and a rb_define_class the Init_ comment is used. + # + # /* + # * This is a comment for Foo + # */ + # Init_Foo(void) { + # VALUE cFoo = rb_define_class("Foo", rb_cObject); + # } + # + # /* + # * Document-class: Foo + # * This is a comment for Foo + # */ + # Init_foo(void) { + # VALUE cFoo = rb_define_class("Foo", rb_cObject); + # } + # + # /* + # * This is a comment for Foo + # */ + # VALUE cFoo = rb_define_class("Foo", rb_cObject); + + def find_class_comment(class_name, class_meth) + comment = nil + if @body =~ %r{((?>/\*.*?\*/\s+)) + (static\s+)?void\s+Init_#{class_name}\s*(?:_\(\s*)?\(\s*(?:void\s*)?\)}xmi + comment = $1 + elsif @body =~ %r{Document-(class|module):\s#{class_name}\s*?\n((?>.*?\*/))}m + comment = $2 + else + if @body =~ /rb_define_(class|module)/m then + class_name = class_name.split("::").last + comments = [] + @body.split(/(\/\*.*?\*\/)\s*?\n/m).each_with_index do |chunk, index| + comments[index] = chunk + if chunk =~ /rb_define_(class|module).*?"(#{class_name})"/m then + comment = comments[index-1] + break + end + end + end + end + class_meth.comment = mangle_comment(comment) if comment + end + + ############################################################ + + def do_classes + @body.scan(/(\w+)\s* = \s*rb_define_module\s*\(\s*"(\w+)"\s*\)/mx) do + |var_name, class_name| + handle_class_module(var_name, "module", class_name, nil, nil) + end + + # The '.' lets us handle SWIG-generated files + @body.scan(/([\w\.]+)\s* = \s*rb_define_class\s* + \( + \s*"(\w+)", + \s*(\w+)\s* + \)/mx) do + + |var_name, class_name, parent| + handle_class_module(var_name, "class", class_name, parent, nil) + end + + @body.scan(/(\w+)\s*=\s*boot_defclass\s*\(\s*"(\w+?)",\s*(\w+?)\s*\)/) do + |var_name, class_name, parent| + parent = nil if parent == "0" + handle_class_module(var_name, "class", class_name, parent, nil) + end + + @body.scan(/(\w+)\s* = \s*rb_define_module_under\s* + \( + \s*(\w+), + \s*"(\w+)" + \s*\)/mx) do + + |var_name, in_module, class_name| + handle_class_module(var_name, "module", class_name, nil, in_module) + end + + @body.scan(/([\w\.]+)\s* = \s*rb_define_class_under\s* + \( + \s*(\w+), + \s*"(\w+)", + \s*(\w+)\s* + \s*\)/mx) do + + |var_name, in_module, class_name, parent| + handle_class_module(var_name, "class", class_name, parent, in_module) + end + + end + + ########################################################### + + def do_constants + @body.scan(%r{\Wrb_define_ + ( + variable | + readonly_variable | + const | + global_const | + ) + \s*\( + (?:\s*(\w+),)? + \s*"(\w+)", + \s*(.*?)\s*\)\s*; + }xm) do + + |type, var_name, const_name, definition| + var_name = "rb_cObject" if !var_name or var_name == "rb_mKernel" + handle_constants(type, var_name, const_name, definition) + end + end + + ############################################################ + + def do_methods + + @body.scan(%r{rb_define_ + ( + singleton_method | + method | + module_function | + private_method + ) + \s*\(\s*([\w\.]+), + \s*"([^"]+)", + \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?, + \s*(-?\w+)\s*\) + (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))? + }xm) do + |type, var_name, meth_name, meth_body, param_count, source_file| + #" + + # Ignore top-object and weird struct.c dynamic stuff + next if var_name == "ruby_top_self" + next if var_name == "nstr" + next if var_name == "envtbl" + next if var_name == "argf" # it'd be nice to handle this one + + var_name = "rb_cObject" if var_name == "rb_mKernel" + handle_method(type, var_name, meth_name, + meth_body, param_count, source_file) + end + + @body.scan(%r{rb_define_attr\( + \s*([\w\.]+), + \s*"([^"]+)", + \s*(\d+), + \s*(\d+)\s*\); + }xm) do #" + |var_name, attr_name, attr_reader, attr_writer| + + #var_name = "rb_cObject" if var_name == "rb_mKernel" + handle_attr(var_name, attr_name, + attr_reader.to_i != 0, + attr_writer.to_i != 0) + end + + @body.scan(%r{rb_define_global_function\s*\( + \s*"([^"]+)", + \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?, + \s*(-?\w+)\s*\) + (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))? + }xm) do #" + |meth_name, meth_body, param_count, source_file| + handle_method("method", "rb_mKernel", meth_name, + meth_body, param_count, source_file) + end + + @body.scan(/define_filetest_function\s*\( + \s*"([^"]+)", + \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?, + \s*(-?\w+)\s*\)/xm) do #" + |meth_name, meth_body, param_count| + + handle_method("method", "rb_mFileTest", meth_name, meth_body, param_count) + handle_method("singleton_method", "rb_cFile", meth_name, meth_body, param_count) + end + end + + ############################################################ + + def do_aliases + @body.scan(%r{rb_define_alias\s*\(\s*(\w+),\s*"([^"]+)",\s*"([^"]+)"\s*\)}m) do + |var_name, new_name, old_name| + @stats.num_methods += 1 + class_name = @known_classes[var_name] || var_name + class_obj = find_class(var_name, class_name) + + class_obj.add_alias(Alias.new("", old_name, new_name, "")) + end + end + + ## + # Adds constant comments. By providing some_value: at the start ofthe + # comment you can override the C value of the comment to give a friendly + # definition. + # + # /* 300: The perfect score in bowling */ + # rb_define_const(cFoo, "PERFECT", INT2FIX(300); + # + # Will override +INT2FIX(300)+ with the value +300+ in the output RDoc. + # Values may include quotes and escaped colons (\:). + + def handle_constants(type, var_name, const_name, definition) + #@stats.num_constants += 1 + class_name = @known_classes[var_name] + + return unless class_name + + class_obj = find_class(var_name, class_name) + + unless class_obj + warn("Enclosing class/module '#{const_name}' for not known") + return + end + + comment = find_const_comment(type, const_name) + + # In the case of rb_define_const, the definition and comment are in + # "/* definition: comment */" form. The literal ':' and '\' characters + # can be escaped with a backslash. + if type.downcase == 'const' then + elements = mangle_comment(comment).split(':') + if elements.nil? or elements.empty? then + con = Constant.new(const_name, definition, mangle_comment(comment)) + else + new_definition = elements[0..-2].join(':') + if new_definition.empty? then # Default to literal C definition + new_definition = definition + else + new_definition.gsub!("\:", ":") + new_definition.gsub!("\\", '\\') + end + new_definition.sub!(/\A(\s+)/, '') + new_comment = $1.nil? ? elements.last : "#{$1}#{elements.last.lstrip}" + con = Constant.new(const_name, new_definition, + mangle_comment(new_comment)) + end + else + con = Constant.new(const_name, definition, mangle_comment(comment)) + end + + class_obj.add_constant(con) + end + + ## + # Finds a comment matching +type+ and +const_name+ either above the + # comment or in the matching Document- section. + + def find_const_comment(type, const_name) + if @body =~ %r{((?>^\s*/\*.*?\*/\s+)) + rb_define_#{type}\((?:\s*(\w+),)?\s*"#{const_name}"\s*,.*?\)\s*;}xmi + $1 + elsif @body =~ %r{Document-(?:const|global|variable):\s#{const_name}\s*?\n((?>.*?\*/))}m + $1 + else + '' + end + end + + ########################################################### + + def handle_attr(var_name, attr_name, reader, writer) + rw = '' + if reader + #@stats.num_methods += 1 + rw << 'R' + end + if writer + #@stats.num_methods += 1 + rw << 'W' + end + + class_name = @known_classes[var_name] + + return unless class_name + + class_obj = find_class(var_name, class_name) + + if class_obj + comment = find_attr_comment(attr_name) + unless comment.empty? + comment = mangle_comment(comment) + end + att = Attr.new('', attr_name, rw, comment) + class_obj.add_attribute(att) + end + + end + + ########################################################### + + def find_attr_comment(attr_name) + if @body =~ %r{((?>/\*.*?\*/\s+)) + rb_define_attr\((?:\s*(\w+),)?\s*"#{attr_name}"\s*,.*?\)\s*;}xmi + $1 + elsif @body =~ %r{Document-attr:\s#{attr_name}\s*?\n((?>.*?\*/))}m + $1 + else + '' + end + end + + ########################################################### + + def handle_method(type, var_name, meth_name, + meth_body, param_count, source_file = nil) + progress(".") + + @stats.num_methods += 1 + class_name = @known_classes[var_name] + + return unless class_name + + class_obj = find_class(var_name, class_name) + + if class_obj + if meth_name == "initialize" + meth_name = "new" + type = "singleton_method" + end + meth_obj = AnyMethod.new("", meth_name) + meth_obj.singleton = + %w{singleton_method module_function}.include?(type) + + p_count = (Integer(param_count) rescue -1) + + if p_count < 0 + meth_obj.params = "(...)" + elsif p_count == 0 + meth_obj.params = "()" + else + meth_obj.params = "(" + + (1..p_count).map{|i| "p#{i}"}.join(", ") + + ")" + end + + if source_file + file_name = File.join(@file_dir, source_file) + body = (@@known_bodies[source_file] ||= File.read(file_name)) + else + body = @body + end + if find_body(meth_body, meth_obj, body) and meth_obj.document_self + class_obj.add_method(meth_obj) + end + end + end + + ############################################################ + + # Find the C code corresponding to a Ruby method + def find_body(meth_name, meth_obj, body, quiet = false) + case body + when %r{((?>/\*.*?\*/\s*))(?:static\s+)?VALUE\s+#{meth_name} + \s*(\(.*?\)).*?^}xm + comment, params = $1, $2 + body_text = $& + + remove_private_comments(comment) if comment + + # see if we can find the whole body + + re = Regexp.escape(body_text) + '[^(]*^\{.*?^\}' + if Regexp.new(re, Regexp::MULTILINE).match(body) + body_text = $& + end + + # The comment block may have been overridden with a + # 'Document-method' block. This happens in the interpreter + # when multiple methods are vectored through to the same + # C method but those methods are logically distinct (for + # example Kernel.hash and Kernel.object_id share the same + # implementation + + override_comment = find_override_comment(meth_obj.name) + comment = override_comment if override_comment + + find_modifiers(comment, meth_obj) if comment + +# meth_obj.params = params + meth_obj.start_collecting_tokens + meth_obj.add_token(RubyToken::Token.new(1,1).set_text(body_text)) + meth_obj.comment = mangle_comment(comment) + when %r{((?>/\*.*?\*/\s*))^\s*\#\s*define\s+#{meth_name}\s+(\w+)}m + comment = $1 + find_body($2, meth_obj, body, true) + find_modifiers(comment, meth_obj) + meth_obj.comment = mangle_comment(comment) + meth_obj.comment + when %r{^\s*\#\s*define\s+#{meth_name}\s+(\w+)}m + unless find_body($1, meth_obj, body, true) + warn "No definition for #{meth_name}" unless quiet + return false + end + else + + # No body, but might still have an override comment + comment = find_override_comment(meth_obj.name) + + if comment + find_modifiers(comment, meth_obj) + meth_obj.comment = mangle_comment(comment) + else + warn "No definition for #{meth_name}" unless quiet + return false + end + end + true + end + + + ## + # If the comment block contains a section that looks like: + # + # call-seq: + # Array.new + # Array.new(10) + # + # use it for the parameters. + + def find_modifiers(comment, meth_obj) + if comment.sub!(/:nodoc:\s*^\s*\*?\s*$/m, '') or + comment.sub!(/\A\/\*\s*:nodoc:\s*\*\/\Z/, '') + meth_obj.document_self = false + end + if comment.sub!(/call-seq:(.*?)^\s*\*?\s*$/m, '') or + comment.sub!(/\A\/\*\s*call-seq:(.*?)\*\/\Z/, '') + seq = $1 + seq.gsub!(/^\s*\*\s*/, '') + meth_obj.call_seq = seq + end + end + + ############################################################ + + def find_override_comment(meth_name) + name = Regexp.escape(meth_name) + if @body =~ %r{Document-method:\s#{name}\s*?\n((?>.*?\*/))}m + $1 + end + end + + ## + # Look for includes of the form: + # + # rb_include_module(rb_cArray, rb_mEnumerable); + + def do_includes + @body.scan(/rb_include_module\s*\(\s*(\w+?),\s*(\w+?)\s*\)/) do |c,m| + if cls = @classes[c] + m = @known_classes[m] || m + cls.add_include(Include.new(m, "")) + end + end + end + + ## + # Remove the /*'s and leading asterisks from C comments + + def mangle_comment(comment) + comment.sub!(%r{/\*+}) { " " * $&.length } + comment.sub!(%r{\*+/}) { " " * $&.length } + comment.gsub!(/^[ \t]*\*/m) { " " * $&.length } + comment + end + + def find_class(raw_name, name) + unless @classes[raw_name] + if raw_name =~ /^rb_m/ + @classes[raw_name] = @top_level.add_module(NormalModule, name) + else + @classes[raw_name] = @top_level.add_class(NormalClass, name, nil) + end + end + @classes[raw_name] + end + + def handle_tab_width(body) + if /\t/ =~ body + tab_width = Options.instance.tab_width + body.split(/\n/).map do |line| + 1 while line.gsub!(/\t+/) { ' ' * (tab_width*$&.length - $`.length % tab_width)} && $~ #` + line + end .join("\n") + else + body + end + end + + ## + # Removes #ifdefs that would otherwise confuse us + + def handle_ifdefs_in(body) + body.gsub(/^#ifdef HAVE_PROTOTYPES.*?#else.*?\n(.*?)#endif.*?\n/m) { $1 } + end + + end + +end + diff --git a/ruby_1_8_6/lib/rdoc/parsers/parse_f95.rb b/ruby_1_8_6/lib/rdoc/parsers/parse_f95.rb new file mode 100644 index 0000000000..f3f6d76103 --- /dev/null +++ b/ruby_1_8_6/lib/rdoc/parsers/parse_f95.rb @@ -0,0 +1,1841 @@ +#= parse_f95.rb - Fortran95 Parser +# +#== Overview +# +#"parse_f95.rb" parses Fortran95 files with suffixes "f90", "F90", "f95" +#and "F95". Fortran95 files are expected to be conformed to Fortran95 +#standards. +# +#== Rules +# +#Fundamental rules are same as that of the Ruby parser. +#But comment markers are '!' not '#'. +# +#=== Correspondence between RDoc documentation and Fortran95 programs +# +#"parse_f95.rb" parses main programs, modules, subroutines, functions, +#derived-types, public variables, public constants, +#defined operators and defined assignments. +#These components are described in items of RDoc documentation, as follows. +# +#Files :: Files (same as Ruby) +#Classes :: Modules +#Methods :: Subroutines, functions, variables, constants, derived-types, defined operators, defined assignments +#Required files :: Files in which imported modules, external subroutines and external functions are defined. +#Included Modules :: List of imported modules +#Attributes :: List of derived-types, List of imported modules all of whose components are published again +# +#Components listed in 'Methods' (subroutines, functions, ...) +#defined in modules are described in the item of 'Classes'. +#On the other hand, components defined in main programs or +#as external procedures are described in the item of 'Files'. +# +#=== Components parsed by default +# +#By default, documentation on public components (subroutines, functions, +#variables, constants, derived-types, defined operators, +#defined assignments) are generated. +#With "--all" option, documentation on all components +#are generated (almost same as the Ruby parser). +# +#=== Information parsed automatically +# +#The following information is automatically parsed. +# +#* Types of arguments +#* Types of variables and constants +#* Types of variables in the derived types, and initial values +#* NAMELISTs and types of variables in them, and initial values +# +#Aliases by interface statement are described in the item of 'Methods'. +# +#Components which are imported from other modules and published again +#are described in the item of 'Methods'. +# +#=== Format of comment blocks +# +#Comment blocks should be written as follows. +#Comment blocks are considered to be ended when the line without '!' +#appears. +#The indentation is not necessary. +# +# ! (Top of file) +# ! +# ! Comment blocks for the files. +# ! +# !-- +# ! The comment described in the part enclosed by +# ! "!--" and "!++" is ignored. +# !++ +# ! +# module hogehoge +# ! +# ! Comment blocks for the modules (or the programs). +# ! +# +# private +# +# logical :: a ! a private variable +# real, public :: b ! a public variable +# integer, parameter :: c = 0 ! a public constant +# +# public :: c +# public :: MULTI_ARRAY +# public :: hoge, foo +# +# type MULTI_ARRAY +# ! +# ! Comment blocks for the derived-types. +# ! +# real, pointer :: var(:) =>null() ! Comments block for the variables. +# integer :: num = 0 +# end type MULTI_ARRAY +# +# contains +# +# subroutine hoge( in, & ! Comment blocks between continuation lines are ignored. +# & out ) +# ! +# ! Comment blocks for the subroutines or functions +# ! +# character(*),intent(in):: in ! Comment blocks for the arguments. +# character(*),intent(out),allocatable,target :: in +# ! Comment blocks can be +# ! written under Fortran statements. +# +# character(32) :: file ! This comment parsed as a variable in below NAMELIST. +# integer :: id +# +# namelist /varinfo_nml/ file, id +# ! +# ! Comment blocks for the NAMELISTs. +# ! Information about variables are described above. +# ! +# +# .... +# +# end subroutine hoge +# +# integer function foo( in ) +# ! +# ! This part is considered as comment block. +# +# ! Comment blocks under blank lines are ignored. +# ! +# integer, intent(in):: inA ! This part is considered as comment block. +# +# ! This part is ignored. +# +# end function foo +# +# subroutine hide( in, & +# & out ) !:nodoc: +# ! +# ! If "!:nodoc:" is described at end-of-line in subroutine +# ! statement as above, the subroutine is ignored. +# ! This assignment can be used to modules, subroutines, +# ! functions, variables, constants, derived-types, +# ! defined operators, defined assignments, +# ! list of imported modules ("use" statement). +# ! +# +# .... +# +# end subroutine hide +# +# end module hogehoge +# + + +require "rdoc/code_objects" + +module RDoc + + class Token + + NO_TEXT = "??".freeze + + def initialize(line_no, char_no) + @line_no = line_no + @char_no = char_no + @text = NO_TEXT + end + # Because we're used in contexts that expect to return a token, + # we set the text string and then return ourselves + def set_text(text) + @text = text + self + end + + attr_reader :line_no, :char_no, :text + + end + + # See rdoc/parsers/parse_f95.rb + + class Fortran95parser + + extend ParserFactory + parse_files_matching(/\.((f|F)9(0|5)|F)$/) + + @@external_aliases = [] + @@public_methods = [] + + # "false":: Comments are below source code + # "true" :: Comments are upper source code + COMMENTS_ARE_UPPER = false + + # Internal alias message + INTERNAL_ALIAS_MES = "Alias for" + + # External alias message + EXTERNAL_ALIAS_MES = "The entity is" + + # prepare to parse a Fortran 95 file + def initialize(top_level, file_name, body, options, stats) + @body = body + @stats = stats + @file_name = file_name + @options = options + @top_level = top_level + @progress = $stderr unless options.quiet + end + + # devine code constructs + def scan + + # remove private comment + remaining_code = remove_private_comments(@body) + + # continuation lines are united to one line + remaining_code = united_to_one_line(remaining_code) + + # semicolons are replaced to line feed + remaining_code = semicolon_to_linefeed(remaining_code) + + # collect comment for file entity + whole_comment, remaining_code = collect_first_comment(remaining_code) + @top_level.comment = whole_comment + + # String "remaining_code" is converted to Array "remaining_lines" + remaining_lines = remaining_code.split("\n") + + # "module" or "program" parts are parsed (new) + # + level_depth = 0 + block_searching_flag = nil + block_searching_lines = [] + pre_comment = [] + module_program_trailing = "" + module_program_name = "" + other_block_level_depth = 0 + other_block_searching_flag = nil + remaining_lines.collect!{|line| + if !block_searching_flag && !other_block_searching_flag + if line =~ /^\s*?module\s+(\w+)\s*?(!.*?)?$/i + block_searching_flag = :module + block_searching_lines << line + module_program_name = $1 + module_program_trailing = find_comments($2) + next false + elsif line =~ /^\s*?program\s+(\w+)\s*?(!.*?)?$/i || + line =~ /^\s*?\w/ && !block_start?(line) + block_searching_flag = :program + block_searching_lines << line + module_program_name = $1 || "" + module_program_trailing = find_comments($2) + next false + + elsif block_start?(line) + other_block_searching_flag = true + next line + + elsif line =~ /^\s*?!\s?(.*)/ + pre_comment << line + next line + else + pre_comment = [] + next line + end + elsif other_block_searching_flag + other_block_level_depth += 1 if block_start?(line) + other_block_level_depth -= 1 if block_end?(line) + if other_block_level_depth < 0 + other_block_level_depth = 0 + other_block_searching_flag = nil + end + next line + end + + block_searching_lines << line + level_depth += 1 if block_start?(line) + level_depth -= 1 if block_end?(line) + if level_depth >= 0 + next false + end + + # "module_program_code" is formatted. + # ":nodoc:" flag is checked. + # + module_program_code = block_searching_lines.join("\n") + module_program_code = remove_empty_head_lines(module_program_code) + if module_program_trailing =~ /^:nodoc:/ + # next loop to search next block + level_depth = 0 + block_searching_flag = false + block_searching_lines = [] + pre_comment = [] + next false + end + + # NormalClass is created, and added to @top_level + # + if block_searching_flag == :module + module_name = module_program_name + module_code = module_program_code + module_trailing = module_program_trailing + progress "m" + @stats.num_modules += 1 + f9x_module = @top_level.add_module NormalClass, module_name + f9x_module.record_location @top_level + + f9x_comment = COMMENTS_ARE_UPPER ? + find_comments(pre_comment.join("\n")) + "\n" + module_trailing : + module_trailing + "\n" + find_comments(module_code.sub(/^.*$\n/i, '')) + f9x_module.comment = f9x_comment + parse_program_or_module(f9x_module, module_code) + + TopLevel.all_files.each do |name, toplevel| + if toplevel.include_includes?(module_name, @options.ignore_case) + if !toplevel.include_requires?(@file_name, @options.ignore_case) + toplevel.add_require(Require.new(@file_name, "")) + end + end + toplevel.each_classmodule{|m| + if m.include_includes?(module_name, @options.ignore_case) + if !m.include_requires?(@file_name, @options.ignore_case) + m.add_require(Require.new(@file_name, "")) + end + end + } + end + elsif block_searching_flag == :program + program_name = module_program_name + program_code = module_program_code + program_trailing = module_program_trailing + progress "p" + program_comment = COMMENTS_ARE_UPPER ? + find_comments(pre_comment.join("\n")) + "\n" + program_trailing : + program_trailing + "\n" + find_comments(program_code.sub(/^.*$\n/i, '')) + program_comment = "\n\n= <i>Program</i> <tt>#{program_name}</tt>\n\n" \ + + program_comment + @top_level.comment << program_comment + parse_program_or_module(@top_level, program_code, :private) + end + + # next loop to search next block + level_depth = 0 + block_searching_flag = false + block_searching_lines = [] + pre_comment = [] + next false + } + + remaining_lines.delete_if{ |line| + line == false + } + + # External subprograms and functions are parsed + # + parse_program_or_module(@top_level, remaining_lines.join("\n"), + :public, true) + + @top_level + end # End of scan + + private + + def parse_program_or_module(container, code, + visibility=:public, external=nil) + return unless container + return unless code + remaining_lines = code.split("\n") + remaining_code = "#{code}" + + # + # Parse variables before "contains" in module + # + level_depth = 0 + before_contains_lines = [] + before_contains_code = nil + before_contains_flag = nil + remaining_lines.each{ |line| + if !before_contains_flag + if line =~ /^\s*?module\s+\w+\s*?(!.*?)?$/i + before_contains_flag = true + end + else + break if line =~ /^\s*?contains\s*?(!.*?)?$/i + level_depth += 1 if block_start?(line) + level_depth -= 1 if block_end?(line) + break if level_depth < 0 + before_contains_lines << line + end + } + before_contains_code = before_contains_lines.join("\n") + if before_contains_code + before_contains_code.gsub!(/^\s*?interface\s+.*?\s+end\s+interface.*?$/im, "") + before_contains_code.gsub!(/^\s*?type[\s\,]+.*?\s+end\s+type.*?$/im, "") + end + + # + # Parse global "use" + # + use_check_code = "#{before_contains_code}" + cascaded_modules_list = [] + while use_check_code =~ /^\s*?use\s+(\w+)(.*?)(!.*?)?$/i + use_check_code = $~.pre_match + use_check_code << $~.post_match + used_mod_name = $1.strip.chomp + used_list = $2 || "" + used_trailing = $3 || "" + next if used_trailing =~ /!:nodoc:/ + if !container.include_includes?(used_mod_name, @options.ignore_case) + progress "." + container.add_include Include.new(used_mod_name, "") + end + if ! (used_list =~ /\,\s*?only\s*?:/i ) + cascaded_modules_list << "\#" + used_mod_name + end + end + + # + # Parse public and private, and store information. + # This information is used when "add_method" and + # "set_visibility_for" are called. + # + visibility_default, visibility_info = + parse_visibility(remaining_lines.join("\n"), visibility, container) + @@public_methods.concat visibility_info + if visibility_default == :public + if !cascaded_modules_list.empty? + cascaded_modules = + Attr.new("Cascaded Modules", + "Imported modules all of whose components are published again", + "", + cascaded_modules_list.join(", ")) + container.add_attribute(cascaded_modules) + end + end + + # + # Check rename elements + # + use_check_code = "#{before_contains_code}" + while use_check_code =~ /^\s*?use\s+(\w+)\s*?\,(.+)$/i + use_check_code = $~.pre_match + use_check_code << $~.post_match + used_mod_name = $1.strip.chomp + used_elements = $2.sub(/\s*?only\s*?:\s*?/i, '') + used_elements.split(",").each{ |used| + if /\s*?(\w+)\s*?=>\s*?(\w+)\s*?/ =~ used + local = $1 + org = $2 + @@public_methods.collect!{ |pub_meth| + if local == pub_meth["name"] || + local.upcase == pub_meth["name"].upcase && + @options.ignore_case + pub_meth["name"] = org + pub_meth["local_name"] = local + end + pub_meth + } + end + } + end + + # + # Parse private "use" + # + use_check_code = remaining_lines.join("\n") + while use_check_code =~ /^\s*?use\s+(\w+)(.*?)(!.*?)?$/i + use_check_code = $~.pre_match + use_check_code << $~.post_match + used_mod_name = $1.strip.chomp + used_trailing = $3 || "" + next if used_trailing =~ /!:nodoc:/ + if !container.include_includes?(used_mod_name, @options.ignore_case) + progress "." + container.add_include Include.new(used_mod_name, "") + end + end + + container.each_includes{ |inc| + TopLevel.all_files.each do |name, toplevel| + indicated_mod = toplevel.find_symbol(inc.name, + nil, @options.ignore_case) + if indicated_mod + indicated_name = indicated_mod.parent.file_relative_name + if !container.include_requires?(indicated_name, @options.ignore_case) + container.add_require(Require.new(indicated_name, "")) + end + break + end + end + } + + # + # Parse derived-types definitions + # + derived_types_comment = "" + remaining_code = remaining_lines.join("\n") + while remaining_code =~ /^\s*? + type[\s\,]+(public|private)?\s*?(::)?\s*? + (\w+)\s*?(!.*?)?$ + (.*?) + ^\s*?end\s+type.*?$ + /imx + remaining_code = $~.pre_match + remaining_code << $~.post_match + typename = $3.chomp.strip + type_elements = $5 || "" + type_code = remove_empty_head_lines($&) + type_trailing = find_comments($4) + next if type_trailing =~ /^:nodoc:/ + type_visibility = $1 + type_comment = COMMENTS_ARE_UPPER ? + find_comments($~.pre_match) + "\n" + type_trailing : + type_trailing + "\n" + find_comments(type_code.sub(/^.*$\n/i, '')) + type_element_visibility_public = true + type_code.split("\n").each{ |line| + if /^\s*?private\s*?$/ =~ line + type_element_visibility_public = nil + break + end + } if type_code + + args_comment = "" + type_args_info = nil + + if @options.show_all + args_comment = find_arguments(nil, type_code, true) + else + type_public_args_list = [] + type_args_info = definition_info(type_code) + type_args_info.each{ |arg| + arg_is_public = type_element_visibility_public + arg_is_public = true if arg.include_attr?("public") + arg_is_public = nil if arg.include_attr?("private") + type_public_args_list << arg.varname if arg_is_public + } + args_comment = find_arguments(type_public_args_list, type_code) + end + + type = AnyMethod.new("type #{typename}", typename) + type.singleton = false + type.params = "" + type.comment = "<b><em> Derived Type </em></b> :: <tt></tt>\n" + type.comment << args_comment if args_comment + type.comment << type_comment if type_comment + progress "t" + @stats.num_methods += 1 + container.add_method type + + set_visibility(container, typename, visibility_default, @@public_methods) + + if type_visibility + type_visibility.gsub!(/\s/,'') + type_visibility.gsub!(/\,/,'') + type_visibility.gsub!(/:/,'') + type_visibility.downcase! + if type_visibility == "public" + container.set_visibility_for([typename], :public) + elsif type_visibility == "private" + container.set_visibility_for([typename], :private) + end + end + + check_public_methods(type, container.name) + + if @options.show_all + derived_types_comment << ", " unless derived_types_comment.empty? + derived_types_comment << typename + else + if type.visibility == :public + derived_types_comment << ", " unless derived_types_comment.empty? + derived_types_comment << typename + end + end + + end + + if !derived_types_comment.empty? + derived_types_table = + Attr.new("Derived Types", "Derived_Types", "", + derived_types_comment) + container.add_attribute(derived_types_table) + end + + # + # move interface scope + # + interface_code = "" + while remaining_code =~ /^\s*? + interface( + \s+\w+ | + \s+operator\s*?\(.*?\) | + \s+assignment\s*?\(\s*?=\s*?\) + )?\s*?$ + (.*?) + ^\s*?end\s+interface.*?$ + /imx + interface_code << remove_empty_head_lines($&) + "\n" + remaining_code = $~.pre_match + remaining_code << $~.post_match + end + + # + # Parse global constants or variables in modules + # + const_var_defs = definition_info(before_contains_code) + const_var_defs.each{|defitem| + next if defitem.nodoc + const_or_var_type = "Variable" + const_or_var_progress = "v" + if defitem.include_attr?("parameter") + const_or_var_type = "Constant" + const_or_var_progress = "c" + end + const_or_var = AnyMethod.new(const_or_var_type, defitem.varname) + const_or_var.singleton = false + const_or_var.params = "" + self_comment = find_arguments([defitem.varname], before_contains_code) + const_or_var.comment = "<b><em>" + const_or_var_type + "</em></b> :: <tt></tt>\n" + const_or_var.comment << self_comment if self_comment + progress const_or_var_progress + @stats.num_methods += 1 + container.add_method const_or_var + + set_visibility(container, defitem.varname, visibility_default, @@public_methods) + + if defitem.include_attr?("public") + container.set_visibility_for([defitem.varname], :public) + elsif defitem.include_attr?("private") + container.set_visibility_for([defitem.varname], :private) + end + + check_public_methods(const_or_var, container.name) + + } if const_var_defs + + remaining_lines = remaining_code.split("\n") + + # "subroutine" or "function" parts are parsed (new) + # + level_depth = 0 + block_searching_flag = nil + block_searching_lines = [] + pre_comment = [] + procedure_trailing = "" + procedure_name = "" + procedure_params = "" + procedure_prefix = "" + procedure_result_arg = "" + procedure_type = "" + contains_lines = [] + contains_flag = nil + remaining_lines.collect!{|line| + if !block_searching_flag + # subroutine + if line =~ /^\s*? + (recursive|pure|elemental)?\s*? + subroutine\s+(\w+)\s*?(\(.*?\))?\s*?(!.*?)?$ + /ix + block_searching_flag = :subroutine + block_searching_lines << line + + procedure_name = $2.chomp.strip + procedure_params = $3 || "" + procedure_prefix = $1 || "" + procedure_trailing = $4 || "!" + next false + + # function + elsif line =~ /^\s*? + (recursive|pure|elemental)?\s*? + ( + character\s*?(\([\w\s\=\(\)\*]+?\))?\s+ + | type\s*?\([\w\s]+?\)\s+ + | integer\s*?(\([\w\s\=\(\)\*]+?\))?\s+ + | real\s*?(\([\w\s\=\(\)\*]+?\))?\s+ + | double\s+precision\s+ + | logical\s*?(\([\w\s\=\(\)\*]+?\))?\s+ + | complex\s*?(\([\w\s\=\(\)\*]+?\))?\s+ + )? + function\s+(\w+)\s*? + (\(.*?\))?(\s+result\((.*?)\))?\s*?(!.*?)?$ + /ix + block_searching_flag = :function + block_searching_lines << line + + procedure_prefix = $1 || "" + procedure_type = $2 ? $2.chomp.strip : nil + procedure_name = $8.chomp.strip + procedure_params = $9 || "" + procedure_result_arg = $11 ? $11.chomp.strip : procedure_name + procedure_trailing = $12 || "!" + next false + elsif line =~ /^\s*?!\s?(.*)/ + pre_comment << line + next line + else + pre_comment = [] + next line + end + end + contains_flag = true if line =~ /^\s*?contains\s*?(!.*?)?$/ + block_searching_lines << line + contains_lines << line if contains_flag + + level_depth += 1 if block_start?(line) + level_depth -= 1 if block_end?(line) + if level_depth >= 0 + next false + end + + # "procedure_code" is formatted. + # ":nodoc:" flag is checked. + # + procedure_code = block_searching_lines.join("\n") + procedure_code = remove_empty_head_lines(procedure_code) + if procedure_trailing =~ /^!:nodoc:/ + # next loop to search next block + level_depth = 0 + block_searching_flag = nil + block_searching_lines = [] + pre_comment = [] + procedure_trailing = "" + procedure_name = "" + procedure_params = "" + procedure_prefix = "" + procedure_result_arg = "" + procedure_type = "" + contains_lines = [] + contains_flag = nil + next false + end + + # AnyMethod is created, and added to container + # + subroutine_function = nil + if block_searching_flag == :subroutine + subroutine_prefix = procedure_prefix + subroutine_name = procedure_name + subroutine_params = procedure_params + subroutine_trailing = procedure_trailing + subroutine_code = procedure_code + + subroutine_comment = COMMENTS_ARE_UPPER ? + pre_comment.join("\n") + "\n" + subroutine_trailing : + subroutine_trailing + "\n" + subroutine_code.sub(/^.*$\n/i, '') + subroutine = AnyMethod.new("subroutine", subroutine_name) + parse_subprogram(subroutine, subroutine_params, + subroutine_comment, subroutine_code, + before_contains_code, nil, subroutine_prefix) + progress "s" + @stats.num_methods += 1 + container.add_method subroutine + subroutine_function = subroutine + + elsif block_searching_flag == :function + function_prefix = procedure_prefix + function_type = procedure_type + function_name = procedure_name + function_params_org = procedure_params + function_result_arg = procedure_result_arg + function_trailing = procedure_trailing + function_code_org = procedure_code + + function_comment = COMMENTS_ARE_UPPER ? + pre_comment.join("\n") + "\n" + function_trailing : + function_trailing + "\n " + function_code_org.sub(/^.*$\n/i, '') + + function_code = "#{function_code_org}" + if function_type + function_code << "\n" + function_type + " :: " + function_result_arg + end + + function_params = + function_params_org.sub(/^\(/, "\(#{function_result_arg}, ") + + function = AnyMethod.new("function", function_name) + parse_subprogram(function, function_params, + function_comment, function_code, + before_contains_code, true, function_prefix) + + # Specific modification due to function + function.params.sub!(/\(\s*?#{function_result_arg}\s*?,\s*?/, "\( ") + function.params << " result(" + function_result_arg + ")" + function.start_collecting_tokens + function.add_token Token.new(1,1).set_text(function_code_org) + + progress "f" + @stats.num_methods += 1 + container.add_method function + subroutine_function = function + + end + + # The visibility of procedure is specified + # + set_visibility(container, procedure_name, + visibility_default, @@public_methods) + + # The alias for this procedure from external modules + # + check_external_aliases(procedure_name, + subroutine_function.params, + subroutine_function.comment, subroutine_function) if external + check_public_methods(subroutine_function, container.name) + + + # contains_lines are parsed as private procedures + if contains_flag + parse_program_or_module(container, + contains_lines.join("\n"), :private) + end + + # next loop to search next block + level_depth = 0 + block_searching_flag = nil + block_searching_lines = [] + pre_comment = [] + procedure_trailing = "" + procedure_name = "" + procedure_params = "" + procedure_prefix = "" + procedure_result_arg = "" + contains_lines = [] + contains_flag = nil + next false + } # End of remaining_lines.collect!{|line| + + # Array remains_lines is converted to String remains_code again + # + remaining_code = remaining_lines.join("\n") + + # + # Parse interface + # + interface_scope = false + generic_name = "" + interface_code.split("\n").each{ |line| + if /^\s*? + interface( + \s+\w+| + \s+operator\s*?\(.*?\)| + \s+assignment\s*?\(\s*?=\s*?\) + )? + \s*?(!.*?)?$ + /ix =~ line + generic_name = $1 ? $1.strip.chomp : nil + interface_trailing = $2 || "!" + interface_scope = true + interface_scope = false if interface_trailing =~ /!:nodoc:/ +# if generic_name =~ /operator\s*?\((.*?)\)/i +# operator_name = $1 +# if operator_name && !operator_name.empty? +# generic_name = "#{operator_name}" +# end +# end +# if generic_name =~ /assignment\s*?\((.*?)\)/i +# assignment_name = $1 +# if assignment_name && !assignment_name.empty? +# generic_name = "#{assignment_name}" +# end +# end + end + if /^\s*?end\s+interface/i =~ line + interface_scope = false + generic_name = nil + end + # internal alias + if interface_scope && /^\s*?module\s+procedure\s+(.*?)(!.*?)?$/i =~ line + procedures = $1.strip.chomp + procedures_trailing = $2 || "!" + next if procedures_trailing =~ /!:nodoc:/ + procedures.split(",").each{ |proc| + proc.strip! + proc.chomp! + next if generic_name == proc || !generic_name + old_meth = container.find_symbol(proc, nil, @options.ignore_case) + next if !old_meth + nolink = old_meth.visibility == :private ? true : nil + nolink = nil if @options.show_all + new_meth = + initialize_external_method(generic_name, proc, + old_meth.params, nil, + old_meth.comment, + old_meth.clone.token_stream[0].text, + true, nolink) + new_meth.singleton = old_meth.singleton + + progress "i" + @stats.num_methods += 1 + container.add_method new_meth + + set_visibility(container, generic_name, visibility_default, @@public_methods) + + check_public_methods(new_meth, container.name) + + } + end + + # external aliases + if interface_scope + # subroutine + proc = nil + params = nil + procedures_trailing = nil + if line =~ /^\s*? + (recursive|pure|elemental)?\s*? + subroutine\s+(\w+)\s*?(\(.*?\))?\s*?(!.*?)?$ + /ix + proc = $2.chomp.strip + generic_name = proc unless generic_name + params = $3 || "" + procedures_trailing = $4 || "!" + + # function + elsif line =~ /^\s*? + (recursive|pure|elemental)?\s*? + ( + character\s*?(\([\w\s\=\(\)\*]+?\))?\s+ + | type\s*?\([\w\s]+?\)\s+ + | integer\s*?(\([\w\s\=\(\)\*]+?\))?\s+ + | real\s*?(\([\w\s\=\(\)\*]+?\))?\s+ + | double\s+precision\s+ + | logical\s*?(\([\w\s\=\(\)\*]+?\))?\s+ + | complex\s*?(\([\w\s\=\(\)\*]+?\))?\s+ + )? + function\s+(\w+)\s*? + (\(.*?\))?(\s+result\((.*?)\))?\s*?(!.*?)?$ + /ix + proc = $8.chomp.strip + generic_name = proc unless generic_name + params = $9 || "" + procedures_trailing = $12 || "!" + else + next + end + next if procedures_trailing =~ /!:nodoc:/ + indicated_method = nil + indicated_file = nil + TopLevel.all_files.each do |name, toplevel| + indicated_method = toplevel.find_local_symbol(proc, @options.ignore_case) + indicated_file = name + break if indicated_method + end + + if indicated_method + external_method = + initialize_external_method(generic_name, proc, + indicated_method.params, + indicated_file, + indicated_method.comment) + + progress "e" + @stats.num_methods += 1 + container.add_method external_method + set_visibility(container, generic_name, visibility_default, @@public_methods) + if !container.include_requires?(indicated_file, @options.ignore_case) + container.add_require(Require.new(indicated_file, "")) + end + check_public_methods(external_method, container.name) + + else + @@external_aliases << { + "new_name" => generic_name, + "old_name" => proc, + "file_or_module" => container, + "visibility" => find_visibility(container, generic_name, @@public_methods) || visibility_default + } + end + end + + } if interface_code # End of interface_code.split("\n").each ... + + # + # Already imported methods are removed from @@public_methods. + # Remainders are assumed to be imported from other modules. + # + @@public_methods.delete_if{ |method| method["entity_is_discovered"]} + + @@public_methods.each{ |pub_meth| + next unless pub_meth["file_or_module"].name == container.name + pub_meth["used_modules"].each{ |used_mod| + TopLevel.all_classes_and_modules.each{ |modules| + if modules.name == used_mod || + modules.name.upcase == used_mod.upcase && + @options.ignore_case + modules.method_list.each{ |meth| + if meth.name == pub_meth["name"] || + meth.name.upcase == pub_meth["name"].upcase && + @options.ignore_case + new_meth = initialize_public_method(meth, + modules.name) + if pub_meth["local_name"] + new_meth.name = pub_meth["local_name"] + end + progress "e" + @stats.num_methods += 1 + container.add_method new_meth + end + } + end + } + } + } + + container + end # End of parse_program_or_module + + # + # Parse arguments, comment, code of subroutine and function. + # Return AnyMethod object. + # + def parse_subprogram(subprogram, params, comment, code, + before_contains=nil, function=nil, prefix=nil) + subprogram.singleton = false + prefix = "" if !prefix + arguments = params.sub(/\(/, "").sub(/\)/, "").split(",") if params + args_comment, params_opt = + find_arguments(arguments, code.sub(/^s*?contains\s*?(!.*?)?$.*/im, ""), + nil, nil, true) + params_opt = "( " + params_opt + " ) " if params_opt + subprogram.params = params_opt || "" + namelist_comment = find_namelists(code, before_contains) + + block_comment = find_comments comment + if function + subprogram.comment = "<b><em> Function </em></b> :: <em>#{prefix}</em>\n" + else + subprogram.comment = "<b><em> Subroutine </em></b> :: <em>#{prefix}</em>\n" + end + subprogram.comment << args_comment if args_comment + subprogram.comment << block_comment if block_comment + subprogram.comment << namelist_comment if namelist_comment + + # For output source code + subprogram.start_collecting_tokens + subprogram.add_token Token.new(1,1).set_text(code) + + subprogram + end + + # + # Collect comment for file entity + # + def collect_first_comment(body) + comment = "" + not_comment = "" + comment_start = false + comment_end = false + body.split("\n").each{ |line| + if comment_end + not_comment << line + not_comment << "\n" + elsif /^\s*?!\s?(.*)$/i =~ line + comment_start = true + comment << $1 + comment << "\n" + elsif /^\s*?$/i =~ line + comment_end = true if comment_start && COMMENTS_ARE_UPPER + else + comment_end = true + not_comment << line + not_comment << "\n" + end + } + return comment, not_comment + end + + + # Return comments of definitions of arguments + # + # If "all" argument is true, information of all arguments are returned. + # If "modified_params" is true, list of arguments are decorated, + # for exameple, optional arguments are parenthetic as "[arg]". + # + def find_arguments(args, text, all=nil, indent=nil, modified_params=nil) + return unless args || all + indent = "" unless indent + args = ["all"] if all + params = "" if modified_params + comma = "" + return unless text + args_rdocforms = "\n" + remaining_lines = "#{text}" + definitions = definition_info(remaining_lines) + args.each{ |arg| + arg.strip! + arg.chomp! + definitions.each { |defitem| + if arg == defitem.varname.strip.chomp || all + args_rdocforms << <<-"EOF" + +#{indent}<tt><b>#{defitem.varname.chomp.strip}#{defitem.arraysuffix}</b> #{defitem.inivalue}</tt> :: +#{indent} <tt>#{defitem.types.chomp.strip}</tt> +EOF + if !defitem.comment.chomp.strip.empty? + comment = "" + defitem.comment.split("\n").each{ |line| + comment << " " + line + "\n" + } + args_rdocforms << <<-"EOF" + +#{indent} <tt></tt> :: +#{indent} <tt></tt> +#{indent} #{comment.chomp.strip} +EOF + end + + if modified_params + if defitem.include_attr?("optional") + params << "#{comma}[#{arg}]" + else + params << "#{comma}#{arg}" + end + comma = ", " + end + end + } + } + if modified_params + return args_rdocforms, params + else + return args_rdocforms + end + end + + # Return comments of definitions of namelists + # + def find_namelists(text, before_contains=nil) + return nil if !text + result = "" + lines = "#{text}" + before_contains = "" if !before_contains + while lines =~ /^\s*?namelist\s+\/\s*?(\w+)\s*?\/([\s\w\,]+)$/i + lines = $~.post_match + nml_comment = COMMENTS_ARE_UPPER ? + find_comments($~.pre_match) : find_comments($~.post_match) + nml_name = $1 + nml_args = $2.split(",") + result << "\n\n=== NAMELIST <tt><b>" + nml_name + "</tt></b>\n\n" + result << nml_comment + "\n" if nml_comment + if lines.split("\n")[0] =~ /^\//i + lines = "namelist " + lines + end + result << find_arguments(nml_args, "#{text}" + "\n" + before_contains) + end + return result + end + + # + # Comments just after module or subprogram, or arguments are + # returnd. If "COMMENTS_ARE_UPPER" is true, comments just before + # modules or subprograms are returnd + # + def find_comments text + return "" unless text + lines = text.split("\n") + lines.reverse! if COMMENTS_ARE_UPPER + comment_block = Array.new + lines.each do |line| + break if line =~ /^\s*?\w/ || line =~ /^\s*?$/ + if COMMENTS_ARE_UPPER + comment_block.unshift line.sub(/^\s*?!\s?/,"") + else + comment_block.push line.sub(/^\s*?!\s?/,"") + end + end + nice_lines = comment_block.join("\n").split "\n\s*?\n" + nice_lines[0] ||= "" + nice_lines.shift + end + + def progress(char) + unless @options.quiet + @progress.print(char) + @progress.flush + end + end + + # + # Create method for internal alias + # + def initialize_public_method(method, parent) + return if !method || !parent + + new_meth = AnyMethod.new("External Alias for module", method.name) + new_meth.singleton = method.singleton + new_meth.params = method.params.clone + new_meth.comment = remove_trailing_alias(method.comment.clone) + new_meth.comment << "\n\n#{EXTERNAL_ALIAS_MES} #{parent.strip.chomp}\##{method.name}" + + return new_meth + end + + # + # Create method for external alias + # + # If argument "internal" is true, file is ignored. + # + def initialize_external_method(new, old, params, file, comment, token=nil, + internal=nil, nolink=nil) + return nil unless new || old + + if internal + external_alias_header = "#{INTERNAL_ALIAS_MES} " + external_alias_text = external_alias_header + old + elsif file + external_alias_header = "#{EXTERNAL_ALIAS_MES} " + external_alias_text = external_alias_header + file + "#" + old + else + return nil + end + external_meth = AnyMethod.new(external_alias_text, new) + external_meth.singleton = false + external_meth.params = params + external_comment = remove_trailing_alias(comment) + "\n\n" if comment + external_meth.comment = external_comment || "" + if nolink && token + external_meth.start_collecting_tokens + external_meth.add_token Token.new(1,1).set_text(token) + else + external_meth.comment << external_alias_text + end + + return external_meth + end + + + + # + # Parse visibility + # + def parse_visibility(code, default, container) + result = [] + visibility_default = default || :public + + used_modules = [] + container.includes.each{|i| used_modules << i.name} if container + + remaining_code = code.gsub(/^\s*?type[\s\,]+.*?\s+end\s+type.*?$/im, "") + remaining_code.split("\n").each{ |line| + if /^\s*?private\s*?$/ =~ line + visibility_default = :private + break + end + } if remaining_code + + remaining_code.split("\n").each{ |line| + if /^\s*?private\s*?(::)?\s+(.*)\s*?(!.*?)?/i =~ line + methods = $2.sub(/!.*$/, '') + methods.split(",").each{ |meth| + meth.sub!(/!.*$/, '') + meth.gsub!(/:/, '') + result << { + "name" => meth.chomp.strip, + "visibility" => :private, + "used_modules" => used_modules.clone, + "file_or_module" => container, + "entity_is_discovered" => nil, + "local_name" => nil + } + } + elsif /^\s*?public\s*?(::)?\s+(.*)\s*?(!.*?)?/i =~ line + methods = $2.sub(/!.*$/, '') + methods.split(",").each{ |meth| + meth.sub!(/!.*$/, '') + meth.gsub!(/:/, '') + result << { + "name" => meth.chomp.strip, + "visibility" => :public, + "used_modules" => used_modules.clone, + "file_or_module" => container, + "entity_is_discovered" => nil, + "local_name" => nil + } + } + end + } if remaining_code + + if container + result.each{ |vis_info| + vis_info["parent"] = container.name + } + end + + return visibility_default, result + end + + # + # Set visibility + # + # "subname" element of "visibility_info" is deleted. + # + def set_visibility(container, subname, visibility_default, visibility_info) + return unless container || subname || visibility_default || visibility_info + not_found = true + visibility_info.collect!{ |info| + if info["name"] == subname || + @options.ignore_case && info["name"].upcase == subname.upcase + if info["file_or_module"].name == container.name + container.set_visibility_for([subname], info["visibility"]) + info["entity_is_discovered"] = true + not_found = false + end + end + info + } + if not_found + return container.set_visibility_for([subname], visibility_default) + else + return container + end + end + + # + # Find visibility + # + def find_visibility(container, subname, visibility_info) + return nil if !subname || !visibility_info + visibility_info.each{ |info| + if info["name"] == subname || + @options.ignore_case && info["name"].upcase == subname.upcase + if info["parent"] == container.name + return info["visibility"] + end + end + } + return nil + end + + # + # Check external aliases + # + def check_external_aliases(subname, params, comment, test=nil) + @@external_aliases.each{ |alias_item| + if subname == alias_item["old_name"] || + subname.upcase == alias_item["old_name"].upcase && + @options.ignore_case + + new_meth = initialize_external_method(alias_item["new_name"], + subname, params, @file_name, + comment) + new_meth.visibility = alias_item["visibility"] + + progress "e" + @stats.num_methods += 1 + alias_item["file_or_module"].add_method(new_meth) + + if !alias_item["file_or_module"].include_requires?(@file_name, @options.ignore_case) + alias_item["file_or_module"].add_require(Require.new(@file_name, "")) + end + end + } + end + + # + # Check public_methods + # + def check_public_methods(method, parent) + return if !method || !parent + @@public_methods.each{ |alias_item| + parent_is_used_module = nil + alias_item["used_modules"].each{ |used_module| + if used_module == parent || + used_module.upcase == parent.upcase && + @options.ignore_case + parent_is_used_module = true + end + } + next if !parent_is_used_module + + if method.name == alias_item["name"] || + method.name.upcase == alias_item["name"].upcase && + @options.ignore_case + + new_meth = initialize_public_method(method, parent) + if alias_item["local_name"] + new_meth.name = alias_item["local_name"] + end + + progress "e" + @stats.num_methods += 1 + alias_item["file_or_module"].add_method new_meth + end + } + end + + # + # Continuous lines are united. + # + # Comments in continuous lines are removed. + # + def united_to_one_line(f90src) + return "" unless f90src + lines = f90src.split("\n") + previous_continuing = false + now_continuing = false + body = "" + lines.each{ |line| + words = line.split("") + next if words.empty? && previous_continuing + commentout = false + brank_flag = true ; brank_char = "" + squote = false ; dquote = false + ignore = false + words.collect! { |char| + if previous_continuing && brank_flag + now_continuing = true + ignore = true + case char + when "!" ; break + when " " ; brank_char << char ; next "" + when "&" + brank_flag = false + now_continuing = false + next "" + else + brank_flag = false + now_continuing = false + ignore = false + next brank_char + char + end + end + ignore = false + + if now_continuing + next "" + elsif !(squote) && !(dquote) && !(commentout) + case char + when "!" ; commentout = true ; next char + when "\""; dquote = true ; next char + when "\'"; squote = true ; next char + when "&" ; now_continuing = true ; next "" + else next char + end + elsif commentout + next char + elsif squote + case char + when "\'"; squote = false ; next char + else next char + end + elsif dquote + case char + when "\""; dquote = false ; next char + else next char + end + end + } + if !ignore && !previous_continuing || !brank_flag + if previous_continuing + body << words.join("") + else + body << "\n" + words.join("") + end + end + previous_continuing = now_continuing ? true : nil + now_continuing = nil + } + return body + end + + + # + # Continuous line checker + # + def continuous_line?(line) + continuous = false + if /&\s*?(!.*)?$/ =~ line + continuous = true + if comment_out?($~.pre_match) + continuous = false + end + end + return continuous + end + + # + # Comment out checker + # + def comment_out?(line) + return nil unless line + commentout = false + squote = false ; dquote = false + line.split("").each { |char| + if !(squote) && !(dquote) + case char + when "!" ; commentout = true ; break + when "\""; dquote = true + when "\'"; squote = true + else next + end + elsif squote + case char + when "\'"; squote = false + else next + end + elsif dquote + case char + when "\""; dquote = false + else next + end + end + } + return commentout + end + + # + # Semicolons are replaced to line feed. + # + def semicolon_to_linefeed(text) + return "" unless text + lines = text.split("\n") + lines.collect!{ |line| + words = line.split("") + commentout = false + squote = false ; dquote = false + words.collect! { |char| + if !(squote) && !(dquote) && !(commentout) + case char + when "!" ; commentout = true ; next char + when "\""; dquote = true ; next char + when "\'"; squote = true ; next char + when ";" ; "\n" + else next char + end + elsif commentout + next char + elsif squote + case char + when "\'"; squote = false ; next char + else next char + end + elsif dquote + case char + when "\""; dquote = false ; next char + else next char + end + end + } + words.join("") + } + return lines.join("\n") + end + + # + # Which "line" is start of block (module, program, block data, + # subroutine, function) statement ? + # + def block_start?(line) + return nil if !line + + if line =~ /^\s*?module\s+(\w+)\s*?(!.*?)?$/i || + line =~ /^\s*?program\s+(\w+)\s*?(!.*?)?$/i || + line =~ /^\s*?block\s+data(\s+\w+)?\s*?(!.*?)?$/i || + line =~ \ + /^\s*? + (recursive|pure|elemental)?\s*? + subroutine\s+(\w+)\s*?(\(.*?\))?\s*?(!.*?)?$ + /ix || + line =~ \ + /^\s*? + (recursive|pure|elemental)?\s*? + ( + character\s*?(\([\w\s\=\(\)\*]+?\))?\s+ + | type\s*?\([\w\s]+?\)\s+ + | integer\s*?(\([\w\s\=\(\)\*]+?\))?\s+ + | real\s*?(\([\w\s\=\(\)\*]+?\))?\s+ + | double\s+precision\s+ + | logical\s*?(\([\w\s\=\(\)\*]+?\))?\s+ + | complex\s*?(\([\w\s\=\(\)\*]+?\))?\s+ + )? + function\s+(\w+)\s*? + (\(.*?\))?(\s+result\((.*?)\))?\s*?(!.*?)?$ + /ix + return true + end + + return nil + end + + # + # Which "line" is end of block (module, program, block data, + # subroutine, function) statement ? + # + def block_end?(line) + return nil if !line + + if line =~ /^\s*?end\s*?(!.*?)?$/i || + line =~ /^\s*?end\s+module(\s+\w+)?\s*?(!.*?)?$/i || + line =~ /^\s*?end\s+program(\s+\w+)?\s*?(!.*?)?$/i || + line =~ /^\s*?end\s+block\s+data(\s+\w+)?\s*?(!.*?)?$/i || + line =~ /^\s*?end\s+subroutine(\s+\w+)?\s*?(!.*?)?$/i || + line =~ /^\s*?end\s+function(\s+\w+)?\s*?(!.*?)?$/i + return true + end + + return nil + end + + # + # Remove "Alias for" in end of comments + # + def remove_trailing_alias(text) + return "" if !text + lines = text.split("\n").reverse + comment_block = Array.new + checked = false + lines.each do |line| + if !checked + if /^\s?#{INTERNAL_ALIAS_MES}/ =~ line || + /^\s?#{EXTERNAL_ALIAS_MES}/ =~ line + checked = true + next + end + end + comment_block.unshift line + end + nice_lines = comment_block.join("\n") + nice_lines ||= "" + return nice_lines + end + + # Empty lines in header are removed + def remove_empty_head_lines(text) + return "" unless text + lines = text.split("\n") + header = true + lines.delete_if{ |line| + header = false if /\S/ =~ line + header && /^\s*?$/ =~ line + } + lines.join("\n") + end + + + # header marker "=", "==", ... are removed + def remove_header_marker(text) + return text.gsub(/^\s?(=+)/, '<tt></tt>\1') + end + + def remove_private_comments(body) + body.gsub!(/^\s*!--\s*?$.*?^\s*!\+\+\s*?$/m, '') + return body + end + + + # + # Information of arguments of subroutines and functions in Fortran95 + # + class Fortran95Definition + + # Name of variable + # + attr_reader :varname + + # Types of variable + # + attr_reader :types + + # Initial Value + # + attr_reader :inivalue + + # Suffix of array + # + attr_reader :arraysuffix + + # Comments + # + attr_accessor :comment + + # Flag of non documentation + # + attr_accessor :nodoc + + def initialize(varname, types, inivalue, arraysuffix, comment, + nodoc=false) + @varname = varname + @types = types + @inivalue = inivalue + @arraysuffix = arraysuffix + @comment = comment + @nodoc = nodoc + end + + def to_s + return <<-EOF +<Fortran95Definition: + varname=#{@varname}, types=#{types}, + inivalue=#{@inivalue}, arraysuffix=#{@arraysuffix}, nodoc=#{@nodoc}, + comment= +#{@comment} +> +EOF + end + + # + # If attr is included, true is returned + # + def include_attr?(attr) + return if !attr + @types.split(",").each{ |type| + return true if type.strip.chomp.upcase == attr.strip.chomp.upcase + } + return nil + end + + end # End of Fortran95Definition + + # + # Parse string argument "text", and Return Array of + # Fortran95Definition object + # + def definition_info(text) + return nil unless text + lines = "#{text}" + defs = Array.new + comment = "" + trailing_comment = "" + under_comment_valid = false + lines.split("\n").each{ |line| + if /^\s*?!\s?(.*)/ =~ line + if COMMENTS_ARE_UPPER + comment << remove_header_marker($1) + comment << "\n" + elsif defs[-1] && under_comment_valid + defs[-1].comment << "\n" + defs[-1].comment << remove_header_marker($1) + end + next + elsif /^\s*?$/ =~ line + comment = "" + under_comment_valid = false + next + end + type = "" + characters = "" + if line =~ /^\s*? + ( + character\s*?(\([\w\s\=\(\)\*]+?\))?[\s\,]* + | type\s*?\([\w\s]+?\)[\s\,]* + | integer\s*?(\([\w\s\=\(\)\*]+?\))?[\s\,]* + | real\s*?(\([\w\s\=\(\)\*]+?\))?[\s\,]* + | double\s+precision[\s\,]* + | logical\s*?(\([\w\s\=\(\)\*]+?\))?[\s\,]* + | complex\s*?(\([\w\s\=\(\)\*]+?\))?[\s\,]* + ) + (.*?::)? + (.+)$ + /ix + characters = $8 + type = $1 + type << $7.gsub(/::/, '').gsub(/^\s*?\,/, '') if $7 + else + under_comment_valid = false + next + end + squote = false ; dquote = false ; bracket = 0 + iniflag = false; commentflag = false + varname = "" ; arraysuffix = "" ; inivalue = "" + start_pos = defs.size + characters.split("").each { |char| + if !(squote) && !(dquote) && bracket <= 0 && !(iniflag) && !(commentflag) + case char + when "!" ; commentflag = true + when "(" ; bracket += 1 ; arraysuffix = char + when "\""; dquote = true + when "\'"; squote = true + when "=" ; iniflag = true ; inivalue << char + when "," + defs << Fortran95Definition.new(varname, type, inivalue, arraysuffix, comment) + varname = "" ; arraysuffix = "" ; inivalue = "" + under_comment_valid = true + when " " ; next + else ; varname << char + end + elsif commentflag + comment << remove_header_marker(char) + trailing_comment << remove_header_marker(char) + elsif iniflag + if dquote + case char + when "\"" ; dquote = false ; inivalue << char + else ; inivalue << char + end + elsif squote + case char + when "\'" ; squote = false ; inivalue << char + else ; inivalue << char + end + elsif bracket > 0 + case char + when "(" ; bracket += 1 ; inivalue << char + when ")" ; bracket -= 1 ; inivalue << char + else ; inivalue << char + end + else + case char + when "," + defs << Fortran95Definition.new(varname, type, inivalue, arraysuffix, comment) + varname = "" ; arraysuffix = "" ; inivalue = "" + iniflag = false + under_comment_valid = true + when "(" ; bracket += 1 ; inivalue << char + when "\""; dquote = true ; inivalue << char + when "\'"; squote = true ; inivalue << char + when "!" ; commentflag = true + else ; inivalue << char + end + end + elsif !(squote) && !(dquote) && bracket > 0 + case char + when "(" ; bracket += 1 ; arraysuffix << char + when ")" ; bracket -= 1 ; arraysuffix << char + else ; arraysuffix << char + end + elsif squote + case char + when "\'"; squote = false ; inivalue << char + else ; inivalue << char + end + elsif dquote + case char + when "\""; dquote = false ; inivalue << char + else ; inivalue << char + end + end + } + defs << Fortran95Definition.new(varname, type, inivalue, arraysuffix, comment) + if trailing_comment =~ /^:nodoc:/ + defs[start_pos..-1].collect!{ |defitem| + defitem.nodoc = true + } + end + varname = "" ; arraysuffix = "" ; inivalue = "" + comment = "" + under_comment_valid = true + trailing_comment = "" + } + return defs + end + + + end # class Fortran95parser + +end # module RDoc diff --git a/ruby_1_8_6/lib/rdoc/parsers/parse_rb.rb b/ruby_1_8_6/lib/rdoc/parsers/parse_rb.rb new file mode 100644 index 0000000000..750c483c15 --- /dev/null +++ b/ruby_1_8_6/lib/rdoc/parsers/parse_rb.rb @@ -0,0 +1,2609 @@ +#!/usr/local/bin/ruby + +# Parse a Ruby source file, building a set of objects +# representing the modules, classes, methods, +# requires, and includes we find (these classes +# are defined in code_objects.rb). + +# This file contains stuff stolen outright from: +# +# rtags.rb - +# ruby-lex.rb - ruby lexcal analizer +# ruby-token.rb - ruby tokens +# by Keiju ISHITSUKA (Nippon Rational Inc.) +# + +require "e2mmap" +require "irb/slex" + +require "rdoc/code_objects" +require "rdoc/tokenstream" + +require "rdoc/markup/simple_markup/preprocess" + +require "rdoc/parsers/parserfactory" + +$TOKEN_DEBUG = $DEBUG + +# Definitions of all tokens involved in the lexical analysis + +module RubyToken + EXPR_BEG = :EXPR_BEG + EXPR_MID = :EXPR_MID + EXPR_END = :EXPR_END + EXPR_ARG = :EXPR_ARG + EXPR_FNAME = :EXPR_FNAME + EXPR_DOT = :EXPR_DOT + EXPR_CLASS = :EXPR_CLASS + + class Token + NO_TEXT = "??".freeze + attr :text + + def initialize(line_no, char_no) + @line_no = line_no + @char_no = char_no + @text = NO_TEXT + end + + # Because we're used in contexts that expect to return a token, + # we set the text string and then return ourselves + def set_text(text) + @text = text + self + end + + attr_reader :line_no, :char_no, :text + end + + class TkNode < Token + attr :node + end + + class TkId < Token + def initialize(line_no, char_no, name) + super(line_no, char_no) + @name = name + end + attr :name + end + + class TkKW < TkId + end + + class TkVal < Token + def initialize(line_no, char_no, value = nil) + super(line_no, char_no) + set_text(value) + end + end + + class TkOp < Token + def name + self.class.op_name + end + end + + class TkOPASGN < TkOp + def initialize(line_no, char_no, op) + super(line_no, char_no) + op = TkReading2Token[op] unless op.kind_of?(Symbol) + @op = op + end + attr :op + end + + class TkUnknownChar < Token + def initialize(line_no, char_no, id) + super(line_no, char_no) + @name = char_no.chr + end + attr :name + end + + class TkError < Token + end + + def set_token_position(line, char) + @prev_line_no = line + @prev_char_no = char + end + + def Token(token, value = nil) + tk = nil + case token + when String, Symbol + source = token.kind_of?(String) ? TkReading2Token : TkSymbol2Token + if (tk = source[token]).nil? + IRB.fail TkReading2TokenNoKey, token + end + tk = Token(tk[0], value) + else + tk = if (token.ancestors & [TkId, TkVal, TkOPASGN, TkUnknownChar]).empty? + token.new(@prev_line_no, @prev_char_no) + else + token.new(@prev_line_no, @prev_char_no, value) + end + end + tk + end + + TokenDefinitions = [ + [:TkCLASS, TkKW, "class", EXPR_CLASS], + [:TkMODULE, TkKW, "module", EXPR_BEG], + [:TkDEF, TkKW, "def", EXPR_FNAME], + [:TkUNDEF, TkKW, "undef", EXPR_FNAME], + [:TkBEGIN, TkKW, "begin", EXPR_BEG], + [:TkRESCUE, TkKW, "rescue", EXPR_MID], + [:TkENSURE, TkKW, "ensure", EXPR_BEG], + [:TkEND, TkKW, "end", EXPR_END], + [:TkIF, TkKW, "if", EXPR_BEG, :TkIF_MOD], + [:TkUNLESS, TkKW, "unless", EXPR_BEG, :TkUNLESS_MOD], + [:TkTHEN, TkKW, "then", EXPR_BEG], + [:TkELSIF, TkKW, "elsif", EXPR_BEG], + [:TkELSE, TkKW, "else", EXPR_BEG], + [:TkCASE, TkKW, "case", EXPR_BEG], + [:TkWHEN, TkKW, "when", EXPR_BEG], + [:TkWHILE, TkKW, "while", EXPR_BEG, :TkWHILE_MOD], + [:TkUNTIL, TkKW, "until", EXPR_BEG, :TkUNTIL_MOD], + [:TkFOR, TkKW, "for", EXPR_BEG], + [:TkBREAK, TkKW, "break", EXPR_END], + [:TkNEXT, TkKW, "next", EXPR_END], + [:TkREDO, TkKW, "redo", EXPR_END], + [:TkRETRY, TkKW, "retry", EXPR_END], + [:TkIN, TkKW, "in", EXPR_BEG], + [:TkDO, TkKW, "do", EXPR_BEG], + [:TkRETURN, TkKW, "return", EXPR_MID], + [:TkYIELD, TkKW, "yield", EXPR_END], + [:TkSUPER, TkKW, "super", EXPR_END], + [:TkSELF, TkKW, "self", EXPR_END], + [:TkNIL, TkKW, "nil", EXPR_END], + [:TkTRUE, TkKW, "true", EXPR_END], + [:TkFALSE, TkKW, "false", EXPR_END], + [:TkAND, TkKW, "and", EXPR_BEG], + [:TkOR, TkKW, "or", EXPR_BEG], + [:TkNOT, TkKW, "not", EXPR_BEG], + [:TkIF_MOD, TkKW], + [:TkUNLESS_MOD, TkKW], + [:TkWHILE_MOD, TkKW], + [:TkUNTIL_MOD, TkKW], + [:TkALIAS, TkKW, "alias", EXPR_FNAME], + [:TkDEFINED, TkKW, "defined?", EXPR_END], + [:TklBEGIN, TkKW, "BEGIN", EXPR_END], + [:TklEND, TkKW, "END", EXPR_END], + [:Tk__LINE__, TkKW, "__LINE__", EXPR_END], + [:Tk__FILE__, TkKW, "__FILE__", EXPR_END], + + [:TkIDENTIFIER, TkId], + [:TkFID, TkId], + [:TkGVAR, TkId], + [:TkIVAR, TkId], + [:TkCONSTANT, TkId], + + [:TkINTEGER, TkVal], + [:TkFLOAT, TkVal], + [:TkSTRING, TkVal], + [:TkXSTRING, TkVal], + [:TkREGEXP, TkVal], + [:TkCOMMENT, TkVal], + + [:TkDSTRING, TkNode], + [:TkDXSTRING, TkNode], + [:TkDREGEXP, TkNode], + [:TkNTH_REF, TkId], + [:TkBACK_REF, TkId], + + [:TkUPLUS, TkOp, "+@"], + [:TkUMINUS, TkOp, "-@"], + [:TkPOW, TkOp, "**"], + [:TkCMP, TkOp, "<=>"], + [:TkEQ, TkOp, "=="], + [:TkEQQ, TkOp, "==="], + [:TkNEQ, TkOp, "!="], + [:TkGEQ, TkOp, ">="], + [:TkLEQ, TkOp, "<="], + [:TkANDOP, TkOp, "&&"], + [:TkOROP, TkOp, "||"], + [:TkMATCH, TkOp, "=~"], + [:TkNMATCH, TkOp, "!~"], + [:TkDOT2, TkOp, ".."], + [:TkDOT3, TkOp, "..."], + [:TkAREF, TkOp, "[]"], + [:TkASET, TkOp, "[]="], + [:TkLSHFT, TkOp, "<<"], + [:TkRSHFT, TkOp, ">>"], + [:TkCOLON2, TkOp], + [:TkCOLON3, TkOp], +# [:OPASGN, TkOp], # +=, -= etc. # + [:TkASSOC, TkOp, "=>"], + [:TkQUESTION, TkOp, "?"], #? + [:TkCOLON, TkOp, ":"], #: + + [:TkfLPAREN], # func( # + [:TkfLBRACK], # func[ # + [:TkfLBRACE], # func{ # + [:TkSTAR], # *arg + [:TkAMPER], # &arg # + [:TkSYMBOL, TkId], # :SYMBOL + [:TkSYMBEG, TkId], + [:TkGT, TkOp, ">"], + [:TkLT, TkOp, "<"], + [:TkPLUS, TkOp, "+"], + [:TkMINUS, TkOp, "-"], + [:TkMULT, TkOp, "*"], + [:TkDIV, TkOp, "/"], + [:TkMOD, TkOp, "%"], + [:TkBITOR, TkOp, "|"], + [:TkBITXOR, TkOp, "^"], + [:TkBITAND, TkOp, "&"], + [:TkBITNOT, TkOp, "~"], + [:TkNOTOP, TkOp, "!"], + + [:TkBACKQUOTE, TkOp, "`"], + + [:TkASSIGN, Token, "="], + [:TkDOT, Token, "."], + [:TkLPAREN, Token, "("], #(exp) + [:TkLBRACK, Token, "["], #[arry] + [:TkLBRACE, Token, "{"], #{hash} + [:TkRPAREN, Token, ")"], + [:TkRBRACK, Token, "]"], + [:TkRBRACE, Token, "}"], + [:TkCOMMA, Token, ","], + [:TkSEMICOLON, Token, ";"], + + [:TkRD_COMMENT], + [:TkSPACE], + [:TkNL], + [:TkEND_OF_SCRIPT], + + [:TkBACKSLASH, TkUnknownChar, "\\"], + [:TkAT, TkUnknownChar, "@"], + [:TkDOLLAR, TkUnknownChar, "\$"], #" + ] + + # {reading => token_class} + # {reading => [token_class, *opt]} + TkReading2Token = {} + TkSymbol2Token = {} + + def RubyToken.def_token(token_n, super_token = Token, reading = nil, *opts) + token_n = token_n.id2name unless token_n.kind_of?(String) + if RubyToken.const_defined?(token_n) + IRB.fail AlreadyDefinedToken, token_n + end + + token_c = Class.new super_token + RubyToken.const_set token_n, token_c +# token_c.inspect + + if reading + if TkReading2Token[reading] + IRB.fail TkReading2TokenDuplicateError, token_n, reading + end + if opts.empty? + TkReading2Token[reading] = [token_c] + else + TkReading2Token[reading] = [token_c].concat(opts) + end + end + TkSymbol2Token[token_n.intern] = token_c + + if token_c <= TkOp + token_c.class_eval %{ + def self.op_name; "#{reading}"; end + } + end + end + + for defs in TokenDefinitions + def_token(*defs) + end + + NEWLINE_TOKEN = TkNL.new(0,0) + NEWLINE_TOKEN.set_text("\n") + +end + + + +# Lexical analyzer for Ruby source + +class RubyLex + + ###################################################################### + # + # Read an input stream character by character. We allow for unlimited + # ungetting of characters just read. + # + # We simplify the implementation greatly by reading the entire input + # into a buffer initially, and then simply traversing it using + # pointers. + # + # We also have to allow for the <i>here document diversion</i>. This + # little gem comes about when the lexer encounters a here + # document. At this point we effectively need to split the input + # stream into two parts: one to read the body of the here document, + # the other to read the rest of the input line where the here + # document was initially encountered. For example, we might have + # + # do_something(<<-A, <<-B) + # stuff + # for + # A + # stuff + # for + # B + # + # When the lexer encounters the <<A, it reads until the end of the + # line, and keeps it around for later. It then reads the body of the + # here document. Once complete, it needs to read the rest of the + # original line, but then skip the here document body. + # + + class BufferedReader + + attr_reader :line_num + + def initialize(content) + if /\t/ =~ content + tab_width = Options.instance.tab_width + content = content.split(/\n/).map do |line| + 1 while line.gsub!(/\t+/) { ' ' * (tab_width*$&.length - $`.length % tab_width)} && $~ #` + line + end .join("\n") + end + @content = content + @content << "\n" unless @content[-1,1] == "\n" + @size = @content.size + @offset = 0 + @hwm = 0 + @line_num = 1 + @read_back_offset = 0 + @last_newline = 0 + @newline_pending = false + end + + def column + @offset - @last_newline + end + + def getc + return nil if @offset >= @size + ch = @content[@offset, 1] + + @offset += 1 + @hwm = @offset if @hwm < @offset + + if @newline_pending + @line_num += 1 + @last_newline = @offset - 1 + @newline_pending = false + end + + if ch == "\n" + @newline_pending = true + end + ch + end + + def getc_already_read + getc + end + + def ungetc(ch) + raise "unget past beginning of file" if @offset <= 0 + @offset -= 1 + if @content[@offset] == ?\n + @newline_pending = false + end + end + + def get_read + res = @content[@read_back_offset...@offset] + @read_back_offset = @offset + res + end + + def peek(at) + pos = @offset + at + if pos >= @size + nil + else + @content[pos, 1] + end + end + + def peek_equal(str) + @content[@offset, str.length] == str + end + + def divert_read_from(reserve) + @content[@offset, 0] = reserve + @size = @content.size + end + end + + # end of nested class BufferedReader + + extend Exception2MessageMapper + def_exception(:AlreadyDefinedToken, "Already defined token(%s)") + def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')") + def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')") + def_exception(:TkReading2TokenDuplicateError, + "key duplicate(token_n='%s', key='%s')") + def_exception(:SyntaxError, "%s") + + include RubyToken + include IRB + + attr_reader :continue + attr_reader :lex_state + + def RubyLex.debug? + false + end + + def initialize(content) + lex_init + + @reader = BufferedReader.new(content) + + @exp_line_no = @line_no = 1 + @base_char_no = 0 + @indent = 0 + + @ltype = nil + @quoted = nil + @lex_state = EXPR_BEG + @space_seen = false + + @continue = false + @line = "" + + @skip_space = false + @read_auto_clean_up = false + @exception_on_syntax_error = true + end + + attr :skip_space, true + attr :read_auto_clean_up, true + attr :exception_on_syntax_error, true + + attr :indent + + # io functions + def line_no + @reader.line_num + end + + def char_no + @reader.column + end + + def get_read + @reader.get_read + end + + def getc + @reader.getc + end + + def getc_of_rests + @reader.getc_already_read + end + + def gets + c = getc or return + l = "" + begin + l.concat c unless c == "\r" + break if c == "\n" + end while c = getc + l + end + + + def ungetc(c = nil) + @reader.ungetc(c) + end + + def peek_equal?(str) + @reader.peek_equal(str) + end + + def peek(i = 0) + @reader.peek(i) + end + + def lex + until (((tk = token).kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)) && + !@continue or + tk.nil?) + end + line = get_read + + if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil? + nil + else + line + end + end + + def token + set_token_position(line_no, char_no) + begin + begin + tk = @OP.match(self) + @space_seen = tk.kind_of?(TkSPACE) + rescue SyntaxError + abort if @exception_on_syntax_error + tk = TkError.new(line_no, char_no) + end + end while @skip_space and tk.kind_of?(TkSPACE) + if @read_auto_clean_up + get_read + end +# throw :eof unless tk + p tk if $DEBUG + tk + end + + ENINDENT_CLAUSE = [ + "case", "class", "def", "do", "for", "if", + "module", "unless", "until", "while", "begin" #, "when" + ] + DEINDENT_CLAUSE = ["end" #, "when" + ] + + PERCENT_LTYPE = { + "q" => "\'", + "Q" => "\"", + "x" => "\`", + "r" => "/", + "w" => "]" + } + + PERCENT_PAREN = { + "{" => "}", + "[" => "]", + "<" => ">", + "(" => ")" + } + + Ltype2Token = { + "\'" => TkSTRING, + "\"" => TkSTRING, + "\`" => TkXSTRING, + "/" => TkREGEXP, + "]" => TkDSTRING + } + Ltype2Token.default = TkSTRING + + DLtype2Token = { + "\"" => TkDSTRING, + "\`" => TkDXSTRING, + "/" => TkDREGEXP, + } + + def lex_init() + @OP = SLex.new + @OP.def_rules("\0", "\004", "\032") do |chars, io| + Token(TkEND_OF_SCRIPT).set_text(chars) + end + + @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |chars, io| + @space_seen = TRUE + while (ch = getc) =~ /[ \t\f\r\13]/ + chars << ch + end + ungetc + Token(TkSPACE).set_text(chars) + end + + @OP.def_rule("#") do + |op, io| + identify_comment + end + + @OP.def_rule("=begin", proc{@prev_char_no == 0 && peek(0) =~ /\s/}) do + |op, io| + str = op + @ltype = "=" + + + begin + line = "" + begin + ch = getc + line << ch + end until ch == "\n" + str << line + end until line =~ /^=end/ + + ungetc + + @ltype = nil + + if str =~ /\A=begin\s+rdoc/i + str.sub!(/\A=begin.*\n/, '') + str.sub!(/^=end.*/m, '') + Token(TkCOMMENT).set_text(str) + else + Token(TkRD_COMMENT)#.set_text(str) + end + end + + @OP.def_rule("\n") do + print "\\n\n" if RubyLex.debug? + case @lex_state + when EXPR_BEG, EXPR_FNAME, EXPR_DOT + @continue = TRUE + else + @continue = FALSE + @lex_state = EXPR_BEG + end + Token(TkNL).set_text("\n") + end + + @OP.def_rules("*", "**", + "!", "!=", "!~", + "=", "==", "===", + "=~", "<=>", + "<", "<=", + ">", ">=", ">>") do + |op, io| + @lex_state = EXPR_BEG + Token(op).set_text(op) + end + + @OP.def_rules("<<") do + |op, io| + tk = nil + if @lex_state != EXPR_END && @lex_state != EXPR_CLASS && + (@lex_state != EXPR_ARG || @space_seen) + c = peek(0) + if /[-\w_\"\'\`]/ =~ c + tk = identify_here_document + end + end + if !tk + @lex_state = EXPR_BEG + tk = Token(op).set_text(op) + end + tk + end + + @OP.def_rules("'", '"') do + |op, io| + identify_string(op) + end + + @OP.def_rules("`") do + |op, io| + if @lex_state == EXPR_FNAME + Token(op).set_text(op) + else + identify_string(op) + end + end + + @OP.def_rules('?') do + |op, io| + if @lex_state == EXPR_END + @lex_state = EXPR_BEG + Token(TkQUESTION).set_text(op) + else + ch = getc + if @lex_state == EXPR_ARG && ch !~ /\s/ + ungetc + @lex_state = EXPR_BEG; + Token(TkQUESTION).set_text(op) + else + str = op + str << ch + if (ch == '\\') #' + str << read_escape + end + @lex_state = EXPR_END + Token(TkINTEGER).set_text(str) + end + end + end + + @OP.def_rules("&", "&&", "|", "||") do + |op, io| + @lex_state = EXPR_BEG + Token(op).set_text(op) + end + + @OP.def_rules("+=", "-=", "*=", "**=", + "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do + |op, io| + @lex_state = EXPR_BEG + op =~ /^(.*)=$/ + Token(TkOPASGN, $1).set_text(op) + end + + @OP.def_rule("+@", proc{@lex_state == EXPR_FNAME}) do |op, io| + Token(TkUPLUS).set_text(op) + end + + @OP.def_rule("-@", proc{@lex_state == EXPR_FNAME}) do |op, io| + Token(TkUMINUS).set_text(op) + end + + @OP.def_rules("+", "-") do + |op, io| + catch(:RET) do + if @lex_state == EXPR_ARG + if @space_seen and peek(0) =~ /[0-9]/ + throw :RET, identify_number(op) + else + @lex_state = EXPR_BEG + end + elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/ + throw :RET, identify_number(op) + else + @lex_state = EXPR_BEG + end + Token(op).set_text(op) + end + end + + @OP.def_rule(".") do + @lex_state = EXPR_BEG + if peek(0) =~ /[0-9]/ + ungetc + identify_number("") + else + # for obj.if + @lex_state = EXPR_DOT + Token(TkDOT).set_text(".") + end + end + + @OP.def_rules("..", "...") do + |op, io| + @lex_state = EXPR_BEG + Token(op).set_text(op) + end + + lex_int2 + end + + def lex_int2 + @OP.def_rules("]", "}", ")") do + |op, io| + @lex_state = EXPR_END + @indent -= 1 + Token(op).set_text(op) + end + + @OP.def_rule(":") do + if @lex_state == EXPR_END || peek(0) =~ /\s/ + @lex_state = EXPR_BEG + tk = Token(TkCOLON) + else + @lex_state = EXPR_FNAME; + tk = Token(TkSYMBEG) + end + tk.set_text(":") + end + + @OP.def_rule("::") do +# p @lex_state.id2name, @space_seen + if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen + @lex_state = EXPR_BEG + tk = Token(TkCOLON3) + else + @lex_state = EXPR_DOT + tk = Token(TkCOLON2) + end + tk.set_text("::") + end + + @OP.def_rule("/") do + |op, io| + if @lex_state == EXPR_BEG || @lex_state == EXPR_MID + identify_string(op) + elsif peek(0) == '=' + getc + @lex_state = EXPR_BEG + Token(TkOPASGN, :/).set_text("/=") #") + elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/ + identify_string(op) + else + @lex_state = EXPR_BEG + Token("/").set_text(op) + end + end + + @OP.def_rules("^") do + @lex_state = EXPR_BEG + Token("^").set_text("^") + end + + # @OP.def_rules("^=") do + # @lex_state = EXPR_BEG + # Token(TkOPASGN, :^) + # end + + @OP.def_rules(",", ";") do + |op, io| + @lex_state = EXPR_BEG + Token(op).set_text(op) + end + + @OP.def_rule("~") do + @lex_state = EXPR_BEG + Token("~").set_text("~") + end + + @OP.def_rule("~@", proc{@lex_state = EXPR_FNAME}) do + @lex_state = EXPR_BEG + Token("~").set_text("~@") + end + + @OP.def_rule("(") do + @indent += 1 + if @lex_state == EXPR_BEG || @lex_state == EXPR_MID + @lex_state = EXPR_BEG + tk = Token(TkfLPAREN) + else + @lex_state = EXPR_BEG + tk = Token(TkLPAREN) + end + tk.set_text("(") + end + + @OP.def_rule("[]", proc{@lex_state == EXPR_FNAME}) do + Token("[]").set_text("[]") + end + + @OP.def_rule("[]=", proc{@lex_state == EXPR_FNAME}) do + Token("[]=").set_text("[]=") + end + + @OP.def_rule("[") do + @indent += 1 + if @lex_state == EXPR_FNAME + t = Token(TkfLBRACK) + else + if @lex_state == EXPR_BEG || @lex_state == EXPR_MID + t = Token(TkLBRACK) + elsif @lex_state == EXPR_ARG && @space_seen + t = Token(TkLBRACK) + else + t = Token(TkfLBRACK) + end + @lex_state = EXPR_BEG + end + t.set_text("[") + end + + @OP.def_rule("{") do + @indent += 1 + if @lex_state != EXPR_END && @lex_state != EXPR_ARG + t = Token(TkLBRACE) + else + t = Token(TkfLBRACE) + end + @lex_state = EXPR_BEG + t.set_text("{") + end + + @OP.def_rule('\\') do #' + if getc == "\n" + @space_seen = true + @continue = true + Token(TkSPACE).set_text("\\\n") + else + ungetc + Token("\\").set_text("\\") #" + end + end + + @OP.def_rule('%') do + |op, io| + if @lex_state == EXPR_BEG || @lex_state == EXPR_MID + identify_quotation('%') + elsif peek(0) == '=' + getc + Token(TkOPASGN, "%").set_text("%=") + elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/ + identify_quotation('%') + else + @lex_state = EXPR_BEG + Token("%").set_text("%") + end + end + + @OP.def_rule('$') do #' + identify_gvar + end + + @OP.def_rule('@') do + if peek(0) =~ /[@\w_]/ + ungetc + identify_identifier + else + Token("@").set_text("@") + end + end + + # @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do + # |op, io| + # @indent += 1 + # @lex_state = EXPR_FNAME + # # @lex_state = EXPR_END + # # until @rests[0] == "\n" or @rests[0] == ";" + # # rests.shift + # # end + # end + + @OP.def_rule("__END__", proc{@prev_char_no == 0 && peek(0) =~ /[\r\n]/}) do + throw :eof + end + + @OP.def_rule("") do + |op, io| + printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug? + if peek(0) =~ /[0-9]/ + t = identify_number("") + elsif peek(0) =~ /[\w_]/ + t = identify_identifier + end + printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug? + t + end + + p @OP if RubyLex.debug? + end + + def identify_gvar + @lex_state = EXPR_END + str = "$" + + tk = case ch = getc + when /[~_*$?!@\/\\;,=:<>".]/ #" + str << ch + Token(TkGVAR, str) + + when "-" + str << "-" << getc + Token(TkGVAR, str) + + when "&", "`", "'", "+" + str << ch + Token(TkBACK_REF, str) + + when /[1-9]/ + str << ch + while (ch = getc) =~ /[0-9]/ + str << ch + end + ungetc + Token(TkNTH_REF) + when /\w/ + ungetc + ungetc + return identify_identifier + else + ungetc + Token("$") + end + tk.set_text(str) + end + + def identify_identifier + token = "" + token.concat getc if peek(0) =~ /[$@]/ + token.concat getc if peek(0) == "@" + + while (ch = getc) =~ /\w|_/ + print ":", ch, ":" if RubyLex.debug? + token.concat ch + end + ungetc + + if ch == "!" or ch == "?" + token.concat getc + end + # fix token + + # $stderr.puts "identifier - #{token}, state = #@lex_state" + + case token + when /^\$/ + return Token(TkGVAR, token).set_text(token) + when /^\@/ + @lex_state = EXPR_END + return Token(TkIVAR, token).set_text(token) + end + + if @lex_state != EXPR_DOT + print token, "\n" if RubyLex.debug? + + token_c, *trans = TkReading2Token[token] + if token_c + # reserved word? + + if (@lex_state != EXPR_BEG && + @lex_state != EXPR_FNAME && + trans[1]) + # modifiers + token_c = TkSymbol2Token[trans[1]] + @lex_state = trans[0] + else + if @lex_state != EXPR_FNAME + if ENINDENT_CLAUSE.include?(token) + @indent += 1 + elsif DEINDENT_CLAUSE.include?(token) + @indent -= 1 + end + @lex_state = trans[0] + else + @lex_state = EXPR_END + end + end + return Token(token_c, token).set_text(token) + end + end + + if @lex_state == EXPR_FNAME + @lex_state = EXPR_END + if peek(0) == '=' + token.concat getc + end + elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_END + end + + if token[0, 1] =~ /[A-Z]/ + return Token(TkCONSTANT, token).set_text(token) + elsif token[token.size - 1, 1] =~ /[!?]/ + return Token(TkFID, token).set_text(token) + else + return Token(TkIDENTIFIER, token).set_text(token) + end + end + + def identify_here_document + ch = getc + if ch == "-" + ch = getc + indent = true + end + if /['"`]/ =~ ch # ' + lt = ch + quoted = "" + while (c = getc) && c != lt + quoted.concat c + end + else + lt = '"' + quoted = ch.dup + while (c = getc) && c =~ /\w/ + quoted.concat c + end + ungetc + end + + ltback, @ltype = @ltype, lt + reserve = "" + + while ch = getc + reserve << ch + if ch == "\\" #" + ch = getc + reserve << ch + elsif ch == "\n" + break + end + end + + str = "" + while (l = gets) + l.chomp! + l.strip! if indent + break if l == quoted + str << l.chomp << "\n" + end + + @reader.divert_read_from(reserve) + + @ltype = ltback + @lex_state = EXPR_END + Token(Ltype2Token[lt], str).set_text(str.dump) + end + + def identify_quotation(initial_char) + ch = getc + if lt = PERCENT_LTYPE[ch] + initial_char += ch + ch = getc + elsif ch =~ /\W/ + lt = "\"" + else + RubyLex.fail SyntaxError, "unknown type of %string ('#{ch}')" + end +# if ch !~ /\W/ +# ungetc +# next +# end + #@ltype = lt + @quoted = ch unless @quoted = PERCENT_PAREN[ch] + identify_string(lt, @quoted, ch, initial_char) + end + + def identify_number(start) + str = start.dup + + if start == "+" or start == "-" or start == "" + start = getc + str << start + end + + @lex_state = EXPR_END + + if start == "0" + if peek(0) == "x" + ch = getc + str << ch + match = /[0-9a-f_]/ + else + match = /[0-7_]/ + end + while ch = getc + if ch !~ match + ungetc + break + else + str << ch + end + end + return Token(TkINTEGER).set_text(str) + end + + type = TkINTEGER + allow_point = TRUE + allow_e = TRUE + while ch = getc + case ch + when /[0-9_]/ + str << ch + + when allow_point && "." + type = TkFLOAT + if peek(0) !~ /[0-9]/ + ungetc + break + end + str << ch + allow_point = false + + when allow_e && "e", allow_e && "E" + str << ch + type = TkFLOAT + if peek(0) =~ /[+-]/ + str << getc + end + allow_e = false + allow_point = false + else + ungetc + break + end + end + Token(type).set_text(str) + end + + def identify_string(ltype, quoted = ltype, opener=nil, initial_char = nil) + @ltype = ltype + @quoted = quoted + subtype = nil + + str = "" + str << initial_char if initial_char + str << (opener||quoted) + + nest = 0 + begin + while ch = getc + str << ch + if @quoted == ch + if nest == 0 + break + else + nest -= 1 + end + elsif opener == ch + nest += 1 + elsif @ltype != "'" && @ltype != "]" and ch == "#" + ch = getc + if ch == "{" + subtype = true + str << ch << skip_inner_expression + else + ungetc(ch) + end + elsif ch == '\\' #' + str << read_escape + end + end + if @ltype == "/" + if peek(0) =~ /i|o|n|e|s/ + str << getc + end + end + if subtype + Token(DLtype2Token[ltype], str) + else + Token(Ltype2Token[ltype], str) + end.set_text(str) + ensure + @ltype = nil + @quoted = nil + @lex_state = EXPR_END + end + end + + def skip_inner_expression + res = "" + nest = 0 + while (ch = getc) + res << ch + if ch == '}' + break if nest.zero? + nest -= 1 + elsif ch == '{' + nest += 1 + end + end + res + end + + def identify_comment + @ltype = "#" + comment = "#" + while ch = getc + if ch == "\\" + ch = getc + if ch == "\n" + ch = " " + else + comment << "\\" + end + else + if ch == "\n" + @ltype = nil + ungetc + break + end + end + comment << ch + end + return Token(TkCOMMENT).set_text(comment) + end + + def read_escape + res = "" + case ch = getc + when /[0-7]/ + ungetc ch + 3.times do + case ch = getc + when /[0-7]/ + when nil + break + else + ungetc + break + end + res << ch + end + + when "x" + res << ch + 2.times do + case ch = getc + when /[0-9a-fA-F]/ + when nil + break + else + ungetc + break + end + res << ch + end + + when "M" + res << ch + if (ch = getc) != '-' + ungetc + else + res << ch + if (ch = getc) == "\\" #" + res << ch + res << read_escape + else + res << ch + end + end + + when "C", "c" #, "^" + res << ch + if ch == "C" and (ch = getc) != "-" + ungetc + else + res << ch + if (ch = getc) == "\\" #" + res << ch + res << read_escape + else + res << ch + end + end + else + res << ch + end + res + end +end + + + +# Extract code elements from a source file, returning a TopLevel +# object containing the constituent file elements. +# +# This file is based on rtags + +module RDoc + + GENERAL_MODIFIERS = [ 'nodoc' ].freeze + + CLASS_MODIFIERS = GENERAL_MODIFIERS + + ATTR_MODIFIERS = GENERAL_MODIFIERS + + CONSTANT_MODIFIERS = GENERAL_MODIFIERS + + METHOD_MODIFIERS = GENERAL_MODIFIERS + + [ 'arg', 'args', 'yield', 'yields', 'notnew', 'not-new', 'not_new', 'doc' ] + + + class RubyParser + include RubyToken + include TokenStream + + extend ParserFactory + + parse_files_matching(/\.rbw?$/) + + + def initialize(top_level, file_name, content, options, stats) + @options = options + @stats = stats + @size = 0 + @token_listeners = nil + @input_file_name = file_name + @scanner = RubyLex.new(content) + @scanner.exception_on_syntax_error = false + @top_level = top_level + @progress = $stderr unless options.quiet + end + + def scan + @tokens = [] + @unget_read = [] + @read = [] + catch(:eof) do + catch(:enddoc) do + begin + parse_toplevel_statements(@top_level) + rescue Exception => e + $stderr.puts "\n\n" + $stderr.puts "RDoc failure in #@input_file_name at or around " + + "line #{@scanner.line_no} column #{@scanner.char_no}" + $stderr.puts + $stderr.puts "Before reporting this, could you check that the file" + $stderr.puts "you're documenting compiles cleanly--RDoc is not a" + $stderr.puts "full Ruby parser, and gets confused easily if fed" + $stderr.puts "invalid programs." + $stderr.puts + $stderr.puts "The internal error was:\n\n" + + e.set_backtrace(e.backtrace[0,4]) + raise + end + end + end + @top_level + end + + private + + def make_message(msg) + prefix = "\n" + @input_file_name + ":" + if @scanner + prefix << "#{@scanner.line_no}:#{@scanner.char_no}: " + end + return prefix + msg + end + + def warn(msg) + return if @options.quiet + msg = make_message msg + $stderr.puts msg + end + + def error(msg) + msg = make_message msg + $stderr.puts msg + exit(1) + end + + def progress(char) + unless @options.quiet + @progress.print(char) + @progress.flush + end + end + + def add_token_listener(obj) + @token_listeners ||= [] + @token_listeners << obj + end + + def remove_token_listener(obj) + @token_listeners.delete(obj) + end + + def get_tk + tk = nil + if @tokens.empty? + tk = @scanner.token + @read.push @scanner.get_read + puts "get_tk1 => #{tk.inspect}" if $TOKEN_DEBUG + else + @read.push @unget_read.shift + tk = @tokens.shift + puts "get_tk2 => #{tk.inspect}" if $TOKEN_DEBUG + end + + if tk.kind_of?(TkSYMBEG) + set_token_position(tk.line_no, tk.char_no) + tk1 = get_tk + if tk1.kind_of?(TkId) || tk1.kind_of?(TkOp) + tk = Token(TkSYMBOL).set_text(":" + tk1.name) + # remove the identifier we just read (we're about to + # replace it with a symbol) + @token_listeners.each do |obj| + obj.pop_token + end if @token_listeners + else + warn("':' not followed by identifier or operator") + tk = tk1 + end + end + + # inform any listeners of our shiny new token + @token_listeners.each do |obj| + obj.add_token(tk) + end if @token_listeners + + tk + end + + def peek_tk + unget_tk(tk = get_tk) + tk + end + + def unget_tk(tk) + @tokens.unshift tk + @unget_read.unshift @read.pop + + # Remove this token from any listeners + @token_listeners.each do |obj| + obj.pop_token + end if @token_listeners + end + + def skip_tkspace(skip_nl = true) + tokens = [] + while ((tk = get_tk).kind_of?(TkSPACE) || + (skip_nl && tk.kind_of?(TkNL))) + tokens.push tk + end + unget_tk(tk) + tokens + end + + def get_tkread + read = @read.join("") + @read = [] + read + end + + def peek_read + @read.join('') + end + + NORMAL = "::" + SINGLE = "<<" + + # Look for the first comment in a file that isn't + # a shebang line. + + def collect_first_comment + skip_tkspace + res = '' + first_line = true + + tk = get_tk + while tk.kind_of?(TkCOMMENT) + if first_line && /\A#!/ =~ tk.text + skip_tkspace + tk = get_tk + elsif first_line && /\A#\s*-\*-/ =~ tk.text + first_line = false + skip_tkspace + tk = get_tk + else + first_line = false + res << tk.text << "\n" + tk = get_tk + if tk.kind_of? TkNL + skip_tkspace(false) + tk = get_tk + end + end + end + unget_tk(tk) + res + end + + def parse_toplevel_statements(container) + comment = collect_first_comment + look_for_directives_in(container, comment) + container.comment = comment unless comment.empty? + parse_statements(container, NORMAL, nil, comment) + end + + def parse_statements(container, single=NORMAL, current_method=nil, comment='') + nest = 1 + save_visibility = container.visibility + +# if container.kind_of?(TopLevel) +# else +# comment = '' +# end + + non_comment_seen = true + + while tk = get_tk + + keep_comment = false + + non_comment_seen = true unless tk.kind_of?(TkCOMMENT) + + case tk + + when TkNL + skip_tkspace(true) # Skip blanks and newlines + tk = get_tk + if tk.kind_of?(TkCOMMENT) + if non_comment_seen + comment = '' + non_comment_seen = false + end + while tk.kind_of?(TkCOMMENT) + comment << tk.text << "\n" + tk = get_tk # this is the newline + skip_tkspace(false) # leading spaces + tk = get_tk + end + unless comment.empty? + look_for_directives_in(container, comment) + if container.done_documenting + container.ongoing_visibility = save_visibility +# return + end + end + keep_comment = true + else + non_comment_seen = true + end + unget_tk(tk) + keep_comment = true + + + when TkCLASS + if container.document_children + parse_class(container, single, tk, comment) + else + nest += 1 + end + + when TkMODULE + if container.document_children + parse_module(container, single, tk, comment) + else + nest += 1 + end + + when TkDEF + if container.document_self + parse_method(container, single, tk, comment) + else + nest += 1 + end + + when TkCONSTANT + if container.document_self + parse_constant(container, single, tk, comment) + end + + when TkALIAS + if container.document_self + parse_alias(container, single, tk, comment) + end + + when TkYIELD + if current_method.nil? + warn("Warning: yield outside of method") if container.document_self + else + parse_yield(container, single, tk, current_method) + end + + # Until and While can have a 'do', which shouldn't increas + # the nesting. We can't solve the general case, but we can + # handle most occurrences by ignoring a do at the end of a line + + when TkUNTIL, TkWHILE + nest += 1 + puts "FOUND #{tk.class} in #{container.name}, nest = #{nest}, " + + "line #{tk.line_no}" if $DEBUG + skip_optional_do_after_expression + + # 'for' is trickier + when TkFOR + nest += 1 + puts "FOUND #{tk.class} in #{container.name}, nest = #{nest}, " + + "line #{tk.line_no}" if $DEBUG + skip_for_variable + skip_optional_do_after_expression + + when TkCASE, TkDO, TkIF, TkUNLESS, TkBEGIN + nest += 1 + puts "Found #{tk.class} in #{container.name}, nest = #{nest}, " + + "line #{tk.line_no}" if $DEBUG + + when TkIDENTIFIER + if nest == 1 and current_method.nil? + case tk.name + when "private", "protected", "public", + "private_class_method", "public_class_method" + parse_visibility(container, single, tk) + keep_comment = true + when "attr" + parse_attr(container, single, tk, comment) + when /^attr_(reader|writer|accessor)$/, @options.extra_accessors + parse_attr_accessor(container, single, tk, comment) + when "alias_method" + if container.document_self + parse_alias(container, single, tk, comment) + end + end + end + + case tk.name + when "require" + parse_require(container, comment) + when "include" + parse_include(container, comment) + end + + + when TkEND + nest -= 1 + puts "Found 'end' in #{container.name}, nest = #{nest}, line #{tk.line_no}" if $DEBUG + puts "Method = #{current_method.name}" if $DEBUG and current_method + if nest == 0 + read_documentation_modifiers(container, CLASS_MODIFIERS) + container.ongoing_visibility = save_visibility + return + end + + end + + comment = '' unless keep_comment + begin + get_tkread + skip_tkspace(false) + end while peek_tk == TkNL + + end + end + + def parse_class(container, single, tk, comment, &block) + progress("c") + + @stats.num_classes += 1 + + container, name_t = get_class_or_module(container) + + case name_t + when TkCONSTANT + name = name_t.name + superclass = "Object" + + if peek_tk.kind_of?(TkLT) + get_tk + skip_tkspace(true) + superclass = get_class_specification + superclass = "<unknown>" if superclass.empty? + end + + if single == SINGLE + cls_type = SingleClass + else + cls_type = NormalClass + end + + cls = container.add_class(cls_type, name, superclass) + read_documentation_modifiers(cls, CLASS_MODIFIERS) + cls.record_location(@top_level) + parse_statements(cls) + cls.comment = comment + + when TkLSHFT + case name = get_class_specification + when "self", container.name + parse_statements(container, SINGLE, &block) + else + other = TopLevel.find_class_named(name) + unless other +# other = @top_level.add_class(NormalClass, name, nil) +# other.record_location(@top_level) +# other.comment = comment + other = NormalClass.new("Dummy", nil) + end + read_documentation_modifiers(other, CLASS_MODIFIERS) + parse_statements(other, SINGLE, &block) + end + + else + warn("Expected class name or '<<'. Got #{name_t.class}: #{name_t.text.inspect}") + end + end + + def parse_module(container, single, tk, comment) + progress("m") + @stats.num_modules += 1 + container, name_t = get_class_or_module(container) +# skip_tkspace + name = name_t.name + mod = container.add_module(NormalModule, name) + mod.record_location(@top_level) + read_documentation_modifiers(mod, CLASS_MODIFIERS) + parse_statements(mod) + mod.comment = comment + end + + # Look for the name of a class of module (optionally with a leading :: or + # with :: separated named) and return the ultimate name and container + + def get_class_or_module(container) + skip_tkspace + name_t = get_tk + + # class ::A -> A is in the top level + if name_t.kind_of?(TkCOLON2) + name_t = get_tk + container = @top_level + end + + skip_tkspace(false) + + while peek_tk.kind_of?(TkCOLON2) + prev_container = container + container = container.find_module_named(name_t.name) + if !container +# warn("Couldn't find module #{name_t.name}") + container = prev_container.add_module(NormalModule, name_t.name) + end + get_tk + name_t = get_tk + end + skip_tkspace(false) + return [container, name_t] + end + + def parse_constant(container, single, tk, comment) + name = tk.name + skip_tkspace(false) + eq_tk = get_tk + + unless eq_tk.kind_of?(TkASSIGN) + unget_tk(eq_tk) + return + end + + + nest = 0 + get_tkread + + tk = get_tk + if tk.kind_of? TkGT + unget_tk(tk) + unget_tk(eq_tk) + return + end + + loop do + puts("Param: #{tk}, #{@scanner.continue} " + + "#{@scanner.lex_state} #{nest}") if $DEBUG + + case tk + when TkSEMICOLON + break + when TkLPAREN, TkfLPAREN + nest += 1 + when TkRPAREN + nest -= 1 + when TkCOMMENT + if nest <= 0 && @scanner.lex_state == EXPR_END + unget_tk(tk) + break + end + when TkNL + if (@scanner.lex_state == EXPR_END and nest <= 0) || !@scanner.continue + unget_tk(tk) + break + end + end + tk = get_tk + end + + res = get_tkread.tr("\n", " ").strip + res = "" if res == ";" + con = Constant.new(name, res, comment) + read_documentation_modifiers(con, CONSTANT_MODIFIERS) + if con.document_self + container.add_constant(con) + end + end + + def parse_method(container, single, tk, comment) + progress(".") + @stats.num_methods += 1 + line_no = tk.line_no + column = tk.char_no + + start_collecting_tokens + add_token(tk) + add_token_listener(self) + + @scanner.instance_eval{@lex_state = EXPR_FNAME} + skip_tkspace(false) + name_t = get_tk + back_tk = skip_tkspace + meth = nil + added_container = false + + dot = get_tk + if dot.kind_of?(TkDOT) or dot.kind_of?(TkCOLON2) + @scanner.instance_eval{@lex_state = EXPR_FNAME} + skip_tkspace + name_t2 = get_tk + case name_t + when TkSELF + name = name_t2.name + when TkCONSTANT + name = name_t2.name + prev_container = container + container = container.find_module_named(name_t.name) + if !container + added_container = true + obj = name_t.name.split("::").inject(Object) do |state, item| + state.const_get(item) + end rescue nil + + type = obj.class == Class ? NormalClass : NormalModule + if not [Class, Module].include?(obj.class) + warn("Couldn't find #{name_t.name}. Assuming it's a module") + end + + if type == NormalClass then + container = prev_container.add_class(type, name_t.name, obj.superclass.name) + else + container = prev_container.add_module(type, name_t.name) + end + end + else + # warn("Unexpected token '#{name_t2.inspect}'") + # break + skip_method(container) + return + end + meth = AnyMethod.new(get_tkread, name) + meth.singleton = true + else + unget_tk dot + back_tk.reverse_each do + |tk| + unget_tk tk + end + name = name_t.name + + meth = AnyMethod.new(get_tkread, name) + meth.singleton = (single == SINGLE) + end + + remove_token_listener(self) + + meth.start_collecting_tokens + indent = TkSPACE.new(1,1) + indent.set_text(" " * column) + + meth.add_tokens([TkCOMMENT.new(line_no, + 1, + "# File #{@top_level.file_absolute_name}, line #{line_no}"), + NEWLINE_TOKEN, + indent]) + + meth.add_tokens(@token_stream) + + add_token_listener(meth) + + @scanner.instance_eval{@continue = false} + parse_method_parameters(meth) + + if meth.document_self + container.add_method(meth) + elsif added_container + container.document_self = false + end + + # Having now read the method parameters and documentation modifiers, we + # now know whether we have to rename #initialize to ::new + + if name == "initialize" && !meth.singleton + if meth.dont_rename_initialize + meth.visibility = :protected + else + meth.singleton = true + meth.name = "new" + meth.visibility = :public + end + end + + parse_statements(container, single, meth) + + remove_token_listener(meth) + + # Look for a 'call-seq' in the comment, and override the + # normal parameter stuff + + if comment.sub!(/:?call-seq:(.*?)^\s*\#?\s*$/m, '') + seq = $1 + seq.gsub!(/^\s*\#\s*/, '') + meth.call_seq = seq + end + + meth.comment = comment + + end + + def skip_method(container) + meth = AnyMethod.new("", "anon") + parse_method_parameters(meth) + parse_statements(container, false, meth) + end + + # Capture the method's parameters. Along the way, + # look for a comment containing + # + # # yields: .... + # + # and add this as the block_params for the method + + def parse_method_parameters(method) + res = parse_method_or_yield_parameters(method) + res = "(" + res + ")" unless res[0] == ?( + method.params = res unless method.params + if method.block_params.nil? + skip_tkspace(false) + read_documentation_modifiers(method, METHOD_MODIFIERS) + end + end + + def parse_method_or_yield_parameters(method=nil, modifiers=METHOD_MODIFIERS) + skip_tkspace(false) + tk = get_tk + + # Little hack going on here. In the statement + # f = 2*(1+yield) + # We see the RPAREN as the next token, so we need + # to exit early. This still won't catch all cases + # (such as "a = yield + 1" + end_token = case tk + when TkLPAREN, TkfLPAREN + TkRPAREN + when TkRPAREN + return "" + else + TkNL + end + nest = 0 + + loop do + puts("Param: #{tk.inspect}, #{@scanner.continue} " + + "#{@scanner.lex_state} #{nest}") if $DEBUG + case tk + when TkSEMICOLON + break + when TkLBRACE + nest += 1 + when TkRBRACE + # we might have a.each {|i| yield i } + unget_tk(tk) if nest.zero? + nest -= 1 + break if nest <= 0 + when TkLPAREN, TkfLPAREN + nest += 1 + when end_token + if end_token == TkRPAREN + nest -= 1 + break if @scanner.lex_state == EXPR_END and nest <= 0 + else + break unless @scanner.continue + end + when method && method.block_params.nil? && TkCOMMENT + unget_tk(tk) + read_documentation_modifiers(method, modifiers) + end + tk = get_tk + end + res = get_tkread.tr("\n", " ").strip + res = "" if res == ";" + res + end + + # skip the var [in] part of a 'for' statement + def skip_for_variable + skip_tkspace(false) + tk = get_tk + skip_tkspace(false) + tk = get_tk + unget_tk(tk) unless tk.kind_of?(TkIN) + end + + # while, until, and for have an optional + def skip_optional_do_after_expression + skip_tkspace(false) + tk = get_tk + case tk + when TkLPAREN, TkfLPAREN + end_token = TkRPAREN + else + end_token = TkNL + end + + nest = 0 + @scanner.instance_eval{@continue = false} + + loop do + puts("\nWhile: #{tk}, #{@scanner.continue} " + + "#{@scanner.lex_state} #{nest}") if $DEBUG + case tk + when TkSEMICOLON + break + when TkLPAREN, TkfLPAREN + nest += 1 + when TkDO + break if nest.zero? + when end_token + if end_token == TkRPAREN + nest -= 1 + break if @scanner.lex_state == EXPR_END and nest.zero? + else + break unless @scanner.continue + end + end + tk = get_tk + end + skip_tkspace(false) + if peek_tk.kind_of? TkDO + get_tk + end + end + + # Return a superclass, which can be either a constant + # of an expression + + def get_class_specification + tk = get_tk + return "self" if tk.kind_of?(TkSELF) + + res = "" + while tk.kind_of?(TkCOLON2) || + tk.kind_of?(TkCOLON3) || + tk.kind_of?(TkCONSTANT) + + res += tk.text + tk = get_tk + end + + unget_tk(tk) + skip_tkspace(false) + + get_tkread # empty out read buffer + + tk = get_tk + + case tk + when TkNL, TkCOMMENT, TkSEMICOLON + unget_tk(tk) + return res + end + + res += parse_call_parameters(tk) + res + end + + def parse_call_parameters(tk) + + end_token = case tk + when TkLPAREN, TkfLPAREN + TkRPAREN + when TkRPAREN + return "" + else + TkNL + end + nest = 0 + + loop do + puts("Call param: #{tk}, #{@scanner.continue} " + + "#{@scanner.lex_state} #{nest}") if $DEBUG + case tk + when TkSEMICOLON + break + when TkLPAREN, TkfLPAREN + nest += 1 + when end_token + if end_token == TkRPAREN + nest -= 1 + break if @scanner.lex_state == EXPR_END and nest <= 0 + else + break unless @scanner.continue + end + when TkCOMMENT + unget_tk(tk) + break + end + tk = get_tk + end + res = get_tkread.tr("\n", " ").strip + res = "" if res == ";" + res + end + + + # Parse a constant, which might be qualified by + # one or more class or module names + + def get_constant + res = "" + skip_tkspace(false) + tk = get_tk + + while tk.kind_of?(TkCOLON2) || + tk.kind_of?(TkCOLON3) || + tk.kind_of?(TkCONSTANT) + + res += tk.text + tk = get_tk + end + +# if res.empty? +# warn("Unexpected token #{tk} in constant") +# end + unget_tk(tk) + res + end + + # Get a constant that may be surrounded by parens + + def get_constant_with_optional_parens + skip_tkspace(false) + nest = 0 + while (tk = peek_tk).kind_of?(TkLPAREN) || tk.kind_of?(TkfLPAREN) + get_tk + skip_tkspace(true) + nest += 1 + end + + name = get_constant + + while nest > 0 + skip_tkspace(true) + tk = get_tk + nest -= 1 if tk.kind_of?(TkRPAREN) + end + name + end + + # Directives are modifier comments that can appear after class, module, + # or method names. For example + # + # def fred # :yields: a, b + # + # or + # + # class SM # :nodoc: + # + # we return the directive name and any parameters as a two element array + + def read_directive(allowed) + tk = get_tk + puts "directive: #{tk.inspect}" if $DEBUG + result = nil + if tk.kind_of?(TkCOMMENT) + if tk.text =~ /\s*:?(\w+):\s*(.*)/ + directive = $1.downcase + if allowed.include?(directive) + result = [directive, $2] + end + end + else + unget_tk(tk) + end + result + end + + + def read_documentation_modifiers(context, allow) + dir = read_directive(allow) + + case dir[0] + + when "notnew", "not_new", "not-new" + context.dont_rename_initialize = true + + when "nodoc" + context.document_self = false + if dir[1].downcase == "all" + context.document_children = false + end + + when "doc" + context.document_self = true + context.force_documentation = true + + when "yield", "yields" + unless context.params.nil? + context.params.sub!(/(,|)\s*&\w+/,'') # remove parameter &proc + end + context.block_params = dir[1] + + when "arg", "args" + context.params = dir[1] + end if dir + end + + + # Look for directives in a normal comment block: + # + # #-- - don't display comment from this point forward + # + # + # This routine modifies it's parameter + + def look_for_directives_in(context, comment) + + preprocess = SM::PreProcess.new(@input_file_name, + @options.rdoc_include) + + preprocess.handle(comment) do |directive, param| + case directive + when "stopdoc" + context.stop_doc + "" + when "startdoc" + context.start_doc + context.force_documentation = true + "" + + when "enddoc" + #context.done_documenting = true + #"" + throw :enddoc + + when "main" + options = Options.instance + options.main_page = param + "" + + when "title" + options = Options.instance + options.title = param + "" + + when "section" + context.set_current_section(param, comment) + comment.replace("") # 1.8 doesn't support #clear + break + else + warn "Unrecognized directive '#{directive}'" + break + end + end + + remove_private_comments(comment) + end + + def remove_private_comments(comment) + comment.gsub!(/^#--.*?^#\+\+/m, '') + comment.sub!(/^#--.*/m, '') + end + + + + def get_symbol_or_name + tk = get_tk + case tk + when TkSYMBOL + tk.text.sub(/^:/, '') + when TkId, TkOp + tk.name + when TkSTRING + tk.text + else + raise "Name or symbol expected (got #{tk})" + end + end + + def parse_alias(context, single, tk, comment) + skip_tkspace + if (peek_tk.kind_of? TkLPAREN) + get_tk + skip_tkspace + end + new_name = get_symbol_or_name + @scanner.instance_eval{@lex_state = EXPR_FNAME} + skip_tkspace + if (peek_tk.kind_of? TkCOMMA) + get_tk + skip_tkspace + end + old_name = get_symbol_or_name + + al = Alias.new(get_tkread, old_name, new_name, comment) + read_documentation_modifiers(al, ATTR_MODIFIERS) + if al.document_self + context.add_alias(al) + end + end + + def parse_yield_parameters + parse_method_or_yield_parameters + end + + def parse_yield(context, single, tk, method) + if method.block_params.nil? + get_tkread + @scanner.instance_eval{@continue = false} + method.block_params = parse_yield_parameters + end + end + + def parse_require(context, comment) + skip_tkspace_comment + tk = get_tk + if tk.kind_of? TkLPAREN + skip_tkspace_comment + tk = get_tk + end + + name = nil + case tk + when TkSTRING + name = tk.text +# when TkCONSTANT, TkIDENTIFIER, TkIVAR, TkGVAR +# name = tk.name + when TkDSTRING + warn "Skipping require of dynamic string: #{tk.text}" + # else + # warn "'require' used as variable" + end + if name + context.add_require(Require.new(name, comment)) + else + unget_tk(tk) + end + end + + def parse_include(context, comment) + loop do + skip_tkspace_comment + name = get_constant_with_optional_parens + unless name.empty? + context.add_include(Include.new(name, comment)) + end + return unless peek_tk.kind_of?(TkCOMMA) + get_tk + end + end + + def get_bool + skip_tkspace + tk = get_tk + case tk + when TkTRUE + true + when TkFALSE, TkNIL + false + else + unget_tk tk + true + end + end + + def parse_attr(context, single, tk, comment) + args = parse_symbol_arg(1) + if args.size > 0 + name = args[0] + rw = "R" + skip_tkspace(false) + tk = get_tk + if tk.kind_of? TkCOMMA + rw = "RW" if get_bool + else + unget_tk tk + end + att = Attr.new(get_tkread, name, rw, comment) + read_documentation_modifiers(att, ATTR_MODIFIERS) + if att.document_self + context.add_attribute(att) + end + else + warn("'attr' ignored - looks like a variable") + end + + end + + def parse_visibility(container, single, tk) + singleton = (single == SINGLE) + vis = case tk.name + when "private" then :private + when "protected" then :protected + when "public" then :public + when "private_class_method" + singleton = true + :private + when "public_class_method" + singleton = true + :public + else raise "Invalid visibility: #{tk.name}" + end + + skip_tkspace_comment(false) + case peek_tk + # Ryan Davis suggested the extension to ignore modifiers, because he + # often writes + # + # protected unless $TESTING + # + when TkNL, TkUNLESS_MOD, TkIF_MOD +# error("Missing argument") if singleton + container.ongoing_visibility = vis + else + args = parse_symbol_arg + container.set_visibility_for(args, vis, singleton) + end + end + + def parse_attr_accessor(context, single, tk, comment) + args = parse_symbol_arg + read = get_tkread + rw = "?" + + # If nodoc is given, don't document any of them + + tmp = CodeObject.new + read_documentation_modifiers(tmp, ATTR_MODIFIERS) + return unless tmp.document_self + + case tk.name + when "attr_reader" then rw = "R" + when "attr_writer" then rw = "W" + when "attr_accessor" then rw = "RW" + else + rw = @options.extra_accessor_flags[tk.name] + end + + for name in args + att = Attr.new(get_tkread, name, rw, comment) + context.add_attribute(att) + end + end + + def skip_tkspace_comment(skip_nl = true) + loop do + skip_tkspace(skip_nl) + return unless peek_tk.kind_of? TkCOMMENT + get_tk + end + end + + def parse_symbol_arg(no = nil) + + args = [] + skip_tkspace_comment + case tk = get_tk + when TkLPAREN + loop do + skip_tkspace_comment + if tk1 = parse_symbol_in_arg + args.push tk1 + break if no and args.size >= no + end + + skip_tkspace_comment + case tk2 = get_tk + when TkRPAREN + break + when TkCOMMA + else + warn("unexpected token: '#{tk2.inspect}'") if $DEBUG + break + end + end + else + unget_tk tk + if tk = parse_symbol_in_arg + args.push tk + return args if no and args.size >= no + end + + loop do +# skip_tkspace_comment(false) + skip_tkspace(false) + + tk1 = get_tk + unless tk1.kind_of?(TkCOMMA) + unget_tk tk1 + break + end + + skip_tkspace_comment + if tk = parse_symbol_in_arg + args.push tk + break if no and args.size >= no + end + end + end + args + end + + def parse_symbol_in_arg + case tk = get_tk + when TkSYMBOL + tk.text.sub(/^:/, '') + when TkSTRING + eval @read[-1] + else + warn("Expected symbol or string, got #{tk.inspect}") if $DEBUG + nil + end + end + end + +end diff --git a/ruby_1_8_6/lib/rdoc/parsers/parse_simple.rb b/ruby_1_8_6/lib/rdoc/parsers/parse_simple.rb new file mode 100644 index 0000000000..3f1a546964 --- /dev/null +++ b/ruby_1_8_6/lib/rdoc/parsers/parse_simple.rb @@ -0,0 +1,41 @@ +# Parse a non-source file. We basically take the whole thing +# as one big comment. If the first character in the file +# is '#', we strip leading pound signs. + + +require "rdoc/code_objects" +require "rdoc/markup/simple_markup/preprocess" + +module RDoc + # See rdoc/parsers/parse_c.rb + + class SimpleParser + + # prepare to parse a plain file + def initialize(top_level, file_name, body, options, stats) + + preprocess = SM::PreProcess.new(file_name, options.rdoc_include) + + preprocess.handle(body) do |directive, param| + $stderr.puts "Unrecognized directive '#{directive}' in #{file_name}" + end + + @body = body + @options = options + @top_level = top_level + end + + # Extract the file contents and attach them to the toplevel as a + # comment + + def scan + # @body.gsub(/^(\s\n)+/, '') + @top_level.comment = remove_private_comments(@body) + @top_level + end + + def remove_private_comments(comment) + comment.gsub(/^--.*?^\+\+/m, '').sub(/^--.*/m, '') + end + end +end diff --git a/ruby_1_8_6/lib/rdoc/parsers/parserfactory.rb b/ruby_1_8_6/lib/rdoc/parsers/parserfactory.rb new file mode 100644 index 0000000000..00a82cf4b1 --- /dev/null +++ b/ruby_1_8_6/lib/rdoc/parsers/parserfactory.rb @@ -0,0 +1,99 @@ +require "rdoc/parsers/parse_simple" + +module RDoc + + # A parser is simple a class that implements + # + # #initialize(file_name, body, options) + # + # and + # + # #scan + # + # The initialize method takes a file name to be used, the body of the + # file, and an RDoc::Options object. The scan method is then called + # to return an appropriately parsed TopLevel code object. + # + # The ParseFactory is used to redirect to the correct parser given a filename + # extension. This magic works because individual parsers have to register + # themselves with us as they are loaded in. The do this using the following + # incantation + # + # + # require "rdoc/parsers/parsefactory" + # + # module RDoc + # + # class XyzParser + # extend ParseFactory <<<< + # parse_files_matching /\.xyz$/ <<<< + # + # def initialize(file_name, body, options) + # ... + # end + # + # def scan + # ... + # end + # end + # end + # + # Just to make life interesting, if we suspect a plain text file, we + # also look for a shebang line just in case it's a potential + # shell script + + + + module ParserFactory + + @@parsers = [] + + Parsers = Struct.new(:regexp, :parser) + + # Record the fact that a particular class parses files that + # match a given extension + + def parse_files_matching(regexp) + @@parsers.unshift Parsers.new(regexp, self) + end + + # Return a parser that can handle a particular extension + + def ParserFactory.can_parse(file_name) + @@parsers.find {|p| p.regexp.match(file_name) } + end + + # Alias an extension to another extension. After this call, + # files ending "new_ext" will be parsed using the same parser + # as "old_ext" + + def ParserFactory.alias_extension(old_ext, new_ext) + parser = ParserFactory.can_parse("xxx.#{old_ext}") + return false unless parser + @@parsers.unshift Parsers.new(Regexp.new("\\.#{new_ext}$"), parser.parser) + true + end + + # Find the correct parser for a particular file name. Return a + # SimpleParser for ones that we don't know + + def ParserFactory.parser_for(top_level, file_name, body, options, stats) + # If no extension, look for shebang + if file_name !~ /\.\w+$/ && body =~ %r{\A#!(.+)} + shebang = $1 + case shebang + when %r{env\s+ruby}, %r{/ruby} + file_name = "dummy.rb" + end + end + parser_description = can_parse(file_name) + if parser_description + parser = parser_description.parser + else + parser = SimpleParser + end + + parser.new(top_level, file_name, body, options, stats) + end + end +end |