diff options
Diffstat (limited to 'lib/rdoc/parser')
| -rw-r--r-- | lib/rdoc/parser/c.rb | 704 | ||||
| -rw-r--r-- | lib/rdoc/parser/changelog.rb | 335 | ||||
| -rw-r--r-- | lib/rdoc/parser/markdown.rb | 24 | ||||
| -rw-r--r-- | lib/rdoc/parser/rd.rb | 23 | ||||
| -rw-r--r-- | lib/rdoc/parser/ripper_state_lex.rb | 590 | ||||
| -rw-r--r-- | lib/rdoc/parser/ruby.rb | 2125 | ||||
| -rw-r--r-- | lib/rdoc/parser/ruby_tools.rb | 75 | ||||
| -rw-r--r-- | lib/rdoc/parser/simple.rb | 34 | ||||
| -rw-r--r-- | lib/rdoc/parser/text.rb | 12 |
9 files changed, 2838 insertions, 1084 deletions
diff --git a/lib/rdoc/parser/c.rb b/lib/rdoc/parser/c.rb index 3da1820c50..b89aaa6dcc 100644 --- a/lib/rdoc/parser/c.rb +++ b/lib/rdoc/parser/c.rb @@ -1,18 +1,17 @@ - -require 'rdoc/parser/ruby' -require 'rdoc/known_classes' +# frozen_string_literal: true +require 'tsort' ## # RDoc::Parser::C attempts to parse C extension files. It looks for -# the standard patterns that you find in extensions: <tt>rb_define_class, -# rb_define_method</tt> and so on. It tries to find the corresponding +# the standard patterns that you find in extensions: +rb_define_class+, +# +rb_define_method+ and so on. It tries to find the corresponding # C source for the methods and extract comments, but if we fail # we don't worry too much. # # The comments associated with a Ruby method are extracted from the C # comment block associated with the routine that _implements_ that # method, that is to say the method whose name is given in the -# <tt>rb_define_method</tt> call. For example, you might write: +# +rb_define_method+ call. For example, you might write: # # /* # * Returns a new array that is a one-dimensional flattening of this @@ -25,8 +24,7 @@ require 'rdoc/known_classes' # * a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] # */ # static VALUE -# rb_ary_flatten(ary) -# VALUE ary; +# rb_ary_flatten(VALUE ary) # { # ary = rb_obj_dup(ary); # rb_ary_flatten_bang(ary); @@ -36,16 +34,16 @@ require 'rdoc/known_classes' # ... # # void -# Init_Array() +# Init_Array(void) # { # ... # rb_define_method(rb_cArray, "flatten", rb_ary_flatten, 0); # -# Here RDoc will determine from the rb_define_method line that there's a +# Here RDoc will determine from the +rb_define_method+ line that there's a # method called "flatten" in class Array, and will look for the implementation -# in the method rb_ary_flatten. It will then use the comment from that +# in the method +rb_ary_flatten+. It will then use the comment from that # method in the HTML output. This method must be in the same source file -# as the rb_define_method. +# as the +rb_define_method+. # # The comment blocks may include special directives: # @@ -58,21 +56,28 @@ require 'rdoc/known_classes' # [Document-const: +name+] # Documentation for the named +rb_define_const+. # +# Constant values can be supplied on the first line of the comment like so: +# +# /* 300: The highest possible score in bowling */ +# rb_define_const(cFoo, "PERFECT", INT2FIX(300)); +# +# The value can contain internal colons so long as they are escaped with a \ +# # [Document-global: +name+] # Documentation for the named +rb_define_global_const+ # # [Document-variable: +name+] # Documentation for the named +rb_define_variable+ # -# [Document-method: +method_name+] +# [Document-method\: +method_name+] # Documentation for the named method. Use this when the method name is # unambiguous. # -# [Document-method: <tt>ClassName::method_name<tt>] +# [Document-method\: <tt>ClassName::method_name</tt>] # Documentation for a singleton method in the given class. Use this when # the method name alone is ambiguous. # -# [Document-method: <tt>ClassName#method_name<tt>] +# [Document-method\: <tt>ClassName#method_name</tt>] # Documentation for a instance method in the given class. Use this when the # method name alone is ambiguous. # @@ -118,43 +123,92 @@ class RDoc::Parser::C < RDoc::Parser include RDoc::Text ## + # Maps C variable names to names of Ruby classes or modules + + attr_reader :classes + + ## # C file the parser is parsing attr_accessor :content + ## + # Dependencies from a missing enclosing class to the classes in + # missing_dependencies that depend upon it. + + attr_reader :enclosure_dependencies ## - # Maps C variable names to names of ruby classes (andsingleton classes) + # Maps C variable names to names of Ruby classes (and singleton classes) attr_reader :known_classes ## - # Maps C variable names to names of ruby singleton classes + # Classes found while parsing the C file that were not yet registered due to + # a missing enclosing class. These are processed by do_missing - attr_reader :singleton_classes + attr_reader :missing_dependencies ## - # Resets cross-file state. Call when parsing different projects that need - # separate documentation. + # Maps C variable names to names of Ruby singleton classes - def self.reset - @@enclosure_classes = {} - @@known_bodies = {} - end + attr_reader :singleton_classes + + ## + # The TopLevel items in the parsed file belong to - reset + attr_reader :top_level ## - # Prepare to parse a C file + # Prepares for parsing a C file. See RDoc::Parser#initialize for details on + # the arguments. - def initialize(top_level, file_name, content, options, stats) + def initialize top_level, file_name, content, options, stats super @known_classes = RDoc::KNOWN_CLASSES.dup - @content = handle_tab_width handle_ifdefs_in(@content) - @classes = {} - @singleton_classes = {} - @file_dir = File.dirname(@file_name) + @content = handle_tab_width handle_ifdefs_in @content + @file_dir = File.dirname @file_name + + @classes = load_variable_map :c_class_variables + @singleton_classes = load_variable_map :c_singleton_class_variables + + @markup = @options.markup + + # class_variable => { function => [method, ...] } + @methods = Hash.new { |h, f| h[f] = Hash.new { |i, m| i[m] = [] } } + + # missing variable => [handle_class_module arguments] + @missing_dependencies = {} + + # missing enclosure variable => [dependent handle_class_module arguments] + @enclosure_dependencies = Hash.new { |h, k| h[k] = [] } + @enclosure_dependencies.instance_variable_set :@missing_dependencies, + @missing_dependencies + + @enclosure_dependencies.extend TSort + + def @enclosure_dependencies.tsort_each_node &block + each_key(&block) + rescue TSort::Cyclic => e + cycle_vars = e.message.scan(/"(.*?)"/).flatten + + cycle = cycle_vars.sort.map do |var_name| + delete var_name + + var_name, type, mod_name, = @missing_dependencies[var_name] + + "#{type} #{mod_name} (#{var_name})" + end.join ', ' + + warn "Unable to create #{cycle} due to a cyclic class or module creation" + + retry + end + + def @enclosure_dependencies.tsort_each_child node, &block + fetch(node, []).each(&block) + end end ## @@ -169,28 +223,36 @@ class RDoc::Parser::C < RDoc::Parser class_name = @known_classes[var_name] unless class_name then - warn "Enclosing class/module %p for alias %s %s not known" % [ + @options.warn "Enclosing class or module %p for alias %s %s is not known" % [ var_name, new_name, old_name] next end class_obj = find_class var_name, class_name - - al = RDoc::Alias.new '', old_name, new_name, '' - al.singleton = @singleton_classes.key? var_name - comment = find_alias_comment var_name, new_name, old_name - comment = strip_stars comment - al.comment = comment - - al.record_location @top_level - - class_obj.add_alias al - @stats.add_alias al + comment.normalize + if comment.to_s.empty? and existing_method = class_obj.method_list.find { |m| m.name == old_name} + comment = existing_method.comment + end + add_alias(var_name, class_obj, old_name, new_name, comment) end end ## + # Add alias, either from a direct alias definition, or from two + # method that reference the same function. + + def add_alias(var_name, class_obj, old_name, new_name, comment) + al = RDoc::Alias.new '', old_name, new_name, '' + al.singleton = @singleton_classes.key? var_name + al.comment = comment + al.record_location @top_level + class_obj.add_alias al + @stats.add_alias al + al + end + + ## # Scans #content for rb_attr and rb_define_attr def do_attrs @@ -214,61 +276,113 @@ class RDoc::Parser::C < RDoc::Parser end ## - # Scans #content for rb_define_module, rb_define_class, boot_defclass, - # rb_define_module_under, rb_define_class_under and rb_singleton_class - - def do_classes - @content.scan(/(\w+)\s* = \s*rb_define_module\s*\(\s*"(\w+)"\s*\)/mx) do - |var_name, class_name| - handle_class_module(var_name, "module", class_name, nil, nil) - end - - # The '.' lets us handle SWIG-generated files - @content.scan(/([\w\.]+)\s* = \s*rb_define_class\s* - \( - \s*"(\w+)", - \s*(\w+)\s* - \)/mx) do |var_name, class_name, parent| - handle_class_module(var_name, "class", class_name, parent, nil) - end + # Scans #content for boot_defclass + def do_boot_defclass @content.scan(/(\w+)\s*=\s*boot_defclass\s*\(\s*"(\w+?)",\s*(\w+?)\s*\)/) do |var_name, class_name, parent| parent = nil if parent == "0" - handle_class_module(var_name, "class", class_name, parent, nil) - end - - @content.scan(/(\w+)\s* = \s*rb_define_module_under\s* - \( - \s*(\w+), - \s*"(\w+)" - \s*\)/mx) do |var_name, in_module, class_name| - handle_class_module(var_name, "module", class_name, nil, in_module) + handle_class_module(var_name, :class, class_name, parent, nil) end + end - @content.scan(/([\w\.]+)\s* = # var_name - \s*rb_define_class_under\s* - \( - \s* (\w+), # under - \s* "(\w+)", # class_name - \s* - (?: - ([\w\*\s\(\)\.\->]+) | # parent_name - rb_path2class\("([\w:]+)"\) # path - ) - \s* - \) - /mx) do |var_name, under, class_name, parent_name, path| - parent = path || parent_name - - handle_class_module var_name, 'class', class_name, parent, under - end + ## + # Scans #content for rb_define_class, boot_defclass, rb_define_class_under + # and rb_singleton_class + + def do_classes_and_modules + do_boot_defclass if @file_name == "class.c" + + @content.scan( + %r( + (?<var_name>[\w\.]+)\s* = + \s*rb_(?: + define_(?: + class(?: # rb_define_class(class_name_1, parent_name_1) + \s*\( + \s*"(?<class_name_1>\w+)", + \s*(?<parent_name_1>\w+)\s* + \) + | + _under\s*\( # rb_define_class_under(class_under, class_name2, parent_name2...) + \s* (?<class_under>\w+), + \s* "(?<class_name_2>\w+)", + \s* + (?: + (?<parent_name_2>[\w\*\s\(\)\.\->]+) | + rb_path2class\("(?<path>[\w:]+)"\) + ) + \s*\) + ) + | + module(?: # rb_define_module(module_name_1) + \s*\( + \s*"(?<module_name_1>\w+)"\s* + \) + | + _under\s*\( # rb_define_module_under(module_under, module_name_2) + \s*(?<module_under>\w+), + \s*"(?<module_name_2>\w+)" + \s*\) + ) + ) + | + struct_define_without_accessor\s*\( # rb_struct_define_without_accessor(class_name_3, parent_name_3, ...) + \s*"(?<class_name_3>\w+)", + \s*(?<parent_name_3>\w+), + \s*\w+, # Allocation function + (?:\s*"\w+",)* # Attributes + \s*NULL + \) + | + singleton_class\s*\( # rb_singleton_class(target_class_name) + \s*(?<target_class_name>\w+) + \) + ) + )mx + ) do + class_name = $~[:class_name_1] + type = :class + if class_name + # rb_define_class(class_name_1, parent_name_1) + parent_name = $~[:parent_name_1] + #under = nil + else + class_name = $~[:class_name_2] + if class_name + # rb_define_class_under(class_under, class_name2, parent_name2...) + parent_name = $~[:parent_name_2] || $~[:path] + under = $~[:class_under] + else + class_name = $~[:class_name_3] + if class_name + # rb_struct_define_without_accessor(class_name_3, parent_name_3, ...) + parent_name = $~[:parent_name_3] + #under = nil + else + type = :module + class_name = $~[:module_name_1] + #parent_name = nil + if class_name + # rb_define_module(module_name_1) + #under = nil + else + class_name = $~[:module_name_2] + if class_name + # rb_define_module_under(module_under, module_name_1) + under = $~[:module_under] + else + # rb_singleton_class(target_class_name) + target_class_name = $~[:target_class_name] + handle_singleton $~[:var_name], target_class_name + next + end + end + end + end + end - @content.scan(/([\w\.]+)\s* = \s*rb_singleton_class\s* - \( - \s*(\w+) - \s*\)/mx) do |sclass_var, class_var| - handle_singleton sclass_var, class_var + handle_class_module($~[:var_name], type, class_name, parent_name, under) end end @@ -300,20 +414,36 @@ class RDoc::Parser::C < RDoc::Parser \) \s*;%xm) do |consts| const = consts.first + handle_constants 'const', 'mCurses', const, "UINT2NUM(#{const})" end + + @content.scan(%r% + \Wrb_file_const + \s*\( + \s* + "([^"]+)", + \s* + (.*?) + \s* + \) + \s*;%xm) do |name, value| + handle_constants 'const', 'rb_mFConst', name, value + end end + ## # Scans #content for rb_include_module def do_includes @content.scan(/rb_include_module\s*\(\s*(\w+?),\s*(\w+?)\s*\)/) do |c,m| - if cls = @classes[c] - m = @known_classes[m] || m - incl = cls.add_include RDoc::Include.new(m, "") - incl.record_location @top_level - end + next unless cls = @classes[c] + m = @known_classes[m] || m + + comment = new_comment '', @top_level, :c + incl = cls.add_include RDoc::Include.new(m, comment) + incl.record_location @top_level end end @@ -332,7 +462,7 @@ class RDoc::Parser::C < RDoc::Parser ) \s*\(\s*([\w\.]+), \s*"([^"]+)", - \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?, + \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\(|\(METHOD\))?(\w+)\)?, \s*(-?\w+)\s*\) (?:;\s*/[*/]\s+in\s+(\w+?\.(?:cpp|c|y)))? %xm) do |type, var_name, meth_name, function, param_count, source_file| @@ -368,6 +498,22 @@ class RDoc::Parser::C < RDoc::Parser end ## + # Creates classes and module that were missing were defined due to the file + # order being different than the declaration order. + + def do_missing + return if @missing_dependencies.empty? + + @enclosure_dependencies.tsort.each do |in_module| + arguments = @missing_dependencies.delete in_module + + next unless arguments # dependency on existing class + + handle_class_module(*arguments) + end + end + + ## # Finds the comment for an alias on +class_name+ from +new_name+ to # +old_name+ @@ -377,7 +523,7 @@ class RDoc::Parser::C < RDoc::Parser \s*"#{Regexp.escape new_name}"\s*, \s*"#{Regexp.escape old_name}"\s*\);%xm - $1 || '' + new_comment($1 || '', @top_level, :c) end ## @@ -398,38 +544,69 @@ class RDoc::Parser::C < RDoc::Parser /.*?/m end - if @content =~ %r%((?>/\*.*?\*/\s+)) - rb_define_attr\((?:\s*#{var_name},)?\s* - "#{attr_name}"\s*, - #{rw}\)\s*;%xm then - $1 - elsif @content =~ %r%((?>/\*.*?\*/\s+)) - rb_attr\(\s*#{var_name}\s*, - \s*#{attr_name}\s*, - #{rw},.*?\)\s*;%xm then - $1 - elsif @content =~ %r%Document-attr:\s#{attr_name}\s*?\n - ((?>.*?\*/))%xm then - $1 - else - '' + comment = if @content =~ %r%((?>/\*.*?\*/\s+)) + rb_define_attr\((?:\s*#{var_name},)?\s* + "#{attr_name}"\s*, + #{rw}\)\s*;%xm then + $1 + elsif @content =~ %r%((?>/\*.*?\*/\s+)) + rb_attr\(\s*#{var_name}\s*, + \s*#{attr_name}\s*, + #{rw},.*?\)\s*;%xm then + $1 + elsif @content =~ %r%(/\*.*?(?:\s*\*\s*)?) + Document-attr:\s#{attr_name}\s*?\n + ((?>(.|\n)*?\*/))%x then + "#{$1}\n#{$2}" + else + '' + end + + new_comment comment, @top_level, :c + end + + ## + # Generate a Ruby-method table + + def gen_body_table file_content + table = {} + file_content.scan(%r{ + ((?>/\*.*?\*/\s*)?) + ((?:(?:\w+)\s+)? + (?:intern\s+)?VALUE\s+(\w+) + \s*(?:\([^)]*\))(?:[^\);]|$)) + | ((?>/\*.*?\*/\s*))^\s*(\#\s*define\s+(\w+)\s+(\w+)) + | ^\s*\#\s*define\s+(\w+)\s+(\w+) + }xm) do + case + when $1 + table[$3] = [:func_def, $1, $2, $~.offset(2)] if !table[$3] || table[$3][0] != :func_def + when $4 + table[$6] = [:macro_def, $4, $5, $~.offset(5), $7] if !table[$6] || table[$6][0] == :macro_alias + when $8 + table[$8] ||= [:macro_alias, $9] + end end + table end ## # Find the C code corresponding to a Ruby method def find_body class_name, meth_name, meth_obj, file_content, quiet = false - case file_content - when %r%((?>/\*.*?\*/\s*)?) - ((?:(?:static|SWIGINTERN)\s+)? - (?:intern\s+)?VALUE\s+#{meth_name} - \s*(\([^)]*\))([^;]|$))%xm then - comment = $1 - body = $2 - offset = $~.offset(2).first + if file_content + @body_table ||= {} + @body_table[file_content] ||= gen_body_table file_content + type, *args = @body_table[file_content][meth_name] + end - remove_private_comments comment if comment + case type + when :func_def + comment = new_comment args[0], @top_level, :c + body = args[1] + offset, = args[2] + + comment.remove_private if comment # try to find the whole body body = $& if /#{Regexp.escape body}[^(]*?\{.*?^\}/m =~ file_content @@ -443,55 +620,55 @@ class RDoc::Parser::C < RDoc::Parser override_comment = find_override_comment class_name, meth_obj comment = override_comment if override_comment + comment.normalize find_modifiers comment, meth_obj if comment #meth_obj.params = params meth_obj.start_collecting_tokens - tk = RDoc::RubyToken::Token.new nil, 1, 1 - tk.set_text body + tk = { :line_no => 1, :char_no => 1, :text => body } meth_obj.add_token tk - meth_obj.comment = strip_stars comment - meth_obj.offset = offset + meth_obj.comment = comment meth_obj.line = file_content[0, offset].count("\n") + 1 body - when %r%((?>/\*.*?\*/\s*))^\s*(\#\s*define\s+#{meth_name}\s+(\w+))%m then - comment = $1 - body = $2 - offset = $~.offset(2).first + when :macro_def + comment = new_comment args[0], @top_level, :c + body = args[1] + offset, = args[2] + + find_body class_name, args[3], meth_obj, file_content, true - find_body class_name, $3, meth_obj, file_content, true + comment.normalize find_modifiers comment, meth_obj meth_obj.start_collecting_tokens - tk = RDoc::RubyToken::Token.new nil, 1, 1 - tk.set_text body + tk = { :line_no => 1, :char_no => 1, :text => body } meth_obj.add_token tk - meth_obj.comment = strip_stars(comment) + meth_obj.comment.to_s - meth_obj.offset = offset + meth_obj.comment = comment meth_obj.line = file_content[0, offset].count("\n") + 1 body - when %r%^\s*\#\s*define\s+#{meth_name}\s+(\w+)%m then + when :macro_alias # with no comment we hope the aliased definition has it and use it's # definition - body = find_body(class_name, $1, meth_obj, file_content, true) + body = find_body(class_name, args[0], meth_obj, file_content, true) return body if body - warn "No definition for #{meth_name}" if @options.verbosity > 1 + @options.warn "No definition for #{meth_name}" false else # No body, but might still have an override comment comment = find_override_comment class_name, meth_obj if comment then + comment.normalize find_modifiers comment, meth_obj - meth_obj.comment = strip_stars comment + meth_obj.comment = comment '' else - warn "No definition for #{meth_name}" if @options.verbosity > 1 + @options.warn "No definition for #{meth_name}" false end end @@ -540,7 +717,7 @@ class RDoc::Parser::C < RDoc::Parser # */ # VALUE cFoo = rb_define_class("Foo", rb_cObject); - def find_class_comment(class_name, class_mod) + def find_class_comment class_name, class_mod comment = nil if @content =~ %r% @@ -551,99 +728,71 @@ class RDoc::Parser::C < RDoc::Parser comment = $1.sub(%r%Document-(?:class|module):\s+#{class_name}%, '') elsif @content =~ %r%Document-(?:class|module):\s+#{class_name}\s*? (?:<\s+[:,\w]+)?\n((?>.*?\*/))%xm then + comment = "/*\n#{$1}" + elsif @content =~ %r%((?>/\*.*?\*/\s+)) + ([\w\.\s]+\s* = \s+)?rb_define_(class|module)[\t (]*?"(#{class_name})"%xm then comment = $1 elsif @content =~ %r%((?>/\*.*?\*/\s+)) - ([\w\.\s]+\s* = \s+)?rb_define_(class|module).*?"(#{class_name})"%xm then + ([\w\. \t]+ = \s+)?rb_define_(class|module)_under[\t\w, (]*?"(#{class_name.split('::').last})"%xm then comment = $1 + else + comment = '' end - return unless comment - - comment = strip_stars comment + comment = new_comment comment, @top_level, :c + comment.normalize - comment = look_for_directives_in class_mod, comment + look_for_directives_in class_mod, comment class_mod.add_comment comment, @top_level end ## + # Generate a const table + + def gen_const_table file_content + table = {} + @content.scan(%r{ + ((?>^\s*/\*.*?\*/\s+)) + rb_define_(\w+)\((?:\s*(?:\w+),)?\s* + "(\w+)"\s*, + .*?\)\s*; + | Document-(?:const|global|variable):\s + ((?:\w+::)*\w+) + \s*?\n((?>.*?\*/)) + }mxi) do + case + when $1 then table[[$2, $3]] = $1 + when $4 then table[$4] = "/*\n" + $5 + end + end + table + end + + ## # Finds a comment matching +type+ and +const_name+ either above the # comment or in the matching Document- section. def find_const_comment(type, const_name, class_name = nil) - if @content =~ %r%((?>^\s*/\*.*?\*/\s+)) - rb_define_#{type}\((?:\s*(\w+),)?\s* - "#{const_name}"\s*, - .*?\)\s*;%xmi then - $1 - elsif class_name and - @content =~ %r%Document-(?:const|global|variable):\s - #{class_name}::#{const_name} - \s*?\n((?>.*?\*/))%xm then - $1 - elsif @content =~ %r%Document-(?:const|global|variable):\s#{const_name} - \s*?\n((?>.*?\*/))%xm then - $1 - else + @const_table ||= {} + @const_table[@content] ||= gen_const_table @content + table = @const_table[@content] + + comment = + table[[type, const_name]] || + (class_name && table[class_name + "::" + const_name]) || + table[const_name] || '' - end + + new_comment comment, @top_level, :c end ## # Handles modifiers in +comment+ and updates +meth_obj+ as appropriate. - # - # If <tt>:nodoc:</tt> is found, documentation on +meth_obj+ is suppressed. - # - # If <tt>:yields:</tt> is followed by an argument list it is used for the - # #block_params of +meth_obj+. - # - # If the comment block contains a <tt>call-seq:</tt> section like: - # - # call-seq: - # ARGF.readlines(sep=$/) -> array - # ARGF.readlines(limit) -> array - # ARGF.readlines(sep, limit) -> array - # - # ARGF.to_a(sep=$/) -> array - # ARGF.to_a(limit) -> array - # ARGF.to_a(sep, limit) -> array - # - # it is used for the parameters of +meth_obj+. def find_modifiers comment, meth_obj - # we must handle situations like the above followed by an unindented first - # comment. The difficulty is to make sure not to match lines starting - # with ARGF at the same indent, but that are after the first description - # paragraph. - - if comment =~ /call-seq:(.*?(?:\S|\*\/?).*?)^\s*(?:\*\/?)?\s*$/m then - all_start, all_stop = $~.offset(0) - seq_start, seq_stop = $~.offset(1) - - # we get the following lines that start with the leading word at the - # same indent, even if they have blank lines before - if $1 =~ /(^\s*\*?\s*\n)+^(\s*\*?\s*\w+)/m then - leading = $2 # ' * ARGF' in the example above - re = %r% - \A( - (^\s*\*?\s*\n)+ - (^#{Regexp.escape leading}.*?\n)+ - )+ - ^\s*\*?\s*$ - %xm - if comment[seq_stop..-1] =~ re then - all_stop = seq_stop + $~.offset(0).last - seq_stop = seq_stop + $~.offset(1).last - end - end - - seq = comment[seq_start..seq_stop] - seq.gsub!(/^(\s*\*?\s*?)(\S|\n)/m, '\2') - comment.slice! all_start...all_stop - meth_obj.call_seq = seq - elsif comment.sub!(/\A\/\*\s*call-seq:(.*?)\*\/\Z/, '') then - meth_obj.call_seq = $1.strip - end + comment.normalize + comment.extract_call_seq meth_obj look_for_directives_in meth_obj, comment end @@ -655,11 +804,18 @@ class RDoc::Parser::C < RDoc::Parser name = Regexp.escape meth_obj.name prefix = Regexp.escape meth_obj.name_prefix - if @content =~ %r%Document-method:\s+#{class_name}#{prefix}#{name}\s*?\n((?>.*?\*/))%m then - $1 - elsif @content =~ %r%Document-method:\s#{name}\s*?\n((?>.*?\*/))%m then - $1 - end + comment = if @content =~ %r%Document-method: + \s+#{class_name}#{prefix}#{name} + \s*?\n((?>.*?\*/))%xm then + "/*#{$1}" + elsif @content =~ %r%Document-method: + \s#{name}\s*?\n((?>.*?\*/))%xm then + "/*#{$1}" + end + + return unless comment + + new_comment comment, @top_level, :c end ## @@ -668,8 +824,8 @@ class RDoc::Parser::C < RDoc::Parser def handle_attr(var_name, attr_name, read, write) rw = '' - rw << 'R' if '1' == read - rw << 'W' if '1' == write + rw += 'R' if '1' == read + rw += 'W' if '1' == write class_name = @known_classes[var_name] @@ -680,9 +836,9 @@ class RDoc::Parser::C < RDoc::Parser return unless class_obj comment = find_attr_comment var_name, attr_name - comment = strip_stars comment + comment.normalize - name = attr_name.gsub(/rb_intern\("([^"]+)"\)/, '\1') + name = attr_name.gsub(/rb_intern(?:_const)?\("([^"]+)"\)/, '\1') attr = RDoc::Attr.new '', name, rw, comment @@ -699,23 +855,26 @@ class RDoc::Parser::C < RDoc::Parser parent_name = @known_classes[parent] || parent if in_module then - enclosure = @classes[in_module] || @@enclosure_classes[in_module] + enclosure = @classes[in_module] || @store.find_c_enclosure(in_module) if enclosure.nil? and enclosure = @known_classes[in_module] then - enc_type = /^rb_m/ =~ in_module ? "module" : "class" + enc_type = /^rb_m/ =~ in_module ? :module : :class handle_class_module in_module, enc_type, enclosure, nil, nil enclosure = @classes[in_module] end unless enclosure then - warn "Enclosing class/module '#{in_module}' for #{type} #{class_name} not known" + @enclosure_dependencies[in_module] << var_name + @missing_dependencies[var_name] = + [var_name, type, class_name, parent, in_module] + return end else enclosure = @top_level end - if type == "class" then + if type == :class then full_name = if RDoc::ClassModule === enclosure then enclosure.full_name + "::#{class_name}" else @@ -743,8 +902,8 @@ class RDoc::Parser::C < RDoc::Parser end @classes[var_name] = cm - @@enclosure_classes[var_name] = cm @known_classes[var_name] = cm.full_name + @store.add_c_enclosure var_name, cm end ## @@ -752,7 +911,7 @@ class RDoc::Parser::C < RDoc::Parser # can override the C value of the comment to give a friendly definition. # # /* 300: The perfect score in bowling */ - # rb_define_const(cFoo, "PERFECT", INT2FIX(300); + # rb_define_const(cFoo, "PERFECT", INT2FIX(300)); # # Will override <tt>INT2FIX(300)</tt> with the value +300+ in the output # RDoc. Values may include quotes and escaped colons (\:). @@ -765,41 +924,36 @@ class RDoc::Parser::C < RDoc::Parser class_obj = find_class var_name, class_name unless class_obj then - warn "Enclosing class/module #{const_name.inspect} not known" + @options.warn 'Enclosing class or module %p is not known' % [const_name] return end comment = find_const_comment type, const_name, class_name - comment = strip_stars comment - comment = normalize_comment comment + comment.normalize # In the case of rb_define_const, the definition and comment are in # "/* definition: comment */" form. The literal ':' and '\' characters # can be escaped with a backslash. if type.downcase == 'const' then - elements = comment.split ':' - - if elements.nil? or elements.empty? then - con = RDoc::Constant.new const_name, definition, comment - else - new_definition = elements[0..-2].join(':') + no_match, new_definition, new_comment = comment.text.split(/(\A.*):/) + if no_match and no_match.empty? then if new_definition.empty? then # Default to literal C definition new_definition = definition else - new_definition.gsub!("\:", ":") - new_definition.gsub!("\\", '\\') + new_definition = new_definition.gsub("\:", ":") + new_definition = new_definition.gsub("\\", '\\') end new_definition.sub!(/\A(\s+)/, '') - new_comment = if $1.nil? then - elements.last.lstrip - else - "#{$1}#{elements.last.lstrip}" - end + new_comment = "#{$1}#{new_comment.lstrip}" + + new_comment = self.new_comment(new_comment, @top_level, :c) con = RDoc::Constant.new const_name, new_definition, new_comment + else + con = RDoc::Constant.new const_name, definition, comment end else con = RDoc::Constant.new const_name, definition, comment @@ -827,10 +981,16 @@ class RDoc::Parser::C < RDoc::Parser class_name = @known_classes[var_name] singleton = @singleton_classes.key? var_name + @methods[var_name][function] << meth_name + return unless class_name class_obj = find_class var_name, class_name + if existing_method = class_obj.method_list.find { |m| m.c_function == function } + add_alias(var_name, class_obj, existing_method.name, meth_name, existing_method.comment) + end + if class_obj then if meth_name == 'initialize' then meth_name = 'new' @@ -849,9 +1009,9 @@ class RDoc::Parser::C < RDoc::Parser file_name = File.join @file_dir, source_file if File.exist? file_name then - file_content = (@@known_bodies[file_name] ||= File.read(file_name)) + file_content = File.read file_name else - warn "unknown source #{source_file} for #{meth_name} in #{@file_name}" + @options.warn "unknown source #{source_file} for #{meth_name} in #{@file_name}" end else file_content = @content @@ -870,7 +1030,12 @@ class RDoc::Parser::C < RDoc::Parser meth_obj.record_location @top_level + + if meth_obj.section_title + class_obj.temporary_section = class_obj.add_section(meth_obj.section_title) + end class_obj.add_method meth_obj + @stats.add_method meth_obj meth_obj.visibility = :private if 'private_method' == type end @@ -905,6 +1070,30 @@ class RDoc::Parser::C < RDoc::Parser end ## + # Loads the variable map with the given +name+ from the RDoc::Store, if + # present. + + def load_variable_map map_name + return {} unless files = @store.cache[map_name] + return {} unless name_map = files[@file_name] + + class_map = {} + + name_map.each do |variable, name| + next unless mod = @store.find_class_or_module(name) + + class_map[variable] = if map_name == :c_class_variables then + mod + else + name + end + @known_classes[variable] = name + end + + class_map + end + + ## # Look for directives in a normal comment block: # # /* @@ -1004,7 +1193,6 @@ class RDoc::Parser::C < RDoc::Parser if hash then args << "p#{position} = {}" - position += 1 end args << '&block' if block @@ -1017,15 +1205,7 @@ class RDoc::Parser::C < RDoc::Parser # when scanning for classes and methods def remove_commented_out_lines - @content.gsub!(%r%//.*rb_define_%, '//') - end - - ## - # Removes private comments from +comment+ - - def remove_private_comments(comment) - comment.gsub!(/\/?\*--\n(.*?)\/?\*\+\+/m, '') - comment.sub!(/\/?\*--\n.*/m, '') + @content = @content.gsub(%r%//.*rb_define_%, '//') end ## @@ -1034,14 +1214,24 @@ class RDoc::Parser::C < RDoc::Parser def scan remove_commented_out_lines - do_classes + + do_classes_and_modules + do_missing + do_constants do_methods do_includes do_aliases do_attrs + + @store.add_c_variables self + @top_level end + def new_comment text = nil, location = nil, language = nil + RDoc::Comment.new(text, location, language).tap do |comment| + comment.format = @markup + end + end end - diff --git a/lib/rdoc/parser/changelog.rb b/lib/rdoc/parser/changelog.rb new file mode 100644 index 0000000000..9245d49376 --- /dev/null +++ b/lib/rdoc/parser/changelog.rb @@ -0,0 +1,335 @@ +# frozen_string_literal: true + +## +# A ChangeLog file parser. +# +# This parser converts a ChangeLog into an RDoc::Markup::Document. When +# viewed as HTML a ChangeLog page will have an entry for each day's entries in +# the sidebar table of contents. +# +# This parser is meant to parse the MRI ChangeLog, but can be used to parse any +# {GNU style Change +# Log}[http://www.gnu.org/prep/standards/html_node/Style-of-Change-Logs.html]. + +class RDoc::Parser::ChangeLog < RDoc::Parser + + include RDoc::Parser::Text + + parse_files_matching(/(\/|\\|\A)ChangeLog[^\/\\]*\z/) + + ## + # Attaches the +continuation+ of the previous line to the +entry_body+. + # + # Continued function listings are joined together as a single entry. + # Continued descriptions are joined to make a single paragraph. + + def continue_entry_body entry_body, continuation + return unless last = entry_body.last + + if last =~ /\)\s*\z/ and continuation =~ /\A\(/ then + last.sub!(/\)\s*\z/, ',') + continuation = continuation.sub(/\A\(/, '') + end + + if last =~ /\s\z/ then + last << continuation + else + last << ' ' + continuation + end + end + + ## + # Creates an RDoc::Markup::Document given the +groups+ of ChangeLog entries. + + def create_document groups + doc = RDoc::Markup::Document.new + doc.omit_headings_below = 2 + doc.file = @top_level + + doc << RDoc::Markup::Heading.new(1, File.basename(@file_name)) + doc << RDoc::Markup::BlankLine.new + + groups.sort_by do |day,| day end.reverse_each do |day, entries| + doc << RDoc::Markup::Heading.new(2, day.dup) + doc << RDoc::Markup::BlankLine.new + + doc.concat create_entries entries + end + + doc + end + + ## + # Returns a list of ChangeLog entries an RDoc::Markup nodes for the given + # +entries+. + + def create_entries entries + out = [] + + entries.each do |entry, items| + out << RDoc::Markup::Heading.new(3, entry) + out << RDoc::Markup::BlankLine.new + + out << create_items(items) + end + + out + end + + ## + # Returns an RDoc::Markup::List containing the given +items+ in the + # ChangeLog + + def create_items items + list = RDoc::Markup::List.new :NOTE + + items.each do |item| + item =~ /\A(.*?(?:\([^)]+\))?):\s*/ + + title = $1 + body = $' + + paragraph = RDoc::Markup::Paragraph.new body + list_item = RDoc::Markup::ListItem.new title, paragraph + list << list_item + end + + list + end + + ## + # Groups +entries+ by date. + + def group_entries entries + @time_cache ||= {} + entries.group_by do |title, _| + begin + time = @time_cache[title] + (time || parse_date(title)).strftime '%Y-%m-%d' + rescue NoMethodError, ArgumentError + time, = title.split ' ', 2 + parse_date(time).strftime '%Y-%m-%d' + end + end + end + + ## + # Parse date in ISO-8601, RFC-2822, or default of Git + + def parse_date(date) + case date + when /\A\s*(\d+)-(\d+)-(\d+)(?:[ T](\d+):(\d+):(\d+) *([-+]\d\d):?(\d\d))?\b/ + Time.new($1, $2, $3, $4, $5, $6, ("#{$7}:#{$8}" if $7)) + when /\A\s*\w{3}, +(\d+) (\w{3}) (\d+) (\d+):(\d+):(\d+) *(?:([-+]\d\d):?(\d\d))\b/ + Time.new($3, $2, $1, $4, $5, $6, ("#{$7}:#{$8}" if $7)) + when /\A\s*\w{3} (\w{3}) +(\d+) (\d+) (\d+):(\d+):(\d+) *(?:([-+]\d\d):?(\d\d))\b/ + Time.new($3, $1, $2, $4, $5, $6, ("#{$7}:#{$8}" if $7)) + when /\A\s*\w{3} (\w{3}) +(\d+) (\d+):(\d+):(\d+) (\d+)\b/ + Time.new($6, $1, $2, $3, $4, $5) + else + raise ArgumentError, "bad date: #{date}" + end + end + + ## + # Parses the entries in the ChangeLog. + # + # Returns an Array of each ChangeLog entry in order of parsing. + # + # A ChangeLog entry is an Array containing the ChangeLog title (date and + # committer) and an Array of ChangeLog items (file and function changed with + # description). + # + # An example result would be: + # + # [ 'Tue Dec 4 08:33:46 2012 Eric Hodel <drbrain@segment7.net>', + # [ 'README.EXT: Converted to RDoc format', + # 'README.EXT.ja: ditto']] + + def parse_entries + @time_cache ||= {} + + if /\A((?:.*\n){,3})commit\s/ =~ @content + class << self; prepend Git; end + parse_info($1) + return parse_entries + end + + entries = [] + entry_name = nil + entry_body = [] + + @content.each_line do |line| + case line + when /^\s*$/ then + next + when /^\w.*/ then + entries << [entry_name, entry_body] if entry_name + + entry_name = $& + + begin + time = parse_date entry_name + @time_cache[entry_name] = time + rescue ArgumentError + entry_name = nil + end + + entry_body = [] + when /^(\t| {8})?\*\s*(.*)/ then # "\t* file.c (func): ..." + entry_body << $2.dup + when /^(\t| {8})?\s*(\(.*)/ then # "\t(func): ..." + entry = $2 + + if entry_body.last =~ /:/ then + entry_body << entry.dup + else + continue_entry_body entry_body, entry + end + when /^(\t| {8})?\s*(.*)/ then + continue_entry_body entry_body, $2 + end + end + + entries << [entry_name, entry_body] if entry_name + + entries.reject! do |(entry,_)| + entry == nil + end + + entries + end + + ## + # Converts the ChangeLog into an RDoc::Markup::Document + + def scan + @time_cache = {} + + entries = parse_entries + grouped_entries = group_entries entries + + doc = create_document grouped_entries + + @top_level.comment = doc + + @top_level + end + + module Git + def parse_info(info) + /^\s*base-url\s*=\s*(.*\S)/ =~ info + @base_url = $1 + end + + def parse_entries + entries = [] + + @content.scan(/^commit\s+(\h{20})\h*\n((?:.+\n)*)\n((?: {4}.*\n+)*)/) do + entry_name, header, entry_body = $1, $2, $3.gsub(/^ {4}/, '') + # header = header.scan(/^ *(\S+?): +(.*)/).to_h + # date = header["CommitDate"] || header["Date"] + date = header[/^ *(?:Author)?Date: +(.*)/, 1] + author = header[/^ *Author: +(.*)/, 1] + begin + time = parse_date(header[/^ *CommitDate: +(.*)/, 1] || date) + @time_cache[entry_name] = time + author.sub!(/\s*<(.*)>/, '') + email = $1 + entries << [entry_name, [author, email, date, entry_body]] + rescue ArgumentError + end + end + + entries + end + + def create_entries entries + # git log entries have no strictly itemized style like the old + # style, just assume Markdown. + entries.map do |commit, entry| + LogEntry.new(@base_url, commit, *entry) + end + end + + LogEntry = Struct.new(:base, :commit, :author, :email, :date, :contents) do + HEADING_LEVEL = 3 + + def initialize(base, commit, author, email, date, contents) + case contents + when String + contents = RDoc::Markdown.parse(contents).parts.each do |body| + case body + when RDoc::Markup::Heading + body.level += HEADING_LEVEL + 1 + end + end + case first = contents[0] + when RDoc::Markup::Paragraph + contents[0] = RDoc::Markup::Heading.new(HEADING_LEVEL + 1, first.text) + end + end + super + end + + def level + HEADING_LEVEL + end + + def aref + "label-#{commit}" + end + + def label context = nil + aref + end + + def text + case base + when nil + "#{date}" + when /%s/ + "{#{date}}[#{base % commit}]" + else + "{#{date}}[#{base}#{commit}]" + end + " {#{author}}[mailto:#{email}]" + end + + def accept visitor + visitor.accept_heading self + begin + if visitor.respond_to?(:code_object=) + code_object = visitor.code_object + visitor.code_object = self + end + contents.each do |body| + body.accept visitor + end + ensure + if visitor.respond_to?(:code_object) + visitor.code_object = code_object + end + end + end + + def pretty_print q # :nodoc: + q.group(2, '[log_entry: ', ']') do + q.text commit + q.text ',' + q.breakable + q.group(2, '[date: ', ']') { q.text date } + q.text ',' + q.breakable + q.group(2, '[author: ', ']') { q.text author } + q.text ',' + q.breakable + q.group(2, '[email: ', ']') { q.text email } + q.text ',' + q.breakable + q.pp contents + end + end + end + end +end + diff --git a/lib/rdoc/parser/markdown.rb b/lib/rdoc/parser/markdown.rb new file mode 100644 index 0000000000..9ff478f872 --- /dev/null +++ b/lib/rdoc/parser/markdown.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true +## +# Parse a Markdown format file. The parsed RDoc::Markup::Document is attached +# as a file comment. + +class RDoc::Parser::Markdown < RDoc::Parser + + include RDoc::Parser::Text + + parse_files_matching(/\.(md|markdown)(?:\.[^.]+)?$/) + + ## + # Creates an Markdown-format TopLevel for the given file. + + def scan + comment = RDoc::Comment.new @content, @top_level + comment.format = 'markdown' + + @top_level.comment = comment + end + +end + + diff --git a/lib/rdoc/parser/rd.rb b/lib/rdoc/parser/rd.rb new file mode 100644 index 0000000000..25f5711731 --- /dev/null +++ b/lib/rdoc/parser/rd.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true +## +# Parse a RD format file. The parsed RDoc::Markup::Document is attached as a +# file comment. + +class RDoc::Parser::RD < RDoc::Parser + + include RDoc::Parser::Text + + parse_files_matching(/\.rd(?:\.[^.]+)?$/) + + ## + # Creates an rd-format TopLevel for the given file. + + def scan + comment = RDoc::Comment.new @content, @top_level + comment.format = 'rd' + + @top_level.comment = comment + end + +end + diff --git a/lib/rdoc/parser/ripper_state_lex.rb b/lib/rdoc/parser/ripper_state_lex.rb new file mode 100644 index 0000000000..5492f08726 --- /dev/null +++ b/lib/rdoc/parser/ripper_state_lex.rb @@ -0,0 +1,590 @@ +# frozen_string_literal: true +require 'ripper' + +class RDoc::Parser::RipperStateLex + # TODO: Remove this constants after Ruby 2.4 EOL + RIPPER_HAS_LEX_STATE = Ripper::Filter.method_defined?(:state) + + Token = Struct.new(:line_no, :char_no, :kind, :text, :state) + + EXPR_NONE = 0 + EXPR_BEG = 1 + EXPR_END = 2 + EXPR_ENDARG = 4 + EXPR_ENDFN = 8 + EXPR_ARG = 16 + EXPR_CMDARG = 32 + EXPR_MID = 64 + EXPR_FNAME = 128 + EXPR_DOT = 256 + EXPR_CLASS = 512 + EXPR_LABEL = 1024 + EXPR_LABELED = 2048 + EXPR_FITEM = 4096 + EXPR_VALUE = EXPR_BEG + EXPR_BEG_ANY = (EXPR_BEG | EXPR_MID | EXPR_CLASS) + EXPR_ARG_ANY = (EXPR_ARG | EXPR_CMDARG) + EXPR_END_ANY = (EXPR_END | EXPR_ENDARG | EXPR_ENDFN) + + class InnerStateLex < Ripper::Filter + attr_accessor :lex_state + + def initialize(code) + @lex_state = EXPR_BEG + @in_fname = false + @continue = false + reset + super(code) + end + + def reset + @command_start = false + @cmd_state = @command_start + end + + def on_nl(tok, data) + case @lex_state + when EXPR_FNAME, EXPR_DOT + @continue = true + else + @continue = false + @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 + end + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_ignored_nl(tok, data) + case @lex_state + when EXPR_FNAME, EXPR_DOT + @continue = true + else + @continue = false + @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 + end + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_op(tok, data) + case tok + when '&', '|', '!', '!=', '!~' + case @lex_state + when EXPR_FNAME, EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_BEG + end + when '<<' + # TODO next token? + case @lex_state + when EXPR_FNAME, EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_BEG + end + when '?' + @lex_state = EXPR_BEG + when '&&', '||', '+=', '-=', '*=', '**=', + '&=', '|=', '^=', '<<=', '>>=', '||=', '&&=' + @lex_state = EXPR_BEG + when '::' + case @lex_state + when EXPR_ARG, EXPR_CMDARG + @lex_state = EXPR_DOT + when EXPR_FNAME, EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_BEG + end + else + case @lex_state + when EXPR_FNAME, EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_BEG + end + end + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_kw(tok, data) + case tok + when 'class' + @lex_state = EXPR_CLASS + @in_fname = true + when 'def' + @lex_state = EXPR_FNAME + @continue = true + @in_fname = true + when 'if', 'unless', 'while', 'until' + if ((EXPR_MID | EXPR_END | EXPR_ENDARG | EXPR_ENDFN | EXPR_ARG | EXPR_CMDARG) & @lex_state) != 0 # postfix if + @lex_state = EXPR_BEG | EXPR_LABEL + else + @lex_state = EXPR_BEG + end + when 'begin', 'case', 'when' + @lex_state = EXPR_BEG + when 'return', 'break' + @lex_state = EXPR_MID + else + if @lex_state == EXPR_FNAME + @lex_state = EXPR_END + else + @lex_state = EXPR_END + end + end + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_tstring_beg(tok, data) + @lex_state = EXPR_BEG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_tstring_end(tok, data) + @lex_state = EXPR_END | EXPR_ENDARG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_CHAR(tok, data) + @lex_state = EXPR_END + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_period(tok, data) + @lex_state = EXPR_DOT + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_int(tok, data) + @lex_state = EXPR_END | EXPR_ENDARG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_float(tok, data) + @lex_state = EXPR_END | EXPR_ENDARG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_rational(tok, data) + @lex_state = EXPR_END | EXPR_ENDARG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_imaginary(tok, data) + @lex_state = EXPR_END | EXPR_ENDARG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_symbeg(tok, data) + @lex_state = EXPR_FNAME + @continue = true + @in_fname = true + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + private def on_variables(event, tok, data) + if @in_fname + @lex_state = EXPR_ENDFN + @in_fname = false + @continue = false + elsif @continue + case @lex_state + when EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_ENDFN + @continue = false + end + else + @lex_state = EXPR_CMDARG + end + data << Token.new(lineno, column, event, tok, @lex_state) + end + + def on_ident(tok, data) + on_variables(__method__, tok, data) + end + + def on_ivar(tok, data) + @lex_state = EXPR_END + on_variables(__method__, tok, data) + end + + def on_cvar(tok, data) + @lex_state = EXPR_END + on_variables(__method__, tok, data) + end + + def on_gvar(tok, data) + @lex_state = EXPR_END + on_variables(__method__, tok, data) + end + + def on_backref(tok, data) + @lex_state = EXPR_END + on_variables(__method__, tok, data) + end + + def on_lparen(tok, data) + @lex_state = EXPR_LABEL | EXPR_BEG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_rparen(tok, data) + @lex_state = EXPR_ENDFN + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_lbrace(tok, data) + @lex_state = EXPR_LABEL | EXPR_BEG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_rbrace(tok, data) + @lex_state = EXPR_ENDARG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_lbracket(tok, data) + @lex_state = EXPR_LABEL | EXPR_BEG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_rbracket(tok, data) + @lex_state = EXPR_ENDARG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_const(tok, data) + case @lex_state + when EXPR_FNAME + @lex_state = EXPR_ENDFN + when EXPR_CLASS, EXPR_CMDARG, EXPR_MID + @lex_state = EXPR_ARG + else + @lex_state = EXPR_CMDARG + end + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_sp(tok, data) + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_comma(tok, data) + @lex_state = EXPR_BEG | EXPR_LABEL if (EXPR_ARG_ANY & @lex_state) != 0 + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_comment(tok, data) + @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_ignored_sp(tok, data) + @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_heredoc_beg(tok, data) + data << Token.new(lineno, column, __method__, tok, @lex_state) + @lex_state = EXPR_END + data + end + + def on_heredoc_end(tok, data) + data << Token.new(lineno, column, __method__, tok, @lex_state) + @lex_state = EXPR_BEG + data + end + + def on_default(event, tok, data) + reset + data << Token.new(lineno, column, event, tok, @lex_state) + end + end unless RIPPER_HAS_LEX_STATE + + class InnerStateLex < Ripper::Filter + def initialize(code) + super(code) + end + + def on_default(event, tok, data) + data << Token.new(lineno, column, event, tok, state) + end + end if RIPPER_HAS_LEX_STATE + + def get_squashed_tk + if @buf.empty? + tk = @tokens.shift + else + tk = @buf.shift + end + return nil if tk.nil? + case tk[:kind] + when :on_symbeg then + tk = get_symbol_tk(tk) + when :on_tstring_beg then + tk = get_string_tk(tk) + when :on_backtick then + if (tk[:state] & (EXPR_FNAME | EXPR_ENDFN)) != 0 + @inner_lex.lex_state = EXPR_ARG unless RIPPER_HAS_LEX_STATE + tk[:kind] = :on_ident + tk[:state] = Ripper::Lexer.const_defined?(:State) ? Ripper::Lexer::State.new(EXPR_ARG) : EXPR_ARG + else + tk = get_string_tk(tk) + end + when :on_regexp_beg then + tk = get_regexp_tk(tk) + when :on_embdoc_beg then + tk = get_embdoc_tk(tk) + when :on_heredoc_beg then + @heredoc_queue << retrieve_heredoc_info(tk) + @inner_lex.lex_state = EXPR_END unless RIPPER_HAS_LEX_STATE + when :on_nl, :on_ignored_nl, :on_comment, :on_heredoc_end then + if !@heredoc_queue.empty? + get_heredoc_tk(*@heredoc_queue.shift) + elsif tk[:text].nil? # :on_ignored_nl sometimes gives nil + tk[:text] = '' + end + when :on_words_beg then + tk = get_words_tk(tk) + when :on_qwords_beg then + tk = get_words_tk(tk) + when :on_symbols_beg then + tk = get_words_tk(tk) + when :on_qsymbols_beg then + tk = get_words_tk(tk) + when :on_op then + if '&.' == tk[:text] + tk[:kind] = :on_period + else + tk = get_op_tk(tk) + end + end + tk + end + + private def get_symbol_tk(tk) + is_symbol = true + symbol_tk = Token.new(tk.line_no, tk.char_no, :on_symbol) + if ":'" == tk[:text] or ':"' == tk[:text] + tk1 = get_string_tk(tk) + symbol_tk[:text] = tk1[:text] + symbol_tk[:state] = tk1[:state] + else + case (tk1 = get_squashed_tk)[:kind] + when :on_ident + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_tstring_content + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = get_squashed_tk[:state] # skip :on_tstring_end + when :on_tstring_end + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_op + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_ivar + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_cvar + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_gvar + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_const + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_kw + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + else + is_symbol = false + tk = tk1 + end + end + if is_symbol + tk = symbol_tk + end + tk + end + + private def get_string_tk(tk) + string = tk[:text] + state = nil + kind = :on_tstring + loop do + inner_str_tk = get_squashed_tk + if inner_str_tk.nil? + break + elsif :on_tstring_end == inner_str_tk[:kind] + string = string + inner_str_tk[:text] + state = inner_str_tk[:state] + break + elsif :on_label_end == inner_str_tk[:kind] + string = string + inner_str_tk[:text] + state = inner_str_tk[:state] + kind = :on_symbol + break + else + string = string + inner_str_tk[:text] + if :on_embexpr_beg == inner_str_tk[:kind] then + kind = :on_dstring if :on_tstring == kind + end + end + end + Token.new(tk.line_no, tk.char_no, kind, string, state) + end + + private def get_regexp_tk(tk) + string = tk[:text] + state = nil + loop do + inner_str_tk = get_squashed_tk + if inner_str_tk.nil? + break + elsif :on_regexp_end == inner_str_tk[:kind] + string = string + inner_str_tk[:text] + state = inner_str_tk[:state] + break + else + string = string + inner_str_tk[:text] + end + end + Token.new(tk.line_no, tk.char_no, :on_regexp, string, state) + end + + private def get_embdoc_tk(tk) + string = tk[:text] + until :on_embdoc_end == (embdoc_tk = get_squashed_tk)[:kind] do + string = string + embdoc_tk[:text] + end + string = string + embdoc_tk[:text] + Token.new(tk.line_no, tk.char_no, :on_embdoc, string, embdoc_tk.state) + end + + private def get_heredoc_tk(heredoc_name, indent) + string = '' + start_tk = nil + prev_tk = nil + until heredoc_end?(heredoc_name, indent, tk = @tokens.shift) do + start_tk = tk unless start_tk + if (prev_tk.nil? or "\n" == prev_tk[:text][-1]) and 0 != tk[:char_no] + string = string + (' ' * tk[:char_no]) + end + string = string + tk[:text] + prev_tk = tk + end + start_tk = tk unless start_tk + prev_tk = tk unless prev_tk + @buf.unshift tk # closing heredoc + heredoc_tk = Token.new(start_tk.line_no, start_tk.char_no, :on_heredoc, string, prev_tk.state) + @buf.unshift heredoc_tk + end + + private def retrieve_heredoc_info(tk) + name = tk[:text].gsub(/\A<<[-~]?(['"`]?)(.+)\1\z/, '\2') + indent = tk[:text] =~ /\A<<[-~]/ + [name, indent] + end + + private def heredoc_end?(name, indent, tk) + result = false + if :on_heredoc_end == tk[:kind] then + tk_name = tk[:text].chomp + tk_name.lstrip! if indent + if name == tk_name + result = true + end + end + result + end + + private def get_words_tk(tk) + string = '' + start_token = tk[:text] + start_quote = tk[:text].rstrip[-1] + line_no = tk[:line_no] + char_no = tk[:char_no] + state = tk[:state] + end_quote = + case start_quote + when ?( then ?) + when ?[ then ?] + when ?{ then ?} + when ?< then ?> + else start_quote + end + end_token = nil + loop do + tk = get_squashed_tk + if tk.nil? + end_token = end_quote + break + elsif :on_tstring_content == tk[:kind] then + string += tk[:text] + elsif :on_words_sep == tk[:kind] or :on_tstring_end == tk[:kind] then + if end_quote == tk[:text].strip then + end_token = tk[:text] + break + else + string += tk[:text] + end + else + string += tk[:text] + end + end + text = "#{start_token}#{string}#{end_token}" + Token.new(line_no, char_no, :on_dstring, text, state) + end + + private def get_op_tk(tk) + redefinable_operators = %w[! != !~ % & * ** + +@ - -@ / < << <= <=> == === =~ > >= >> [] []= ^ ` | ~] + if redefinable_operators.include?(tk[:text]) and tk[:state] == EXPR_ARG then + @inner_lex.lex_state = EXPR_ARG unless RIPPER_HAS_LEX_STATE + tk[:state] = Ripper::Lexer.const_defined?(:State) ? Ripper::Lexer::State.new(EXPR_ARG) : EXPR_ARG + tk[:kind] = :on_ident + elsif tk[:text] =~ /^[-+]$/ then + tk_ahead = get_squashed_tk + case tk_ahead[:kind] + when :on_int, :on_float, :on_rational, :on_imaginary then + tk[:text] += tk_ahead[:text] + tk[:kind] = tk_ahead[:kind] + tk[:state] = tk_ahead[:state] + when :on_heredoc_beg, :on_tstring, :on_dstring # frozen/non-frozen string literal + tk[:text] += tk_ahead[:text] + tk[:kind] = tk_ahead[:kind] + tk[:state] = tk_ahead[:state] + else + @buf.unshift tk_ahead + end + end + tk + end + + def initialize(code) + @buf = [] + @heredoc_queue = [] + @inner_lex = InnerStateLex.new(code) + @tokens = @inner_lex.parse([]) + end + + def self.parse(code) + lex = self.new(code) + tokens = [] + begin + while tk = lex.get_squashed_tk + tokens.push tk + end + rescue StopIteration + end + tokens + end + + def self.end?(token) + (token[:state] & EXPR_END) + end +end diff --git a/lib/rdoc/parser/ruby.rb b/lib/rdoc/parser/ruby.rb index c9a12a8fe8..e546fe2141 100644 --- a/lib/rdoc/parser/ruby.rb +++ b/lib/rdoc/parser/ruby.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true ## # This file contains stuff stolen outright from: # @@ -7,17 +8,6 @@ # by Keiju ISHITSUKA (Nippon Rational Inc.) # -require 'rdoc/ruby_token' -require 'rdoc/ruby_lex' - -require 'rdoc/code_objects' -require 'rdoc/token_stream' -require 'rdoc/markup/pre_process' -require 'rdoc/parser' -require 'rdoc/parser/ruby_tools' - -$TOKEN_DEBUG ||= nil - ## # Extracts code elements from a source file returning a TopLevel object # containing the constituent file elements. @@ -32,6 +22,7 @@ $TOKEN_DEBUG ||= nil # * aliases # * private, public, protected # * private_class_function, public_class_function +# * private_constant, public_constant # * module_function # * attr, attr_reader, attr_writer, attr_accessor # * extra accessors given on the command line @@ -103,7 +94,7 @@ $TOKEN_DEBUG ||= nil # You can force the name of a method using the :method: directive: # # ## -# # :method: woo_hoo! +# # :method: some_method! # # By default, meta-methods are instance methods. To indicate that a method is # a singleton method instead use the :singleton-method: directive: @@ -114,7 +105,10 @@ $TOKEN_DEBUG ||= nil # You can also use the :singleton-method: directive with a name: # # ## -# # :singleton-method: woo_hoo! +# # :singleton-method: some_method! +# +# You can define arguments for metaprogrammed methods via either the +# :call-seq:, :arg: or :args: directives. # # Additionally you can mark a method as an attribute by # using :attr:, :attr_reader:, :attr_writer: or :attr_accessor:. Just like @@ -144,11 +138,13 @@ $TOKEN_DEBUG ||= nil # Note that by default, the :method: directive will be ignored if there is a # standard rdocable item following it. +require 'ripper' +require_relative 'ripper_state_lex' + class RDoc::Parser::Ruby < RDoc::Parser parse_files_matching(/\.rbw?$/) - include RDoc::RubyToken include RDoc::TokenStream include RDoc::Parser::RubyTools @@ -168,44 +164,110 @@ class RDoc::Parser::Ruby < RDoc::Parser def initialize(top_level, file_name, content, options, stats) super + if /\t/ =~ content then + tab_width = @options.tab_width + content = content.split(/\n/).map do |line| + 1 while line.gsub!(/\t+/) { + ' ' * (tab_width*$&.length - $`.length % tab_width) + } && $~ + line + end.join("\n") + end + @size = 0 @token_listeners = nil - @scanner = RDoc::RubyLex.new content, @options - @scanner.exception_on_syntax_error = false + content = RDoc::Encoding.remove_magic_comment content + @scanner = RDoc::Parser::RipperStateLex.parse(content) + @content = content + @scanner_point = 0 @prev_seek = nil - - @encoding = nil - @encoding = @options.encoding if Object.const_defined? :Encoding + @markup = @options.markup + @track_visibility = :nodoc != @options.visibility + @encoding = @options.encoding reset end + def tk_nl?(tk) + :on_nl == tk[:kind] or :on_ignored_nl == tk[:kind] + end + + ## + # Retrieves the read token stream and replaces +pattern+ with +replacement+ + # using gsub. If the result is only a ";" returns an empty string. + + def get_tkread_clean pattern, replacement # :nodoc: + read = get_tkread.gsub(pattern, replacement).strip + return '' if read == ';' + read + end + + ## + # Extracts the visibility information for the visibility token +tk+ + # and +single+ class type identifier. + # + # Returns the visibility type (a string), the visibility (a symbol) and + # +singleton+ if the methods following should be converted to singleton + # methods. + + def get_visibility_information tk, single # :nodoc: + vis_type = tk[:text] + singleton = single == SINGLE + + vis = + case vis_type + when 'private' then :private + when 'protected' then :protected + when 'public' then :public + when 'private_class_method' then + singleton = true + :private + when 'public_class_method' then + singleton = true + :public + when 'module_function' then + singleton = true + :public + else + raise RDoc::Error, "Invalid visibility: #{tk.name}" + end + + return vis_type, vis, singleton + end + ## # Look for the first comment in a file that isn't a shebang line. def collect_first_comment skip_tkspace - comment = '' - comment.force_encoding @encoding if @encoding + comment = ''.dup + comment = RDoc::Encoding.change_encoding comment, @encoding if @encoding first_line = true + first_comment_tk_kind = nil + line_no = nil tk = get_tk - while TkCOMMENT === tk - if first_line and tk.text =~ /\A#!/ then + while tk && (:on_comment == tk[:kind] or :on_embdoc == tk[:kind]) + comment_body = retrieve_comment_body(tk) + if first_line and comment_body =~ /\A#!/ then skip_tkspace tk = get_tk - elsif first_line and tk.text =~ /\A#\s*-\*-/ then + elsif first_line and comment_body =~ /\A#\s*-\*-/ then first_line = false skip_tkspace tk = get_tk else + break if first_comment_tk_kind and not first_comment_tk_kind === tk[:kind] + first_comment_tk_kind = tk[:kind] + + line_no = tk[:line_no] if first_line first_line = false - comment << tk.text << "\n" + comment << comment_body tk = get_tk - if TkNL === tk then - skip_tkspace false + if :on_nl === tk then + skip_tkspace_without_nl tk = get_tk end end @@ -213,45 +275,61 @@ class RDoc::Parser::Ruby < RDoc::Parser unget_tk tk - comment + new_comment comment, line_no end ## - # Aborts with +msg+ + # Consumes trailing whitespace from the token stream - def error(msg) - msg = make_message msg + def consume_trailing_spaces # :nodoc: + skip_tkspace_without_nl + end - abort msg + ## + # Creates a new attribute in +container+ with +name+. + + def create_attr container, single, name, rw, comment # :nodoc: + att = RDoc::Attr.new get_tkread, name, rw, comment, single == SINGLE + record_location att + + container.add_attribute att + @stats.add_attribute att + + att end ## - # Look for a 'call-seq' in the comment, and override the normal parameter - # stuff - #-- - # TODO handle undent + # Creates a module alias in +container+ at +rhs_name+ (or at the top-level + # for "::") with the name from +constant+. - def extract_call_seq(comment, meth) - if comment.sub!(/:?call-seq:(.*?)(^\s*#?\s*$|\z)/m, '') then - seq = $1 - seq.gsub!(/^\s*\#\s*/, '') - meth.call_seq = seq - end + def create_module_alias container, constant, rhs_name # :nodoc: + mod = if rhs_name =~ /^::/ then + @store.find_class_or_module rhs_name + else + container.find_module_named rhs_name + end - meth + container.add_module_alias mod, rhs_name, constant, @top_level end ## - # Looks for a true or false token. Returns false if TkFALSE or TkNIL are - # found. + # Aborts with +msg+ + + def error(msg) + msg = make_message msg + + abort msg + end + + ## + # Looks for a true or false token. def get_bool skip_tkspace tk = get_tk - case tk - when TkTRUE + if :on_kw == tk[:kind] && 'true' == tk[:text] true - when TkFALSE, TkNIL + elsif :on_kw == tk[:kind] && ('false' == tk[:text] || 'nil' == tk[:text]) false else unget_tk tk @@ -264,58 +342,87 @@ class RDoc::Parser::Ruby < RDoc::Parser # with :: separated named) and return the ultimate name, the associated # container, and the given name (with the ::). - def get_class_or_module(container) + def get_class_or_module container, ignore_constants = false skip_tkspace name_t = get_tk - given_name = '' + given_name = ''.dup # class ::A -> A is in the top level - case name_t - when TkCOLON2, TkCOLON3 then # bug + if :on_op == name_t[:kind] and '::' == name_t[:text] then # bug name_t = get_tk container = @top_level given_name << '::' end - skip_tkspace false - given_name << name_t.name + skip_tkspace_without_nl + given_name << name_t[:text] - while TkCOLON2 === peek_tk do + is_self = name_t[:kind] == :on_op && name_t[:text] == '<<' + new_modules = [] + while !is_self && (tk = peek_tk) and :on_op == tk[:kind] and '::' == tk[:text] do prev_container = container - container = container.find_module_named name_t.name - unless container then - container = prev_container.add_module RDoc::NormalModule, name_t.name - end + container = container.find_module_named name_t[:text] + container ||= + if ignore_constants then + c = RDoc::NormalModule.new name_t[:text] + c.store = @store + new_modules << [prev_container, c] + c + else + c = prev_container.add_module RDoc::NormalModule, name_t[:text] + c.ignore unless prev_container.document_children + @top_level.add_to_classes_or_modules c + c + end + + record_location container + get_tk + skip_tkspace + if :on_lparen == peek_tk[:kind] # ProcObjectInConstant::() + parse_method_or_yield_parameters + break + end name_t = get_tk - given_name << '::' << name_t.name + unless :on_const == name_t[:kind] || :on_ident == name_t[:kind] + raise RDoc::Error, "Invalid class or module definition: #{given_name}" + end + if prev_container == container and !ignore_constants + given_name = name_t[:text] + else + given_name << '::' + name_t[:text] + end end - skip_tkspace false - return [container, name_t, given_name] + + skip_tkspace_without_nl + + return [container, name_t, given_name, new_modules] end ## # Return a superclass, which can be either a constant of an expression def get_class_specification - tk = get_tk - return "self" if TkSELF === tk - - res = "" - while TkCOLON2 === tk or TkCOLON3 === tk or TkCONSTANT === tk do - res += tk.name - tk = get_tk + tk = peek_tk + if tk.nil? + return '' + elsif :on_kw == tk[:kind] && 'self' == tk[:text] + return 'self' + elsif :on_gvar == tk[:kind] + return '' end - unget_tk(tk) - skip_tkspace false + res = get_constant + + skip_tkspace_without_nl get_tkread # empty out read buffer tk = get_tk + return res unless tk - case tk - when TkNL, TkCOMMENT, TkSEMICOLON then + case tk[:kind] + when :on_nl, :on_comment, :on_embdoc, :on_semicolon then unget_tk(tk) return res end @@ -330,44 +437,164 @@ class RDoc::Parser::Ruby < RDoc::Parser def get_constant res = "" - skip_tkspace false + skip_tkspace_without_nl tk = get_tk - while TkCOLON2 === tk or TkCOLON3 === tk or TkCONSTANT === tk do - res += tk.name + while tk && ((:on_op == tk[:kind] && '::' == tk[:text]) || :on_const == tk[:kind]) do + res += tk[:text] tk = get_tk end -# if res.empty? -# warn("Unexpected token #{tk} in constant") -# end unget_tk(tk) res end ## - # Get a constant that may be surrounded by parens + # Get an included module that may be surrounded by parens - def get_constant_with_optional_parens - skip_tkspace false + def get_included_module_with_optional_parens + skip_tkspace_without_nl + get_tkread + tk = get_tk + end_token = get_end_token tk + return '' unless end_token nest = 0 + continue = false + only_constant = true - while TkLPAREN === (tk = peek_tk) or TkfLPAREN === tk do - get_tk - skip_tkspace - nest += 1 + while tk != nil do + is_element_of_constant = false + case tk[:kind] + when :on_semicolon then + break if nest == 0 + when :on_lbracket then + nest += 1 + when :on_rbracket then + nest -= 1 + when :on_lbrace then + nest += 1 + when :on_rbrace then + nest -= 1 + if nest <= 0 + # we might have a.each { |i| yield i } + unget_tk(tk) if nest < 0 + break + end + when :on_lparen then + nest += 1 + when end_token[:kind] then + if end_token[:kind] == :on_rparen + nest -= 1 + break if nest <= 0 + else + break if nest <= 0 + end + when :on_rparen then + nest -= 1 + when :on_comment, :on_embdoc then + @read.pop + if :on_nl == end_token[:kind] and "\n" == tk[:text][-1] and + (!continue or (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) != 0) then + break if !continue and nest <= 0 + end + when :on_comma then + continue = true + when :on_ident then + continue = false if continue + when :on_kw then + case tk[:text] + when 'def', 'do', 'case', 'for', 'begin', 'class', 'module' + nest += 1 + when 'if', 'unless', 'while', 'until', 'rescue' + # postfix if/unless/while/until/rescue must be EXPR_LABEL + nest += 1 unless (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) != 0 + when 'end' + nest -= 1 + break if nest == 0 + end + when :on_const then + is_element_of_constant = true + when :on_op then + is_element_of_constant = true if '::' == tk[:text] + end + only_constant = false unless is_element_of_constant + tk = get_tk end - name = get_constant + if only_constant + get_tkread_clean(/\s+/, ' ') + else + '' + end + end - while nest > 0 - skip_tkspace - tk = get_tk - nest -= 1 if TkRPAREN === tk + ## + # Little hack going on here. In the statement: + # + # f = 2*(1+yield) + # + # We see the RPAREN as the next token, so we need to exit early. This still + # won't catch all cases (such as "a = yield + 1" + + def get_end_token tk # :nodoc: + case tk[:kind] + when :on_lparen + token = RDoc::Parser::RipperStateLex::Token.new + token[:kind] = :on_rparen + token[:text] = ')' + token + when :on_rparen + nil + else + token = RDoc::Parser::RipperStateLex::Token.new + token[:kind] = :on_nl + token[:text] = "\n" + token end + end + + ## + # Retrieves the method container for a singleton method. + + def get_method_container container, name_t # :nodoc: + prev_container = container + container = container.find_module_named(name_t[:text]) - name + unless container then + constant = prev_container.constants.find do |const| + const.name == name_t[:text] + end + + if constant then + parse_method_dummy prev_container + return + end + end + + unless container then + # TODO seems broken, should starting at Object in @store + obj = name_t[:text].split("::").inject(Object) do |state, item| + state.const_get(item) + end rescue nil + + type = obj.class == Class ? RDoc::NormalClass : RDoc::NormalModule + + unless [Class, Module].include?(obj.class) then + warn("Couldn't find #{name_t[:text]}. Assuming it's a module") + end + + if type == RDoc::NormalClass then + sclass = obj.superclass ? obj.superclass.name : nil + container = prev_container.add_class type, name_t[:text], sclass + else + container = prev_container.add_module type, name_t[:text] + end + + record_location container + end + + container end ## @@ -375,29 +602,37 @@ class RDoc::Parser::Ruby < RDoc::Parser def get_symbol_or_name tk = get_tk - case tk - when TkSYMBOL then - text = tk.text.sub(/^:/, '') + case tk[:kind] + when :on_symbol then + text = tk[:text].sub(/^:/, '') - if TkASSIGN === peek_tk then + next_tk = peek_tk + if next_tk && :on_op == next_tk[:kind] && '=' == next_tk[:text] then get_tk text << '=' end text - when TkId, TkOp then - tk.name - when TkAMPER, - TkDSTRING, - TkSTAR, - TkSTRING then - tk.text + when :on_ident, :on_const, :on_gvar, :on_cvar, :on_ivar, :on_op, :on_kw then + tk[:text] + when :on_tstring, :on_dstring then + tk[:text][1..-2] else raise RDoc::Error, "Name or symbol expected (got #{tk})" end end ## + # Marks containers between +container+ and +ancestor+ as ignored + + def suppress_parents container, ancestor # :nodoc: + while container and container != ancestor do + container.suppress unless container.documented? + container = container.parent + end + end + + ## # Look for directives in a normal comment block: # # # :stopdoc: @@ -405,64 +640,69 @@ class RDoc::Parser::Ruby < RDoc::Parser # # This routine modifies its +comment+ parameter. - def look_for_directives_in context, comment - @preprocess.handle comment, context do |directive, param| + def look_for_directives_in container, comment + @preprocess.handle comment, container do |directive, param| case directive when 'method', 'singleton-method', 'attr', 'attr_accessor', 'attr_reader', 'attr_writer' then false # handled elsewhere when 'section' then - context.set_current_section param, comment - comment.replace '' + break unless container.kind_of?(RDoc::Context) + container.set_current_section param, comment.dup + comment.text = '' break end end - remove_private_comments comment + comment.remove_private end ## # Adds useful info about the parser to +message+ def make_message message - prefix = "#{@file_name}:" + prefix = "#{@file_name}:".dup - prefix << "#{@scanner.line_no}:#{@scanner.char_no}:" if @scanner + tk = peek_tk + prefix << "#{tk[:line_no]}:#{tk[:char_no]}:" if tk "#{prefix} #{message}" end ## + # Creates a comment with the correct format + + def new_comment comment, line_no = nil + c = RDoc::Comment.new comment, @top_level, :ruby + c.line = line_no + c.format = @markup + c + end + + ## # Creates an RDoc::Attr for the name following +tk+, setting the comment to # +comment+. def parse_attr(context, single, tk, comment) - offset = tk.seek - line_no = tk.line_no + line_no = tk[:line_no] args = parse_symbol_arg 1 if args.size > 0 then name = args[0] rw = "R" - skip_tkspace false + skip_tkspace_without_nl tk = get_tk - if TkCOMMA === tk then + if :on_comma == tk[:kind] then rw = "RW" if get_bool else unget_tk tk end - att = RDoc::Attr.new get_tkread, name, rw, comment, single == SINGLE - att.record_location @top_level - att.offset = offset + att = create_attr context, single, name, rw, comment att.line = line_no read_documentation_modifiers att, RDoc::ATTR_MODIFIERS - - context.add_attribute att - - @stats.add_attribute att else warn "'attr' ignored - looks like a variable" end @@ -473,8 +713,7 @@ class RDoc::Parser::Ruby < RDoc::Parser # comment for each to +comment+. def parse_attr_accessor(context, single, tk, comment) - offset = tk.seek - line_no = tk.line_no + line_no = tk[:line_no] args = parse_symbol_arg rw = "?" @@ -483,9 +722,9 @@ class RDoc::Parser::Ruby < RDoc::Parser read_documentation_modifiers tmp, RDoc::ATTR_MODIFIERS # TODO In most other places we let the context keep track of document_self # and add found items appropriately but here we do not. I'm not sure why. - return unless tmp.document_self + return if @track_visibility and not tmp.document_self - case tk.name + case tk[:text] when "attr_reader" then rw = "R" when "attr_writer" then rw = "W" when "attr_accessor" then rw = "RW" @@ -494,13 +733,8 @@ class RDoc::Parser::Ruby < RDoc::Parser end for name in args - att = RDoc::Attr.new get_tkread, name, rw, comment, single == SINGLE - att.record_location @top_level - att.offset = offset + att = create_attr context, single, name, rw, comment att.line = line_no - - context.add_attribute att - @stats.add_attribute att end end @@ -508,22 +742,19 @@ class RDoc::Parser::Ruby < RDoc::Parser # Parses an +alias+ in +context+ with +comment+ def parse_alias(context, single, tk, comment) - offset = tk.seek - line_no = tk.line_no + line_no = tk[:line_no] skip_tkspace - if TkLPAREN === peek_tk then + if :on_lparen === peek_tk[:kind] then get_tk skip_tkspace end new_name = get_symbol_or_name - @scanner.instance_eval { @lex_state = EXPR_FNAME } - skip_tkspace - if TkCOMMA === peek_tk then + if :on_comma === peek_tk[:kind] then get_tk skip_tkspace end @@ -536,8 +767,7 @@ class RDoc::Parser::Ruby < RDoc::Parser al = RDoc::Alias.new(get_tkread, old_name, new_name, comment, single == SINGLE) - al.record_location @top_level - al.offset = offset + record_location al al.line = line_no read_documentation_modifiers al, RDoc::ATTR_MODIFIERS @@ -551,285 +781,475 @@ class RDoc::Parser::Ruby < RDoc::Parser # Extracts call parameters from the token stream. def parse_call_parameters(tk) - end_token = case tk - when TkLPAREN, TkfLPAREN - TkRPAREN - when TkRPAREN + end_token = case tk[:kind] + when :on_lparen + :on_rparen + when :on_rparen return "" else - TkNL + :on_nl end nest = 0 loop do - case tk - when TkSEMICOLON + break if tk.nil? + case tk[:kind] + when :on_semicolon break - when TkLPAREN, TkfLPAREN + when :on_lparen nest += 1 when end_token - if end_token == TkRPAREN + if end_token == :on_rparen nest -= 1 - break if @scanner.lex_state == EXPR_END and nest <= 0 + break if RDoc::Parser::RipperStateLex.end?(tk) and nest <= 0 else - break unless @scanner.continue + break if RDoc::Parser::RipperStateLex.end?(tk) end - when TkCOMMENT, TkASSIGN, TkOPASGN + when :on_comment, :on_embdoc unget_tk(tk) break - when nil then - break + when :on_op + if tk[:text] =~ /^(.{1,2})?=$/ + unget_tk(tk) + break + end end tk = get_tk end - res = get_tkread.tr("\n", " ").strip - res = "" if res == ";" - res + + get_tkread_clean "\n", " " end ## # Parses a class in +context+ with +comment+ - def parse_class(container, single, tk, comment) - offset = tk.seek - line_no = tk.line_no + def parse_class container, single, tk, comment + line_no = tk[:line_no] declaration_context = container - container, name_t, given_name = get_class_or_module container - - case name_t - when TkCONSTANT - name = name_t.name - superclass = '::Object' + container, name_t, given_name, = get_class_or_module container - if TkLT === peek_tk then - get_tk - skip_tkspace - superclass = get_class_specification - superclass = '(unknown)' if superclass.empty? + if name_t[:kind] == :on_const + cls = parse_class_regular container, declaration_context, single, + name_t, given_name, comment + elsif name_t[:kind] == :on_op && name_t[:text] == '<<' + case name = get_class_specification + when 'self', container.name + read_documentation_modifiers cls, RDoc::CLASS_MODIFIERS + parse_statements container, SINGLE + return # don't update line + else + cls = parse_class_singleton container, name, comment end + else + warn "Expected class name or '<<'. Got #{name_t[:kind]}: #{name_t[:text].inspect}" + return + end - cls_type = single == SINGLE ? RDoc::SingleClass : RDoc::NormalClass - cls = declaration_context.add_class cls_type, given_name, superclass - cls.ignore unless container.document_children + cls.line = line_no - read_documentation_modifiers cls, RDoc::CLASS_MODIFIERS - cls.record_location @top_level - cls.offset = offset - cls.line = line_no + # after end modifiers + read_documentation_modifiers cls, RDoc::CLASS_MODIFIERS - cls.add_comment comment, @top_level + cls + end - @top_level.add_to_classes_or_modules cls - @stats.add_class cls + ## + # Parses and creates a regular class - parse_statements cls - when TkLSHFT - case name = get_class_specification - when "self", container.name - parse_statements container, SINGLE - else - other = RDoc::TopLevel.find_class_named name + def parse_class_regular container, declaration_context, single, # :nodoc: + name_t, given_name, comment + superclass = '::Object' - unless other then - other = container.add_module RDoc::NormalModule, name - other.record_location @top_level - other.offset = offset - other.line = line_no + if given_name =~ /^::/ then + declaration_context = @top_level + given_name = $' + end - other.add_comment comment, @top_level - end + tk = peek_tk + if tk[:kind] == :on_op && tk[:text] == '<' then + get_tk + skip_tkspace + superclass = get_class_specification + superclass = '(unknown)' if superclass.empty? + end - # notify :nodoc: all if not a constant-named class/module - # (and remove any comment) - unless name =~ /\A(::)?[A-Z]/ then - other.document_self = nil - other.document_children = false - other.clear_comment - end + cls_type = single == SINGLE ? RDoc::SingleClass : RDoc::NormalClass + cls = declaration_context.add_class cls_type, given_name, superclass + cls.ignore unless container.document_children + + read_documentation_modifiers cls, RDoc::CLASS_MODIFIERS + record_location cls + + cls.add_comment comment, @top_level - @top_level.add_to_classes_or_modules other - @stats.add_class other + @top_level.add_to_classes_or_modules cls + @stats.add_class cls + + suppress_parents container, declaration_context unless cls.document_self + + parse_statements cls + + cls + end - read_documentation_modifiers other, RDoc::CLASS_MODIFIERS - parse_statements(other, SINGLE) + ## + # Parses a singleton class in +container+ with the given +name+ and + # +comment+. + + def parse_class_singleton container, name, comment # :nodoc: + other = @store.find_class_named name + + unless other then + if name =~ /^::/ then + name = $' + container = @top_level end - else - warn("Expected class name or '<<'. Got #{name_t.class}: #{name_t.text.inspect}") + + other = container.add_module RDoc::NormalModule, name + record_location other + + # class << $gvar + other.ignore if name.empty? + + other.add_comment comment, @top_level + end + + # notify :nodoc: all if not a constant-named class/module + # (and remove any comment) + unless name =~ /\A(::)?[A-Z]/ then + other.document_self = nil + other.document_children = false + other.clear_comment end + + @top_level.add_to_classes_or_modules other + @stats.add_class other + + read_documentation_modifiers other, RDoc::CLASS_MODIFIERS + parse_statements(other, SINGLE) + + other end ## - # Parses a constant in +context+ with +comment+ + # Parses a constant in +context+ with +comment+. If +ignore_constants+ is + # true, no found constants will be added to RDoc. - def parse_constant container, tk, comment - offset = tk.seek - line_no = tk.line_no + def parse_constant container, tk, comment, ignore_constants = false + line_no = tk[:line_no] - name = tk.name - skip_tkspace false + name = tk[:text] + skip_tkspace_without_nl return unless name =~ /^\w+$/ - eq_tk = get_tk + new_modules = [] + if :on_op == peek_tk[:kind] && '::' == peek_tk[:text] then + unget_tk tk - unless TkASSIGN === eq_tk then - unget_tk eq_tk - return false - end + container, name_t, _, new_modules = get_class_or_module container, true - nest = 0 - get_tkread + name = name_t[:text] + end - tk = get_tk + is_array_or_hash = false + if peek_tk && :on_lbracket == peek_tk[:kind] + get_tk + nest = 1 + while bracket_tk = get_tk + case bracket_tk[:kind] + when :on_lbracket + nest += 1 + when :on_rbracket + nest -= 1 + break if nest == 0 + end + end + skip_tkspace_without_nl + is_array_or_hash = true + end - if TkGT === tk then - unget_tk tk - unget_tk eq_tk + unless peek_tk && :on_op == peek_tk[:kind] && '=' == peek_tk[:text] then return false end + get_tk + + unless ignore_constants + new_modules.each do |prev_c, new_module| + prev_c.add_module_by_normal_module new_module + new_module.ignore unless prev_c.document_children + @top_level.add_to_classes_or_modules new_module + end + end + + value = '' + con = RDoc::Constant.new name, value, comment + + body = parse_constant_body container, con, is_array_or_hash + + return unless body + + con.value = body + record_location con + con.line = line_no + read_documentation_modifiers con, RDoc::CONSTANT_MODIFIERS - rhs_name = '' + return if is_array_or_hash + @stats.add_constant con + container.add_constant con + + true + end + + def parse_constant_body container, constant, is_array_or_hash # :nodoc: + nest = 0 + rhs_name = ''.dup + + get_tkread + + tk = get_tk + + body = nil loop do - case tk - when TkSEMICOLON then + break if tk.nil? + if :on_semicolon == tk[:kind] then break if nest <= 0 - when TkLPAREN, TkfLPAREN, TkLBRACE, TkfLBRACE, TkLBRACK, TkfLBRACK, - TkDO, TkIF, TkUNLESS, TkCASE, TkDEF, TkBEGIN then + elsif [:on_tlambeg, :on_lparen, :on_lbrace, :on_lbracket].include?(tk[:kind]) then + nest += 1 + elsif (:on_kw == tk[:kind] && 'def' == tk[:text]) then nest += 1 - when TkRPAREN, TkRBRACE, TkRBRACK, TkEND then + elsif (:on_kw == tk[:kind] && %w{do if unless case begin}.include?(tk[:text])) then + if (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) == 0 + nest += 1 + end + elsif [:on_rparen, :on_rbrace, :on_rbracket].include?(tk[:kind]) || + (:on_kw == tk[:kind] && 'end' == tk[:text]) then nest -= 1 - when TkCOMMENT then - if nest <= 0 && - (@scanner.lex_state == EXPR_END || !@scanner.continue) then - unget_tk tk + elsif (:on_comment == tk[:kind] or :on_embdoc == tk[:kind]) then + unget_tk tk + if nest <= 0 and RDoc::Parser::RipperStateLex.end?(tk) then + body = get_tkread_clean(/^[ \t]+/, '') + read_documentation_modifiers constant, RDoc::CONSTANT_MODIFIERS break + else + read_documentation_modifiers constant, RDoc::CONSTANT_MODIFIERS end - when TkCONSTANT then - rhs_name << tk.name - - if nest <= 0 and TkNL === peek_tk then - mod = if rhs_name =~ /^::/ then - RDoc::TopLevel.find_class_or_module rhs_name - else - container.find_module_named rhs_name - end + elsif :on_const == tk[:kind] then + rhs_name << tk[:text] - container.add_module_alias mod, name, @top_level if mod + next_tk = peek_tk + if nest <= 0 and (next_tk.nil? || :on_nl == next_tk[:kind]) then + create_module_alias container, constant, rhs_name unless is_array_or_hash break end - when TkNL then - if nest <= 0 && - (@scanner.lex_state == EXPR_END || !@scanner.continue) then + elsif :on_nl == tk[:kind] then + if nest <= 0 and RDoc::Parser::RipperStateLex.end?(tk) then unget_tk tk break end - when TkCOLON2, TkCOLON3 then + elsif :on_op == tk[:kind] && '::' == tk[:text] rhs_name << '::' - when nil then - break end tk = get_tk end - res = get_tkread.gsub(/^[ \t]+/, '').strip - res = "" if res == ";" + body ? body : get_tkread_clean(/^[ \t]+/, '') + end - con = RDoc::Constant.new name, res, comment - con.record_location @top_level - con.offset = offset - con.line = line_no - read_documentation_modifiers con, RDoc::CONSTANT_MODIFIERS + ## + # Generates an RDoc::Method or RDoc::Attr from +comment+ by looking for + # :method: or :attr: directives in +comment+. + + def parse_comment container, tk, comment + return parse_comment_tomdoc container, tk, comment if @markup == 'tomdoc' + column = tk[:char_no] + line_no = comment.line.nil? ? tk[:line_no] : comment.line + + comment.text = comment.text.sub(/(^# +:?)(singleton-)(method:)/, '\1\3') + singleton = !!$~ + + co = + if (comment.text = comment.text.sub(/^# +:?method: *(\S*).*?\n/i, '')) && !!$~ then + line_no += $`.count("\n") + parse_comment_ghost container, comment.text, $1, column, line_no, comment + elsif (comment.text = comment.text.sub(/# +:?(attr(_reader|_writer|_accessor)?): *(\S*).*?\n/i, '')) && !!$~ then + parse_comment_attr container, $1, $3, comment + end + + if co then + co.singleton = singleton + co.line = line_no + end - @stats.add_constant con - container.add_constant con true end ## - # Generates an RDoc::Method or RDoc::Attr from +comment+ by looking for - # :method: or :attr: directives in +comment+. + # Parse a comment that is describing an attribute in +container+ with the + # given +name+ and +comment+. - def parse_comment(container, tk, comment) - column = tk.char_no - offset = tk.seek - line_no = tk.line_no + def parse_comment_attr container, type, name, comment # :nodoc: + return if name.empty? - singleton = !!comment.sub!(/(^# +:?)(singleton-)(method:)/, '\1\3') + rw = case type + when 'attr_reader' then 'R' + when 'attr_writer' then 'W' + else 'RW' + end - # REFACTOR - if comment.sub!(/^# +:?method: *(\S*).*?\n/i, '') then - name = $1 unless $1.empty? + create_attr container, NORMAL, name, rw, comment + end - meth = RDoc::GhostMethod.new get_tkread, name - meth.record_location @top_level - meth.singleton = singleton - meth.offset = offset - meth.line = line_no + def parse_comment_ghost container, text, name, column, line_no, # :nodoc: + comment + name = nil if name.empty? - meth.start_collecting_tokens - indent = TkSPACE.new nil, 1, 1 - indent.set_text " " * column + meth = RDoc::GhostMethod.new get_tkread, name + record_location meth - position_comment = TkCOMMENT.new nil, line_no, 1 - position_comment.set_text "# File #{@top_level.absolute_name}, line #{line_no}" - meth.add_tokens [position_comment, NEWLINE_TOKEN, indent] + meth.start_collecting_tokens + indent = RDoc::Parser::RipperStateLex::Token.new(1, 1, :on_sp, ' ' * column) + position_comment = RDoc::Parser::RipperStateLex::Token.new(line_no, 1, :on_comment) + position_comment[:text] = "# File #{@top_level.relative_name}, line #{line_no}" + newline = RDoc::Parser::RipperStateLex::Token.new(0, 0, :on_nl, "\n") + meth.add_tokens [position_comment, newline, indent] + + meth.params = + if text.sub!(/^#\s+:?args?:\s*(.*?)\s*$/i, '') then + $1 + else + '' + end - meth.params = '' + comment.normalize + comment.extract_call_seq meth - extract_call_seq comment, meth + return unless meth.name - return unless meth.name + container.add_method meth - container.add_method meth + meth.comment = comment - meth.comment = comment + @stats.add_method meth - @stats.add_method meth - elsif comment.sub!(/# +:?(attr(_reader|_writer|_accessor)?): *(\S*).*?\n/i, '') then - rw = case $1 - when 'attr_reader' then 'R' - when 'attr_writer' then 'W' - else 'RW' - end + meth + end - name = $3 unless $3.empty? + ## + # Creates an RDoc::Method on +container+ from +comment+ if there is a + # Signature section in the comment - # TODO authorize 'singleton-attr...'? - att = RDoc::Attr.new get_tkread, name, rw, comment - att.record_location @top_level - att.offset = offset - att.line = line_no + def parse_comment_tomdoc container, tk, comment + return unless signature = RDoc::TomDoc.signature(comment) + column = tk[:char_no] + line_no = tk[:line_no] - container.add_attribute att - @stats.add_attribute att - end + name, = signature.split %r%[ \(]%, 2 - true + meth = RDoc::GhostMethod.new get_tkread, name + record_location meth + meth.line = line_no + + meth.start_collecting_tokens + indent = RDoc::Parser::RipperStateLex::Token.new(1, 1, :on_sp, ' ' * column) + position_comment = RDoc::Parser::RipperStateLex::Token.new(line_no, 1, :on_comment) + position_comment[:text] = "# File #{@top_level.relative_name}, line #{line_no}" + newline = RDoc::Parser::RipperStateLex::Token.new(0, 0, :on_nl, "\n") + meth.add_tokens [position_comment, newline, indent] + + meth.call_seq = signature + + comment.normalize + + return unless meth.name + + container.add_method meth + + meth.comment = comment + + @stats.add_method meth end ## - # Parses an +include+ in +context+ with +comment+ + # Parses an +include+ or +extend+, indicated by the +klass+ and adds it to + # +container+ # with +comment+ - def parse_include context, comment + def parse_extend_or_include klass, container, comment # :nodoc: loop do skip_tkspace_comment - name = get_constant_with_optional_parens + name = get_included_module_with_optional_parens unless name.empty? then - incl = context.add_include RDoc::Include.new(name, comment) - incl.record_location @top_level + obj = container.add klass, name, comment + record_location obj end - return unless TkCOMMA === peek_tk + return if peek_tk.nil? || :on_comma != peek_tk[:kind] get_tk end end ## + # Parses an +included+ with a block feature of ActiveSupport::Concern. + + def parse_included_with_activesupport_concern container, comment # :nodoc: + skip_tkspace_without_nl + tk = get_tk + unless tk[:kind] == :on_lbracket || (tk[:kind] == :on_kw && tk[:text] == 'do') + unget_tk tk + return nil # should be a block + end + + parse_statements container + + container + end + + ## + # Parses identifiers that can create new methods or change visibility. + # + # Returns true if the comment was not consumed. + + def parse_identifier container, single, tk, comment # :nodoc: + case tk[:text] + when 'private', 'protected', 'public', 'private_class_method', + 'public_class_method', 'module_function' then + parse_visibility container, single, tk + return true + when 'private_constant', 'public_constant' + parse_constant_visibility container, single, tk + return true + when 'attr' then + parse_attr container, single, tk, comment + when /^attr_(reader|writer|accessor)$/ then + parse_attr_accessor container, single, tk, comment + when 'alias_method' then + parse_alias container, single, tk, comment + when 'require', 'include' then + # ignore + else + if comment.text =~ /\A#\#$/ then + case comment.text + when /^# +:?attr(_reader|_writer|_accessor)?:/ then + parse_meta_attr container, single, tk, comment + else + method = parse_meta_method container, single, tk, comment + method.params = container.params if + container.params + method.block_params = container.block_params if + container.block_params + end + end + end + + false + end + + ## # Parses a meta-programmed attribute and creates an RDoc::Attr. # # To create foo and bar attributes on class C with comment "My attributes": @@ -867,7 +1287,9 @@ class RDoc::Parser::Ruby < RDoc::Parser tmp = RDoc::CodeObject.new read_documentation_modifiers tmp, RDoc::ATTR_MODIFIERS - if comment.sub!(/^# +:?(attr(_reader|_writer|_accessor)?): *(\S*).*?\n/i, '') then + regexp = /^# +:?(attr(_reader|_writer|_accessor)?): *(\S*).*?\n/i + if regexp =~ comment.text then + comment.text = comment.text.sub(regexp, '') rw = case $1 when 'attr_reader' then 'R' when 'attr_writer' then 'W' @@ -877,95 +1299,109 @@ class RDoc::Parser::Ruby < RDoc::Parser end if name then - att = RDoc::Attr.new get_tkread, name, rw, comment, single == SINGLE - att.record_location @top_level - - context.add_attribute att - @stats.add_attribute att + att = create_attr context, single, name, rw, comment else args.each do |attr_name| - att = RDoc::Attr.new(get_tkread, attr_name, rw, comment, - single == SINGLE) - att.record_location @top_level - - context.add_attribute att - @stats.add_attribute att + att = create_attr context, single, attr_name, rw, comment end end + + att end ## # Parses a meta-programmed method def parse_meta_method(container, single, tk, comment) - column = tk.char_no - offset = tk.seek - line_no = tk.line_no + column = tk[:char_no] + line_no = tk[:line_no] start_collecting_tokens add_token tk add_token_listener self - skip_tkspace false + skip_tkspace_without_nl - singleton = !!comment.sub!(/(^# +:?)(singleton-)(method:)/, '\1\3') + comment.text = comment.text.sub(/(^# +:?)(singleton-)(method:)/, '\1\3') + singleton = !!$~ - if comment.sub!(/^# +:?method: *(\S*).*?\n/i, '') then - name = $1 unless $1.empty? - end + name = parse_meta_method_name comment, tk - if name.nil? then - name_t = get_tk - case name_t - when TkSYMBOL then - name = name_t.text[1..-1] - when TkSTRING then - name = name_t.value[1..-2] - when TkASSIGN then # ignore - remove_token_listener self - return - else - warn "unknown name token #{name_t.inspect} for meta-method '#{tk.name}'" - name = 'unknown' - end - end + return unless name meth = RDoc::MetaMethod.new get_tkread, name - meth.record_location @top_level - meth.offset = offset + record_location meth meth.line = line_no meth.singleton = singleton remove_token_listener self meth.start_collecting_tokens - indent = TkSPACE.new nil, 1, 1 - indent.set_text " " * column - - position_comment = TkCOMMENT.new nil, line_no, 1 - position_comment.value = "# File #{@top_level.absolute_name}, line #{line_no}" - meth.add_tokens [position_comment, NEWLINE_TOKEN, indent] + indent = RDoc::Parser::RipperStateLex::Token.new(1, 1, :on_sp, ' ' * column) + position_comment = RDoc::Parser::RipperStateLex::Token.new(line_no, 1, :on_comment) + position_comment[:text] = "# File #{@top_level.relative_name}, line #{line_no}" + newline = RDoc::Parser::RipperStateLex::Token.new(0, 0, :on_nl, "\n") + meth.add_tokens [position_comment, newline, indent] meth.add_tokens @token_stream + parse_meta_method_params container, single, meth, tk, comment + + meth.comment = comment + + @stats.add_method meth + + meth + end + + ## + # Parses the name of a metaprogrammed method. +comment+ is used to + # determine the name while +tk+ is used in an error message if the name + # cannot be determined. + + def parse_meta_method_name comment, tk # :nodoc: + if comment.text.sub!(/^# +:?method: *(\S*).*?\n/i, '') then + return $1 unless $1.empty? + end + + name_t = get_tk + + if :on_symbol == name_t[:kind] then + name_t[:text][1..-1] + elsif :on_tstring == name_t[:kind] then + name_t[:text][1..-2] + elsif :on_op == name_t[:kind] && '=' == name_t[:text] then # ignore + remove_token_listener self + + nil + else + warn "unknown name token #{name_t.inspect} for meta-method '#{tk[:text]}'" + 'unknown' + end + end + + ## + # Parses the parameters and block for a meta-programmed method. + + def parse_meta_method_params container, single, meth, tk, comment # :nodoc: token_listener meth do meth.params = '' - extract_call_seq comment, meth + look_for_directives_in meth, comment + comment.normalize + comment.extract_call_seq meth container.add_method meth last_tk = tk while tk = get_tk do - case tk - when TkSEMICOLON then + if :on_semicolon == tk[:kind] then break - when TkNL then - break unless last_tk and TkCOMMA === last_tk - when TkSPACE then + elsif :on_nl == tk[:kind] then + break unless last_tk and :on_comma == last_tk[:kind] + elsif :on_sp == tk[:kind] then # expression continues - when TkDO then - unget_tk tk + elsif :on_kw == tk[:kind] && 'do' == tk[:text] then parse_statements container, single, meth break else @@ -973,130 +1409,65 @@ class RDoc::Parser::Ruby < RDoc::Parser end end end - - meth.comment = comment - - @stats.add_method meth end ## # Parses a normal method defined by +def+ def parse_method(container, single, tk, comment) - added_container = nil - meth = nil + singleton = nil + added_container = false name = nil - column = tk.char_no - offset = tk.seek - line_no = tk.line_no + column = tk[:char_no] + line_no = tk[:line_no] start_collecting_tokens add_token tk token_listener self do - @scanner.instance_eval do @lex_state = EXPR_FNAME end + prev_container = container + name, container, singleton = parse_method_name container + added_container = container != prev_container + end - skip_tkspace - name_t = get_tk - back_tk = skip_tkspace - meth = nil - added_container = false + return unless name - dot = get_tk - if TkDOT === dot or TkCOLON2 === dot then - @scanner.instance_eval do @lex_state = EXPR_FNAME end - skip_tkspace - name_t2 = get_tk - - case name_t - when TkSELF, TkMOD then - name = name_t2.name - when TkCONSTANT then - name = name_t2.name - prev_container = container - container = container.find_module_named(name_t.name) - unless container then - added_container = true - obj = name_t.name.split("::").inject(Object) do |state, item| - state.const_get(item) - end rescue nil - - type = obj.class == Class ? RDoc::NormalClass : RDoc::NormalModule - - unless [Class, Module].include?(obj.class) then - warn("Couldn't find #{name_t.name}. Assuming it's a module") - end + meth = RDoc::AnyMethod.new get_tkread, name + look_for_directives_in meth, comment + meth.singleton = single == SINGLE ? true : singleton - if type == RDoc::NormalClass then - sclass = obj.superclass ? obj.superclass.name : nil - container = prev_container.add_class type, name_t.name, sclass - else - container = prev_container.add_module type, name_t.name - end + record_location meth + meth.line = line_no - container.record_location @top_level - end - when TkIDENTIFIER, TkIVAR, TkGVAR then - dummy = RDoc::Context.new - dummy.parent = container - skip_method dummy - return - when TkTRUE, TkFALSE, TkNIL then - klass_name = "#{name_t.name.capitalize}Class" - container = RDoc::TopLevel.find_class_named klass_name - container ||= @top_level.add_class RDoc::NormalClass, klass_name + meth.start_collecting_tokens + indent = RDoc::Parser::RipperStateLex::Token.new(1, 1, :on_sp, ' ' * column) + token = RDoc::Parser::RipperStateLex::Token.new(line_no, 1, :on_comment) + token[:text] = "# File #{@top_level.relative_name}, line #{line_no}" + newline = RDoc::Parser::RipperStateLex::Token.new(0, 0, :on_nl, "\n") + meth.add_tokens [token, newline, indent] + meth.add_tokens @token_stream - name = name_t2.name - else - warn "unexpected method name token #{name_t.inspect}" - # break - skip_method container - return - end + parse_method_params_and_body container, single, meth, added_container - meth = RDoc::AnyMethod.new(get_tkread, name) - meth.singleton = true - else - unget_tk dot - back_tk.reverse_each do |token| - unget_tk token - end + comment.normalize + comment.extract_call_seq meth - name = case name_t - when TkSTAR, TkAMPER then - name_t.text - else - unless name_t.respond_to? :name then - warn "expected method name token, . or ::, got #{name_t.inspect}" - skip_method container - return - end - name_t.name - end - - meth = RDoc::AnyMethod.new get_tkread, name - meth.singleton = (single == SINGLE) - end - end + meth.comment = comment - meth.record_location @top_level - meth.offset = offset - meth.line = line_no + # after end modifiers + read_documentation_modifiers meth, RDoc::METHOD_MODIFIERS - meth.start_collecting_tokens - indent = TkSPACE.new nil, 1, 1 - indent.set_text " " * column + @stats.add_method meth + end - token = TkCOMMENT.new nil, line_no, 1 - token.set_text "# File #{@top_level.absolute_name}, line #{line_no}" - meth.add_tokens [token, NEWLINE_TOKEN, indent] - meth.add_tokens @token_stream + ## + # Parses the parameters and body of +meth+ + def parse_method_params_and_body container, single, meth, added_container token_listener meth do - @scanner.instance_eval do @continue = false end parse_method_parameters meth - if meth.document_self then + if meth.document_self or not @track_visibility then container.add_method meth elsif added_container then container.document_self = false @@ -1105,7 +1476,7 @@ class RDoc::Parser::Ruby < RDoc::Parser # Having now read the method parameters and documentation modifiers, we # now know whether we have to rename #initialize to ::new - if name == "initialize" && !meth.singleton then + if meth.name == "initialize" && !meth.singleton then if meth.dont_rename_initialize then meth.visibility = :protected else @@ -1117,12 +1488,108 @@ class RDoc::Parser::Ruby < RDoc::Parser parse_statements container, single, meth end + end - extract_call_seq comment, meth + ## + # Parses a method that needs to be ignored. - meth.comment = comment + def parse_method_dummy container + dummy = RDoc::Context.new + dummy.parent = container + dummy.store = container.store + skip_method dummy + end - @stats.add_method meth + ## + # Parses the name of a method in +container+. + # + # Returns the method name, the container it is in (for def Foo.name) and if + # it is a singleton or regular method. + + def parse_method_name container # :nodoc: + skip_tkspace + name_t = get_tk + back_tk = skip_tkspace_without_nl + singleton = false + + dot = get_tk + if dot[:kind] == :on_period || (dot[:kind] == :on_op && dot[:text] == '::') then + singleton = true + + name, container = parse_method_name_singleton container, name_t + else + unget_tk dot + back_tk.reverse_each do |token| + unget_tk token + end + + name = parse_method_name_regular container, name_t + end + + return name, container, singleton + end + + ## + # For the given +container+ and initial name token +name_t+ the method name + # is parsed from the token stream for a regular method. + + def parse_method_name_regular container, name_t # :nodoc: + if :on_op == name_t[:kind] && (%w{* & [] []= <<}.include?(name_t[:text])) then + name_t[:text] + else + unless [:on_kw, :on_const, :on_ident].include?(name_t[:kind]) then + warn "expected method name token, . or ::, got #{name_t.inspect}" + skip_method container + return + end + name_t[:text] + end + end + + ## + # For the given +container+ and initial name token +name_t+ the method name + # and the new +container+ (if necessary) are parsed from the token stream + # for a singleton method. + + def parse_method_name_singleton container, name_t # :nodoc: + skip_tkspace + name_t2 = get_tk + + if (:on_kw == name_t[:kind] && 'self' == name_t[:text]) || (:on_op == name_t[:kind] && '%' == name_t[:text]) then + # NOTE: work around '[' being consumed early + if :on_lbracket == name_t2[:kind] + get_tk + name = '[]' + else + name = name_t2[:text] + end + elsif :on_const == name_t[:kind] then + name = name_t2[:text] + + container = get_method_container container, name_t + + return unless container + + name + elsif :on_ident == name_t[:kind] || :on_ivar == name_t[:kind] || :on_gvar == name_t[:kind] then + parse_method_dummy container + + name = nil + elsif (:on_kw == name_t[:kind]) && ('true' == name_t[:text] || 'false' == name_t[:text] || 'nil' == name_t[:text]) then + klass_name = "#{name_t[:text].capitalize}Class" + container = @store.find_class_named klass_name + container ||= @top_level.add_class RDoc::NormalClass, klass_name + + name = name_t2[:text] + else + warn "unexpected method name token #{name_t.inspect}" + # break + skip_method container + + name = nil + end + + return name, container end ## @@ -1130,63 +1597,61 @@ class RDoc::Parser::Ruby < RDoc::Parser def parse_method_or_yield_parameters(method = nil, modifiers = RDoc::METHOD_MODIFIERS) - skip_tkspace false + skip_tkspace_without_nl tk = get_tk + end_token = get_end_token tk + return '' unless end_token - # Little hack going on here. In the statement - # f = 2*(1+yield) - # We see the RPAREN as the next token, so we need - # to exit early. This still won't catch all cases - # (such as "a = yield + 1" - end_token = case tk - when TkLPAREN, TkfLPAREN - TkRPAREN - when TkRPAREN - return "" - else - TkNL - end nest = 0 + continue = false - loop do - case tk - when TkSEMICOLON then + while tk != nil do + case tk[:kind] + when :on_semicolon then break if nest == 0 - when TkLBRACE, TkfLBRACE then + when :on_lbracket then nest += 1 - when TkRBRACE then + when :on_rbracket then + nest -= 1 + when :on_lbrace then + nest += 1 + when :on_rbrace then nest -= 1 if nest <= 0 # we might have a.each { |i| yield i } unget_tk(tk) if nest < 0 break end - when TkLPAREN, TkfLPAREN then + when :on_lparen then nest += 1 - when end_token then - if end_token == TkRPAREN + when end_token[:kind] then + if end_token[:kind] == :on_rparen nest -= 1 - break if @scanner.lex_state == EXPR_END and nest <= 0 + break if nest <= 0 else - break unless @scanner.continue + break end - when TkRPAREN then + when :on_rparen then nest -= 1 - when method && method.block_params.nil? && TkCOMMENT then - unget_tk tk - read_documentation_modifiers method, modifiers - @read.pop - when TkCOMMENT then + when :on_comment, :on_embdoc then @read.pop - when nil then - break + if :on_nl == end_token[:kind] and "\n" == tk[:text][-1] and + (!continue or (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) != 0) then + if method && method.block_params.nil? then + unget_tk tk + read_documentation_modifiers method, modifiers + end + break if !continue and nest <= 0 + end + when :on_comma then + continue = true + when :on_ident then + continue = false if continue end tk = get_tk end - res = get_tkread.gsub(/\s+/, ' ').strip - res = '' if res == ';' - res + get_tkread_clean(/\s+/, ' ') end ## @@ -1197,34 +1662,37 @@ class RDoc::Parser::Ruby < RDoc::Parser # # and add this as the block_params for the method - def parse_method_parameters(method) + def parse_method_parameters method res = parse_method_or_yield_parameters method res = "(#{res})" unless res =~ /\A\(/ method.params = res unless method.params - if method.block_params.nil? then - skip_tkspace false - read_documentation_modifiers method, RDoc::METHOD_MODIFIERS - end + return if method.block_params + + skip_tkspace_without_nl + read_documentation_modifiers method, RDoc::METHOD_MODIFIERS end ## # Parses an RDoc::NormalModule in +container+ with +comment+ - def parse_module(container, single, tk, comment) + def parse_module container, single, tk, comment container, name_t, = get_class_or_module container - name = name_t.name + name = name_t[:text] mod = container.add_module RDoc::NormalModule, name - mod.record_location @top_level + mod.ignore unless container.document_children + record_location mod read_documentation_modifiers mod, RDoc::CLASS_MODIFIERS mod.add_comment comment, @top_level - parse_statements(mod) + parse_statements mod + + # after end modifiers + read_documentation_modifiers mod, RDoc::CLASS_MODIFIERS - @top_level.add_to_classes_or_modules mod @stats.add_module mod end @@ -1235,12 +1703,12 @@ class RDoc::Parser::Ruby < RDoc::Parser skip_tkspace_comment tk = get_tk - if TkLPAREN === tk then + if :on_lparen == tk[:kind] then skip_tkspace_comment tk = get_tk end - name = tk.text if TkSTRING === tk + name = tk[:text][1..-2] if :on_tstring == tk[:kind] if name then @top_level.add_require RDoc::Require.new(name, comment) @@ -1250,11 +1718,43 @@ class RDoc::Parser::Ruby < RDoc::Parser end ## - # The core of the ruby parser. + # Parses a rescue + + def parse_rescue + skip_tkspace_without_nl + + while tk = get_tk + case tk[:kind] + when :on_nl, :on_semicolon, :on_comment then + break + when :on_comma then + skip_tkspace_without_nl + + get_tk if :on_nl == peek_tk[:kind] + end + + skip_tkspace_without_nl + end + end + + ## + # Retrieve comment body without =begin/=end + + def retrieve_comment_body(tk) + if :on_embdoc == tk[:kind] + tk[:text].gsub(/\A=begin.*\n/, '').gsub(/=end\n?\z/, '') + else + tk[:text] + end + end + + ## + # The core of the Ruby parser. def parse_statements(container, single = NORMAL, current_method = nil, - comment = '') - comment.force_encoding @encoding if @encoding + comment = new_comment('')) + raise 'no' unless RDoc::Comment === comment + comment = RDoc::Encoding.change_encoding comment, @encoding if @encoding nest = 1 save_visibility = container.visibility @@ -1265,38 +1765,61 @@ class RDoc::Parser::Ruby < RDoc::Parser keep_comment = false try_parse_comment = false - non_comment_seen = true unless TkCOMMENT === tk + non_comment_seen = true unless (:on_comment == tk[:kind] or :on_embdoc == tk[:kind]) - case tk - when TkNL then - skip_tkspace - tk = get_tk + case tk[:kind] + when :on_nl, :on_ignored_nl, :on_comment, :on_embdoc then + if :on_nl == tk[:kind] or :on_ignored_nl == tk[:kind] + skip_tkspace + tk = get_tk + else + past_tokens = @read.size > 1 ? @read[0..-2] : [] + nl_position = 0 + past_tokens.reverse.each_with_index do |read_tk, i| + if read_tk =~ /^\n$/ then + nl_position = (past_tokens.size - 1) - i + break + elsif read_tk =~ /^#.*\n$/ then + nl_position = ((past_tokens.size - 1) - i) + 1 + break + end + end + comment_only_line = past_tokens[nl_position..-1].all?{ |c| c =~ /^\s+$/ } + unless comment_only_line then + tk = get_tk + end + end - if TkCOMMENT === tk then + if tk and (:on_comment == tk[:kind] or :on_embdoc == tk[:kind]) then if non_comment_seen then # Look for RDoc in a comment about to be thrown away non_comment_seen = parse_comment container, tk, comment unless comment.empty? comment = '' - comment.force_encoding @encoding if @encoding + comment = RDoc::Encoding.change_encoding comment, @encoding if @encoding end - while TkCOMMENT === tk do - comment << tk.text << "\n" + line_no = nil + while tk and (:on_comment == tk[:kind] or :on_embdoc == tk[:kind]) do + comment_body = retrieve_comment_body(tk) + line_no = tk[:line_no] if comment.empty? + comment += comment_body + comment << "\n" unless comment_body =~ /\n\z/ - tk = get_tk - - if TkNL === tk then - skip_tkspace false # leading spaces - tk = get_tk + if comment_body.size > 1 && comment_body =~ /\n\z/ then + skip_tkspace_without_nl # leading spaces end + tk = get_tk end + comment = new_comment comment, line_no + unless comment.empty? then look_for_directives_in container, comment if container.done_documenting then + throw :eof if RDoc::TopLevel === container container.ongoing_visibility = save_visibility end end @@ -1306,98 +1829,90 @@ class RDoc::Parser::Ruby < RDoc::Parser non_comment_seen = true end - unget_tk tk # TODO peek instead of get then unget + unget_tk tk keep_comment = true + container.current_line_visibility = nil - when TkCLASS then - parse_class container, single, tk, comment + when :on_kw then + case tk[:text] + when 'class' then + parse_class container, single, tk, comment - when TkMODULE then - if container.document_children then + when 'module' then parse_module container, single, tk, comment - else - nest += 1 - end - when TkDEF then - parse_method container, single, tk, comment + when 'def' then + parse_method container, single, tk, comment - when TkCONSTANT then - unless parse_constant container, tk, comment then - try_parse_comment = true - end + when 'alias' then + parse_alias container, single, tk, comment unless current_method - when TkALIAS then - parse_alias container, single, tk, comment unless current_method + when 'yield' then + if current_method.nil? then + warn "Warning: yield outside of method" if container.document_self + else + parse_yield container, single, tk, current_method + end - when TkYIELD then - if current_method.nil? then - warn "Warning: yield outside of method" if container.document_self - else - parse_yield container, single, tk, current_method - end + when 'until', 'while' then + if (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) == 0 + nest += 1 + skip_optional_do_after_expression + end - # Until and While can have a 'do', which shouldn't increase the nesting. - # We can't solve the general case, but we can handle most occurrences by - # ignoring a do at the end of a line. + # Until and While can have a 'do', which shouldn't increase the nesting. + # We can't solve the general case, but we can handle most occurrences by + # ignoring a do at the end of a line. - when TkUNTIL, TkWHILE then - nest += 1 - skip_optional_do_after_expression + # 'for' is trickier + when 'for' then + nest += 1 + skip_for_variable + skip_optional_do_after_expression - # 'for' is trickier - when TkFOR then - nest += 1 - skip_for_variable - skip_optional_do_after_expression + when 'case', 'do', 'if', 'unless', 'begin' then + if (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) == 0 + nest += 1 + end - when TkCASE, TkDO, TkIF, TkUNLESS, TkBEGIN then - nest += 1 + when 'super' then + current_method.calls_super = true if current_method - when TkIDENTIFIER then - if nest == 1 and current_method.nil? then - case tk.name - when 'private', 'protected', 'public', 'private_class_method', - 'public_class_method', 'module_function' then - parse_visibility container, single, tk - keep_comment = true - when 'attr' then - parse_attr container, single, tk, comment - when /^attr_(reader|writer|accessor)$/ then - parse_attr_accessor container, single, tk, comment - when 'alias_method' then - parse_alias container, single, tk, comment - when 'require', 'include' then - # ignore - else - if comment =~ /\A#\#$/ then - case comment - when /^# +:?attr(_reader|_writer|_accessor)?:/ then - parse_meta_attr container, single, tk, comment - else - parse_meta_method container, single, tk, comment - end - end + when 'rescue' then + parse_rescue + + when 'end' then + nest -= 1 + if nest == 0 then + container.ongoing_visibility = save_visibility + + parse_comment container, tk, comment unless comment.empty? + + return end end - case tk.name + when :on_const then + unless parse_constant container, tk, comment, current_method then + try_parse_comment = true + end + + when :on_ident then + if nest == 1 and current_method.nil? then + keep_comment = parse_identifier container, single, tk, comment + end + + case tk[:text] when "require" then parse_require container, comment when "include" then - parse_include container, comment + parse_extend_or_include RDoc::Include, container, comment + when "extend" then + parse_extend_or_include RDoc::Extend, container, comment + when "included" then + parse_included_with_activesupport_concern container, comment end - when TkEND then - nest -= 1 - if nest == 0 then - read_documentation_modifiers container, RDoc::CLASS_MODIFIERS - container.ongoing_visibility = save_visibility - - parse_comment container, tk, comment unless comment.empty? - - return - end else try_parse_comment = nest == 1 end @@ -1410,65 +1925,87 @@ class RDoc::Parser::Ruby < RDoc::Parser end unless keep_comment then - comment = '' - comment.force_encoding @encoding if @encoding + comment = new_comment '' + comment = RDoc::Encoding.change_encoding comment, @encoding if @encoding + container.params = nil + container.block_params = nil end - begin - get_tkread - skip_tkspace false - end while peek_tk == TkNL + consume_trailing_spaces end + + container.params = nil + container.block_params = nil end ## # Parse up to +no+ symbol arguments def parse_symbol_arg(no = nil) - args = [] - skip_tkspace_comment - case tk = get_tk - when TkLPAREN - loop do - skip_tkspace_comment - if tk1 = parse_symbol_in_arg - args.push tk1 - break if no and args.size >= no - end + tk = get_tk + if tk[:kind] == :on_lparen + parse_symbol_arg_paren no + else + parse_symbol_arg_space no, tk + end + end - skip_tkspace_comment - case tk2 = get_tk - when TkRPAREN - break - when TkCOMMA - else - warn("unexpected token: '#{tk2.inspect}'") if $DEBUG_RDOC - break - end + ## + # Parses up to +no+ symbol arguments surrounded by () and places them in + # +args+. + + def parse_symbol_arg_paren no # :nodoc: + args = [] + + loop do + skip_tkspace_comment + if tk1 = parse_symbol_in_arg + args.push tk1 + break if no and args.size >= no end - else - unget_tk tk - if tk = parse_symbol_in_arg - args.push tk - return args if no and args.size >= no + + skip_tkspace_comment + case (tk2 = get_tk)[:kind] + when :on_rparen + break + when :on_comma + else + warn("unexpected token: '#{tk2.inspect}'") if $DEBUG_RDOC + break end + end + + args + end - loop do - skip_tkspace false + ## + # Parses up to +no+ symbol arguments separated by spaces and places them in + # +args+. - tk1 = get_tk - unless TkCOMMA === tk1 then - unget_tk tk1 - break - end + def parse_symbol_arg_space no, tk # :nodoc: + args = [] - skip_tkspace_comment - if tk = parse_symbol_in_arg - args.push tk - break if no and args.size >= no - end + unget_tk tk + if tk = parse_symbol_in_arg + args.push tk + return args if no and args.size >= no + end + + loop do + skip_tkspace_without_nl + + tk1 = get_tk + if tk1.nil? || :on_comma != tk1[:kind] then + unget_tk tk1 + break + end + + skip_tkspace_comment + if tk = parse_symbol_in_arg + args.push tk + break if no and args.size >= no end end @@ -1479,12 +2016,12 @@ class RDoc::Parser::Ruby < RDoc::Parser # Returns symbol text from the next token def parse_symbol_in_arg - case tk = get_tk - when TkSYMBOL - tk.text.sub(/^:/, '') - when TkSTRING - eval @read[-1] - when TkDSTRING, TkIDENTIFIER then + tk = get_tk + if :on_symbol == tk[:kind] then + tk[:text].sub(/^:/, '') + elsif :on_tstring == tk[:kind] then + tk[:text][1..-2] + elsif :on_dstring == tk[:kind] or :on_ident == tk[:kind] then nil # ignore else warn("Expected symbol or string, got #{tk.inspect}") if $DEBUG_RDOC @@ -1497,8 +2034,13 @@ class RDoc::Parser::Ruby < RDoc::Parser def parse_top_level_statements container comment = collect_first_comment + look_for_directives_in container, comment + throw :eof if container.done_documenting + + @markup = comment.format + # HACK move if to RDoc::Context#comment= container.comment = comment if container.document_self unless comment.empty? @@ -1509,78 +2051,39 @@ class RDoc::Parser::Ruby < RDoc::Parser # Determines the visibility in +container+ from +tk+ def parse_visibility(container, single, tk) - singleton = (single == SINGLE) - - vis_type = tk.name - - vis = case vis_type - when 'private' then :private - when 'protected' then :protected - when 'public' then :public - when 'private_class_method' then - singleton = true - :private - when 'public_class_method' then - singleton = true - :public - when 'module_function' then - singleton = true - :public - else - raise RDoc::Error, "Invalid visibility: #{tk.name}" - end + vis_type, vis, singleton = get_visibility_information tk, single skip_tkspace_comment false - case peek_tk - # Ryan Davis suggested the extension to ignore modifiers, because he - # often writes - # - # protected unless $TESTING - # - when TkNL, TkUNLESS_MOD, TkIF_MOD, TkSEMICOLON then + ptk = peek_tk + # Ryan Davis suggested the extension to ignore modifiers, because he + # often writes + # + # protected unless $TESTING + # + if [:on_nl, :on_semicolon].include?(ptk[:kind]) || (:on_kw == ptk[:kind] && (['if', 'unless'].include?(ptk[:text]))) then container.ongoing_visibility = vis + elsif :on_kw == ptk[:kind] && 'def' == ptk[:text] + container.current_line_visibility = vis else - new_methods = [] - - case vis_type - when 'module_function' then - args = parse_symbol_arg - container.set_visibility_for args, :private, false - - container.methods_matching args do |m| - s_m = m.dup - s_m.record_location @top_level - s_m.singleton = true - new_methods << s_m - end - when 'public_class_method', 'private_class_method' then - args = parse_symbol_arg - - container.methods_matching args, true do |m| - if m.parent != container then - m = m.dup - m.record_location @top_level - new_methods << m - end + update_visibility container, vis_type, vis, singleton + end + end - m.visibility = vis - end - else - args = parse_symbol_arg - container.set_visibility_for args, vis, singleton - end + ## + # Parses a Module#private_constant or Module#public_constant call from +tk+. - new_methods.each do |method| - case method - when RDoc::AnyMethod then - container.add_method method - when RDoc::Attr then - container.add_attribute method - end - method.visibility = vis - end + def parse_constant_visibility(container, single, tk) + args = parse_symbol_arg + case tk[:text] + when 'private_constant' + vis = :private + when 'public_constant' + vis = :public + else + raise RDoc::Error, 'Unreachable' end + container.set_constant_visibility_for args, vis end ## @@ -1590,7 +2093,6 @@ class RDoc::Parser::Ruby < RDoc::Parser return if method.block_params get_tkread - @scanner.instance_eval { @continue = false } method.block_params = parse_method_or_yield_parameters end @@ -1604,28 +2106,45 @@ class RDoc::Parser::Ruby < RDoc::Parser # # class MyClass # :nodoc: # - # We return the directive name and any parameters as a two element array + # We return the directive name and any parameters as a two element array if + # the name is in +allowed+. A directive can be found anywhere up to the end + # of the current line. def read_directive allowed - tk = get_tk + tokens = [] - if TkCOMMENT === tk then - return unless tk.text =~ /\s*:?(\w+):\s*(.*)/ + while tk = get_tk do + tokens << tk - directive = $1.downcase + if :on_nl == tk[:kind] or (:on_kw == tk[:kind] && 'def' == tk[:text]) then + return + elsif :on_comment == tk[:kind] or :on_embdoc == tk[:kind] then + return unless tk[:text] =~ /\s*:?([\w-]+):\s*(.*)/ - return [directive, $2] if allowed.include? directive - else - unget_tk tk + directive = $1.downcase + + return [directive, $2] if allowed.include? directive + + return + end + end + ensure + unless tokens.length == 1 and (:on_comment == tokens.first[:kind] or :on_embdoc == tokens.first[:kind]) then + tokens.reverse_each do |token| + unget_tk token + end end end ## - # Handles the directive for +context+ if the directive is listed in +allow+. - # This method is called for directives following a definition. + # Handles directives following the definition for +context+ (any + # RDoc::CodeObject) if the directives are +allowed+ at this point. + # + # See also RDoc::Markup::PreProcess#handle_directive - def read_documentation_modifiers context, allow - directive, value = read_directive allow + def read_documentation_modifiers context, allowed + skip_tkspace_without_nl + directive, value = read_directive allowed return unless directive @@ -1639,18 +2158,20 @@ class RDoc::Parser::Ruby < RDoc::Parser end ## - # Removes private comments from +comment+ + # Records the location of this +container+ in the file for this parser and + # adds it to the list of classes and modules in the file. - def remove_private_comments(comment) - empty = '' - empty.force_encoding comment.encoding if Object.const_defined? :Encoding + def record_location container # :nodoc: + case container + when RDoc::ClassModule then + @top_level.add_to_classes_or_modules container + end - comment.gsub!(/^#--.*?^#\+\+\n?/m, empty) - comment.sub!(/^#--.*/m, '') + container.record_location @top_level end ## - # Scans this ruby file for ruby constructs + # Scans this Ruby file for Ruby constructs def scan reset @@ -1658,30 +2179,35 @@ class RDoc::Parser::Ruby < RDoc::Parser catch :eof do begin parse_top_level_statements @top_level + rescue StandardError => e - bytes = '' - - 20.times do @scanner.ungetc end - count = 0 - 60.times do |i| - count = i - byte = @scanner.getc - break unless byte - bytes << byte + if @content.include?('<%') and @content.include?('%>') then + # Maybe, this is ERB. + $stderr.puts "\033[2KRDoc detects ERB file. Skips it for compatibility:" + $stderr.puts @file_name + return + end + + if @scanner_point >= @scanner.size + now_line_no = @scanner[@scanner.size - 1][:line_no] + else + now_line_no = peek_tk[:line_no] end - count -= 20 - count.times do @scanner.ungetc end + first_tk_index = @scanner.find_index { |tk| tk[:line_no] == now_line_no } + last_tk_index = @scanner.find_index { |tk| tk[:line_no] == now_line_no + 1 } + last_tk_index = last_tk_index ? last_tk_index - 1 : @scanner.size - 1 + code = @scanner[first_tk_index..last_tk_index].map{ |t| t[:text] }.join $stderr.puts <<-EOF -#{self.class} failure around line #{@scanner.line_no} of +#{self.class} failure around line #{now_line_no} of #{@file_name} EOF - unless bytes.empty? then + unless code.empty? then + $stderr.puts code $stderr.puts - $stderr.puts bytes.inspect end raise e @@ -1695,58 +2221,52 @@ class RDoc::Parser::Ruby < RDoc::Parser # while, until, and for have an optional do def skip_optional_do_after_expression - skip_tkspace false + skip_tkspace_without_nl tk = get_tk - case tk - when TkLPAREN, TkfLPAREN then - end_token = TkRPAREN - else - end_token = TkNL - end b_nest = 0 nest = 0 - @scanner.instance_eval { @continue = false } loop do - case tk - when TkSEMICOLON then + break unless tk + case tk[:kind] + when :on_semicolon, :on_nl, :on_ignored_nl then break if b_nest.zero? - when TkLPAREN, TkfLPAREN then + when :on_lparen then nest += 1 - when TkBEGIN then - b_nest += 1 - when TkEND then - b_nest -= 1 - when TkDO - break if nest.zero? - when end_token then - if end_token == TkRPAREN - nest -= 1 - break if @scanner.lex_state == EXPR_END and nest.zero? - else - break unless @scanner.continue + when :on_rparen then + nest -= 1 + when :on_kw then + case tk[:text] + when 'begin' + b_nest += 1 + when 'end' + b_nest -= 1 + when 'do' + break if nest.zero? + end + when :on_comment, :on_embdoc then + if b_nest.zero? and "\n" == tk[:text][-1] then + break end - when nil then - break end tk = get_tk end - skip_tkspace false + skip_tkspace_without_nl - get_tk if TkDO === peek_tk + get_tk if peek_tk && :on_kw == peek_tk[:kind] && 'do' == peek_tk[:text] end ## # skip the var [in] part of a 'for' statement def skip_for_variable - skip_tkspace false + skip_tkspace_without_nl + get_tk + skip_tkspace_without_nl tk = get_tk - skip_tkspace false - tk = get_tk - unget_tk(tk) unless TkIN === tk + unget_tk(tk) unless :on_kw == tk[:kind] and 'in' == tk[:text] end ## @@ -1763,20 +2283,63 @@ class RDoc::Parser::Ruby < RDoc::Parser def skip_tkspace_comment(skip_nl = true) loop do - skip_tkspace skip_nl - return unless TkCOMMENT === peek_tk + skip_nl ? skip_tkspace : skip_tkspace_without_nl + next_tk = peek_tk + return if next_tk.nil? || (:on_comment != next_tk[:kind] and :on_embdoc != next_tk[:kind]) get_tk end end ## - # Prints +msg+ to +$stderr+ unless we're being quiet + # Updates visibility in +container+ from +vis_type+ and +vis+. - def warn(msg) - return if @options.quiet - msg = make_message msg - $stderr.puts msg + def update_visibility container, vis_type, vis, singleton # :nodoc: + new_methods = [] + + case vis_type + when 'module_function' then + args = parse_symbol_arg + container.set_visibility_for args, :private, false + + container.methods_matching args do |m| + s_m = m.dup + record_location s_m + s_m.singleton = true + new_methods << s_m + end + when 'public_class_method', 'private_class_method' then + args = parse_symbol_arg + + container.methods_matching args, true do |m| + if m.parent != container then + m = m.dup + record_location m + new_methods << m + end + + m.visibility = vis + end + else + args = parse_symbol_arg + container.set_visibility_for args, vis, singleton + end + + new_methods.each do |method| + case method + when RDoc::AnyMethod then + container.add_method method + when RDoc::Attr then + container.add_attribute method + end + method.visibility = vis + end end -end + ## + # Prints +message+ to +$stderr+ unless we're being quiet + + def warn message + @options.warn make_message message + end +end diff --git a/lib/rdoc/parser/ruby_tools.rb b/lib/rdoc/parser/ruby_tools.rb index 678f721624..681d7166ce 100644 --- a/lib/rdoc/parser/ruby_tools.rb +++ b/lib/rdoc/parser/ruby_tools.rb @@ -1,11 +1,9 @@ +# frozen_string_literal: true ## -# Collection of methods for writing parsers against RDoc::RubyLex and -# RDoc::RubyToken +# Collection of methods for writing parsers module RDoc::Parser::RubyTools - include RDoc::RubyToken - ## # Adds a token listener +obj+, but you should probably use token_listener @@ -21,38 +19,24 @@ module RDoc::Parser::RubyTools tk = nil if @tokens.empty? then - tk = @scanner.token - @read.push @scanner.get_readed - puts "get_tk1 => #{tk.inspect}" if $TOKEN_DEBUG + if @scanner_point >= @scanner.size + return nil + else + tk = @scanner[@scanner_point] + @scanner_point += 1 + @read.push tk[:text] + end else @read.push @unget_read.shift tk = @tokens.shift - puts "get_tk2 => #{tk.inspect}" if $TOKEN_DEBUG end - tk = nil if TkEND_OF_SCRIPT === tk - - if TkSYMBEG === tk then - set_token_position tk.line_no, tk.char_no - - case tk1 = get_tk - when TkId, TkOp, TkSTRING, TkDSTRING, TkSTAR, TkAMPER then - if tk1.respond_to?(:name) then - tk = Token(TkSYMBOL).set_text(":" + tk1.name) - else - tk = Token(TkSYMBOL).set_text(":" + tk1.text) - end - - # remove the identifier we just read (we're about to replace it with a - # symbol) - @token_listeners.each do |obj| - obj.pop_token - end if @token_listeners - else - tk = tk1 - end + if tk == nil || :on___end__ == tk[:kind] + tk = nil end + return nil unless tk + # inform any listeners of our shiny new token @token_listeners.each do |obj| obj.add_token(tk) @@ -70,7 +54,13 @@ module RDoc::Parser::RubyTools loop do tk = get_tk - case tk when *tokens then unget_tk tk; break end + + case tk + when *tokens then + unget_tk tk + break + end + read << tk end @@ -116,19 +106,34 @@ module RDoc::Parser::RubyTools @tokens = [] @unget_read = [] @nest = 0 + @scanner_point = 0 + end + + ## + # Skips whitespace tokens including newlines + + def skip_tkspace + tokens = [] + + while (tk = get_tk) and (:on_sp == tk[:kind] or :on_nl == tk[:kind] or :on_ignored_nl == tk[:kind]) do + tokens.push(tk) + end + + unget_tk(tk) + tokens end ## - # Skips whitespace tokens including newlines if +skip_nl+ is true + # Skips whitespace tokens excluding newlines - def skip_tkspace(skip_nl = true) # HACK dup + def skip_tkspace_without_nl tokens = [] - while TkSPACE === (tk = get_tk) or (skip_nl and TkNL === tk) do - tokens.push tk + while (tk = get_tk) and :on_sp == tk[:kind] do + tokens.push(tk) end - unget_tk tk + unget_tk(tk) tokens end diff --git a/lib/rdoc/parser/simple.rb b/lib/rdoc/parser/simple.rb index 1e82eb5097..b1dabad0f8 100644 --- a/lib/rdoc/parser/simple.rb +++ b/lib/rdoc/parser/simple.rb @@ -1,9 +1,12 @@ +# frozen_string_literal: true ## # Parse a non-source file. We basically take the whole thing as one big # comment. class RDoc::Parser::Simple < RDoc::Parser + include RDoc::Parser::Text + parse_files_matching(//) attr_reader :content # :nodoc: @@ -16,7 +19,7 @@ class RDoc::Parser::Simple < RDoc::Parser preprocess = RDoc::Markup::PreProcess.new @file_name, @options.rdoc_include - preprocess.handle @content, @top_level + @content = preprocess.handle @content, @top_level end ## @@ -24,26 +27,35 @@ class RDoc::Parser::Simple < RDoc::Parser def scan comment = remove_coding_comment @content - comment = remove_private_comments comment + comment = remove_private_comment comment + + comment = RDoc::Comment.new comment, @top_level @top_level.comment = comment - @top_level.parser = self.class @top_level end ## - # Removes comments wrapped in <tt>--/++</tt> - - def remove_private_comments text - text.gsub(/^--\n.*?^\+\+/m, '').sub(/^--\n.*/m, '') - end - - ## # Removes the encoding magic comment from +text+ def remove_coding_comment text text.sub(/\A# .*coding[=:].*$/, '') end -end + ## + # Removes private comments. + # + # Unlike RDoc::Comment#remove_private this implementation only looks for two + # dashes at the beginning of the line. Three or more dashes are considered + # to be a rule and ignored. + + def remove_private_comment comment + # Workaround for gsub encoding for Ruby 1.9.2 and earlier + empty = '' + empty = RDoc::Encoding.change_encoding empty, comment.encoding + + comment = comment.gsub(%r%^--\n.*?^\+\+\n?%m, empty) + comment.sub(%r%^--\n.*%m, empty) + end +end diff --git a/lib/rdoc/parser/text.rb b/lib/rdoc/parser/text.rb new file mode 100644 index 0000000000..01de0cc595 --- /dev/null +++ b/lib/rdoc/parser/text.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true +## +# Indicates this parser is text and doesn't contain code constructs. +# +# Include this module in a RDoc::Parser subclass to make it show up as a file, +# not as part of a class or module. +#-- +# This is not named File to avoid overriding ::File + +module RDoc::Parser::Text +end + |
