diff options
Diffstat (limited to 'lib/rdoc/parser')
| -rw-r--r-- | lib/rdoc/parser/c.rb | 454 | ||||
| -rw-r--r-- | lib/rdoc/parser/changelog.rb | 173 | ||||
| -rw-r--r-- | lib/rdoc/parser/markdown.rb | 1 | ||||
| -rw-r--r-- | lib/rdoc/parser/rd.rb | 1 | ||||
| -rw-r--r-- | lib/rdoc/parser/ripper_state_lex.rb | 590 | ||||
| -rw-r--r-- | lib/rdoc/parser/ruby.rb | 1121 | ||||
| -rw-r--r-- | lib/rdoc/parser/ruby_tools.rb | 66 | ||||
| -rw-r--r-- | lib/rdoc/parser/simple.rb | 6 | ||||
| -rw-r--r-- | lib/rdoc/parser/text.rb | 1 |
9 files changed, 1668 insertions, 745 deletions
diff --git a/lib/rdoc/parser/c.rb b/lib/rdoc/parser/c.rb index fd336f5f5b..b89aaa6dcc 100644 --- a/lib/rdoc/parser/c.rb +++ b/lib/rdoc/parser/c.rb @@ -1,16 +1,17 @@ +# frozen_string_literal: true require 'tsort' ## # RDoc::Parser::C attempts to parse C extension files. It looks for -# the standard patterns that you find in extensions: <tt>rb_define_class, -# rb_define_method</tt> and so on. It tries to find the corresponding +# the standard patterns that you find in extensions: +rb_define_class+, +# +rb_define_method+ and so on. It tries to find the corresponding # C source for the methods and extract comments, but if we fail # we don't worry too much. # # The comments associated with a Ruby method are extracted from the C # comment block associated with the routine that _implements_ that # method, that is to say the method whose name is given in the -# <tt>rb_define_method</tt> call. For example, you might write: +# +rb_define_method+ call. For example, you might write: # # /* # * Returns a new array that is a one-dimensional flattening of this @@ -23,8 +24,7 @@ require 'tsort' # * a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] # */ # static VALUE -# rb_ary_flatten(ary) -# VALUE ary; +# rb_ary_flatten(VALUE ary) # { # ary = rb_obj_dup(ary); # rb_ary_flatten_bang(ary); @@ -34,16 +34,16 @@ require 'tsort' # ... # # void -# Init_Array() +# Init_Array(void) # { # ... # rb_define_method(rb_cArray, "flatten", rb_ary_flatten, 0); # -# Here RDoc will determine from the rb_define_method line that there's a +# Here RDoc will determine from the +rb_define_method+ line that there's a # method called "flatten" in class Array, and will look for the implementation -# in the method rb_ary_flatten. It will then use the comment from that +# in the method +rb_ary_flatten+. It will then use the comment from that # method in the HTML output. This method must be in the same source file -# as the rb_define_method. +# as the +rb_define_method+. # # The comment blocks may include special directives: # @@ -69,15 +69,15 @@ require 'tsort' # [Document-variable: +name+] # Documentation for the named +rb_define_variable+ # -# [Document-method: +method_name+] +# [Document-method\: +method_name+] # Documentation for the named method. Use this when the method name is # unambiguous. # -# [Document-method: <tt>ClassName::method_name<tt>] +# [Document-method\: <tt>ClassName::method_name</tt>] # Documentation for a singleton method in the given class. Use this when # the method name alone is ambiguous. # -# [Document-method: <tt>ClassName#method_name<tt>] +# [Document-method\: <tt>ClassName#method_name</tt>] # Documentation for a instance method in the given class. Use this when the # method name alone is ambiguous. # @@ -173,6 +173,8 @@ class RDoc::Parser::C < RDoc::Parser @classes = load_variable_map :c_class_variables @singleton_classes = load_variable_map :c_singleton_class_variables + @markup = @options.markup + # class_variable => { function => [method, ...] } @methods = Hash.new { |h, f| h[f] = Hash.new { |i, m| i[m] = [] } } @@ -210,47 +212,6 @@ class RDoc::Parser::C < RDoc::Parser end ## - # Removes duplicate call-seq entries for methods using the same - # implementation. - - def deduplicate_call_seq - @methods.each do |var_name, functions| - class_name = @known_classes[var_name] - class_obj = find_class var_name, class_name - - functions.each_value do |method_names| - next if method_names.length == 1 - - method_names.each do |method_name| - deduplicate_method_name class_obj, method_name - end - end - end - end - - ## - # If two ruby methods share a C implementation (and comment) this - # deduplicates the examples in the call_seq for the method to reduce - # confusion in the output. - - def deduplicate_method_name class_obj, method_name # :nodoc: - return unless - method = class_obj.method_list.find { |m| m.name == method_name } - return unless call_seq = method.call_seq - - method_name = method_name[0, 1] if method_name =~ /\A\[/ - - entries = call_seq.split "\n" - - matching = entries.select do |entry| - entry =~ /^\w*\.?#{Regexp.escape method_name}/ or - entry =~ /\s#{Regexp.escape method_name}\s/ - end - - method.call_seq = matching.join "\n" - end - - ## # Scans #content for rb_define_alias def do_aliases @@ -268,24 +229,30 @@ class RDoc::Parser::C < RDoc::Parser end class_obj = find_class var_name, class_name - - al = RDoc::Alias.new '', old_name, new_name, '' - al.singleton = @singleton_classes.key? var_name - comment = find_alias_comment var_name, new_name, old_name - comment.normalize - - al.comment = comment - - al.record_location @top_level - - class_obj.add_alias al - @stats.add_alias al + if comment.to_s.empty? and existing_method = class_obj.method_list.find { |m| m.name == old_name} + comment = existing_method.comment + end + add_alias(var_name, class_obj, old_name, new_name, comment) end end ## + # Add alias, either from a direct alias definition, or from two + # method that reference the same function. + + def add_alias(var_name, class_obj, old_name, new_name, comment) + al = RDoc::Alias.new '', old_name, new_name, '' + al.singleton = @singleton_classes.key? var_name + al.comment = comment + al.record_location @top_level + class_obj.add_alias al + @stats.add_alias al + al + end + + ## # Scans #content for rb_attr and rb_define_attr def do_attrs @@ -323,12 +290,100 @@ class RDoc::Parser::C < RDoc::Parser # Scans #content for rb_define_class, boot_defclass, rb_define_class_under # and rb_singleton_class - def do_classes - do_boot_defclass - do_define_class - do_define_class_under - do_singleton_class - do_struct_define_without_accessor + def do_classes_and_modules + do_boot_defclass if @file_name == "class.c" + + @content.scan( + %r( + (?<var_name>[\w\.]+)\s* = + \s*rb_(?: + define_(?: + class(?: # rb_define_class(class_name_1, parent_name_1) + \s*\( + \s*"(?<class_name_1>\w+)", + \s*(?<parent_name_1>\w+)\s* + \) + | + _under\s*\( # rb_define_class_under(class_under, class_name2, parent_name2...) + \s* (?<class_under>\w+), + \s* "(?<class_name_2>\w+)", + \s* + (?: + (?<parent_name_2>[\w\*\s\(\)\.\->]+) | + rb_path2class\("(?<path>[\w:]+)"\) + ) + \s*\) + ) + | + module(?: # rb_define_module(module_name_1) + \s*\( + \s*"(?<module_name_1>\w+)"\s* + \) + | + _under\s*\( # rb_define_module_under(module_under, module_name_2) + \s*(?<module_under>\w+), + \s*"(?<module_name_2>\w+)" + \s*\) + ) + ) + | + struct_define_without_accessor\s*\( # rb_struct_define_without_accessor(class_name_3, parent_name_3, ...) + \s*"(?<class_name_3>\w+)", + \s*(?<parent_name_3>\w+), + \s*\w+, # Allocation function + (?:\s*"\w+",)* # Attributes + \s*NULL + \) + | + singleton_class\s*\( # rb_singleton_class(target_class_name) + \s*(?<target_class_name>\w+) + \) + ) + )mx + ) do + class_name = $~[:class_name_1] + type = :class + if class_name + # rb_define_class(class_name_1, parent_name_1) + parent_name = $~[:parent_name_1] + #under = nil + else + class_name = $~[:class_name_2] + if class_name + # rb_define_class_under(class_under, class_name2, parent_name2...) + parent_name = $~[:parent_name_2] || $~[:path] + under = $~[:class_under] + else + class_name = $~[:class_name_3] + if class_name + # rb_struct_define_without_accessor(class_name_3, parent_name_3, ...) + parent_name = $~[:parent_name_3] + #under = nil + else + type = :module + class_name = $~[:module_name_1] + #parent_name = nil + if class_name + # rb_define_module(module_name_1) + #under = nil + else + class_name = $~[:module_name_2] + if class_name + # rb_define_module_under(module_under, module_name_1) + under = $~[:module_under] + else + # rb_singleton_class(target_class_name) + target_class_name = $~[:target_class_name] + handle_singleton $~[:var_name], target_class_name + next + end + end + end + end + end + + handle_class_module($~[:var_name], type, class_name, parent_name, under) + end end ## @@ -377,65 +432,6 @@ class RDoc::Parser::C < RDoc::Parser end end - ## - # Scans #content for rb_define_class - - def do_define_class - # The '.' lets us handle SWIG-generated files - @content.scan(/([\w\.]+)\s* = \s*rb_define_class\s* - \( - \s*"(\w+)", - \s*(\w+)\s* - \)/mx) do |var_name, class_name, parent| - handle_class_module(var_name, :class, class_name, parent, nil) - end - end - - ## - # Scans #content for rb_define_class_under - - def do_define_class_under - @content.scan(/([\w\.]+)\s* = # var_name - \s*rb_define_class_under\s* - \( - \s* (\w+), # under - \s* "(\w+)", # class_name - \s* - (?: - ([\w\*\s\(\)\.\->]+) | # parent_name - rb_path2class\("([\w:]+)"\) # path - ) - \s* - \) - /mx) do |var_name, under, class_name, parent_name, path| - parent = path || parent_name - - handle_class_module var_name, :class, class_name, parent, under - end - end - - ## - # Scans #content for rb_define_module - - def do_define_module - @content.scan(/(\w+)\s* = \s*rb_define_module\s*\(\s*"(\w+)"\s*\)/mx) do - |var_name, class_name| - handle_class_module(var_name, :module, class_name, nil, nil) - end - end - - ## - # Scans #content for rb_define_module_under - - def do_define_module_under - @content.scan(/(\w+)\s* = \s*rb_define_module_under\s* - \( - \s*(\w+), - \s*"(\w+)" - \s*\)/mx) do |var_name, in_module, class_name| - handle_class_module(var_name, :module, class_name, nil, in_module) - end - end ## # Scans #content for rb_include_module @@ -445,7 +441,7 @@ class RDoc::Parser::C < RDoc::Parser next unless cls = @classes[c] m = @known_classes[m] || m - comment = RDoc::Comment.new '', @top_level + comment = new_comment '', @top_level, :c incl = cls.add_include RDoc::Include.new(m, comment) incl.record_location @top_level end @@ -518,42 +514,6 @@ class RDoc::Parser::C < RDoc::Parser end ## - # Scans #content for rb_define_module and rb_define_module_under - - def do_modules - do_define_module - do_define_module_under - end - - ## - # Scans #content for rb_singleton_class - - def do_singleton_class - @content.scan(/([\w\.]+)\s* = \s*rb_singleton_class\s* - \( - \s*(\w+) - \s*\)/mx) do |sclass_var, class_var| - handle_singleton sclass_var, class_var - end - end - - ## - # Scans #content for struct_define_without_accessor - - def do_struct_define_without_accessor - @content.scan(/([\w\.]+)\s* = \s*rb_struct_define_without_accessor\s* - \( - \s*"(\w+)", # Class name - \s*(\w+), # Parent class - \s*\w+, # Allocation function - (\s*"\w+",)* # Attributes - \s*NULL - \)/mx) do |var_name, class_name, parent| - handle_class_module(var_name, :class, class_name, parent, nil) - end - end - - ## # Finds the comment for an alias on +class_name+ from +new_name+ to # +old_name+ @@ -563,7 +523,7 @@ class RDoc::Parser::C < RDoc::Parser \s*"#{Regexp.escape new_name}"\s*, \s*"#{Regexp.escape old_name}"\s*\);%xm - RDoc::Comment.new($1 || '', @top_level) + new_comment($1 || '', @top_level, :c) end ## @@ -602,21 +562,49 @@ class RDoc::Parser::C < RDoc::Parser '' end - RDoc::Comment.new comment, @top_level + new_comment comment, @top_level, :c + end + + ## + # Generate a Ruby-method table + + def gen_body_table file_content + table = {} + file_content.scan(%r{ + ((?>/\*.*?\*/\s*)?) + ((?:(?:\w+)\s+)? + (?:intern\s+)?VALUE\s+(\w+) + \s*(?:\([^)]*\))(?:[^\);]|$)) + | ((?>/\*.*?\*/\s*))^\s*(\#\s*define\s+(\w+)\s+(\w+)) + | ^\s*\#\s*define\s+(\w+)\s+(\w+) + }xm) do + case + when $1 + table[$3] = [:func_def, $1, $2, $~.offset(2)] if !table[$3] || table[$3][0] != :func_def + when $4 + table[$6] = [:macro_def, $4, $5, $~.offset(5), $7] if !table[$6] || table[$6][0] == :macro_alias + when $8 + table[$8] ||= [:macro_alias, $9] + end + end + table end ## # Find the C code corresponding to a Ruby method def find_body class_name, meth_name, meth_obj, file_content, quiet = false - case file_content - when %r%((?>/\*.*?\*/\s*)?) - ((?:(?:\w+)\s+)? - (?:intern\s+)?VALUE\s+#{meth_name} - \s*(\([^)]*\))([^;]|$))%xm then - comment = RDoc::Comment.new $1, @top_level - body = $2 - offset, = $~.offset(2) + if file_content + @body_table ||= {} + @body_table[file_content] ||= gen_body_table file_content + type, *args = @body_table[file_content][meth_name] + end + + case type + when :func_def + comment = new_comment args[0], @top_level, :c + body = args[1] + offset, = args[2] comment.remove_private if comment @@ -637,38 +625,34 @@ class RDoc::Parser::C < RDoc::Parser #meth_obj.params = params meth_obj.start_collecting_tokens - tk = RDoc::RubyToken::Token.new nil, 1, 1 - tk.set_text body + tk = { :line_no => 1, :char_no => 1, :text => body } meth_obj.add_token tk meth_obj.comment = comment - meth_obj.offset = offset meth_obj.line = file_content[0, offset].count("\n") + 1 body - when %r%((?>/\*.*?\*/\s*))^\s*(\#\s*define\s+#{meth_name}\s+(\w+))%m then - comment = RDoc::Comment.new $1, @top_level - body = $2 - offset = $~.offset(2).first + when :macro_def + comment = new_comment args[0], @top_level, :c + body = args[1] + offset, = args[2] - find_body class_name, $3, meth_obj, file_content, true + find_body class_name, args[3], meth_obj, file_content, true comment.normalize find_modifiers comment, meth_obj meth_obj.start_collecting_tokens - tk = RDoc::RubyToken::Token.new nil, 1, 1 - tk.set_text body + tk = { :line_no => 1, :char_no => 1, :text => body } meth_obj.add_token tk meth_obj.comment = comment - meth_obj.offset = offset meth_obj.line = file_content[0, offset].count("\n") + 1 body - when %r%^\s*\#\s*define\s+#{meth_name}\s+(\w+)%m then + when :macro_alias # with no comment we hope the aliased definition has it and use it's # definition - body = find_body(class_name, $1, meth_obj, file_content, true) + body = find_body(class_name, args[0], meth_obj, file_content, true) return body if body @@ -755,7 +739,7 @@ class RDoc::Parser::C < RDoc::Parser comment = '' end - comment = RDoc::Comment.new comment, @top_level + comment = new_comment comment, @top_level, :c comment.normalize look_for_directives_in class_mod, comment @@ -764,29 +748,43 @@ class RDoc::Parser::C < RDoc::Parser end ## + # Generate a const table + + def gen_const_table file_content + table = {} + @content.scan(%r{ + ((?>^\s*/\*.*?\*/\s+)) + rb_define_(\w+)\((?:\s*(?:\w+),)?\s* + "(\w+)"\s*, + .*?\)\s*; + | Document-(?:const|global|variable):\s + ((?:\w+::)*\w+) + \s*?\n((?>.*?\*/)) + }mxi) do + case + when $1 then table[[$2, $3]] = $1 + when $4 then table[$4] = "/*\n" + $5 + end + end + table + end + + ## # Finds a comment matching +type+ and +const_name+ either above the # comment or in the matching Document- section. def find_const_comment(type, const_name, class_name = nil) - comment = if @content =~ %r%((?>^\s*/\*.*?\*/\s+)) - rb_define_#{type}\((?:\s*(\w+),)?\s* - "#{const_name}"\s*, - .*?\)\s*;%xmi then - $1 - elsif class_name and - @content =~ %r%Document-(?:const|global|variable):\s - #{class_name}::#{const_name} - \s*?\n((?>.*?\*/))%xm then - "/*\n#{$1}" - elsif @content =~ %r%Document-(?:const|global|variable): - \s#{const_name} - \s*?\n((?>.*?\*/))%xm then - "/*\n#{$1}" - else - '' - end + @const_table ||= {} + @const_table[@content] ||= gen_const_table @content + table = @const_table[@content] + + comment = + table[[type, const_name]] || + (class_name && table[class_name + "::" + const_name]) || + table[const_name] || + '' - RDoc::Comment.new comment, @top_level + new_comment comment, @top_level, :c end ## @@ -817,7 +815,7 @@ class RDoc::Parser::C < RDoc::Parser return unless comment - RDoc::Comment.new comment, @top_level + new_comment comment, @top_level, :c end ## @@ -826,8 +824,8 @@ class RDoc::Parser::C < RDoc::Parser def handle_attr(var_name, attr_name, read, write) rw = '' - rw << 'R' if '1' == read - rw << 'W' if '1' == write + rw += 'R' if '1' == read + rw += 'W' if '1' == write class_name = @known_classes[var_name] @@ -840,7 +838,7 @@ class RDoc::Parser::C < RDoc::Parser comment = find_attr_comment var_name, attr_name comment.normalize - name = attr_name.gsub(/rb_intern\("([^"]+)"\)/, '\1') + name = attr_name.gsub(/rb_intern(?:_const)?\("([^"]+)"\)/, '\1') attr = RDoc::Attr.new '', name, rw, comment @@ -913,7 +911,7 @@ class RDoc::Parser::C < RDoc::Parser # can override the C value of the comment to give a friendly definition. # # /* 300: The perfect score in bowling */ - # rb_define_const(cFoo, "PERFECT", INT2FIX(300); + # rb_define_const(cFoo, "PERFECT", INT2FIX(300)); # # Will override <tt>INT2FIX(300)</tt> with the value +300+ in the output # RDoc. Values may include quotes and escaped colons (\:). @@ -943,15 +941,15 @@ class RDoc::Parser::C < RDoc::Parser if new_definition.empty? then # Default to literal C definition new_definition = definition else - new_definition.gsub!("\:", ":") - new_definition.gsub!("\\", '\\') + new_definition = new_definition.gsub("\:", ":") + new_definition = new_definition.gsub("\\", '\\') end new_definition.sub!(/\A(\s+)/, '') new_comment = "#{$1}#{new_comment.lstrip}" - new_comment = RDoc::Comment.new new_comment, @top_level + new_comment = self.new_comment(new_comment, @top_level, :c) con = RDoc::Constant.new const_name, new_definition, new_comment else @@ -989,6 +987,10 @@ class RDoc::Parser::C < RDoc::Parser class_obj = find_class var_name, class_name + if existing_method = class_obj.method_list.find { |m| m.c_function == function } + add_alias(var_name, class_obj, existing_method.name, meth_name, existing_method.comment) + end + if class_obj then if meth_name == 'initialize' then meth_name = 'new' @@ -1028,7 +1030,12 @@ class RDoc::Parser::C < RDoc::Parser meth_obj.record_location @top_level + + if meth_obj.section_title + class_obj.temporary_section = class_obj.add_section(meth_obj.section_title) + end class_obj.add_method meth_obj + @stats.add_method meth_obj meth_obj.visibility = :private if 'private_method' == type end @@ -1198,7 +1205,7 @@ class RDoc::Parser::C < RDoc::Parser # when scanning for classes and methods def remove_commented_out_lines - @content.gsub!(%r%//.*rb_define_%, '//') + @content = @content.gsub(%r%//.*rb_define_%, '//') end ## @@ -1208,8 +1215,7 @@ class RDoc::Parser::C < RDoc::Parser def scan remove_commented_out_lines - do_modules - do_classes + do_classes_and_modules do_missing do_constants @@ -1218,12 +1224,14 @@ class RDoc::Parser::C < RDoc::Parser do_aliases do_attrs - deduplicate_call_seq - @store.add_c_variables self @top_level end + def new_comment text = nil, location = nil, language = nil + RDoc::Comment.new(text, location, language).tap do |comment| + comment.format = @markup + end + end end - diff --git a/lib/rdoc/parser/changelog.rb b/lib/rdoc/parser/changelog.rb index a3567c1f81..9245d49376 100644 --- a/lib/rdoc/parser/changelog.rb +++ b/lib/rdoc/parser/changelog.rb @@ -1,4 +1,4 @@ -require 'time' +# frozen_string_literal: true ## # A ChangeLog file parser. @@ -28,13 +28,13 @@ class RDoc::Parser::ChangeLog < RDoc::Parser if last =~ /\)\s*\z/ and continuation =~ /\A\(/ then last.sub!(/\)\s*\z/, ',') - continuation.sub!(/\A\(/, '') + continuation = continuation.sub(/\A\(/, '') end if last =~ /\s\z/ then last << continuation else - last << ' ' << continuation + last << ' ' + continuation end end @@ -101,17 +101,37 @@ class RDoc::Parser::ChangeLog < RDoc::Parser # Groups +entries+ by date. def group_entries entries + @time_cache ||= {} entries.group_by do |title, _| begin - Time.parse(title).strftime '%Y-%m-%d' + time = @time_cache[title] + (time || parse_date(title)).strftime '%Y-%m-%d' rescue NoMethodError, ArgumentError time, = title.split ' ', 2 - Time.parse(time).strftime '%Y-%m-%d' + parse_date(time).strftime '%Y-%m-%d' end end end ## + # Parse date in ISO-8601, RFC-2822, or default of Git + + def parse_date(date) + case date + when /\A\s*(\d+)-(\d+)-(\d+)(?:[ T](\d+):(\d+):(\d+) *([-+]\d\d):?(\d\d))?\b/ + Time.new($1, $2, $3, $4, $5, $6, ("#{$7}:#{$8}" if $7)) + when /\A\s*\w{3}, +(\d+) (\w{3}) (\d+) (\d+):(\d+):(\d+) *(?:([-+]\d\d):?(\d\d))\b/ + Time.new($3, $2, $1, $4, $5, $6, ("#{$7}:#{$8}" if $7)) + when /\A\s*\w{3} (\w{3}) +(\d+) (\d+) (\d+):(\d+):(\d+) *(?:([-+]\d\d):?(\d\d))\b/ + Time.new($3, $1, $2, $4, $5, $6, ("#{$7}:#{$8}" if $7)) + when /\A\s*\w{3} (\w{3}) +(\d+) (\d+):(\d+):(\d+) (\d+)\b/ + Time.new($6, $1, $2, $3, $4, $5) + else + raise ArgumentError, "bad date: #{date}" + end + end + + ## # Parses the entries in the ChangeLog. # # Returns an Array of each ChangeLog entry in order of parsing. @@ -127,6 +147,14 @@ class RDoc::Parser::ChangeLog < RDoc::Parser # 'README.EXT.ja: ditto']] def parse_entries + @time_cache ||= {} + + if /\A((?:.*\n){,3})commit\s/ =~ @content + class << self; prepend Git; end + parse_info($1) + return parse_entries + end + entries = [] entry_name = nil entry_body = [] @@ -141,28 +169,20 @@ class RDoc::Parser::ChangeLog < RDoc::Parser entry_name = $& begin - time = Time.parse entry_name - # HACK Ruby 1.8 does not raise ArgumentError for Time.parse "Other" - entry_name = nil unless entry_name =~ /#{time.year}/ - rescue NoMethodError - # HACK Ruby 2.1.2 and earlier raises NoMethodError if time part is absent - entry_name.split ' ', 2 + time = parse_date entry_name + @time_cache[entry_name] = time rescue ArgumentError - if /out of range/ =~ $!.message - Time.parse(entry_name.split(' ', 2)[0]) rescue entry_name = nil - else - entry_name = nil - end + entry_name = nil end entry_body = [] when /^(\t| {8})?\*\s*(.*)/ then # "\t* file.c (func): ..." - entry_body << $2 + entry_body << $2.dup when /^(\t| {8})?\s*(\(.*)/ then # "\t(func): ..." entry = $2 if entry_body.last =~ /:/ then - entry_body << entry + entry_body << entry.dup else continue_entry_body entry_body, entry end @@ -184,6 +204,8 @@ class RDoc::Parser::ChangeLog < RDoc::Parser # Converts the ChangeLog into an RDoc::Markup::Document def scan + @time_cache = {} + entries = parse_entries grouped_entries = group_entries entries @@ -194,5 +216,120 @@ class RDoc::Parser::ChangeLog < RDoc::Parser @top_level end + module Git + def parse_info(info) + /^\s*base-url\s*=\s*(.*\S)/ =~ info + @base_url = $1 + end + + def parse_entries + entries = [] + + @content.scan(/^commit\s+(\h{20})\h*\n((?:.+\n)*)\n((?: {4}.*\n+)*)/) do + entry_name, header, entry_body = $1, $2, $3.gsub(/^ {4}/, '') + # header = header.scan(/^ *(\S+?): +(.*)/).to_h + # date = header["CommitDate"] || header["Date"] + date = header[/^ *(?:Author)?Date: +(.*)/, 1] + author = header[/^ *Author: +(.*)/, 1] + begin + time = parse_date(header[/^ *CommitDate: +(.*)/, 1] || date) + @time_cache[entry_name] = time + author.sub!(/\s*<(.*)>/, '') + email = $1 + entries << [entry_name, [author, email, date, entry_body]] + rescue ArgumentError + end + end + + entries + end + + def create_entries entries + # git log entries have no strictly itemized style like the old + # style, just assume Markdown. + entries.map do |commit, entry| + LogEntry.new(@base_url, commit, *entry) + end + end + + LogEntry = Struct.new(:base, :commit, :author, :email, :date, :contents) do + HEADING_LEVEL = 3 + + def initialize(base, commit, author, email, date, contents) + case contents + when String + contents = RDoc::Markdown.parse(contents).parts.each do |body| + case body + when RDoc::Markup::Heading + body.level += HEADING_LEVEL + 1 + end + end + case first = contents[0] + when RDoc::Markup::Paragraph + contents[0] = RDoc::Markup::Heading.new(HEADING_LEVEL + 1, first.text) + end + end + super + end + + def level + HEADING_LEVEL + end + + def aref + "label-#{commit}" + end + + def label context = nil + aref + end + + def text + case base + when nil + "#{date}" + when /%s/ + "{#{date}}[#{base % commit}]" + else + "{#{date}}[#{base}#{commit}]" + end + " {#{author}}[mailto:#{email}]" + end + + def accept visitor + visitor.accept_heading self + begin + if visitor.respond_to?(:code_object=) + code_object = visitor.code_object + visitor.code_object = self + end + contents.each do |body| + body.accept visitor + end + ensure + if visitor.respond_to?(:code_object) + visitor.code_object = code_object + end + end + end + + def pretty_print q # :nodoc: + q.group(2, '[log_entry: ', ']') do + q.text commit + q.text ',' + q.breakable + q.group(2, '[date: ', ']') { q.text date } + q.text ',' + q.breakable + q.group(2, '[author: ', ']') { q.text author } + q.text ',' + q.breakable + q.group(2, '[email: ', ']') { q.text email } + q.text ',' + q.breakable + q.pp contents + end + end + end + end end diff --git a/lib/rdoc/parser/markdown.rb b/lib/rdoc/parser/markdown.rb index 6fd88cf614..9ff478f872 100644 --- a/lib/rdoc/parser/markdown.rb +++ b/lib/rdoc/parser/markdown.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true ## # Parse a Markdown format file. The parsed RDoc::Markup::Document is attached # as a file comment. diff --git a/lib/rdoc/parser/rd.rb b/lib/rdoc/parser/rd.rb index 09069ae297..25f5711731 100644 --- a/lib/rdoc/parser/rd.rb +++ b/lib/rdoc/parser/rd.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true ## # Parse a RD format file. The parsed RDoc::Markup::Document is attached as a # file comment. diff --git a/lib/rdoc/parser/ripper_state_lex.rb b/lib/rdoc/parser/ripper_state_lex.rb new file mode 100644 index 0000000000..5492f08726 --- /dev/null +++ b/lib/rdoc/parser/ripper_state_lex.rb @@ -0,0 +1,590 @@ +# frozen_string_literal: true +require 'ripper' + +class RDoc::Parser::RipperStateLex + # TODO: Remove this constants after Ruby 2.4 EOL + RIPPER_HAS_LEX_STATE = Ripper::Filter.method_defined?(:state) + + Token = Struct.new(:line_no, :char_no, :kind, :text, :state) + + EXPR_NONE = 0 + EXPR_BEG = 1 + EXPR_END = 2 + EXPR_ENDARG = 4 + EXPR_ENDFN = 8 + EXPR_ARG = 16 + EXPR_CMDARG = 32 + EXPR_MID = 64 + EXPR_FNAME = 128 + EXPR_DOT = 256 + EXPR_CLASS = 512 + EXPR_LABEL = 1024 + EXPR_LABELED = 2048 + EXPR_FITEM = 4096 + EXPR_VALUE = EXPR_BEG + EXPR_BEG_ANY = (EXPR_BEG | EXPR_MID | EXPR_CLASS) + EXPR_ARG_ANY = (EXPR_ARG | EXPR_CMDARG) + EXPR_END_ANY = (EXPR_END | EXPR_ENDARG | EXPR_ENDFN) + + class InnerStateLex < Ripper::Filter + attr_accessor :lex_state + + def initialize(code) + @lex_state = EXPR_BEG + @in_fname = false + @continue = false + reset + super(code) + end + + def reset + @command_start = false + @cmd_state = @command_start + end + + def on_nl(tok, data) + case @lex_state + when EXPR_FNAME, EXPR_DOT + @continue = true + else + @continue = false + @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 + end + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_ignored_nl(tok, data) + case @lex_state + when EXPR_FNAME, EXPR_DOT + @continue = true + else + @continue = false + @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 + end + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_op(tok, data) + case tok + when '&', '|', '!', '!=', '!~' + case @lex_state + when EXPR_FNAME, EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_BEG + end + when '<<' + # TODO next token? + case @lex_state + when EXPR_FNAME, EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_BEG + end + when '?' + @lex_state = EXPR_BEG + when '&&', '||', '+=', '-=', '*=', '**=', + '&=', '|=', '^=', '<<=', '>>=', '||=', '&&=' + @lex_state = EXPR_BEG + when '::' + case @lex_state + when EXPR_ARG, EXPR_CMDARG + @lex_state = EXPR_DOT + when EXPR_FNAME, EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_BEG + end + else + case @lex_state + when EXPR_FNAME, EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_BEG + end + end + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_kw(tok, data) + case tok + when 'class' + @lex_state = EXPR_CLASS + @in_fname = true + when 'def' + @lex_state = EXPR_FNAME + @continue = true + @in_fname = true + when 'if', 'unless', 'while', 'until' + if ((EXPR_MID | EXPR_END | EXPR_ENDARG | EXPR_ENDFN | EXPR_ARG | EXPR_CMDARG) & @lex_state) != 0 # postfix if + @lex_state = EXPR_BEG | EXPR_LABEL + else + @lex_state = EXPR_BEG + end + when 'begin', 'case', 'when' + @lex_state = EXPR_BEG + when 'return', 'break' + @lex_state = EXPR_MID + else + if @lex_state == EXPR_FNAME + @lex_state = EXPR_END + else + @lex_state = EXPR_END + end + end + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_tstring_beg(tok, data) + @lex_state = EXPR_BEG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_tstring_end(tok, data) + @lex_state = EXPR_END | EXPR_ENDARG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_CHAR(tok, data) + @lex_state = EXPR_END + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_period(tok, data) + @lex_state = EXPR_DOT + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_int(tok, data) + @lex_state = EXPR_END | EXPR_ENDARG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_float(tok, data) + @lex_state = EXPR_END | EXPR_ENDARG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_rational(tok, data) + @lex_state = EXPR_END | EXPR_ENDARG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_imaginary(tok, data) + @lex_state = EXPR_END | EXPR_ENDARG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_symbeg(tok, data) + @lex_state = EXPR_FNAME + @continue = true + @in_fname = true + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + private def on_variables(event, tok, data) + if @in_fname + @lex_state = EXPR_ENDFN + @in_fname = false + @continue = false + elsif @continue + case @lex_state + when EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_ENDFN + @continue = false + end + else + @lex_state = EXPR_CMDARG + end + data << Token.new(lineno, column, event, tok, @lex_state) + end + + def on_ident(tok, data) + on_variables(__method__, tok, data) + end + + def on_ivar(tok, data) + @lex_state = EXPR_END + on_variables(__method__, tok, data) + end + + def on_cvar(tok, data) + @lex_state = EXPR_END + on_variables(__method__, tok, data) + end + + def on_gvar(tok, data) + @lex_state = EXPR_END + on_variables(__method__, tok, data) + end + + def on_backref(tok, data) + @lex_state = EXPR_END + on_variables(__method__, tok, data) + end + + def on_lparen(tok, data) + @lex_state = EXPR_LABEL | EXPR_BEG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_rparen(tok, data) + @lex_state = EXPR_ENDFN + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_lbrace(tok, data) + @lex_state = EXPR_LABEL | EXPR_BEG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_rbrace(tok, data) + @lex_state = EXPR_ENDARG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_lbracket(tok, data) + @lex_state = EXPR_LABEL | EXPR_BEG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_rbracket(tok, data) + @lex_state = EXPR_ENDARG + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_const(tok, data) + case @lex_state + when EXPR_FNAME + @lex_state = EXPR_ENDFN + when EXPR_CLASS, EXPR_CMDARG, EXPR_MID + @lex_state = EXPR_ARG + else + @lex_state = EXPR_CMDARG + end + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_sp(tok, data) + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_comma(tok, data) + @lex_state = EXPR_BEG | EXPR_LABEL if (EXPR_ARG_ANY & @lex_state) != 0 + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_comment(tok, data) + @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_ignored_sp(tok, data) + @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_heredoc_beg(tok, data) + data << Token.new(lineno, column, __method__, tok, @lex_state) + @lex_state = EXPR_END + data + end + + def on_heredoc_end(tok, data) + data << Token.new(lineno, column, __method__, tok, @lex_state) + @lex_state = EXPR_BEG + data + end + + def on_default(event, tok, data) + reset + data << Token.new(lineno, column, event, tok, @lex_state) + end + end unless RIPPER_HAS_LEX_STATE + + class InnerStateLex < Ripper::Filter + def initialize(code) + super(code) + end + + def on_default(event, tok, data) + data << Token.new(lineno, column, event, tok, state) + end + end if RIPPER_HAS_LEX_STATE + + def get_squashed_tk + if @buf.empty? + tk = @tokens.shift + else + tk = @buf.shift + end + return nil if tk.nil? + case tk[:kind] + when :on_symbeg then + tk = get_symbol_tk(tk) + when :on_tstring_beg then + tk = get_string_tk(tk) + when :on_backtick then + if (tk[:state] & (EXPR_FNAME | EXPR_ENDFN)) != 0 + @inner_lex.lex_state = EXPR_ARG unless RIPPER_HAS_LEX_STATE + tk[:kind] = :on_ident + tk[:state] = Ripper::Lexer.const_defined?(:State) ? Ripper::Lexer::State.new(EXPR_ARG) : EXPR_ARG + else + tk = get_string_tk(tk) + end + when :on_regexp_beg then + tk = get_regexp_tk(tk) + when :on_embdoc_beg then + tk = get_embdoc_tk(tk) + when :on_heredoc_beg then + @heredoc_queue << retrieve_heredoc_info(tk) + @inner_lex.lex_state = EXPR_END unless RIPPER_HAS_LEX_STATE + when :on_nl, :on_ignored_nl, :on_comment, :on_heredoc_end then + if !@heredoc_queue.empty? + get_heredoc_tk(*@heredoc_queue.shift) + elsif tk[:text].nil? # :on_ignored_nl sometimes gives nil + tk[:text] = '' + end + when :on_words_beg then + tk = get_words_tk(tk) + when :on_qwords_beg then + tk = get_words_tk(tk) + when :on_symbols_beg then + tk = get_words_tk(tk) + when :on_qsymbols_beg then + tk = get_words_tk(tk) + when :on_op then + if '&.' == tk[:text] + tk[:kind] = :on_period + else + tk = get_op_tk(tk) + end + end + tk + end + + private def get_symbol_tk(tk) + is_symbol = true + symbol_tk = Token.new(tk.line_no, tk.char_no, :on_symbol) + if ":'" == tk[:text] or ':"' == tk[:text] + tk1 = get_string_tk(tk) + symbol_tk[:text] = tk1[:text] + symbol_tk[:state] = tk1[:state] + else + case (tk1 = get_squashed_tk)[:kind] + when :on_ident + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_tstring_content + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = get_squashed_tk[:state] # skip :on_tstring_end + when :on_tstring_end + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_op + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_ivar + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_cvar + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_gvar + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_const + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_kw + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + else + is_symbol = false + tk = tk1 + end + end + if is_symbol + tk = symbol_tk + end + tk + end + + private def get_string_tk(tk) + string = tk[:text] + state = nil + kind = :on_tstring + loop do + inner_str_tk = get_squashed_tk + if inner_str_tk.nil? + break + elsif :on_tstring_end == inner_str_tk[:kind] + string = string + inner_str_tk[:text] + state = inner_str_tk[:state] + break + elsif :on_label_end == inner_str_tk[:kind] + string = string + inner_str_tk[:text] + state = inner_str_tk[:state] + kind = :on_symbol + break + else + string = string + inner_str_tk[:text] + if :on_embexpr_beg == inner_str_tk[:kind] then + kind = :on_dstring if :on_tstring == kind + end + end + end + Token.new(tk.line_no, tk.char_no, kind, string, state) + end + + private def get_regexp_tk(tk) + string = tk[:text] + state = nil + loop do + inner_str_tk = get_squashed_tk + if inner_str_tk.nil? + break + elsif :on_regexp_end == inner_str_tk[:kind] + string = string + inner_str_tk[:text] + state = inner_str_tk[:state] + break + else + string = string + inner_str_tk[:text] + end + end + Token.new(tk.line_no, tk.char_no, :on_regexp, string, state) + end + + private def get_embdoc_tk(tk) + string = tk[:text] + until :on_embdoc_end == (embdoc_tk = get_squashed_tk)[:kind] do + string = string + embdoc_tk[:text] + end + string = string + embdoc_tk[:text] + Token.new(tk.line_no, tk.char_no, :on_embdoc, string, embdoc_tk.state) + end + + private def get_heredoc_tk(heredoc_name, indent) + string = '' + start_tk = nil + prev_tk = nil + until heredoc_end?(heredoc_name, indent, tk = @tokens.shift) do + start_tk = tk unless start_tk + if (prev_tk.nil? or "\n" == prev_tk[:text][-1]) and 0 != tk[:char_no] + string = string + (' ' * tk[:char_no]) + end + string = string + tk[:text] + prev_tk = tk + end + start_tk = tk unless start_tk + prev_tk = tk unless prev_tk + @buf.unshift tk # closing heredoc + heredoc_tk = Token.new(start_tk.line_no, start_tk.char_no, :on_heredoc, string, prev_tk.state) + @buf.unshift heredoc_tk + end + + private def retrieve_heredoc_info(tk) + name = tk[:text].gsub(/\A<<[-~]?(['"`]?)(.+)\1\z/, '\2') + indent = tk[:text] =~ /\A<<[-~]/ + [name, indent] + end + + private def heredoc_end?(name, indent, tk) + result = false + if :on_heredoc_end == tk[:kind] then + tk_name = tk[:text].chomp + tk_name.lstrip! if indent + if name == tk_name + result = true + end + end + result + end + + private def get_words_tk(tk) + string = '' + start_token = tk[:text] + start_quote = tk[:text].rstrip[-1] + line_no = tk[:line_no] + char_no = tk[:char_no] + state = tk[:state] + end_quote = + case start_quote + when ?( then ?) + when ?[ then ?] + when ?{ then ?} + when ?< then ?> + else start_quote + end + end_token = nil + loop do + tk = get_squashed_tk + if tk.nil? + end_token = end_quote + break + elsif :on_tstring_content == tk[:kind] then + string += tk[:text] + elsif :on_words_sep == tk[:kind] or :on_tstring_end == tk[:kind] then + if end_quote == tk[:text].strip then + end_token = tk[:text] + break + else + string += tk[:text] + end + else + string += tk[:text] + end + end + text = "#{start_token}#{string}#{end_token}" + Token.new(line_no, char_no, :on_dstring, text, state) + end + + private def get_op_tk(tk) + redefinable_operators = %w[! != !~ % & * ** + +@ - -@ / < << <= <=> == === =~ > >= >> [] []= ^ ` | ~] + if redefinable_operators.include?(tk[:text]) and tk[:state] == EXPR_ARG then + @inner_lex.lex_state = EXPR_ARG unless RIPPER_HAS_LEX_STATE + tk[:state] = Ripper::Lexer.const_defined?(:State) ? Ripper::Lexer::State.new(EXPR_ARG) : EXPR_ARG + tk[:kind] = :on_ident + elsif tk[:text] =~ /^[-+]$/ then + tk_ahead = get_squashed_tk + case tk_ahead[:kind] + when :on_int, :on_float, :on_rational, :on_imaginary then + tk[:text] += tk_ahead[:text] + tk[:kind] = tk_ahead[:kind] + tk[:state] = tk_ahead[:state] + when :on_heredoc_beg, :on_tstring, :on_dstring # frozen/non-frozen string literal + tk[:text] += tk_ahead[:text] + tk[:kind] = tk_ahead[:kind] + tk[:state] = tk_ahead[:state] + else + @buf.unshift tk_ahead + end + end + tk + end + + def initialize(code) + @buf = [] + @heredoc_queue = [] + @inner_lex = InnerStateLex.new(code) + @tokens = @inner_lex.parse([]) + end + + def self.parse(code) + lex = self.new(code) + tokens = [] + begin + while tk = lex.get_squashed_tk + tokens.push tk + end + rescue StopIteration + end + tokens + end + + def self.end?(token) + (token[:state] & EXPR_END) + end +end diff --git a/lib/rdoc/parser/ruby.rb b/lib/rdoc/parser/ruby.rb index ce1083edc7..e546fe2141 100644 --- a/lib/rdoc/parser/ruby.rb +++ b/lib/rdoc/parser/ruby.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true ## # This file contains stuff stolen outright from: # @@ -7,8 +8,6 @@ # by Keiju ISHITSUKA (Nippon Rational Inc.) # -$TOKEN_DEBUG ||= nil - ## # Extracts code elements from a source file returning a TopLevel object # containing the constituent file elements. @@ -23,6 +22,7 @@ $TOKEN_DEBUG ||= nil # * aliases # * private, public, protected # * private_class_function, public_class_function +# * private_constant, public_constant # * module_function # * attr, attr_reader, attr_writer, attr_accessor # * extra accessors given on the command line @@ -138,11 +138,13 @@ $TOKEN_DEBUG ||= nil # Note that by default, the :method: directive will be ignored if there is a # standard rdocable item following it. +require 'ripper' +require_relative 'ripper_state_lex' + class RDoc::Parser::Ruby < RDoc::Parser parse_files_matching(/\.rbw?$/) - include RDoc::RubyToken include RDoc::TokenStream include RDoc::Parser::RubyTools @@ -162,20 +164,34 @@ class RDoc::Parser::Ruby < RDoc::Parser def initialize(top_level, file_name, content, options, stats) super + if /\t/ =~ content then + tab_width = @options.tab_width + content = content.split(/\n/).map do |line| + 1 while line.gsub!(/\t+/) { + ' ' * (tab_width*$&.length - $`.length % tab_width) + } && $~ + line + end.join("\n") + end + @size = 0 @token_listeners = nil - @scanner = RDoc::RubyLex.new content, @options - @scanner.exception_on_syntax_error = false + content = RDoc::Encoding.remove_magic_comment content + @scanner = RDoc::Parser::RipperStateLex.parse(content) + @content = content + @scanner_point = 0 @prev_seek = nil @markup = @options.markup @track_visibility = :nodoc != @options.visibility - - @encoding = nil - @encoding = @options.encoding if Object.const_defined? :Encoding + @encoding = @options.encoding reset end + def tk_nl?(tk) + :on_nl == tk[:kind] or :on_ignored_nl == tk[:kind] + end + ## # Retrieves the read token stream and replaces +pattern+ with +replacement+ # using gsub. If the result is only a ";" returns an empty string. @@ -195,7 +211,7 @@ class RDoc::Parser::Ruby < RDoc::Parser # methods. def get_visibility_information tk, single # :nodoc: - vis_type = tk.name + vis_type = tk[:text] singleton = single == SINGLE vis = @@ -224,31 +240,34 @@ class RDoc::Parser::Ruby < RDoc::Parser def collect_first_comment skip_tkspace - comment = '' - comment.force_encoding @encoding if @encoding + comment = ''.dup + comment = RDoc::Encoding.change_encoding comment, @encoding if @encoding first_line = true - first_comment_tk_class = nil + first_comment_tk_kind = nil + line_no = nil tk = get_tk - while TkCOMMENT === tk - if first_line and tk.text =~ /\A#!/ then + while tk && (:on_comment == tk[:kind] or :on_embdoc == tk[:kind]) + comment_body = retrieve_comment_body(tk) + if first_line and comment_body =~ /\A#!/ then skip_tkspace tk = get_tk - elsif first_line and tk.text =~ /\A#\s*-\*-/ then + elsif first_line and comment_body =~ /\A#\s*-\*-/ then first_line = false skip_tkspace tk = get_tk else - break if first_comment_tk_class and not first_comment_tk_class === tk - first_comment_tk_class = tk.class + break if first_comment_tk_kind and not first_comment_tk_kind === tk[:kind] + first_comment_tk_kind = tk[:kind] + line_no = tk[:line_no] if first_line first_line = false - comment << tk.text << "\n" + comment << comment_body tk = get_tk - if TkNL === tk then - skip_tkspace false + if :on_nl === tk then + skip_tkspace_without_nl tk = get_tk end end @@ -256,15 +275,14 @@ class RDoc::Parser::Ruby < RDoc::Parser unget_tk tk - new_comment comment + new_comment comment, line_no end ## # Consumes trailing whitespace from the token stream def consume_trailing_spaces # :nodoc: - get_tkread - skip_tkspace false + skip_tkspace_without_nl end ## @@ -291,7 +309,7 @@ class RDoc::Parser::Ruby < RDoc::Parser container.find_module_named rhs_name end - container.add_module_alias mod, constant.name, @top_level if mod + container.add_module_alias mod, rhs_name, constant, @top_level end ## @@ -304,16 +322,14 @@ class RDoc::Parser::Ruby < RDoc::Parser end ## - # Looks for a true or false token. Returns false if TkFALSE or TkNIL are - # found. + # Looks for a true or false token. def get_bool skip_tkspace tk = get_tk - case tk - when TkTRUE + if :on_kw == tk[:kind] && 'true' == tk[:text] true - when TkFALSE, TkNIL + elsif :on_kw == tk[:kind] && ('false' == tk[:text] || 'nil' == tk[:text]) false else unget_tk tk @@ -329,27 +345,31 @@ class RDoc::Parser::Ruby < RDoc::Parser def get_class_or_module container, ignore_constants = false skip_tkspace name_t = get_tk - given_name = '' + given_name = ''.dup # class ::A -> A is in the top level - case name_t - when TkCOLON2, TkCOLON3 then # bug + if :on_op == name_t[:kind] and '::' == name_t[:text] then # bug name_t = get_tk container = @top_level given_name << '::' end - skip_tkspace false - given_name << name_t.name + skip_tkspace_without_nl + given_name << name_t[:text] - while TkCOLON2 === peek_tk do + is_self = name_t[:kind] == :on_op && name_t[:text] == '<<' + new_modules = [] + while !is_self && (tk = peek_tk) and :on_op == tk[:kind] and '::' == tk[:text] do prev_container = container - container = container.find_module_named name_t.name + container = container.find_module_named name_t[:text] container ||= if ignore_constants then - RDoc::Context.new + c = RDoc::NormalModule.new name_t[:text] + c.store = @store + new_modules << [prev_container, c] + c else - c = prev_container.add_module RDoc::NormalModule, name_t.name + c = prev_container.add_module RDoc::NormalModule, name_t[:text] c.ignore unless prev_container.document_children @top_level.add_to_classes_or_modules c c @@ -358,35 +378,51 @@ class RDoc::Parser::Ruby < RDoc::Parser record_location container get_tk - skip_tkspace false + skip_tkspace + if :on_lparen == peek_tk[:kind] # ProcObjectInConstant::() + parse_method_or_yield_parameters + break + end name_t = get_tk - given_name << '::' << name_t.name + unless :on_const == name_t[:kind] || :on_ident == name_t[:kind] + raise RDoc::Error, "Invalid class or module definition: #{given_name}" + end + if prev_container == container and !ignore_constants + given_name = name_t[:text] + else + given_name << '::' + name_t[:text] + end end - skip_tkspace false + skip_tkspace_without_nl - return [container, name_t, given_name] + return [container, name_t, given_name, new_modules] end ## # Return a superclass, which can be either a constant of an expression def get_class_specification - case peek_tk - when TkSELF then return 'self' - when TkGVAR then return '' + tk = peek_tk + if tk.nil? + return '' + elsif :on_kw == tk[:kind] && 'self' == tk[:text] + return 'self' + elsif :on_gvar == tk[:kind] + return '' end res = get_constant - skip_tkspace false + skip_tkspace_without_nl get_tkread # empty out read buffer tk = get_tk + return res unless tk - case tk - when TkNL, TkCOMMENT, TkSEMICOLON then + case tk[:kind] + when :on_nl, :on_comment, :on_embdoc, :on_semicolon then unget_tk(tk) return res end @@ -401,11 +437,11 @@ class RDoc::Parser::Ruby < RDoc::Parser def get_constant res = "" - skip_tkspace false + skip_tkspace_without_nl tk = get_tk - while TkCOLON2 === tk or TkCOLON3 === tk or TkCONSTANT === tk do - res += tk.name + while tk && ((:on_op == tk[:kind] && '::' == tk[:text]) || :on_const == tk[:kind]) do + res += tk[:text] tk = get_tk end @@ -414,28 +450,83 @@ class RDoc::Parser::Ruby < RDoc::Parser end ## - # Get a constant that may be surrounded by parens + # Get an included module that may be surrounded by parens - def get_constant_with_optional_parens - skip_tkspace false + def get_included_module_with_optional_parens + skip_tkspace_without_nl + get_tkread + tk = get_tk + end_token = get_end_token tk + return '' unless end_token nest = 0 + continue = false + only_constant = true - while TkLPAREN === (tk = peek_tk) or TkfLPAREN === tk do - get_tk - skip_tkspace - nest += 1 - end - - name = get_constant - - while nest > 0 - skip_tkspace + while tk != nil do + is_element_of_constant = false + case tk[:kind] + when :on_semicolon then + break if nest == 0 + when :on_lbracket then + nest += 1 + when :on_rbracket then + nest -= 1 + when :on_lbrace then + nest += 1 + when :on_rbrace then + nest -= 1 + if nest <= 0 + # we might have a.each { |i| yield i } + unget_tk(tk) if nest < 0 + break + end + when :on_lparen then + nest += 1 + when end_token[:kind] then + if end_token[:kind] == :on_rparen + nest -= 1 + break if nest <= 0 + else + break if nest <= 0 + end + when :on_rparen then + nest -= 1 + when :on_comment, :on_embdoc then + @read.pop + if :on_nl == end_token[:kind] and "\n" == tk[:text][-1] and + (!continue or (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) != 0) then + break if !continue and nest <= 0 + end + when :on_comma then + continue = true + when :on_ident then + continue = false if continue + when :on_kw then + case tk[:text] + when 'def', 'do', 'case', 'for', 'begin', 'class', 'module' + nest += 1 + when 'if', 'unless', 'while', 'until', 'rescue' + # postfix if/unless/while/until/rescue must be EXPR_LABEL + nest += 1 unless (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) != 0 + when 'end' + nest -= 1 + break if nest == 0 + end + when :on_const then + is_element_of_constant = true + when :on_op then + is_element_of_constant = true if '::' == tk[:text] + end + only_constant = false unless is_element_of_constant tk = get_tk - nest -= 1 if TkRPAREN === tk end - name + if only_constant + get_tkread_clean(/\s+/, ' ') + else + '' + end end ## @@ -447,13 +538,19 @@ class RDoc::Parser::Ruby < RDoc::Parser # won't catch all cases (such as "a = yield + 1" def get_end_token tk # :nodoc: - case tk - when TkLPAREN, TkfLPAREN - TkRPAREN - when TkRPAREN + case tk[:kind] + when :on_lparen + token = RDoc::Parser::RipperStateLex::Token.new + token[:kind] = :on_rparen + token[:text] = ')' + token + when :on_rparen nil else - TkNL + token = RDoc::Parser::RipperStateLex::Token.new + token[:kind] = :on_nl + token[:text] = "\n" + token end end @@ -462,11 +559,11 @@ class RDoc::Parser::Ruby < RDoc::Parser def get_method_container container, name_t # :nodoc: prev_container = container - container = container.find_module_named(name_t.name) + container = container.find_module_named(name_t[:text]) unless container then constant = prev_container.constants.find do |const| - const.name == name_t.name + const.name == name_t[:text] end if constant then @@ -477,21 +574,21 @@ class RDoc::Parser::Ruby < RDoc::Parser unless container then # TODO seems broken, should starting at Object in @store - obj = name_t.name.split("::").inject(Object) do |state, item| + obj = name_t[:text].split("::").inject(Object) do |state, item| state.const_get(item) end rescue nil type = obj.class == Class ? RDoc::NormalClass : RDoc::NormalModule unless [Class, Module].include?(obj.class) then - warn("Couldn't find #{name_t.name}. Assuming it's a module") + warn("Couldn't find #{name_t[:text]}. Assuming it's a module") end if type == RDoc::NormalClass then sclass = obj.superclass ? obj.superclass.name : nil - container = prev_container.add_class type, name_t.name, sclass + container = prev_container.add_class type, name_t[:text], sclass else - container = prev_container.add_module type, name_t.name + container = prev_container.add_module type, name_t[:text] end record_location container @@ -505,32 +602,26 @@ class RDoc::Parser::Ruby < RDoc::Parser def get_symbol_or_name tk = get_tk - case tk - when TkSYMBOL then - text = tk.text.sub(/^:/, '') + case tk[:kind] + when :on_symbol then + text = tk[:text].sub(/^:/, '') - if TkASSIGN === peek_tk then + next_tk = peek_tk + if next_tk && :on_op == next_tk[:kind] && '=' == next_tk[:text] then get_tk text << '=' end text - when TkId, TkOp then - tk.name - when TkAMPER, - TkDSTRING, - TkSTAR, - TkSTRING then - tk.text + when :on_ident, :on_const, :on_gvar, :on_cvar, :on_ivar, :on_op, :on_kw then + tk[:text] + when :on_tstring, :on_dstring then + tk[:text][1..-2] else raise RDoc::Error, "Name or symbol expected (got #{tk})" end end - def stop_at_EXPR_END # :nodoc: - @scanner.lex_state == :EXPR_END || !@scanner.continue - end - ## # Marks containers between +container+ and +ancestor+ as ignored @@ -549,29 +640,31 @@ class RDoc::Parser::Ruby < RDoc::Parser # # This routine modifies its +comment+ parameter. - def look_for_directives_in context, comment - @preprocess.handle comment, context do |directive, param| + def look_for_directives_in container, comment + @preprocess.handle comment, container do |directive, param| case directive when 'method', 'singleton-method', 'attr', 'attr_accessor', 'attr_reader', 'attr_writer' then false # handled elsewhere when 'section' then - context.set_current_section param, comment.dup + break unless container.kind_of?(RDoc::Context) + container.set_current_section param, comment.dup comment.text = '' break end end - remove_private_comments comment + comment.remove_private end ## # Adds useful info about the parser to +message+ def make_message message - prefix = "#{@file_name}:" + prefix = "#{@file_name}:".dup - prefix << "#{@scanner.line_no}:#{@scanner.char_no}:" if @scanner + tk = peek_tk + prefix << "#{tk[:line_no]}:#{tk[:char_no]}:" if tk "#{prefix} #{message}" end @@ -579,8 +672,9 @@ class RDoc::Parser::Ruby < RDoc::Parser ## # Creates a comment with the correct format - def new_comment comment - c = RDoc::Comment.new comment, @top_level + def new_comment comment, line_no = nil + c = RDoc::Comment.new comment, @top_level, :ruby + c.line = line_no c.format = @markup c end @@ -590,24 +684,22 @@ class RDoc::Parser::Ruby < RDoc::Parser # +comment+. def parse_attr(context, single, tk, comment) - offset = tk.seek - line_no = tk.line_no + line_no = tk[:line_no] args = parse_symbol_arg 1 if args.size > 0 then name = args[0] rw = "R" - skip_tkspace false + skip_tkspace_without_nl tk = get_tk - if TkCOMMA === tk then + if :on_comma == tk[:kind] then rw = "RW" if get_bool else unget_tk tk end att = create_attr context, single, name, rw, comment - att.offset = offset att.line = line_no read_documentation_modifiers att, RDoc::ATTR_MODIFIERS @@ -621,8 +713,7 @@ class RDoc::Parser::Ruby < RDoc::Parser # comment for each to +comment+. def parse_attr_accessor(context, single, tk, comment) - offset = tk.seek - line_no = tk.line_no + line_no = tk[:line_no] args = parse_symbol_arg rw = "?" @@ -633,7 +724,7 @@ class RDoc::Parser::Ruby < RDoc::Parser # and add found items appropriately but here we do not. I'm not sure why. return if @track_visibility and not tmp.document_self - case tk.name + case tk[:text] when "attr_reader" then rw = "R" when "attr_writer" then rw = "W" when "attr_accessor" then rw = "RW" @@ -643,7 +734,6 @@ class RDoc::Parser::Ruby < RDoc::Parser for name in args att = create_attr context, single, name, rw, comment - att.offset = offset att.line = line_no end end @@ -652,22 +742,19 @@ class RDoc::Parser::Ruby < RDoc::Parser # Parses an +alias+ in +context+ with +comment+ def parse_alias(context, single, tk, comment) - offset = tk.seek - line_no = tk.line_no + line_no = tk[:line_no] skip_tkspace - if TkLPAREN === peek_tk then + if :on_lparen === peek_tk[:kind] then get_tk skip_tkspace end new_name = get_symbol_or_name - @scanner.lex_state = :EXPR_FNAME - skip_tkspace - if TkCOMMA === peek_tk then + if :on_comma === peek_tk[:kind] then get_tk skip_tkspace end @@ -681,7 +768,6 @@ class RDoc::Parser::Ruby < RDoc::Parser al = RDoc::Alias.new(get_tkread, old_name, new_name, comment, single == SINGLE) record_location al - al.offset = offset al.line = line_no read_documentation_modifiers al, RDoc::ATTR_MODIFIERS @@ -695,34 +781,38 @@ class RDoc::Parser::Ruby < RDoc::Parser # Extracts call parameters from the token stream. def parse_call_parameters(tk) - end_token = case tk - when TkLPAREN, TkfLPAREN - TkRPAREN - when TkRPAREN + end_token = case tk[:kind] + when :on_lparen + :on_rparen + when :on_rparen return "" else - TkNL + :on_nl end nest = 0 loop do - case tk - when TkSEMICOLON + break if tk.nil? + case tk[:kind] + when :on_semicolon break - when TkLPAREN, TkfLPAREN + when :on_lparen nest += 1 when end_token - if end_token == TkRPAREN + if end_token == :on_rparen nest -= 1 - break if @scanner.lex_state == :EXPR_END and nest <= 0 + break if RDoc::Parser::RipperStateLex.end?(tk) and nest <= 0 else - break unless @scanner.continue + break if RDoc::Parser::RipperStateLex.end?(tk) end - when TkCOMMENT, TkASSIGN, TkOPASGN + when :on_comment, :on_embdoc unget_tk(tk) break - when nil then - break + when :on_op + if tk[:text] =~ /^(.{1,2})?=$/ + unget_tk(tk) + break + end end tk = get_tk end @@ -734,33 +824,33 @@ class RDoc::Parser::Ruby < RDoc::Parser # Parses a class in +context+ with +comment+ def parse_class container, single, tk, comment - offset = tk.seek - line_no = tk.line_no + line_no = tk[:line_no] declaration_context = container - container, name_t, given_name = get_class_or_module container - - cls = - case name_t - when TkCONSTANT - parse_class_regular container, declaration_context, single, - name_t, given_name, comment - when TkLSHFT - case name = get_class_specification - when 'self', container.name - parse_statements container, SINGLE - return # don't update offset or line - else - parse_class_singleton container, name, comment - end + container, name_t, given_name, = get_class_or_module container + + if name_t[:kind] == :on_const + cls = parse_class_regular container, declaration_context, single, + name_t, given_name, comment + elsif name_t[:kind] == :on_op && name_t[:text] == '<<' + case name = get_class_specification + when 'self', container.name + read_documentation_modifiers cls, RDoc::CLASS_MODIFIERS + parse_statements container, SINGLE + return # don't update line else - warn "Expected class name or '<<'. Got #{name_t.class}: #{name_t.text.inspect}" - return + cls = parse_class_singleton container, name, comment end + else + warn "Expected class name or '<<'. Got #{name_t[:kind]}: #{name_t[:text].inspect}" + return + end - cls.offset = offset cls.line = line_no + # after end modifiers + read_documentation_modifiers cls, RDoc::CLASS_MODIFIERS + cls end @@ -776,7 +866,8 @@ class RDoc::Parser::Ruby < RDoc::Parser given_name = $' end - if TkLT === peek_tk then + tk = peek_tk + if tk[:kind] == :on_op && tk[:text] == '<' then get_tk skip_tkspace superclass = get_class_specification @@ -846,102 +937,125 @@ class RDoc::Parser::Ruby < RDoc::Parser # true, no found constants will be added to RDoc. def parse_constant container, tk, comment, ignore_constants = false - offset = tk.seek - line_no = tk.line_no + line_no = tk[:line_no] - name = tk.name - skip_tkspace false + name = tk[:text] + skip_tkspace_without_nl return unless name =~ /^\w+$/ - eq_tk = get_tk - - if TkCOLON2 === eq_tk then - unget_tk eq_tk + new_modules = [] + if :on_op == peek_tk[:kind] && '::' == peek_tk[:text] then unget_tk tk - container, name_t, = get_class_or_module container, ignore_constants + container, name_t, _, new_modules = get_class_or_module container, true - name = name_t.name + name = name_t[:text] + end - eq_tk = get_tk + is_array_or_hash = false + if peek_tk && :on_lbracket == peek_tk[:kind] + get_tk + nest = 1 + while bracket_tk = get_tk + case bracket_tk[:kind] + when :on_lbracket + nest += 1 + when :on_rbracket + nest -= 1 + break if nest == 0 + end + end + skip_tkspace_without_nl + is_array_or_hash = true end - unless TkASSIGN === eq_tk then - unget_tk eq_tk + unless peek_tk && :on_op == peek_tk[:kind] && '=' == peek_tk[:text] then return false end + get_tk - if TkGT === peek_tk then - unget_tk eq_tk - return + unless ignore_constants + new_modules.each do |prev_c, new_module| + prev_c.add_module_by_normal_module new_module + new_module.ignore unless prev_c.document_children + @top_level.add_to_classes_or_modules new_module + end end value = '' con = RDoc::Constant.new name, value, comment - body = parse_constant_body container, con + body = parse_constant_body container, con, is_array_or_hash return unless body - value.replace body + con.value = body record_location con - con.offset = offset con.line = line_no read_documentation_modifiers con, RDoc::CONSTANT_MODIFIERS + return if is_array_or_hash + @stats.add_constant con container.add_constant con true end - def parse_constant_body container, constant # :nodoc: + def parse_constant_body container, constant, is_array_or_hash # :nodoc: nest = 0 - rhs_name = '' + rhs_name = ''.dup get_tkread tk = get_tk + body = nil loop do - case tk - when TkSEMICOLON then + break if tk.nil? + if :on_semicolon == tk[:kind] then break if nest <= 0 - when TkLPAREN, TkfLPAREN, TkLBRACE, TkfLBRACE, TkLBRACK, TkfLBRACK, - TkDO, TkIF, TkUNLESS, TkCASE, TkDEF, TkBEGIN then + elsif [:on_tlambeg, :on_lparen, :on_lbrace, :on_lbracket].include?(tk[:kind]) then + nest += 1 + elsif (:on_kw == tk[:kind] && 'def' == tk[:text]) then nest += 1 - when TkRPAREN, TkRBRACE, TkRBRACK, TkEND then + elsif (:on_kw == tk[:kind] && %w{do if unless case begin}.include?(tk[:text])) then + if (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) == 0 + nest += 1 + end + elsif [:on_rparen, :on_rbrace, :on_rbracket].include?(tk[:kind]) || + (:on_kw == tk[:kind] && 'end' == tk[:text]) then nest -= 1 - when TkCOMMENT then - if nest <= 0 and stop_at_EXPR_END then - unget_tk tk + elsif (:on_comment == tk[:kind] or :on_embdoc == tk[:kind]) then + unget_tk tk + if nest <= 0 and RDoc::Parser::RipperStateLex.end?(tk) then + body = get_tkread_clean(/^[ \t]+/, '') + read_documentation_modifiers constant, RDoc::CONSTANT_MODIFIERS break else - unget_tk tk read_documentation_modifiers constant, RDoc::CONSTANT_MODIFIERS end - when TkCONSTANT then - rhs_name << tk.name + elsif :on_const == tk[:kind] then + rhs_name << tk[:text] - if nest <= 0 and TkNL === peek_tk then - create_module_alias container, constant, rhs_name + next_tk = peek_tk + if nest <= 0 and (next_tk.nil? || :on_nl == next_tk[:kind]) then + create_module_alias container, constant, rhs_name unless is_array_or_hash break end - when TkNL then - if nest <= 0 and stop_at_EXPR_END then + elsif :on_nl == tk[:kind] then + if nest <= 0 and RDoc::Parser::RipperStateLex.end?(tk) then unget_tk tk break end - when TkCOLON2, TkCOLON3 then + elsif :on_op == tk[:kind] && '::' == tk[:text] rhs_name << '::' - when nil then - break end tk = get_tk end - get_tkread_clean(/^[ \t]+/, '') + body ? body : get_tkread_clean(/^[ \t]+/, '') end ## @@ -950,24 +1064,22 @@ class RDoc::Parser::Ruby < RDoc::Parser def parse_comment container, tk, comment return parse_comment_tomdoc container, tk, comment if @markup == 'tomdoc' - column = tk.char_no - offset = tk.seek - line_no = tk.line_no + column = tk[:char_no] + line_no = comment.line.nil? ? tk[:line_no] : comment.line - text = comment.text - - singleton = !!text.sub!(/(^# +:?)(singleton-)(method:)/, '\1\3') + comment.text = comment.text.sub(/(^# +:?)(singleton-)(method:)/, '\1\3') + singleton = !!$~ co = - if text.sub!(/^# +:?method: *(\S*).*?\n/i, '') then - parse_comment_ghost container, text, $1, column, line_no, comment - elsif text.sub!(/# +:?(attr(_reader|_writer|_accessor)?): *(\S*).*?\n/i, '') then + if (comment.text = comment.text.sub(/^# +:?method: *(\S*).*?\n/i, '')) && !!$~ then + line_no += $`.count("\n") + parse_comment_ghost container, comment.text, $1, column, line_no, comment + elsif (comment.text = comment.text.sub(/# +:?(attr(_reader|_writer|_accessor)?): *(\S*).*?\n/i, '')) && !!$~ then parse_comment_attr container, $1, $3, comment end if co then co.singleton = singleton - co.offset = offset co.line = line_no end @@ -998,12 +1110,11 @@ class RDoc::Parser::Ruby < RDoc::Parser record_location meth meth.start_collecting_tokens - indent = TkSPACE.new 0, 1, 1 - indent.set_text " " * column - - position_comment = TkCOMMENT.new 0, line_no, 1 - position_comment.set_text "# File #{@top_level.relative_name}, line #{line_no}" - meth.add_tokens [position_comment, NEWLINE_TOKEN, indent] + indent = RDoc::Parser::RipperStateLex::Token.new(1, 1, :on_sp, ' ' * column) + position_comment = RDoc::Parser::RipperStateLex::Token.new(line_no, 1, :on_comment) + position_comment[:text] = "# File #{@top_level.relative_name}, line #{line_no}" + newline = RDoc::Parser::RipperStateLex::Token.new(0, 0, :on_nl, "\n") + meth.add_tokens [position_comment, newline, indent] meth.params = if text.sub!(/^#\s+:?args?:\s*(.*?)\s*$/i, '') then @@ -1032,23 +1143,21 @@ class RDoc::Parser::Ruby < RDoc::Parser def parse_comment_tomdoc container, tk, comment return unless signature = RDoc::TomDoc.signature(comment) - offset = tk.seek - line_no = tk.line_no + column = tk[:char_no] + line_no = tk[:line_no] name, = signature.split %r%[ \(]%, 2 meth = RDoc::GhostMethod.new get_tkread, name record_location meth - meth.offset = offset meth.line = line_no meth.start_collecting_tokens - indent = TkSPACE.new 0, 1, 1 - indent.set_text " " * offset - - position_comment = TkCOMMENT.new 0, line_no, 1 - position_comment.set_text "# File #{@top_level.relative_name}, line #{line_no}" - meth.add_tokens [position_comment, NEWLINE_TOKEN, indent] + indent = RDoc::Parser::RipperStateLex::Token.new(1, 1, :on_sp, ' ' * column) + position_comment = RDoc::Parser::RipperStateLex::Token.new(line_no, 1, :on_comment) + position_comment[:text] = "# File #{@top_level.relative_name}, line #{line_no}" + newline = RDoc::Parser::RipperStateLex::Token.new(0, 0, :on_nl, "\n") + meth.add_tokens [position_comment, newline, indent] meth.call_seq = signature @@ -1071,30 +1180,49 @@ class RDoc::Parser::Ruby < RDoc::Parser loop do skip_tkspace_comment - name = get_constant_with_optional_parens + name = get_included_module_with_optional_parens unless name.empty? then obj = container.add klass, name, comment record_location obj end - return unless TkCOMMA === peek_tk + return if peek_tk.nil? || :on_comma != peek_tk[:kind] get_tk end end ## + # Parses an +included+ with a block feature of ActiveSupport::Concern. + + def parse_included_with_activesupport_concern container, comment # :nodoc: + skip_tkspace_without_nl + tk = get_tk + unless tk[:kind] == :on_lbracket || (tk[:kind] == :on_kw && tk[:text] == 'do') + unget_tk tk + return nil # should be a block + end + + parse_statements container + + container + end + + ## # Parses identifiers that can create new methods or change visibility. # # Returns true if the comment was not consumed. def parse_identifier container, single, tk, comment # :nodoc: - case tk.name + case tk[:text] when 'private', 'protected', 'public', 'private_class_method', 'public_class_method', 'module_function' then parse_visibility container, single, tk return true + when 'private_constant', 'public_constant' + parse_constant_visibility container, single, tk + return true when 'attr' then parse_attr container, single, tk, comment when /^attr_(reader|writer|accessor)$/ then @@ -1159,7 +1287,9 @@ class RDoc::Parser::Ruby < RDoc::Parser tmp = RDoc::CodeObject.new read_documentation_modifiers tmp, RDoc::ATTR_MODIFIERS - if comment.text.sub!(/^# +:?(attr(_reader|_writer|_accessor)?): *(\S*).*?\n/i, '') then + regexp = /^# +:?(attr(_reader|_writer|_accessor)?): *(\S*).*?\n/i + if regexp =~ comment.text then + comment.text = comment.text.sub(regexp, '') rw = case $1 when 'attr_reader' then 'R' when 'attr_writer' then 'W' @@ -1183,17 +1313,17 @@ class RDoc::Parser::Ruby < RDoc::Parser # Parses a meta-programmed method def parse_meta_method(container, single, tk, comment) - column = tk.char_no - offset = tk.seek - line_no = tk.line_no + column = tk[:char_no] + line_no = tk[:line_no] start_collecting_tokens add_token tk add_token_listener self - skip_tkspace false + skip_tkspace_without_nl - singleton = !!comment.text.sub!(/(^# +:?)(singleton-)(method:)/, '\1\3') + comment.text = comment.text.sub(/(^# +:?)(singleton-)(method:)/, '\1\3') + singleton = !!$~ name = parse_meta_method_name comment, tk @@ -1201,19 +1331,17 @@ class RDoc::Parser::Ruby < RDoc::Parser meth = RDoc::MetaMethod.new get_tkread, name record_location meth - meth.offset = offset meth.line = line_no meth.singleton = singleton remove_token_listener self meth.start_collecting_tokens - indent = TkSPACE.new 0, 1, 1 - indent.set_text " " * column - - position_comment = TkCOMMENT.new 0, line_no, 1 - position_comment.value = "# File #{@top_level.relative_name}, line #{line_no}" - meth.add_tokens [position_comment, NEWLINE_TOKEN, indent] + indent = RDoc::Parser::RipperStateLex::Token.new(1, 1, :on_sp, ' ' * column) + position_comment = RDoc::Parser::RipperStateLex::Token.new(line_no, 1, :on_comment) + position_comment[:text] = "# File #{@top_level.relative_name}, line #{line_no}" + newline = RDoc::Parser::RipperStateLex::Token.new(0, 0, :on_nl, "\n") + meth.add_tokens [position_comment, newline, indent] meth.add_tokens @token_stream parse_meta_method_params container, single, meth, tk, comment @@ -1237,17 +1365,16 @@ class RDoc::Parser::Ruby < RDoc::Parser name_t = get_tk - case name_t - when TkSYMBOL then - name_t.text[1..-1] - when TkSTRING then - name_t.value[1..-2] - when TkASSIGN then # ignore + if :on_symbol == name_t[:kind] then + name_t[:text][1..-1] + elsif :on_tstring == name_t[:kind] then + name_t[:text][1..-2] + elsif :on_op == name_t[:kind] && '=' == name_t[:text] then # ignore remove_token_listener self nil else - warn "unknown name token #{name_t.inspect} for meta-method '#{tk.name}'" + warn "unknown name token #{name_t.inspect} for meta-method '#{tk[:text]}'" 'unknown' end end @@ -1259,6 +1386,7 @@ class RDoc::Parser::Ruby < RDoc::Parser token_listener meth do meth.params = '' + look_for_directives_in meth, comment comment.normalize comment.extract_call_seq meth @@ -1267,14 +1395,13 @@ class RDoc::Parser::Ruby < RDoc::Parser last_tk = tk while tk = get_tk do - case tk - when TkSEMICOLON then + if :on_semicolon == tk[:kind] then break - when TkNL then - break unless last_tk and TkCOMMA === last_tk - when TkSPACE then + elsif :on_nl == tk[:kind] then + break unless last_tk and :on_comma == last_tk[:kind] + elsif :on_sp == tk[:kind] then # expression continues - when TkDO then + elsif :on_kw == tk[:kind] && 'do' == tk[:text] then parse_statements container, single, meth break else @@ -1291,9 +1418,8 @@ class RDoc::Parser::Ruby < RDoc::Parser singleton = nil added_container = false name = nil - column = tk.char_no - offset = tk.seek - line_no = tk.line_no + column = tk[:char_no] + line_no = tk[:line_no] start_collecting_tokens add_token tk @@ -1307,19 +1433,18 @@ class RDoc::Parser::Ruby < RDoc::Parser return unless name meth = RDoc::AnyMethod.new get_tkread, name + look_for_directives_in meth, comment meth.singleton = single == SINGLE ? true : singleton record_location meth - meth.offset = offset meth.line = line_no meth.start_collecting_tokens - indent = TkSPACE.new 0, 1, 1 - indent.set_text " " * column - - token = TkCOMMENT.new 0, line_no, 1 - token.set_text "# File #{@top_level.relative_name}, line #{line_no}" - meth.add_tokens [token, NEWLINE_TOKEN, indent] + indent = RDoc::Parser::RipperStateLex::Token.new(1, 1, :on_sp, ' ' * column) + token = RDoc::Parser::RipperStateLex::Token.new(line_no, 1, :on_comment) + token[:text] = "# File #{@top_level.relative_name}, line #{line_no}" + newline = RDoc::Parser::RipperStateLex::Token.new(0, 0, :on_nl, "\n") + meth.add_tokens [token, newline, indent] meth.add_tokens @token_stream parse_method_params_and_body container, single, meth, added_container @@ -1329,6 +1454,9 @@ class RDoc::Parser::Ruby < RDoc::Parser meth.comment = comment + # after end modifiers + read_documentation_modifiers meth, RDoc::METHOD_MODIFIERS + @stats.add_method meth end @@ -1337,7 +1465,6 @@ class RDoc::Parser::Ruby < RDoc::Parser def parse_method_params_and_body container, single, meth, added_container token_listener meth do - @scanner.continue = false parse_method_parameters meth if meth.document_self or not @track_visibility then @@ -1380,15 +1507,13 @@ class RDoc::Parser::Ruby < RDoc::Parser # it is a singleton or regular method. def parse_method_name container # :nodoc: - @scanner.lex_state = :EXPR_FNAME - skip_tkspace name_t = get_tk - back_tk = skip_tkspace + back_tk = skip_tkspace_without_nl singleton = false - case dot = get_tk - when TkDOT, TkCOLON2 then + dot = get_tk + if dot[:kind] == :on_period || (dot[:kind] == :on_op && dot[:text] == '::') then singleton = true name, container = parse_method_name_singleton container, name_t @@ -1409,16 +1534,15 @@ class RDoc::Parser::Ruby < RDoc::Parser # is parsed from the token stream for a regular method. def parse_method_name_regular container, name_t # :nodoc: - case name_t - when TkSTAR, TkAMPER then - name_t.text + if :on_op == name_t[:kind] && (%w{* & [] []= <<}.include?(name_t[:text])) then + name_t[:text] else - unless name_t.respond_to? :name then + unless [:on_kw, :on_const, :on_ident].include?(name_t[:kind]) then warn "expected method name token, . or ::, got #{name_t.inspect}" skip_method container return end - name_t.name + name_t[:text] end end @@ -1428,47 +1552,42 @@ class RDoc::Parser::Ruby < RDoc::Parser # for a singleton method. def parse_method_name_singleton container, name_t # :nodoc: - @scanner.lex_state = :EXPR_FNAME skip_tkspace name_t2 = get_tk - name = - case name_t - when TkSELF, TkMOD then - case name_t2 - # NOTE: work around '[' being consumed early and not being re-tokenized - # as a TkAREF - when TkfLBRACK then - get_tk - '[]' - else - name_t2.name - end - when TkCONSTANT then - name = name_t2.name + if (:on_kw == name_t[:kind] && 'self' == name_t[:text]) || (:on_op == name_t[:kind] && '%' == name_t[:text]) then + # NOTE: work around '[' being consumed early + if :on_lbracket == name_t2[:kind] + get_tk + name = '[]' + else + name = name_t2[:text] + end + elsif :on_const == name_t[:kind] then + name = name_t2[:text] - container = get_method_container container, name_t + container = get_method_container container, name_t - return unless container + return unless container - name - when TkIDENTIFIER, TkIVAR, TkGVAR then - parse_method_dummy container + name + elsif :on_ident == name_t[:kind] || :on_ivar == name_t[:kind] || :on_gvar == name_t[:kind] then + parse_method_dummy container - nil - when TkTRUE, TkFALSE, TkNIL then - klass_name = "#{name_t.name.capitalize}Class" - container = @store.find_class_named klass_name - container ||= @top_level.add_class RDoc::NormalClass, klass_name + name = nil + elsif (:on_kw == name_t[:kind]) && ('true' == name_t[:text] || 'false' == name_t[:text] || 'nil' == name_t[:text]) then + klass_name = "#{name_t[:text].capitalize}Class" + container = @store.find_class_named klass_name + container ||= @top_level.add_class RDoc::NormalClass, klass_name - name_t2.name - else - warn "unexpected method name token #{name_t.inspect}" - # break - skip_method container + name = name_t2[:text] + else + warn "unexpected method name token #{name_t.inspect}" + # break + skip_method container - nil - end + name = nil + end return name, container end @@ -1478,45 +1597,56 @@ class RDoc::Parser::Ruby < RDoc::Parser def parse_method_or_yield_parameters(method = nil, modifiers = RDoc::METHOD_MODIFIERS) - skip_tkspace false + skip_tkspace_without_nl tk = get_tk end_token = get_end_token tk return '' unless end_token nest = 0 + continue = false - loop do - case tk - when TkSEMICOLON then + while tk != nil do + case tk[:kind] + when :on_semicolon then break if nest == 0 - when TkLBRACE, TkfLBRACE then + when :on_lbracket then nest += 1 - when TkRBRACE then + when :on_rbracket then + nest -= 1 + when :on_lbrace then + nest += 1 + when :on_rbrace then nest -= 1 if nest <= 0 # we might have a.each { |i| yield i } unget_tk(tk) if nest < 0 break end - when TkLPAREN, TkfLPAREN then + when :on_lparen then nest += 1 - when end_token then - if end_token == TkRPAREN + when end_token[:kind] then + if end_token[:kind] == :on_rparen nest -= 1 break if nest <= 0 else - break unless @scanner.continue + break end - when TkRPAREN then + when :on_rparen then nest -= 1 - when method && method.block_params.nil? && TkCOMMENT then - unget_tk tk - read_documentation_modifiers method, modifiers - @read.pop - when TkCOMMENT then + when :on_comment, :on_embdoc then @read.pop - when nil then - break + if :on_nl == end_token[:kind] and "\n" == tk[:text][-1] and + (!continue or (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) != 0) then + if method && method.block_params.nil? then + unget_tk tk + read_documentation_modifiers method, modifiers + end + break if !continue and nest <= 0 + end + when :on_comma then + continue = true + when :on_ident then + continue = false if continue end tk = get_tk end @@ -1540,7 +1670,7 @@ class RDoc::Parser::Ruby < RDoc::Parser return if method.block_params - skip_tkspace false + skip_tkspace_without_nl read_documentation_modifiers method, RDoc::METHOD_MODIFIERS end @@ -1550,7 +1680,7 @@ class RDoc::Parser::Ruby < RDoc::Parser def parse_module container, single, tk, comment container, name_t, = get_class_or_module container - name = name_t.name + name = name_t[:text] mod = container.add_module RDoc::NormalModule, name mod.ignore unless container.document_children @@ -1560,6 +1690,9 @@ class RDoc::Parser::Ruby < RDoc::Parser mod.add_comment comment, @top_level parse_statements mod + # after end modifiers + read_documentation_modifiers mod, RDoc::CLASS_MODIFIERS + @stats.add_module mod end @@ -1570,12 +1703,12 @@ class RDoc::Parser::Ruby < RDoc::Parser skip_tkspace_comment tk = get_tk - if TkLPAREN === tk then + if :on_lparen == tk[:kind] then skip_tkspace_comment tk = get_tk end - name = tk.text if TkSTRING === tk + name = tk[:text][1..-2] if :on_tstring == tk[:kind] if name then @top_level.add_require RDoc::Require.new(name, comment) @@ -1588,19 +1721,30 @@ class RDoc::Parser::Ruby < RDoc::Parser # Parses a rescue def parse_rescue - skip_tkspace false + skip_tkspace_without_nl while tk = get_tk - case tk - when TkNL, TkSEMICOLON then + case tk[:kind] + when :on_nl, :on_semicolon, :on_comment then break - when TkCOMMA then - skip_tkspace false + when :on_comma then + skip_tkspace_without_nl - get_tk if TkNL === peek_tk + get_tk if :on_nl == peek_tk[:kind] end - skip_tkspace false + skip_tkspace_without_nl + end + end + + ## + # Retrieve comment body without =begin/=end + + def retrieve_comment_body(tk) + if :on_embdoc == tk[:kind] + tk[:text].gsub(/\A=begin.*\n/, '').gsub(/=end\n?\z/, '') + else + tk[:text] end end @@ -1610,7 +1754,7 @@ class RDoc::Parser::Ruby < RDoc::Parser def parse_statements(container, single = NORMAL, current_method = nil, comment = new_comment('')) raise 'no' unless RDoc::Comment === comment - comment.force_encoding @encoding if @encoding + comment = RDoc::Encoding.change_encoding comment, @encoding if @encoding nest = 1 save_visibility = container.visibility @@ -1621,35 +1765,55 @@ class RDoc::Parser::Ruby < RDoc::Parser keep_comment = false try_parse_comment = false - non_comment_seen = true unless TkCOMMENT === tk + non_comment_seen = true unless (:on_comment == tk[:kind] or :on_embdoc == tk[:kind]) - case tk - when TkNL then - skip_tkspace - tk = get_tk + case tk[:kind] + when :on_nl, :on_ignored_nl, :on_comment, :on_embdoc then + if :on_nl == tk[:kind] or :on_ignored_nl == tk[:kind] + skip_tkspace + tk = get_tk + else + past_tokens = @read.size > 1 ? @read[0..-2] : [] + nl_position = 0 + past_tokens.reverse.each_with_index do |read_tk, i| + if read_tk =~ /^\n$/ then + nl_position = (past_tokens.size - 1) - i + break + elsif read_tk =~ /^#.*\n$/ then + nl_position = ((past_tokens.size - 1) - i) + 1 + break + end + end + comment_only_line = past_tokens[nl_position..-1].all?{ |c| c =~ /^\s+$/ } + unless comment_only_line then + tk = get_tk + end + end - if TkCOMMENT === tk then + if tk and (:on_comment == tk[:kind] or :on_embdoc == tk[:kind]) then if non_comment_seen then # Look for RDoc in a comment about to be thrown away non_comment_seen = parse_comment container, tk, comment unless comment.empty? comment = '' - comment.force_encoding @encoding if @encoding + comment = RDoc::Encoding.change_encoding comment, @encoding if @encoding end - while TkCOMMENT === tk do - comment << tk.text << "\n" - - tk = get_tk + line_no = nil + while tk and (:on_comment == tk[:kind] or :on_embdoc == tk[:kind]) do + comment_body = retrieve_comment_body(tk) + line_no = tk[:line_no] if comment.empty? + comment += comment_body + comment << "\n" unless comment_body =~ /\n\z/ - if TkNL === tk then - skip_tkspace false # leading spaces - tk = get_tk + if comment_body.size > 1 && comment_body =~ /\n\z/ then + skip_tkspace_without_nl # leading spaces end + tk = get_tk end - comment = new_comment comment + comment = new_comment comment, line_no unless comment.empty? then look_for_directives_in container, comment @@ -1667,78 +1831,88 @@ class RDoc::Parser::Ruby < RDoc::Parser unget_tk tk keep_comment = true + container.current_line_visibility = nil - when TkCLASS then - parse_class container, single, tk, comment + when :on_kw then + case tk[:text] + when 'class' then + parse_class container, single, tk, comment - when TkMODULE then - parse_module container, single, tk, comment + when 'module' then + parse_module container, single, tk, comment - when TkDEF then - parse_method container, single, tk, comment + when 'def' then + parse_method container, single, tk, comment - when TkCONSTANT then - unless parse_constant container, tk, comment, current_method then - try_parse_comment = true - end + when 'alias' then + parse_alias container, single, tk, comment unless current_method - when TkALIAS then - parse_alias container, single, tk, comment unless current_method + when 'yield' then + if current_method.nil? then + warn "Warning: yield outside of method" if container.document_self + else + parse_yield container, single, tk, current_method + end - when TkYIELD then - if current_method.nil? then - warn "Warning: yield outside of method" if container.document_self - else - parse_yield container, single, tk, current_method - end + when 'until', 'while' then + if (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) == 0 + nest += 1 + skip_optional_do_after_expression + end - # Until and While can have a 'do', which shouldn't increase the nesting. - # We can't solve the general case, but we can handle most occurrences by - # ignoring a do at the end of a line. + # Until and While can have a 'do', which shouldn't increase the nesting. + # We can't solve the general case, but we can handle most occurrences by + # ignoring a do at the end of a line. - when TkUNTIL, TkWHILE then - nest += 1 - skip_optional_do_after_expression + # 'for' is trickier + when 'for' then + nest += 1 + skip_for_variable + skip_optional_do_after_expression - # 'for' is trickier - when TkFOR then - nest += 1 - skip_for_variable - skip_optional_do_after_expression + when 'case', 'do', 'if', 'unless', 'begin' then + if (tk[:state] & RDoc::Parser::RipperStateLex::EXPR_LABEL) == 0 + nest += 1 + end - when TkCASE, TkDO, TkIF, TkUNLESS, TkBEGIN then - nest += 1 + when 'super' then + current_method.calls_super = true if current_method + + when 'rescue' then + parse_rescue + + when 'end' then + nest -= 1 + if nest == 0 then + container.ongoing_visibility = save_visibility + + parse_comment container, tk, comment unless comment.empty? - when TkSUPER then - current_method.calls_super = true if current_method + return + end + end - when TkRESCUE then - parse_rescue + when :on_const then + unless parse_constant container, tk, comment, current_method then + try_parse_comment = true + end - when TkIDENTIFIER then + when :on_ident then if nest == 1 and current_method.nil? then keep_comment = parse_identifier container, single, tk, comment end - case tk.name + case tk[:text] when "require" then parse_require container, comment when "include" then parse_extend_or_include RDoc::Include, container, comment when "extend" then parse_extend_or_include RDoc::Extend, container, comment + when "included" then + parse_included_with_activesupport_concern container, comment end - when TkEND then - nest -= 1 - if nest == 0 then - read_documentation_modifiers container, RDoc::CLASS_MODIFIERS - container.ongoing_visibility = save_visibility - - parse_comment container, tk, comment unless comment.empty? - - return - end else try_parse_comment = nest == 1 end @@ -1752,7 +1926,7 @@ class RDoc::Parser::Ruby < RDoc::Parser unless keep_comment then comment = new_comment '' - comment.force_encoding @encoding if @encoding + comment = RDoc::Encoding.change_encoding comment, @encoding if @encoding container.params = nil container.block_params = nil end @@ -1770,8 +1944,8 @@ class RDoc::Parser::Ruby < RDoc::Parser def parse_symbol_arg(no = nil) skip_tkspace_comment - case tk = get_tk - when TkLPAREN + tk = get_tk + if tk[:kind] == :on_lparen parse_symbol_arg_paren no else parse_symbol_arg_space no, tk @@ -1793,10 +1967,10 @@ class RDoc::Parser::Ruby < RDoc::Parser end skip_tkspace_comment - case tk2 = get_tk - when TkRPAREN + case (tk2 = get_tk)[:kind] + when :on_rparen break - when TkCOMMA + when :on_comma else warn("unexpected token: '#{tk2.inspect}'") if $DEBUG_RDOC break @@ -1820,10 +1994,10 @@ class RDoc::Parser::Ruby < RDoc::Parser end loop do - skip_tkspace false + skip_tkspace_without_nl tk1 = get_tk - unless TkCOMMA === tk1 then + if tk1.nil? || :on_comma != tk1[:kind] then unget_tk tk1 break end @@ -1842,12 +2016,12 @@ class RDoc::Parser::Ruby < RDoc::Parser # Returns symbol text from the next token def parse_symbol_in_arg - case tk = get_tk - when TkSYMBOL - tk.text.sub(/^:/, '') - when TkSTRING - eval @read[-1] - when TkDSTRING, TkIDENTIFIER then + tk = get_tk + if :on_symbol == tk[:kind] then + tk[:text].sub(/^:/, '') + elsif :on_tstring == tk[:kind] then + tk[:text][1..-2] + elsif :on_dstring == tk[:kind] or :on_ident == tk[:kind] then nil # ignore else warn("Expected symbol or string, got #{tk.inspect}") if $DEBUG_RDOC @@ -1881,27 +2055,44 @@ class RDoc::Parser::Ruby < RDoc::Parser skip_tkspace_comment false - case peek_tk - # Ryan Davis suggested the extension to ignore modifiers, because he - # often writes - # - # protected unless $TESTING - # - when TkNL, TkUNLESS_MOD, TkIF_MOD, TkSEMICOLON then + ptk = peek_tk + # Ryan Davis suggested the extension to ignore modifiers, because he + # often writes + # + # protected unless $TESTING + # + if [:on_nl, :on_semicolon].include?(ptk[:kind]) || (:on_kw == ptk[:kind] && (['if', 'unless'].include?(ptk[:text]))) then container.ongoing_visibility = vis + elsif :on_kw == ptk[:kind] && 'def' == ptk[:text] + container.current_line_visibility = vis else update_visibility container, vis_type, vis, singleton end end ## + # Parses a Module#private_constant or Module#public_constant call from +tk+. + + def parse_constant_visibility(container, single, tk) + args = parse_symbol_arg + case tk[:text] + when 'private_constant' + vis = :private + when 'public_constant' + vis = :public + else + raise RDoc::Error, 'Unreachable' + end + container.set_constant_visibility_for args, vis + end + + ## # Determines the block parameter for +context+ def parse_yield(context, single, tk, method) return if method.block_params get_tkread - @scanner.continue = false method.block_params = parse_method_or_yield_parameters end @@ -1925,11 +2116,10 @@ class RDoc::Parser::Ruby < RDoc::Parser while tk = get_tk do tokens << tk - case tk - when TkNL, TkDEF then + if :on_nl == tk[:kind] or (:on_kw == tk[:kind] && 'def' == tk[:text]) then return - when TkCOMMENT then - return unless tk.text =~ /\s*:?([\w-]+):\s*(.*)/ + elsif :on_comment == tk[:kind] or :on_embdoc == tk[:kind] then + return unless tk[:text] =~ /\s*:?([\w-]+):\s*(.*)/ directive = $1.downcase @@ -1939,7 +2129,7 @@ class RDoc::Parser::Ruby < RDoc::Parser end end ensure - unless tokens.length == 1 and TkCOMMENT === tokens.first then + unless tokens.length == 1 and (:on_comment == tokens.first[:kind] or :on_embdoc == tokens.first[:kind]) then tokens.reverse_each do |token| unget_tk token end @@ -1953,6 +2143,7 @@ class RDoc::Parser::Ruby < RDoc::Parser # See also RDoc::Markup::PreProcess#handle_directive def read_documentation_modifiers context, allowed + skip_tkspace_without_nl directive, value = read_directive allowed return unless directive @@ -1980,15 +2171,6 @@ class RDoc::Parser::Ruby < RDoc::Parser end ## - # Removes private comments from +comment+ - #-- - # TODO remove - - def remove_private_comments comment - comment.remove_private - end - - ## # Scans this Ruby file for Ruby constructs def scan @@ -1999,29 +2181,33 @@ class RDoc::Parser::Ruby < RDoc::Parser parse_top_level_statements @top_level rescue StandardError => e - bytes = '' - - 20.times do @scanner.ungetc end - count = 0 - 60.times do |i| - count = i - byte = @scanner.getc - break unless byte - bytes << byte + if @content.include?('<%') and @content.include?('%>') then + # Maybe, this is ERB. + $stderr.puts "\033[2KRDoc detects ERB file. Skips it for compatibility:" + $stderr.puts @file_name + return end - count -= 20 - count.times do @scanner.ungetc end + + if @scanner_point >= @scanner.size + now_line_no = @scanner[@scanner.size - 1][:line_no] + else + now_line_no = peek_tk[:line_no] + end + first_tk_index = @scanner.find_index { |tk| tk[:line_no] == now_line_no } + last_tk_index = @scanner.find_index { |tk| tk[:line_no] == now_line_no + 1 } + last_tk_index = last_tk_index ? last_tk_index - 1 : @scanner.size - 1 + code = @scanner[first_tk_index..last_tk_index].map{ |t| t[:text] }.join $stderr.puts <<-EOF -#{self.class} failure around line #{@scanner.line_no} of +#{self.class} failure around line #{now_line_no} of #{@file_name} EOF - unless bytes.empty? then + unless code.empty? then + $stderr.puts code $stderr.puts - $stderr.puts bytes.inspect end raise e @@ -2035,53 +2221,52 @@ class RDoc::Parser::Ruby < RDoc::Parser # while, until, and for have an optional do def skip_optional_do_after_expression - skip_tkspace false + skip_tkspace_without_nl tk = get_tk - end_token = get_end_token tk b_nest = 0 nest = 0 - @scanner.continue = false loop do - case tk - when TkSEMICOLON then + break unless tk + case tk[:kind] + when :on_semicolon, :on_nl, :on_ignored_nl then break if b_nest.zero? - when TkLPAREN, TkfLPAREN then + when :on_lparen then nest += 1 - when TkBEGIN then - b_nest += 1 - when TkEND then - b_nest -= 1 - when TkDO - break if nest.zero? - when end_token then - if end_token == TkRPAREN - nest -= 1 - break if @scanner.lex_state == :EXPR_END and nest.zero? - else - break unless @scanner.continue + when :on_rparen then + nest -= 1 + when :on_kw then + case tk[:text] + when 'begin' + b_nest += 1 + when 'end' + b_nest -= 1 + when 'do' + break if nest.zero? + end + when :on_comment, :on_embdoc then + if b_nest.zero? and "\n" == tk[:text][-1] then + break end - when nil then - break end tk = get_tk end - skip_tkspace false + skip_tkspace_without_nl - get_tk if TkDO === peek_tk + get_tk if peek_tk && :on_kw == peek_tk[:kind] && 'do' == peek_tk[:text] end ## # skip the var [in] part of a 'for' statement def skip_for_variable - skip_tkspace false + skip_tkspace_without_nl get_tk - skip_tkspace false + skip_tkspace_without_nl tk = get_tk - unget_tk(tk) unless TkIN === tk + unget_tk(tk) unless :on_kw == tk[:kind] and 'in' == tk[:text] end ## @@ -2098,8 +2283,9 @@ class RDoc::Parser::Ruby < RDoc::Parser def skip_tkspace_comment(skip_nl = true) loop do - skip_tkspace skip_nl - return unless TkCOMMENT === peek_tk + skip_nl ? skip_tkspace : skip_tkspace_without_nl + next_tk = peek_tk + return if next_tk.nil? || (:on_comment != next_tk[:kind] and :on_embdoc != next_tk[:kind]) get_tk end end @@ -2157,4 +2343,3 @@ class RDoc::Parser::Ruby < RDoc::Parser end end - diff --git a/lib/rdoc/parser/ruby_tools.rb b/lib/rdoc/parser/ruby_tools.rb index 654431ea30..681d7166ce 100644 --- a/lib/rdoc/parser/ruby_tools.rb +++ b/lib/rdoc/parser/ruby_tools.rb @@ -1,11 +1,9 @@ +# frozen_string_literal: true ## -# Collection of methods for writing parsers against RDoc::RubyLex and -# RDoc::RubyToken +# Collection of methods for writing parsers module RDoc::Parser::RubyTools - include RDoc::RubyToken - ## # Adds a token listener +obj+, but you should probably use token_listener @@ -21,37 +19,24 @@ module RDoc::Parser::RubyTools tk = nil if @tokens.empty? then - tk = @scanner.token - @read.push @scanner.get_readed - puts "get_tk1 => #{tk.inspect}" if $TOKEN_DEBUG + if @scanner_point >= @scanner.size + return nil + else + tk = @scanner[@scanner_point] + @scanner_point += 1 + @read.push tk[:text] + end else @read.push @unget_read.shift tk = @tokens.shift - puts "get_tk2 => #{tk.inspect}" if $TOKEN_DEBUG end - tk = nil if TkEND_OF_SCRIPT === tk - - if TkSYMBEG === tk then - set_token_position tk.line_no, tk.char_no - - case tk1 = get_tk - when TkId, TkOp, TkSTRING, TkDSTRING, TkSTAR, TkAMPER then - if tk1.respond_to?(:name) then - tk = Token(TkSYMBOL).set_text(":" + tk1.name) - else - tk = Token(TkSYMBOL).set_text(":" + tk1.text) - end - - # remove the identifier we just read to replace it with a symbol - @token_listeners.each do |obj| - obj.pop_token - end if @token_listeners - else - tk = tk1 - end + if tk == nil || :on___end__ == tk[:kind] + tk = nil end + return nil unless tk + # inform any listeners of our shiny new token @token_listeners.each do |obj| obj.add_token(tk) @@ -121,19 +106,34 @@ module RDoc::Parser::RubyTools @tokens = [] @unget_read = [] @nest = 0 + @scanner_point = 0 + end + + ## + # Skips whitespace tokens including newlines + + def skip_tkspace + tokens = [] + + while (tk = get_tk) and (:on_sp == tk[:kind] or :on_nl == tk[:kind] or :on_ignored_nl == tk[:kind]) do + tokens.push(tk) + end + + unget_tk(tk) + tokens end ## - # Skips whitespace tokens including newlines if +skip_nl+ is true + # Skips whitespace tokens excluding newlines - def skip_tkspace(skip_nl = true) # HACK dup + def skip_tkspace_without_nl tokens = [] - while TkSPACE === (tk = get_tk) or (skip_nl and TkNL === tk) do - tokens.push tk + while (tk = get_tk) and :on_sp == tk[:kind] do + tokens.push(tk) end - unget_tk tk + unget_tk(tk) tokens end diff --git a/lib/rdoc/parser/simple.rb b/lib/rdoc/parser/simple.rb index 65cfc1b2e7..b1dabad0f8 100644 --- a/lib/rdoc/parser/simple.rb +++ b/lib/rdoc/parser/simple.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true ## # Parse a non-source file. We basically take the whole thing as one big # comment. @@ -18,7 +19,7 @@ class RDoc::Parser::Simple < RDoc::Parser preprocess = RDoc::Markup::PreProcess.new @file_name, @options.rdoc_include - preprocess.handle @content, @top_level + @content = preprocess.handle @content, @top_level end ## @@ -51,11 +52,10 @@ class RDoc::Parser::Simple < RDoc::Parser def remove_private_comment comment # Workaround for gsub encoding for Ruby 1.9.2 and earlier empty = '' - empty.force_encoding comment.encoding if Object.const_defined? :Encoding + empty = RDoc::Encoding.change_encoding empty, comment.encoding comment = comment.gsub(%r%^--\n.*?^\+\+\n?%m, empty) comment.sub(%r%^--\n.*%m, empty) end end - diff --git a/lib/rdoc/parser/text.rb b/lib/rdoc/parser/text.rb index f973313551..01de0cc595 100644 --- a/lib/rdoc/parser/text.rb +++ b/lib/rdoc/parser/text.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true ## # Indicates this parser is text and doesn't contain code constructs. # |
