summaryrefslogtreecommitdiff
path: root/ruby_1_8_5/lib/rdoc/parsers/parse_rb.rb
diff options
context:
space:
mode:
Diffstat (limited to 'ruby_1_8_5/lib/rdoc/parsers/parse_rb.rb')
-rw-r--r--ruby_1_8_5/lib/rdoc/parsers/parse_rb.rb2605
1 files changed, 2605 insertions, 0 deletions
diff --git a/ruby_1_8_5/lib/rdoc/parsers/parse_rb.rb b/ruby_1_8_5/lib/rdoc/parsers/parse_rb.rb
new file mode 100644
index 0000000000..dde017be7d
--- /dev/null
+++ b/ruby_1_8_5/lib/rdoc/parsers/parse_rb.rb
@@ -0,0 +1,2605 @@
+#!/usr/local/bin/ruby
+
+# Parse a Ruby source file, building a set of objects
+# representing the modules, classes, methods,
+# requires, and includes we find (these classes
+# are defined in code_objects.rb).
+
+# This file contains stuff stolen outright from:
+#
+# rtags.rb -
+# ruby-lex.rb - ruby lexical analyzer
+# ruby-token.rb - ruby tokens
+# by Keiju ISHITSUKA (Nippon Rational Inc.)
+#
+
+require "e2mmap"
+require "irb/slex"
+
+require "rdoc/code_objects"
+require "rdoc/tokenstream"
+
+require "rdoc/markup/simple_markup/preprocess"
+
+require "rdoc/parsers/parserfactory"
+
+$TOKEN_DEBUG = $DEBUG
+
+# Definitions of all tokens involved in the lexical analysis
+
+module RubyToken
+ EXPR_BEG = :EXPR_BEG
+ EXPR_MID = :EXPR_MID
+ EXPR_END = :EXPR_END
+ EXPR_ARG = :EXPR_ARG
+ EXPR_FNAME = :EXPR_FNAME
+ EXPR_DOT = :EXPR_DOT
+ EXPR_CLASS = :EXPR_CLASS
+
+ class Token
+ NO_TEXT = "??".freeze
+ attr :text
+
+ def initialize(line_no, char_no)
+ @line_no = line_no
+ @char_no = char_no
+ @text = NO_TEXT
+ end
+
+ # Because we're used in contexts that expect to return a token,
+ # we set the text string and then return ourselves
+ def set_text(text)
+ @text = text
+ self
+ end
+
+ attr_reader :line_no, :char_no, :text
+ end
+
+ class TkNode < Token
+ attr :node
+ end
+
+ class TkId < Token
+ def initialize(line_no, char_no, name)
+ super(line_no, char_no)
+ @name = name
+ end
+ attr :name
+ end
+
+ # Keyword tokens: a TkId subclass that adds no behaviour of its own.
+ class TkKW < TkId
+ end
+
+ class TkVal < Token
+ def initialize(line_no, char_no, value = nil)
+ super(line_no, char_no)
+ set_text(value)
+ end
+ end
+
+ # Operator tokens.  The name is looked up on the concrete class through
+ # +op_name+, a class method that RubyToken.def_token generates for
+ # subclasses of TkOp.
+ class TkOp < Token
+ def name
+ self.class.op_name
+ end
+ end
+
+ # Operator-assignment token.  +op+ identifies the underlying operator:
+ # a Symbol is stored as given, anything else is replaced by its
+ # TkReading2Token entry.
+ # NOTE(review): RubyToken.def_token stores arrays ([token_class, *opts])
+ # in TkReading2Token, so a non-Symbol +op+ ends up as an array here --
+ # confirm whether only the token class was intended.
+ class TkOPASGN < TkOp
+ def initialize(line_no, char_no, op)
+ super(line_no, char_no)
+ op = TkReading2Token[op] unless op.kind_of?(Symbol)
+ @op = op
+ end
+ attr :op
+ end
+
+ # Token for a character the lexer has no specific rule for.
+ # NOTE(review): the +id+ argument is never used; the name is derived
+ # from the column number (char_no.chr) instead -- confirm this is
+ # intended before relying on #name.
+ class TkUnknownChar < Token
+ def initialize(line_no, char_no, id)
+ super(line_no, char_no)
+ @name = char_no.chr
+ end
+ attr :name
+ end
+
+ # Placeholder token; RubyLex#token returns one when a syntax error is
+ # rescued and exceptions on syntax errors are disabled.
+ class TkError < Token
+ end
+
+ # Remember the position (line and column) that the next token built by
+ # Token() will be stamped with.
+ def set_token_position(line, char)
+ @prev_line_no = line
+ @prev_char_no = char
+ end
+
+ def Token(token, value = nil)
+ tk = nil
+ case token
+ when String, Symbol
+ source = token.kind_of?(String) ? TkReading2Token : TkSymbol2Token
+ if (tk = source[token]).nil?
+ IRB.fail TkReading2TokenNoKey, token
+ end
+ tk = Token(tk[0], value)
+ else
+ tk = if (token.ancestors & [TkId, TkVal, TkOPASGN, TkUnknownChar]).empty?
+ token.new(@prev_line_no, @prev_char_no)
+ else
+ token.new(@prev_line_no, @prev_char_no, value)
+ end
+ end
+ tk
+ end
+
+ TokenDefinitions = [
+ [:TkCLASS, TkKW, "class", EXPR_CLASS],
+ [:TkMODULE, TkKW, "module", EXPR_BEG],
+ [:TkDEF, TkKW, "def", EXPR_FNAME],
+ [:TkUNDEF, TkKW, "undef", EXPR_FNAME],
+ [:TkBEGIN, TkKW, "begin", EXPR_BEG],
+ [:TkRESCUE, TkKW, "rescue", EXPR_MID],
+ [:TkENSURE, TkKW, "ensure", EXPR_BEG],
+ [:TkEND, TkKW, "end", EXPR_END],
+ [:TkIF, TkKW, "if", EXPR_BEG, :TkIF_MOD],
+ [:TkUNLESS, TkKW, "unless", EXPR_BEG, :TkUNLESS_MOD],
+ [:TkTHEN, TkKW, "then", EXPR_BEG],
+ [:TkELSIF, TkKW, "elsif", EXPR_BEG],
+ [:TkELSE, TkKW, "else", EXPR_BEG],
+ [:TkCASE, TkKW, "case", EXPR_BEG],
+ [:TkWHEN, TkKW, "when", EXPR_BEG],
+ [:TkWHILE, TkKW, "while", EXPR_BEG, :TkWHILE_MOD],
+ [:TkUNTIL, TkKW, "until", EXPR_BEG, :TkUNTIL_MOD],
+ [:TkFOR, TkKW, "for", EXPR_BEG],
+ [:TkBREAK, TkKW, "break", EXPR_END],
+ [:TkNEXT, TkKW, "next", EXPR_END],
+ [:TkREDO, TkKW, "redo", EXPR_END],
+ [:TkRETRY, TkKW, "retry", EXPR_END],
+ [:TkIN, TkKW, "in", EXPR_BEG],
+ [:TkDO, TkKW, "do", EXPR_BEG],
+ [:TkRETURN, TkKW, "return", EXPR_MID],
+ [:TkYIELD, TkKW, "yield", EXPR_END],
+ [:TkSUPER, TkKW, "super", EXPR_END],
+ [:TkSELF, TkKW, "self", EXPR_END],
+ [:TkNIL, TkKW, "nil", EXPR_END],
+ [:TkTRUE, TkKW, "true", EXPR_END],
+ [:TkFALSE, TkKW, "false", EXPR_END],
+ [:TkAND, TkKW, "and", EXPR_BEG],
+ [:TkOR, TkKW, "or", EXPR_BEG],
+ [:TkNOT, TkKW, "not", EXPR_BEG],
+ [:TkIF_MOD, TkKW],
+ [:TkUNLESS_MOD, TkKW],
+ [:TkWHILE_MOD, TkKW],
+ [:TkUNTIL_MOD, TkKW],
+ [:TkALIAS, TkKW, "alias", EXPR_FNAME],
+ [:TkDEFINED, TkKW, "defined?", EXPR_END],
+ [:TklBEGIN, TkKW, "BEGIN", EXPR_END],
+ [:TklEND, TkKW, "END", EXPR_END],
+ [:Tk__LINE__, TkKW, "__LINE__", EXPR_END],
+ [:Tk__FILE__, TkKW, "__FILE__", EXPR_END],
+
+ [:TkIDENTIFIER, TkId],
+ [:TkFID, TkId],
+ [:TkGVAR, TkId],
+ [:TkIVAR, TkId],
+ [:TkCONSTANT, TkId],
+
+ [:TkINTEGER, TkVal],
+ [:TkFLOAT, TkVal],
+ [:TkSTRING, TkVal],
+ [:TkXSTRING, TkVal],
+ [:TkREGEXP, TkVal],
+ [:TkCOMMENT, TkVal],
+
+ [:TkDSTRING, TkNode],
+ [:TkDXSTRING, TkNode],
+ [:TkDREGEXP, TkNode],
+ [:TkNTH_REF, TkId],
+ [:TkBACK_REF, TkId],
+
+ [:TkUPLUS, TkOp, "+@"],
+ [:TkUMINUS, TkOp, "-@"],
+ [:TkPOW, TkOp, "**"],
+ [:TkCMP, TkOp, "<=>"],
+ [:TkEQ, TkOp, "=="],
+ [:TkEQQ, TkOp, "==="],
+ [:TkNEQ, TkOp, "!="],
+ [:TkGEQ, TkOp, ">="],
+ [:TkLEQ, TkOp, "<="],
+ [:TkANDOP, TkOp, "&&"],
+ [:TkOROP, TkOp, "||"],
+ [:TkMATCH, TkOp, "=~"],
+ [:TkNMATCH, TkOp, "!~"],
+ [:TkDOT2, TkOp, ".."],
+ [:TkDOT3, TkOp, "..."],
+ [:TkAREF, TkOp, "[]"],
+ [:TkASET, TkOp, "[]="],
+ [:TkLSHFT, TkOp, "<<"],
+ [:TkRSHFT, TkOp, ">>"],
+ [:TkCOLON2, TkOp],
+ [:TkCOLON3, TkOp],
+# [:OPASGN, TkOp], # +=, -= etc. #
+ [:TkASSOC, TkOp, "=>"],
+ [:TkQUESTION, TkOp, "?"], #?
+ [:TkCOLON, TkOp, ":"], #:
+
+ [:TkfLPAREN], # func( #
+ [:TkfLBRACK], # func[ #
+ [:TkfLBRACE], # func{ #
+ [:TkSTAR], # *arg
+ [:TkAMPER], # &arg #
+ [:TkSYMBOL, TkId], # :SYMBOL
+ [:TkSYMBEG, TkId],
+ [:TkGT, TkOp, ">"],
+ [:TkLT, TkOp, "<"],
+ [:TkPLUS, TkOp, "+"],
+ [:TkMINUS, TkOp, "-"],
+ [:TkMULT, TkOp, "*"],
+ [:TkDIV, TkOp, "/"],
+ [:TkMOD, TkOp, "%"],
+ [:TkBITOR, TkOp, "|"],
+ [:TkBITXOR, TkOp, "^"],
+ [:TkBITAND, TkOp, "&"],
+ [:TkBITNOT, TkOp, "~"],
+ [:TkNOTOP, TkOp, "!"],
+
+ [:TkBACKQUOTE, TkOp, "`"],
+
+ [:TkASSIGN, Token, "="],
+ [:TkDOT, Token, "."],
+ [:TkLPAREN, Token, "("], #(exp)
+ [:TkLBRACK, Token, "["], #[arry]
+ [:TkLBRACE, Token, "{"], #{hash}
+ [:TkRPAREN, Token, ")"],
+ [:TkRBRACK, Token, "]"],
+ [:TkRBRACE, Token, "}"],
+ [:TkCOMMA, Token, ","],
+ [:TkSEMICOLON, Token, ";"],
+
+ [:TkRD_COMMENT],
+ [:TkSPACE],
+ [:TkNL],
+ [:TkEND_OF_SCRIPT],
+
+ [:TkBACKSLASH, TkUnknownChar, "\\"],
+ [:TkAT, TkUnknownChar, "@"],
+ [:TkDOLLAR, TkUnknownChar, "\$"], #"
+ ]
+
+ # {reading => token_class}
+ # {reading => [token_class, *opt]}
+ TkReading2Token = {}
+ TkSymbol2Token = {}
+
+ def RubyToken.def_token(token_n, super_token = Token, reading = nil, *opts)
+ token_n = token_n.id2name unless token_n.kind_of?(String)
+ if RubyToken.const_defined?(token_n)
+ IRB.fail AlreadyDefinedToken, token_n
+ end
+
+ token_c = Class.new super_token
+ RubyToken.const_set token_n, token_c
+# token_c.inspect
+
+ if reading
+ if TkReading2Token[reading]
+ IRB.fail TkReading2TokenDuplicateError, token_n, reading
+ end
+ if opts.empty?
+ TkReading2Token[reading] = [token_c]
+ else
+ TkReading2Token[reading] = [token_c].concat(opts)
+ end
+ end
+ TkSymbol2Token[token_n.intern] = token_c
+
+ if token_c <= TkOp
+ token_c.class_eval %{
+ def self.op_name; "#{reading}"; end
+ }
+ end
+ end
+
+ # Create one token class per row of the definition table.
+ TokenDefinitions.each { |defs| def_token(*defs) }
+
+ # Shared newline token positioned at 0,0.
+ NEWLINE_TOKEN = TkNL.new(0,0).set_text("\n")
+
+end
+
+
+
+# Lexical analyzer for Ruby source
+
+class RubyLex
+
+ ######################################################################
+ #
+ # Read an input stream character by character. We allow for unlimited
+ # ungetting of characters just read.
+ #
+ # We simplify the implementation greatly by reading the entire input
+ # into a buffer initially, and then simply traversing it using
+ # pointers.
+ #
+ # We also have to allow for the <i>here document diversion</i>. This
+ # little gem comes about when the lexer encounters a here
+ # document. At this point we effectively need to split the input
+ # stream into two parts: one to read the body of the here document,
+ # the other to read the rest of the input line where the here
+ # document was initially encountered. For example, we might have
+ #
+ # do_something(<<-A, <<-B)
+ # stuff
+ # for
+ # A
+ # stuff
+ # for
+ # B
+ #
+ # When the lexer encounters the <<A, it reads until the end of the
+ # line, and keeps it around for later. It then reads the body of the
+ # here document. Once complete, it needs to read the rest of the
+ # original line, but then skip the here document body.
+ #
+
+ class BufferedReader
+
+ attr_reader :line_num
+
+ def initialize(content)
+ if /\t/ =~ content
+ tab_width = Options.instance.tab_width
+ content = content.split(/\n/).map do |line|
+ 1 while line.gsub!(/\t+/) { ' ' * (tab_width*$&.length - $`.length % tab_width)} && $~ #`
+ line
+ end .join("\n")
+ end
+ @content = content
+ @content << "\n" unless @content[-1,1] == "\n"
+ @size = @content.size
+ @offset = 0
+ @hwm = 0
+ @line_num = 1
+ @read_back_offset = 0
+ @last_newline = 0
+ @newline_pending = false
+ end
+
+ def column
+ @offset - @last_newline
+ end
+
+ def getc
+ return nil if @offset >= @size
+ ch = @content[@offset, 1]
+
+ @offset += 1
+ @hwm = @offset if @hwm < @offset
+
+ if @newline_pending
+ @line_num += 1
+ @last_newline = @offset - 1
+ @newline_pending = false
+ end
+
+ if ch == "\n"
+ @newline_pending = true
+ end
+ ch
+ end
+
+ def getc_already_read
+ getc
+ end
+
+ def ungetc(ch)
+ raise "unget past beginning of file" if @offset <= 0
+ @offset -= 1
+ if @content[@offset] == ?\n
+ @newline_pending = false
+ end
+ end
+
+ def get_read
+ res = @content[@read_back_offset...@offset]
+ @read_back_offset = @offset
+ res
+ end
+
+ def peek(at)
+ pos = @offset + at
+ if pos >= @size
+ nil
+ else
+ @content[pos, 1]
+ end
+ end
+
+ def peek_equal(str)
+ @content[@offset, str.length] == str
+ end
+
+ def divert_read_from(reserve)
+ @content[@offset, 0] = reserve
+ @size = @content.size
+ end
+ end
+
+ # end of nested class BufferedReader
+
+ extend Exception2MessageMapper
+ def_exception(:AlreadyDefinedToken, "Already defined token(%s)")
+ def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')")
+ def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')")
+ def_exception(:TkReading2TokenDuplicateError,
+ "key duplicate(token_n='%s', key='%s')")
+ def_exception(:SyntaxError, "%s")
+
+ include RubyToken
+ include IRB
+
+ attr_reader :continue
+ attr_reader :lex_state
+
+ # Switch for the lexer's trace output; hard-wired to off.
+ def RubyLex.debug?
+ false
+ end
+
+ def initialize(content)
+ lex_init
+
+ @reader = BufferedReader.new(content)
+
+ @exp_line_no = @line_no = 1
+ @base_char_no = 0
+ @indent = 0
+
+ @ltype = nil
+ @quoted = nil
+ @lex_state = EXPR_BEG
+ @space_seen = false
+
+ @continue = false
+ @line = ""
+
+ @skip_space = false
+ @read_auto_clean_up = false
+ @exception_on_syntax_error = true
+ end
+
+ attr :skip_space, true
+ attr :read_auto_clean_up, true
+ attr :exception_on_syntax_error, true
+
+ attr :indent
+
+ # io functions
+ # Current line number, as tracked by the reader.
+ def line_no
+ @reader.line_num
+ end
+
+ # Current column, as tracked by the reader.
+ def char_no
+ @reader.column
+ end
+
+ # Text consumed from the reader since the previous get_read.
+ def get_read
+ @reader.get_read
+ end
+
+ # Next character from the reader (nil at end of input).
+ def getc
+ @reader.getc
+ end
+
+ # Delegates to the reader's getc_already_read.
+ def getc_of_rests
+ @reader.getc_already_read
+ end
+
+ def gets
+ c = getc or return
+ l = ""
+ begin
+ l.concat c unless c == "\r"
+ break if c == "\n"
+ end while c = getc
+ l
+ end
+
+
+ # Push the last character back onto the reader.  The argument is
+ # simply forwarded to the reader.
+ def ungetc(c = nil)
+ @reader.ungetc(c)
+ end
+
+ # True when the upcoming input is exactly +str+.
+ def peek_equal?(str)
+ @reader.peek_equal(str)
+ end
+
+ # Look +i+ characters ahead without consuming input.
+ def peek(i = 0)
+ @reader.peek(i)
+ end
+
+ def lex
+ until (((tk = token).kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)) &&
+ !@continue or
+ tk.nil?)
+ end
+ line = get_read
+
+ if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
+ nil
+ else
+ line
+ end
+ end
+
+ # Return the next token from the input.
+ #
+ # The token is stamped with the position at which scanning started.
+ # Space tokens are skipped when @skip_space is set.  A SyntaxError
+ # raised while matching aborts the program when
+ # @exception_on_syntax_error is set; otherwise a TkError token is
+ # returned in its place.
+ def token
+ set_token_position(line_no, char_no)
+ begin
+ begin
+ tk = @OP.match(self)
+ @space_seen = tk.kind_of?(TkSPACE)
+ rescue SyntaxError
+ abort if @exception_on_syntax_error
+ tk = TkError.new(line_no, char_no)
+ end
+ end while @skip_space and tk.kind_of?(TkSPACE)
+ # discard the read-back buffer when automatic clean-up is requested
+ if @read_auto_clean_up
+ get_read
+ end
+# throw :eof unless tk
+ p tk if $DEBUG
+ tk
+ end
+
+ ENINDENT_CLAUSE = [
+ "case", "class", "def", "do", "for", "if",
+ "module", "unless", "until", "while", "begin" #, "when"
+ ]
+ DEINDENT_CLAUSE = ["end" #, "when"
+ ]
+
+ PERCENT_LTYPE = {
+ "q" => "\'",
+ "Q" => "\"",
+ "x" => "\`",
+ "r" => "/",
+ "w" => "]"
+ }
+
+ PERCENT_PAREN = {
+ "{" => "}",
+ "[" => "]",
+ "<" => ">",
+ "(" => ")"
+ }
+
+ Ltype2Token = {
+ "\'" => TkSTRING,
+ "\"" => TkSTRING,
+ "\`" => TkXSTRING,
+ "/" => TkREGEXP,
+ "]" => TkDSTRING
+ }
+ Ltype2Token.default = TkSTRING
+
+ DLtype2Token = {
+ "\"" => TkDSTRING,
+ "\`" => TkDXSTRING,
+ "/" => TkDREGEXP,
+ }
+
+ # Build the first half of the rule table (@OP, an SLex instance) that
+ # maps leading characters to the code producing the matching token;
+ # the remaining rules are registered by lex_int2, called at the end.
+ # Each rule block sets the token text and, where needed, updates
+ # @lex_state for the token that follows.
+ def lex_init()
+ @OP = SLex.new
+ # NUL, ^D and ^Z mark the end of the script
+ @OP.def_rules("\0", "\004", "\032") do |chars, io|
+ Token(TkEND_OF_SCRIPT).set_text(chars)
+ end
+
+ # a run of blanks becomes a single space token
+ @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |chars, io|
+ @space_seen = TRUE
+ while (ch = getc) =~ /[ \t\f\r\13]/
+ chars << ch
+ end
+ ungetc
+ Token(TkSPACE).set_text(chars)
+ end
+
+ @OP.def_rule("#") do
+ |op, io|
+ identify_comment
+ end
+
+ # =begin ... =end block; only recognised in column 0.  A block opened
+ # with "=begin rdoc" is returned as a comment token holding its body,
+ # anything else as TkRD_COMMENT.
+ @OP.def_rule("=begin", proc{@prev_char_no == 0 && peek(0) =~ /\s/}) do
+ |op, io|
+ str = op
+ @ltype = "="
+
+
+ begin
+ line = ""
+ begin
+ ch = getc
+ line << ch
+ end until ch == "\n"
+ str << line
+ end until line =~ /^=end/
+
+ ungetc
+
+ @ltype = nil
+
+ if str =~ /\A=begin\s+rdoc/i
+ str.sub!(/\A=begin.*\n/, '')
+ str.sub!(/^=end.*/m, '')
+ Token(TkCOMMENT).set_text(str)
+ else
+ Token(TkRD_COMMENT)#.set_text(str)
+ end
+ end
+
+ # a newline continues the statement when an expression is still
+ # expected, otherwise it ends it
+ @OP.def_rule("\n") do
+ print "\\n\n" if RubyLex.debug?
+ case @lex_state
+ when EXPR_BEG, EXPR_FNAME, EXPR_DOT
+ @continue = TRUE
+ else
+ @continue = FALSE
+ @lex_state = EXPR_BEG
+ end
+ Token(TkNL).set_text("\n")
+ end
+
+ @OP.def_rules("*", "**",
+ "!", "!=", "!~",
+ "=", "==", "===",
+ "=~", "<=>",
+ "<", "<=",
+ ">", ">=", ">>") do
+ |op, io|
+ @lex_state = EXPR_BEG
+ Token(op).set_text(op)
+ end
+
+ # "<<" is either the shift operator or the start of a here document,
+ # depending on the lexer state and the character that follows
+ @OP.def_rules("<<") do
+ |op, io|
+ tk = nil
+ if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
+ (@lex_state != EXPR_ARG || @space_seen)
+ c = peek(0)
+ if /[-\w_\"\'\`]/ =~ c
+ tk = identify_here_document
+ end
+ end
+ if !tk
+ @lex_state = EXPR_BEG
+ tk = Token(op).set_text(op)
+ end
+ tk
+ end
+
+ @OP.def_rules("'", '"') do
+ |op, io|
+ identify_string(op)
+ end
+
+ # a backquote is a method name after "def", a command string otherwise
+ @OP.def_rules("`") do
+ |op, io|
+ if @lex_state == EXPR_FNAME
+ Token(op).set_text(op)
+ else
+ identify_string(op)
+ end
+ end
+
+ # "?" is the ternary operator or the start of a character literal
+ # (?a, ?\n), which is returned as an integer token
+ @OP.def_rules('?') do
+ |op, io|
+ if @lex_state == EXPR_END
+ @lex_state = EXPR_BEG
+ Token(TkQUESTION).set_text(op)
+ else
+ ch = getc
+ if @lex_state == EXPR_ARG && ch !~ /\s/
+ ungetc
+ @lex_state = EXPR_BEG;
+ Token(TkQUESTION).set_text(op)
+ else
+ str = op
+ str << ch
+ if (ch == '\\') #'
+ str << read_escape
+ end
+ @lex_state = EXPR_END
+ Token(TkINTEGER).set_text(str)
+ end
+ end
+ end
+
+ @OP.def_rules("&", "&&", "|", "||") do
+ |op, io|
+ @lex_state = EXPR_BEG
+ Token(op).set_text(op)
+ end
+
+ # operator assignment: the operator without the trailing "=" is
+ # handed to the TkOPASGN token
+ @OP.def_rules("+=", "-=", "*=", "**=",
+ "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
+ |op, io|
+ @lex_state = EXPR_BEG
+ op =~ /^(.*)=$/
+ Token(TkOPASGN, $1).set_text(op)
+ end
+
+ # unary operator method names, only valid right after "def"
+ @OP.def_rule("+@", proc{@lex_state == EXPR_FNAME}) do |op, io|
+ Token(TkUPLUS).set_text(op)
+ end
+
+ @OP.def_rule("-@", proc{@lex_state == EXPR_FNAME}) do |op, io|
+ Token(TkUMINUS).set_text(op)
+ end
+
+ # "+" / "-" are the sign of a numeric literal when a digit follows and
+ # the state allows it, otherwise a plain operator
+ @OP.def_rules("+", "-") do
+ |op, io|
+ catch(:RET) do
+ if @lex_state == EXPR_ARG
+ if @space_seen and peek(0) =~ /[0-9]/
+ throw :RET, identify_number(op)
+ else
+ @lex_state = EXPR_BEG
+ end
+ elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
+ throw :RET, identify_number(op)
+ else
+ @lex_state = EXPR_BEG
+ end
+ Token(op).set_text(op)
+ end
+ end
+
+ @OP.def_rule(".") do
+ @lex_state = EXPR_BEG
+ if peek(0) =~ /[0-9]/
+ ungetc
+ identify_number("")
+ else
+ # for obj.if
+ @lex_state = EXPR_DOT
+ Token(TkDOT).set_text(".")
+ end
+ end
+
+ @OP.def_rules("..", "...") do
+ |op, io|
+ @lex_state = EXPR_BEG
+ Token(op).set_text(op)
+ end
+
+ lex_int2
+ end
+
+ def lex_int2
+ @OP.def_rules("]", "}", ")") do
+ |op, io|
+ @lex_state = EXPR_END
+ @indent -= 1
+ Token(op).set_text(op)
+ end
+
+ @OP.def_rule(":") do
+ if @lex_state == EXPR_END || peek(0) =~ /\s/
+ @lex_state = EXPR_BEG
+ tk = Token(TkCOLON)
+ else
+ @lex_state = EXPR_FNAME;
+ tk = Token(TkSYMBEG)
+ end
+ tk.set_text(":")
+ end
+
+ @OP.def_rule("::") do
+# p @lex_state.id2name, @space_seen
+ if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
+ @lex_state = EXPR_BEG
+ tk = Token(TkCOLON3)
+ else
+ @lex_state = EXPR_DOT
+ tk = Token(TkCOLON2)
+ end
+ tk.set_text("::")
+ end
+
+ @OP.def_rule("/") do
+ |op, io|
+ if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
+ identify_string(op)
+ elsif peek(0) == '='
+ getc
+ @lex_state = EXPR_BEG
+ Token(TkOPASGN, :/).set_text("/=") #")
+ elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
+ identify_string(op)
+ else
+ @lex_state = EXPR_BEG
+ Token("/").set_text(op)
+ end
+ end
+
+ @OP.def_rules("^") do
+ @lex_state = EXPR_BEG
+ Token("^").set_text("^")
+ end
+
+ # @OP.def_rules("^=") do
+ # @lex_state = EXPR_BEG
+ # Token(TkOPASGN, :^)
+ # end
+
+ @OP.def_rules(",", ";") do
+ |op, io|
+ @lex_state = EXPR_BEG
+ Token(op).set_text(op)
+ end
+
+ @OP.def_rule("~") do
+ @lex_state = EXPR_BEG
+ Token("~").set_text("~")
+ end
+
+ @OP.def_rule("~@", proc{@lex_state = EXPR_FNAME}) do
+ @lex_state = EXPR_BEG
+ Token("~").set_text("~@")
+ end
+
+ @OP.def_rule("(") do
+ @indent += 1
+ if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
+ @lex_state = EXPR_BEG
+ tk = Token(TkfLPAREN)
+ else
+ @lex_state = EXPR_BEG
+ tk = Token(TkLPAREN)
+ end
+ tk.set_text("(")
+ end
+
+ @OP.def_rule("[]", proc{@lex_state == EXPR_FNAME}) do
+ Token("[]").set_text("[]")
+ end
+
+ @OP.def_rule("[]=", proc{@lex_state == EXPR_FNAME}) do
+ Token("[]=").set_text("[]=")
+ end
+
+ @OP.def_rule("[") do
+ @indent += 1
+ if @lex_state == EXPR_FNAME
+ t = Token(TkfLBRACK)
+ else
+ if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
+ t = Token(TkLBRACK)
+ elsif @lex_state == EXPR_ARG && @space_seen
+ t = Token(TkLBRACK)
+ else
+ t = Token(TkfLBRACK)
+ end
+ @lex_state = EXPR_BEG
+ end
+ t.set_text("[")
+ end
+
+ @OP.def_rule("{") do
+ @indent += 1
+ if @lex_state != EXPR_END && @lex_state != EXPR_ARG
+ t = Token(TkLBRACE)
+ else
+ t = Token(TkfLBRACE)
+ end
+ @lex_state = EXPR_BEG
+ t.set_text("{")
+ end
+
+ @OP.def_rule('\\') do #'
+ if getc == "\n"
+ @space_seen = true
+ @continue = true
+ Token(TkSPACE).set_text("\\\n")
+ else
+ ungetc
+ Token("\\").set_text("\\") #"
+ end
+ end
+
+ @OP.def_rule('%') do
+ |op, io|
+ if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
+ identify_quotation('%')
+ elsif peek(0) == '='
+ getc
+ Token(TkOPASGN, "%").set_text("%=")
+ elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
+ identify_quotation('%')
+ else
+ @lex_state = EXPR_BEG
+ Token("%").set_text("%")
+ end
+ end
+
+ @OP.def_rule('$') do #'
+ identify_gvar
+ end
+
+ @OP.def_rule('@') do
+ if peek(0) =~ /[@\w_]/
+ ungetc
+ identify_identifier
+ else
+ Token("@").set_text("@")
+ end
+ end
+
+ # @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do
+ # |op, io|
+ # @indent += 1
+ # @lex_state = EXPR_FNAME
+ # # @lex_state = EXPR_END
+ # # until @rests[0] == "\n" or @rests[0] == ";"
+ # # rests.shift
+ # # end
+ # end
+
+ @OP.def_rule("__END__", proc{@prev_char_no == 0 && peek(0) =~ /[\r\n]/}) do
+ throw :eof
+ end
+
+ @OP.def_rule("") do
+ |op, io|
+ printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
+ if peek(0) =~ /[0-9]/
+ t = identify_number("")
+ elsif peek(0) =~ /[\w_]/
+ t = identify_identifier
+ end
+ printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
+ t
+ end
+
+ p @OP if RubyLex.debug?
+ end
+
+ def identify_gvar
+ @lex_state = EXPR_END
+ str = "$"
+
+ tk = case ch = getc
+ when /[~_*$?!@\/\\;,=:<>".]/ #"
+ str << ch
+ Token(TkGVAR, str)
+
+ when "-"
+ str << "-" << getc
+ Token(TkGVAR, str)
+
+ when "&", "`", "'", "+"
+ str << ch
+ Token(TkBACK_REF, str)
+
+ when /[1-9]/
+ str << ch
+ while (ch = getc) =~ /[0-9]/
+ str << ch
+ end
+ ungetc
+ Token(TkNTH_REF)
+ when /\w/
+ ungetc
+ ungetc
+ return identify_identifier
+ else
+ ungetc
+ Token("$")
+ end
+ tk.set_text(str)
+ end
+
+ # Lex an identifier, keyword, constant, instance/class variable or
+ # global variable starting at the current position, and update
+ # @lex_state (and @indent for block keywords) accordingly.
+ #
+ # Returns a TkGVAR, TkIVAR, keyword, TkCONSTANT, TkFID or TkIDENTIFIER
+ # token whose text is the identifier read.
+ def identify_identifier
+ token = ""
+ # optional "$", "@" or "@@" prefix
+ token.concat getc if peek(0) =~ /[$@]/
+ token.concat getc if peek(0) == "@"
+
+ while (ch = getc) =~ /\w|_/
+ print ":", ch, ":" if RubyLex.debug?
+ token.concat ch
+ end
+ ungetc
+
+ # a trailing "!" or "?" belongs to the name
+ # NOTE(review): this also swallows the "!" of "a!=b" -- confirm
+ # whether a look-ahead for "=" is wanted here.
+ if ch == "!" or ch == "?"
+ token.concat getc
+ end
+ # fix token
+
+ # $stderr.puts "identifier - #{token}, state = #@lex_state"
+
+ case token
+ when /^\$/
+ return Token(TkGVAR, token).set_text(token)
+ when /^\@/
+ @lex_state = EXPR_END
+ return Token(TkIVAR, token).set_text(token)
+ end
+
+ # after a "." a reserved word is just a method name (obj.if)
+ if @lex_state != EXPR_DOT
+ print token, "\n" if RubyLex.debug?
+
+ # trans[0] is the state to enter after the keyword, trans[1] the
+ # symbol of its modifier form (if/unless/while/until only)
+ token_c, *trans = TkReading2Token[token]
+ if token_c
+ # reserved word?
+
+ if (@lex_state != EXPR_BEG &&
+ @lex_state != EXPR_FNAME &&
+ trans[1])
+ # modifiers
+ token_c = TkSymbol2Token[trans[1]]
+ @lex_state = trans[0]
+ else
+ if @lex_state != EXPR_FNAME
+ if ENINDENT_CLAUSE.include?(token)
+ @indent += 1
+ elsif DEINDENT_CLAUSE.include?(token)
+ @indent -= 1
+ end
+ @lex_state = trans[0]
+ else
+ @lex_state = EXPR_END
+ end
+ end
+ return Token(token_c, token).set_text(token)
+ end
+ end
+
+ if @lex_state == EXPR_FNAME
+ @lex_state = EXPR_END
+ # setter method name: def foo=
+ if peek(0) == '='
+ token.concat getc
+ end
+ elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
+ @lex_state = EXPR_ARG
+ else
+ @lex_state = EXPR_END
+ end
+
+ if token[0, 1] =~ /[A-Z]/
+ return Token(TkCONSTANT, token).set_text(token)
+ elsif token[token.size - 1, 1] =~ /[!?]/
+ return Token(TkFID, token).set_text(token)
+ else
+ return Token(TkIDENTIFIER, token).set_text(token)
+ end
+ end
+
+ # Lex a here document whose "<<" has already been consumed.
+ #
+ # Reads the terminator (optionally preceded by "-" and optionally
+ # quoted), sets the rest of the current line aside, reads the body up
+ # to the terminator line, and finally splices the saved rest of the
+ # line back into the reader so that lexing resumes there.
+ #
+ # Returns a string-type token whose text is the dumped body.
+ def identify_here_document
+ ch = getc
+ # "<<-" allows the terminator to be indented
+ if ch == "-"
+ ch = getc
+ indent = true
+ end
+ if /['"`]/ =~ ch # '
+ lt = ch
+ quoted = ""
+ while (c = getc) && c != lt
+ quoted.concat c
+ end
+ else
+ lt = '"'
+ quoted = ch.dup
+ while (c = getc) && c =~ /\w/
+ quoted.concat c
+ end
+ ungetc
+ end
+
+ ltback, @ltype = @ltype, lt
+ reserve = ""
+
+ # save the remainder of the line on which the here document started
+ while ch = getc
+ reserve << ch
+ if ch == "\\" #"
+ ch = getc
+ reserve << ch
+ elsif ch == "\n"
+ break
+ end
+ end
+
+ # collect the body; with "<<-" every line is stripped before it is
+ # compared with the terminator and appended
+ str = ""
+ while (l = gets)
+ l.chomp!
+ l.strip! if indent
+ break if l == quoted
+ str << l.chomp << "\n"
+ end
+
+ @reader.divert_read_from(reserve)
+
+ @ltype = ltback
+ @lex_state = EXPR_END
+ Token(Ltype2Token[lt], str).set_text(str.dump)
+ end
+
+ def identify_quotation(initial_char)
+ ch = getc
+ if lt = PERCENT_LTYPE[ch]
+ initial_char += ch
+ ch = getc
+ elsif ch =~ /\W/
+ lt = "\""
+ else
+ RubyLex.fail SyntaxError, "unknown type of %string ('#{ch}')"
+ end
+# if ch !~ /\W/
+# ungetc
+# next
+# end
+ #@ltype = lt
+ @quoted = ch unless @quoted = PERCENT_PAREN[ch]
+ identify_string(lt, @quoted, ch, initial_char)
+ end
+
+ def identify_number(start)
+ str = start.dup
+
+ if start == "+" or start == "-" or start == ""
+ start = getc
+ str << start
+ end
+
+ @lex_state = EXPR_END
+
+ if start == "0"
+ if peek(0) == "x"
+ ch = getc
+ str << ch
+ match = /[0-9a-f_]/
+ else
+ match = /[0-7_]/
+ end
+ while ch = getc
+ if ch !~ match
+ ungetc
+ break
+ else
+ str << ch
+ end
+ end
+ return Token(TkINTEGER).set_text(str)
+ end
+
+ type = TkINTEGER
+ allow_point = TRUE
+ allow_e = TRUE
+ while ch = getc
+ case ch
+ when /[0-9_]/
+ str << ch
+
+ when allow_point && "."
+ type = TkFLOAT
+ if peek(0) !~ /[0-9]/
+ ungetc
+ break
+ end
+ str << ch
+ allow_point = false
+
+ when allow_e && "e", allow_e && "E"
+ str << ch
+ type = TkFLOAT
+ if peek(0) =~ /[+-]/
+ str << getc
+ end
+ allow_e = false
+ allow_point = false
+ else
+ ungetc
+ break
+ end
+ end
+ Token(type).set_text(str)
+ end
+
+ # Lex a string-like literal (string, command string, regexp or word
+ # list) whose opening delimiter has already been consumed.
+ #
+ # ltype::        literal type key, looked up in Ltype2Token and
+ #                DLtype2Token
+ # quoted::       closing delimiter
+ # opener::       opening delimiter when it differs from the closing
+ #                one (bracket pairs); occurrences nest
+ # initial_char:: text preceding the opener (e.g. "%q"), if any
+ #
+ # Returns the token for the literal with its complete source text.
+ # The "D" variant of the token class is used when the literal contains
+ # a "#{...}" interpolation.  @ltype, @quoted and @lex_state are reset
+ # on the way out, even when an exception is raised.
+ def identify_string(ltype, quoted = ltype, opener=nil, initial_char = nil)
+ @ltype = ltype
+ @quoted = quoted
+ subtype = nil
+
+ str = ""
+ str << initial_char if initial_char
+ str << (opener||quoted)
+
+ nest = 0
+ begin
+ while ch = getc
+ str << ch
+ if @quoted == ch
+ if nest == 0
+ break
+ else
+ nest -= 1
+ end
+ elsif opener == ch
+ nest += 1
+ # interpolation is not looked for in single-quoted strings and
+ # word lists
+ elsif @ltype != "'" && @ltype != "]" and ch == "#"
+ ch = getc
+ if ch == "{"
+ subtype = true
+ str << ch << skip_inner_expression
+ else
+ ungetc(ch)
+ end
+ elsif ch == '\\' #'
+ str << read_escape
+ end
+ end
+ # NOTE(review): at most one trailing regexp option is consumed and
+ # "m"/"x" are not in the list -- confirm whether that is intended.
+ if @ltype == "/"
+ if peek(0) =~ /i|o|n|e|s/
+ str << getc
+ end
+ end
+ if subtype
+ Token(DLtype2Token[ltype], str)
+ else
+ Token(Ltype2Token[ltype], str)
+ end.set_text(str)
+ ensure
+ @ltype = nil
+ @quoted = nil
+ @lex_state = EXPR_END
+ end
+ end
+
+ def skip_inner_expression
+ res = ""
+ nest = 0
+ while (ch = getc)
+ res << ch
+ if ch == '}'
+ break if nest.zero?
+ nest -= 1
+ elsif ch == '{'
+ nest += 1
+ end
+ end
+ res
+ end
+
+ def identify_comment
+ @ltype = "#"
+ comment = "#"
+ while ch = getc
+ if ch == "\\"
+ ch = getc
+ if ch == "\n"
+ ch = " "
+ else
+ comment << "\\"
+ end
+ else
+ if ch == "\n"
+ @ltype = nil
+ ungetc
+ break
+ end
+ end
+ comment << ch
+ end
+ return Token(TkCOMMENT).set_text(comment)
+ end
+
+ def read_escape
+ res = ""
+ case ch = getc
+ when /[0-7]/
+ ungetc ch
+ 3.times do
+ case ch = getc
+ when /[0-7]/
+ when nil
+ break
+ else
+ ungetc
+ break
+ end
+ res << ch
+ end
+
+ when "x"
+ res << ch
+ 2.times do
+ case ch = getc
+ when /[0-9a-fA-F]/
+ when nil
+ break
+ else
+ ungetc
+ break
+ end
+ res << ch
+ end
+
+ when "M"
+ res << ch
+ if (ch = getc) != '-'
+ ungetc
+ else
+ res << ch
+ if (ch = getc) == "\\" #"
+ res << ch
+ res << read_escape
+ else
+ res << ch
+ end
+ end
+
+ when "C", "c" #, "^"
+ res << ch
+ if ch == "C" and (ch = getc) != "-"
+ ungetc
+ else
+ res << ch
+ if (ch = getc) == "\\" #"
+ res << ch
+ res << read_escape
+ else
+ res << ch
+ end
+ end
+ else
+ res << ch
+ end
+ res
+ end
+end
+
+
+
+# Extract code elements from a source file, returning a TopLevel
+# object containing the constituent file elements.
+#
+# This file is based on rtags
+
+module RDoc
+
+ GENERAL_MODIFIERS = [ 'nodoc' ].freeze
+
+ CLASS_MODIFIERS = GENERAL_MODIFIERS
+
+ ATTR_MODIFIERS = GENERAL_MODIFIERS
+
+ CONSTANT_MODIFIERS = GENERAL_MODIFIERS
+
+ METHOD_MODIFIERS = GENERAL_MODIFIERS +
+ [ 'arg', 'args', 'yield', 'yields', 'notnew', 'not-new', 'not_new', 'doc' ]
+
+
+ class RubyParser
+ include RubyToken
+ include TokenStream
+
+ extend ParserFactory
+
+ parse_files_matching(/\.rbw?$/)
+
+
+ # top_level:: object that scan fills in and returns
+ # file_name:: name of the file being parsed, used in messages
+ # content::   source text, handed to a new RubyLex
+ # options::   options object; +quiet+ is read here
+ # stats::     statistics object, stored for later use
+ #
+ # The lexer is told not to abort on syntax errors.
+ def initialize(top_level, file_name, content, options, stats)
+ @options = options
+ @stats = stats
+ @size = 0
+ @token_listeners = nil
+ @input_file_name = file_name
+ @scanner = RubyLex.new(content)
+ @scanner.exception_on_syntax_error = false
+ @top_level = top_level
+ # progress output goes to stderr unless running quietly
+ @progress = $stderr unless options.quiet
+ end
+
+ # Parse the whole file into @top_level and return it.
+ #
+ # Parsing stops early when :eof or :enddoc is thrown.  Any exception
+ # raised while parsing is reported on stderr together with the current
+ # file position and then re-raised with its backtrace cut down to the
+ # first four entries.
+ def scan
+ @tokens = []
+ @unget_read = []
+ @read = []
+ catch(:eof) do
+ catch(:enddoc) do
+ begin
+ parse_toplevel_statements(@top_level)
+ rescue Exception => e
+ $stderr.puts "\n\n"
+ $stderr.puts "RDoc failure in #@input_file_name at or around " +
+ "line #{@scanner.line_no} column #{@scanner.char_no}"
+ $stderr.puts
+ $stderr.puts "Before reporting this, could you check that the file"
+ $stderr.puts "you're documenting compiles cleanly--RDoc is not a"
+ $stderr.puts "full Ruby parser, and gets confused easily if fed"
+ $stderr.puts "invalid programs."
+ $stderr.puts
+ $stderr.puts "The internal error was:\n\n"
+
+ e.set_backtrace(e.backtrace[0,4])
+ raise
+ end
+ end
+ end
+ @top_level
+ end
+
+ private
+
+ def make_message(msg)
+ prefix = "\n" + @input_file_name + ":"
+ if @scanner
+ prefix << "#{@scanner.line_no}:#{@scanner.char_no}: "
+ end
+ return prefix + msg
+ end
+
+ # Print a located warning on stderr, unless running quietly.
+ def warn(msg)
+ return if @options.quiet
+ msg = make_message msg
+ $stderr.puts msg
+ end
+
+ # Print a located error on stderr and exit with status 1.
+ def error(msg)
+ msg = make_message msg
+ $stderr.puts msg
+ exit(1)
+ end
+
+ # Emit a progress character, unless running quietly.
+ def progress(char)
+ unless @options.quiet
+ @progress.print(char)
+ @progress.flush
+ end
+ end
+
+ def add_token_listener(obj)
+ @token_listeners ||= []
+ @token_listeners << obj
+ end
+
+ def remove_token_listener(obj)
+ @token_listeners.delete(obj)
+ end
+
+ # Return the next token, taking it from the pushback queue when one
+ # is waiting there and from the scanner otherwise.  The source text of
+ # each token is recorded in @read.
+ #
+ # A symbol start (":") is merged with the following identifier or
+ # operator into a single TkSYMBOL token.  Registered token listeners
+ # are informed of the token that is returned.
+ def get_tk
+ tk = nil
+ if @tokens.empty?
+ tk = @scanner.token
+ @read.push @scanner.get_read
+ puts "get_tk1 => #{tk.inspect}" if $TOKEN_DEBUG
+ else
+ @read.push @unget_read.shift
+ tk = @tokens.shift
+ puts "get_tk2 => #{tk.inspect}" if $TOKEN_DEBUG
+ end
+
+ if tk.kind_of?(TkSYMBEG)
+ # the symbol token is positioned where the ":" was
+ set_token_position(tk.line_no, tk.char_no)
+ tk1 = get_tk
+ if tk1.kind_of?(TkId) || tk1.kind_of?(TkOp)
+ tk = Token(TkSYMBOL).set_text(":" + tk1.name)
+ # remove the identifier we just read (we're about to
+ # replace it with a symbol)
+ @token_listeners.each do |obj|
+ obj.pop_token
+ end if @token_listeners
+ else
+ warn("':' not followed by identified or operator")
+ tk = tk1
+ end
+ end
+
+ # inform any listeners of our shiny new token
+ @token_listeners.each do |obj|
+ obj.add_token(tk)
+ end if @token_listeners
+
+ tk
+ end
+
+ # Return the next token without consuming it (it is read and pushed
+ # straight back).
+ def peek_tk
+ unget_tk(tk = get_tk)
+ tk
+ end
+
+ # Push +tk+ back so that the next get_tk returns it again; the text
+ # recorded for it moves from @read to the front of @unget_read.
+ def unget_tk(tk)
+ @tokens.unshift tk
+ @unget_read.unshift @read.pop
+
+ # Remove this token from any listeners
+ @token_listeners.each do |obj|
+ obj.pop_token
+ end if @token_listeners
+ end
+
+ def skip_tkspace(skip_nl = true)
+ tokens = []
+ while ((tk = get_tk).kind_of?(TkSPACE) ||
+ (skip_nl && tk.kind_of?(TkNL)))
+ tokens.push tk
+ end
+ unget_tk(tk)
+ tokens
+ end
+
+ # Return the source text accumulated in @read and reset it.
+ def get_tkread
+ read = @read.join("")
+ @read = []
+ read
+ end
+
+ # Return the source text accumulated in @read without resetting it.
+ def peek_read
+ @read.join('')
+ end
+
+ NORMAL = "::"
+ SINGLE = "<<"
+
+ # Look for the first comment in a file that isn't
+ # a shebang line.
+
+ # Collect the leading run of comment tokens and return their text, one
+ # comment per line.  A comment starting with "#!" is skipped when it
+ # is the first one seen.  The first non-comment token is pushed back.
+ def collect_first_comment
+ skip_tkspace
+ res = ''
+ first_line = true
+
+ tk = get_tk
+ while tk.kind_of?(TkCOMMENT)
+ if first_line && tk.text[0,2] == "#!"
+ skip_tkspace
+ tk = get_tk
+ else
+ res << tk.text << "\n"
+ tk = get_tk
+ # step over the newline and any indentation of the next line
+ if tk.kind_of? TkNL
+ skip_tkspace(false)
+ tk = get_tk
+ end
+ end
+ first_line = false
+ end
+ unget_tk(tk)
+ res
+ end
+
+ # Parse a whole file: the first comment is scanned for directives and,
+ # when not empty, becomes the comment of +container+; the statements
+ # that follow are then parsed into +container+.
+ def parse_toplevel_statements(container)
+ comment = collect_first_comment
+ look_for_directives_in(container, comment)
+ container.comment = comment unless comment.empty?
+ parse_statements(container, NORMAL, nil, comment)
+ end
+
+ # Parse statements into +container+ until the "end" that closes the
+ # enclosing construct is found (or the tokens run out).
+ #
+ # container::      code object that receives what is found
+ # single::         passed through to the parse_* helpers (NORMAL or
+ #                  SINGLE)
+ # current_method:: the method being parsed, or nil outside of a method
+ # comment::        comment text preceding the first statement
+ #
+ # +nest+ counts the block-opening keywords that still await their
+ # "end"; it starts at 1 for the construct being parsed.  The
+ # container's visibility as found on entry is restored when that
+ # construct ends.
+ def parse_statements(container, single=NORMAL, current_method=nil, comment='')
+ nest = 1
+ save_visibility = container.visibility
+
+# if container.kind_of?(TopLevel)
+# else
+# comment = ''
+# end
+
+ non_comment_seen = true
+
+ while tk = get_tk
+
+ # set when +comment+ must survive to the next statement
+ keep_comment = false
+
+ non_comment_seen = true unless tk.kind_of?(TkCOMMENT)
+
+ case tk
+
+ when TkNL
+ skip_tkspace(true) # Skip blanks and newlines
+ tk = get_tk
+ if tk.kind_of?(TkCOMMENT)
+ # a comment block that follows code starts a fresh comment
+ if non_comment_seen
+ comment = ''
+ non_comment_seen = false
+ end
+ while tk.kind_of?(TkCOMMENT)
+ comment << tk.text << "\n"
+ tk = get_tk # this is the newline
+ skip_tkspace(false) # leading spaces
+ tk = get_tk
+ end
+ unless comment.empty?
+ look_for_directives_in(container, comment)
+ if container.done_documenting
+ container.ongoing_visibility = save_visibility
+# return
+ end
+ end
+ keep_comment = true
+ else
+ non_comment_seen = true
+ end
+ unget_tk(tk)
+ keep_comment = true
+
+
+ when TkCLASS
+ if container.document_children
+ parse_class(container, single, tk, comment)
+ else
+ nest += 1
+ end
+
+ when TkMODULE
+ if container.document_children
+ parse_module(container, single, tk, comment)
+ else
+ nest += 1
+ end
+
+ when TkDEF
+ if container.document_self
+ parse_method(container, single, tk, comment)
+ else
+ nest += 1
+ end
+
+ when TkCONSTANT
+ if container.document_self
+ parse_constant(container, single, tk, comment)
+ end
+
+ when TkALIAS
+ if container.document_self
+ parse_alias(container, single, tk, comment)
+ end
+
+ when TkYIELD
+ if current_method.nil?
+ warn("Warning: yield outside of method") if container.document_self
+ else
+ parse_yield(container, single, tk, current_method)
+ end
+
+ # Until and While can have a 'do', which shouldn't increase
+ # the nesting. We can't solve the general case, but we can
+ # handle most occurrences by ignoring a do at the end of a line
+
+ when TkUNTIL, TkWHILE
+ nest += 1
+ puts "FOUND #{tk.class} in #{container.name}, nest = #{nest}, " +
+ "line #{tk.line_no}" if $DEBUG
+ skip_optional_do_after_expression
+
+ # 'for' is trickier
+ when TkFOR
+ nest += 1
+ puts "FOUND #{tk.class} in #{container.name}, nest = #{nest}, " +
+ "line #{tk.line_no}" if $DEBUG
+ skip_for_variable
+ skip_optional_do_after_expression
+
+ when TkCASE, TkDO, TkIF, TkUNLESS, TkBEGIN
+ nest += 1
+ puts "Found #{tk.class} in #{container.name}, nest = #{nest}, " +
+ "line #{tk.line_no}" if $DEBUG
+
+ when TkIDENTIFIER
+ # visibility and attribute declarations only count directly inside
+ # the construct being parsed, and never inside a method
+ if nest == 1 and current_method.nil?
+ case tk.name
+ when "private", "protected", "public",
+ "private_class_method", "public_class_method"
+ parse_visibility(container, single, tk)
+ keep_comment = true
+ when "attr"
+ parse_attr(container, single, tk, comment)
+ when /^attr_(reader|writer|accessor)$/, @options.extra_accessors
+ parse_attr_accessor(container, single, tk, comment)
+ when "alias_method"
+ if container.document_self
+ parse_alias(container, single, tk, comment)
+ end
+ end
+ end
+
+ case tk.name
+ when "require"
+ parse_require(container, comment)
+ when "include"
+ parse_include(container, comment)
+ end
+
+
+ when TkEND
+ nest -= 1
+ puts "Found 'end' in #{container.name}, nest = #{nest}, line #{tk.line_no}" if $DEBUG
+ puts "Method = #{current_method.name}" if $DEBUG and current_method
+ if nest == 0
+ read_documentation_modifiers(container, CLASS_MODIFIERS)
+ container.ongoing_visibility = save_visibility
+ return
+ end
+
+ end
+
+ comment = '' unless keep_comment
+ # NOTE(review): peek_tk returns a token instance, so comparing it
+ # with the TkNL class looks like it can never be true and the body
+ # runs exactly once.  Do not "fix" this in isolation:
+ # skip_tkspace(false) leaves a newline token unread, so a kind_of?
+ # test here would never terminate.
+ begin
+ get_tkread
+ skip_tkspace(false)
+ end while peek_tk == TkNL
+
+ end
+ end
+
+ def parse_class(container, single, tk, comment, &block)
+ progress("c")
+
+ @stats.num_classes += 1
+
+ container, name_t = get_class_or_module(container)
+
+ case name_t
+ when TkCONSTANT
+ name = name_t.name
+ superclass = "Object"
+
+ if peek_tk.kind_of?(TkLT)
+ get_tk
+ skip_tkspace(true)
+ superclass = get_class_specification
+ superclass = "<unknown>" if superclass.empty?
+ end
+
+ if single == SINGLE
+ cls_type = SingleClass
+ else
+ cls_type = NormalClass
+ end
+
+ cls = container.add_class(cls_type, name, superclass)
+ read_documentation_modifiers(cls, CLASS_MODIFIERS)
+ cls.record_location(@top_level)
+ parse_statements(cls)
+ cls.comment = comment
+
+ when TkLSHFT
+ case name = get_class_specification
+ when "self", container.name
+ parse_statements(container, SINGLE, &block)
+ else
+ other = TopLevel.find_class_named(name)
+ unless other
+# other = @top_level.add_class(NormalClass, name, nil)
+# other.record_location(@top_level)
+# other.comment = comment
+ other = NormalClass.new("Dummy", nil)
+ end
+ read_documentation_modifiers(other, CLASS_MODIFIERS)
+ parse_statements(other, SINGLE, &block)
+ end
+
+ else
+ warn("Expected class name or '<<'. Got #{name_t.class}: #{name_t.text.inspect}")
+ end
+ end
+
+ def parse_module(container, single, tk, comment)
+ progress("m")
+ @stats.num_modules += 1
+ container, name_t = get_class_or_module(container)
+# skip_tkspace
+ name = name_t.name
+ mod = container.add_module(NormalModule, name)
+ mod.record_location(@top_level)
+ read_documentation_modifiers(mod, CLASS_MODIFIERS)
+ parse_statements(mod)
+ mod.comment = comment
+ end
+
+ # Look for the name of a class of module (optionally with a leading :: or
+ # with :: separated named) and return the ultimate name and container
+
+ def get_class_or_module(container)
+ skip_tkspace
+ name_t = get_tk
+
+ # class ::A -> A is in the top level
+ if name_t.kind_of?(TkCOLON2)
+ name_t = get_tk
+ container = @top_level
+ end
+
+ skip_tkspace(false)
+
+ while peek_tk.kind_of?(TkCOLON2)
+ prev_container = container
+ container = container.find_module_named(name_t.name)
+ if !container
+# warn("Couldn't find module #{name_t.name}")
+ container = prev_container.add_module(NormalModule, name_t.name)
+ end
+ get_tk
+ name_t = get_tk
+ end
+ skip_tkspace(false)
+ return [container, name_t]
+ end
+
+ # Parse a constant assignment whose name token is +tk+. When the
+ # constant is not followed by an assignment the lookahead is pushed back
+ # and nothing is recorded. Otherwise tokens are read up to the end of
+ # the expression, the text read becomes the constant's value, and the
+ # Constant is added to +container+ unless a modifier turned off its
+ # documentation.
+ def parse_constant(container, single, tk, comment)
+ name = tk.name
+ skip_tkspace(false)
+ eq_tk = get_tk
+
+ # Not an assignment: leave the token for the caller's loop
+ unless eq_tk.kind_of?(TkASSIGN)
+ unget_tk(eq_tk)
+ return
+ end
+
+
+ nest = 0
+ # Discard what has been read so far so that the get_tkread call
+ # further down returns only the text of the value
+ get_tkread
+
+ tk = get_tk
+ # NOTE(review): presumably this catches 'NAME =>' so that it is not
+ # treated as an assignment - confirm against the lexer
+ if tk.kind_of? TkGT
+ unget_tk(tk)
+ unget_tk(eq_tk)
+ return
+ end
+
+ # Consume tokens up to the end of the value expression, tracking
+ # parenthesis depth in nest
+ loop do
+ puts("Param: #{tk}, #{@scanner.continue} " +
+ "#{@scanner.lex_state} #{nest}") if $DEBUG
+
+ case tk
+ when TkSEMICOLON
+ break
+ when TkLPAREN, TkfLPAREN
+ nest += 1
+ when TkRPAREN
+ nest -= 1
+ when TkCOMMENT
+ # A comment outside parentheses after a complete expression ends
+ # the value; it is pushed back for the modifier check below
+ if nest <= 0 && @scanner.lex_state == EXPR_END
+ unget_tk(tk)
+ break
+ end
+ when TkNL
+ # A newline ends the value unless the scanner reports a continuation
+ if (@scanner.lex_state == EXPR_END and nest <= 0) || !@scanner.continue
+ unget_tk(tk)
+ break
+ end
+ end
+ tk = get_tk
+ end
+
+ # Flatten the value text onto one line
+ res = get_tkread.tr("\n", " ").strip
+ res = "" if res == ";"
+ con = Constant.new(name, res, comment)
+ read_documentation_modifiers(con, CONSTANT_MODIFIERS)
+ if con.document_self
+ container.add_constant(con)
+ end
+ end
+
+ # Parse a method definition; +tk+ is the 'def' token. Handles
+ # 'def name', 'def self.name' and 'def Constant.name'; any other
+ # receiver causes the method to be skipped. An AnyMethod is created,
+ # the tokens of the definition are recorded on it, its parameters and
+ # body are parsed, and +comment+ is attached once any call-seq section
+ # has been moved into the method's call_seq.
+ def parse_method(container, single, tk, comment)
+ progress(".")
+ @stats.num_methods += 1
+ # Position of the 'def', used for the generated "# File ..." header
+ line_no = tk.line_no
+ column = tk.char_no
+
+ # Record tokens on the parser itself until the AnyMethod exists
+ start_collecting_tokens
+ add_token(tk)
+ add_token_listener(self)
+
+ # Put the lexer in EXPR_FNAME state before the name is read
+ @scanner.instance_eval{@lex_state = EXPR_FNAME}
+ skip_tkspace(false)
+ name_t = get_tk
+ # Whitespace tokens skipped here are kept so they can be pushed back
+ back_tk = skip_tkspace
+ meth = nil
+ added_container = false
+
+ dot = get_tk
+ if dot.kind_of?(TkDOT) or dot.kind_of?(TkCOLON2)
+ # 'def receiver.name' / 'def receiver::name'
+ @scanner.instance_eval{@lex_state = EXPR_FNAME}
+ skip_tkspace
+ name_t2 = get_tk
+ case name_t
+ when TkSELF
+ name = name_t2.name
+ when TkCONSTANT
+ name = name_t2.name
+ prev_container = container
+ container = container.find_module_named(name_t.name)
+ if !container
+ added_container = true
+ # Try to look the constant up in the running interpreter; obj is
+ # nil when that raises
+ obj = name_t.name.split("::").inject(Object) do |state, item|
+ state.const_get(item)
+ end rescue nil
+
+ type = obj.class == Class ? NormalClass : NormalModule
+ if not [Class, Module].include?(obj.class)
+ warn("Couldn't find #{name_t.name}. Assuming it's a module")
+ end
+
+ if type == NormalClass then
+ container = prev_container.add_class(type, name_t.name, obj.superclass.name)
+ else
+ container = prev_container.add_module(type, name_t.name)
+ end
+ end
+ else
+ # warn("Unexpected token '#{name_t2.inspect}'")
+ # break
+ skip_method(container)
+ return
+ end
+ meth = AnyMethod.new(get_tkread, name)
+ meth.singleton = true
+ else
+ # Plain 'def name': restore the lookahead token and skipped whitespace
+ unget_tk dot
+ # NOTE(review): the block parameter reuses the name of the method
+ # parameter tk; on Ruby 1.8 that assigns to the outer variable.
+ # tk does not appear to be read afterwards - confirm.
+ back_tk.reverse_each do
+ |tk|
+ unget_tk tk
+ end
+ name = name_t.name
+
+ meth = AnyMethod.new(get_tkread, name)
+ meth.singleton = (single == SINGLE)
+ end
+
+ remove_token_listener(self)
+
+ # Build the method's token stream: a "# File ..." comment, a newline,
+ # indentation matching the column of 'def', then the tokens collected
+ # so far
+ meth.start_collecting_tokens
+ indent = TkSPACE.new(1,1)
+ indent.set_text(" " * column)
+
+ meth.add_tokens([TkCOMMENT.new(line_no,
+ 1,
+ "# File #{@top_level.file_absolute_name}, line #{line_no}"),
+ NEWLINE_TOKEN,
+ indent])
+
+ meth.add_tokens(@token_stream)
+
+ # From here on the method receives the tokens directly
+ add_token_listener(meth)
+
+ @scanner.instance_eval{@continue = false}
+ parse_method_parameters(meth)
+
+ if meth.document_self
+ container.add_method(meth)
+ elsif added_container
+ # The container was created only for this undocumented method
+ container.document_self = false
+ end
+
+ # Having now read the method parameters and documentation modifiers, we
+ # now know whether we have to rename #initialize to ::new
+
+ if name == "initialize" && !meth.singleton
+ if meth.dont_rename_initialize
+ meth.visibility = :protected
+ else
+ meth.singleton = true
+ meth.name = "new"
+ meth.visibility = :public
+ end
+ end
+
+ # Parse the body with meth as the current method
+ parse_statements(container, single, meth)
+
+ remove_token_listener(meth)
+
+ # Look for a 'call-seq' in the comment, and override the
+ # normal parameter stuff
+
+ if comment.sub!(/:?call-seq:(.*?)^\s*\#?\s*$/m, '')
+ seq = $1
+ # Strip the leading comment markers from each call-seq line
+ seq.gsub!(/^\s*\#\s*/, '')
+ meth.call_seq = seq
+ end
+
+ meth.comment = comment
+
+ end
+
+ def skip_method(container)
+ meth = AnyMethod.new("", "anon")
+ parse_method_parameters(meth)
+ parse_statements(container, false, meth)
+ end
+
+ # Capture the method's parameters. Along the way,
+ # look for a comment containing
+ #
+ # # yields: ....
+ #
+ # and add this as the block_params for the method
+
+ def parse_method_parameters(method)
+ res = parse_method_or_yield_parameters(method)
+ res = "(" + res + ")" unless res[0] == ?(
+ method.params = res unless method.params
+ if method.block_params.nil?
+ skip_tkspace(false)
+ read_documentation_modifiers(method, METHOD_MODIFIERS)
+ end
+ end
+
+ # Read the parameters of a method definition or of a yield and return
+ # their source text on one line. When +method+ is given and has no block
+ # parameters yet, a comment met along the way is handed to
+ # read_documentation_modifiers with the +modifiers+ list.
+ def parse_method_or_yield_parameters(method=nil, modifiers=METHOD_MODIFIERS)
+ skip_tkspace(false)
+ tk = get_tk
+
+ # Little hack going on here. In the statement
+ # f = 2*(1+yield)
+ # We see the RPAREN as the next token, so we need
+ # to exit early. This still won't catch all cases
+ # (such as "a = yield + 1")
+ # A parenthesised list ends at its ')'; otherwise it ends at a newline
+ end_token = case tk
+ when TkLPAREN, TkfLPAREN
+ TkRPAREN
+ when TkRPAREN
+ return ""
+ else
+ TkNL
+ end
+ nest = 0
+
+ loop do
+ puts("Param: #{tk.inspect}, #{@scanner.continue} " +
+ "#{@scanner.lex_state} #{nest}") if $DEBUG
+ case tk
+ when TkSEMICOLON
+ break
+ when TkLBRACE
+ nest += 1
+ when TkRBRACE
+ # we might have a.each {|i| yield i }
+ unget_tk(tk) if nest.zero?
+ nest -= 1
+ break if nest <= 0
+ when TkLPAREN, TkfLPAREN
+ nest += 1
+ when end_token
+ if end_token == TkRPAREN
+ nest -= 1
+ break if @scanner.lex_state == EXPR_END and nest <= 0
+ else
+ # Newline-terminated list: stop unless the scanner reports a
+ # continuation
+ break unless @scanner.continue
+ end
+ # This expression yields TkCOMMENT only when a method was given and it
+ # has no block_params; otherwise it yields a false value
+ when method && method.block_params.nil? && TkCOMMENT
+ unget_tk(tk)
+ read_documentation_modifiers(method, modifiers)
+ end
+ tk = get_tk
+ end
+ # Flatten what was read onto one line
+ res = get_tkread.tr("\n", " ").strip
+ res = "" if res == ";"
+ res
+ end
+
+ # skip the var [in] part of a 'for' statement
+ def skip_for_variable
+ skip_tkspace(false)
+ tk = get_tk
+ skip_tkspace(false)
+ tk = get_tk
+ unget_tk(tk) unless tk.kind_of?(TkIN)
+ end
+
+ # while, until, and for may be followed by an optional 'do'. Read past
+ # the loop's expression and swallow that 'do' if it is present.
+ def skip_optional_do_after_expression
+ skip_tkspace(false)
+ tk = get_tk
+ # A parenthesised expression ends at ')'; otherwise at a newline
+ case tk
+ when TkLPAREN, TkfLPAREN
+ end_token = TkRPAREN
+ else
+ end_token = TkNL
+ end
+
+ nest = 0
+ @scanner.instance_eval{@continue = false}
+
+ loop do
+ puts("\nWhile: #{tk}, #{@scanner.continue} " +
+ "#{@scanner.lex_state} #{nest}") if $DEBUG
+ case tk
+ when TkSEMICOLON
+ break
+ when TkLPAREN, TkfLPAREN
+ nest += 1
+ when TkDO
+ # A 'do' outside any parentheses ends the expression
+ break if nest.zero?
+ when end_token
+ if end_token == TkRPAREN
+ nest -= 1
+ break if @scanner.lex_state == EXPR_END and nest.zero?
+ else
+ break unless @scanner.continue
+ end
+ end
+ tk = get_tk
+ end
+ skip_tkspace(false)
+ # Swallow a 'do' that follows the expression
+ if peek_tk.kind_of? TkDO
+ get_tk
+ end
+ end
+
+ # Return a superclass, which can be either a constant
+ # of an expression
+
+ def get_class_specification
+ tk = get_tk
+ return "self" if tk.kind_of?(TkSELF)
+
+ res = ""
+ while tk.kind_of?(TkCOLON2) ||
+ tk.kind_of?(TkCOLON3) ||
+ tk.kind_of?(TkCONSTANT)
+
+ res += tk.text
+ tk = get_tk
+ end
+
+ unget_tk(tk)
+ skip_tkspace(false)
+
+ get_tkread # empty out read buffer
+
+ tk = get_tk
+
+ case tk
+ when TkNL, TkCOMMENT, TkSEMICOLON
+ unget_tk(tk)
+ return res
+ end
+
+ res += parse_call_parameters(tk)
+ res
+ end
+
+ # Read the arguments of a call, starting with the already fetched token
+ # +tk+, and return their source text on one line. Returns "" at once
+ # when +tk+ is a closing parenthesis.
+ def parse_call_parameters(tk)
+
+ # A parenthesised list ends at ')'; otherwise at a newline
+ end_token = case tk
+ when TkLPAREN, TkfLPAREN
+ TkRPAREN
+ when TkRPAREN
+ return ""
+ else
+ TkNL
+ end
+ nest = 0
+
+ loop do
+ puts("Call param: #{tk}, #{@scanner.continue} " +
+ "#{@scanner.lex_state} #{nest}") if $DEBUG
+ case tk
+ when TkSEMICOLON
+ break
+ when TkLPAREN, TkfLPAREN
+ nest += 1
+ when end_token
+ if end_token == TkRPAREN
+ nest -= 1
+ break if @scanner.lex_state == EXPR_END and nest <= 0
+ else
+ break unless @scanner.continue
+ end
+ when TkCOMMENT
+ # Leave the comment for the caller
+ unget_tk(tk)
+ break
+ end
+ tk = get_tk
+ end
+ # Flatten what was read onto one line
+ res = get_tkread.tr("\n", " ").strip
+ res = "" if res == ";"
+ res
+ end
+
+
+ # Parse a constant, which might be qualified by
+ # one or more class or module names
+
+ def get_constant
+ res = ""
+ skip_tkspace(false)
+ tk = get_tk
+
+ while tk.kind_of?(TkCOLON2) ||
+ tk.kind_of?(TkCOLON3) ||
+ tk.kind_of?(TkCONSTANT)
+
+ res += tk.text
+ tk = get_tk
+ end
+
+# if res.empty?
+# warn("Unexpected token #{tk} in constant")
+# end
+ unget_tk(tk)
+ res
+ end
+
+ # Get a constant that may be surrounded by parens
+
+ def get_constant_with_optional_parens
+ skip_tkspace(false)
+ nest = 0
+ while (tk = peek_tk).kind_of?(TkLPAREN) || tk.kind_of?(TkfLPAREN)
+ get_tk
+ skip_tkspace(true)
+ nest += 1
+ end
+
+ name = get_constant
+
+ while nest > 0
+ skip_tkspace(true)
+ tk = get_tk
+ nest -= 1 if tk.kind_of?(TkRPAREN)
+ end
+ name
+ end
+
+ # Directives are modifier comments that can appear after class, module,
+ # or method names. For example
+ #
+ # def fred # :yields: a, b
+ #
+ # or
+ #
+ # class SM # :nodoc:
+ #
+ # we return the directive name and any parameters as a two element array
+
+ def read_directive(allowed)
+ tk = get_tk
+ puts "directive: #{tk.inspect}" if $DEBUG
+ result = nil
+ if tk.kind_of?(TkCOMMENT)
+ if tk.text =~ /\s*:?(\w+):\s*(.*)/
+ directive = $1.downcase
+ if allowed.include?(directive)
+ result = [directive, $2]
+ end
+ end
+ else
+ unget_tk(tk)
+ end
+ result
+ end
+
+
+ def read_documentation_modifiers(context, allow)
+ dir = read_directive(allow)
+
+ case dir[0]
+
+ when "notnew", "not_new", "not-new"
+ context.dont_rename_initialize = true
+
+ when "nodoc"
+ context.document_self = false
+ if dir[1].downcase == "all"
+ context.document_children = false
+ end
+
+ when "doc"
+ context.document_self = true
+ context.force_documentation = true
+
+ when "yield", "yields"
+ unless context.params.nil?
+ context.params.sub!(/(,|)\s*&\w+/,'') # remove parameter &proc
+ end
+ context.block_params = dir[1]
+
+ when "arg", "args"
+ context.params = dir[1]
+ end if dir
+ end
+
+
+ # Look for directives in a normal comment block:
+ #
+ # #-- - don't display comment from this point forward
+ #
+ # Directives found by the preprocessor (stopdoc, startdoc, enddoc, main,
+ # title, section) are applied to +context+ or to the global options, and
+ # private sections are then removed from the comment.
+ #
+ # This routine modifies its parameter
+
+ def look_for_directives_in(context, comment)
+
+ preprocess = SM::PreProcess.new(@input_file_name,
+ @options.rdoc_include)
+
+ # NOTE(review): the value each branch returns to handle ("" below) is
+ # presumably what replaces the directive in the comment - confirm
+ # against SM::PreProcess#handle
+ preprocess.handle(comment) do |directive, param|
+ case directive
+ when "stopdoc"
+ context.stop_doc
+ ""
+ when "startdoc"
+ context.start_doc
+ context.force_documentation = true
+ ""
+
+ when "enddoc"
+ #context.done_documenting = true
+ #""
+ # Unwinds to whoever established catch(:enddoc)
+ throw :enddoc
+
+ when "main"
+ options = Options.instance
+ options.main_page = param
+ ""
+
+ when "title"
+ options = Options.instance
+ options.title = param
+ ""
+
+ when "section"
+ # The comment is handed to the new section and then emptied
+ context.set_current_section(param, comment)
+ comment.replace("") # 1.8 doesn't support #clear
+ break
+ else
+ warn "Unrecognized directive '#{directive}'"
+ break
+ end
+ end
+
+ remove_private_comments(comment)
+ end
+
+ def remove_private_comments(comment)
+ comment.gsub!(/^#--.*?^#\+\+/m, '')
+ comment.sub!(/^#--.*/m, '')
+ end
+
+
+
+ def get_symbol_or_name
+ tk = get_tk
+ case tk
+ when TkSYMBOL
+ tk.text.sub(/^:/, '')
+ when TkId, TkOp
+ tk.name
+ when TkSTRING
+ tk.text
+ else
+ raise "Name or symbol expected (got #{tk})"
+ end
+ end
+
+ def parse_alias(context, single, tk, comment)
+ skip_tkspace
+ if (peek_tk.kind_of? TkLPAREN)
+ get_tk
+ skip_tkspace
+ end
+ new_name = get_symbol_or_name
+ @scanner.instance_eval{@lex_state = EXPR_FNAME}
+ skip_tkspace
+ if (peek_tk.kind_of? TkCOMMA)
+ get_tk
+ skip_tkspace
+ end
+ old_name = get_symbol_or_name
+
+ al = Alias.new(get_tkread, old_name, new_name, comment)
+ read_documentation_modifiers(al, ATTR_MODIFIERS)
+ if al.document_self
+ context.add_alias(al)
+ end
+ end
+
+ def parse_yield_parameters
+ parse_method_or_yield_parameters
+ end
+
+ def parse_yield(context, single, tk, method)
+ if method.block_params.nil?
+ get_tkread
+ @scanner.instance_eval{@continue = false}
+ method.block_params = parse_yield_parameters
+ end
+ end
+
+ def parse_require(context, comment)
+ skip_tkspace_comment
+ tk = get_tk
+ if tk.kind_of? TkLPAREN
+ skip_tkspace_comment
+ tk = get_tk
+ end
+
+ name = nil
+ case tk
+ when TkSTRING
+ name = tk.text
+# when TkCONSTANT, TkIDENTIFIER, TkIVAR, TkGVAR
+# name = tk.name
+ when TkDSTRING
+ warn "Skipping require of dynamic string: #{tk.text}"
+ # else
+ # warn "'require' used as variable"
+ end
+ if name
+ context.add_require(Require.new(name, comment))
+ else
+ unget_tk(tk)
+ end
+ end
+
+ def parse_include(context, comment)
+ loop do
+ skip_tkspace_comment
+ name = get_constant_with_optional_parens
+ unless name.empty?
+ context.add_include(Include.new(name, comment))
+ end
+ return unless peek_tk.kind_of?(TkCOMMA)
+ get_tk
+ end
+ end
+
+ def get_bool
+ skip_tkspace
+ tk = get_tk
+ case tk
+ when TkTRUE
+ true
+ when TkFALSE, TkNIL
+ false
+ else
+ unget_tk tk
+ true
+ end
+ end
+
+ def parse_attr(context, single, tk, comment)
+ args = parse_symbol_arg(1)
+ if args.size > 0
+ name = args[0]
+ rw = "R"
+ skip_tkspace(false)
+ tk = get_tk
+ if tk.kind_of? TkCOMMA
+ rw = "RW" if get_bool
+ else
+ unget_tk tk
+ end
+ att = Attr.new(get_tkread, name, rw, comment)
+ read_documentation_modifiers(att, ATTR_MODIFIERS)
+ if att.document_self
+ context.add_attribute(att)
+ end
+ else
+ warn("'attr' ignored - looks like a variable")
+ end
+
+ end
+
+ def parse_visibility(container, single, tk)
+ singleton = (single == SINGLE)
+ vis = case tk.name
+ when "private" then :private
+ when "protected" then :protected
+ when "public" then :public
+ when "private_class_method"
+ singleton = true
+ :private
+ when "public_class_method"
+ singleton = true
+ :public
+ else raise "Invalid visibility: #{tk.name}"
+ end
+
+ skip_tkspace_comment(false)
+ case peek_tk
+ # Ryan Davis suggested the extension to ignore modifiers, because he
+ # often writes
+ #
+ # protected unless $TESTING
+ #
+ when TkNL, TkUNLESS_MOD, TkIF_MOD
+# error("Missing argument") if singleton
+ container.ongoing_visibility = vis
+ else
+ args = parse_symbol_arg
+ container.set_visibility_for(args, vis, singleton)
+ end
+ end
+
+ def parse_attr_accessor(context, single, tk, comment)
+ args = parse_symbol_arg
+ read = get_tkread
+ rw = "?"
+
+ # If nodoc is given, don't document any of them
+
+ tmp = CodeObject.new
+ read_documentation_modifiers(tmp, ATTR_MODIFIERS)
+ return unless tmp.document_self
+
+ case tk.name
+ when "attr_reader" then rw = "R"
+ when "attr_writer" then rw = "W"
+ when "attr_accessor" then rw = "RW"
+ else
+ rw = @options.extra_accessor_flags[tk.name]
+ end
+
+ for name in args
+ att = Attr.new(get_tkread, name, rw, comment)
+ context.add_attribute(att)
+ end
+ end
+
+ def skip_tkspace_comment(skip_nl = true)
+ loop do
+ skip_tkspace(skip_nl)
+ return unless peek_tk.kind_of? TkCOMMENT
+ get_tk
+ end
+ end
+
+ # Read a list of symbol or string arguments, with or without surrounding
+ # parentheses, and return their names as an array. When +no+ is given,
+ # reading stops as soon as that many names have been collected.
+ def parse_symbol_arg(no = nil)
+
+ args = []
+ skip_tkspace_comment
+ case tk = get_tk
+ when TkLPAREN
+ # Parenthesised form: read up to the closing ')'
+ loop do
+ skip_tkspace_comment
+ if tk1 = parse_symbol_in_arg
+ args.push tk1
+ break if no and args.size >= no
+ end
+
+ skip_tkspace_comment
+ case tk2 = get_tk
+ when TkRPAREN
+ break
+ when TkCOMMA
+ else
+ warn("unexpected token: '#{tk2.inspect}'") if $DEBUG
+ break
+ end
+ end
+ else
+ # Bare form: the token just read is the first argument
+ unget_tk tk
+ if tk = parse_symbol_in_arg
+ args.push tk
+ return args if no and args.size >= no
+ end
+
+ # Further arguments follow only while a comma comes next
+ loop do
+# skip_tkspace_comment(false)
+ skip_tkspace(false)
+
+ tk1 = get_tk
+ unless tk1.kind_of?(TkCOMMA)
+ unget_tk tk1
+ break
+ end
+
+ skip_tkspace_comment
+ if tk = parse_symbol_in_arg
+ args.push tk
+ break if no and args.size >= no
+ end
+ end
+ end
+ args
+ end
+
+ # Read one argument token and return its name: a symbol's text without
+ # the leading colon, or the value of a string literal. Any other token
+ # is consumed and gives nil (with a warning when $DEBUG is set).
+ def parse_symbol_in_arg
+ case tk = get_tk
+ when TkSYMBOL
+ tk.text.sub(/^:/, '')
+ when TkSTRING
+ # NOTE(review): this evaluates source text taken from the file being
+ # documented (the last entry of @read) inside this method's binding.
+ # Running rdoc over untrusted code could execute interpolated
+ # expressions - consider unquoting the literal without eval.
+ eval @read[-1]
+ else
+ warn("Expected symbol or string, got #{tk.inspect}") if $DEBUG
+ nil
+ end
+ end
+ end
+
+end