diff options
Diffstat (limited to 'lib/rdoc/ruby_lex.rb')
-rw-r--r-- | lib/rdoc/ruby_lex.rb | 225 |
1 files changed, 125 insertions, 100 deletions
diff --git a/lib/rdoc/ruby_lex.rb b/lib/rdoc/ruby_lex.rb index e6e0b41aab..06a82ef978 100644 --- a/lib/rdoc/ruby_lex.rb +++ b/lib/rdoc/ruby_lex.rb @@ -42,8 +42,8 @@ class RDoc::RubyLex include RDoc::RubyToken include IRB - attr_reader :continue - attr_reader :lex_state + attr_accessor :continue + attr_accessor :lex_state attr_reader :reader class << self @@ -107,7 +107,7 @@ class RDoc::RubyLex @here_header = false @indent = 0 @indent_stack = [] - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG @space_seen = false @continue = false @@ -280,7 +280,7 @@ class RDoc::RubyLex @quoted = nil @indent = 0 @indent_stack = [] - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG @space_seen = false @here_header = false @@ -450,11 +450,11 @@ class RDoc::RubyLex @OP.def_rule("\n") do |op, io| print "\\n\n" if RDoc::RubyLex.debug? case @lex_state - when EXPR_BEG, EXPR_FNAME, EXPR_DOT + when :EXPR_BEG, :EXPR_FNAME, :EXPR_DOT @continue = true else @continue = false - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG until (@indent_stack.empty? || [TkLPAREN, TkLBRACK, TkLBRACE, TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last)) @@ -473,25 +473,25 @@ class RDoc::RubyLex ">", ">=", ">>") do |op, io| case @lex_state - when EXPR_FNAME, EXPR_DOT - @lex_state = EXPR_ARG + when :EXPR_FNAME, :EXPR_DOT + @lex_state = :EXPR_ARG else - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG end Token(op) end @OP.def_rules("!", "!=", "!~") do |op, io| - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG Token(op) end @OP.def_rules("<<") do |op, io| tk = nil - if @lex_state != EXPR_END && @lex_state != EXPR_CLASS && - (@lex_state != EXPR_ARG || @space_seen) + if @lex_state != :EXPR_END && @lex_state != :EXPR_CLASS && + (@lex_state != :EXPR_ARG || @space_seen) c = peek(0) if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-") tk = identify_here_document @@ -500,10 +500,10 @@ class RDoc::RubyLex unless tk tk = Token(op) case @lex_state - when EXPR_FNAME, EXPR_DOT - @lex_state = EXPR_ARG + when :EXPR_FNAME, :EXPR_DOT + @lex_state = :EXPR_ARG else - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG end end tk @@ -516,8 +516,8 @@ class RDoc::RubyLex @OP.def_rules("`") do |op, io| - if @lex_state == EXPR_FNAME - @lex_state = EXPR_END + if @lex_state == :EXPR_FNAME + @lex_state = :EXPR_END Token(op) else identify_string(op) @@ -526,61 +526,61 @@ class RDoc::RubyLex @OP.def_rules('?') do |op, io| - if @lex_state == EXPR_END - @lex_state = EXPR_BEG + if @lex_state == :EXPR_END + @lex_state = :EXPR_BEG Token(TkQUESTION) else ch = getc - if @lex_state == EXPR_ARG && ch =~ /\s/ + if @lex_state == :EXPR_ARG && ch =~ /\s/ ungetc - @lex_state = EXPR_BEG; + @lex_state = :EXPR_BEG; Token(TkQUESTION) else - @lex_state = EXPR_END - Token(TkSTRING, ch) + @lex_state = :EXPR_END + Token(TkCHAR, "?#{ch}") end end end @OP.def_rules("&", "&&", "|", "||") do |op, io| - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG Token(op) end @OP.def_rules("+=", "-=", "*=", "**=", "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do |op, io| - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG op =~ /^(.*)=$/ Token(TkOPASGN, $1) end - @OP.def_rule("+@", proc{|op, io| @lex_state == EXPR_FNAME}) do + @OP.def_rule("+@", proc{|op, io| @lex_state == :EXPR_FNAME}) do |op, io| - @lex_state = EXPR_ARG + @lex_state = :EXPR_ARG Token(op) end - @OP.def_rule("-@", proc{|op, io| @lex_state == EXPR_FNAME}) do + @OP.def_rule("-@", proc{|op, io| @lex_state == :EXPR_FNAME}) do |op, io| - @lex_state = EXPR_ARG + @lex_state = :EXPR_ARG Token(op) end @OP.def_rules("+", "-") do |op, io| catch(:RET) do - if @lex_state == EXPR_ARG + if @lex_state == :EXPR_ARG if @space_seen and peek(0) =~ /[0-9]/ throw :RET, identify_number(op) else - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG end - elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/ + elsif @lex_state != :EXPR_END and peek(0) =~ /[0-9]/ throw :RET, identify_number(op) else - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG end Token(op) end @@ -588,20 +588,20 @@ class RDoc::RubyLex @OP.def_rule(".") do |op, io| - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG if peek(0) =~ /[0-9]/ ungetc identify_number else # for "obj.if" etc. - @lex_state = EXPR_DOT + @lex_state = :EXPR_DOT Token(TkDOT) end end @OP.def_rules("..", "...") do |op, io| - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG Token(op) end @@ -611,7 +611,7 @@ class RDoc::RubyLex def lex_int2 @OP.def_rules("]", "}", ")") do |op, io| - @lex_state = EXPR_END + @lex_state = :EXPR_END @indent -= 1 @indent_stack.pop Token(op) @@ -619,11 +619,11 @@ class RDoc::RubyLex @OP.def_rule(":") do |op, io| - if @lex_state == EXPR_END || peek(0) =~ /\s/ - @lex_state = EXPR_BEG + if @lex_state == :EXPR_END || peek(0) =~ /\s/ + @lex_state = :EXPR_BEG Token(TkCOLON) else - @lex_state = EXPR_FNAME; + @lex_state = :EXPR_FNAME; Token(TkSYMBEG) end end @@ -631,51 +631,51 @@ class RDoc::RubyLex @OP.def_rule("::") do |op, io| # p @lex_state.id2name, @space_seen - if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen - @lex_state = EXPR_BEG + if @lex_state == :EXPR_BEG or @lex_state == :EXPR_ARG && @space_seen + @lex_state = :EXPR_BEG Token(TkCOLON3) else - @lex_state = EXPR_DOT + @lex_state = :EXPR_DOT Token(TkCOLON2) end end @OP.def_rule("/") do |op, io| - if @lex_state == EXPR_BEG || @lex_state == EXPR_MID + if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID identify_string(op) elsif peek(0) == '=' getc - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG Token(TkOPASGN, "/") #/) - elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/ + elsif @lex_state == :EXPR_ARG and @space_seen and peek(0) !~ /\s/ identify_string(op) else - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG Token("/") #/) end end @OP.def_rules("^") do |op, io| - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG Token("^") end # @OP.def_rules("^=") do - # @lex_state = EXPR_BEG + # @lex_state = :EXPR_BEG # Token(OP_ASGN, :^) # end @OP.def_rules(",") do |op, io| - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG Token(op) end @OP.def_rules(";") do |op, io| - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG until (@indent_stack.empty? || [TkLPAREN, TkLBRACK, TkLBRACE, TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last)) @@ -686,56 +686,56 @@ class RDoc::RubyLex @OP.def_rule("~") do |op, io| - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG Token("~") end - @OP.def_rule("~@", proc{|op, io| @lex_state == EXPR_FNAME}) do + @OP.def_rule("~@", proc{|op, io| @lex_state == :EXPR_FNAME}) do |op, io| - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG Token("~") end @OP.def_rule("(") do |op, io| @indent += 1 - if @lex_state == EXPR_BEG || @lex_state == EXPR_MID - @lex_state = EXPR_BEG + if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID + @lex_state = :EXPR_BEG tk_c = TkfLPAREN else - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG tk_c = TkLPAREN end @indent_stack.push tk_c Token tk_c end - @OP.def_rule("[]", proc{|op, io| @lex_state == EXPR_FNAME}) do + @OP.def_rule("[]", proc{|op, io| @lex_state == :EXPR_FNAME}) do |op, io| - @lex_state = EXPR_ARG + @lex_state = :EXPR_ARG Token("[]") end - @OP.def_rule("[]=", proc{|op, io| @lex_state == EXPR_FNAME}) do + @OP.def_rule("[]=", proc{|op, io| @lex_state == :EXPR_FNAME}) do |op, io| - @lex_state = EXPR_ARG + @lex_state = :EXPR_ARG Token("[]=") end @OP.def_rule("[") do |op, io| @indent += 1 - if @lex_state == EXPR_FNAME + if @lex_state == :EXPR_FNAME tk_c = TkfLBRACK else - if @lex_state == EXPR_BEG || @lex_state == EXPR_MID + if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID tk_c = TkLBRACK - elsif @lex_state == EXPR_ARG && @space_seen + elsif @lex_state == :EXPR_ARG && @space_seen tk_c = TkLBRACK else tk_c = TkfLBRACK end - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG end @indent_stack.push tk_c Token(tk_c) @@ -744,12 +744,12 @@ class RDoc::RubyLex @OP.def_rule("{") do |op, io| @indent += 1 - if @lex_state != EXPR_END && @lex_state != EXPR_ARG + if @lex_state != :EXPR_END && @lex_state != :EXPR_ARG tk_c = TkLBRACE else tk_c = TkfLBRACE end - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG @indent_stack.push tk_c Token(tk_c) end @@ -768,15 +768,15 @@ class RDoc::RubyLex @OP.def_rule('%') do |op, io| - if @lex_state == EXPR_BEG || @lex_state == EXPR_MID + if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID identify_quotation elsif peek(0) == '=' getc Token(TkOPASGN, :%) - elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/ + elsif @lex_state == :EXPR_ARG and @space_seen and peek(0) !~ /\s/ identify_quotation else - @lex_state = EXPR_BEG + @lex_state = :EXPR_BEG Token("%") #)) end end @@ -799,15 +799,15 @@ class RDoc::RubyLex # @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do # |op, io| # @indent += 1 - # @lex_state = EXPR_FNAME - # # @lex_state = EXPR_END + # @lex_state = :EXPR_FNAME + # # @lex_state = :EXPR_END # # until @rests[0] == "\n" or @rests[0] == ";" # # rests.shift # # end # end @OP.def_rule("_") do - if peek_match?(/_END__/) and @lex_state == EXPR_BEG then + if peek_match?(/_END__/) and @lex_state == :EXPR_BEG then 6.times { getc } Token(TkEND_OF_SCRIPT, '__END__') else @@ -832,7 +832,7 @@ class RDoc::RubyLex end def identify_gvar - @lex_state = EXPR_END + @lex_state = :EXPR_END case ch = getc when /[~_*$?!@\/\\;,=:<>".]/ #" @@ -888,32 +888,33 @@ class RDoc::RubyLex when /^\$/ return Token(TkGVAR, token) when /^\@\@/ - @lex_state = EXPR_END + @lex_state = :EXPR_END # p Token(TkCVAR, token) return Token(TkCVAR, token) when /^\@/ - @lex_state = EXPR_END + @lex_state = :EXPR_END return Token(TkIVAR, token) end - if @lex_state != EXPR_DOT + if @lex_state != :EXPR_DOT print token, "\n" if RDoc::RubyLex.debug? token_c, *trans = TkReading2Token[token] if token_c # reserved word? - if (@lex_state != EXPR_BEG && - @lex_state != EXPR_FNAME && + if (@lex_state != :EXPR_BEG && + @lex_state != :EXPR_FNAME && trans[1]) # modifiers token_c = TkSymbol2Token[trans[1]] @lex_state = trans[0] else - if @lex_state != EXPR_FNAME + if @lex_state != :EXPR_FNAME if ENINDENT_CLAUSE.include?(token) + valid = peek(0) != ':' + # check for ``class = val'' etc. - valid = true case token when "class" valid = false unless peek_match?(/^\s*(<<|\w|::)/) @@ -925,7 +926,8 @@ class RDoc::RubyLex valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&|\|)/) else # no nothing - end + end if valid + if valid if token == "do" if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last) @@ -946,23 +948,23 @@ class RDoc::RubyLex end @lex_state = trans[0] else - @lex_state = EXPR_END + @lex_state = :EXPR_END end end return Token(token_c, token) end end - if @lex_state == EXPR_FNAME - @lex_state = EXPR_END + if @lex_state == :EXPR_FNAME + @lex_state = :EXPR_END if peek(0) == '=' token.concat getc end - elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT || - @lex_state == EXPR_ARG - @lex_state = EXPR_ARG + elsif @lex_state == :EXPR_BEG || @lex_state == :EXPR_DOT || + @lex_state == :EXPR_ARG + @lex_state = :EXPR_ARG else - @lex_state = EXPR_END + @lex_state = :EXPR_END end if token[0, 1] =~ /[A-Z]/ @@ -1028,6 +1030,8 @@ class RDoc::RubyLex end if output_heredoc then + raise Error, "Missing terminating #{quoted} for string" unless l + doc << l.chomp else doc << '"' @@ -1041,7 +1045,7 @@ class RDoc::RubyLex token_class = output_heredoc ? RDoc::RubyLex::TkHEREDOC : Ltype2Token[lt] @ltype = ltback - @lex_state = EXPR_END + @lex_state = :EXPR_END Token(token_class, doc) end @@ -1065,7 +1069,7 @@ class RDoc::RubyLex end def identify_number(op = "") - @lex_state = EXPR_END + @lex_state = :EXPR_END num = op @@ -1240,7 +1244,7 @@ class RDoc::RubyLex ensure @ltype = nil @quoted = nil - @lex_state = EXPR_END + @lex_state = :EXPR_END end end @@ -1283,18 +1287,19 @@ class RDoc::RubyLex def read_escape escape = '' ch = getc - escape << ch case ch when "\n", "\r", "\f" + escape << ch when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #" + escape << ch when /[0-7]/ ungetc ch 3.times do ch = getc - escape << ch case ch when /[0-7]/ + escape << ch when nil break else @@ -1304,11 +1309,13 @@ class RDoc::RubyLex end when "x" + escape << ch + 2.times do ch = getc - escape << ch case ch when /[0-9a-fA-F]/ + escape << ch when nil break else @@ -1318,26 +1325,44 @@ class RDoc::RubyLex end when "M" - ch = getc escape << ch + + ch = getc if ch != '-' ungetc else - ch = getc escape << ch + + ch = getc if ch == "\\" #" + ungetc escape << read_escape + else + escape << ch end end when "C", "c" #, "^" - if ch == "C" and (ch = getc) != "-" - escape << ch - ungetc + escape << ch + + if ch == "C" + ch = getc + + if ch == "-" + escape << ch + ch = getc + escape << ch + + escape << read_escape if ch == "\\" + else + ungetc + end elsif (ch = getc) == "\\" #" escape << ch << read_escape end else + escape << ch + # other characters end |