diff options
author | hsbt <hsbt@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2017-08-29 11:52:50 +0000 |
---|---|---|
committer | hsbt <hsbt@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2017-08-29 11:52:50 +0000 |
commit | 4790c08906f296eea070c06933a5c2484b30584e (patch) | |
tree | 2f1835afaf8562c9ae611c80b7361c1eaa79b897 /lib/rdoc/ruby_lex.rb | |
parent | 26a9bf756bf66e77dd7b897f7ad97a7ffdfb3275 (diff) |
Merge rdoc-6.0.0.beta1.
This version fixes strange behavior in the Ruby code parser.
We will list all of the improvements in the Changelog when 6.0.0 is released.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@59686 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rdoc/ruby_lex.rb')
-rw-r--r-- | lib/rdoc/ruby_lex.rb | 358 |
1 files changed, 256 insertions, 102 deletions
diff --git a/lib/rdoc/ruby_lex.rb b/lib/rdoc/ruby_lex.rb index 1fc3c12c4a..e76fdf0414 100644 --- a/lib/rdoc/ruby_lex.rb +++ b/lib/rdoc/ruby_lex.rb @@ -45,6 +45,7 @@ class RDoc::RubyLex attr_accessor :continue attr_accessor :lex_state + attr_accessor :first_in_method_statement attr_reader :reader class << self @@ -106,10 +107,15 @@ class RDoc::RubyLex @rests = [] @seek = 0 + @heredoc_queue = [] + @indent = 0 @indent_stack = [] @lex_state = :EXPR_BEG @space_seen = false + @escaped_nl = false + @first_in_method_statement = false + @after_question = false @continue = false @line = "" @@ -350,6 +356,7 @@ class RDoc::RubyLex begin tk = @OP.match(self) @space_seen = tk.kind_of?(TkSPACE) + @first_in_method_statement = false if !@space_seen && @first_in_method_statement rescue SyntaxError => e raise Error, "syntax error: #{e.message}" if @exception_on_syntax_error @@ -361,6 +368,28 @@ class RDoc::RubyLex if @readed_auto_clean_up get_readed end + + if TkSYMBEG === tk then + tk1 = token + set_token_position tk.seek, tk.line_no, tk.char_no + + case tk1 + when TkId, TkOp, TkSTRING, TkDSTRING, TkSTAR, TkAMPER then + if tk1.respond_to?(:name) then + tk = Token(TkSYMBOL, ":" + tk1.name) + else + tk = Token(TkSYMBOL, ":" + tk1.text) + end + else + tk = tk1 + end + elsif (TkPLUS === tk or TkMINUS === tk) and peek(0) =~ /\d/ then + tk1 = token + set_token_position tk.seek, tk.line_no, tk.char_no + tk = Token(tk1.class, tk.text + tk1.text) + end + @after_question = false if @after_question and !(TkQUESTION === tk) + # Tracer.off tk end @@ -380,7 +409,9 @@ class RDoc::RubyLex "r" => "/", "w" => "]", "W" => "]", - "s" => ":" + "s" => ":", + "i" => "]", + "I" => "]" } PERCENT_PAREN = { @@ -430,15 +461,18 @@ class RDoc::RubyLex proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do |op, io| @ltype = "=" - res = '' - nil until getc == "\n" + res = op + until (ch = getc) == "\n" do + res << ch + end + res << ch until ( peek_equal?("=end") && peek(4) =~ /\s/ ) do (ch = getc) res << ch end 
- gets # consume =end + res << gets # consume =end @ltype = nil Token(TkRD_COMMENT, res) @@ -446,42 +480,90 @@ class RDoc::RubyLex @OP.def_rule("\n") do |op, io| print "\\n\n" if RDoc::RubyLex.debug? + unless @heredoc_queue.empty? + info = @heredoc_queue[0] + if !info[:started] # "\n" + info[:started] = true + ungetc "\n" + elsif info[:heredoc_end].nil? # heredoc body + tk, heredoc_end = identify_here_document_body(info[:quoted], info[:lt], info[:indent]) + info[:heredoc_end] = heredoc_end + ungetc "\n" + else # heredoc end + @heredoc_queue.shift + @lex_state = :EXPR_BEG + tk = Token(TkHEREDOCEND, info[:heredoc_end]) + if !@heredoc_queue.empty? + @heredoc_queue[0][:started] = true + ungetc "\n" + end + end + end + unless tk + case @lex_state + when :EXPR_BEG, :EXPR_FNAME, :EXPR_DOT + @continue = true + else + @continue = false + @lex_state = :EXPR_BEG unless @escaped_nl + until (@indent_stack.empty? || + [TkLPAREN, TkLBRACK, TkLBRACE, + TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last)) + @indent_stack.pop + end + end + @current_readed = @readed + @here_readed.clear + tk = Token(TkNL) + end + @escaped_nl = false + tk + end + + @OP.def_rules("=") do + |op, io| case @lex_state - when :EXPR_BEG, :EXPR_FNAME, :EXPR_DOT - @continue = true + when :EXPR_FNAME, :EXPR_DOT + @lex_state = :EXPR_ARG else - @continue = false @lex_state = :EXPR_BEG - until (@indent_stack.empty? 
|| - [TkLPAREN, TkLBRACK, TkLBRACE, - TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last)) - @indent_stack.pop - end end - @current_readed = @readed - @here_readed.clear - Token(TkNL) + Token(op) end @OP.def_rules("*", "**", - "=", "==", "===", + "==", "===", "=~", "<=>", "<", "<=", - ">", ">=", ">>") do + ">", ">=", ">>", "=>") do |op, io| case @lex_state when :EXPR_FNAME, :EXPR_DOT + tk = Token(TkId, op) @lex_state = :EXPR_ARG else + tk = Token(op) @lex_state = :EXPR_BEG end + tk + end + + @OP.def_rules("->") do + |op, io| + @lex_state = :EXPR_ENDFN Token(op) end @OP.def_rules("!", "!=", "!~") do |op, io| - @lex_state = :EXPR_BEG - Token(op) + case @lex_state + when :EXPR_FNAME, :EXPR_DOT + @lex_state = :EXPR_ARG + Token(TkId, op) + else + @lex_state = :EXPR_BEG + Token(op) + end end @OP.def_rules("<<") do @@ -490,16 +572,17 @@ class RDoc::RubyLex if @lex_state != :EXPR_END && @lex_state != :EXPR_CLASS && (@lex_state != :EXPR_ARG || @space_seen) c = peek(0) - if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-") - tk = identify_here_document + if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-" || c == "~") + tk = identify_here_document(op) end end unless tk - tk = Token(op) case @lex_state when :EXPR_FNAME, :EXPR_DOT + tk = Token(TkId, op) @lex_state = :EXPR_ARG else + tk = Token(op) @lex_state = :EXPR_BEG end end @@ -513,9 +596,9 @@ class RDoc::RubyLex @OP.def_rules("`") do |op, io| - if @lex_state == :EXPR_FNAME - @lex_state = :EXPR_END - Token(op) + if :EXPR_FNAME == @lex_state or :EXPR_DOT == @lex_state + @lex_state = :EXPR_ARG + Token(TkId, op) else identify_string(op) end @@ -525,6 +608,7 @@ class RDoc::RubyLex |op, io| if @lex_state == :EXPR_END @lex_state = :EXPR_BEG + @after_question = true Token(TkQUESTION) else ch = getc @@ -534,17 +618,31 @@ class RDoc::RubyLex Token(TkQUESTION) else @lex_state = :EXPR_END + ch << getc if "\\" == ch Token(TkCHAR, "?#{ch}") end end end - @OP.def_rules("&", "&&", "|", "||") do + @OP.def_rules("&&", "||") 
do |op, io| @lex_state = :EXPR_BEG Token(op) end + @OP.def_rules("&", "|") do + |op, io| + case @lex_state + when :EXPR_FNAME, :EXPR_DOT + tk = Token(TkId, op) + @lex_state = :EXPR_ARG + else + tk = Token(op) + @lex_state = :EXPR_BEG + end + tk + end + @OP.def_rules("+=", "-=", "*=", "**=", "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do |op, io| @@ -556,19 +654,22 @@ class RDoc::RubyLex @OP.def_rule("+@", proc{|op, io| @lex_state == :EXPR_FNAME}) do |op, io| @lex_state = :EXPR_ARG - Token(op) + Token(TkId, op) end @OP.def_rule("-@", proc{|op, io| @lex_state == :EXPR_FNAME}) do |op, io| @lex_state = :EXPR_ARG - Token(op) + Token(TkId, op) end @OP.def_rules("+", "-") do |op, io| catch(:RET) do - if @lex_state == :EXPR_ARG + if :EXPR_FNAME == @lex_state or :EXPR_DOT == @lex_state + tk = Token(TkId, op) + @lex_state = :EXPR_ARG + elsif @lex_state == :EXPR_ARG if @space_seen and peek(0) =~ /[0-9]/ throw :RET, identify_number(op) else @@ -579,20 +680,21 @@ class RDoc::RubyLex else @lex_state = :EXPR_BEG end - Token(op) + tk = Token(op) unless tk + tk end end - @OP.def_rule(".") do + @OP.def_rules(".", "&.") do |op, io| @lex_state = :EXPR_BEG if peek(0) =~ /[0-9]/ ungetc identify_number else - # for "obj.if" etc. + # for "obj.if" or "obj&.if" etc. 
@lex_state = :EXPR_DOT - Token(TkDOT) + Token(op) end end @@ -639,7 +741,10 @@ class RDoc::RubyLex @OP.def_rule("/") do |op, io| - if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID + if :EXPR_FNAME == @lex_state or :EXPR_DOT == @lex_state + @lex_state = :EXPR_ARG + Token(TkId, op) + elsif @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID || @first_in_method_statement identify_string(op) elsif peek(0) == '=' getc @@ -655,8 +760,15 @@ class RDoc::RubyLex @OP.def_rules("^") do |op, io| - @lex_state = :EXPR_BEG - Token("^") + case @lex_state + when :EXPR_FNAME, :EXPR_DOT + tk = Token(TkId, op) + @lex_state = :EXPR_ARG + else + tk = Token(op) + @lex_state = :EXPR_BEG + end + tk end # @OP.def_rules("^=") do @@ -683,8 +795,14 @@ class RDoc::RubyLex @OP.def_rule("~") do |op, io| - @lex_state = :EXPR_BEG - Token("~") + case @lex_state + when :EXPR_FNAME, :EXPR_DOT + @lex_state = :EXPR_ARG + Token(TkId, op) + else + @lex_state = :EXPR_BEG + Token(op) + end end @OP.def_rule("~@", proc{|op, io| @lex_state == :EXPR_FNAME}) do @@ -710,17 +828,18 @@ class RDoc::RubyLex @OP.def_rule("[]", proc{|op, io| @lex_state == :EXPR_FNAME}) do |op, io| @lex_state = :EXPR_ARG - Token("[]") + Token(TkId, op) end @OP.def_rule("[]=", proc{|op, io| @lex_state == :EXPR_FNAME}) do |op, io| @lex_state = :EXPR_ARG - Token("[]=") + Token(TkId, op) end @OP.def_rule("[") do |op, io| + text = nil @indent += 1 if @lex_state == :EXPR_FNAME tk_c = TkfLBRACK @@ -729,13 +848,25 @@ class RDoc::RubyLex tk_c = TkLBRACK elsif @lex_state == :EXPR_ARG && @space_seen tk_c = TkLBRACK + elsif @lex_state == :EXPR_DOT + if peek(0) == "]" + tk_c = TkIDENTIFIER + getc + if peek(0) == "=" + text = "[]=" + else + text = "[]" + end + else + tk_c = TkOp + end else tk_c = TkfLBRACK end @lex_state = :EXPR_BEG end @indent_stack.push tk_c - Token(tk_c) + Token(tk_c, text) end @OP.def_rule("{") do @@ -753,23 +884,25 @@ class RDoc::RubyLex @OP.def_rule('\\') do |op, io| - if getc == "\n" + if peek(0) == "\n" @space_seen = true 
@continue = true - Token(TkSPACE) - else - ungetc - Token("\\") + @escaped_nl = true end + Token("\\") end @OP.def_rule('%') do |op, io| - if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID + if :EXPR_FNAME == @lex_state or :EXPR_DOT == @lex_state + @lex_state = :EXPR_ARG + Token(TkId, op) + elsif @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID identify_quotation elsif peek(0) == '=' getc - Token(TkOPASGN, :%) + @lex_state = :EXPR_BEG + Token(TkOPASGN, '%') elsif @lex_state == :EXPR_ARG and @space_seen and peek(0) !~ /\s/ identify_quotation else @@ -871,7 +1004,7 @@ class RDoc::RubyLex ungetc - if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "=" + if ((ch == "!" && peek(1) != "=") || ch == "?") && token[0,1] =~ /\w/ token.concat getc end @@ -944,44 +1077,63 @@ class RDoc::RubyLex @lex_state = :EXPR_END end end + if token_c.ancestors.include?(TkId) and peek(0) == ':' and !peek_match?(/^::/) + token.concat getc + token_c = TkSYMBOL + end return Token(token_c, token) end end if @lex_state == :EXPR_FNAME @lex_state = :EXPR_END - if peek(0) == '=' + if peek(0) == '=' and peek(1) != '>' token.concat getc end elsif @lex_state == :EXPR_BEG || @lex_state == :EXPR_DOT || - @lex_state == :EXPR_ARG + @lex_state == :EXPR_ARG || @lex_state == :EXPR_MID @lex_state = :EXPR_ARG else @lex_state = :EXPR_END end if token[0, 1] =~ /[A-Z]/ - return Token(TkCONSTANT, token) + if token[-1] =~ /[!?]/ + token_c = TkIDENTIFIER + else + token_c = TkCONSTANT + end elsif token[token.size - 1, 1] =~ /[!?]/ - return Token(TkFID, token) + token_c = TkFID + else + token_c = TkIDENTIFIER + end + if peek(0) == ':' and !peek_match?(/^::/) + token.concat getc + return Token(TkSYMBOL, token) else - return Token(TkIDENTIFIER, token) + return Token(token_c, token) end end - def identify_here_document + def identify_here_document(op) ch = getc + start_token = op # if lt = PERCENT_LTYPE[ch] - if ch == "-" + if ch == "-" or ch == "~" + start_token.concat ch ch = getc indent = true end 
if /['"`]/ =~ ch + start_token.concat ch user_quote = lt = ch quoted = "" while (c = getc) && c != lt quoted.concat c end + start_token.concat quoted + start_token.concat lt else user_quote = nil lt = '"' @@ -989,57 +1141,38 @@ class RDoc::RubyLex while (c = getc) && c =~ /\w/ quoted.concat c end + start_token.concat quoted ungetc end - ltback, @ltype = @ltype, lt - reserve = [] - while ch = getc - reserve.push ch - if ch == "\\" - reserve.push ch = getc - elsif ch == "\n" - break - end - end - - output_heredoc = reserve.join =~ /\A\r?\n\z/ + @heredoc_queue << { + quoted: quoted, + lt: lt, + indent: indent, + started: false + } + @lex_state = :EXPR_END + Token(RDoc::RubyLex::TkHEREDOCBEG, start_token) + end - if output_heredoc then - doc = '<<' - doc << '-' if indent - doc << "#{user_quote}#{quoted}#{user_quote}\n" - else - doc = '"' - end + def identify_here_document_body(quoted, lt, indent) + ltback, @ltype = @ltype, lt - @current_readed = @readed + doc = "" + heredoc_end = nil while l = gets l = l.sub(/(:?\r)?\n\z/, "\n") if (indent ? l.strip : l.chomp) == quoted + heredoc_end = l break end doc << l end + raise Error, "Missing terminating #{quoted} for string" unless heredoc_end - if output_heredoc then - raise Error, "Missing terminating #{quoted} for string" unless l - - doc << l.chomp - else - doc << '"' - end - - @current_readed = @here_readed - @here_readed.concat reserve - while ch = reserve.pop - ungetc ch - end - - token_class = output_heredoc ? 
RDoc::RubyLex::TkHEREDOC : Ltype2Token[lt] @ltype = ltback - @lex_state = :EXPR_END - Token(token_class, doc) + @lex_state = :EXPR_BEG + [Token(RDoc::RubyLex::TkHEREDOC, doc), heredoc_end] end def identify_quotation @@ -1066,7 +1199,7 @@ class RDoc::RubyLex num = op - if peek(0) == "0" && peek(1) !~ /[.eE]/ + if peek(0) == "0" && peek(1) !~ /[.eEri]/ num << getc case peek(0) @@ -1125,6 +1258,7 @@ class RDoc::RubyLex type = TkINTEGER allow_point = true allow_e = true + allow_ri = true non_digit = false while ch = getc num << ch @@ -1154,8 +1288,25 @@ class RDoc::RubyLex num << getc end allow_e = false + allow_ri = false allow_point = false non_digit = ch + when allow_ri && "r" + if non_digit + raise Error, "trailing `#{non_digit}' in number" + end + type = TkRATIONAL + if peek(0) == 'i' + type = TkIMAGINARY + num << getc + end + break + when allow_ri && "i" + if non_digit && non_digit != "r" + raise Error, "trailing `#{non_digit}' in number" + end + type = TkIMAGINARY + break else if non_digit raise Error, "trailing `#{non_digit}' in number" @@ -1174,10 +1325,10 @@ class RDoc::RubyLex @ltype = ltype @quoted = quoted - str = if ltype == quoted and %w[" ' /].include? ltype then + str = if ltype == quoted and %w[" ' / `].include? ltype and type.nil? then ltype.dup else - "%#{type or PERCENT_LTYPE.key ltype}#{PERCENT_PAREN_REV[quoted]||quoted}" + "%#{type}#{PERCENT_PAREN_REV[quoted]||quoted}" end subtype = nil @@ -1191,21 +1342,21 @@ class RDoc::RubyLex break elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#" ch = getc - subtype = true if ch == "{" then + subtype = true str << ch << skip_inner_expression next else ungetc end elsif ch == '\\' - if %w[' /].include? 
@ltype then + case @ltype + when "'" then case ch = getc - when "\\", "\n", "'" - when @ltype + when "'", '\\' then str << ch else - ungetc + str << ch end else str << read_escape @@ -1227,7 +1378,10 @@ class RDoc::RubyLex end end - if subtype + if peek(0) == ':' and !peek_match?(/^::/) and :EXPR_BEG == @lex_state and !@after_question + str.concat getc + return Token(TkSYMBOL, str) + elsif subtype Token(DLtype2Token[ltype], str) else Token(Ltype2Token[ltype], str) |