path: root/lib/rdoc/ruby_lex.rb
diff options
Diffstat (limited to 'lib/rdoc/ruby_lex.rb')
1 files changed, 0 insertions, 1521 deletions
diff --git a/lib/rdoc/ruby_lex.rb b/lib/rdoc/ruby_lex.rb
deleted file mode 100644
index e76fdf0414..0000000000
--- a/lib/rdoc/ruby_lex.rb
+++ /dev/null
@@ -1,1521 +0,0 @@
-# coding: US-ASCII
-# frozen_string_literal: false
-# irb/ruby-lex.rb - ruby lexical analyzer
-# $Release Version: 0.9.5$
-# $Revision: 17979 $
-# $Date: 2008-07-09 10:17:05 -0700 (Wed, 09 Jul 2008) $
-# by Keiju ISHITSUKA
-require "e2mmap"
-require "irb/slex"
-require "stringio"
-# Ruby lexer adapted from irb.
-# The internals are not documented because they are scary.
-class RDoc::RubyLex
- ##
- # Raised upon invalid input
- class Error < RDoc::Error
- end
- # :stopdoc:
- extend Exception2MessageMapper
- def_exception(:AlreadyDefinedToken, "Already defined token(%s)")
- def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')")
- def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')")
- def_exception(:TkReading2TokenDuplicateError,
- "key duplicate(token_n='%s', key='%s')")
- def_exception(:SyntaxError, "%s")
- def_exception(:TerminateLineInput, "Terminate Line Input")
- include RDoc::RubyToken
- include IRB
- attr_accessor :continue
- attr_accessor :lex_state
- attr_accessor :first_in_method_statement
- attr_reader :reader
- class << self
- attr_accessor :debug_level
- end
- def self.debug?
- @debug_level > 0
- end
- self.debug_level = 0
- # :startdoc:
- ##
- # Returns an Array of +ruby+ tokens. See ::new for a description of
- # +options+.
- def self.tokenize ruby, options
- tokens = []
- scanner = ruby, options
- scanner.exception_on_syntax_error = true
- while token = scanner.token do
- tokens << token
- end
- tokens
- end
- ##
- # Creates a new lexer for +content+. +options+ is an RDoc::Options, only
- # +tab_width+ is used.
- def initialize(content, options)
- lex_init
- if /\t/ =~ content then
- tab_width = options.tab_width
- content = content.split(/\n/).map do |line|
- 1 while line.gsub!(/\t+/) {
- ' ' * (tab_width*$&.length - $`.length % tab_width)
- } && $~
- line
- end.join("\n")
- end
- content << "\n" unless content[-1, 1] == "\n"
- set_input content
- @base_char_no = 0
- @char_no = 0
- @exp_line_no = @line_no = 1
- @here_readed = []
- @readed = []
- @current_readed = @readed
- @rests = []
- @seek = 0
- @heredoc_queue = []
- @indent = 0
- @indent_stack = []
- @lex_state = :EXPR_BEG
- @space_seen = false
- @escaped_nl = false
- @first_in_method_statement = false
- @after_question = false
- @continue = false
- @line = ""
- @skip_space = false
- @readed_auto_clean_up = false
- @exception_on_syntax_error = true
- @prompt = nil
- @prev_seek = nil
- @ltype = nil
- end
- # :stopdoc:
- def inspect # :nodoc:
- "#<%s:0x%x pos %d lex_state %p space_seen %p>" % [
- self.class, object_id,
- @io.pos, @lex_state, @space_seen,
- ]
- end
- attr_accessor :skip_space
- attr_accessor :readed_auto_clean_up
- attr_accessor :exception_on_syntax_error
- attr_reader :seek
- attr_reader :char_no
- attr_reader :line_no
- attr_reader :indent
- # io functions
- def set_input(io, p = nil, &block)
- @io = io
- if p.respond_to?(:call)
- @input = p
- elsif block_given?
- @input = block
- else
- @input ={@io.gets}
- end
- end
- def get_readed
- if idx = @readed.rindex("\n")
- @base_char_no = @readed.size - (idx + 1)
- else
- @base_char_no += @readed.size
- end
- readed = @readed.join("")
- @readed.clear
- readed
- end
- def getc
- while @rests.empty?
- # return nil unless buf_input
- @rests.push nil unless buf_input
- end
- c = @rests.shift
- @current_readed.push c
- @seek += 1
- if c == "\n".freeze
- @line_no += 1
- @char_no = 0
- else
- @char_no += 1
- end
- c
- end
- def gets
- l = ""
- while c = getc
- l.concat(c)
- break if c == "\n"
- end
- return nil if l == "" and c.nil?
- l
- end
- def eof? # Delegates to @io.eof?; characters still buffered in @rests are not considered.
- @io.eof?
- end
- def getc_of_rests
- if @rests.empty?
- nil
- else
- getc
- end
- end
- def ungetc(c = nil)
- if @here_readed.empty?
- c2 = @readed.pop
- else
- c2 = @here_readed.pop
- end
- c = c2 unless c
- @rests.unshift c #c =
- @seek -= 1
- if c == "\n"
- @line_no -= 1
- if idx = @readed.rindex("\n")
- @char_no = idx + 1
- else
- @char_no = @base_char_no + @readed.size
- end
- else
- @char_no -= 1
- end
- end
- def peek_equal?(str)
- chrs = str.split(//)
- until @rests.size >= chrs.size
- return false unless buf_input
- end
- @rests[0, chrs.size] == chrs
- end
- def peek_match?(regexp)
- while @rests.empty?
- return false unless buf_input
- end
- regexp =~ @rests.join("")
- end
- def peek(i = 0)
- while @rests.size <= i
- return nil unless buf_input
- end
- @rests[i]
- end
- def buf_input
- prompt
- line =
- return nil unless line
- @rests.concat line.split(//)
- true
- end
- private :buf_input
- def set_prompt(p = nil, &block)
- p = block if block_given?
- if p.respond_to?(:call)
- @prompt = p
- else
- @prompt ={print p}
- end
- end
- def prompt
- if @prompt
-, @indent, @continue, @line_no)
- end
- end
- def initialize_input
- @ltype = nil
- @quoted = nil
- @indent = 0
- @indent_stack = []
- @lex_state = :EXPR_BEG
- @space_seen = false
- @current_readed = @readed
- @continue = false
- prompt
- @line = ""
- @exp_line_no = @line_no
- end
- def each_top_level_statement # Lexes the input and yields each accumulated statement text together with the line number it started on.
- initialize_input
- catch(:TERM_INPUT) do # :TERM_INPUT is thrown below when lex returns nil and no text is pending
- loop do
- begin
- @continue = false
- prompt
- unless l = lex
- throw :TERM_INPUT if @line == ''
- else
- #p l
- @line.concat l
- if @ltype or @continue or @indent > 0 # statement still open: keep accumulating
- next
- end
- end
- if @line != "\n" # a lone newline is not yielded
- yield @line, @exp_line_no
- end
- break unless l
- @line = '' # start accumulating the next statement
- @exp_line_no = @line_no
- @indent = 0
- @indent_stack = []
- prompt
- rescue TerminateLineInput # drop the partial statement and restart with fresh lexer state
- initialize_input
- prompt
- get_readed
- end
- end
- end
- end
- def lex # Consumes tokens until a TkNL/TkEND_OF_SCRIPT arrives while @continue is false (or token returns nil); returns the text read, or nil if nothing was read at end of input.
- until (((tk = token).kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)) &&
- !@continue or
- tk.nil?)
- #p tk
- #p @lex_state
- #p self
- end
- line = get_readed # everything read since the previous get_readed call
- # print self.inspect
- if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil? # `||` binds tighter than `and`: empty text AND (end-of-script OR nil token)
- nil
- else
- line
- end
- end
- def token
- # require "tracer"
- # Tracer.on
- @prev_seek = @seek
- @prev_line_no = @line_no
- @prev_char_no = @char_no
- begin
- begin
- tk = @OP.match(self)
- @space_seen = tk.kind_of?(TkSPACE)
- @first_in_method_statement = false if !@space_seen && @first_in_method_statement
- rescue SyntaxError => e
- raise Error, "syntax error: #{e.message}" if
- @exception_on_syntax_error
- tk =, @line_no, @char_no)
- end
- end while @skip_space and tk.kind_of?(TkSPACE)
- if @readed_auto_clean_up
- get_readed
- end
- if TkSYMBEG === tk then
- tk1 = token
- set_token_position, tk.line_no, tk.char_no
- case tk1
- when TkId, TkOp, TkSTRING, TkDSTRING, TkSTAR, TkAMPER then
- if tk1.respond_to?(:name) then
- tk = Token(TkSYMBOL, ":" +
- else
- tk = Token(TkSYMBOL, ":" + tk1.text)
- end
- else
- tk = tk1
- end
- elsif (TkPLUS === tk or TkMINUS === tk) and peek(0) =~ /\d/ then
- tk1 = token
- set_token_position, tk.line_no, tk.char_no
- tk = Token(tk1.class, tk.text + tk1.text)
- end
- @after_question = false if @after_question and !(TkQUESTION === tk)
- #
- tk
- end
- "case", "class", "def", "do", "for", "if",
- "module", "unless", "until", "while", "begin" #, "when"
- ]
- DEINDENT_CLAUSE = ["end" #, "when"
- ]
- "q" => "\'",
- "Q" => "\"",
- "x" => "\`",
- "r" => "/",
- "w" => "]",
- "W" => "]",
- "s" => ":",
- "i" => "]",
- "I" => "]"
- }
- "{" => "}",
- "[" => "]",
- "<" => ">",
- "(" => ")"
- }
- Ltype2Token = {
- "\'" => TkSTRING,
- "\"" => TkSTRING,
- "\`" => TkXSTRING,
- "/" => TkREGEXP,
- "]" => TkDSTRING,
- ":" => TkSYMBOL
- }
- DLtype2Token = {
- "\"" => TkDSTRING,
- "\`" => TkDXSTRING,
- "/" => TkDREGEXP,
- }
- def lex_init()
- @OP =
- @OP.def_rules("\0", "\004", "\032") do |op, io|
- Token(TkEND_OF_SCRIPT, '')
- end
- @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io|
- @space_seen = true
- str = op
- while (ch = getc) =~ /[ \t\f\r\13]/ do
- str << ch
- end
- ungetc
- Token TkSPACE, str
- end
- @OP.def_rule("#") do |op, io|
- identify_comment
- end
- @OP.def_rule("=begin",
- proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do
- |op, io|
- @ltype = "="
- res = op
- until (ch = getc) == "\n" do
- res << ch
- end
- res << ch
- until ( peek_equal?("=end") && peek(4) =~ /\s/ ) do
- (ch = getc)
- res << ch
- end
- res << gets # consume =end
- @ltype = nil
- Token(TkRD_COMMENT, res)
- end
- @OP.def_rule("\n") do |op, io|
- print "\\n\n" if RDoc::RubyLex.debug?
- unless @heredoc_queue.empty?
- info = @heredoc_queue[0]
- if !info[:started] # "\n"
- info[:started] = true
- ungetc "\n"
- elsif info[:heredoc_end].nil? # heredoc body
- tk, heredoc_end = identify_here_document_body(info[:quoted], info[:lt], info[:indent])
- info[:heredoc_end] = heredoc_end
- ungetc "\n"
- else # heredoc end
- @heredoc_queue.shift
- @lex_state = :EXPR_BEG
- tk = Token(TkHEREDOCEND, info[:heredoc_end])
- if !@heredoc_queue.empty?
- @heredoc_queue[0][:started] = true
- ungetc "\n"
- end
- end
- end
- unless tk
- case @lex_state
- @continue = true
- else
- @continue = false
- @lex_state = :EXPR_BEG unless @escaped_nl
- until (@indent_stack.empty? ||
- TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
- @indent_stack.pop
- end
- end
- @current_readed = @readed
- @here_readed.clear
- tk = Token(TkNL)
- end
- @escaped_nl = false
- tk
- end
- @OP.def_rules("=") do
- |op, io|
- case @lex_state
- @lex_state = :EXPR_ARG
- else
- @lex_state = :EXPR_BEG
- end
- Token(op)
- end
- @OP.def_rules("*", "**",
- "==", "===",
- "=~", "<=>",
- "<", "<=",
- ">", ">=", ">>", "=>") do
- |op, io|
- case @lex_state
- tk = Token(TkId, op)
- @lex_state = :EXPR_ARG
- else
- tk = Token(op)
- @lex_state = :EXPR_BEG
- end
- tk
- end
- @OP.def_rules("->") do
- |op, io|
- @lex_state = :EXPR_ENDFN
- Token(op)
- end
- @OP.def_rules("!", "!=", "!~") do
- |op, io|
- case @lex_state
- @lex_state = :EXPR_ARG
- Token(TkId, op)
- else
- @lex_state = :EXPR_BEG
- Token(op)
- end
- end
- @OP.def_rules("<<") do
- |op, io|
- tk = nil
- if @lex_state != :EXPR_END && @lex_state != :EXPR_CLASS &&
- (@lex_state != :EXPR_ARG || @space_seen)
- c = peek(0)
- if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-" || c == "~")
- tk = identify_here_document(op)
- end
- end
- unless tk
- case @lex_state
- tk = Token(TkId, op)
- @lex_state = :EXPR_ARG
- else
- tk = Token(op)
- @lex_state = :EXPR_BEG
- end
- end
- tk
- end
- @OP.def_rules("'", '"') do
- |op, io|
- identify_string(op)
- end
- @OP.def_rules("`") do
- |op, io|
- if :EXPR_FNAME == @lex_state or :EXPR_DOT == @lex_state
- @lex_state = :EXPR_ARG
- Token(TkId, op)
- else
- identify_string(op)
- end
- end
- @OP.def_rules('?') do
- |op, io|
- if @lex_state == :EXPR_END
- @lex_state = :EXPR_BEG
- @after_question = true
- else
- ch = getc
- if @lex_state == :EXPR_ARG && ch =~ /\s/
- ungetc
- @lex_state = :EXPR_BEG;
- else
- @lex_state = :EXPR_END
- ch << getc if "\\" == ch
- Token(TkCHAR, "?#{ch}")
- end
- end
- end
- @OP.def_rules("&&", "||") do
- |op, io|
- @lex_state = :EXPR_BEG
- Token(op)
- end
- @OP.def_rules("&", "|") do
- |op, io|
- case @lex_state
- tk = Token(TkId, op)
- @lex_state = :EXPR_ARG
- else
- tk = Token(op)
- @lex_state = :EXPR_BEG
- end
- tk
- end
- @OP.def_rules("+=", "-=", "*=", "**=",
- "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
- |op, io|
- @lex_state = :EXPR_BEG
- op =~ /^(.*)=$/
- Token(TkOPASGN, $1)
- end
- @OP.def_rule("+@", proc{|op, io| @lex_state == :EXPR_FNAME}) do
- |op, io|
- @lex_state = :EXPR_ARG
- Token(TkId, op)
- end
- @OP.def_rule("-@", proc{|op, io| @lex_state == :EXPR_FNAME}) do
- |op, io|
- @lex_state = :EXPR_ARG
- Token(TkId, op)
- end
- @OP.def_rules("+", "-") do
- |op, io|
- catch(:RET) do
- if :EXPR_FNAME == @lex_state or :EXPR_DOT == @lex_state
- tk = Token(TkId, op)
- @lex_state = :EXPR_ARG
- elsif @lex_state == :EXPR_ARG
- if @space_seen and peek(0) =~ /[0-9]/
- throw :RET, identify_number(op)
- else
- @lex_state = :EXPR_BEG
- end
- elsif @lex_state != :EXPR_END and peek(0) =~ /[0-9]/
- throw :RET, identify_number(op)
- else
- @lex_state = :EXPR_BEG
- end
- tk = Token(op) unless tk
- tk
- end
- end
- @OP.def_rules(".", "&.") do
- |op, io|
- @lex_state = :EXPR_BEG
- if peek(0) =~ /[0-9]/
- ungetc
- identify_number
- else
- # for "obj.if" or "obj&.if" etc.
- @lex_state = :EXPR_DOT
- Token(op)
- end
- end
- @OP.def_rules("..", "...") do
- |op, io|
- @lex_state = :EXPR_BEG
- Token(op)
- end
- lex_int2
- end
- def lex_int2
- @OP.def_rules("]", "}", ")") do
- |op, io|
- @lex_state = :EXPR_END
- @indent -= 1
- @indent_stack.pop
- Token(op)
- end
- @OP.def_rule(":") do
- |op, io|
- if @lex_state == :EXPR_END || peek(0) =~ /\s/
- @lex_state = :EXPR_BEG
- Token(TkCOLON)
- else
- @lex_state = :EXPR_FNAME;
- Token(TkSYMBEG)
- end
- end
- @OP.def_rule("::") do
- |op, io|
- # p @lex_state.id2name, @space_seen
- if @lex_state == :EXPR_BEG or @lex_state == :EXPR_ARG && @space_seen
- @lex_state = :EXPR_BEG
- Token(TkCOLON3)
- else
- @lex_state = :EXPR_DOT
- Token(TkCOLON2)
- end
- end
- @OP.def_rule("/") do
- |op, io|
- if :EXPR_FNAME == @lex_state or :EXPR_DOT == @lex_state
- @lex_state = :EXPR_ARG
- Token(TkId, op)
- elsif @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID || @first_in_method_statement
- identify_string(op)
- elsif peek(0) == '='
- getc
- @lex_state = :EXPR_BEG
- Token(TkOPASGN, "/") #/)
- elsif @lex_state == :EXPR_ARG and @space_seen and peek(0) !~ /\s/
- identify_string(op)
- else
- @lex_state = :EXPR_BEG
- Token("/") #/)
- end
- end
- @OP.def_rules("^") do
- |op, io|
- case @lex_state
- tk = Token(TkId, op)
- @lex_state = :EXPR_ARG
- else
- tk = Token(op)
- @lex_state = :EXPR_BEG
- end
- tk
- end
- # @OP.def_rules("^=") do
- # @lex_state = :EXPR_BEG
- # Token(OP_ASGN, :^)
- # end
- @OP.def_rules(",") do
- |op, io|
- @lex_state = :EXPR_BEG
- Token(op)
- end
- @OP.def_rules(";") do
- |op, io|
- @lex_state = :EXPR_BEG
- until (@indent_stack.empty? ||
- TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
- @indent_stack.pop
- end
- Token(op)
- end
- @OP.def_rule("~") do
- |op, io|
- case @lex_state
- @lex_state = :EXPR_ARG
- Token(TkId, op)
- else
- @lex_state = :EXPR_BEG
- Token(op)
- end
- end
- @OP.def_rule("~@", proc{|op, io| @lex_state == :EXPR_FNAME}) do
- |op, io|
- @lex_state = :EXPR_BEG
- Token("~")
- end
- @OP.def_rule("(") do
- |op, io|
- @indent += 1
- if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID
- @lex_state = :EXPR_BEG
- tk_c = TkfLPAREN
- else
- @lex_state = :EXPR_BEG
- tk_c = TkLPAREN
- end
- @indent_stack.push tk_c
- Token tk_c
- end
- @OP.def_rule("[]", proc{|op, io| @lex_state == :EXPR_FNAME}) do
- |op, io|
- @lex_state = :EXPR_ARG
- Token(TkId, op)
- end
- @OP.def_rule("[]=", proc{|op, io| @lex_state == :EXPR_FNAME}) do
- |op, io|
- @lex_state = :EXPR_ARG
- Token(TkId, op)
- end
- @OP.def_rule("[") do
- |op, io|
- text = nil
- @indent += 1
- if @lex_state == :EXPR_FNAME
- tk_c = TkfLBRACK
- else
- if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID
- tk_c = TkLBRACK
- elsif @lex_state == :EXPR_ARG && @space_seen
- tk_c = TkLBRACK
- elsif @lex_state == :EXPR_DOT
- if peek(0) == "]"
- getc
- if peek(0) == "="
- text = "[]="
- else
- text = "[]"
- end
- else
- tk_c = TkOp
- end
- else
- tk_c = TkfLBRACK
- end
- @lex_state = :EXPR_BEG
- end
- @indent_stack.push tk_c
- Token(tk_c, text)
- end
- @OP.def_rule("{") do
- |op, io|
- @indent += 1
- if @lex_state != :EXPR_END && @lex_state != :EXPR_ARG
- tk_c = TkLBRACE
- else
- tk_c = TkfLBRACE
- end
- @lex_state = :EXPR_BEG
- @indent_stack.push tk_c
- Token(tk_c)
- end
- @OP.def_rule('\\') do
- |op, io|
- if peek(0) == "\n"
- @space_seen = true
- @continue = true
- @escaped_nl = true
- end
- Token("\\")
- end
- @OP.def_rule('%') do
- |op, io|
- if :EXPR_FNAME == @lex_state or :EXPR_DOT == @lex_state
- @lex_state = :EXPR_ARG
- Token(TkId, op)
- elsif @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID
- identify_quotation
- elsif peek(0) == '='
- getc
- @lex_state = :EXPR_BEG
- Token(TkOPASGN, '%')
- elsif @lex_state == :EXPR_ARG and @space_seen and peek(0) !~ /\s/
- identify_quotation
- else
- @lex_state = :EXPR_BEG
- Token("%") #))
- end
- end
- @OP.def_rule('$') do
- |op, io|
- identify_gvar
- end
- @OP.def_rule('@') do
- |op, io|
- if peek(0) =~ /[\w@]/
- ungetc
- identify_identifier
- else
- Token("@")
- end
- end
- # @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do
- # |op, io|
- # @indent += 1
- # @lex_state = :EXPR_FNAME
- # # @lex_state = :EXPR_END
- # # until @rests[0] == "\n" or @rests[0] == ";"
- # # rests.shift
- # # end
- # end
- @OP.def_rule("_") do
- if peek_match?(/_END__/) and @lex_state == :EXPR_BEG then
- 6.times { getc }
- Token(TkEND_OF_SCRIPT, '__END__')
- else
- ungetc
- identify_identifier
- end
- end
- @OP.def_rule("") do
- |op, io|
- printf "MATCH: start %s: %s\n", op, io.inspect if RDoc::RubyLex.debug?
- if peek(0) =~ /[0-9]/
- t = identify_number
- else
- t = identify_identifier
- end
- printf "MATCH: end %s: %s\n", op, io.inspect if RDoc::RubyLex.debug?
- t
- end
- p @OP if RDoc::RubyLex.debug?
- end
- def identify_gvar
- @lex_state = :EXPR_END
- case ch = getc
- when /[~_*$?!@\/\\;,=:<>".]/ #"
- Token(TkGVAR, "$" + ch)
- when "-"
- Token(TkGVAR, "$-" + getc)
- when "&", "`", "'", "+"
- Token(TkBACK_REF, "$"+ch)
- when /[1-9]/
- ref = ch
- while (ch = getc) =~ /[0-9]/ do ref << ch end
- ungetc
- Token(TkNTH_REF, "$#{ref}")
- when /\w/
- ungetc
- ungetc
- identify_identifier
- else
- ungetc
- Token("$")
- end
- end
- IDENT_RE = eval '/[\w\u{0080}-\u{FFFFF}]/u'
- def identify_identifier
- token = ""
- if peek(0) =~ /[$@]/
- token.concat(c = getc)
- if c == "@" and peek(0) == "@"
- token.concat getc
- end
- end
- while (ch = getc) =~ IDENT_RE do
- print " :#{ch}: " if RDoc::RubyLex.debug?
- token.concat ch
- end
- ungetc
- if ((ch == "!" && peek(1) != "=") || ch == "?") && token[0,1] =~ /\w/
- token.concat getc
- end
- # almost fix token
- case token
- when /^\$/
- return Token(TkGVAR, token)
- when /^\@\@/
- @lex_state = :EXPR_END
- # p Token(TkCVAR, token)
- return Token(TkCVAR, token)
- when /^\@/
- @lex_state = :EXPR_END
- return Token(TkIVAR, token)
- end
- if @lex_state != :EXPR_DOT
- print token, "\n" if RDoc::RubyLex.debug?
- token_c, *trans = TkReading2Token[token]
- if token_c
- # reserved word?
- if (@lex_state != :EXPR_BEG &&
- @lex_state != :EXPR_FNAME &&
- trans[1])
- # modifiers
- token_c = TkSymbol2Token[trans[1]]
- @lex_state = trans[0]
- else
- if @lex_state != :EXPR_FNAME
- if ENINDENT_CLAUSE.include?(token)
- valid = peek(0) != ':'
- # check for ``class = val'' etc.
- case token
- when "class"
- valid = false unless peek_match?(/^\s*(<<|\w|::)/)
- when "def"
- valid = false if peek_match?(/^\s*(([+-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/)
- when "do"
- valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&)/)
- valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&|\|)/)
- else
- # no nothing
- end if valid
- if valid
- if token == "do"
- if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
- @indent += 1
- @indent_stack.push token_c
- end
- else
- @indent += 1
- @indent_stack.push token_c
- end
- else
- token_c = TkIDENTIFIER
- end
- elsif DEINDENT_CLAUSE.include?(token)
- @indent -= 1
- @indent_stack.pop
- end
- @lex_state = trans[0]
- else
- @lex_state = :EXPR_END
- end
- end
- if token_c.ancestors.include?(TkId) and peek(0) == ':' and !peek_match?(/^::/)
- token.concat getc
- token_c = TkSYMBOL
- end
- return Token(token_c, token)
- end
- end
- if @lex_state == :EXPR_FNAME
- @lex_state = :EXPR_END
- if peek(0) == '=' and peek(1) != '>'
- token.concat getc
- end
- elsif @lex_state == :EXPR_BEG || @lex_state == :EXPR_DOT ||
- @lex_state == :EXPR_ARG || @lex_state == :EXPR_MID
- @lex_state = :EXPR_ARG
- else
- @lex_state = :EXPR_END
- end
- if token[0, 1] =~ /[A-Z]/
- if token[-1] =~ /[!?]/
- token_c = TkIDENTIFIER
- else
- token_c = TkCONSTANT
- end
- elsif token[token.size - 1, 1] =~ /[!?]/
- token_c = TkFID
- else
- token_c = TkIDENTIFIER
- end
- if peek(0) == ':' and !peek_match?(/^::/)
- token.concat getc
- return Token(TkSYMBOL, token)
- else
- return Token(token_c, token)
- end
- end
- def identify_here_document(op) # Lexes a heredoc opener after "<<", queues the heredoc for the newline rule, and returns a TkHEREDOCBEG token.
- ch = getc
- start_token = op # same String object as op: the concat calls below append to op in place
- # if lt = PERCENT_LTYPE[ch]
- if ch == "-" or ch == "~"
- start_token.concat ch
- ch = getc
- indent = true # "<<-" / "<<~": terminator is compared after strip in identify_here_document_body
- end
- if /['"`]/ =~ ch
- start_token.concat ch
- user_quote = lt = ch
- quoted = ""
- while (c = getc) && c != lt # collect the identifier up to the closing quote
- quoted.concat c
- end
- start_token.concat quoted
- start_token.concat lt
- else
- user_quote = nil
- lt = '"' # unquoted identifiers are treated as double-quoted
- quoted = ch.dup
- while (c = getc) && c =~ /\w/
- quoted.concat c
- end
- start_token.concat quoted
- ungetc # give back the first non-word character
- end
- @heredoc_queue << { # consumed by the "\n" rule, which reads the body once the line ends
- quoted: quoted,
- lt: lt,
- indent: indent,
- started: false
- }
- @lex_state = :EXPR_END
- Token(RDoc::RubyLex::TkHEREDOCBEG, start_token)
- end
- # Reads heredoc body lines until the terminator +quoted+ is found.
- #
- # quoted:: the terminator identifier
- # lt:: literal type stored in @ltype while the body is being read
- # indent:: when true the terminator may be indented (compared after strip)
- #
- # Returns [TkHEREDOC token holding the body, terminator line]. Raises Error
- # when the input ends before the terminator.
- def identify_here_document_body(quoted, lt, indent)
-   ltback, @ltype = @ltype, lt
-   doc = ""
-   heredoc_end = nil
-   while l = gets
-     # Normalise CRLF to LF. The group must be non-capturing "(?:\r)";
-     # the previous "(:?\r)" also swallowed a colon placed before the "\r".
-     l = l.sub(/(?:\r)?\n\z/, "\n")
-     if (indent ? l.strip : l.chomp) == quoted
-       heredoc_end = l
-       break
-     end
-     doc << l
-   end
-   raise Error, "Missing terminating #{quoted} for string" unless heredoc_end
-   @ltype = ltback
-   @lex_state = :EXPR_BEG
-   [Token(RDoc::RubyLex::TkHEREDOC, doc), heredoc_end]
- end
- def identify_quotation
- type = ch = getc
- if lt = PERCENT_LTYPE[type]
- ch = getc
- elsif type =~ /\W/
- type = nil
- lt = "\""
- else
- return Token(TkMOD, '%')
- end
- # if ch !~ /\W/
- # ungetc
- # next
- # end
- #@ltype = lt
- @quoted = ch unless @quoted = PERCENT_PAREN[ch]
- identify_string(lt, @quoted, type)
- end
- def identify_number(op = "") # Lexes a numeric literal, with op as an already-consumed prefix; returns a TkINTEGER, TkFLOAT, TkRATIONAL or TkIMAGINARY token.
- @lex_state = :EXPR_END
- num = op # same String object as op: characters are appended to it in place
- if peek(0) == "0" && peek(1) !~ /[.eEri]/ # leading zero not followed by . e E r i: radix-prefixed or octal integer
- num << getc
- case peek(0)
- when /[xX]/
- ch = getc
- match = /[0-9a-fA-F_]/
- when /[bB]/
- ch = getc
- match = /[01_]/
- when /[oO]/
- ch = getc
- match = /[0-7_]/
- when /[dD]/
- ch = getc
- match = /[0-9_]/
- when /[0-7]/
- match = /[0-7_]/
- when /[89]/
- raise Error, "Illegal octal digit"
- else
- return Token(TkINTEGER, num) # a lone "0"
- end
- num << ch if ch # ch is the radix letter, when one was consumed above
- len0 = true # stays true until at least one digit has been seen
- non_digit = false # holds "_" while the previous character was an underscore
- while ch = getc
- num << ch
- if match =~ ch
- if ch == "_"
- if non_digit
- raise Error, "trailing `#{ch}' in number"
- else
- non_digit = ch
- end
- else
- non_digit = false
- len0 = false
- end
- else
- ungetc # ch is not part of the number: push it back and drop it from num
- num[-1, 1] = ''
- if len0
- raise Error, "numeric literal without digits"
- end
- if non_digit
- raise Error, "trailing `#{non_digit}' in number"
- end
- break
- end
- end
- return Token(TkINTEGER, num)
- end
- type = TkINTEGER
- allow_point = true
- allow_e = true
- allow_ri = true
- non_digit = false
- while ch = getc
- num << ch
- case ch
- when /[0-9]/
- non_digit = false
- when "_"
- non_digit = ch
- when allow_point && "." # when the flag is false this pattern is `false`, which never matches ch
- if non_digit
- raise Error, "trailing `#{non_digit}' in number"
- end
- type = TkFLOAT
- if peek(0) !~ /[0-9]/ # "." not followed by a digit is not part of the number
- type = TkINTEGER
- ungetc
- num[-1, 1] = ''
- break
- end
- allow_point = false
- when allow_e && "e", allow_e && "E"
- if non_digit
- raise Error, "trailing `#{non_digit}' in number"
- end
- type = TkFLOAT
- if peek(0) =~ /[+-]/ # optional exponent sign
- num << getc
- end
- allow_e = false
- allow_ri = false
- allow_point = false
- non_digit = ch
- when allow_ri && "r"
- if non_digit
- raise Error, "trailing `#{non_digit}' in number"
- end
- type = TkRATIONAL
- if peek(0) == 'i' # "ri" suffix
- type = TkIMAGINARY
- num << getc
- end
- break
- when allow_ri && "i"
- if non_digit && non_digit != "r"
- raise Error, "trailing `#{non_digit}' in number"
- end
- type = TkIMAGINARY
- break
- else
- if non_digit
- raise Error, "trailing `#{non_digit}' in number"
- end
- ungetc # first character after the literal: push it back and drop it from num
- num[-1, 1] = ''
- break
- end
- end
- Token(type, num)
- end
- def identify_string(ltype, quoted = ltype, type = nil) # Lexes a quoted literal of kind ltype up to the closing delimiter quoted; type is the %-literal letter, if any.
- close = PERCENT_PAREN.values.include?(quoted) # true when the closing delimiter is a bracket, so nesting must be tracked
- @ltype = ltype
- @quoted = quoted
- str = if ltype == quoted and %w[" ' / `].include? ltype and type.nil? then
- ltype.dup
- else
- "%#{type}#{PERCENT_PAREN_REV[quoted]||quoted}" # rebuild the %-literal opener text
- end
- subtype = nil # set to true once a #{...} interpolation is seen
- begin
- nest = 0
- while ch = getc
- str << ch
- if @quoted == ch and nest <= 0
- break
- elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#"
- ch = getc
- if ch == "{" then
- subtype = true
- str << ch << skip_inner_expression
- next
- else
- ungetc
- end
- elsif ch == '\\'
- case @ltype
- when "'" then
- case ch = getc
- when "'", '\\' then
- str << ch
- else
- str << ch
- end
- else
- str << read_escape
- end
- end
- if close then
- if PERCENT_PAREN[ch] == @quoted # another opening bracket of the same kind
- nest += 1
- elsif ch == @quoted
- nest -= 1
- end
- end
- end
- if @ltype == "/"
- while peek(0) =~ /i|m|x|o|e|s|u|n/ # trailing regexp option letters
- str << getc
- end
- end
- if peek(0) == ':' and !peek_match?(/^::/) and :EXPR_BEG == @lex_state and !@after_question # `"name":` label form
- str.concat getc
- return Token(TkSYMBOL, str)
- elsif subtype
- Token(DLtype2Token[ltype], str)
- else
- Token(Ltype2Token[ltype], str)
- end
- ensure # always reset literal state, including on the early return above
- @ltype = nil
- @quoted = nil
- @lex_state = :EXPR_END
- end
- end
- def skip_inner_expression
- res = ""
- nest = 0
- while ch = getc
- res << ch
- if ch == '}'
- break if
- nest -= 1
- elsif ch == '{'
- nest += 1
- end
- end
- res
- end
- def identify_comment
- @ltype = "#"
- comment = '#'
- while ch = getc
- # if ch == "\\" #"
- # read_escape
- # end
- if ch == "\n"
- @ltype = nil
- ungetc
- break
- end
- comment << ch
- end
- return Token(TkCOMMENT, comment)
- end
- def read_escape # Reads the characters following a backslash inside a literal and returns them as a String (backslash not included).
- escape = ''
- ch = getc
- case ch
- when "\n", "\r", "\f"
- escape << ch
- when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #"
- escape << ch
- when /[0-7]/
- ungetc ch
- 3.times do # octal escape: up to three octal digits
- ch = getc
- case ch
- when /[0-7]/
- escape << ch
- when nil
- break
- else
- ungetc
- break
- end
- end
- when "x"
- escape << ch
- 2.times do # hex escape: up to two hex digits
- ch = getc
- case ch
- when /[0-9a-fA-F]/
- escape << ch
- when nil
- break
- else
- ungetc
- break
- end
- end
- when "M"
- escape << ch
- ch = getc
- if ch != '-'
- ungetc
- else
- escape << ch
- ch = getc
- if ch == "\\" #"
- ungetc
- escape << read_escape # NOTE(review): the backslash is pushed back first, so this call reads the backslash itself - confirm intended
- else
- escape << ch
- end
- end
- when "C", "c" #, "^"
- escape << ch
- if ch == "C"
- ch = getc
- if ch == "-"
- escape << ch
- ch = getc
- escape << ch
- escape << read_escape if ch == "\\" # nested escape after \C-
- else
- ungetc
- end
- elsif (ch = getc) == "\\" #"
- escape << ch << read_escape
- end
- else
- escape << ch
- # other characters
- end
- escape
- end
- # :startdoc:
-#RDoc::RubyLex.debug_level = 1