diff options
Diffstat (limited to 'lib/rdoc/parser/ripper_state_lex.rb')
-rw-r--r-- | lib/rdoc/parser/ripper_state_lex.rb | 587 |
1 files changed, 587 insertions, 0 deletions
# lib/rdoc/parser/ripper_state_lex.rb
require 'ripper'

##
# Lexes Ruby source with Ripper and returns a flat list of tokens, each
# annotated with an approximation of the lexer state that follows it.
#
# Every token is a Hash with the keys +:line_no+, +:char_no+, +:kind+,
# +:text+ and +:state+.  Multi-token constructs (strings, regexps, symbols,
# heredocs, <tt>%w</tt>-style lists, embedded documents, signed numbers) are
# squashed into a single token by #get_squashed_tk.

class RDoc::RipperStateLex
  # Lexer state bit flags stored in a token's +:state+.
  # NOTE(review): presumably modelled on the lex_state flags of Ruby's own
  # lexer -- confirm before relying on the exact numeric values.
  EXPR_NONE = 0
  EXPR_BEG = 1
  EXPR_END = 2
  EXPR_ENDARG = 4
  EXPR_ENDFN = 8
  EXPR_ARG = 16
  EXPR_CMDARG = 32
  EXPR_MID = 64
  EXPR_FNAME = 128
  EXPR_DOT = 256
  EXPR_CLASS = 512
  EXPR_LABEL = 1024
  EXPR_LABELED = 2048
  EXPR_FITEM = 4096
  EXPR_VALUE = EXPR_BEG
  EXPR_BEG_ANY = (EXPR_BEG | EXPR_MID | EXPR_CLASS)
  EXPR_ARG_ANY = (EXPR_ARG | EXPR_CMDARG)
  EXPR_END_ANY = (EXPR_END | EXPR_ENDARG | EXPR_ENDFN)

  # Operator texts that become a method-name identifier when they are seen
  # in the EXPR_ARG state (see #get_op_tk).
  REDEFINABLE_OPERATORS =
    %w[! != !~ % & * ** + +@ - -@ / < << <= <=> == === =~ > >= >> [] []= ^ ` | ~].freeze

  # Opening delimiter => closing delimiter for <tt>%w</tt>-style literals.
  # Any other delimiter closes itself.
  PAIRED_DELIMITERS = { '(' => ')', '[' => ']', '{' => '}', '<' => '>' }.freeze

  # Token kinds that may directly follow +:+ to form a plain symbol.
  SYMBOL_BODY_KINDS = [
    :on_ident, :on_tstring_end, :on_op, :on_ivar,
    :on_cvar, :on_gvar, :on_const, :on_kw
  ].freeze

  ##
  # Ripper::Filter subclass that tracks +lex_state+ by hand while Ripper
  # scans the source, and hands every token to the block given to #each.

  class InnerStateLex < Ripper::Filter
    # Operators after which the state is always EXPR_BEG.  Every other
    # operator yields EXPR_ARG when it directly follows +def+/+:+ or a dot
    # (i.e. it is being used as a method name) and EXPR_BEG otherwise.
    BEG_ONLY_OPERATORS = %w[? && || += -= *= **= &= |= ^= <<= >>= ||= &&=].freeze

    # Current lexer state; the outer class overwrites it while squashing.
    attr_accessor :lex_state

    # +code+ is the Ruby source to lex.
    def initialize(code)
      @lex_state = EXPR_BEG
      @in_fname = false
      @continue = false
      reset
      super(code)
    end

    # Clears the command-start bookkeeping.
    def reset
      @command_start = false
      @cmd_state = @command_start
    end

    def on_nl(tok, data)
      newline_transition
      emit(:on_nl, tok)
    end

    def on_ignored_nl(tok, data)
      newline_transition
      emit(:on_ignored_nl, tok)
    end

    def on_op(tok, data)
      @lex_state =
        if !BEG_ONLY_OPERATORS.include?(tok) && after_fname_or_dot?
          EXPR_ARG
        else
          EXPR_BEG
        end
      emit(:on_op, tok)
    end

    def on_kw(tok, data)
      case tok
      when 'class'
        @lex_state = EXPR_CLASS
        @in_fname = true
      when 'def'
        @lex_state = EXPR_FNAME
        @continue = true
        @in_fname = true
      when 'if', 'unless', 'while', 'until'
        # A modifier (postfix) form follows a completed expression.
        postfix = ((EXPR_END_ANY | EXPR_ARG_ANY) & @lex_state) != 0
        @lex_state = postfix ? (EXPR_BEG | EXPR_LABEL) : EXPR_BEG
      when 'begin'
        @lex_state = EXPR_BEG
      else
        @lex_state = EXPR_END
      end
      emit(:on_kw, tok)
    end

    def on_tstring_beg(tok, data)
      @lex_state = EXPR_BEG
      emit(:on_tstring_beg, tok)
    end

    def on_tstring_end(tok, data)
      @lex_state = EXPR_END | EXPR_ENDARG
      emit(:on_tstring_end, tok)
    end

    def on_CHAR(tok, data)
      @lex_state = EXPR_END
      emit(:on_CHAR, tok)
    end

    def on_period(tok, data)
      @lex_state = EXPR_DOT
      emit(:on_period, tok)
    end

    def on_int(tok, data)
      numeric_literal(:on_int, tok)
    end

    def on_float(tok, data)
      numeric_literal(:on_float, tok)
    end

    def on_rational(tok, data)
      numeric_literal(:on_rational, tok)
    end

    def on_imaginary(tok, data)
      numeric_literal(:on_imaginary, tok)
    end

    def on_symbeg(tok, data)
      @lex_state = EXPR_FNAME
      @continue = true
      @in_fname = true
      emit(:on_symbeg, tok)
    end

    def on_ident(tok, data)
      on_variables(:on_ident, tok, data)
    end

    # The four handlers below set EXPR_END first so that #on_variables never
    # sees EXPR_DOT for these token kinds.
    def on_ivar(tok, data)
      @lex_state = EXPR_END
      on_variables(:on_ivar, tok, data)
    end

    def on_cvar(tok, data)
      @lex_state = EXPR_END
      on_variables(:on_cvar, tok, data)
    end

    def on_gvar(tok, data)
      @lex_state = EXPR_END
      on_variables(:on_gvar, tok, data)
    end

    def on_backref(tok, data)
      @lex_state = EXPR_END
      on_variables(:on_backref, tok, data)
    end

    def on_lparen(tok, data)
      opening_bracket(:on_lparen, tok)
    end

    def on_rparen(tok, data)
      @lex_state = EXPR_ENDFN
      emit(:on_rparen, tok)
    end

    def on_lbrace(tok, data)
      opening_bracket(:on_lbrace, tok)
    end

    def on_rbrace(tok, data)
      @lex_state = EXPR_ENDARG
      emit(:on_rbrace, tok)
    end

    def on_lbracket(tok, data)
      opening_bracket(:on_lbracket, tok)
    end

    def on_rbracket(tok, data)
      @lex_state = EXPR_ENDARG
      emit(:on_rbracket, tok)
    end

    def on_const(tok, data)
      @lex_state =
        case @lex_state
        when EXPR_FNAME then EXPR_ENDFN
        when EXPR_CLASS then EXPR_ARG
        else EXPR_CMDARG
        end
      emit(:on_const, tok)
    end

    def on_sp(tok, data)
      emit(:on_sp, tok)
    end

    def on_comma(tok, data)
      @lex_state = EXPR_BEG | EXPR_LABEL if (EXPR_ARG_ANY & @lex_state) != 0
      emit(:on_comma, tok)
    end

    def on_comment(tok, data)
      begin_unless_label
      emit(:on_comment, tok)
    end

    def on_ignored_sp(tok, data)
      begin_unless_label
      emit(:on_ignored_sp, tok)
    end

    def on_heredoc_end(tok, data)
      # The token is emitted with the state in effect before the reset.
      emit(:on_heredoc_end, tok)
      @lex_state = EXPR_BEG
    end

    # Handles every event that has no dedicated handler above.
    def on_default(event, tok, data)
      reset
      emit(event, tok)
    end

    # Lexes the source, calling +block+ with one token Hash per lexer event.
    def each(&block)
      @callback = block
      parse
    end

    private

    # Shared state transition for identifiers and variables.
    def on_variables(event, tok, data)
      if @in_fname
        @lex_state = EXPR_ENDFN
        @in_fname = false
        @continue = false
      elsif @continue
        if EXPR_DOT == @lex_state
          @lex_state = EXPR_ARG
        else
          @lex_state = EXPR_ENDFN
          @continue = false
        end
      else
        @lex_state = EXPR_CMDARG
      end
      emit(event, tok)
    end

    # Passes a token Hash for the current position and state to the callback.
    def emit(kind, tok)
      @callback.call({ :line_no => lineno, :char_no => column, :kind => kind, :text => tok, :state => @lex_state })
    end

    # True when the previous token left the lexer right after +def+/+:+ or
    # after a dot.
    def after_fname_or_dot?
      EXPR_FNAME == @lex_state || EXPR_DOT == @lex_state
    end

    # Moves to EXPR_BEG unless the EXPR_LABEL bit is set.
    def begin_unless_label
      @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0
    end

    # State transition shared by +on_nl+ and +on_ignored_nl+.
    def newline_transition
      if after_fname_or_dot?
        @continue = true
      else
        @continue = false
        begin_unless_label
      end
    end

    # State transition and emission shared by the numeric literal handlers.
    def numeric_literal(kind, tok)
      @lex_state = EXPR_END | EXPR_ENDARG
      emit(kind, tok)
    end

    # State transition and emission shared by +(+, +{+ and +[+.
    def opening_bracket(kind, tok)
      @lex_state = EXPR_LABEL | EXPR_BEG
      emit(kind, tok)
    end
  end

  ##
  # Returns the next token, merging multi-token constructs into one token.
  # Raises StopIteration once the source is exhausted.

  def get_squashed_tk
    tk = @buf.empty? ? @inner_lex_enumerator.next : @buf.shift
    case tk[:kind]
    when :on_symbeg
      tk = get_symbol_tk(tk)
    when :on_tstring_beg
      tk = get_string_tk(tk)
    when :on_backtick
      if (EXPR_FNAME & tk[:state]) != 0
        # A backtick right after +def+ or +:+ is a method name, not a string.
        @inner_lex.lex_state = EXPR_ARG
        tk[:kind] = :on_ident
        tk[:state] = @inner_lex.lex_state
      else
        tk = get_string_tk(tk)
      end
    when :on_regexp_beg
      tk = get_regexp_tk(tk)
    when :on_embdoc_beg
      tk = get_embdoc_tk(tk)
    when :on_heredoc_beg
      @heredoc_queue << retrieve_heredoc_info(tk)
      @inner_lex.lex_state = EXPR_END
    when :on_nl, :on_ignored_nl, :on_comment, :on_heredoc_end
      # A pending heredoc body starts after this token; read it now and
      # queue it in @buf so it is returned by the following calls.
      get_heredoc_tk(*@heredoc_queue.shift) unless @heredoc_queue.empty?
    when :on_words_beg, :on_qwords_beg, :on_symbols_beg, :on_qsymbols_beg
      tk = get_words_tk(tk)
    when :on_op
      if '&.' == tk[:text]
        tk[:kind] = :on_period
      else
        tk = get_op_tk(tk)
      end
    end
    tk
  end

  # Builds an +:on_symbol+ token from a +:on_symbeg+ token and what follows.
  # When the following token cannot form a symbol, that token is returned
  # instead.
  private def get_symbol_tk(tk)
    symbol_tk = { :line_no => tk[:line_no], :char_no => tk[:char_no], :kind => :on_symbol }
    if ":'" == tk[:text] || ':"' == tk[:text]
      string_tk = get_string_tk(tk)
      symbol_tk[:text] = string_tk[:text]
      symbol_tk[:state] = string_tk[:state]
      return symbol_tk
    end

    next_tk = get_squashed_tk
    case next_tk[:kind]
    when :on_tstring_content
      symbol_tk[:text] = ":#{next_tk[:text]}"
      symbol_tk[:state] = get_squashed_tk[:state] # skip :on_tstring_end
    when *SYMBOL_BODY_KINDS
      symbol_tk[:text] = ":#{next_tk[:text]}"
      symbol_tk[:state] = next_tk[:state]
    else
      return next_tk
    end
    symbol_tk
  end

  # Collects tokens up to the closing quote into one token whose kind is
  # +:on_tstring+, +:on_dstring+ (an interpolation was seen) or +:on_symbol+
  # (the literal ended as a label).  Kernel#loop ends quietly when the input
  # runs out, leaving +:state+ nil.
  private def get_string_tk(tk)
    string = tk[:text]
    state = nil
    kind = :on_tstring
    loop do
      inner_str_tk = get_squashed_tk
      break if inner_str_tk.nil?
      string += inner_str_tk[:text]
      case inner_str_tk[:kind]
      when :on_tstring_end
        state = inner_str_tk[:state]
        break
      when :on_label_end
        state = inner_str_tk[:state]
        kind = :on_symbol
        break
      when :on_embexpr_beg
        kind = :on_dstring if :on_tstring == kind
      end
    end
    {
      :line_no => tk[:line_no],
      :char_no => tk[:char_no],
      :kind => kind,
      :text => string,
      :state => state
    }
  end

  # Collects tokens up to +:on_regexp_end+ into one +:on_regexp+ token.
  private def get_regexp_tk(tk)
    string = tk[:text]
    state = nil
    loop do
      inner_str_tk = get_squashed_tk
      break if inner_str_tk.nil?
      string += inner_str_tk[:text]
      if :on_regexp_end == inner_str_tk[:kind]
        state = inner_str_tk[:state]
        break
      end
    end
    {
      :line_no => tk[:line_no],
      :char_no => tk[:char_no],
      :kind => :on_regexp,
      :text => string,
      :state => state
    }
  end

  # Collects an embedded document through its +:on_embdoc_end+ token into
  # one +:on_embdoc+ token.
  private def get_embdoc_tk(tk)
    string = tk[:text]
    embdoc_tk = get_squashed_tk
    until :on_embdoc_end == embdoc_tk[:kind]
      string += embdoc_tk[:text]
      embdoc_tk = get_squashed_tk
    end
    string += embdoc_tk[:text]
    {
      :line_no => tk[:line_no],
      :char_no => tk[:char_no],
      :kind => :on_embdoc,
      :text => string,
      :state => embdoc_tk[:state]
    }
  end

  # Reads raw tokens up to the terminator of the heredoc +heredoc_name+ and
  # pushes an +:on_heredoc+ token, followed by the terminator token, onto the
  # front of @buf.
  private def get_heredoc_tk(heredoc_name, indent)
    string = ''
    start_tk = nil
    prev_tk = nil
    tk = @inner_lex_enumerator.next
    until heredoc_end?(heredoc_name, indent, tk)
      start_tk ||= tk
      # Restore leading columns that are missing from a token starting a line.
      if (prev_tk.nil? || "\n" == prev_tk[:text][-1]) && 0 != tk[:char_no]
        string += ' ' * tk[:char_no]
      end
      string += tk[:text]
      prev_tk = tk
      tk = @inner_lex_enumerator.next
    end
    # An empty heredoc has no body token; fall back to the terminator.
    start_tk ||= tk
    prev_tk ||= tk
    @buf.unshift tk # closing heredoc
    heredoc_tk = {
      :line_no => start_tk[:line_no],
      :char_no => start_tk[:char_no],
      :kind => :on_heredoc,
      :text => string,
      :state => prev_tk[:state]
    }
    @buf.unshift heredoc_tk
  end

  # Returns <tt>[name, indent]</tt> for a +:on_heredoc_beg+ token: the
  # identifier without its quotes, and a truthy value when the heredoc was
  # opened with <tt><<-</tt> or <tt><<~</tt>.
  private def retrieve_heredoc_info(tk)
    name = tk[:text].gsub(/\A<<[-~]?(['"`]?)(.+)\1\z/, '\2')
    indent = tk[:text] =~ /\A<<[-~]/
    [name, indent]
  end

  # True when +tk+ is the terminator of the heredoc called +name+.  For
  # indentable heredocs any leading whitespace (tabs included) is ignored.
  private def heredoc_end?(name, indent, tk)
    return false unless :on_heredoc_end == tk[:kind]
    tk_name = tk[:text].chomp
    tk_name = tk_name.lstrip if indent
    name == tk_name
  end

  # Collects a <tt>%w</tt>/<tt>%W</tt>/<tt>%i</tt>/<tt>%I</tt> literal into
  # one +:on_dstring+ token carrying the state of the opening token.
  private def get_words_tk(tk)
    string = ''
    start_token = tk[:text]
    start_quote = tk[:text].rstrip[-1]
    line_no = tk[:line_no]
    char_no = tk[:char_no]
    state = tk[:state]
    end_quote = PAIRED_DELIMITERS.fetch(start_quote, start_quote)
    end_token = nil
    loop do
      tk = get_squashed_tk
      if tk.nil?
        end_token = end_quote
        break
      end
      closer = :on_words_sep == tk[:kind] || :on_tstring_end == tk[:kind]
      if closer && end_quote == tk[:text].strip
        end_token = tk[:text]
        break
      end
      string += tk[:text]
    end
    text = "#{start_token}#{string}#{end_token}"
    {
      :line_no => line_no,
      :char_no => char_no,
      :kind => :on_dstring,
      :text => text,
      :state => state
    }
  end

  # Turns an operator used as a method name into +:on_ident+, and folds a
  # leading sign into the numeric literal that follows it.
  private def get_op_tk(tk)
    if REDEFINABLE_OPERATORS.include?(tk[:text]) && EXPR_ARG == tk[:state]
      @inner_lex.lex_state = EXPR_ARG
      tk[:kind] = :on_ident
      tk[:state] = @inner_lex.lex_state
    elsif tk[:text] =~ /^[-+]$/
      tk_ahead = get_squashed_tk
      case tk_ahead[:kind]
      when :on_int, :on_float, :on_rational, :on_imaginary
        tk[:text] += tk_ahead[:text]
        tk[:kind] = tk_ahead[:kind]
        tk[:state] = tk_ahead[:state]
      else
        # Not a signed number: put the look-ahead token back.
        @buf.unshift tk_ahead
      end
    end
    tk
  end

  # +code+ is the Ruby source to lex.  Lexing is lazy: the enumerator pulls
  # one raw token at a time, so state written back to @inner_lex while
  # squashing is seen by the handlers of the tokens that follow.
  def initialize(code)
    @buf = []
    @heredoc_queue = []
    @inner_lex = InnerStateLex.new(code)
    @inner_lex_enumerator = Enumerator.new do |y|
      @inner_lex.each do |tk|
        y << tk
      end
    end
  end

  ##
  # Returns the squashed tokens of +code+ as an Array of Hashes.

  def self.parse(code)
    lex = new(code)
    tokens = []
    begin
      while tk = lex.get_squashed_tk
        tokens.push tk
      end
    rescue StopIteration
      # End of input.
    end
    tokens
  end

  ##
  # Returns +true+ if the EXPR_END bit is set in the state of +token+.

  def self.end?(token)
    (token[:state] & EXPR_END) != 0
  end
end