summaryrefslogtreecommitdiff
path: root/lib/rdoc/parser/ripper_state_lex.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rdoc/parser/ripper_state_lex.rb')
-rw-r--r--lib/rdoc/parser/ripper_state_lex.rb587
1 files changed, 0 insertions, 587 deletions
diff --git a/lib/rdoc/parser/ripper_state_lex.rb b/lib/rdoc/parser/ripper_state_lex.rb
deleted file mode 100644
index c9a0f5a21e..0000000000
--- a/lib/rdoc/parser/ripper_state_lex.rb
+++ /dev/null
@@ -1,587 +0,0 @@
-require 'ripper'
-
-class RDoc::RipperStateLex
- EXPR_NONE = 0
- EXPR_BEG = 1
- EXPR_END = 2
- EXPR_ENDARG = 4
- EXPR_ENDFN = 8
- EXPR_ARG = 16
- EXPR_CMDARG = 32
- EXPR_MID = 64
- EXPR_FNAME = 128
- EXPR_DOT = 256
- EXPR_CLASS = 512
- EXPR_LABEL = 1024
- EXPR_LABELED = 2048
- EXPR_FITEM = 4096
- EXPR_VALUE = EXPR_BEG
- EXPR_BEG_ANY = (EXPR_BEG | EXPR_MID | EXPR_CLASS)
- EXPR_ARG_ANY = (EXPR_ARG | EXPR_CMDARG)
- EXPR_END_ANY = (EXPR_END | EXPR_ENDARG | EXPR_ENDFN)
-
- class InnerStateLex < Ripper::Filter
- attr_accessor :lex_state
-
- def initialize(code)
- @lex_state = EXPR_BEG
- @in_fname = false
- @continue = false
- reset
- super(code)
- end
-
- def reset
- @command_start = false
- @cmd_state = @command_start
- end
-
- def on_nl(tok, data)
- case @lex_state
- when EXPR_FNAME, EXPR_DOT
- @continue = true
- else
- @continue = false
- @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0
- end
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_ignored_nl(tok, data)
- case @lex_state
- when EXPR_FNAME, EXPR_DOT
- @continue = true
- else
- @continue = false
- @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0
- end
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_op(tok, data)
- case tok
- when '&', '|', '!', '!=', '!~'
- case @lex_state
- when EXPR_FNAME, EXPR_DOT
- @lex_state = EXPR_ARG
- else
- @lex_state = EXPR_BEG
- end
- when '<<'
- # TODO next token?
- case @lex_state
- when EXPR_FNAME, EXPR_DOT
- @lex_state = EXPR_ARG
- else
- @lex_state = EXPR_BEG
- end
- when '?'
- @lex_state = EXPR_BEG
- when '&&', '||', '+=', '-=', '*=', '**=',
- '&=', '|=', '^=', '<<=', '>>=', '||=', '&&='
- @lex_state = EXPR_BEG
- else
- case @lex_state
- when EXPR_FNAME, EXPR_DOT
- @lex_state = EXPR_ARG
- else
- @lex_state = EXPR_BEG
- end
- end
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_kw(tok, data)
- case tok
- when 'class'
- @lex_state = EXPR_CLASS
- @in_fname = true
- when 'def'
- @lex_state = EXPR_FNAME
- @continue = true
- @in_fname = true
- when 'if', 'unless', 'while', 'until'
- if ((EXPR_END | EXPR_ENDARG | EXPR_ENDFN | EXPR_ARG | EXPR_CMDARG) & @lex_state) != 0 # postfix if
- @lex_state = EXPR_BEG | EXPR_LABEL
- else
- @lex_state = EXPR_BEG
- end
- when 'begin'
- @lex_state = EXPR_BEG
- else
- if @lex_state == EXPR_FNAME
- @lex_state = EXPR_END
- else
- @lex_state = EXPR_END
- end
- end
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_tstring_beg(tok, data)
- @lex_state = EXPR_BEG
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_tstring_end(tok, data)
- @lex_state = EXPR_END | EXPR_ENDARG
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_CHAR(tok, data)
- @lex_state = EXPR_END
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_period(tok, data)
- @lex_state = EXPR_DOT
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_int(tok, data)
- @lex_state = EXPR_END | EXPR_ENDARG
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_float(tok, data)
- @lex_state = EXPR_END | EXPR_ENDARG
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_rational(tok, data)
- @lex_state = EXPR_END | EXPR_ENDARG
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_imaginary(tok, data)
- @lex_state = EXPR_END | EXPR_ENDARG
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_symbeg(tok, data)
- @lex_state = EXPR_FNAME
- @continue = true
- @in_fname = true
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- private def on_variables(event, tok, data)
- if @in_fname
- @lex_state = EXPR_ENDFN
- @in_fname = false
- @continue = false
- elsif @continue
- case @lex_state
- when EXPR_DOT
- @lex_state = EXPR_ARG
- else
- @lex_state = EXPR_ENDFN
- @continue = false
- end
- else
- @lex_state = EXPR_CMDARG
- end
- @callback.call({ :line_no => lineno, :char_no => column, :kind => event, :text => tok, :state => @lex_state})
- end
-
- def on_ident(tok, data)
- on_variables(__method__, tok, data)
- end
-
- def on_ivar(tok, data)
- @lex_state = EXPR_END
- on_variables(__method__, tok, data)
- end
-
- def on_cvar(tok, data)
- @lex_state = EXPR_END
- on_variables(__method__, tok, data)
- end
-
- def on_gvar(tok, data)
- @lex_state = EXPR_END
- on_variables(__method__, tok, data)
- end
-
- def on_backref(tok, data)
- @lex_state = EXPR_END
- on_variables(__method__, tok, data)
- end
-
- def on_lparen(tok, data)
- @lex_state = EXPR_LABEL | EXPR_BEG
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_rparen(tok, data)
- @lex_state = EXPR_ENDFN
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_lbrace(tok, data)
- @lex_state = EXPR_LABEL | EXPR_BEG
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_rbrace(tok, data)
- @lex_state = EXPR_ENDARG
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_lbracket(tok, data)
- @lex_state = EXPR_LABEL | EXPR_BEG
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_rbracket(tok, data)
- @lex_state = EXPR_ENDARG
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_const(tok, data)
- case @lex_state
- when EXPR_FNAME
- @lex_state = EXPR_ENDFN
- when EXPR_CLASS
- @lex_state = EXPR_ARG
- else
- @lex_state = EXPR_CMDARG
- end
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_sp(tok, data)
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_comma(tok, data)
- @lex_state = EXPR_BEG | EXPR_LABEL if (EXPR_ARG_ANY & @lex_state) != 0
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_comment(tok, data)
- @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_ignored_sp(tok, data)
- @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- end
-
- def on_heredoc_end(tok, data)
- @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state})
- @lex_state = EXPR_BEG
- end
-
- def on_default(event, tok, data)
- reset
- @callback.call({ :line_no => lineno, :char_no => column, :kind => event, :text => tok, :state => @lex_state})
- end
-
- def each(&block)
- @callback = block
- parse
- end
- end
-
- def get_squashed_tk
- if @buf.empty?
- tk = @inner_lex_enumerator.next
- else
- tk = @buf.shift
- end
- case tk[:kind]
- when :on_symbeg then
- tk = get_symbol_tk(tk)
- when :on_tstring_beg then
- tk = get_string_tk(tk)
- when :on_backtick then
- if (EXPR_FNAME & tk[:state]) != 0
- @inner_lex.lex_state = EXPR_ARG
- tk[:kind] = :on_ident
- tk[:state] = @inner_lex.lex_state
- else
- tk = get_string_tk(tk)
- end
- when :on_regexp_beg then
- tk = get_regexp_tk(tk)
- when :on_embdoc_beg then
- tk = get_embdoc_tk(tk)
- when :on_heredoc_beg then
- @heredoc_queue << retrieve_heredoc_info(tk)
- @inner_lex.lex_state = EXPR_END
- when :on_nl, :on_ignored_nl, :on_comment, :on_heredoc_end then
- unless @heredoc_queue.empty?
- get_heredoc_tk(*@heredoc_queue.shift)
- end
- when :on_words_beg then
- tk = get_words_tk(tk)
- when :on_qwords_beg then
- tk = get_words_tk(tk)
- when :on_symbols_beg then
- tk = get_words_tk(tk)
- when :on_qsymbols_beg then
- tk = get_words_tk(tk)
- when :on_op then
- if '&.' == tk[:text]
- tk[:kind] = :on_period
- else
- tk = get_op_tk(tk)
- end
- end
- tk
- end
-
- private def get_symbol_tk(tk)
- is_symbol = true
- symbol_tk = { :line_no => tk[:line_no], :char_no => tk[:char_no], :kind => :on_symbol }
- if ":'" == tk[:text] or ':"' == tk[:text]
- tk1 = get_string_tk(tk)
- symbol_tk[:text] = tk1[:text]
- symbol_tk[:state] = tk1[:state]
- else
- case (tk1 = get_squashed_tk)[:kind]
- when :on_ident
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = tk1[:state]
- when :on_tstring_content
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = get_squashed_tk[:state] # skip :on_tstring_end
- when :on_tstring_end
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = tk1[:state]
- when :on_op
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = tk1[:state]
- when :on_ivar
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = tk1[:state]
- when :on_cvar
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = tk1[:state]
- when :on_gvar
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = tk1[:state]
- when :on_const
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = tk1[:state]
- when :on_kw
- symbol_tk[:text] = ":#{tk1[:text]}"
- symbol_tk[:state] = tk1[:state]
- else
- is_symbol = false
- tk = tk1
- end
- end
- if is_symbol
- tk = symbol_tk
- end
- tk
- end
-
- private def get_string_tk(tk)
- string = tk[:text]
- state = nil
- kind = :on_tstring
- loop do
- inner_str_tk = get_squashed_tk
- if inner_str_tk.nil?
- break
- elsif :on_tstring_end == inner_str_tk[:kind]
- string = string + inner_str_tk[:text]
- state = inner_str_tk[:state]
- break
- elsif :on_label_end == inner_str_tk[:kind]
- string = string + inner_str_tk[:text]
- state = inner_str_tk[:state]
- kind = :on_symbol
- break
- else
- string = string + inner_str_tk[:text]
- if :on_embexpr_beg == inner_str_tk[:kind] then
- kind = :on_dstring if :on_tstring == kind
- end
- end
- end
- {
- :line_no => tk[:line_no],
- :char_no => tk[:char_no],
- :kind => kind,
- :text => string,
- :state => state
- }
- end
-
- private def get_regexp_tk(tk)
- string = tk[:text]
- state = nil
- loop do
- inner_str_tk = get_squashed_tk
- if inner_str_tk.nil?
- break
- elsif :on_regexp_end == inner_str_tk[:kind]
- string = string + inner_str_tk[:text]
- state = inner_str_tk[:state]
- break
- else
- string = string + inner_str_tk[:text]
- end
- end
- {
- :line_no => tk[:line_no],
- :char_no => tk[:char_no],
- :kind => :on_regexp,
- :text => string,
- :state => state
- }
- end
-
- private def get_embdoc_tk(tk)
- string = tk[:text]
- until :on_embdoc_end == (embdoc_tk = get_squashed_tk)[:kind] do
- string = string + embdoc_tk[:text]
- end
- string = string + embdoc_tk[:text]
- {
- :line_no => tk[:line_no],
- :char_no => tk[:char_no],
- :kind => :on_embdoc,
- :text => string,
- :state => embdoc_tk[:state]
- }
- end
-
- private def get_heredoc_tk(heredoc_name, indent)
- string = ''
- start_tk = nil
- prev_tk = nil
- until heredoc_end?(heredoc_name, indent, tk = @inner_lex_enumerator.next) do
- start_tk = tk unless start_tk
- if (prev_tk.nil? or "\n" == prev_tk[:text][-1]) and 0 != tk[:char_no]
- string = string + (' ' * tk[:char_no])
- end
- string = string + tk[:text]
- prev_tk = tk
- end
- start_tk = tk unless start_tk
- prev_tk = tk unless prev_tk
- @buf.unshift tk # closing heredoc
- heredoc_tk = {
- :line_no => start_tk[:line_no],
- :char_no => start_tk[:char_no],
- :kind => :on_heredoc,
- :text => string,
- :state => prev_tk[:state]
- }
- @buf.unshift heredoc_tk
- end
-
- private def retrieve_heredoc_info(tk)
- name = tk[:text].gsub(/\A<<[-~]?(['"`]?)(.+)\1\z/, '\2')
- indent = tk[:text] =~ /\A<<[-~]/
- [name, indent]
- end
-
- private def heredoc_end?(name, indent, tk)
- result = false
- if :on_heredoc_end == tk[:kind] then
- tk_name = (indent ? tk[:text].gsub(/^ *(.+)\n?$/, '\1') : tk[:text].gsub(/\n\z/, ''))
- if name == tk_name
- result = true
- end
- end
- result
- end
-
- private def get_words_tk(tk)
- string = ''
- start_token = tk[:text]
- start_quote = tk[:text].rstrip[-1]
- line_no = tk[:line_no]
- char_no = tk[:char_no]
- state = tk[:state]
- end_quote =
- case start_quote
- when ?( then ?)
- when ?[ then ?]
- when ?{ then ?}
- when ?< then ?>
- else start_quote
- end
- end_token = nil
- loop do
- tk = get_squashed_tk
- if tk.nil?
- end_token = end_quote
- break
- elsif :on_tstring_content == tk[:kind] then
- string += tk[:text]
- elsif :on_words_sep == tk[:kind] or :on_tstring_end == tk[:kind] then
- if end_quote == tk[:text].strip then
- end_token = tk[:text]
- break
- else
- string += tk[:text]
- end
- else
- string += tk[:text]
- end
- end
- text = "#{start_token}#{string}#{end_token}"
- {
- :line_no => line_no,
- :char_no => char_no,
- :kind => :on_dstring,
- :text => text,
- :state => state
- }
- end
-
- private def get_op_tk(tk)
- redefinable_operators = %w[! != !~ % & * ** + +@ - -@ / < << <= <=> == === =~ > >= >> [] []= ^ ` | ~]
- if redefinable_operators.include?(tk[:text]) and EXPR_ARG == tk[:state] then
- @inner_lex.lex_state = EXPR_ARG
- tk[:kind] = :on_ident
- tk[:state] = @inner_lex.lex_state
- elsif tk[:text] =~ /^[-+]$/ then
- tk_ahead = get_squashed_tk
- case tk_ahead[:kind]
- when :on_int, :on_float, :on_rational, :on_imaginary then
- tk[:text] += tk_ahead[:text]
- tk[:kind] = tk_ahead[:kind]
- tk[:state] = tk_ahead[:state]
- else
- @buf.unshift tk_ahead
- end
- end
- tk
- end
-
- def initialize(code)
- @buf = []
- @heredoc_queue = []
- @inner_lex = InnerStateLex.new(code)
- @inner_lex_enumerator = Enumerator.new do |y|
- @inner_lex.each do |tk|
- y << tk
- end
- end
- end
-
- def self.parse(code)
- lex = self.new(code)
- tokens = []
- begin
- while tk = lex.get_squashed_tk
- tokens.push tk
- end
- rescue StopIteration
- end
- tokens
- end
-
- def self.end?(token)
- (token[:state] & EXPR_END)
- end
-end