summary refs log tree commit diff
path: root/lib/irb
diff options
context:
space:
mode:
author aycabta <aycabta@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2019-04-21 09:13:49 +0000
committer aycabta <aycabta@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2019-04-21 09:13:49 +0000
commit 51cec00953ff8d7baa483d3846aa1dbdb89101aa (patch)
tree 2900ea0ba7c09379990e9da2edda5d6ef8fa075c /lib/irb
parent 683834eb72cfa77f4eac1c705327b522302b1721 (diff)
Revert "IRB is improved with Reline and RDoc"
This reverts commit 7f273ac6d0f05208b5b228da95205e20c0e8286c. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67691 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/irb')
-rw-r--r-- lib/irb/completion.rb 12
-rw-r--r-- lib/irb/context.rb 20
-rw-r--r-- lib/irb/init.rb 4
-rw-r--r-- lib/irb/input-method.rb 6
-rw-r--r-- lib/irb/lc/help-message 1
-rw-r--r-- lib/irb/lc/ja/help-message 2
-rw-r--r-- lib/irb/ruby-lex.rb 1259
7 files changed, 1097 insertions, 207 deletions
diff --git a/lib/irb/completion.rb b/lib/irb/completion.rb
index a8f462f2fd..390e7254dd 100644
--- a/lib/irb/completion.rb
+++ b/lib/irb/completion.rb
@@ -8,10 +8,11 @@
#
require "readline"
-require "rdoc"
module IRB
module InputCompletor # :nodoc:
+
+
# Set of reserved words used by Ruby, you should not use these for
# constants or variables
ReservedWords = %w[
@@ -194,14 +195,6 @@ module IRB
end
}
- RDocRIDriver = RDoc::RI::Driver.new
- PerfectMatchedProc = proc { |matched|
- begin
- RDocRIDriver.display_name(matched)
- rescue RDoc::RI::Driver::NotFoundError
- end
- }
-
# Set of available operators in Ruby
Operators = %w[% & * ** + - / < << <= <=> == === =~ > >= >> [] []= ^ ! != !~]
@@ -249,4 +242,3 @@ if Readline.respond_to?("basic_word_break_characters=")
end
Readline.completion_append_character = nil
Readline.completion_proc = IRB::InputCompletor::CompletionProc
-Readline.dig_perfect_match_proc = IRB::InputCompletor::PerfectMatchedProc
diff --git a/lib/irb/context.rb b/lib/irb/context.rb
index f8a6009d17..e8e6a118e6 100644
--- a/lib/irb/context.rb
+++ b/lib/irb/context.rb
@@ -101,6 +101,7 @@ module IRB
if @echo.nil?
@echo = true
end
+ self.debug_level = IRB.conf[:DEBUG_LEVEL]
end
# The top-level workspace, see WorkSpace#main
@@ -210,6 +211,10 @@ module IRB
#
# A copy of the default <code>IRB.conf[:VERBOSE]</code>
attr_accessor :verbose
+ # The debug level of irb
+ #
+ # See #debug_level= for more information.
+ attr_reader :debug_level
# The limit of backtrace lines displayed as top +n+ and tail +n+.
#
@@ -356,6 +361,21 @@ module IRB
print "Do nothing."
end
+ # Sets the debug level of irb
+ #
+ # Can also be set using the +--irb_debug+ command line option.
+ #
+ # See IRB@Command+line+options for more command line options.
+ def debug_level=(value)
+ @debug_level = value
+ RubyLex.debug_level = value
+ end
+
+ # Whether or not debug mode is enabled, see #debug_level=.
+ def debug?
+ @debug_level > 0
+ end
+
def evaluate(line, line_no, exception: nil) # :nodoc:
@line_no = line_no
if exception
diff --git a/lib/irb/init.rb b/lib/irb/init.rb
index 344b243f12..2066d8cb64 100644
--- a/lib/irb/init.rb
+++ b/lib/irb/init.rb
@@ -112,6 +112,8 @@ module IRB # :nodoc:
@CONF[:LC_MESSAGES] = Locale.new
@CONF[:AT_EXIT] = []
+
+ @CONF[:DEBUG_LEVEL] = 0
end
def IRB.init_error
@@ -189,6 +191,8 @@ module IRB # :nodoc:
@CONF[:CONTEXT_MODE] = ($1 || argv.shift).to_i
when "--single-irb"
@CONF[:SINGLE_IRB] = true
+ when /^--irb_debug(?:=(.+))?/
+ @CONF[:DEBUG_LEVEL] = ($1 || argv.shift).to_i
when "-v", "--version"
print IRB.version, "\n"
exit 0
diff --git a/lib/irb/input-method.rb b/lib/irb/input-method.rb
index 1f4f1d4746..f491d5a760 100644
--- a/lib/irb/input-method.rb
+++ b/lib/irb/input-method.rb
@@ -142,17 +142,13 @@ module IRB
@stdout = IO.open(STDOUT.to_i, 'w', :external_encoding => IRB.conf[:LC_MESSAGES].encoding, :internal_encoding => "-")
end
- def check_termination(&block)
- @check_termination_proc = block
- end
-
# Reads the next line from this input method.
#
# See IO#gets for more information.
def gets
Readline.input = @stdin
Readline.output = @stdout
- if l = readmultiline(@prompt, false, &@check_termination_proc)
+ if l = readline(@prompt, false)
HISTORY.push(l) if !l.empty?
@line[@line_no += 1] = l + "\n"
else
diff --git a/lib/irb/lc/help-message b/lib/irb/lc/help-message
index d1a66dddda..d43c6a1695 100644
--- a/lib/irb/lc/help-message
+++ b/lib/irb/lc/help-message
@@ -39,6 +39,7 @@ Usage: irb.rb [options] [programfile] [arguments]
--back-trace-limit n
Display backtrace top n and tail n. The default
value is 16.
+ --irb_debug n Set internal debug level to n (not for popular use)
--verbose Show details
--noverbose Don't show details
-v, --version Print the version of irb
diff --git a/lib/irb/lc/ja/help-message b/lib/irb/lc/ja/help-message
index 7a15f973c6..1b24d14d28 100644
--- a/lib/irb/lc/ja/help-message
+++ b/lib/irb/lc/ja/help-message
@@ -41,6 +41,8 @@ Usage: irb.rb [options] [programfile] [arguments]
バックトレース表示をバックトレースの頭から n, 後ろ
からnだけ行なう. デフォルトは16
+ --irb_debug n irbのデバッグレベルをnに設定する(非推奨).
+
--verbose 詳細なメッセージを出力する.
--noverbose 詳細なメッセージを出力しない(デフォルト).
-v, --version irbのバージョンを表示する.
diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb
index c4bec4a854..555d1f024f 100644
--- a/lib/irb/ruby-lex.rb
+++ b/lib/irb/ruby-lex.rb
@@ -11,39 +11,73 @@
#
require "e2mmap"
-require "ripper"
+require_relative "slex"
+require_relative "ruby-token"
# :stopdoc:
class RubyLex
extend Exception2MessageMapper
+ def_exception(:AlreadyDefinedToken, "Already defined token(%s)")
+ def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')")
+ def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')")
+ def_exception(:TkReading2TokenDuplicateError,
+ "key duplicate(token_n='%s', key='%s')")
+ def_exception(:SyntaxError, "%s")
+
def_exception(:TerminateLineInput, "Terminate Line Input")
+ include RubyToken
+
+ class << self
+ attr_accessor :debug_level
+ def debug?
+ @debug_level > 0
+ end
+ end
+ @debug_level = 0
+
def initialize
+ lex_init
+ set_input(STDIN)
+
+ @seek = 0
@exp_line_no = @line_no = 1
+ @base_char_no = 0
+ @char_no = 0
+ @rests = []
+ @readed = []
+ @here_readed = []
+
@indent = 0
+ @indent_stack = []
+ @lex_state = EXPR_BEG
+ @space_seen = false
+ @here_header = false
+ @post_symbeg = false
+
@continue = false
@line = ""
+
+ @skip_space = false
+ @readed_auto_clean_up = false
+ @exception_on_syntax_error = true
+
@prompt = nil
end
+ attr_accessor :skip_space
+ attr_accessor :readed_auto_clean_up
+ attr_accessor :exception_on_syntax_error
+
+ attr_reader :seek
+ attr_reader :char_no
+ attr_reader :line_no
+ attr_reader :indent
+
# io functions
def set_input(io, p = nil, &block)
@io = io
- if @io.respond_to?(:check_termination)
- @io.check_termination do |code|
- @tokens = Ripper.lex(code)
- continue = process_continue
- code_block_open = check_code_block(code)
- indent = process_nesting_level
- ltype = process_literal_type
- if code_block_open or ltype or continue or indent > 0
- false
- else
- true
- end
- end
- end
if p.respond_to?(:call)
@input = p
elsif block_given?
@@ -53,6 +87,112 @@ class RubyLex
end
end
+ def get_readed
+ if idx = @readed.rindex("\n")
+ @base_char_no = @readed.size - (idx + 1)
+ else
+ @base_char_no += @readed.size
+ end
+
+ readed = @readed.join("")
+ @readed = []
+ readed
+ end
+
+ def getc
+ while @rests.empty?
+ @rests.push nil unless buf_input
+ end
+ c = @rests.shift
+ if @here_header
+ @here_readed.push c
+ else
+ @readed.push c
+ end
+ @seek += 1
+ if c == "\n"
+ @line_no += 1
+ @char_no = 0
+ else
+ @char_no += 1
+ end
+ c
+ end
+
+ def gets
+ l = ""
+ while c = getc
+ l.concat(c)
+ break if c == "\n"
+ end
+ return nil if l == "" and c.nil?
+ l
+ end
+
+ def eof?
+ @io.eof?
+ end
+
+ def getc_of_rests
+ if @rests.empty?
+ nil
+ else
+ getc
+ end
+ end
+
+ def ungetc(c = nil)
+ if @here_readed.empty?
+ c2 = @readed.pop
+ else
+ c2 = @here_readed.pop
+ end
+ c = c2 unless c
+ @rests.unshift c #c =
+ @seek -= 1
+ if c == "\n"
+ @line_no -= 1
+ if idx = @readed.rindex("\n")
+ @char_no = idx + 1
+ else
+ @char_no = @base_char_no + @readed.size
+ end
+ else
+ @char_no -= 1
+ end
+ end
+
+ def peek_equal?(str)
+ chrs = str.split(//)
+ until @rests.size >= chrs.size
+ return false unless buf_input
+ end
+ @rests[0, chrs.size] == chrs
+ end
+
+ def peek_match?(regexp)
+ while @rests.empty?
+ return false unless buf_input
+ end
+ regexp =~ @rests.join("")
+ end
+
+ def peek(i = 0)
+ while @rests.size <= i
+ return nil unless buf_input
+ end
+ @rests[i]
+ end
+
+ def buf_input
+ prompt
+ line = @input.call
+ return nil unless line
+ @rests.concat line.chars.to_a
+ true
+ end
+ private :buf_input
+
def set_prompt(p = nil, &block)
p = block if block_given?
if p.respond_to?(:call)
@@ -70,11 +210,20 @@ class RubyLex
def initialize_input
@ltype = nil
+ @quoted = nil
@indent = 0
+ @indent_stack = []
+ @lex_state = EXPR_BEG
+ @space_seen = false
+ @here_header = false
+
@continue = false
+ @post_symbeg = false
+
+ prompt
+
@line = ""
@exp_line_no = @line_no
- @code_block_open = false
end
def each_top_level_statement
@@ -82,14 +231,13 @@ class RubyLex
catch(:TERM_INPUT) do
loop do
begin
+ @continue = false
prompt
unless l = lex
throw :TERM_INPUT if @line == ''
else
- @line_no += 1
- next if l == "\n"
@line.concat l
- if @code_block_open or @ltype or @continue or @indent > 0
+ if @ltype or @continue or @indent > 0
next
end
end
@@ -102,203 +250,930 @@ class RubyLex
@exp_line_no = @line_no
@indent = 0
+ @indent_stack = []
+ prompt
rescue TerminateLineInput
initialize_input
prompt
+ get_readed
end
end
end
end
def lex
- line = @input.call
- if @io.respond_to?(:check_termination)
- return line # multiline
- end
- code = @line + (line.nil? ? '' : line)
- code.gsub!(/\n*$/, '').concat("\n")
- @tokens = Ripper.lex(code)
- @continue = process_continue
- @code_block_open = check_code_block(code)
- @indent = process_nesting_level
- @ltype = process_literal_type
- line
+ continue = @continue
+ while tk = token
+ case tk
+ when TkNL, TkEND_OF_SCRIPT
+ @continue = continue unless continue.nil?
+ break unless @continue
+ when TkSPACE, TkCOMMENT
+ when TkSEMICOLON, TkBEGIN, TkELSE
+ @continue = continue = false
+ else
+ continue = nil
+ end
+ end
+ line = get_readed
+ if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
+ nil
+ else
+ line
+ end
+ end
+
+ def token
+ @prev_seek = @seek
+ @prev_line_no = @line_no
+ @prev_char_no = @char_no
+ begin
+ begin
+ tk = @OP.match(self)
+ @space_seen = tk.kind_of?(TkSPACE)
+ @lex_state = EXPR_END if @post_symbeg && tk.kind_of?(TkOp)
+ @post_symbeg = tk.kind_of?(TkSYMBEG)
+ rescue SyntaxError
+ raise if @exception_on_syntax_error
+ tk = TkError.new(@seek, @line_no, @char_no)
+ end
+ end while @skip_space and tk.kind_of?(TkSPACE)
+ if @readed_auto_clean_up
+ get_readed
+ end
+ tk
+ end
+
+ ENINDENT_CLAUSE = [
+ "case", "class", "def", "do", "for", "if",
+ "module", "unless", "until", "while", "begin"
+ ]
+ DEINDENT_CLAUSE = ["end"
+ ]
+
+ PERCENT_LTYPE = {
+ "q" => "\'",
+ "Q" => "\"",
+ "x" => "\`",
+ "r" => "/",
+ "w" => "]",
+ "W" => "]",
+ "i" => "]",
+ "I" => "]",
+ "s" => ":"
+ }
+
+ PERCENT_PAREN = {
+ "{" => "}",
+ "[" => "]",
+ "<" => ">",
+ "(" => ")"
+ }
+
+ Ltype2Token = {
+ "\'" => TkSTRING,
+ "\"" => TkSTRING,
+ "\`" => TkXSTRING,
+ "/" => TkREGEXP,
+ "]" => TkDSTRING,
+ ":" => TkSYMBOL
+ }
+ DLtype2Token = {
+ "\"" => TkDSTRING,
+ "\`" => TkDXSTRING,
+ "/" => TkDREGEXP,
+ }
+
+ def lex_init()
+ @OP = IRB::SLex.new
+ @OP.def_rules("\0", "\004", "\032") do |op, io|
+ Token(TkEND_OF_SCRIPT)
+ end
+
+ @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io|
+ @space_seen = true
+ while getc =~ /[ \t\f\r\13]/; end
+ ungetc
+ Token(TkSPACE)
+ end
+
+ @OP.def_rule("#") do |op, io|
+ identify_comment
+ end
+
+ @OP.def_rule("=begin",
+ proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do
+ |op, io|
+ @ltype = "="
+ until getc == "\n"; end
+ until peek_equal?("=end") && peek(4) =~ /\s/
+ until getc == "\n"; end
+ end
+ gets
+ @ltype = nil
+ Token(TkRD_COMMENT)
+ end
+
+ @OP.def_rule("\n") do |op, io|
+ print "\\n\n" if RubyLex.debug?
+ case @lex_state
+ when EXPR_BEG, EXPR_FNAME, EXPR_DOT
+ @continue = true
+ else
+ @continue = false
+ @lex_state = EXPR_BEG
+ until (@indent_stack.empty? ||
+ [TkLPAREN, TkLBRACK, TkLBRACE,
+ TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
+ @indent_stack.pop
+ end
+ end
+ @here_header = false
+ @here_readed = []
+ Token(TkNL)
+ end
+
+ @OP.def_rules("*", "**",
+ "=", "==", "===",
+ "=~", "<=>",
+ "<", "<=",
+ ">", ">=", ">>",
+ "!", "!=", "!~") do
+ |op, io|
+ case @lex_state
+ when EXPR_FNAME, EXPR_DOT
+ @lex_state = EXPR_ARG
+ else
+ @lex_state = EXPR_BEG
+ end
+ Token(op)
+ end
+
+ @OP.def_rules("<<") do
+ |op, io|
+ tk = nil
+ if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
+ (@lex_state != EXPR_ARG || @space_seen)
+ c = peek(0)
+ if /[-~"'`\w]/ =~ c
+ tk = identify_here_document
+ end
+ end
+ unless tk
+ tk = Token(op)
+ case @lex_state
+ when EXPR_FNAME, EXPR_DOT
+ @lex_state = EXPR_ARG
+ else
+ @lex_state = EXPR_BEG
+ end
+ end
+ tk
+ end
+
+ @OP.def_rules("'", '"') do
+ |op, io|
+ identify_string(op)
+ end
+
+ @OP.def_rules("`") do
+ |op, io|
+ if @lex_state == EXPR_FNAME
+ @lex_state = EXPR_END
+ Token(op)
+ else
+ identify_string(op)
+ end
+ end
+
+ @OP.def_rules('?') do
+ |op, io|
+ if @lex_state == EXPR_END
+ @lex_state = EXPR_BEG
+ Token(TkQUESTION)
+ else
+ ch = getc
+ if @lex_state == EXPR_ARG && ch =~ /\s/
+ ungetc
+ @lex_state = EXPR_BEG;
+ Token(TkQUESTION)
+ else
+ if (ch == '\\')
+ read_escape
+ end
+ @lex_state = EXPR_END
+ Token(TkINTEGER)
+ end
+ end
+ end
+
+ @OP.def_rules("&", "&&", "|", "||") do
+ |op, io|
+ @lex_state = EXPR_BEG
+ Token(op)
+ end
+
+ @OP.def_rules("+=", "-=", "*=", "**=",
+ "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
+ |op, io|
+ @lex_state = EXPR_BEG
+ op =~ /^(.*)=$/
+ Token(TkOPASGN, $1)
+ end
+
+ @OP.def_rule("+@", proc{|op, io| @lex_state == EXPR_FNAME}) do
+ |op, io|
+ @lex_state = EXPR_ARG
+ Token(op)
+ end
+
+ @OP.def_rule("-@", proc{|op, io| @lex_state == EXPR_FNAME}) do
+ |op, io|
+ @lex_state = EXPR_ARG
+ Token(op)
+ end
+
+ @OP.def_rules("+", "-") do
+ |op, io|
+ catch(:RET) do
+ if @lex_state == EXPR_ARG
+ if @space_seen and peek(0) =~ /[0-9]/
+ throw :RET, identify_number
+ else
+ @lex_state = EXPR_BEG
+ end
+ elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
+ throw :RET, identify_number
+ else
+ @lex_state = EXPR_BEG
+ end
+ Token(op)
+ end
+ end
+
+ @OP.def_rule(".") do
+ |op, io|
+ @lex_state = EXPR_BEG
+ if peek(0) =~ /[0-9]/
+ ungetc
+ identify_number
+ else
+ # for "obj.if" etc.
+ @lex_state = EXPR_DOT
+ Token(TkDOT)
+ end
+ end
+
+ @OP.def_rules("..", "...") do
+ |op, io|
+ @lex_state = EXPR_BEG
+ Token(op)
+ end
+
+ lex_int2
+ end
+
+ def lex_int2
+ @OP.def_rules("]", "}", ")") do
+ |op, io|
+ @lex_state = EXPR_END
+ @indent -= 1
+ @indent_stack.pop
+ Token(op)
+ end
+
+ @OP.def_rule(":") do
+ |op, io|
+ if @lex_state == EXPR_END || peek(0) =~ /\s/
+ @lex_state = EXPR_BEG
+ Token(TkCOLON)
+ else
+ @lex_state = EXPR_FNAME
+ Token(TkSYMBEG)
+ end
+ end
+
+ @OP.def_rule("::") do
+ |op, io|
+ if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
+ @lex_state = EXPR_BEG
+ Token(TkCOLON3)
+ else
+ @lex_state = EXPR_DOT
+ Token(TkCOLON2)
+ end
+ end
+
+ @OP.def_rule("/") do
+ |op, io|
+ if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
+ identify_string(op)
+ elsif peek(0) == '='
+ getc
+ @lex_state = EXPR_BEG
+ Token(TkOPASGN, "/") #/)
+ elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
+ identify_string(op)
+ else
+ @lex_state = EXPR_BEG
+ Token("/") #/)
+ end
+ end
+
+ @OP.def_rules("^") do
+ |op, io|
+ @lex_state = EXPR_BEG
+ Token("^")
+ end
+
+ @OP.def_rules(",") do
+ |op, io|
+ @lex_state = EXPR_BEG
+ Token(op)
+ end
+
+ @OP.def_rules(";") do
+ |op, io|
+ @lex_state = EXPR_BEG
+ until (@indent_stack.empty? ||
+ [TkLPAREN, TkLBRACK, TkLBRACE,
+ TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
+ @indent_stack.pop
+ end
+ Token(op)
+ end
+
+ @OP.def_rule("~") do
+ |op, io|
+ @lex_state = EXPR_BEG
+ Token("~")
+ end
+
+ @OP.def_rule("~@", proc{|op, io| @lex_state == EXPR_FNAME}) do
+ |op, io|
+ @lex_state = EXPR_BEG
+ Token("~")
+ end
+
+ @OP.def_rule("(") do
+ |op, io|
+ @indent += 1
+ if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
+ @lex_state = EXPR_BEG
+ tk_c = TkfLPAREN
+ else
+ @lex_state = EXPR_BEG
+ tk_c = TkLPAREN
+ end
+ @indent_stack.push tk_c
+ Token(tk_c)
+ end
+
+ @OP.def_rule("[]", proc{|op, io| @lex_state == EXPR_FNAME}) do
+ |op, io|
+ @lex_state = EXPR_ARG
+ Token("[]")
+ end
+
+ @OP.def_rule("[]=", proc{|op, io| @lex_state == EXPR_FNAME}) do
+ |op, io|
+ @lex_state = EXPR_ARG
+ Token("[]=")
+ end
+
+ @OP.def_rule("[") do
+ |op, io|
+ @indent += 1
+ if @lex_state == EXPR_FNAME
+ tk_c = TkfLBRACK
+ else
+ if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
+ tk_c = TkLBRACK
+ elsif @lex_state == EXPR_ARG && @space_seen
+ tk_c = TkLBRACK
+ else
+ tk_c = TkfLBRACK
+ end
+ @lex_state = EXPR_BEG
+ end
+ @indent_stack.push tk_c
+ Token(tk_c)
+ end
+
+ @OP.def_rule("{") do
+ |op, io|
+ @indent += 1
+ if @lex_state != EXPR_END && @lex_state != EXPR_ARG
+ tk_c = TkLBRACE
+ else
+ tk_c = TkfLBRACE
+ end
+ @lex_state = EXPR_BEG
+ @indent_stack.push tk_c
+ Token(tk_c)
+ end
+
+ @OP.def_rule('\\') do
+ |op, io|
+ if getc == "\n"
+ @space_seen = true
+ @continue = true
+ Token(TkSPACE)
+ else
+ read_escape
+ Token("\\")
+ end
+ end
+
+ @OP.def_rule('%') do
+ |op, io|
+ if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
+ identify_quotation
+ elsif peek(0) == '='
+ getc
+ Token(TkOPASGN, :%)
+ elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
+ identify_quotation
+ else
+ @lex_state = EXPR_BEG
+ Token("%") #))
+ end
+ end
+
+ @OP.def_rule('$') do
+ |op, io|
+ identify_gvar
+ end
+
+ @OP.def_rule('@') do
+ |op, io|
+ if peek(0) =~ /[\w@]/
+ ungetc
+ identify_identifier
+ else
+ Token("@")
+ end
+ end
+
+ @OP.def_rule("") do
+ |op, io|
+ printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
+ if peek(0) =~ /[0-9]/
+ t = identify_number
+ elsif peek(0) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
+ t = identify_identifier
+ end
+ printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
+ t
+ end
+
+ p @OP if RubyLex.debug?
end
- def process_continue
- continued_bits = Ripper::EXPR_BEG | Ripper::EXPR_FNAME | Ripper::EXPR_DOT
- # last token is always newline
- if @tokens.size >= 2 and @tokens[-2][1] == :on_regexp_end
- # end of regexp literal
- return false
- elsif @tokens.size >= 2 and @tokens[-2][1] == :on_semicolon
- return false
- elsif @tokens.size >= 2 and @tokens[-2][1] == :on_kw and (@tokens[-2][2] == 'begin' or @tokens[-2][2] == 'else')
- return false
- elsif !@tokens.empty? and @tokens.last[2] == "\\\n"
- return true
- elsif @tokens.size >= 2 and @tokens[-2][3].anybits?(continued_bits)
- # end of literal except for regexp
- return true
- end
- false
+ def identify_gvar
+ @lex_state = EXPR_END
+
+ case ch = getc
+ when /[~_*$?!@\/\\;,=:<>".]/ #"
+ Token(TkGVAR, "$" + ch)
+ when "-"
+ Token(TkGVAR, "$-" + getc)
+ when "&", "`", "'", "+"
+ Token(TkBACK_REF, "$"+ch)
+ when /[1-9]/
+ while getc =~ /[0-9]/; end
+ ungetc
+ Token(TkNTH_REF)
+ when /\w/
+ ungetc
+ ungetc
+ identify_identifier
+ else
+ ungetc
+ Token("$")
+ end
end
- def check_code_block(code)
- return true if @tokens.empty?
- if @tokens.last[1] == :on_heredoc_beg
- return true
- end
-
- begin # check if parser error are available
- RubyVM::InstructionSequence.compile(code)
- rescue SyntaxError => e
- case e.message
- when /unterminated (?:string|regexp) meets end of file/
- # "unterminated regexp meets end of file"
- #
- # example:
- # /
- #
- # "unterminated string meets end of file"
- #
- # example:
- # '
- return true
- when /syntax error, unexpected end-of-input/
- # "syntax error, unexpected end-of-input, expecting keyword_end"
- #
- # example:
- # if ture
- # hoge
- # if false
- # fuga
- # end
- return true
- when /syntax error, unexpected keyword_end/
- # "syntax error, unexpected keyword_end"
- #
- # example:
- # if (
- # end
- #
- # example:
- # end
- return false
- when /unexpected tREGEXP_BEG/
- # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
- #
- # example:
- # method / f /
- return false
- end
- end
-
- last_lex_state = @tokens.last[3]
- if last_lex_state.allbits?(Ripper::EXPR_BEG)
- return false
- elsif last_lex_state.allbits?(Ripper::EXPR_DOT)
- return true
- elsif last_lex_state.allbits?(Ripper::EXPR_CLASS)
- return true
- elsif last_lex_state.allbits?(Ripper::EXPR_FNAME)
- return true
- elsif last_lex_state.allbits?(Ripper::EXPR_VALUE)
- return true
- elsif last_lex_state.allbits?(Ripper::EXPR_ARG)
- return false
- end
-
- false
+ def identify_identifier
+ token = ""
+ if peek(0) =~ /[$@]/
+ token.concat(c = getc)
+ if c == "@" and peek(0) == "@"
+ token.concat getc
+ end
+ end
+
+ while (ch = getc) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
+ print ":", ch, ":" if RubyLex.debug?
+ token.concat ch
+ end
+ ungetc
+
+ if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "="
+ token.concat getc
+ end
+
+ # almost fix token
+
+ case token
+ when /^\$/
+ return Token(TkGVAR, token)
+ when /^\@\@/
+ @lex_state = EXPR_END
+ # p Token(TkCVAR, token)
+ return Token(TkCVAR, token)
+ when /^\@/
+ @lex_state = EXPR_END
+ return Token(TkIVAR, token)
+ end
+
+ if @lex_state != EXPR_DOT
+ print token, "\n" if RubyLex.debug?
+
+ token_c, *trans = TkReading2Token[token]
+ if token_c
+ # reserved word?
+
+ if (@lex_state != EXPR_BEG &&
+ @lex_state != EXPR_FNAME &&
+ trans[1])
+ # modifiers
+ token_c = TkSymbol2Token[trans[1]]
+ @lex_state = trans[0]
+ else
+ if @lex_state != EXPR_FNAME and peek(0) != ':'
+ if ENINDENT_CLAUSE.include?(token)
+ # check for ``class = val'' etc.
+ valid = true
+ case token
+ when "class"
+ valid = false unless peek_match?(/^\s*(<<|\w|::)/)
+ when "def"
+ valid = false if peek_match?(/^\s*(([+\-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/)
+ when "do"
+ valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&)/)
+ when *ENINDENT_CLAUSE
+ valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&|\|)/)
+ else
+ # no nothing
+ end
+ if valid
+ if token == "do"
+ if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
+ @indent += 1
+ @indent_stack.push token_c
+ end
+ else
+ @indent += 1
+ @indent_stack.push token_c
+ end
+ end
+
+ elsif DEINDENT_CLAUSE.include?(token)
+ @indent -= 1
+ @indent_stack.pop
+ end
+ @lex_state = trans[0]
+ else
+ @lex_state = EXPR_END
+ end
+ end
+ return Token(token_c, token)
+ end
+ end
+
+ if @lex_state == EXPR_FNAME
+ @lex_state = EXPR_END
+ if peek(0) == '='
+ token.concat getc
+ end
+ elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
+ @lex_state = EXPR_ARG
+ else
+ @lex_state = EXPR_END
+ end
+
+ if token[0, 1] =~ /[A-Z]/
+ return Token(TkCONSTANT, token)
+ elsif token[token.size - 1, 1] =~ /[!?]/
+ return Token(TkFID, token)
+ else
+ return Token(TkIDENTIFIER, token)
+ end
end
- def process_nesting_level
- @tokens.inject(0) { |indent, t|
- case t[1]
- when :on_lbracket, :on_lbrace, :on_lparen
- indent += 1
- when :on_rbracket, :on_rbrace, :on_rparen
- indent -= 1
- when :on_kw
- case t[2]
- when 'def', 'do', 'case', 'for', 'begin', 'class', 'module'
- indent += 1
- when 'if', 'unless', 'while', 'until', 'rescue'
- # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL
- indent += 1 unless t[3].allbits?(Ripper::EXPR_LABEL)
- when 'end'
- indent -= 1
+ def identify_here_document
+ ch = getc
+ if ch == "-" || ch == "~"
+ ch = getc
+ indent = true
+ end
+ if /['"`]/ =~ ch
+ lt = ch
+ quoted = ""
+ while (c = getc) && c != lt
+ quoted.concat c
+ end
+ else
+ lt = '"'
+ quoted = ch.dup
+ while (c = getc) && c =~ /\w/
+ quoted.concat c
+ end
+ ungetc
+ end
+
+ ltback, @ltype = @ltype, lt
+ reserve = []
+ while ch = getc
+ reserve.push ch
+ if ch == "\\"
+ reserve.push ch = getc
+ elsif ch == "\n"
+ break
+ end
+ end
+
+ @here_header = false
+
+ line = ""
+ while ch = getc
+ if ch == "\n"
+ if line == quoted
+ break
+ end
+ line = ""
+ else
+ line.concat ch unless indent && line == "" && /\s/ =~ ch
+ if @ltype != "'" && ch == "#" && peek(0) == "{"
+ identify_string_dvar
+ end
+ end
+ end
+
+ @here_header = true
+ @here_readed.concat reserve
+ while ch = reserve.pop
+ ungetc ch
+ end
+
+ @ltype = ltback
+ @lex_state = EXPR_END
+ Token(Ltype2Token[lt])
+ end
+
+ def identify_quotation
+ ch = getc
+ if lt = PERCENT_LTYPE[ch]
+ ch = getc
+ elsif ch =~ /\W/
+ lt = "\""
+ else
+ RubyLex.fail SyntaxError, "unknown type of %string"
+ end
+ @quoted = ch unless @quoted = PERCENT_PAREN[ch]
+ identify_string(lt, @quoted)
+ end
+
+ def identify_number
+ @lex_state = EXPR_END
+
+ if peek(0) == "0" && peek(1) !~ /[.eE]/
+ getc
+ case peek(0)
+ when /[xX]/
+ ch = getc
+ match = /[0-9a-fA-F_]/
+ when /[bB]/
+ ch = getc
+ match = /[01_]/
+ when /[oO]/
+ ch = getc
+ match = /[0-7_]/
+ when /[dD]/
+ ch = getc
+ match = /[0-9_]/
+ when /[0-7]/
+ match = /[0-7_]/
+ when /[89]/
+ RubyLex.fail SyntaxError, "Invalid octal digit"
+ else
+ return Token(TkINTEGER)
+ end
+
+ len0 = true
+ non_digit = false
+ while ch = getc
+ if match =~ ch
+ if ch == "_"
+ if non_digit
+ RubyLex.fail SyntaxError, "trailing `#{ch}' in number"
+ else
+ non_digit = ch
+ end
+ else
+ non_digit = false
+ len0 = false
+ end
+ else
+ ungetc
+ if len0
+ RubyLex.fail SyntaxError, "numeric literal without digits"
+ end
+ if non_digit
+ RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
+ end
+ break
+ end
+ end
+ return Token(TkINTEGER)
+ end
+
+ type = TkINTEGER
+ allow_point = true
+ allow_e = true
+ non_digit = false
+ while ch = getc
+ case ch
+ when /[0-9]/
+ non_digit = false
+ when "_"
+ non_digit = ch
+ when allow_point && "."
+ if non_digit
+ RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
+ end
+ type = TkFLOAT
+ if peek(0) !~ /[0-9]/
+ type = TkINTEGER
+ ungetc
+ break
+ end
+ allow_point = false
+ when allow_e && "e", allow_e && "E"
+ if non_digit
+ RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
+ end
+ type = TkFLOAT
+ if peek(0) =~ /[+-]/
+ getc
+ end
+ allow_e = false
+ allow_point = false
+ non_digit = ch
+ else
+ if non_digit
+ RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
end
+ ungetc
+ break
end
- # percent literals are not indented
- indent
- }
+ end
+ Token(type)
+ end
+
+ def identify_string(ltype, quoted = ltype)
+ @ltype = ltype
+ @quoted = quoted
+ subtype = nil
+ begin
+ nest = 0
+ while ch = getc
+ if @quoted == ch and nest == 0
+ break
+ elsif @ltype != "'" && ch == "#" && peek(0) == "{"
+ identify_string_dvar
+ elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#"
+ subtype = true
+ elsif ch == '\\' and @ltype == "'" #'
+ case ch = getc
+ when "\\", "\n", "'"
+ else
+ ungetc
+ end
+ elsif ch == '\\' #'
+ read_escape
+ end
+ if PERCENT_PAREN.values.include?(@quoted)
+ if PERCENT_PAREN[ch] == @quoted
+ nest += 1
+ elsif ch == @quoted
+ nest -= 1
+ end
+ end
+ end
+ if @ltype == "/"
+ while /[imxoesun]/ =~ peek(0)
+ getc
+ end
+ end
+ if subtype
+ Token(DLtype2Token[ltype])
+ else
+ Token(Ltype2Token[ltype])
+ end
+ ensure
+ @ltype = nil
+ @quoted = nil
+ @lex_state = EXPR_END
+ end
end
- def check_string_literal
- i = 0
- start_token = []
- end_type = []
- while i < @tokens.size
- t = @tokens[i]
- case t[1]
- when :on_tstring_beg
- start_token << t
- end_type << :on_tstring_end
- when :on_regexp_beg
- start_token << t
- end_type << :on_regexp_end
- when :on_symbeg
- if (i + 1) < @tokens.size and @tokens[i + 1][1] != :on_ident
- start_token << t
- end_type << :on_tstring_end
+ def identify_string_dvar
+ begin
+ getc
+
+ reserve_continue = @continue
+ reserve_ltype = @ltype
+ reserve_indent = @indent
+ reserve_indent_stack = @indent_stack
+ reserve_state = @lex_state
+ reserve_quoted = @quoted
+
+ @ltype = nil
+ @quoted = nil
+ @indent = 0
+ @indent_stack = []
+ @lex_state = EXPR_BEG
+
+ loop do
+ @continue = false
+ prompt
+ tk = token
+ if @ltype or @continue or @indent >= 0
+ next
end
- when :on_backtick
- start_token << t
- end_type << :on_tstring_end
- when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg
- start_token << t
- end_type << :on_tstring_end
- when :on_heredoc_beg
- start_token << t
- end_type << :on_heredoc_end
- when end_type.last
- start_token.pop
- end_type.pop
- end
- i += 1
- end
- start_token.last.nil? ? '' : start_token.last
+ break if tk.kind_of?(TkRBRACE)
+ end
+ ensure
+ @continue = reserve_continue
+ @ltype = reserve_ltype
+ @indent = reserve_indent
+ @indent_stack = reserve_indent_stack
+ @lex_state = reserve_state
+ @quoted = reserve_quoted
+ end
end
- def process_literal_type
- start_token = check_string_literal
- case start_token[1]
- when :on_tstring_beg
- case start_token[2]
- when ?" then ?"
- when /^%.$/ then ?"
- when /^%Q.$/ then ?"
- when ?' then ?'
- when /^%q.$/ then ?'
- end
- when :on_regexp_beg then ?/
- when :on_symbeg then ?:
- when :on_backtick then ?`
- when :on_qwords_beg then ?]
- when :on_words_beg then ?]
- when :on_qsymbols_beg then ?]
- when :on_symbols_beg then ?]
- when :on_heredoc_beg
- start_token[2] =~ /<<[-~]?(['"`])[_a-zA-Z0-9]+\1/
- case $1
- when ?" then ?"
- when ?' then ?'
- when ?` then ?`
- else ?"
+ def identify_comment
+ @ltype = "#"
+
+ while ch = getc
+ if ch == "\n"
+ @ltype = nil
+ ungetc
+ break
+ end
+ end
+ return Token(TkCOMMENT)
+ end
+
+ def read_escape
+ case ch = getc
+ when "\n", "\r", "\f"
+ when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #"
+ when /[0-7]/
+ ungetc ch
+ 3.times do
+ case ch = getc
+ when /[0-7]/
+ when nil
+ break
+ else
+ ungetc
+ break
+ end
+ end
+
+ when "x"
+ 2.times do
+ case ch = getc
+ when /[0-9a-fA-F]/
+ when nil
+ break
+ else
+ ungetc
+ break
+ end
+ end
+
+ when "M"
+ if (ch = getc) != '-'
+ ungetc
+ else
+ if (ch = getc) == "\\" #"
+ read_escape
+ end
+ end
+
+ when "C", "c" #, "^"
+ if ch == "C" and (ch = getc) != "-"
+ ungetc
+ elsif (ch = getc) == "\\" #"
+ read_escape
end
else
- nil
+ # other characters
end
end
end