Revert "IRB is improved with Reline and RDoc"

This reverts commit 7f273ac6d0f05208b5b228da95205e20c0e8286c.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67691 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
author: aycabta <aycabta@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2019-04-21 09:13:49 +0000
committer: aycabta <aycabta@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2019-04-21 09:13:49 +0000
commit: 51cec00953ff8d7baa483d3846aa1dbdb89101aa (patch)
tree: 2900ea0ba7c09379990e9da2edda5d6ef8fa075c /lib/irb
parent: 683834eb72cfa77f4eac1c705327b522302b1721 (diff)
7 files changed, 1097 insertions, 207 deletions
diff --git a/lib/irb/completion.rb b/lib/irb/completion.rb
index a8f462f2fd..390e7254dd 100644
--- a/lib/irb/completion.rb
+++ b/lib/irb/completion.rb
@@ -8,10 +8,11 @@
 #
 
 require "readline"
-require "rdoc"
 
 module IRB
   module InputCompletor # :nodoc:
+
+
     # Set of reserved words used by Ruby, you should not use these for
     # constants or variables
     ReservedWords = %w[
@@ -194,14 +195,6 @@ module IRB
       end
     }
 
-    RDocRIDriver = RDoc::RI::Driver.new
-    PerfectMatchedProc = proc { |matched|
-      begin
-        RDocRIDriver.display_name(matched)
-      rescue RDoc::RI::Driver::NotFoundError
-      end
-    }
-
     # Set of available operators in Ruby
     Operators = %w[% & * ** + - / < << <= <=> == === =~ > >= >> [] []= ^ ! != !~]
 
@@ -249,4 +242,3 @@ if Readline.respond_to?("basic_word_break_characters=")
 end
 Readline.completion_append_character = nil
 Readline.completion_proc = IRB::InputCompletor::CompletionProc
-Readline.dig_perfect_match_proc = IRB::InputCompletor::PerfectMatchedProc
diff --git a/lib/irb/context.rb b/lib/irb/context.rb
index f8a6009d17..e8e6a118e6 100644
--- a/lib/irb/context.rb
+++ b/lib/irb/context.rb
@@ -101,6 +101,7 @@ module IRB
       if @echo.nil?
         @echo = true
       end
+      self.debug_level = IRB.conf[:DEBUG_LEVEL]
     end
 
     # The top-level workspace, see WorkSpace#main
@@ -210,6 +211,10 @@ module IRB
     #
     # A copy of the default <code>IRB.conf[:VERBOSE]</code>
     attr_accessor :verbose
+    # The debug level of irb
+    #
+    # See #debug_level= for more information.
+    attr_reader :debug_level
 
     # The limit of backtrace lines displayed as top +n+ and tail +n+.
     #
@@ -356,6 +361,21 @@ module IRB
       print "Do nothing."
     end
 
+    # Sets the debug level of irb
+    #
+    # Can also be set using the +--irb_debug+ command line option.
+    #
+    # See IRB@Command+line+options for more command line options.
+    def debug_level=(value)
+      @debug_level = value
+      RubyLex.debug_level = value
+    end
+
+    # Whether or not debug mode is enabled, see #debug_level=.
+    def debug?
+      @debug_level > 0
+    end
+
     def evaluate(line, line_no, exception: nil) # :nodoc:
       @line_no = line_no
       if exception
diff --git a/lib/irb/init.rb b/lib/irb/init.rb
index 344b243f12..2066d8cb64 100644
--- a/lib/irb/init.rb
+++ b/lib/irb/init.rb
@@ -112,6 +112,8 @@ module IRB # :nodoc:
     @CONF[:LC_MESSAGES] = Locale.new
 
     @CONF[:AT_EXIT] = []
+
+    @CONF[:DEBUG_LEVEL] = 0
   end
 
   def IRB.init_error
@@ -189,6 +191,8 @@ module IRB # :nodoc:
         @CONF[:CONTEXT_MODE] = ($1 || argv.shift).to_i
       when "--single-irb"
         @CONF[:SINGLE_IRB] = true
+      when /^--irb_debug(?:=(.+))?/
+        @CONF[:DEBUG_LEVEL] = ($1 || argv.shift).to_i
       when "-v", "--version"
         print IRB.version, "\n"
         exit 0
diff --git a/lib/irb/input-method.rb b/lib/irb/input-method.rb
index 1f4f1d4746..f491d5a760 100644
--- a/lib/irb/input-method.rb
+++ b/lib/irb/input-method.rb
@@ -142,17 +142,13 @@ module IRB
         @stdout = IO.open(STDOUT.to_i, 'w', :external_encoding => IRB.conf[:LC_MESSAGES].encoding, :internal_encoding => "-")
       end
 
-      def check_termination(&block)
-        @check_termination_proc = block
-      end
-
       # Reads the next line from this input method.
       #
       # See IO#gets for more information.
       def gets
         Readline.input = @stdin
         Readline.output = @stdout
-        if l = readmultiline(@prompt, false, &@check_termination_proc)
+        if l = readline(@prompt, false)
           HISTORY.push(l) if !l.empty?
           @line[@line_no += 1] = l + "\n"
         else
diff --git a/lib/irb/lc/help-message b/lib/irb/lc/help-message
index d1a66dddda..d43c6a1695 100644
--- a/lib/irb/lc/help-message
+++ b/lib/irb/lc/help-message
@@ -39,6 +39,7 @@ Usage:  irb.rb [options] [programfile] [arguments]
   --back-trace-limit n
 		    Display backtrace top n and tail n. The default
 		    value is 16.
+  --irb_debug n	    Set internal debug level to n (not for popular use)
   --verbose         Show details
   --noverbose       Don't show details
   -v, --version	    Print the version of irb
diff --git a/lib/irb/lc/ja/help-message b/lib/irb/lc/ja/help-message
index 7a15f973c6..1b24d14d28 100644
--- a/lib/irb/lc/ja/help-message
+++ b/lib/irb/lc/ja/help-message
@@ -41,6 +41,8 @@ Usage:  irb.rb [options] [programfile] [arguments]
 		    バックトレース表示をバックトレースの頭から n, 後ろ
 		    からnだけ行なう. デフォルトは16
 
+  --irb_debug n	    irbのデバッグレベルをnに設定する(非推奨).
+
   --verbose	    詳細なメッセージを出力する.
   --noverbose	    詳細なメッセージを出力しない(デフォルト).
   -v, --version	    irbのバージョンを表示する.
diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb
index c4bec4a854..555d1f024f 100644
--- a/lib/irb/ruby-lex.rb
+++ b/lib/irb/ruby-lex.rb
@@ -11,39 +11,73 @@
 #
 
 require "e2mmap"
-require "ripper"
+require_relative "slex"
+require_relative "ruby-token"
 
 # :stopdoc:
 class RubyLex
 
   extend Exception2MessageMapper
+  def_exception(:AlreadyDefinedToken, "Already defined token(%s)")
+  def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')")
+  def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')")
+  def_exception(:TkReading2TokenDuplicateError,
+                "key duplicate(token_n='%s', key='%s')")
+  def_exception(:SyntaxError, "%s")
+
   def_exception(:TerminateLineInput, "Terminate Line Input")
 
+  include RubyToken
+
+  class << self
+    attr_accessor :debug_level
+    def debug?
+      @debug_level > 0
+    end
+  end
+  @debug_level = 0
+
   def initialize
+    lex_init
+    set_input(STDIN)
+
+    @seek = 0
     @exp_line_no = @line_no = 1
+    @base_char_no = 0
+    @char_no = 0
+    @rests = []
+    @readed = []
+    @here_readed = []
+
     @indent = 0
+    @indent_stack = []
+    @lex_state = EXPR_BEG
+    @space_seen = false
+    @here_header = false
+    @post_symbeg = false
+
     @continue = false
     @line = ""
+
+    @skip_space = false
+    @readed_auto_clean_up = false
+    @exception_on_syntax_error = true
+
     @prompt = nil
   end
 
+  attr_accessor :skip_space
+  attr_accessor :readed_auto_clean_up
+  attr_accessor :exception_on_syntax_error
+
+  attr_reader :seek
+  attr_reader :char_no
+  attr_reader :line_no
+  attr_reader :indent
+
   # io functions
   def set_input(io, p = nil, &block)
     @io = io
-    if @io.respond_to?(:check_termination)
-      @io.check_termination do |code|
-        @tokens = Ripper.lex(code)
-        continue = process_continue
-        code_block_open = check_code_block(code)
-        indent = process_nesting_level
-        ltype = process_literal_type
-        if code_block_open or ltype or continue or indent > 0
-          false
-        else
-          true
-        end
-      end
-    end
     if p.respond_to?(:call)
       @input = p
     elsif block_given?
@@ -53,6 +87,112 @@ class RubyLex
     end
   end
 
+  def get_readed
+    if idx = @readed.rindex("\n")
+      @base_char_no = @readed.size - (idx + 1)
+    else
+      @base_char_no += @readed.size
+    end
+
+    readed = @readed.join("")
+    @readed = []
+    readed
+  end
+
+  def getc
+    while @rests.empty?
+      @rests.push nil unless buf_input
+    end
+    c = @rests.shift
+    if @here_header
+      @here_readed.push c
+    else
+      @readed.push c
+    end
+    @seek += 1
+    if c == "\n"
+      @line_no += 1
+      @char_no = 0
+    else
+      @char_no += 1
+    end
+    c
+  end
+
+  def gets
+    l = ""
+    while c = getc
+      l.concat(c)
+      break if c == "\n"
+    end
+    return nil if l == "" and c.nil?
+    l
+  end
+
+  def eof?
+    @io.eof?
+  end
+
+  def getc_of_rests
+    if @rests.empty?
+      nil
+    else
+      getc
+    end
+  end
+
+  def ungetc(c = nil)
+    if @here_readed.empty?
+      c2 = @readed.pop
+    else
+      c2 = @here_readed.pop
+    end
+    c = c2 unless c
+    @rests.unshift c #c =
+    @seek -= 1
+    if c == "\n"
+      @line_no -= 1
+      if idx = @readed.rindex("\n")
+        @char_no = idx + 1
+      else
+        @char_no = @base_char_no + @readed.size
+      end
+    else
+      @char_no -= 1
+    end
+  end
+
+  def peek_equal?(str)
+    chrs = str.split(//)
+    until @rests.size >= chrs.size
+      return false unless buf_input
+    end
+    @rests[0, chrs.size] == chrs
+  end
+
+  def peek_match?(regexp)
+    while @rests.empty?
+      return false unless buf_input
+    end
+    regexp =~ @rests.join("")
+  end
+
+  def peek(i = 0)
+    while @rests.size <= i
+      return nil unless buf_input
+    end
+    @rests[i]
+  end
+
+  def buf_input
+    prompt
+    line = @input.call
+    return nil unless line
+    @rests.concat line.chars.to_a
+    true
+  end
+  private :buf_input
+
   def set_prompt(p = nil, &block)
     p = block if block_given?
     if p.respond_to?(:call)
@@ -70,11 +210,20 @@ class RubyLex
 
   def initialize_input
     @ltype = nil
+    @quoted = nil
     @indent = 0
+    @indent_stack = []
+    @lex_state = EXPR_BEG
+    @space_seen = false
+    @here_header = false
+
     @continue = false
+    @post_symbeg = false
+
+    prompt
+
     @line = ""
     @exp_line_no = @line_no
-    @code_block_open = false
   end
 
   def each_top_level_statement
@@ -82,14 +231,13 @@ class RubyLex
     catch(:TERM_INPUT) do
       loop do
         begin
+          @continue = false
           prompt
           unless l = lex
             throw :TERM_INPUT if @line == ''
           else
-            @line_no += 1
-            next if l == "\n"
             @line.concat l
-            if @code_block_open or @ltype or @continue or @indent > 0
+            if @ltype or @continue or @indent > 0
               next
             end
           end
@@ -102,203 +250,930 @@ class RubyLex
           @exp_line_no = @line_no
 
           @indent = 0
+          @indent_stack = []
+          prompt
         rescue TerminateLineInput
           initialize_input
           prompt
+          get_readed
         end
       end
     end
   end
 
   def lex
-    line = @input.call
-    if @io.respond_to?(:check_termination)
-      return line # multiline
-    end
-    code = @line + (line.nil? ? '' : line)
-    code.gsub!(/\n*$/, '').concat("\n")
-    @tokens = Ripper.lex(code)
-    @continue = process_continue
-    @code_block_open = check_code_block(code)
-    @indent = process_nesting_level
-    @ltype = process_literal_type
-    line
+    continue = @continue
+    while tk = token
+      case tk
+      when TkNL, TkEND_OF_SCRIPT
+        @continue = continue unless continue.nil?
+        break unless @continue
+      when TkSPACE, TkCOMMENT
+      when TkSEMICOLON, TkBEGIN, TkELSE
+        @continue = continue = false
+      else
+        continue = nil
+      end
+    end
+    line = get_readed
+    if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
+      nil
+    else
+      line
+    end
+  end
+
+  def token
+    @prev_seek = @seek
+    @prev_line_no = @line_no
+    @prev_char_no = @char_no
+    begin
+      begin
+        tk = @OP.match(self)
+        @space_seen = tk.kind_of?(TkSPACE)
+        @lex_state = EXPR_END if @post_symbeg && tk.kind_of?(TkOp)
+        @post_symbeg = tk.kind_of?(TkSYMBEG)
+      rescue SyntaxError
+        raise if @exception_on_syntax_error
+        tk = TkError.new(@seek, @line_no, @char_no)
+      end
+    end while @skip_space and tk.kind_of?(TkSPACE)
+    if @readed_auto_clean_up
+      get_readed
+    end
+    tk
+  end
+
+  ENINDENT_CLAUSE = [
+    "case", "class", "def", "do", "for", "if",
+    "module", "unless", "until", "while", "begin"
+  ]
+  DEINDENT_CLAUSE = ["end"
+  ]
+
+  PERCENT_LTYPE = {
+    "q" => "\'",
+    "Q" => "\"",
+    "x" => "\`",
+    "r" => "/",
+    "w" => "]",
+    "W" => "]",
+    "i" => "]",
+    "I" => "]",
+    "s" => ":"
+  }
+
+  PERCENT_PAREN = {
+    "{" => "}",
+    "[" => "]",
+    "<" => ">",
+    "(" => ")"
+  }
+
+  Ltype2Token = {
+    "\'" => TkSTRING,
+    "\"" => TkSTRING,
+    "\`" => TkXSTRING,
+    "/" => TkREGEXP,
+    "]" => TkDSTRING,
+    ":" => TkSYMBOL
+  }
+  DLtype2Token = {
+    "\"" => TkDSTRING,
+    "\`" => TkDXSTRING,
+    "/" => TkDREGEXP,
+  }
+
+  def lex_init()
+    @OP = IRB::SLex.new
+    @OP.def_rules("\0", "\004", "\032") do |op, io|
+      Token(TkEND_OF_SCRIPT)
+    end
+
+    @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io|
+      @space_seen = true
+      while getc =~ /[ \t\f\r\13]/; end
+      ungetc
+      Token(TkSPACE)
+    end
+
+    @OP.def_rule("#") do |op, io|
+      identify_comment
+    end
+
+    @OP.def_rule("=begin",
+                 proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do
+      |op, io|
+      @ltype = "="
+      until getc == "\n"; end
+      until peek_equal?("=end") && peek(4) =~ /\s/
+        until getc == "\n"; end
+      end
+      gets
+      @ltype = nil
+      Token(TkRD_COMMENT)
+    end
+
+    @OP.def_rule("\n") do |op, io|
+      print "\\n\n" if RubyLex.debug?
+      case @lex_state
+      when EXPR_BEG, EXPR_FNAME, EXPR_DOT
+        @continue = true
+      else
+        @continue = false
+        @lex_state = EXPR_BEG
+        until (@indent_stack.empty? ||
+            [TkLPAREN, TkLBRACK, TkLBRACE,
+             TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
+          @indent_stack.pop
+        end
+      end
+      @here_header = false
+      @here_readed = []
+      Token(TkNL)
+    end
+
+    @OP.def_rules("*", "**",
+                  "=", "==", "===",
+                  "=~", "<=>",
+                  "<", "<=",
+                  ">", ">=", ">>",
+                  "!", "!=", "!~") do
+      |op, io|
+      case @lex_state
+      when EXPR_FNAME, EXPR_DOT
+        @lex_state = EXPR_ARG
+      else
+        @lex_state = EXPR_BEG
+      end
+      Token(op)
+    end
+
+    @OP.def_rules("<<") do
+      |op, io|
+      tk = nil
+      if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
+          (@lex_state != EXPR_ARG || @space_seen)
+        c = peek(0)
+        if /[-~"'`\w]/ =~ c
+          tk = identify_here_document
+        end
+      end
+      unless tk
+        tk = Token(op)
+        case @lex_state
+        when EXPR_FNAME, EXPR_DOT
+          @lex_state = EXPR_ARG
+        else
+          @lex_state = EXPR_BEG
+        end
+      end
+      tk
+    end
+
+    @OP.def_rules("'", '"') do
+      |op, io|
+      identify_string(op)
+    end
+
+    @OP.def_rules("`") do
+      |op, io|
+      if @lex_state == EXPR_FNAME
+        @lex_state = EXPR_END
+        Token(op)
+      else
+        identify_string(op)
+      end
+    end
+
+    @OP.def_rules('?') do
+      |op, io|
+      if @lex_state == EXPR_END
+        @lex_state = EXPR_BEG
+        Token(TkQUESTION)
+      else
+        ch = getc
+        if @lex_state == EXPR_ARG && ch =~ /\s/
+          ungetc
+          @lex_state = EXPR_BEG;
+          Token(TkQUESTION)
+        else
+          if (ch == '\\')
+            read_escape
+          end
+          @lex_state = EXPR_END
+          Token(TkINTEGER)
+        end
+      end
+    end
+
+    @OP.def_rules("&", "&&", "|", "||") do
+      |op, io|
+      @lex_state = EXPR_BEG
+      Token(op)
+    end
+
+    @OP.def_rules("+=", "-=", "*=", "**=",
+                  "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
+      |op, io|
+      @lex_state = EXPR_BEG
+      op =~ /^(.*)=$/
+      Token(TkOPASGN, $1)
+    end
+
+    @OP.def_rule("+@", proc{|op, io| @lex_state == EXPR_FNAME}) do
+      |op, io|
+      @lex_state = EXPR_ARG
+      Token(op)
+    end
+
+    @OP.def_rule("-@", proc{|op, io| @lex_state == EXPR_FNAME}) do
+      |op, io|
+      @lex_state = EXPR_ARG
+      Token(op)
+    end
+
+    @OP.def_rules("+", "-") do
+      |op, io|
+      catch(:RET) do
+        if @lex_state == EXPR_ARG
+          if @space_seen and peek(0) =~ /[0-9]/
+            throw :RET, identify_number
+          else
+            @lex_state = EXPR_BEG
+          end
+        elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
+          throw :RET, identify_number
+        else
+          @lex_state = EXPR_BEG
+        end
+        Token(op)
+      end
+    end
+
+    @OP.def_rule(".") do
+      |op, io|
+      @lex_state = EXPR_BEG
+      if peek(0) =~ /[0-9]/
+        ungetc
+        identify_number
+      else
+        # for "obj.if" etc.
+        @lex_state = EXPR_DOT
+        Token(TkDOT)
+      end
+    end
+
+    @OP.def_rules("..", "...") do
+      |op, io|
+      @lex_state = EXPR_BEG
+      Token(op)
+    end
+
+    lex_int2
+  end
+
+  def lex_int2
+    @OP.def_rules("]", "}", ")") do
+      |op, io|
+      @lex_state = EXPR_END
+      @indent -= 1
+      @indent_stack.pop
+      Token(op)
+    end
+
+    @OP.def_rule(":") do
+      |op, io|
+      if @lex_state == EXPR_END || peek(0) =~ /\s/
+        @lex_state = EXPR_BEG
+        Token(TkCOLON)
+      else
+        @lex_state = EXPR_FNAME
+        Token(TkSYMBEG)
+      end
+    end
+
+    @OP.def_rule("::") do
+       |op, io|
+      if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
+        @lex_state = EXPR_BEG
+        Token(TkCOLON3)
+      else
+        @lex_state = EXPR_DOT
+        Token(TkCOLON2)
+      end
+    end
+
+    @OP.def_rule("/") do
+      |op, io|
+      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
+        identify_string(op)
+      elsif peek(0) == '='
+        getc
+        @lex_state = EXPR_BEG
+        Token(TkOPASGN, "/") #/)
+      elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
+        identify_string(op)
+      else
+        @lex_state = EXPR_BEG
+        Token("/") #/)
+      end
+    end
+
+    @OP.def_rules("^") do
+      |op, io|
+      @lex_state = EXPR_BEG
+      Token("^")
+    end
+
+    @OP.def_rules(",") do
+      |op, io|
+      @lex_state = EXPR_BEG
+      Token(op)
+    end
+
+    @OP.def_rules(";") do
+      |op, io|
+      @lex_state = EXPR_BEG
+      until (@indent_stack.empty? ||
+          [TkLPAREN, TkLBRACK, TkLBRACE,
+           TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
+        @indent_stack.pop
+      end
+      Token(op)
+    end
+
+    @OP.def_rule("~") do
+      |op, io|
+      @lex_state = EXPR_BEG
+      Token("~")
+    end
+
+    @OP.def_rule("~@", proc{|op, io| @lex_state == EXPR_FNAME}) do
+      |op, io|
+      @lex_state = EXPR_BEG
+      Token("~")
+    end
+
+    @OP.def_rule("(") do
+      |op, io|
+      @indent += 1
+      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
+        @lex_state = EXPR_BEG
+        tk_c = TkfLPAREN
+      else
+        @lex_state = EXPR_BEG
+        tk_c = TkLPAREN
+      end
+      @indent_stack.push tk_c
+      Token(tk_c)
+    end
+
+    @OP.def_rule("[]", proc{|op, io| @lex_state == EXPR_FNAME}) do
+      |op, io|
+      @lex_state = EXPR_ARG
+      Token("[]")
+    end
+
+    @OP.def_rule("[]=", proc{|op, io| @lex_state == EXPR_FNAME}) do
+      |op, io|
+      @lex_state = EXPR_ARG
+      Token("[]=")
+    end
+
+    @OP.def_rule("[") do
+      |op, io|
+      @indent += 1
+      if @lex_state == EXPR_FNAME
+        tk_c = TkfLBRACK
+      else
+        if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
+          tk_c = TkLBRACK
+        elsif @lex_state == EXPR_ARG && @space_seen
+          tk_c = TkLBRACK
+        else
+          tk_c = TkfLBRACK
+        end
+        @lex_state = EXPR_BEG
+      end
+      @indent_stack.push tk_c
+      Token(tk_c)
+    end
+
+    @OP.def_rule("{") do
+      |op, io|
+      @indent += 1
+      if @lex_state != EXPR_END && @lex_state != EXPR_ARG
+        tk_c = TkLBRACE
+      else
+        tk_c = TkfLBRACE
+      end
+      @lex_state = EXPR_BEG
+      @indent_stack.push tk_c
+      Token(tk_c)
+    end
+
+    @OP.def_rule('\\') do
+      |op, io|
+      if getc == "\n"
+        @space_seen = true
+        @continue = true
+        Token(TkSPACE)
+      else
+        read_escape
+        Token("\\")
+      end
+    end
+
+    @OP.def_rule('%') do
+      |op, io|
+      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
+        identify_quotation
+      elsif peek(0) == '='
+        getc
+        Token(TkOPASGN, :%)
+      elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
+        identify_quotation
+      else
+        @lex_state = EXPR_BEG
+        Token("%") #))
+      end
+    end
+
+    @OP.def_rule('$') do
+      |op, io|
+      identify_gvar
+    end
+
+    @OP.def_rule('@') do
+      |op, io|
+      if peek(0) =~ /[\w@]/
+        ungetc
+        identify_identifier
+      else
+        Token("@")
+      end
+    end
+
+    @OP.def_rule("") do
+      |op, io|
+      printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
+      if peek(0) =~ /[0-9]/
+        t = identify_number
+      elsif peek(0) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
+        t = identify_identifier
+      end
+      printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
+      t
+    end
+
+    p @OP if RubyLex.debug?
   end
 
-  def process_continue
-    continued_bits = Ripper::EXPR_BEG | Ripper::EXPR_FNAME | Ripper::EXPR_DOT
-    # last token is always newline
-    if @tokens.size >= 2 and @tokens[-2][1] == :on_regexp_end
-      # end of regexp literal
-      return false
-    elsif @tokens.size >= 2 and @tokens[-2][1] == :on_semicolon
-      return false
-    elsif @tokens.size >= 2 and @tokens[-2][1] == :on_kw and (@tokens[-2][2] == 'begin' or @tokens[-2][2] == 'else')
-      return false
-    elsif !@tokens.empty? and @tokens.last[2] == "\\\n"
-      return true
-    elsif @tokens.size >= 2 and @tokens[-2][3].anybits?(continued_bits)
-      # end of literal except for regexp
-      return true
-    end
-    false
+  def identify_gvar
+    @lex_state = EXPR_END
+
+    case ch = getc
+    when /[~_*$?!@\/\\;,=:<>".]/   #"
+      Token(TkGVAR, "$" + ch)
+    when "-"
+      Token(TkGVAR, "$-" + getc)
+    when "&", "`", "'", "+"
+      Token(TkBACK_REF, "$"+ch)
+    when /[1-9]/
+      while getc =~ /[0-9]/; end
+      ungetc
+      Token(TkNTH_REF)
+    when /\w/
+      ungetc
+      ungetc
+      identify_identifier
+    else
+      ungetc
+      Token("$")
+    end
   end
 
-  def check_code_block(code)
-    return true if @tokens.empty?
-    if @tokens.last[1] == :on_heredoc_beg
-      return true
-    end
-
-    begin # check if parser error are available
-      RubyVM::InstructionSequence.compile(code)
-    rescue SyntaxError => e
-      case e.message
-      when /unterminated (?:string|regexp) meets end of file/
-        # "unterminated regexp meets end of file"
-        #
-        #   example:
-        #     /
-        #
-        # "unterminated string meets end of file"
-        #
-        #   example:
-        #     '
-        return true
-      when /syntax error, unexpected end-of-input/
-        # "syntax error, unexpected end-of-input, expecting keyword_end"
-        #
-        #   example:
-        #     if ture
-        #       hoge
-        #       if false
-        #         fuga
-        #       end
-        return true
-      when /syntax error, unexpected keyword_end/
-        # "syntax error, unexpected keyword_end"
-        #
-        #   example:
-        #     if (
-        #     end
-        #
-        #   example:
-        #     end
-        return false
-      when /unexpected tREGEXP_BEG/
-        # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
-        #
-        #   example:
-        #     method / f /
-        return false
-      end
-    end
-
-    last_lex_state = @tokens.last[3]
-    if last_lex_state.allbits?(Ripper::EXPR_BEG)
-      return false
-    elsif last_lex_state.allbits?(Ripper::EXPR_DOT)
-      return true
-    elsif last_lex_state.allbits?(Ripper::EXPR_CLASS)
-      return true
-    elsif last_lex_state.allbits?(Ripper::EXPR_FNAME)
-      return true
-    elsif last_lex_state.allbits?(Ripper::EXPR_VALUE)
-      return true
-    elsif last_lex_state.allbits?(Ripper::EXPR_ARG)
-      return false
-    end
-
-    false
+  def identify_identifier
+    token = ""
+    if peek(0) =~ /[$@]/
+      token.concat(c = getc)
+      if c == "@" and peek(0) == "@"
+        token.concat getc
+      end
+    end
+
+    while (ch = getc) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
+      print ":", ch, ":" if RubyLex.debug?
+      token.concat ch
+    end
+    ungetc
+
+    if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "="
+      token.concat getc
+    end
+
+    # almost fix token
+
+    case token
+    when /^\$/
+      return Token(TkGVAR, token)
+    when /^\@\@/
+      @lex_state = EXPR_END
+      # p Token(TkCVAR, token)
+      return Token(TkCVAR, token)
+    when /^\@/
+      @lex_state = EXPR_END
+      return Token(TkIVAR, token)
+    end
+
+    if @lex_state != EXPR_DOT
+      print token, "\n" if RubyLex.debug?
+
+      token_c, *trans = TkReading2Token[token]
+      if token_c
+        # reserved word?
+
+        if (@lex_state != EXPR_BEG &&
+            @lex_state != EXPR_FNAME &&
+            trans[1])
+          # modifiers
+          token_c = TkSymbol2Token[trans[1]]
+          @lex_state = trans[0]
+        else
+          if @lex_state != EXPR_FNAME and peek(0) != ':'
+            if ENINDENT_CLAUSE.include?(token)
+              # check for ``class = val'' etc.
+              valid = true
+              case token
+              when "class"
+                valid = false unless peek_match?(/^\s*(<<|\w|::)/)
+              when "def"
+                valid = false if peek_match?(/^\s*(([+\-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/)
+              when "do"
+                valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&)/)
+              when *ENINDENT_CLAUSE
+                valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&|\|)/)
+              else
+                # no nothing
+              end
+              if valid
+                if token == "do"
+                  if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
+                    @indent += 1
+                    @indent_stack.push token_c
+                  end
+                else
+                  @indent += 1
+                  @indent_stack.push token_c
+                end
+              end
+
+            elsif DEINDENT_CLAUSE.include?(token)
+              @indent -= 1
+              @indent_stack.pop
+            end
+            @lex_state = trans[0]
+          else
+            @lex_state = EXPR_END
+          end
+        end
+        return Token(token_c, token)
+      end
+    end
+
+    if @lex_state == EXPR_FNAME
+      @lex_state = EXPR_END
+      if peek(0) == '='
+        token.concat getc
+      end
+    elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
+      @lex_state = EXPR_ARG
+    else
+      @lex_state = EXPR_END
+    end
+
+    if token[0, 1] =~ /[A-Z]/
+      return Token(TkCONSTANT, token)
+    elsif token[token.size - 1, 1] =~ /[!?]/
+      return Token(TkFID, token)
+    else
+      return Token(TkIDENTIFIER, token)
+    end
   end
 
-  def process_nesting_level
-    @tokens.inject(0) { |indent, t|
-      case t[1]
-      when :on_lbracket, :on_lbrace, :on_lparen
-        indent += 1
-      when :on_rbracket, :on_rbrace, :on_rparen
-        indent -= 1
-      when :on_kw
-        case t[2]
-        when 'def', 'do', 'case', 'for', 'begin', 'class', 'module'
-          indent += 1
-        when 'if', 'unless', 'while', 'until', 'rescue'
-          # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL
-          indent += 1 unless t[3].allbits?(Ripper::EXPR_LABEL)
-        when 'end'
-          indent -= 1
+  def identify_here_document
+    ch = getc
+    if ch == "-" || ch == "~"
+      ch = getc
+      indent = true
+    end
+    if /['"`]/ =~ ch
+      lt = ch
+      quoted = ""
+      while (c = getc) && c != lt
+        quoted.concat c
+      end
+    else
+      lt = '"'
+      quoted = ch.dup
+      while (c = getc) && c =~ /\w/
+        quoted.concat c
+      end
+      ungetc
+    end
+
+    ltback, @ltype = @ltype, lt
+    reserve = []
+    while ch = getc
+      reserve.push ch
+      if ch == "\\"
+        reserve.push ch = getc
+      elsif ch == "\n"
+        break
+      end
+    end
+
+    @here_header = false
+
+    line = ""
+    while ch = getc
+      if ch == "\n"
+        if line == quoted
+          break
+        end
+        line = ""
+      else
+        line.concat ch unless indent && line == "" && /\s/ =~ ch
+        if @ltype != "'" && ch == "#" && peek(0) == "{"
+          identify_string_dvar
+        end
+      end
+    end
+
+    @here_header = true
+    @here_readed.concat reserve
+    while ch = reserve.pop
+      ungetc ch
+    end
+
+    @ltype = ltback
+    @lex_state = EXPR_END
+    Token(Ltype2Token[lt])
+  end
+
+  def identify_quotation
+    ch = getc
+    if lt = PERCENT_LTYPE[ch]
+      ch = getc
+    elsif ch =~ /\W/
+      lt = "\""
+    else
+      RubyLex.fail SyntaxError, "unknown type of %string"
+    end
+    @quoted = ch unless @quoted = PERCENT_PAREN[ch]
+    identify_string(lt, @quoted)
+  end
+
+  def identify_number
+    @lex_state = EXPR_END
+
+    if peek(0) == "0" && peek(1) !~ /[.eE]/
+      getc
+      case peek(0)
+      when /[xX]/
+        ch = getc
+        match = /[0-9a-fA-F_]/
+      when /[bB]/
+        ch = getc
+        match = /[01_]/
+      when /[oO]/
+        ch = getc
+        match = /[0-7_]/
+      when /[dD]/
+        ch = getc
+        match = /[0-9_]/
+      when /[0-7]/
+        match = /[0-7_]/
+      when /[89]/
+        RubyLex.fail SyntaxError, "Invalid octal digit"
+      else
+        return Token(TkINTEGER)
+      end
+
+      len0 = true
+      non_digit = false
+      while ch = getc
+        if match =~ ch
+          if ch == "_"
+            if non_digit
+              RubyLex.fail SyntaxError, "trailing `#{ch}' in number"
+            else
+              non_digit = ch
+            end
+          else
+            non_digit = false
+            len0 = false
+          end
+        else
+          ungetc
+          if len0
+            RubyLex.fail SyntaxError, "numeric literal without digits"
+          end
+          if non_digit
+            RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
+          end
+          break
+        end
+      end
+      return Token(TkINTEGER)
+    end
+
+    type = TkINTEGER
+    allow_point = true
+    allow_e = true
+    non_digit = false
+    while ch = getc
+      case ch
+      when /[0-9]/
+        non_digit = false
+      when "_"
+        non_digit = ch
+      when allow_point && "."
+        if non_digit
+          RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
+        end
+        type = TkFLOAT
+        if peek(0) !~ /[0-9]/
+          type = TkINTEGER
+          ungetc
+          break
+        end
+        allow_point = false
+      when allow_e && "e", allow_e && "E"
+        if non_digit
+          RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
+        end
+        type = TkFLOAT
+        if peek(0) =~ /[+-]/
+          getc
+        end
+        allow_e = false
+        allow_point = false
+        non_digit = ch
+      else
+        if non_digit
+          RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
         end
+        ungetc
+        break
       end
-      # percent literals are not indented
-      indent
-    }
+    end
+    Token(type)
+  end
+
+  def identify_string(ltype, quoted = ltype)
+    @ltype = ltype
+    @quoted = quoted
+    subtype = nil
+    begin
+      nest = 0
+      while ch = getc
+        if @quoted == ch and nest == 0
+          break
+        elsif @ltype != "'" && ch == "#" && peek(0) == "{"
+          identify_string_dvar
+        elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#"
+          subtype = true
+        elsif ch == '\\' and @ltype == "'" #'
+          case ch = getc
+          when "\\", "\n", "'"
+          else
+            ungetc
+          end
+        elsif ch == '\\' #'
+          read_escape
+        end
+        if PERCENT_PAREN.values.include?(@quoted)
+          if PERCENT_PAREN[ch] == @quoted
+            nest += 1
+          elsif ch == @quoted
+            nest -= 1
+          end
+        end
+      end
+      if @ltype == "/"
+        while /[imxoesun]/ =~ peek(0)
+          getc
+        end
+      end
+      if subtype
+        Token(DLtype2Token[ltype])
+      else
+        Token(Ltype2Token[ltype])
+      end
+    ensure
+      @ltype = nil
+      @quoted = nil
+      @lex_state = EXPR_END
+    end
   end
 
-  def check_string_literal
-    i = 0
-    start_token = []
-    end_type = []
-    while i < @tokens.size
-      t = @tokens[i]
-      case t[1]
-      when :on_tstring_beg
-        start_token << t
-        end_type << :on_tstring_end
-      when :on_regexp_beg
-        start_token << t
-        end_type << :on_regexp_end
-      when :on_symbeg
-        if (i + 1) < @tokens.size and @tokens[i + 1][1] != :on_ident
-          start_token << t
-          end_type << :on_tstring_end
+  def identify_string_dvar
+    begin
+      getc
+
+      reserve_continue = @continue
+      reserve_ltype = @ltype
+      reserve_indent = @indent
+      reserve_indent_stack = @indent_stack
+      reserve_state = @lex_state
+      reserve_quoted = @quoted
+
+      @ltype = nil
+      @quoted = nil
+      @indent = 0
+      @indent_stack = []
+      @lex_state = EXPR_BEG
+
+      loop do
+        @continue = false
+        prompt
+        tk = token
+        if @ltype or @continue or @indent >= 0
+          next
         end
-      when :on_backtick
-        start_token << t
-        end_type << :on_tstring_end
-      when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg
-        start_token << t
-        end_type << :on_tstring_end
-      when :on_heredoc_beg
-        start_token << t
-        end_type << :on_heredoc_end
-      when end_type.last
-        start_token.pop
-        end_type.pop
-      end
-      i += 1
-    end
-    start_token.last.nil? ? '' : start_token.last
+        break if tk.kind_of?(TkRBRACE)
+      end
+    ensure
+      @continue = reserve_continue
+      @ltype = reserve_ltype
+      @indent = reserve_indent
+      @indent_stack = reserve_indent_stack
+      @lex_state = reserve_state
+      @quoted = reserve_quoted
+    end
   end
 
-  def process_literal_type
-    start_token = check_string_literal
-    case start_token[1]
-    when :on_tstring_beg
-      case start_token[2]
-      when ?"      then ?"
-      when /^%.$/  then ?"
-      when /^%Q.$/ then ?"
-      when ?'      then ?'
-      when /^%q.$/ then ?'
-      end
-    when :on_regexp_beg   then ?/
-    when :on_symbeg       then ?:
-    when :on_backtick     then ?`
-    when :on_qwords_beg   then ?]
-    when :on_words_beg    then ?]
-    when :on_qsymbols_beg then ?]
-    when :on_symbols_beg  then ?]
-    when :on_heredoc_beg
-      start_token[2] =~ /<<[-~]?(['"`])[_a-zA-Z0-9]+\1/
-      case $1
-      when ?" then ?"
-      when ?' then ?'
-      when ?` then ?`
-      else         ?"
+  def identify_comment
+    @ltype = "#"
+
+    while ch = getc
+      if ch == "\n"
+        @ltype = nil
+        ungetc
+        break
+      end
+    end
+    return Token(TkCOMMENT)
+  end
+
+  def read_escape
+    case ch = getc
+    when "\n", "\r", "\f"
+    when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #"
+    when /[0-7]/
+      ungetc ch
+      3.times do
+        case ch = getc
+        when /[0-7]/
+        when nil
+          break
+        else
+          ungetc
+          break
+        end
+      end
+
+    when "x"
+      2.times do
+        case ch = getc
+        when /[0-9a-fA-F]/
+        when nil
+          break
+        else
+          ungetc
+          break
+        end
+      end
+
+    when "M"
+      if (ch = getc) != '-'
+        ungetc
+      else
+        if (ch = getc) == "\\" #"
+          read_escape
+        end
+      end
+
+    when "C", "c" #, "^"
+      if ch == "C" and (ch = getc) != "-"
+        ungetc
+      elsif (ch = getc) == "\\" #"
+        read_escape
       end
     else
-      nil
+      # other characters
     end
   end
 end
author	aycabta <aycabta@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2019-04-21 09:13:49 +0000
committer	aycabta <aycabta@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2019-04-21 09:13:49 +0000
commit	51cec00953ff8d7baa483d3846aa1dbdb89101aa (patch)
tree	2900ea0ba7c09379990e9da2edda5d6ef8fa075c /lib/irb
parent	683834eb72cfa77f4eac1c705327b522302b1721 (diff)