summaryrefslogtreecommitdiff
path: root/lib/irb/ruby-lex.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/irb/ruby-lex.rb')
-rw-r--r--lib/irb/ruby-lex.rb1121
1 files changed, 381 insertions, 740 deletions
diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb
index d7ac17bd79..cfe36be83f 100644
--- a/lib/irb/ruby-lex.rb
+++ b/lib/irb/ruby-lex.rb
@@ -1,833 +1,474 @@
-# frozen_string_literal: false
+# frozen_string_literal: true
#
# irb/ruby-lex.rb - ruby lexcal analyzer
-# $Release Version: 0.9.6$
-# $Revision$
# by Keiju ISHITSUKA(keiju@ruby-lang.org)
#
-# --
-#
-#
-#
require "ripper"
require "jruby" if RUBY_ENGINE == "jruby"
-
-# :stopdoc:
-class RubyLex
-
- class TerminateLineInput < StandardError
- def initialize
- super("Terminate Line Input")
+require_relative "nesting_parser"
+
+module IRB
+ # :stopdoc:
+ class RubyLex
+ ASSIGNMENT_NODE_TYPES = [
+ # Local, instance, global, class, constant, instance, and index assignment:
+ # "foo = bar",
+ # "@foo = bar",
+ # "$foo = bar",
+ # "@@foo = bar",
+ # "::Foo = bar",
+ # "a::Foo = bar",
+ # "Foo = bar"
+ # "foo.bar = 1"
+ # "foo[1] = bar"
+ :assign,
+
+ # Operation assignment:
+ # "foo += bar"
+ # "foo -= bar"
+ # "foo ||= bar"
+ # "foo &&= bar"
+ :opassign,
+
+ # Multiple assignment:
+ # "foo, bar = 1, 2
+ :massign,
+ ]
+
+ class TerminateLineInput < StandardError
+ def initialize
+ super("Terminate Line Input")
+ end
end
- end
- def initialize
- @exp_line_no = @line_no = 1
- @indent = 0
- @continue = false
- @line = ""
- @prompt = nil
- end
+ def self.compile_with_errors_suppressed(code, line_no: 1)
+ begin
+ result = yield code, line_no
+ rescue ArgumentError
+ # Ruby can issue an error for the code if there is an
+ # incomplete magic comment for encoding in it. Force an
+ # expression with a new line before the code in this
+ # case to prevent magic comment handling. To make sure
+ # line numbers in the lexed code remain the same,
+ # decrease the line number by one.
+ code = ";\n#{code}"
+ line_no -= 1
+ result = yield code, line_no
+ end
+ result
+ end
- def self.compile_with_errors_suppressed(code, line_no: 1)
- begin
- result = yield code, line_no
- rescue ArgumentError
- # Ruby can issue an error for the code if there is an
- # incomplete magic comment for encoding in it. Force an
- # expression with a new line before the code in this
- # case to prevent magic comment handling. To make sure
- # line numbers in the lexed code remain the same,
- # decrease the line number by one.
- code = ";\n#{code}"
- line_no -= 1
- result = yield code, line_no
+ ERROR_TOKENS = [
+ :on_parse_error,
+ :compile_error,
+ :on_assign_error,
+ :on_alias_error,
+ :on_class_name_error,
+ :on_param_error
+ ]
+
+ def self.generate_local_variables_assign_code(local_variables)
+ "#{local_variables.join('=')}=nil;" unless local_variables.empty?
end
- result
- end
- # io functions
- def set_input(io, p = nil, context: nil, &block)
- @io = io
- if @io.respond_to?(:check_termination)
- @io.check_termination do |code|
- if Reline::IOGate.in_pasting?
- lex = RubyLex.new
- rest = lex.check_termination_in_prev_line(code, context: context)
- if rest
- Reline.delete_text
- rest.bytes.reverse_each do |c|
- Reline.ungetc(c)
- end
- true
- else
- false
- end
- else
- code.gsub!(/\s*\z/, '').concat("\n")
- ltype, indent, continue, code_block_open = check_state(code, context: context)
- if ltype or indent > 0 or continue or code_block_open
- false
- else
- true
- end
- end
+ # Some part of the code is not included in Ripper's token.
+ # Example: DATA part, token after heredoc_beg when heredoc has unclosed embexpr.
+ # With interpolated tokens, tokens.map(&:tok).join will be equal to code.
+ def self.interpolate_ripper_ignored_tokens(code, tokens)
+ line_positions = [0]
+ code.lines.each do |line|
+ line_positions << line_positions.last + line.bytesize
end
- end
- if @io.respond_to?(:dynamic_prompt)
- @io.dynamic_prompt do |lines|
- lines << '' if lines.empty?
- result = []
- tokens = self.class.ripper_lex_without_warning(lines.map{ |l| l + "\n" }.join, context: context)
- code = String.new
- partial_tokens = []
- unprocessed_tokens = []
- line_num_offset = 0
- tokens.each do |t|
- partial_tokens << t
- unprocessed_tokens << t
- if t[2].include?("\n")
- t_str = t[2]
- t_str.each_line("\n") do |s|
- code << s << "\n"
- ltype, indent, continue, code_block_open = check_state(code, partial_tokens, context: context)
- result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset)
- line_num_offset += 1
- end
- unprocessed_tokens = []
- else
- code << t[2]
- end
- end
- unless unprocessed_tokens.empty?
- ltype, indent, continue, code_block_open = check_state(code, unprocessed_tokens, context: context)
- result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset)
+ prev_byte_pos = 0
+ interpolated = []
+ prev_line = 1
+ tokens.each do |t|
+ line, col = t.pos
+ byte_pos = line_positions[line - 1] + col
+ if prev_byte_pos < byte_pos
+ tok = code.byteslice(prev_byte_pos...byte_pos)
+ pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]]
+ interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0)
+ prev_line += tok.count("\n")
end
- result
+ interpolated << t
+ prev_byte_pos = byte_pos + t.tok.bytesize
+ prev_line += t.tok.count("\n")
end
+ if prev_byte_pos < code.bytesize
+ tok = code.byteslice(prev_byte_pos..)
+ pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]]
+ interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0)
+ end
+ interpolated
end
- if p.respond_to?(:call)
- @input = p
- elsif block_given?
- @input = block
- else
- @input = Proc.new{@io.gets}
- end
- end
-
- def set_prompt(p = nil, &block)
- p = block if block_given?
- if p.respond_to?(:call)
- @prompt = p
- else
- @prompt = Proc.new{print p}
- end
- end
- ERROR_TOKENS = [
- :on_parse_error,
- :compile_error,
- :on_assign_error,
- :on_alias_error,
- :on_class_name_error,
- :on_param_error
- ]
-
- def self.ripper_lex_without_warning(code, context: nil)
- verbose, $VERBOSE = $VERBOSE, nil
- if context
- lvars = context&.workspace&.binding&.local_variables
- if lvars && !lvars.empty?
- code = "#{lvars.join('=')}=nil\n#{code}"
+ def self.ripper_lex_without_warning(code, local_variables: [])
+ verbose, $VERBOSE = $VERBOSE, nil
+ lvars_code = generate_local_variables_assign_code(local_variables)
+ original_code = code
+ if lvars_code
+ code = "#{lvars_code}\n#{code}"
line_no = 0
else
line_no = 1
end
- end
- tokens = nil
- compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, line_no|
- lexer = Ripper::Lexer.new(inner_code, '-', line_no)
- if lexer.respond_to?(:scan) # Ruby 2.7+
+
+ compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, line_no|
+ lexer = Ripper::Lexer.new(inner_code, '-', line_no)
tokens = []
- pos_to_index = {}
lexer.scan.each do |t|
next if t.pos.first == 0
- if pos_to_index.has_key?(t[0])
- index = pos_to_index[t[0]]
- found_tk = tokens[index]
- if ERROR_TOKENS.include?(found_tk[1]) && !ERROR_TOKENS.include?(t[1])
- tokens[index] = t
- end
+ prev_tk = tokens.last
+ position_overlapped = prev_tk && t.pos[0] == prev_tk.pos[0] && t.pos[1] < prev_tk.pos[1] + prev_tk.tok.bytesize
+ if position_overlapped
+ tokens[-1] = t if ERROR_TOKENS.include?(prev_tk.event) && !ERROR_TOKENS.include?(t.event)
else
- pos_to_index[t[0]] = tokens.size
tokens << t
end
end
- else
- tokens = lexer.parse
- end
- end
- tokens
- ensure
- $VERBOSE = verbose
- end
-
- def find_prev_spaces(line_index)
- return 0 if @tokens.size == 0
- md = @tokens[0][2].match(/(\A +)/)
- prev_spaces = md.nil? ? 0 : md[1].count(' ')
- line_count = 0
- @tokens.each_with_index do |t, i|
- if t[2].include?("\n")
- line_count += t[2].count("\n")
- if line_count >= line_index
- return prev_spaces
- end
- if (@tokens.size - 1) > i
- md = @tokens[i + 1][2].match(/(\A +)/)
- prev_spaces = md.nil? ? 0 : md[1].count(' ')
- end
- end
- end
- prev_spaces
- end
-
- def set_auto_indent(context)
- if @io.respond_to?(:auto_indent) and context.auto_indent_mode
- @io.auto_indent do |lines, line_index, byte_pointer, is_newline|
- if is_newline
- @tokens = self.class.ripper_lex_without_warning(lines[0..line_index].join("\n"), context: context)
- prev_spaces = find_prev_spaces(line_index)
- depth_difference = check_newline_depth_difference
- depth_difference = 0 if depth_difference < 0
- prev_spaces + depth_difference * 2
- else
- code = line_index.zero? ? '' : lines[0..(line_index - 1)].map{ |l| l + "\n" }.join
- last_line = lines[line_index]&.byteslice(0, byte_pointer)
- code += last_line if last_line
- @tokens = self.class.ripper_lex_without_warning(code, context: context)
- corresponding_token_depth = check_corresponding_token_depth(lines, line_index)
- if corresponding_token_depth
- corresponding_token_depth
- else
- nil
- end
- end
+ interpolate_ripper_ignored_tokens(original_code, tokens)
end
+ ensure
+ $VERBOSE = verbose
end
- end
-
- def check_state(code, tokens = nil, context: nil)
- tokens = self.class.ripper_lex_without_warning(code, context: context) unless tokens
- ltype = process_literal_type(tokens)
- indent = process_nesting_level(tokens)
- continue = process_continue(tokens)
- code_block_open = check_code_block(code, tokens)
- [ltype, indent, continue, code_block_open]
- end
- def prompt
- if @prompt
- @prompt.call(@ltype, @indent, @continue, @line_no)
+ def check_code_state(code, local_variables:)
+ tokens = self.class.ripper_lex_without_warning(code, local_variables: local_variables)
+ opens = NestingParser.open_tokens(tokens)
+ [tokens, opens, code_terminated?(code, tokens, opens, local_variables: local_variables)]
end
- end
-
- def initialize_input
- @ltype = nil
- @indent = 0
- @continue = false
- @line = ""
- @exp_line_no = @line_no
- @code_block_open = false
- end
- def each_top_level_statement
- initialize_input
- catch(:TERM_INPUT) do
- loop do
- begin
- prompt
- unless l = lex
- throw :TERM_INPUT if @line == ''
- else
- @line_no += l.count("\n")
- if l == "\n"
- @exp_line_no += 1
- next
- end
- @line.concat l
- if @code_block_open or @ltype or @continue or @indent > 0
- next
- end
- end
- if @line != "\n"
- @line.force_encoding(@io.encoding)
- yield @line, @exp_line_no
- end
- raise TerminateLineInput if @io.eof?
- @line = ''
- @exp_line_no = @line_no
-
- @indent = 0
- rescue TerminateLineInput
- initialize_input
- prompt
- end
+ def code_terminated?(code, tokens, opens, local_variables:)
+ case check_code_syntax(code, local_variables: local_variables)
+ when :unrecoverable_error
+ true
+ when :recoverable_error
+ false
+ when :other_error
+ opens.empty? && !should_continue?(tokens)
+ when :valid
+ !should_continue?(tokens)
end
end
- end
- def lex
- line = @input.call
- if @io.respond_to?(:check_termination)
- return line # multiline
- end
- code = @line + (line.nil? ? '' : line)
- code.gsub!(/\s*\z/, '').concat("\n")
- @tokens = self.class.ripper_lex_without_warning(code)
- @continue = process_continue
- @code_block_open = check_code_block(code)
- @indent = process_nesting_level
- @ltype = process_literal_type
- line
- end
-
- def process_continue(tokens = @tokens)
- # last token is always newline
- if tokens.size >= 2 and tokens[-2][1] == :on_regexp_end
- # end of regexp literal
- return false
- elsif tokens.size >= 2 and tokens[-2][1] == :on_semicolon
- return false
- elsif tokens.size >= 2 and tokens[-2][1] == :on_kw and ['begin', 'else', 'ensure'].include?(tokens[-2][2])
- return false
- elsif !tokens.empty? and tokens.last[2] == "\\\n"
- return true
- elsif tokens.size >= 1 and tokens[-1][1] == :on_heredoc_end # "EOH\n"
- return false
- elsif tokens.size >= 2 and defined?(Ripper::EXPR_BEG) and tokens[-2][3].anybits?(Ripper::EXPR_BEG | Ripper::EXPR_FNAME) and tokens[-2][2] !~ /\A\.\.\.?\z/
- # end of literal except for regexp
- # endless range at end of line is not a continue
- return true
- end
- false
- end
-
- def check_code_block(code, tokens = @tokens)
- return true if tokens.empty?
- if tokens.last[1] == :on_heredoc_beg
- return true
- end
+ def assignment_expression?(code, local_variables:)
+ # Try to parse the code and check if the last of possibly multiple
+ # expressions is an assignment type.
- begin # check if parser error are available
+ # If the expression is invalid, Ripper.sexp should return nil which will
+ # result in false being returned. Any valid expression should return an
+ # s-expression where the second element of the top level array is an
+ # array of parsed expressions. The first element of each expression is the
+ # expression's type.
verbose, $VERBOSE = $VERBOSE, nil
- case RUBY_ENGINE
- when 'ruby'
- self.class.compile_with_errors_suppressed(code) do |inner_code, line_no|
- RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no)
- end
- when 'jruby'
- JRuby.compile_ir(code)
- else
- catch(:valid) do
- eval("BEGIN { throw :valid, true }\n#{code}")
- false
- end
- end
- rescue EncodingError
- # This is for a hash with invalid encoding symbol, {"\xAE": 1}
- rescue SyntaxError => e
- case e.message
- when /unterminated (?:string|regexp) meets end of file/
- # "unterminated regexp meets end of file"
- #
- # example:
- # /
- #
- # "unterminated string meets end of file"
- #
- # example:
- # '
- return true
- when /syntax error, unexpected end-of-input/
- # "syntax error, unexpected end-of-input, expecting keyword_end"
- #
- # example:
- # if true
- # hoge
- # if false
- # fuga
- # end
- return true
- when /syntax error, unexpected keyword_end/
- # "syntax error, unexpected keyword_end"
- #
- # example:
- # if (
- # end
- #
- # example:
- # end
- return false
- when /syntax error, unexpected '\.'/
- # "syntax error, unexpected '.'"
- #
- # example:
- # .
- return false
- when /unexpected tREGEXP_BEG/
- # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
- #
- # example:
- # method / f /
- return false
- end
+ code = "#{RubyLex.generate_local_variables_assign_code(local_variables) || 'nil;'}\n#{code}"
+ # Get the last node_type of the line. drop(1) is to ignore the local_variables_assign_code part.
+ node_type = Ripper.sexp(code)&.dig(1)&.drop(1)&.dig(-1, 0)
+ ASSIGNMENT_NODE_TYPES.include?(node_type)
ensure
$VERBOSE = verbose
end
- if defined?(Ripper::EXPR_BEG)
- last_lex_state = tokens.last[3]
- if last_lex_state.allbits?(Ripper::EXPR_BEG)
- return false
- elsif last_lex_state.allbits?(Ripper::EXPR_DOT)
- return true
- elsif last_lex_state.allbits?(Ripper::EXPR_CLASS)
- return true
- elsif last_lex_state.allbits?(Ripper::EXPR_FNAME)
- return true
- elsif last_lex_state.allbits?(Ripper::EXPR_VALUE)
- return true
- elsif last_lex_state.allbits?(Ripper::EXPR_ARG)
- return false
+ def should_continue?(tokens)
+ # Look at the last token and check if IRB need to continue reading next line.
+ # Example code that should continue: `a\` `a +` `a.`
+ # Trailing spaces, newline, comments are skipped
+ return true if tokens.last&.event == :on_sp && tokens.last.tok == "\\\n"
+
+ tokens.reverse_each do |token|
+ case token.event
+ when :on_sp, :on_nl, :on_ignored_nl, :on_comment, :on_embdoc_beg, :on_embdoc, :on_embdoc_end
+ # Skip
+ when :on_regexp_end, :on_heredoc_end, :on_semicolon
+ # State is EXPR_BEG but should not continue
+ return false
+ else
+ # Endless range should not continue
+ return false if token.event == :on_op && token.tok.match?(/\A\.\.\.?\z/)
+
+ # EXPR_DOT and most of the EXPR_BEG should continue
+ return token.state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_DOT)
+ end
end
+ false
end
- false
- end
+ def check_code_syntax(code, local_variables:)
+ lvars_code = RubyLex.generate_local_variables_assign_code(local_variables)
+ code = "#{lvars_code}\n#{code}"
- def process_nesting_level(tokens = @tokens)
- indent = 0
- in_oneliner_def = nil
- tokens.each_with_index { |t, index|
- # detecting one-liner method definition
- if in_oneliner_def.nil?
- if t[3].allbits?(Ripper::EXPR_ENDFN)
- in_oneliner_def = :ENDFN
- end
- else
- if t[3].allbits?(Ripper::EXPR_ENDFN)
- # continuing
- elsif t[3].allbits?(Ripper::EXPR_BEG)
- if t[2] == '='
- in_oneliner_def = :BODY
+ begin # check if parser error are available
+ verbose, $VERBOSE = $VERBOSE, nil
+ case RUBY_ENGINE
+ when 'ruby'
+ self.class.compile_with_errors_suppressed(code) do |inner_code, line_no|
+ RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no)
end
+ when 'jruby'
+ JRuby.compile_ir(code)
else
- if in_oneliner_def == :BODY
- # one-liner method definition
- indent -= 1
+ catch(:valid) do
+ eval("BEGIN { throw :valid, true }\n#{code}")
+ false
end
- in_oneliner_def = nil
end
- end
-
- case t[1]
- when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
- indent += 1
- when :on_rbracket, :on_rbrace, :on_rparen
- indent -= 1
- when :on_kw
- next if index > 0 and tokens[index - 1][3].allbits?(Ripper::EXPR_FNAME)
- case t[2]
- when 'do'
- syntax_of_do = take_corresponding_syntax_to_kw_do(tokens, index)
- indent += 1 if syntax_of_do == :method_calling
- when 'def', 'case', 'for', 'begin', 'class', 'module'
- indent += 1
- when 'if', 'unless', 'while', 'until'
- # postfix if/unless/while/until must be Ripper::EXPR_LABEL
- indent += 1 unless t[3].allbits?(Ripper::EXPR_LABEL)
- when 'end'
- indent -= 1
+ rescue EncodingError
+ # This is for a hash with invalid encoding symbol, {"\xAE": 1}
+ :unrecoverable_error
+ rescue SyntaxError => e
+ case e.message
+ when /unterminated (?:string|regexp) meets end of file/
+ # "unterminated regexp meets end of file"
+ #
+ # example:
+ # /
+ #
+ # "unterminated string meets end of file"
+ #
+ # example:
+ # '
+ return :recoverable_error
+ when /syntax error, unexpected end-of-input/
+ # "syntax error, unexpected end-of-input, expecting keyword_end"
+ #
+ # example:
+ # if true
+ # hoge
+ # if false
+ # fuga
+ # end
+ return :recoverable_error
+ when /syntax error, unexpected keyword_end/
+ # "syntax error, unexpected keyword_end"
+ #
+ # example:
+ # if (
+ # end
+ #
+ # example:
+ # end
+ return :unrecoverable_error
+ when /syntax error, unexpected '\.'/
+ # "syntax error, unexpected '.'"
+ #
+ # example:
+ # .
+ return :unrecoverable_error
+ when /unexpected tREGEXP_BEG/
+ # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
+ #
+ # example:
+ # method / f /
+ return :unrecoverable_error
+ else
+ return :other_error
end
+ ensure
+ $VERBOSE = verbose
end
- # percent literals are not indented
- }
- indent
- end
+ :valid
+ end
- def is_method_calling?(tokens, index)
- tk = tokens[index]
- if tk[3].anybits?(Ripper::EXPR_CMDARG) and tk[1] == :on_ident
- # The target method call to pass the block with "do".
- return true
- elsif tk[3].anybits?(Ripper::EXPR_ARG) and tk[1] == :on_ident
- non_sp_index = tokens[0..(index - 1)].rindex{ |t| t[1] != :on_sp }
- if non_sp_index
- prev_tk = tokens[non_sp_index]
- if prev_tk[3].anybits?(Ripper::EXPR_DOT) and prev_tk[1] == :on_period
- # The target method call with receiver to pass the block with "do".
- return true
+ def calc_indent_level(opens)
+ indent_level = 0
+ opens.each_with_index do |t, index|
+ case t.event
+ when :on_heredoc_beg
+ if opens[index + 1]&.event != :on_heredoc_beg
+ if t.tok.match?(/^<<[~-]/)
+ indent_level += 1
+ else
+ indent_level = 0
+ end
+ end
+ when :on_tstring_beg, :on_regexp_beg, :on_symbeg, :on_backtick
+ # No indent: "", //, :"", ``
+ # Indent: %(), %r(), %i(), %x()
+ indent_level += 1 if t.tok.start_with? '%'
+ when :on_embdoc_beg
+ indent_level = 0
+ else
+ indent_level += 1 unless t.tok == 'alias' || t.tok == 'undef'
end
end
+ indent_level
end
- false
- end
- def take_corresponding_syntax_to_kw_do(tokens, index)
- syntax_of_do = nil
- # Finding a syntax corresponding to "do".
- index.downto(0) do |i|
- tk = tokens[i]
- # In "continue", the token isn't the corresponding syntax to "do".
- non_sp_index = tokens[0..(i - 1)].rindex{ |t| t[1] != :on_sp }
- first_in_fomula = false
- if non_sp_index.nil?
- first_in_fomula = true
- elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index][1])
- first_in_fomula = true
- end
- if is_method_calling?(tokens, i)
- syntax_of_do = :method_calling
- break if first_in_fomula
- elsif tk[1] == :on_kw && %w{while until for}.include?(tk[2])
- # A loop syntax in front of "do" found.
- #
- # while cond do # also "until" or "for"
- # end
- #
- # This "do" doesn't increment indent because the loop syntax already
- # incremented.
- syntax_of_do = :loop_syntax
- break if first_in_fomula
- end
+ FREE_INDENT_TOKENS = %i[on_tstring_beg on_backtick on_regexp_beg on_symbeg]
+
+ def free_indent_token?(token)
+ FREE_INDENT_TOKENS.include?(token&.event)
end
- syntax_of_do
- end
- def is_the_in_correspond_to_a_for(tokens, index)
- syntax_of_in = nil
- # Finding a syntax corresponding to "do".
- index.downto(0) do |i|
- tk = tokens[i]
- # In "continue", the token isn't the corresponding syntax to "do".
- non_sp_index = tokens[0..(i - 1)].rindex{ |t| t[1] != :on_sp }
- first_in_fomula = false
- if non_sp_index.nil?
- first_in_fomula = true
- elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index][1])
- first_in_fomula = true
- end
- if tk[1] == :on_kw && tk[2] == 'for'
- # A loop syntax in front of "do" found.
- #
- # while cond do # also "until" or "for"
- # end
- #
- # This "do" doesn't increment indent because the loop syntax already
- # incremented.
- syntax_of_in = :for
+ # Calculates the difference of pasted code's indent and indent calculated from tokens
+ def indent_difference(lines, line_results, line_index)
+ loop do
+ _tokens, prev_opens, _next_opens, min_depth = line_results[line_index]
+ open_token = prev_opens.last
+ if !open_token || (open_token.event != :on_heredoc_beg && !free_indent_token?(open_token))
+ # If the leading whitespace is an indent, return the difference
+ indent_level = calc_indent_level(prev_opens.take(min_depth))
+ calculated_indent = 2 * indent_level
+ actual_indent = lines[line_index][/^ */].size
+ return actual_indent - calculated_indent
+ elsif open_token.event == :on_heredoc_beg && open_token.tok.match?(/^<<[^-~]/)
+ return 0
+ end
+ # If the leading whitespace is not an indent but part of a multiline token
+ # Calculate base_indent of the multiline token's beginning line
+ line_index = open_token.pos[0] - 1
end
- break if first_in_fomula
end
- syntax_of_in
- end
- def check_newline_depth_difference
- depth_difference = 0
- open_brace_on_line = 0
- in_oneliner_def = nil
- @tokens.each_with_index do |t, index|
- # detecting one-liner method definition
- if in_oneliner_def.nil?
- if t[3].allbits?(Ripper::EXPR_ENDFN)
- in_oneliner_def = :ENDFN
- end
+ def process_indent_level(tokens, lines, line_index, is_newline)
+ line_results = NestingParser.parse_by_line(tokens)
+ result = line_results[line_index]
+ if result
+ _tokens, prev_opens, next_opens, min_depth = result
else
- if t[3].allbits?(Ripper::EXPR_ENDFN)
- # continuing
- elsif t[3].allbits?(Ripper::EXPR_BEG)
- if t[2] == '='
- in_oneliner_def = :BODY
- end
- else
- if in_oneliner_def == :BODY
- # one-liner method definition
- depth_difference -= 1
- end
- in_oneliner_def = nil
- end
+ # When last line is empty
+ prev_opens = next_opens = line_results.last[2]
+ min_depth = next_opens.size
end
- case t[1]
- when :on_ignored_nl, :on_nl, :on_comment
- if index != (@tokens.size - 1) and in_oneliner_def != :BODY
- depth_difference = 0
- open_brace_on_line = 0
- end
- next
- when :on_sp
- next
- end
- case t[1]
- when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
- depth_difference += 1
- open_brace_on_line += 1
- when :on_rbracket, :on_rbrace, :on_rparen
- depth_difference -= 1 if open_brace_on_line > 0
- when :on_kw
- next if index > 0 and @tokens[index - 1][3].allbits?(Ripper::EXPR_FNAME)
- case t[2]
- when 'do'
- syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index)
- depth_difference += 1 if syntax_of_do == :method_calling
- when 'def', 'case', 'for', 'begin', 'class', 'module'
- depth_difference += 1
- when 'if', 'unless', 'while', 'until', 'rescue'
- # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL
- unless t[3].allbits?(Ripper::EXPR_LABEL)
- depth_difference += 1
- end
- when 'else', 'elsif', 'ensure', 'when'
- depth_difference += 1
- when 'in'
- unless is_the_in_correspond_to_a_for(@tokens, index)
- depth_difference += 1
- end
- when 'end'
- depth_difference -= 1
- end
- end
- end
- depth_difference
- end
+ # To correctly indent line like `end.map do`, we use shortest open tokens on each line for indent calculation.
+ # Shortest open tokens can be calculated by `opens.take(min_depth)`
+ indent = 2 * calc_indent_level(prev_opens.take(min_depth))
- def check_corresponding_token_depth(lines, line_index)
- corresponding_token_depth = nil
- is_first_spaces_of_line = true
- is_first_printable_of_line = true
- spaces_of_nest = []
- spaces_at_line_head = 0
- open_brace_on_line = 0
- in_oneliner_def = nil
-
- if heredoc_scope?
- return lines[line_index][/^ */].length
- end
+ preserve_indent = lines[line_index - (is_newline ? 1 : 0)][/^ */].size
- @tokens.each_with_index do |t, index|
- # detecting one-liner method definition
- if in_oneliner_def.nil?
- if t[3].allbits?(Ripper::EXPR_ENDFN)
- in_oneliner_def = :ENDFN
- end
+ prev_open_token = prev_opens.last
+ next_open_token = next_opens.last
+
+ # Calculates base indent for pasted code on the line where prev_open_token is located
+ # irb(main):001:1* if a # base_indent is 2, indent calculated from tokens is 0
+ # irb(main):002:1* if b # base_indent is 6, indent calculated from tokens is 2
+ # irb(main):003:0> c # base_indent is 6, indent calculated from tokens is 4
+ if prev_open_token
+ base_indent = [0, indent_difference(lines, line_results, prev_open_token.pos[0] - 1)].max
else
- if t[3].allbits?(Ripper::EXPR_ENDFN)
- # continuing
- elsif t[3].allbits?(Ripper::EXPR_BEG)
- if t[2] == '='
- in_oneliner_def = :BODY
- end
- else
- if in_oneliner_def == :BODY
- # one-liner method definition
- if is_first_printable_of_line
- corresponding_token_depth = spaces_of_nest.pop
- else
- spaces_of_nest.pop
- corresponding_token_depth = nil
- end
- end
- in_oneliner_def = nil
- end
+ base_indent = 0
end
- case t[1]
- when :on_ignored_nl, :on_nl, :on_comment
- if in_oneliner_def != :BODY
- corresponding_token_depth = nil
- spaces_at_line_head = 0
- is_first_spaces_of_line = true
- is_first_printable_of_line = true
- open_brace_on_line = 0
+ if free_indent_token?(prev_open_token)
+ if is_newline && prev_open_token.pos[0] == line_index
+ # First newline inside free-indent token
+ base_indent + indent
+ else
+ # Accept any number of indent inside free-indent token
+ preserve_indent
end
- next
- when :on_sp
- spaces_at_line_head = t[2].count(' ') if is_first_spaces_of_line
- is_first_spaces_of_line = false
- next
- end
- case t[1]
- when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
- spaces_of_nest.push(spaces_at_line_head + open_brace_on_line * 2)
- open_brace_on_line += 1
- when :on_rbracket, :on_rbrace, :on_rparen
- if is_first_printable_of_line
- corresponding_token_depth = spaces_of_nest.pop
+ elsif prev_open_token&.event == :on_embdoc_beg || next_open_token&.event == :on_embdoc_beg
+ if prev_open_token&.event == next_open_token&.event
+ # Accept any number of indent inside embdoc content
+ preserve_indent
else
- spaces_of_nest.pop
- corresponding_token_depth = nil
+ # =begin or =end
+ 0
end
- open_brace_on_line -= 1
- when :on_kw
- next if index > 0 and @tokens[index - 1][3].allbits?(Ripper::EXPR_FNAME)
- case t[2]
- when 'do'
- syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index)
- if syntax_of_do == :method_calling
- spaces_of_nest.push(spaces_at_line_head)
- end
- when 'def', 'case', 'for', 'begin', 'class', 'module'
- spaces_of_nest.push(spaces_at_line_head)
- when 'rescue'
- unless t[3].allbits?(Ripper::EXPR_LABEL)
- corresponding_token_depth = spaces_of_nest.last
- end
- when 'if', 'unless', 'while', 'until'
- # postfix if/unless/while/until must be Ripper::EXPR_LABEL
- unless t[3].allbits?(Ripper::EXPR_LABEL)
- spaces_of_nest.push(spaces_at_line_head)
- end
- when 'else', 'elsif', 'ensure', 'when', 'in'
- corresponding_token_depth = spaces_of_nest.last
- when 'end'
- if is_first_printable_of_line
- corresponding_token_depth = spaces_of_nest.pop
+ elsif prev_open_token&.event == :on_heredoc_beg
+ tok = prev_open_token.tok
+ if prev_opens.size <= next_opens.size
+ if is_newline && lines[line_index].empty? && line_results[line_index - 1][1].last != next_open_token
+ # First line in heredoc
+ tok.match?(/^<<[-~]/) ? base_indent + indent : indent
+ elsif tok.match?(/^<<~/)
+ # Accept extra indent spaces inside `<<~` heredoc
+ [base_indent + indent, preserve_indent].max
else
- spaces_of_nest.pop
- corresponding_token_depth = nil
+ # Accept any number of indent inside other heredoc
+ preserve_indent
end
+ else
+ # Heredoc close
+ prev_line_indent_level = calc_indent_level(prev_opens)
+ tok.match?(/^<<[~-]/) ? base_indent + 2 * (prev_line_indent_level - 1) : 0
end
+ else
+ base_indent + indent
end
- is_first_spaces_of_line = false
- is_first_printable_of_line = false
end
- corresponding_token_depth
- end
- def check_string_literal(tokens)
- i = 0
- start_token = []
- end_type = []
- while i < tokens.size
- t = tokens[i]
- case t[1]
- when *end_type.last
- start_token.pop
- end_type.pop
- when :on_tstring_beg
- start_token << t
- end_type << [:on_tstring_end, :on_label_end]
- when :on_regexp_beg
- start_token << t
- end_type << :on_regexp_end
- when :on_symbeg
- acceptable_single_tokens = %i{on_ident on_const on_op on_cvar on_ivar on_gvar on_kw on_int on_backtick}
- if (i + 1) < tokens.size
- if acceptable_single_tokens.all?{ |st| tokens[i + 1][1] != st }
- start_token << t
- end_type << :on_tstring_end
- else
- i += 1
- end
- end
- when :on_backtick
- start_token << t
- end_type << :on_tstring_end
- when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg
- start_token << t
- end_type << :on_tstring_end
- when :on_heredoc_beg
- start_token << t
- end_type << :on_heredoc_end
- end
- i += 1
- end
- start_token.last.nil? ? '' : start_token.last
- end
+ LTYPE_TOKENS = %i[
+ on_heredoc_beg on_tstring_beg
+ on_regexp_beg on_symbeg on_backtick
+ on_symbols_beg on_qsymbols_beg
+ on_words_beg on_qwords_beg
+ ]
- def process_literal_type(tokens = @tokens)
- start_token = check_string_literal(tokens)
- case start_token[1]
- when :on_tstring_beg
- case start_token[2]
- when ?" then ?"
- when /^%.$/ then ?"
- when /^%Q.$/ then ?"
- when ?' then ?'
- when /^%q.$/ then ?'
- end
- when :on_regexp_beg then ?/
- when :on_symbeg then ?:
- when :on_backtick then ?`
- when :on_qwords_beg then ?]
- when :on_words_beg then ?]
- when :on_qsymbols_beg then ?]
- when :on_symbols_beg then ?]
- when :on_heredoc_beg
- start_token[2] =~ /<<[-~]?(['"`])[_a-zA-Z0-9]+\1/
- case $1
- when ?" then ?"
- when ?' then ?'
- when ?` then ?`
- else ?"
+ def ltype_from_open_tokens(opens)
+ start_token = opens.reverse_each.find do |tok|
+ LTYPE_TOKENS.include?(tok.event)
end
- else
- nil
- end
- end
+ return nil unless start_token
- def check_termination_in_prev_line(code, context: nil)
- tokens = self.class.ripper_lex_without_warning(code, context: context)
- past_first_newline = false
- index = tokens.rindex do |t|
- # traverse first token before last line
- if past_first_newline
- if t.tok.include?("\n")
- true
+ case start_token&.event
+ when :on_tstring_beg
+ case start_token&.tok
+ when ?" then ?"
+ when /^%.$/ then ?"
+ when /^%Q.$/ then ?"
+ when ?' then ?'
+ when /^%q.$/ then ?'
end
- elsif t.tok.include?("\n")
- past_first_newline = true
- false
+ when :on_regexp_beg then ?/
+ when :on_symbeg then ?:
+ when :on_backtick then ?`
+ when :on_qwords_beg then ?]
+ when :on_words_beg then ?]
+ when :on_qsymbols_beg then ?]
+ when :on_symbols_beg then ?]
+ when :on_heredoc_beg
+ start_token&.tok =~ /<<[-~]?(['"`])\w+\1/
+ $1 || ?"
else
- false
+ nil
end
end
- if index
- first_token = nil
- last_line_tokens = tokens[(index + 1)..(tokens.size - 1)]
- last_line_tokens.each do |t|
- unless [:on_sp, :on_ignored_sp, :on_comment].include?(t.event)
- first_token = t
- break
- end
- end
- if first_token.nil?
- return false
- elsif first_token && first_token.state == Ripper::EXPR_DOT
- return false
- else
- tokens_without_last_line = tokens[0..index]
- ltype = process_literal_type(tokens_without_last_line)
- indent = process_nesting_level(tokens_without_last_line)
- continue = process_continue(tokens_without_last_line)
- code_block_open = check_code_block(tokens_without_last_line.map(&:tok).join(''), tokens_without_last_line)
- if ltype or indent > 0 or continue or code_block_open
- return false
+
+ def check_termination_in_prev_line(code, local_variables:)
+ tokens = self.class.ripper_lex_without_warning(code, local_variables: local_variables)
+ past_first_newline = false
+ index = tokens.rindex do |t|
+ # traverse first token before last line
+ if past_first_newline
+ if t.tok.include?("\n")
+ true
+ end
+ elsif t.tok.include?("\n")
+ past_first_newline = true
+ false
else
- return last_line_tokens.map(&:tok).join('')
+ false
end
end
- end
- false
- end
- private
+ if index
+ first_token = nil
+ last_line_tokens = tokens[(index + 1)..(tokens.size - 1)]
+ last_line_tokens.each do |t|
+ unless [:on_sp, :on_ignored_sp, :on_comment].include?(t.event)
+ first_token = t
+ break
+ end
+ end
- def heredoc_scope?
- heredoc_tokens = @tokens.select { |t| [:on_heredoc_beg, :on_heredoc_end].include?(t.event) }
- heredoc_tokens[-1]&.event == :on_heredoc_beg
+ if first_token && first_token.state != Ripper::EXPR_DOT
+ tokens_without_last_line = tokens[0..index]
+ code_without_last_line = tokens_without_last_line.map(&:tok).join
+ opens_without_last_line = NestingParser.open_tokens(tokens_without_last_line)
+ if code_terminated?(code_without_last_line, tokens_without_last_line, opens_without_last_line, local_variables: local_variables)
+ return last_line_tokens.map(&:tok).join
+ end
+ end
+ end
+ false
+ end
end
+ # :startdoc:
end
-# :startdoc:
+
+RubyLex = IRB::RubyLex
+Object.deprecate_constant(:RubyLex)