diff options
Diffstat (limited to 'lib/irb/ruby-lex.rb')
-rw-r--r-- | lib/irb/ruby-lex.rb | 1121 |
1 files changed, 381 insertions, 740 deletions
diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb index d7ac17bd79..cfe36be83f 100644 --- a/lib/irb/ruby-lex.rb +++ b/lib/irb/ruby-lex.rb @@ -1,833 +1,474 @@ -# frozen_string_literal: false +# frozen_string_literal: true # # irb/ruby-lex.rb - ruby lexcal analyzer -# $Release Version: 0.9.6$ -# $Revision$ # by Keiju ISHITSUKA(keiju@ruby-lang.org) # -# -- -# -# -# require "ripper" require "jruby" if RUBY_ENGINE == "jruby" - -# :stopdoc: -class RubyLex - - class TerminateLineInput < StandardError - def initialize - super("Terminate Line Input") +require_relative "nesting_parser" + +module IRB + # :stopdoc: + class RubyLex + ASSIGNMENT_NODE_TYPES = [ + # Local, instance, global, class, constant, instance, and index assignment: + # "foo = bar", + # "@foo = bar", + # "$foo = bar", + # "@@foo = bar", + # "::Foo = bar", + # "a::Foo = bar", + # "Foo = bar" + # "foo.bar = 1" + # "foo[1] = bar" + :assign, + + # Operation assignment: + # "foo += bar" + # "foo -= bar" + # "foo ||= bar" + # "foo &&= bar" + :opassign, + + # Multiple assignment: + # "foo, bar = 1, 2 + :massign, + ] + + class TerminateLineInput < StandardError + def initialize + super("Terminate Line Input") + end end - end - def initialize - @exp_line_no = @line_no = 1 - @indent = 0 - @continue = false - @line = "" - @prompt = nil - end + def self.compile_with_errors_suppressed(code, line_no: 1) + begin + result = yield code, line_no + rescue ArgumentError + # Ruby can issue an error for the code if there is an + # incomplete magic comment for encoding in it. Force an + # expression with a new line before the code in this + # case to prevent magic comment handling. To make sure + # line numbers in the lexed code remain the same, + # decrease the line number by one. + code = ";\n#{code}" + line_no -= 1 + result = yield code, line_no + end + result + end - def self.compile_with_errors_suppressed(code, line_no: 1) - begin - result = yield code, line_no - rescue ArgumentError - # Ruby can issue an error for the code if there is an - # incomplete magic comment for encoding in it. Force an - # expression with a new line before the code in this - # case to prevent magic comment handling. To make sure - # line numbers in the lexed code remain the same, - # decrease the line number by one. - code = ";\n#{code}" - line_no -= 1 - result = yield code, line_no + ERROR_TOKENS = [ + :on_parse_error, + :compile_error, + :on_assign_error, + :on_alias_error, + :on_class_name_error, + :on_param_error + ] + + def self.generate_local_variables_assign_code(local_variables) + "#{local_variables.join('=')}=nil;" unless local_variables.empty? end - result - end - # io functions - def set_input(io, p = nil, context: nil, &block) - @io = io - if @io.respond_to?(:check_termination) - @io.check_termination do |code| - if Reline::IOGate.in_pasting? - lex = RubyLex.new - rest = lex.check_termination_in_prev_line(code, context: context) - if rest - Reline.delete_text - rest.bytes.reverse_each do |c| - Reline.ungetc(c) - end - true - else - false - end - else - code.gsub!(/\s*\z/, '').concat("\n") - ltype, indent, continue, code_block_open = check_state(code, context: context) - if ltype or indent > 0 or continue or code_block_open - false - else - true - end - end + # Some part of the code is not included in Ripper's token. + # Example: DATA part, token after heredoc_beg when heredoc has unclosed embexpr. + # With interpolated tokens, tokens.map(&:tok).join will be equal to code. + def self.interpolate_ripper_ignored_tokens(code, tokens) + line_positions = [0] + code.lines.each do |line| + line_positions << line_positions.last + line.bytesize end - end - if @io.respond_to?(:dynamic_prompt) - @io.dynamic_prompt do |lines| - lines << '' if lines.empty? - result = [] - tokens = self.class.ripper_lex_without_warning(lines.map{ |l| l + "\n" }.join, context: context) - code = String.new - partial_tokens = [] - unprocessed_tokens = [] - line_num_offset = 0 - tokens.each do |t| - partial_tokens << t - unprocessed_tokens << t - if t[2].include?("\n") - t_str = t[2] - t_str.each_line("\n") do |s| - code << s << "\n" - ltype, indent, continue, code_block_open = check_state(code, partial_tokens, context: context) - result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset) - line_num_offset += 1 - end - unprocessed_tokens = [] - else - code << t[2] - end - end - unless unprocessed_tokens.empty? - ltype, indent, continue, code_block_open = check_state(code, unprocessed_tokens, context: context) - result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset) + prev_byte_pos = 0 + interpolated = [] + prev_line = 1 + tokens.each do |t| + line, col = t.pos + byte_pos = line_positions[line - 1] + col + if prev_byte_pos < byte_pos + tok = code.byteslice(prev_byte_pos...byte_pos) + pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]] + interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0) + prev_line += tok.count("\n") end - result + interpolated << t + prev_byte_pos = byte_pos + t.tok.bytesize + prev_line += t.tok.count("\n") end + if prev_byte_pos < code.bytesize + tok = code.byteslice(prev_byte_pos..) + pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]] + interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0) + end + interpolated end - if p.respond_to?(:call) - @input = p - elsif block_given? - @input = block - else - @input = Proc.new{@io.gets} - end - end - - def set_prompt(p = nil, &block) - p = block if block_given? - if p.respond_to?(:call) - @prompt = p - else - @prompt = Proc.new{print p} - end - end - ERROR_TOKENS = [ - :on_parse_error, - :compile_error, - :on_assign_error, - :on_alias_error, - :on_class_name_error, - :on_param_error - ] - - def self.ripper_lex_without_warning(code, context: nil) - verbose, $VERBOSE = $VERBOSE, nil - if context - lvars = context&.workspace&.binding&.local_variables - if lvars && !lvars.empty? - code = "#{lvars.join('=')}=nil\n#{code}" + def self.ripper_lex_without_warning(code, local_variables: []) + verbose, $VERBOSE = $VERBOSE, nil + lvars_code = generate_local_variables_assign_code(local_variables) + original_code = code + if lvars_code + code = "#{lvars_code}\n#{code}" line_no = 0 else line_no = 1 end - end - tokens = nil - compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, line_no| - lexer = Ripper::Lexer.new(inner_code, '-', line_no) - if lexer.respond_to?(:scan) # Ruby 2.7+ + + compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, line_no| + lexer = Ripper::Lexer.new(inner_code, '-', line_no) tokens = [] - pos_to_index = {} lexer.scan.each do |t| next if t.pos.first == 0 - if pos_to_index.has_key?(t[0]) - index = pos_to_index[t[0]] - found_tk = tokens[index] - if ERROR_TOKENS.include?(found_tk[1]) && !ERROR_TOKENS.include?(t[1]) - tokens[index] = t - end + prev_tk = tokens.last + position_overlapped = prev_tk && t.pos[0] == prev_tk.pos[0] && t.pos[1] < prev_tk.pos[1] + prev_tk.tok.bytesize + if position_overlapped + tokens[-1] = t if ERROR_TOKENS.include?(prev_tk.event) && !ERROR_TOKENS.include?(t.event) else - pos_to_index[t[0]] = tokens.size tokens << t end end - else - tokens = lexer.parse - end - end - tokens - ensure - $VERBOSE = verbose - end - - def find_prev_spaces(line_index) - return 0 if @tokens.size == 0 - md = @tokens[0][2].match(/(\A +)/) - prev_spaces = md.nil? ? 0 : md[1].count(' ') - line_count = 0 - @tokens.each_with_index do |t, i| - if t[2].include?("\n") - line_count += t[2].count("\n") - if line_count >= line_index - return prev_spaces - end - if (@tokens.size - 1) > i - md = @tokens[i + 1][2].match(/(\A +)/) - prev_spaces = md.nil? ? 0 : md[1].count(' ') - end - end - end - prev_spaces - end - - def set_auto_indent(context) - if @io.respond_to?(:auto_indent) and context.auto_indent_mode - @io.auto_indent do |lines, line_index, byte_pointer, is_newline| - if is_newline - @tokens = self.class.ripper_lex_without_warning(lines[0..line_index].join("\n"), context: context) - prev_spaces = find_prev_spaces(line_index) - depth_difference = check_newline_depth_difference - depth_difference = 0 if depth_difference < 0 - prev_spaces + depth_difference * 2 - else - code = line_index.zero? ? '' : lines[0..(line_index - 1)].map{ |l| l + "\n" }.join - last_line = lines[line_index]&.byteslice(0, byte_pointer) - code += last_line if last_line - @tokens = self.class.ripper_lex_without_warning(code, context: context) - corresponding_token_depth = check_corresponding_token_depth(lines, line_index) - if corresponding_token_depth - corresponding_token_depth - else - nil - end - end + interpolate_ripper_ignored_tokens(original_code, tokens) end + ensure + $VERBOSE = verbose end - end - - def check_state(code, tokens = nil, context: nil) - tokens = self.class.ripper_lex_without_warning(code, context: context) unless tokens - ltype = process_literal_type(tokens) - indent = process_nesting_level(tokens) - continue = process_continue(tokens) - code_block_open = check_code_block(code, tokens) - [ltype, indent, continue, code_block_open] - end - def prompt - if @prompt - @prompt.call(@ltype, @indent, @continue, @line_no) + def check_code_state(code, local_variables:) + tokens = self.class.ripper_lex_without_warning(code, local_variables: local_variables) + opens = NestingParser.open_tokens(tokens) + [tokens, opens, code_terminated?(code, tokens, opens, local_variables: local_variables)] end - end - - def initialize_input - @ltype = nil - @indent = 0 - @continue = false - @line = "" - @exp_line_no = @line_no - @code_block_open = false - end - def each_top_level_statement - initialize_input - catch(:TERM_INPUT) do - loop do - begin - prompt - unless l = lex - throw :TERM_INPUT if @line == '' - else - @line_no += l.count("\n") - if l == "\n" - @exp_line_no += 1 - next - end - @line.concat l - if @code_block_open or @ltype or @continue or @indent > 0 - next - end - end - if @line != "\n" - @line.force_encoding(@io.encoding) - yield @line, @exp_line_no - end - raise TerminateLineInput if @io.eof? - @line = '' - @exp_line_no = @line_no - - @indent = 0 - rescue TerminateLineInput - initialize_input - prompt - end + def code_terminated?(code, tokens, opens, local_variables:) + case check_code_syntax(code, local_variables: local_variables) + when :unrecoverable_error + true + when :recoverable_error + false + when :other_error + opens.empty? && !should_continue?(tokens) + when :valid + !should_continue?(tokens) end end - end - def lex - line = @input.call - if @io.respond_to?(:check_termination) - return line # multiline - end - code = @line + (line.nil? ? '' : line) - code.gsub!(/\s*\z/, '').concat("\n") - @tokens = self.class.ripper_lex_without_warning(code) - @continue = process_continue - @code_block_open = check_code_block(code) - @indent = process_nesting_level - @ltype = process_literal_type - line - end - - def process_continue(tokens = @tokens) - # last token is always newline - if tokens.size >= 2 and tokens[-2][1] == :on_regexp_end - # end of regexp literal - return false - elsif tokens.size >= 2 and tokens[-2][1] == :on_semicolon - return false - elsif tokens.size >= 2 and tokens[-2][1] == :on_kw and ['begin', 'else', 'ensure'].include?(tokens[-2][2]) - return false - elsif !tokens.empty? and tokens.last[2] == "\\\n" - return true - elsif tokens.size >= 1 and tokens[-1][1] == :on_heredoc_end # "EOH\n" - return false - elsif tokens.size >= 2 and defined?(Ripper::EXPR_BEG) and tokens[-2][3].anybits?(Ripper::EXPR_BEG | Ripper::EXPR_FNAME) and tokens[-2][2] !~ /\A\.\.\.?\z/ - # end of literal except for regexp - # endless range at end of line is not a continue - return true - end - false - end - - def check_code_block(code, tokens = @tokens) - return true if tokens.empty? - if tokens.last[1] == :on_heredoc_beg - return true - end + def assignment_expression?(code, local_variables:) + # Try to parse the code and check if the last of possibly multiple + # expressions is an assignment type. - begin # check if parser error are available + # If the expression is invalid, Ripper.sexp should return nil which will + # result in false being returned. Any valid expression should return an + # s-expression where the second element of the top level array is an + # array of parsed expressions. The first element of each expression is the + # expression's type. verbose, $VERBOSE = $VERBOSE, nil - case RUBY_ENGINE - when 'ruby' - self.class.compile_with_errors_suppressed(code) do |inner_code, line_no| - RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no) - end - when 'jruby' - JRuby.compile_ir(code) - else - catch(:valid) do - eval("BEGIN { throw :valid, true }\n#{code}") - false - end - end - rescue EncodingError - # This is for a hash with invalid encoding symbol, {"\xAE": 1} - rescue SyntaxError => e - case e.message - when /unterminated (?:string|regexp) meets end of file/ - # "unterminated regexp meets end of file" - # - # example: - # / - # - # "unterminated string meets end of file" - # - # example: - # ' - return true - when /syntax error, unexpected end-of-input/ - # "syntax error, unexpected end-of-input, expecting keyword_end" - # - # example: - # if true - # hoge - # if false - # fuga - # end - return true - when /syntax error, unexpected keyword_end/ - # "syntax error, unexpected keyword_end" - # - # example: - # if ( - # end - # - # example: - # end - return false - when /syntax error, unexpected '\.'/ - # "syntax error, unexpected '.'" - # - # example: - # . - return false - when /unexpected tREGEXP_BEG/ - # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('" - # - # example: - # method / f / - return false - end + code = "#{RubyLex.generate_local_variables_assign_code(local_variables) || 'nil;'}\n#{code}" + # Get the last node_type of the line. drop(1) is to ignore the local_variables_assign_code part. + node_type = Ripper.sexp(code)&.dig(1)&.drop(1)&.dig(-1, 0) + ASSIGNMENT_NODE_TYPES.include?(node_type) ensure $VERBOSE = verbose end - if defined?(Ripper::EXPR_BEG) - last_lex_state = tokens.last[3] - if last_lex_state.allbits?(Ripper::EXPR_BEG) - return false - elsif last_lex_state.allbits?(Ripper::EXPR_DOT) - return true - elsif last_lex_state.allbits?(Ripper::EXPR_CLASS) - return true - elsif last_lex_state.allbits?(Ripper::EXPR_FNAME) - return true - elsif last_lex_state.allbits?(Ripper::EXPR_VALUE) - return true - elsif last_lex_state.allbits?(Ripper::EXPR_ARG) - return false + def should_continue?(tokens) + # Look at the last token and check if IRB need to continue reading next line. + # Example code that should continue: `a\` `a +` `a.` + # Trailing spaces, newline, comments are skipped + return true if tokens.last&.event == :on_sp && tokens.last.tok == "\\\n" + + tokens.reverse_each do |token| + case token.event + when :on_sp, :on_nl, :on_ignored_nl, :on_comment, :on_embdoc_beg, :on_embdoc, :on_embdoc_end + # Skip + when :on_regexp_end, :on_heredoc_end, :on_semicolon + # State is EXPR_BEG but should not continue + return false + else + # Endless range should not continue + return false if token.event == :on_op && token.tok.match?(/\A\.\.\.?\z/) + + # EXPR_DOT and most of the EXPR_BEG should continue + return token.state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_DOT) + end end + false end - false - end + def check_code_syntax(code, local_variables:) + lvars_code = RubyLex.generate_local_variables_assign_code(local_variables) + code = "#{lvars_code}\n#{code}" - def process_nesting_level(tokens = @tokens) - indent = 0 - in_oneliner_def = nil - tokens.each_with_index { |t, index| - # detecting one-liner method definition - if in_oneliner_def.nil? - if t[3].allbits?(Ripper::EXPR_ENDFN) - in_oneliner_def = :ENDFN - end - else - if t[3].allbits?(Ripper::EXPR_ENDFN) - # continuing - elsif t[3].allbits?(Ripper::EXPR_BEG) - if t[2] == '=' - in_oneliner_def = :BODY + begin # check if parser error are available + verbose, $VERBOSE = $VERBOSE, nil + case RUBY_ENGINE + when 'ruby' + self.class.compile_with_errors_suppressed(code) do |inner_code, line_no| + RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no) end + when 'jruby' + JRuby.compile_ir(code) else - if in_oneliner_def == :BODY - # one-liner method definition - indent -= 1 + catch(:valid) do + eval("BEGIN { throw :valid, true }\n#{code}") + false end - in_oneliner_def = nil end - end - - case t[1] - when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg - indent += 1 - when :on_rbracket, :on_rbrace, :on_rparen - indent -= 1 - when :on_kw - next if index > 0 and tokens[index - 1][3].allbits?(Ripper::EXPR_FNAME) - case t[2] - when 'do' - syntax_of_do = take_corresponding_syntax_to_kw_do(tokens, index) - indent += 1 if syntax_of_do == :method_calling - when 'def', 'case', 'for', 'begin', 'class', 'module' - indent += 1 - when 'if', 'unless', 'while', 'until' - # postfix if/unless/while/until must be Ripper::EXPR_LABEL - indent += 1 unless t[3].allbits?(Ripper::EXPR_LABEL) - when 'end' - indent -= 1 + rescue EncodingError + # This is for a hash with invalid encoding symbol, {"\xAE": 1} + :unrecoverable_error + rescue SyntaxError => e + case e.message + when /unterminated (?:string|regexp) meets end of file/ + # "unterminated regexp meets end of file" + # + # example: + # / + # + # "unterminated string meets end of file" + # + # example: + # ' + return :recoverable_error + when /syntax error, unexpected end-of-input/ + # "syntax error, unexpected end-of-input, expecting keyword_end" + # + # example: + # if true + # hoge + # if false + # fuga + # end + return :recoverable_error + when /syntax error, unexpected keyword_end/ + # "syntax error, unexpected keyword_end" + # + # example: + # if ( + # end + # + # example: + # end + return :unrecoverable_error + when /syntax error, unexpected '\.'/ + # "syntax error, unexpected '.'" + # + # example: + # . + return :unrecoverable_error + when /unexpected tREGEXP_BEG/ + # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('" + # + # example: + # method / f / + return :unrecoverable_error + else + return :other_error end + ensure + $VERBOSE = verbose end - # percent literals are not indented - } - indent - end + :valid + end - def is_method_calling?(tokens, index) - tk = tokens[index] - if tk[3].anybits?(Ripper::EXPR_CMDARG) and tk[1] == :on_ident - # The target method call to pass the block with "do". - return true - elsif tk[3].anybits?(Ripper::EXPR_ARG) and tk[1] == :on_ident - non_sp_index = tokens[0..(index - 1)].rindex{ |t| t[1] != :on_sp } - if non_sp_index - prev_tk = tokens[non_sp_index] - if prev_tk[3].anybits?(Ripper::EXPR_DOT) and prev_tk[1] == :on_period - # The target method call with receiver to pass the block with "do". - return true + def calc_indent_level(opens) + indent_level = 0 + opens.each_with_index do |t, index| + case t.event + when :on_heredoc_beg + if opens[index + 1]&.event != :on_heredoc_beg + if t.tok.match?(/^<<[~-]/) + indent_level += 1 + else + indent_level = 0 + end + end + when :on_tstring_beg, :on_regexp_beg, :on_symbeg, :on_backtick + # No indent: "", //, :"", `` + # Indent: %(), %r(), %i(), %x() + indent_level += 1 if t.tok.start_with? '%' + when :on_embdoc_beg + indent_level = 0 + else + indent_level += 1 unless t.tok == 'alias' || t.tok == 'undef' end end + indent_level end - false - end - def take_corresponding_syntax_to_kw_do(tokens, index) - syntax_of_do = nil - # Finding a syntax corresponding to "do". - index.downto(0) do |i| - tk = tokens[i] - # In "continue", the token isn't the corresponding syntax to "do". - non_sp_index = tokens[0..(i - 1)].rindex{ |t| t[1] != :on_sp } - first_in_fomula = false - if non_sp_index.nil? - first_in_fomula = true - elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index][1]) - first_in_fomula = true - end - if is_method_calling?(tokens, i) - syntax_of_do = :method_calling - break if first_in_fomula - elsif tk[1] == :on_kw && %w{while until for}.include?(tk[2]) - # A loop syntax in front of "do" found. - # - # while cond do # also "until" or "for" - # end - # - # This "do" doesn't increment indent because the loop syntax already - # incremented. - syntax_of_do = :loop_syntax - break if first_in_fomula - end + FREE_INDENT_TOKENS = %i[on_tstring_beg on_backtick on_regexp_beg on_symbeg] + + def free_indent_token?(token) + FREE_INDENT_TOKENS.include?(token&.event) end - syntax_of_do - end - def is_the_in_correspond_to_a_for(tokens, index) - syntax_of_in = nil - # Finding a syntax corresponding to "do". - index.downto(0) do |i| - tk = tokens[i] - # In "continue", the token isn't the corresponding syntax to "do". - non_sp_index = tokens[0..(i - 1)].rindex{ |t| t[1] != :on_sp } - first_in_fomula = false - if non_sp_index.nil? - first_in_fomula = true - elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index][1]) - first_in_fomula = true - end - if tk[1] == :on_kw && tk[2] == 'for' - # A loop syntax in front of "do" found. - # - # while cond do # also "until" or "for" - # end - # - # This "do" doesn't increment indent because the loop syntax already - # incremented. - syntax_of_in = :for + # Calculates the difference of pasted code's indent and indent calculated from tokens + def indent_difference(lines, line_results, line_index) + loop do + _tokens, prev_opens, _next_opens, min_depth = line_results[line_index] + open_token = prev_opens.last + if !open_token || (open_token.event != :on_heredoc_beg && !free_indent_token?(open_token)) + # If the leading whitespace is an indent, return the difference + indent_level = calc_indent_level(prev_opens.take(min_depth)) + calculated_indent = 2 * indent_level + actual_indent = lines[line_index][/^ */].size + return actual_indent - calculated_indent + elsif open_token.event == :on_heredoc_beg && open_token.tok.match?(/^<<[^-~]/) + return 0 + end + # If the leading whitespace is not an indent but part of a multiline token + # Calculate base_indent of the multiline token's beginning line + line_index = open_token.pos[0] - 1 end - break if first_in_fomula end - syntax_of_in - end - def check_newline_depth_difference - depth_difference = 0 - open_brace_on_line = 0 - in_oneliner_def = nil - @tokens.each_with_index do |t, index| - # detecting one-liner method definition - if in_oneliner_def.nil? - if t[3].allbits?(Ripper::EXPR_ENDFN) - in_oneliner_def = :ENDFN - end + def process_indent_level(tokens, lines, line_index, is_newline) + line_results = NestingParser.parse_by_line(tokens) + result = line_results[line_index] + if result + _tokens, prev_opens, next_opens, min_depth = result else - if t[3].allbits?(Ripper::EXPR_ENDFN) - # continuing - elsif t[3].allbits?(Ripper::EXPR_BEG) - if t[2] == '=' - in_oneliner_def = :BODY - end - else - if in_oneliner_def == :BODY - # one-liner method definition - depth_difference -= 1 - end - in_oneliner_def = nil - end + # When last line is empty + prev_opens = next_opens = line_results.last[2] + min_depth = next_opens.size end - case t[1] - when :on_ignored_nl, :on_nl, :on_comment - if index != (@tokens.size - 1) and in_oneliner_def != :BODY - depth_difference = 0 - open_brace_on_line = 0 - end - next - when :on_sp - next - end - case t[1] - when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg - depth_difference += 1 - open_brace_on_line += 1 - when :on_rbracket, :on_rbrace, :on_rparen - depth_difference -= 1 if open_brace_on_line > 0 - when :on_kw - next if index > 0 and @tokens[index - 1][3].allbits?(Ripper::EXPR_FNAME) - case t[2] - when 'do' - syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index) - depth_difference += 1 if syntax_of_do == :method_calling - when 'def', 'case', 'for', 'begin', 'class', 'module' - depth_difference += 1 - when 'if', 'unless', 'while', 'until', 'rescue' - # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL - unless t[3].allbits?(Ripper::EXPR_LABEL) - depth_difference += 1 - end - when 'else', 'elsif', 'ensure', 'when' - depth_difference += 1 - when 'in' - unless is_the_in_correspond_to_a_for(@tokens, index) - depth_difference += 1 - end - when 'end' - depth_difference -= 1 - end - end - end - depth_difference - end + # To correctly indent line like `end.map do`, we use shortest open tokens on each line for indent calculation. + # Shortest open tokens can be calculated by `opens.take(min_depth)` + indent = 2 * calc_indent_level(prev_opens.take(min_depth)) - def check_corresponding_token_depth(lines, line_index) - corresponding_token_depth = nil - is_first_spaces_of_line = true - is_first_printable_of_line = true - spaces_of_nest = [] - spaces_at_line_head = 0 - open_brace_on_line = 0 - in_oneliner_def = nil - - if heredoc_scope? - return lines[line_index][/^ */].length - end + preserve_indent = lines[line_index - (is_newline ? 1 : 0)][/^ */].size - @tokens.each_with_index do |t, index| - # detecting one-liner method definition - if in_oneliner_def.nil? - if t[3].allbits?(Ripper::EXPR_ENDFN) - in_oneliner_def = :ENDFN - end + prev_open_token = prev_opens.last + next_open_token = next_opens.last + + # Calculates base indent for pasted code on the line where prev_open_token is located + # irb(main):001:1* if a # base_indent is 2, indent calculated from tokens is 0 + # irb(main):002:1* if b # base_indent is 6, indent calculated from tokens is 2 + # irb(main):003:0> c # base_indent is 6, indent calculated from tokens is 4 + if prev_open_token + base_indent = [0, indent_difference(lines, line_results, prev_open_token.pos[0] - 1)].max else - if t[3].allbits?(Ripper::EXPR_ENDFN) - # continuing - elsif t[3].allbits?(Ripper::EXPR_BEG) - if t[2] == '=' - in_oneliner_def = :BODY - end - else - if in_oneliner_def == :BODY - # one-liner method definition - if is_first_printable_of_line - corresponding_token_depth = spaces_of_nest.pop - else - spaces_of_nest.pop - corresponding_token_depth = nil - end - end - in_oneliner_def = nil - end + base_indent = 0 end - case t[1] - when :on_ignored_nl, :on_nl, :on_comment - if in_oneliner_def != :BODY - corresponding_token_depth = nil - spaces_at_line_head = 0 - is_first_spaces_of_line = true - is_first_printable_of_line = true - open_brace_on_line = 0 + if free_indent_token?(prev_open_token) + if is_newline && prev_open_token.pos[0] == line_index + # First newline inside free-indent token + base_indent + indent + else + # Accept any number of indent inside free-indent token + preserve_indent end - next - when :on_sp - spaces_at_line_head = t[2].count(' ') if is_first_spaces_of_line - is_first_spaces_of_line = false - next - end - case t[1] - when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg - spaces_of_nest.push(spaces_at_line_head + open_brace_on_line * 2) - open_brace_on_line += 1 - when :on_rbracket, :on_rbrace, :on_rparen - if is_first_printable_of_line - corresponding_token_depth = spaces_of_nest.pop + elsif prev_open_token&.event == :on_embdoc_beg || next_open_token&.event == :on_embdoc_beg + if prev_open_token&.event == next_open_token&.event + # Accept any number of indent inside embdoc content + preserve_indent else - spaces_of_nest.pop - corresponding_token_depth = nil + # =begin or =end + 0 end - open_brace_on_line -= 1 - when :on_kw - next if index > 0 and @tokens[index - 1][3].allbits?(Ripper::EXPR_FNAME) - case t[2] - when 'do' - syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index) - if syntax_of_do == :method_calling - spaces_of_nest.push(spaces_at_line_head) - end - when 'def', 'case', 'for', 'begin', 'class', 'module' - spaces_of_nest.push(spaces_at_line_head) - when 'rescue' - unless t[3].allbits?(Ripper::EXPR_LABEL) - corresponding_token_depth = spaces_of_nest.last - end - when 'if', 'unless', 'while', 'until' - # postfix if/unless/while/until must be Ripper::EXPR_LABEL - unless t[3].allbits?(Ripper::EXPR_LABEL) - spaces_of_nest.push(spaces_at_line_head) - end - when 'else', 'elsif', 'ensure', 'when', 'in' - corresponding_token_depth = spaces_of_nest.last - when 'end' - if is_first_printable_of_line - corresponding_token_depth = spaces_of_nest.pop + elsif prev_open_token&.event == :on_heredoc_beg + tok = prev_open_token.tok + if prev_opens.size <= next_opens.size + if is_newline && lines[line_index].empty? && line_results[line_index - 1][1].last != next_open_token + # First line in heredoc + tok.match?(/^<<[-~]/) ? base_indent + indent : indent + elsif tok.match?(/^<<~/) + # Accept extra indent spaces inside `<<~` heredoc + [base_indent + indent, preserve_indent].max else - spaces_of_nest.pop - corresponding_token_depth = nil + # Accept any number of indent inside other heredoc + preserve_indent end + else + # Heredoc close + prev_line_indent_level = calc_indent_level(prev_opens) + tok.match?(/^<<[~-]/) ? base_indent + 2 * (prev_line_indent_level - 1) : 0 end + else + base_indent + indent end - is_first_spaces_of_line = false - is_first_printable_of_line = false end - corresponding_token_depth - end - def check_string_literal(tokens) - i = 0 - start_token = [] - end_type = [] - while i < tokens.size - t = tokens[i] - case t[1] - when *end_type.last - start_token.pop - end_type.pop - when :on_tstring_beg - start_token << t - end_type << [:on_tstring_end, :on_label_end] - when :on_regexp_beg - start_token << t - end_type << :on_regexp_end - when :on_symbeg - acceptable_single_tokens = %i{on_ident on_const on_op on_cvar on_ivar on_gvar on_kw on_int on_backtick} - if (i + 1) < tokens.size - if acceptable_single_tokens.all?{ |st| tokens[i + 1][1] != st } - start_token << t - end_type << :on_tstring_end - else - i += 1 - end - end - when :on_backtick - start_token << t - end_type << :on_tstring_end - when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg - start_token << t - end_type << :on_tstring_end - when :on_heredoc_beg - start_token << t - end_type << :on_heredoc_end - end - i += 1 - end - start_token.last.nil? ? '' : start_token.last - end + LTYPE_TOKENS = %i[ + on_heredoc_beg on_tstring_beg + on_regexp_beg on_symbeg on_backtick + on_symbols_beg on_qsymbols_beg + on_words_beg on_qwords_beg + ] - def process_literal_type(tokens = @tokens) - start_token = check_string_literal(tokens) - case start_token[1] - when :on_tstring_beg - case start_token[2] - when ?" then ?" - when /^%.$/ then ?" - when /^%Q.$/ then ?" - when ?' then ?' - when /^%q.$/ then ?' - end - when :on_regexp_beg then ?/ - when :on_symbeg then ?: - when :on_backtick then ?` - when :on_qwords_beg then ?] - when :on_words_beg then ?] - when :on_qsymbols_beg then ?] - when :on_symbols_beg then ?] - when :on_heredoc_beg - start_token[2] =~ /<<[-~]?(['"`])[_a-zA-Z0-9]+\1/ - case $1 - when ?" then ?" - when ?' then ?' - when ?` then ?` - else ?" + def ltype_from_open_tokens(opens) + start_token = opens.reverse_each.find do |tok| + LTYPE_TOKENS.include?(tok.event) end - else - nil - end - end + return nil unless start_token - def check_termination_in_prev_line(code, context: nil) - tokens = self.class.ripper_lex_without_warning(code, context: context) - past_first_newline = false - index = tokens.rindex do |t| - # traverse first token before last line - if past_first_newline - if t.tok.include?("\n") - true + case start_token&.event + when :on_tstring_beg + case start_token&.tok + when ?" then ?" + when /^%.$/ then ?" + when /^%Q.$/ then ?" + when ?' then ?' + when /^%q.$/ then ?' end - elsif t.tok.include?("\n") - past_first_newline = true - false + when :on_regexp_beg then ?/ + when :on_symbeg then ?: + when :on_backtick then ?` + when :on_qwords_beg then ?] + when :on_words_beg then ?] + when :on_qsymbols_beg then ?] + when :on_symbols_beg then ?] + when :on_heredoc_beg + start_token&.tok =~ /<<[-~]?(['"`])\w+\1/ + $1 || ?" else - false + nil end end - if index - first_token = nil - last_line_tokens = tokens[(index + 1)..(tokens.size - 1)] - last_line_tokens.each do |t| - unless [:on_sp, :on_ignored_sp, :on_comment].include?(t.event) - first_token = t - break - end - end - if first_token.nil? - return false - elsif first_token && first_token.state == Ripper::EXPR_DOT - return false - else - tokens_without_last_line = tokens[0..index] - ltype = process_literal_type(tokens_without_last_line) - indent = process_nesting_level(tokens_without_last_line) - continue = process_continue(tokens_without_last_line) - code_block_open = check_code_block(tokens_without_last_line.map(&:tok).join(''), tokens_without_last_line) - if ltype or indent > 0 or continue or code_block_open - return false + + def check_termination_in_prev_line(code, local_variables:) + tokens = self.class.ripper_lex_without_warning(code, local_variables: local_variables) + past_first_newline = false + index = tokens.rindex do |t| + # traverse first token before last line + if past_first_newline + if t.tok.include?("\n") + true + end + elsif t.tok.include?("\n") + past_first_newline = true + false else - return last_line_tokens.map(&:tok).join('') + false end end - end - false - end - private + if index + first_token = nil + last_line_tokens = tokens[(index + 1)..(tokens.size - 1)] + last_line_tokens.each do |t| + unless [:on_sp, :on_ignored_sp, :on_comment].include?(t.event) + first_token = t + break + end + end - def heredoc_scope? - heredoc_tokens = @tokens.select { |t| [:on_heredoc_beg, :on_heredoc_end].include?(t.event) } - heredoc_tokens[-1]&.event == :on_heredoc_beg + if first_token && first_token.state != Ripper::EXPR_DOT + tokens_without_last_line = tokens[0..index] + code_without_last_line = tokens_without_last_line.map(&:tok).join + opens_without_last_line = NestingParser.open_tokens(tokens_without_last_line) + if code_terminated?(code_without_last_line, tokens_without_last_line, opens_without_last_line, local_variables: local_variables) + return last_line_tokens.map(&:tok).join + end + end + end + false + end end + # :startdoc: end -# :startdoc: + +RubyLex = IRB::RubyLex +Object.deprecate_constant(:RubyLex) |