diff options
author | tomoya ishida <tomoyapenguin@gmail.com> | 2023-06-16 00:39:53 +0900 |
---|---|---|
committer | git <svn-admin@ruby-lang.org> | 2023-06-15 15:39:58 +0000 |
commit | 364a6d56d776270da09604816d623047c66c5e32 (patch) | |
tree | 2e482a3252c5a366e9aab8fe23ae3757759069a3 /lib/irb/ruby-lex.rb | |
parent | c1c926219de5489c321d53577ff2eb8c041e166f (diff) |
[ruby/irb] Rewrite RubyLex to fix some bugs and make it possible to
add new features easily
(https://github.com/ruby/irb/pull/500)
* Add nesting level parser for multiple use (indent, prompt, termination check)
* Rewrite RubyLex using NestingParser
* Add nesting parser tests, fix some existing tests
* Add description comment, rename method to NestingParser
* Add comments and tweak code to RubyLex
* Update NestingParser test
* Extract list of ltype tokens to constants
Diffstat (limited to 'lib/irb/ruby-lex.rb')
-rw-r--r-- | lib/irb/ruby-lex.rb | 553 |
1 file changed, 96 insertions, 457 deletions
diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb index e29d52e47c..77c5b07ae9 100644 --- a/lib/irb/ruby-lex.rb +++ b/lib/irb/ruby-lex.rb @@ -6,6 +6,7 @@ require "ripper" require "jruby" if RUBY_ENGINE == "jruby" +require_relative "nesting_parser" # :stopdoc: class RubyLex @@ -54,8 +55,7 @@ class RubyLex if @io.respond_to?(:check_termination) @io.check_termination do |code| if Reline::IOGate.in_pasting? - lex = RubyLex.new(@context) - rest = lex.check_termination_in_prev_line(code) + rest = check_termination_in_prev_line(code) if rest Reline.delete_text rest.bytes.reverse_each do |c| @@ -69,64 +69,39 @@ class RubyLex # Accept any single-line input for symbol aliases or commands that transform args next true if single_line_command?(code) - ltype, indent, continue, code_block_open = check_code_state(code) - if ltype or indent > 0 or continue or code_block_open - false - else - true - end + _tokens, _opens, terminated = check_code_state(code) + terminated end end end if @io.respond_to?(:dynamic_prompt) @io.dynamic_prompt do |lines| lines << '' if lines.empty? - result = [] tokens = self.class.ripper_lex_without_warning(lines.map{ |l| l + "\n" }.join, context: @context) - code = String.new - partial_tokens = [] - unprocessed_tokens = [] - line_num_offset = 0 - tokens.each do |t| - partial_tokens << t - unprocessed_tokens << t - if t.tok.include?("\n") - t_str = t.tok - t_str.each_line("\n") do |s| - code << s - next unless s.include?("\n") - ltype, indent, continue, code_block_open = check_state(code, partial_tokens) - result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset) - line_num_offset += 1 - end - unprocessed_tokens = [] - else - code << t.tok + line_results = IRB::NestingParser.parse_by_line(tokens) + tokens_until_line = [] + line_results.map.with_index do |(line_tokens, _prev_opens, next_opens, _min_depth), line_num_offset| + line_tokens.each do |token, _s| + # Avoid appending duplicated token. 
Tokens that include "\n" like multiline tstring_content can exist in multiple lines. + tokens_until_line << token if token != tokens_until_line.last end + continue = process_continue(tokens_until_line) + prompt(next_opens, continue, line_num_offset) end - - unless unprocessed_tokens.empty? - ltype, indent, continue, code_block_open = check_state(code, unprocessed_tokens) - result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset) - end - result end end if @io.respond_to?(:auto_indent) and @context.auto_indent_mode @io.auto_indent do |lines, line_index, byte_pointer, is_newline| if is_newline - @tokens = self.class.ripper_lex_without_warning(lines[0..line_index].join("\n"), context: @context) - prev_spaces = find_prev_spaces(line_index) - depth_difference = check_newline_depth_difference - depth_difference = 0 if depth_difference < 0 - prev_spaces + depth_difference * 2 + tokens = self.class.ripper_lex_without_warning(lines[0..line_index].join("\n"), context: @context) + process_indent_level(tokens, lines) else code = line_index.zero? ? '' : lines[0..(line_index - 1)].map{ |l| l + "\n" }.join last_line = lines[line_index]&.byteslice(0, byte_pointer) code += last_line if last_line - @tokens = self.class.ripper_lex_without_warning(code, context: @context) - check_corresponding_token_depth(lines, line_index) + tokens = self.class.ripper_lex_without_warning(code, context: @context) + check_corresponding_token_depth(tokens, lines, line_index) end end end @@ -176,50 +151,30 @@ class RubyLex $VERBOSE = verbose end - def find_prev_spaces(line_index) - return 0 if @tokens.size == 0 - md = @tokens[0].tok.match(/(\A +)/) - prev_spaces = md.nil? ? 
0 : md[1].count(' ') - line_count = 0 - @tokens.each_with_index do |t, i| - if t.tok.include?("\n") - line_count += t.tok.count("\n") - if line_count >= line_index - return prev_spaces - end - next if t.event == :on_tstring_content || t.event == :on_words_sep - if (@tokens.size - 1) > i - md = @tokens[i + 1].tok.match(/(\A +)/) - prev_spaces = md.nil? ? 0 : md[1].count(' ') - end - end - end - prev_spaces - end - - def check_state(code, tokens) - ltype = process_literal_type(tokens) - indent = process_nesting_level(tokens) - continue = process_continue(tokens) - lvars_code = self.class.generate_local_variables_assign_code(@context.local_variables) - code = "#{lvars_code}\n#{code}" if lvars_code - code_block_open = check_code_block(code, tokens) - [ltype, indent, continue, code_block_open] + def prompt(opens, continue, line_num_offset) + ltype = ltype_from_open_tokens(opens) + _indent_level, nesting_level = calc_nesting_depth(opens) + @prompt&.call(ltype, nesting_level, opens.any? || continue, @line_no + line_num_offset) end def check_code_state(code) check_target_code = code.gsub(/\s*\z/, '').concat("\n") tokens = self.class.ripper_lex_without_warning(check_target_code, context: @context) - check_state(check_target_code, tokens) + opens = IRB::NestingParser.open_tokens(tokens) + [tokens, opens, code_terminated?(code, tokens, opens)] end - def save_prompt_to_context_io(ltype, indent, continue, line_num_offset) + def code_terminated?(code, tokens, opens) + opens.empty? && !process_continue(tokens) && !check_code_block(code, tokens) + end + + def save_prompt_to_context_io(opens, continue, line_num_offset) # Implicitly saves prompt string to `@context.io.prompt`. This will be used in the next `@input.call`. 
- @prompt.call(ltype, indent, continue, @line_no + line_num_offset) + prompt(opens, continue, line_num_offset) end def readmultiline - save_prompt_to_context_io(nil, 0, false, 0) + save_prompt_to_context_io([], false, 0) # multiline return @input.call if @io.respond_to?(:check_termination) @@ -237,11 +192,12 @@ class RubyLex # Accept any single-line input for symbol aliases or commands that transform args return code if single_line_command?(code) - ltype, indent, continue, code_block_open = check_code_state(code) - return code unless ltype or indent > 0 or continue or code_block_open + tokens, opens, terminated = check_code_state(code) + return code if terminated line_offset += 1 - save_prompt_to_context_io(ltype, indent, continue, line_offset) + continue = process_continue(tokens) + save_prompt_to_context_io(opens, continue, line_offset) end end @@ -282,9 +238,6 @@ class RubyLex def check_code_block(code, tokens) return true if tokens.empty? - if tokens.last.event == :on_heredoc_beg - return true - end begin # check if parser error are available verbose, $VERBOSE = $VERBOSE, nil @@ -372,365 +325,82 @@ class RubyLex false end - def process_nesting_level(tokens) - indent = 0 - in_oneliner_def = nil - tokens.each_with_index { |t, index| - # detecting one-liner method definition - if in_oneliner_def.nil? - if t.state.allbits?(Ripper::EXPR_ENDFN) - in_oneliner_def = :ENDFN - end - else - if t.state.allbits?(Ripper::EXPR_ENDFN) - # continuing - elsif t.state.allbits?(Ripper::EXPR_BEG) - if t.tok == '=' - in_oneliner_def = :BODY - end - else - if in_oneliner_def == :BODY - # one-liner method definition - indent -= 1 - end - in_oneliner_def = nil - end - end - + # Calculates [indent_level, nesting_level]. nesting_level is used in prompt string. 
+ def calc_nesting_depth(opens) + indent_level = 0 + nesting_level = 0 + opens.each do |t| case t.event - when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg - indent += 1 - when :on_rbracket, :on_rbrace, :on_rparen - indent -= 1 - when :on_kw - next if index > 0 and tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME) - case t.tok - when 'do' - syntax_of_do = take_corresponding_syntax_to_kw_do(tokens, index) - indent += 1 if syntax_of_do == :method_calling - when 'def', 'case', 'for', 'begin', 'class', 'module' - indent += 1 - when 'if', 'unless', 'while', 'until' - # postfix if/unless/while/until must be Ripper::EXPR_LABEL - indent += 1 unless t.state.allbits?(Ripper::EXPR_LABEL) - when 'end' - indent -= 1 - end - end - # percent literals are not indented - } - indent - end - - def is_method_calling?(tokens, index) - tk = tokens[index] - if tk.state.anybits?(Ripper::EXPR_CMDARG) and tk.event == :on_ident - # The target method call to pass the block with "do". - return true - elsif tk.state.anybits?(Ripper::EXPR_ARG) and tk.event == :on_ident - non_sp_index = tokens[0..(index - 1)].rindex{ |t| t.event != :on_sp } - if non_sp_index - prev_tk = tokens[non_sp_index] - if prev_tk.state.anybits?(Ripper::EXPR_DOT) and prev_tk.event == :on_period - # The target method call with receiver to pass the block with "do". - return true - end + when :on_heredoc_beg + # TODO: indent heredoc + when :on_tstring_beg, :on_regexp_beg, :on_symbeg + # can be indented if t.tok starts with `%` + when :on_words_beg, :on_qwords_beg, :on_symbols_beg, :on_qsymbols_beg, :on_embexpr_beg + # can be indented but not indented in current implementation + when :on_embdoc_beg + indent_level = 0 + else + nesting_level += 1 + indent_level += 1 end end - false + [indent_level, nesting_level] end - def take_corresponding_syntax_to_kw_do(tokens, index) - syntax_of_do = nil - # Finding a syntax corresponding to "do". 
- index.downto(0) do |i| - tk = tokens[i] - # In "continue", the token isn't the corresponding syntax to "do". - non_sp_index = tokens[0..(i - 1)].rindex{ |t| t.event != :on_sp } - first_in_fomula = false - if non_sp_index.nil? - first_in_fomula = true - elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index].event) - first_in_fomula = true - end - if is_method_calling?(tokens, i) - syntax_of_do = :method_calling - break if first_in_fomula - elsif tk.event == :on_kw && %w{while until for}.include?(tk.tok) - # A loop syntax in front of "do" found. - # - # while cond do # also "until" or "for" - # end - # - # This "do" doesn't increment indent because the loop syntax already - # incremented. - syntax_of_do = :loop_syntax - break if first_in_fomula - end + def free_indent_token(opens, line_index) + last_token = opens.last + return unless last_token + if last_token.event == :on_heredoc_beg && last_token.pos.first < line_index + 1 + # accept extra indent spaces inside heredoc + last_token end - syntax_of_do end - def is_the_in_correspond_to_a_for(tokens, index) - syntax_of_in = nil - # Finding a syntax corresponding to "do". - index.downto(0) do |i| - tk = tokens[i] - # In "continue", the token isn't the corresponding syntax to "do". - non_sp_index = tokens[0..(i - 1)].rindex{ |t| t.event != :on_sp } - first_in_fomula = false - if non_sp_index.nil? - first_in_fomula = true - elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index].event) - first_in_fomula = true - end - if tk.event == :on_kw && tk.tok == 'for' - # A loop syntax in front of "do" found. - # - # while cond do # also "until" or "for" - # end - # - # This "do" doesn't increment indent because the loop syntax already - # incremented. 
- syntax_of_in = :for - end - break if first_in_fomula + def process_indent_level(tokens, lines) + opens = IRB::NestingParser.open_tokens(tokens) + indent_level, _nesting_level = calc_nesting_depth(opens) + indent = indent_level * 2 + line_index = lines.size - 2 + if free_indent_token(opens, line_index) + return [indent, lines[line_index][/^ */].length].max end - syntax_of_in - end - - def check_newline_depth_difference - depth_difference = 0 - open_brace_on_line = 0 - in_oneliner_def = nil - @tokens.each_with_index do |t, index| - # detecting one-liner method definition - if in_oneliner_def.nil? - if t.state.allbits?(Ripper::EXPR_ENDFN) - in_oneliner_def = :ENDFN - end - else - if t.state.allbits?(Ripper::EXPR_ENDFN) - # continuing - elsif t.state.allbits?(Ripper::EXPR_BEG) - if t.tok == '=' - in_oneliner_def = :BODY - end - else - if in_oneliner_def == :BODY - # one-liner method definition - depth_difference -= 1 - end - in_oneliner_def = nil - end - end - case t.event - when :on_ignored_nl, :on_nl, :on_comment - if index != (@tokens.size - 1) and in_oneliner_def != :BODY - depth_difference = 0 - open_brace_on_line = 0 - end - next - when :on_sp - next - end - - case t.event - when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg - depth_difference += 1 - open_brace_on_line += 1 - when :on_rbracket, :on_rbrace, :on_rparen - depth_difference -= 1 if open_brace_on_line > 0 - when :on_kw - next if index > 0 and @tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME) - case t.tok - when 'do' - syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index) - depth_difference += 1 if syntax_of_do == :method_calling - when 'def', 'case', 'for', 'begin', 'class', 'module' - depth_difference += 1 - when 'if', 'unless', 'while', 'until', 'rescue' - # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL - unless t.state.allbits?(Ripper::EXPR_LABEL) - depth_difference += 1 - end - when 'else', 'elsif', 'ensure', 'when' - depth_difference += 1 - when 'in' - 
unless is_the_in_correspond_to_a_for(@tokens, index) - depth_difference += 1 - end - when 'end' - depth_difference -= 1 - end - end - end - depth_difference + indent end - def check_corresponding_token_depth(lines, line_index) - corresponding_token_depth = nil - is_first_spaces_of_line = true - is_first_printable_of_line = true - spaces_of_nest = [] - spaces_at_line_head = 0 - open_brace_on_line = 0 - in_oneliner_def = nil - - if heredoc_scope? + def check_corresponding_token_depth(tokens, lines, line_index) + line_results = IRB::NestingParser.parse_by_line(tokens) + result = line_results[line_index] + return unless result + + # To correctly indent line like `end.map do`, we use shortest open tokens on each line for indent calculation. + # Shortest open tokens can be calculated by `opens.take(min_depth)` + _tokens, prev_opens, opens, min_depth = result + indent_level, _nesting_level = calc_nesting_depth(opens.take(min_depth)) + indent = indent_level * 2 + free_indent_tok = free_indent_token(opens, line_index) + prev_line_free_indent_tok = free_indent_token(prev_opens, line_index - 1) + if prev_line_free_indent_tok && prev_line_free_indent_tok != free_indent_tok + return indent + elsif free_indent_tok return lines[line_index][/^ */].length end - - @tokens.each_with_index do |t, index| - # detecting one-liner method definition - if in_oneliner_def.nil? 
- if t.state.allbits?(Ripper::EXPR_ENDFN) - in_oneliner_def = :ENDFN - end - else - if t.state.allbits?(Ripper::EXPR_ENDFN) - # continuing - elsif t.state.allbits?(Ripper::EXPR_BEG) - if t.tok == '=' - in_oneliner_def = :BODY - end - else - if in_oneliner_def == :BODY - # one-liner method definition - if is_first_printable_of_line - corresponding_token_depth = spaces_of_nest.pop - else - spaces_of_nest.pop - corresponding_token_depth = nil - end - end - in_oneliner_def = nil - end - end - - case t.event - when :on_ignored_nl, :on_nl, :on_comment, :on_heredoc_end, :on_embdoc_end - if in_oneliner_def != :BODY - corresponding_token_depth = nil - spaces_at_line_head = 0 - is_first_spaces_of_line = true - is_first_printable_of_line = true - open_brace_on_line = 0 - end - next - when :on_sp - spaces_at_line_head = t.tok.count(' ') if is_first_spaces_of_line - is_first_spaces_of_line = false - next - end - - case t.event - when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg - spaces_of_nest.push(spaces_at_line_head + open_brace_on_line * 2) - open_brace_on_line += 1 - when :on_rbracket, :on_rbrace, :on_rparen - if is_first_printable_of_line - corresponding_token_depth = spaces_of_nest.pop - else - spaces_of_nest.pop - corresponding_token_depth = nil - end - open_brace_on_line -= 1 - when :on_kw - next if index > 0 and @tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME) - case t.tok - when 'do' - syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index) - if syntax_of_do == :method_calling - spaces_of_nest.push(spaces_at_line_head) - end - when 'def', 'case', 'for', 'begin', 'class', 'module' - spaces_of_nest.push(spaces_at_line_head) - when 'rescue' - unless t.state.allbits?(Ripper::EXPR_LABEL) - corresponding_token_depth = spaces_of_nest.last - end - when 'if', 'unless', 'while', 'until' - # postfix if/unless/while/until must be Ripper::EXPR_LABEL - unless t.state.allbits?(Ripper::EXPR_LABEL) - spaces_of_nest.push(spaces_at_line_head) - end - when 'else', 
'elsif', 'ensure', 'when' - corresponding_token_depth = spaces_of_nest.last - when 'in' - if in_keyword_case_scope? - corresponding_token_depth = spaces_of_nest.last - end - when 'end' - if is_first_printable_of_line - corresponding_token_depth = spaces_of_nest.pop - else - spaces_of_nest.pop - corresponding_token_depth = nil - end - end - end - is_first_spaces_of_line = false - is_first_printable_of_line = false - end - corresponding_token_depth + prev_indent_level, _prev_nesting_level = calc_nesting_depth(prev_opens) + indent if indent_level < prev_indent_level end - def check_string_literal(tokens) - i = 0 - start_token = [] - end_type = [] - pending_heredocs = [] - while i < tokens.size - t = tokens[i] - case t.event - when *end_type.last - start_token.pop - end_type.pop - when :on_tstring_beg - start_token << t - end_type << [:on_tstring_end, :on_label_end] - when :on_regexp_beg - start_token << t - end_type << :on_regexp_end - when :on_symbeg - acceptable_single_tokens = %i{on_ident on_const on_op on_cvar on_ivar on_gvar on_kw on_int on_backtick} - if (i + 1) < tokens.size - if acceptable_single_tokens.all?{ |st| tokens[i + 1].event != st } - start_token << t - end_type << :on_tstring_end - else - i += 1 - end - end - when :on_backtick - if t.state.allbits?(Ripper::EXPR_BEG) - start_token << t - end_type << :on_tstring_end - end - when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg - start_token << t - end_type << :on_tstring_end - when :on_heredoc_beg - pending_heredocs << t - end + LTYPE_TOKENS = %i[ + on_heredoc_beg on_tstring_beg + on_regexp_beg on_symbeg on_backtick + on_symbols_beg on_qsymbols_beg + on_words_beg on_qwords_beg + ] - if pending_heredocs.any? 
&& t.tok.include?("\n") - pending_heredocs.reverse_each do |t| - start_token << t - end_type << :on_heredoc_end - end - pending_heredocs = [] - end - i += 1 + def ltype_from_open_tokens(opens) + start_token = opens.reverse_each.find do |tok| + LTYPE_TOKENS.include?(tok.event) end - pending_heredocs.first || start_token.last - end - - def process_literal_type(tokens) - start_token = check_string_literal(tokens) - return nil if start_token == "" + return nil unless start_token case start_token&.event when :on_tstring_beg @@ -783,47 +453,16 @@ class RubyLex end end - if first_token.nil? - return false - elsif first_token && first_token.state == Ripper::EXPR_DOT - return false - else + if first_token && first_token.state != Ripper::EXPR_DOT tokens_without_last_line = tokens[0..index] - ltype = process_literal_type(tokens_without_last_line) - indent = process_nesting_level(tokens_without_last_line) - continue = process_continue(tokens_without_last_line) - code_block_open = check_code_block(tokens_without_last_line.map(&:tok).join(''), tokens_without_last_line) - if ltype or indent > 0 or continue or code_block_open - return false - else - return last_line_tokens.map(&:tok).join('') + code_without_last_line = tokens_without_last_line.map(&:tok).join + opens_without_last_line = IRB::NestingParser.open_tokens(tokens_without_last_line) + if code_terminated?(code_without_last_line, tokens_without_last_line, opens_without_last_line) + return last_line_tokens.map(&:tok).join end end end false end - - private - - def heredoc_scope? - heredoc_tokens = @tokens.select { |t| [:on_heredoc_beg, :on_heredoc_end].include?(t.event) } - heredoc_tokens[-1]&.event == :on_heredoc_beg - end - - def in_keyword_case_scope? - kw_tokens = @tokens.select { |t| t.event == :on_kw && ['case', 'for', 'end'].include?(t.tok) } - counter = 0 - kw_tokens.reverse.each do |t| - if t.tok == 'case' - return true if counter.zero? 
- counter += 1 - elsif t.tok == 'for' - counter += 1 - elsif t.tok == 'end' - counter -= 1 - end - end - false - end end # :startdoc: |