From 198d197aeb79566673db2440a3b90ff87ff279cd Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 2 Apr 2024 15:24:28 -0400 Subject: [ruby/prism] Allow block exits in defined? and fix modifier while/until https://github.com/ruby/prism/commit/2752f0b8df --- prism/parser.h | 16 +++++ prism/prism.c | 119 +++++++++++++++++++++++++++-------- prism/templates/src/diagnostic.c.erb | 4 +- test/prism/errors_test.rb | 15 ++--- test/prism/parse_test.rb | 2 +- 5 files changed, 118 insertions(+), 38 deletions(-) diff --git a/prism/parser.h b/prism/parser.h index f706a67de7..9377c22d5f 100644 --- a/prism/parser.h +++ b/prism/parser.h @@ -289,6 +289,9 @@ typedef enum { /** a method definition's parameters */ PM_CONTEXT_DEF_PARAMS, + /** a defined? expression */ + PM_CONTEXT_DEFINED, + /** a method definition's default parameter */ PM_CONTEXT_DEFAULT_PARAMS, @@ -716,6 +719,19 @@ struct pm_parser { /** The current parameter name id on parsing its default value. */ pm_constant_id_t current_param_name; + /** + * When parsing block exits (e.g., break, next, redo), we need to validate + * that they are in correct contexts. For the most part we can do this by + * looking at our parent contexts. However, modifier while and until + * expressions can change that context to make block exits valid. In these + * cases, we need to keep track of the block exits and then validate them + * after the expression has been parsed. + * + * We use a pointer here because we don't want to keep a whole list attached + * since this will only be used in the context of begin/end expressions. + */ + pm_node_list_t *current_block_exits; + /** The version of prism that we should use to parse. */ pm_options_version_t version; diff --git a/prism/prism.c b/prism/prism.c index b4795317da..c6971fbd88 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -38,6 +38,7 @@ debug_context(pm_context_t context) { case PM_CONTEXT_CASE_WHEN: return "CASE_WHEN"; case PM_CONTEXT_DEF: return "DEF"; case PM_CONTEXT_DEF_PARAMS: return "DEF_PARAMS"; + case PM_CONTEXT_DEFINED: return "DEFINED"; case PM_CONTEXT_DEFAULT_PARAMS: return "DEFAULT_PARAMS"; case PM_CONTEXT_ENSURE: return "ENSURE"; case PM_CONTEXT_ENSURE_DEF: return "ENSURE_DEF"; @@ -7591,6 +7592,7 @@ context_terminator(pm_context_t context, pm_token_t *token) { switch (context) { case PM_CONTEXT_MAIN: case PM_CONTEXT_DEF_PARAMS: + case PM_CONTEXT_DEFINED: return token->type == PM_TOKEN_EOF; case PM_CONTEXT_DEFAULT_PARAMS: return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT; @@ -7741,6 +7743,7 @@ context_human(pm_context_t context) { case PM_CONTEXT_DEF: return "method definition"; case PM_CONTEXT_DEF_PARAMS: return "method parameters"; case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value"; + case PM_CONTEXT_DEFINED: return "'defined?' expression"; case PM_CONTEXT_ELSE: return "'else' clause"; case PM_CONTEXT_ELSIF: return "'elsif' clause"; case PM_CONTEXT_EMBEXPR: return "embedded expression"; @@ -15705,13 +15708,15 @@ pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { * context. If it isn't, add an error to the parser. */ static void -parse_block_exit(pm_parser_t *parser, const pm_token_t *token) { +parse_block_exit(pm_parser_t *parser, pm_node_t *node, const char *type) { pm_context_node_t *context_node = parser->current_context; + bool through_begin = false; while (context_node != NULL) { switch (context_node->context) { case PM_CONTEXT_BLOCK_BRACES: case PM_CONTEXT_BLOCK_KEYWORDS: + case PM_CONTEXT_DEFINED: case PM_CONTEXT_FOR: case PM_CONTEXT_LAMBDA_BRACES: case PM_CONTEXT_LAMBDA_DO_END: @@ -15731,15 +15736,34 @@ parse_block_exit(pm_parser_t *parser, const pm_token_t *token) { case PM_CONTEXT_RESCUE_DEF: case PM_CONTEXT_RESCUE_ELSE_DEF: case PM_CONTEXT_SCLASS: - // These are the bad cases. We're not allowed to have a block - // exit in these contexts. - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, *token, PM_ERR_INVALID_BLOCK_EXIT); + if (through_begin) { + // If we get here, then we're about to mark this block exit + // as invalid. However, it could later _become_ valid if we + // find a trailing while/until on the begin. In this case + // instead of adding the error here, we'll add the block + // exit to the list of exits for the begin node, and the + // begin node parsing will handle validating it instead. + assert(parser->current_block_exits != NULL); + pm_node_list_append(parser->current_block_exits, node); + } else { + // These are the bad cases. We're not allowed to have a + // block exit in these contexts. + PM_PARSER_ERR_NODE_FORMAT(parser, node, PM_ERR_INVALID_BLOCK_EXIT, type); + } + return; case PM_CONTEXT_NONE: // This case should never happen. assert(false && "unreachable"); break; case PM_CONTEXT_BEGIN: + case PM_CONTEXT_RESCUE_ELSE: + case PM_CONTEXT_RESCUE: + // If we got to a begin node, then we'll track that we have + // gotten here because we need to know it if this block exit is + // later marked as invalid. + through_begin = true; + /* fallthrough */ case PM_CONTEXT_CASE_WHEN: case PM_CONTEXT_CASE_IN: case PM_CONTEXT_DEFAULT_PARAMS: @@ -15751,8 +15775,6 @@ parse_block_exit(pm_parser_t *parser, const pm_token_t *token) { case PM_CONTEXT_IF: case PM_CONTEXT_PARENS: case PM_CONTEXT_PREDICATE: - case PM_CONTEXT_RESCUE_ELSE: - case PM_CONTEXT_RESCUE: case PM_CONTEXT_UNLESS: // In these contexts we should continue walking up the list of // contexts. @@ -15767,11 +15789,12 @@ parse_block_exit(pm_parser_t *parser, const pm_token_t *token) { * Ensures that the current retry token is valid in the current context. */ static void -parse_retry(pm_parser_t *parser, const pm_token_t *token) { +parse_retry(pm_parser_t *parser, const pm_node_t *node) { pm_context_node_t *context_node = parser->current_context; while (context_node != NULL) { switch (context_node->context) { + case PM_CONTEXT_DEFINED: case PM_CONTEXT_RESCUE: case PM_CONTEXT_RESCUE_DEF: // These are the good cases. We're allowed to have a retry here. @@ -15785,19 +15808,19 @@ parse_retry(pm_parser_t *parser, const pm_token_t *token) { case PM_CONTEXT_SCLASS: // These are the bad cases. We're not allowed to have a retry in // these contexts. - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, *token, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE); + pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE); return; case PM_CONTEXT_RESCUE_ELSE: case PM_CONTEXT_RESCUE_ELSE_DEF: // These are also bad cases, but with a more specific error // message indicating the else. - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, *token, PM_ERR_INVALID_RETRY_AFTER_ELSE); + pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE); return; case PM_CONTEXT_ENSURE: case PM_CONTEXT_ENSURE_DEF: // These are also bad cases, but with a more specific error // message indicating the ensure. - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, *token, PM_ERR_INVALID_RETRY_AFTER_ENSURE); + pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE); return; case PM_CONTEXT_NONE: // This case should never happen. @@ -15836,12 +15859,13 @@ parse_retry(pm_parser_t *parser, const pm_token_t *token) { * Ensures that the current yield token is valid in the current context. */ static void -parse_yield(pm_parser_t *parser, const pm_token_t *token) { +parse_yield(pm_parser_t *parser, const pm_node_t *node) { pm_context_node_t *context_node = parser->current_context; while (context_node != NULL) { switch (context_node->context) { case PM_CONTEXT_DEF: + case PM_CONTEXT_DEFINED: case PM_CONTEXT_ENSURE_DEF: case PM_CONTEXT_RESCUE_DEF: case PM_CONTEXT_RESCUE_ELSE_DEF: @@ -15853,7 +15877,7 @@ parse_yield(pm_parser_t *parser, const pm_token_t *token) { case PM_CONTEXT_MODULE: // These are the bad cases. We're not allowed to have a retry in // these contexts. - pm_parser_err_token(parser, token, PM_ERR_INVALID_YIELD); + pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD); return; case PM_CONTEXT_NONE: // This case should never happen. @@ -16749,6 +16773,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t begin_keyword = parser->previous; accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); + + pm_node_list_t *previous_block_exits = parser->current_block_exits; + pm_node_list_t current_block_exits = { 0 }; + parser->current_block_exits = ¤t_block_exits; + pm_statements_node_t *begin_statements = NULL; if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) { @@ -16769,6 +16798,27 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_parser_err_node(parser, (pm_node_t *) begin_node->else_clause, PM_ERR_BEGIN_LONELY_ELSE); } + if (!match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) { + // If we didn't find a while or until modifier, then we need to + // go back in and mark all of the block exits as invalid. + for (size_t block_exit_index = 0; block_exit_index < current_block_exits.size; block_exit_index++) { + pm_node_t *block_exit = current_block_exits.nodes[block_exit_index]; + const char *type; + + switch (PM_NODE_TYPE(block_exit)) { + case PM_BREAK_NODE: type = "break"; break; + case PM_NEXT_NODE: type = "next"; break; + case PM_REDO_NODE: type = "redo"; break; + default: assert(false && "unreachable"); type = ""; break; + } + + PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type); + } + } + + parser->current_block_exits = previous_block_exits; + pm_node_list_free(¤t_block_exits); + return (pm_node_t *) begin_node; } case PM_TOKEN_KEYWORD_BEGIN_UPCASE: { @@ -16810,12 +16860,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } switch (keyword.type) { - case PM_TOKEN_KEYWORD_BREAK: - parse_block_exit(parser, &keyword); - return (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments); - case PM_TOKEN_KEYWORD_NEXT: - parse_block_exit(parser, &keyword); - return (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments); + case PM_TOKEN_KEYWORD_BREAK: { + pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments); + parse_block_exit(parser, node, "break"); + return node; + } + case PM_TOKEN_KEYWORD_NEXT: { + pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments); + parse_block_exit(parser, node, "next"); + return node; + } case PM_TOKEN_KEYWORD_RETURN: { if ( (parser->current_context->context == PM_CONTEXT_CLASS) || @@ -16848,14 +16902,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments); } case PM_TOKEN_KEYWORD_YIELD: { - parse_yield(parser, &parser->current); parser_lex(parser); pm_token_t keyword = parser->previous; pm_arguments_t arguments = { 0 }; parse_arguments_list(parser, &arguments, false, accepts_command_call); - return (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc); + pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc); + parse_yield(parser, node); + + return node; } case PM_TOKEN_KEYWORD_CLASS: { parser_lex(parser); @@ -17245,6 +17301,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t lparen; pm_token_t rparen; pm_node_t *expression; + context_push(parser, PM_CONTEXT_DEFINED); if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) { lparen = parser->previous; @@ -17263,6 +17320,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_DEFINED_EXPRESSION); } + context_pop(parser); return (pm_node_t *) pm_defined_node_create( parser, &lparen, @@ -17482,14 +17540,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b case PM_TOKEN_KEYWORD_NIL: parser_lex(parser); return (pm_node_t *) pm_nil_node_create(parser, &parser->previous); - case PM_TOKEN_KEYWORD_REDO: - parse_block_exit(parser, &parser->current); + case PM_TOKEN_KEYWORD_REDO: { parser_lex(parser); - return (pm_node_t *) pm_redo_node_create(parser, &parser->previous); - case PM_TOKEN_KEYWORD_RETRY: - parse_retry(parser, &parser->current); + + pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous); + parse_block_exit(parser, node, "redo"); + + return node; + } + case PM_TOKEN_KEYWORD_RETRY: { parser_lex(parser); - return (pm_node_t *) pm_retry_node_create(parser, &parser->previous); + + pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous); + parse_retry(parser, node); + + return node; + } case PM_TOKEN_KEYWORD_SELF: parser_lex(parser); return (pm_node_t *) pm_self_node_create(parser, &parser->previous); @@ -19580,6 +19646,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm .pattern_matching_newlines = false, .in_keyword_arg = false, .current_param_name = 0, + .current_block_exits = NULL, .semantic_token_seen = false, .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET, .current_regular_expression_ascii_only = false diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb index c7c5f1a625..cedd340e08 100644 --- a/prism/templates/src/diagnostic.c.erb +++ b/prism/templates/src/diagnostic.c.erb @@ -206,7 +206,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3_0] = { "`%.*s' is not allowed as an instance variable name", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_INCOMPLETE_VARIABLE_INSTANCE] = { "'%.*s' is not allowed as an instance variable name", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_INSTANCE_VARIABLE_BARE] = { "'@' without identifiers is not allowed as an instance variable name", PM_ERROR_LEVEL_SYNTAX }, - [PM_ERR_INVALID_BLOCK_EXIT] = { "Invalid %.*s", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_INVALID_BLOCK_EXIT] = { "Invalid %s", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_INVALID_FLOAT_EXPONENT] = { "invalid exponent", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_INVALID_NUMBER_BINARY] = { "invalid binary number", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_INVALID_NUMBER_DECIMAL] = { "invalid decimal number", PM_ERROR_LEVEL_SYNTAX }, @@ -299,7 +299,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_RESCUE_MODIFIER_VALUE] = { "expected a value after the `rescue` modifier", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_RESCUE_TERM] = { "expected a closing delimiter for the `rescue` clause", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_RESCUE_VARIABLE] = { "expected an exception variable after `=>` in a rescue statement", PM_ERROR_LEVEL_SYNTAX }, - [PM_ERR_RETURN_INVALID] = { "invalid `return` in a class or module body", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_RETURN_INVALID] = { "Invalid return in class/module body", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_SINGLETON_FOR_LITERALS] = { "cannot define singleton method for literals", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_STATEMENT_ALIAS] = { "unexpected an `alias` at a non-statement position", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_STATEMENT_POSTEXE_END] = { "unexpected an `END` at a non-statement position", PM_ERROR_LEVEL_SYNTAX }, diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb index adcf2d03b1..1d9bc0f986 100644 --- a/test/prism/errors_test.rb +++ b/test/prism/errors_test.rb @@ -257,7 +257,7 @@ module Prism ["unexpected ',', expecting end-of-input", 6..7], ["unexpected ',', ignoring it", 6..7], ["expected a matching `)`", 6..6], - ["Invalid next", 0..4], + ["Invalid next", 0..12], ["unexpected ')', expecting end-of-input", 12..13], ["unexpected ')', ignoring it", 12..13] ] @@ -266,7 +266,7 @@ module Prism def test_next_1 assert_errors expression("next 1,;"), "next 1,;", [ ["expected an argument", 6..7], - ["Invalid next", 0..4] + ["Invalid next", 0..7] ] end @@ -275,7 +275,7 @@ module Prism ["unexpected ',', expecting end-of-input", 7..8], ["unexpected ',', ignoring it", 7..8], ["expected a matching `)`", 7..7], - ["Invalid break", 0..5], + ["Invalid break", 0..13], ["unexpected ')', expecting end-of-input", 13..14], ["unexpected ')', ignoring it", 13..14] ] @@ -284,7 +284,7 @@ module Prism def test_break_1 assert_errors expression("break 1,;"), "break 1,;", [ ["expected an argument", 7..8], - ["Invalid break", 0..5] + ["Invalid break", 0..8] ] end @@ -1095,7 +1095,7 @@ module Prism ) assert_errors expected, "class A; return; end", [ - ["invalid `return` in a class or module body", 9..15] + ["Invalid return in class/module body", 9..15] ] end @@ -1110,7 +1110,7 @@ module Prism ) assert_errors expected, "module A; return; end", [ - ["invalid `return` in a class or module body", 10..16] + ["Invalid return in class/module body", 10..16] ] end @@ -1577,11 +1577,8 @@ module Prism message = "unexpected void value expression" assert_errors expression(source), source, [ [message, 7..13], - ["Invalid break", 35..40], [message, 35..40], - ["Invalid next", 51..55], [message, 51..55], - ["Invalid redo", 66..70], [message, 66..70], ["Invalid retry without rescue", 81..86], [message, 81..86], diff --git a/test/prism/parse_test.rb b/test/prism/parse_test.rb index d171a3aa76..afb53e0668 100644 --- a/test/prism/parse_test.rb +++ b/test/prism/parse_test.rb @@ -189,7 +189,7 @@ module Prism FileUtils.mkdir_p(directory) unless File.directory?(directory) ripper_should_match = ripper_enabled - check_valid_syntax = true + check_valid_syntax = RUBY_VERSION >= "3.2.0" case relative when "seattlerb/pct_w_heredoc_interp_nested.txt" -- cgit v1.2.3