summary refs log tree commit diff
diff options
context:
space:
mode:
author Kevin Newton <kddnewton@gmail.com> 2024-01-29 17:27:45 -0500
committer git <svn-admin@ruby-lang.org> 2024-01-30 16:10:08 +0000
commit ba06a8259a3f21c9cbee0f4f55b82c016a45a3b9 (patch)
tree 6bf9054c571735c63df122962f054b37bc43b04f
parent c85e28d12a4855e64271f0be4510b63053b628b7 (diff)
[ruby/prism] Better error messages for unexpected tokens in prefix
https://github.com/ruby/prism/commit/a35b8e45ee
-rw-r--r-- prism/diagnostic.c 5
-rw-r--r-- prism/diagnostic.h 12
-rw-r--r-- prism/parser.h 3
-rw-r--r-- prism/prism.c 152
-rw-r--r-- prism/prism.h 10
-rw-r--r-- prism/templates/ext/prism/api_node.c.erb 2
-rw-r--r-- prism/templates/src/token_type.c.erb 357
-rw-r--r-- test/prism/errors_test.rb 149
-rw-r--r-- test/prism/format_errors_test.rb 4
9 files changed, 572 insertions, 122 deletions
diff --git a/prism/diagnostic.c b/prism/diagnostic.c
index 3ff4a933c6..bf89ca781a 100644
--- a/prism/diagnostic.c
+++ b/prism/diagnostic.c
@@ -71,6 +71,8 @@ typedef struct {
* * `PM_WARNING_LEVEL_VERBOSE` - Warnings that appear with `-w`, as in `ruby -w -c -e 'code'`.
*/
static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
+ [PM_ERR_CANNOT_PARSE_EXPRESSION] = { "cannot parse the expression", PM_ERROR_LEVEL_FATAL },
+
// Errors
[PM_ERR_ALIAS_ARGUMENT] = { "invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable", PM_ERROR_LEVEL_FATAL },
[PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = { "unexpected `&&=` in a multiple assignment", PM_ERROR_LEVEL_FATAL },
@@ -106,7 +108,6 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
[PM_ERR_BLOCK_PARAM_PIPE_TERM] = { "expected the block parameters to end with `|`", PM_ERROR_LEVEL_FATAL },
[PM_ERR_BLOCK_TERM_BRACE] = { "expected a block beginning with `{` to end with `}`", PM_ERROR_LEVEL_FATAL },
[PM_ERR_BLOCK_TERM_END] = { "expected a block beginning with `do` to end with `end`", PM_ERROR_LEVEL_FATAL },
- [PM_ERR_CANNOT_PARSE_EXPRESSION] = { "cannot parse the expression", PM_ERROR_LEVEL_FATAL },
[PM_ERR_CANNOT_PARSE_STRING_PART] = { "cannot parse the string part", PM_ERROR_LEVEL_FATAL },
[PM_ERR_CASE_EXPRESSION_AFTER_CASE] = { "expected an expression after `case`", PM_ERROR_LEVEL_FATAL },
[PM_ERR_CASE_EXPRESSION_AFTER_WHEN] = { "expected an expression after `when`", PM_ERROR_LEVEL_FATAL },
@@ -277,6 +278,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
[PM_ERR_UNARY_RECEIVER_BANG] = { "expected a receiver for unary `!`", PM_ERROR_LEVEL_FATAL },
[PM_ERR_UNARY_RECEIVER_MINUS] = { "expected a receiver for unary `-`", PM_ERROR_LEVEL_FATAL },
[PM_ERR_UNARY_RECEIVER_PLUS] = { "expected a receiver for unary `+`", PM_ERROR_LEVEL_FATAL },
+ [PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT] = { "unexpected %s, assuming it is closing the parent %s", PM_ERROR_LEVEL_FATAL },
+ [PM_ERR_UNEXPECTED_TOKEN_IGNORE] = { "unexpected %s, ignoring it", PM_ERROR_LEVEL_FATAL },
[PM_ERR_UNARY_RECEIVER_TILDE] = { "expected a receiver for unary `~`", PM_ERROR_LEVEL_FATAL },
[PM_ERR_UNTIL_TERM] = { "expected an `end` to close the `until` statement", PM_ERROR_LEVEL_FATAL },
[PM_ERR_VOID_EXPRESSION] = { "unexpected void value expression", PM_ERROR_LEVEL_FATAL },
diff --git a/prism/diagnostic.h b/prism/diagnostic.h
index 9b600208ae..33123262b5 100644
--- a/prism/diagnostic.h
+++ b/prism/diagnostic.h
@@ -66,6 +66,11 @@ typedef struct {
* of errors between the parser and the user.
*/
typedef enum {
+ // This is a special error that we can potentially replace by others. For
+ // an example of how this is used, see parse_expression_prefix.
+ PM_ERR_CANNOT_PARSE_EXPRESSION,
+
+ // These are the error codes.
PM_ERR_ALIAS_ARGUMENT,
PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
PM_ERR_ARGUMENT_AFTER_BLOCK,
@@ -100,7 +105,6 @@ typedef enum {
PM_ERR_BLOCK_PARAM_PIPE_TERM,
PM_ERR_BLOCK_TERM_BRACE,
PM_ERR_BLOCK_TERM_END,
- PM_ERR_CANNOT_PARSE_EXPRESSION,
PM_ERR_CANNOT_PARSE_STRING_PART,
PM_ERR_CASE_EXPRESSION_AFTER_CASE,
PM_ERR_CASE_EXPRESSION_AFTER_WHEN,
@@ -272,6 +276,8 @@ typedef enum {
PM_ERR_UNARY_RECEIVER_MINUS,
PM_ERR_UNARY_RECEIVER_PLUS,
PM_ERR_UNARY_RECEIVER_TILDE,
+ PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
+ PM_ERR_UNEXPECTED_TOKEN_IGNORE,
PM_ERR_UNDEF_ARGUMENT,
PM_ERR_UNTIL_TERM,
PM_ERR_VOID_EXPRESSION,
@@ -280,13 +286,15 @@ typedef enum {
PM_ERR_WRITE_TARGET_READONLY,
PM_ERR_WRITE_TARGET_UNEXPECTED,
PM_ERR_XSTRING_TERM,
+
+ // These are the warning codes.
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
PM_WARN_AMBIGUOUS_PREFIX_STAR,
PM_WARN_AMBIGUOUS_SLASH,
PM_WARN_END_IN_METHOD,
- /* This must be the last member. */
+ // This is the number of diagnostic codes.
PM_DIAGNOSTIC_ID_LEN,
} pm_diagnostic_id_t;
diff --git a/prism/parser.h b/prism/parser.h
index c7ebb64b60..6ee215c76d 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -259,6 +259,9 @@ typedef struct pm_parser pm_parser_t;
* token that is understood by a parent context but not by the current context.
*/
typedef enum {
+ /** a null context, used for returning a value from a function */
+ PM_CONTEXT_NONE = 0,
+
/** a begin statement */
PM_CONTEXT_BEGIN,
diff --git a/prism/prism.c b/prism/prism.c
index 36699f5894..ea2723cfaf 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -164,7 +164,7 @@ debug_state(pm_parser_t *parser) {
PRISM_ATTRIBUTE_UNUSED static void
debug_token(pm_token_t * token) {
- fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_to_str(token->type), (int) (token->end - token->start), token->start);
+ fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_human(token->type), (int) (token->end - token->start), token->start);
}
#endif
@@ -6719,21 +6719,27 @@ context_terminator(pm_context_t context, pm_token_t *token) {
return token->type == PM_TOKEN_BRACE_RIGHT;
case PM_CONTEXT_PREDICATE:
return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
+ case PM_CONTEXT_NONE:
+ return false;
}
return false;
}
-static bool
-context_recoverable(pm_parser_t *parser, pm_token_t *token) {
+/**
+ * Returns the context that the given token is found to be terminating, or
+ * returns PM_CONTEXT_NONE.
+ */
+static pm_context_t
+context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
pm_context_node_t *context_node = parser->current_context;
while (context_node != NULL) {
- if (context_terminator(context_node->context, token)) return true;
+ if (context_terminator(context_node->context, token)) return context_node->context;
context_node = context_node->prev;
}
- return false;
+ return PM_CONTEXT_NONE;
}
static bool
@@ -6761,7 +6767,7 @@ context_pop(pm_parser_t *parser) {
}
static bool
-context_p(pm_parser_t *parser, pm_context_t context) {
+context_p(const pm_parser_t *parser, pm_context_t context) {
pm_context_node_t *context_node = parser->current_context;
while (context_node != NULL) {
@@ -6773,7 +6779,7 @@ context_p(pm_parser_t *parser, pm_context_t context) {
}
static bool
-context_def_p(pm_parser_t *parser) {
+context_def_p(const pm_parser_t *parser) {
pm_context_node_t *context_node = parser->current_context;
while (context_node != NULL) {
@@ -6796,6 +6802,55 @@ context_def_p(pm_parser_t *parser) {
return false;
}
+/**
+ * Returns a human readable string for the given context, used in error
+ * messages.
+ */
+static const char *
+context_human(pm_context_t context) {
+ switch (context) {
+ case PM_CONTEXT_NONE:
+ assert(false && "unreachable");
+ return "";
+ case PM_CONTEXT_BEGIN: return "begin statement";
+ case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
+ case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
+ case PM_CONTEXT_CASE_WHEN: return "'when' clause";
+ case PM_CONTEXT_CASE_IN: return "'in' clause";
+ case PM_CONTEXT_CLASS: return "class definition";
+ case PM_CONTEXT_DEF: return "method definition";
+ case PM_CONTEXT_DEF_PARAMS: return "method parameters";
+ case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
+ case PM_CONTEXT_ELSE: return "'else' clause";
+ case PM_CONTEXT_ELSIF: return "'elsif' clause";
+ case PM_CONTEXT_EMBEXPR: return "embedded expression";
+ case PM_CONTEXT_ENSURE: return "'ensure' clause";
+ case PM_CONTEXT_ENSURE_DEF: return "'ensure' clause";
+ case PM_CONTEXT_FOR: return "for loop";
+ case PM_CONTEXT_FOR_INDEX: return "for loop index";
+ case PM_CONTEXT_IF: return "if statement";
+ case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
+ case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
+ case PM_CONTEXT_MAIN: return "top level context";
+ case PM_CONTEXT_MODULE: return "module definition";
+ case PM_CONTEXT_PARENS: return "parentheses";
+ case PM_CONTEXT_POSTEXE: return "'END' block";
+ case PM_CONTEXT_PREDICATE: return "predicate";
+ case PM_CONTEXT_PREEXE: return "'BEGIN' block";
+ case PM_CONTEXT_RESCUE_ELSE: return "'else' clause";
+ case PM_CONTEXT_RESCUE_ELSE_DEF: return "'else' clause";
+ case PM_CONTEXT_RESCUE: return "'rescue' clause";
+ case PM_CONTEXT_RESCUE_DEF: return "'rescue' clause";
+ case PM_CONTEXT_SCLASS: return "singleton class definition";
+ case PM_CONTEXT_UNLESS: return "unless statement";
+ case PM_CONTEXT_UNTIL: return "until statement";
+ case PM_CONTEXT_WHILE: return "while statement";
+ }
+
+ assert(false && "unreachable");
+ return "";
+}
+
/******************************************************************************/
/* Specific token lexers */
/******************************************************************************/
@@ -14177,7 +14232,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
* Parse an expression that begins with the previous node that we just lexed.
*/
static inline pm_node_t *
-parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call) {
+parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
switch (parser->current.type) {
case PM_TOKEN_BRACKET_LEFT_ARRAY: {
parser_lex(parser);
@@ -14595,30 +14650,30 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
- }
- else {
+ } else {
// Check if `it` is not going to be assigned.
switch (parser->current.type) {
- case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
- case PM_TOKEN_AMPERSAND_EQUAL:
- case PM_TOKEN_CARET_EQUAL:
- case PM_TOKEN_EQUAL:
- case PM_TOKEN_GREATER_GREATER_EQUAL:
- case PM_TOKEN_LESS_LESS_EQUAL:
- case PM_TOKEN_MINUS_EQUAL:
- case PM_TOKEN_PARENTHESIS_RIGHT:
- case PM_TOKEN_PERCENT_EQUAL:
- case PM_TOKEN_PIPE_EQUAL:
- case PM_TOKEN_PIPE_PIPE_EQUAL:
- case PM_TOKEN_PLUS_EQUAL:
- case PM_TOKEN_SLASH_EQUAL:
- case PM_TOKEN_STAR_EQUAL:
- case PM_TOKEN_STAR_STAR_EQUAL:
- break;
- default:
- // Once we know it's neither a method call nor an assignment,
- // we can finally create `it` default parameter.
- node = pm_node_check_it(parser, node);
+ case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
+ case PM_TOKEN_AMPERSAND_EQUAL:
+ case PM_TOKEN_CARET_EQUAL:
+ case PM_TOKEN_EQUAL:
+ case PM_TOKEN_GREATER_GREATER_EQUAL:
+ case PM_TOKEN_LESS_LESS_EQUAL:
+ case PM_TOKEN_MINUS_EQUAL:
+ case PM_TOKEN_PARENTHESIS_RIGHT:
+ case PM_TOKEN_PERCENT_EQUAL:
+ case PM_TOKEN_PIPE_EQUAL:
+ case PM_TOKEN_PIPE_PIPE_EQUAL:
+ case PM_TOKEN_PLUS_EQUAL:
+ case PM_TOKEN_SLASH_EQUAL:
+ case PM_TOKEN_STAR_EQUAL:
+ case PM_TOKEN_STAR_STAR_EQUAL:
+ break;
+ default:
+ // Once we know it's neither a method call nor an
+ // assignment, we can finally create `it` default
+ // parameter.
+ node = pm_node_check_it(parser, node);
}
}
@@ -14656,6 +14711,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// If we get here, then we tried to find something in the
// heredoc but couldn't actually parse anything, so we'll just
// return a missing node.
+ //
+ // parse_string_part handles its own errors, so there is no need
+ // for us to add one here.
node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
} else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
// If we get here, then the part that we parsed was plain string
@@ -16301,6 +16359,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// context of a multiple assignment. We enforce that here. We'll
// still lex past it though and create a missing node place.
if (binding_power != PM_BINDING_POWER_STATEMENT) {
+ pm_parser_err_previous(parser, diag_id);
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
}
@@ -16487,12 +16546,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END);
}
- default:
- if (context_recoverable(parser, &parser->current)) {
+ default: {
+ pm_context_t recoverable = context_recoverable(parser, &parser->current);
+
+ if (recoverable != PM_CONTEXT_NONE) {
parser->recovering = true;
+
+ // If the given error is not the generic one, then we'll add it
+ // here because it will provide more context in addition to the
+ // recoverable error that we will also add.
+ if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
+ pm_parser_err_previous(parser, diag_id);
+ }
+
+ // If we get here, then we are assuming this token is closing a
+ // parent context, so we'll indicate that to the user so that
+ // they know how we behaved.
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
+ } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
+ // We're going to make a special case here, because "cannot
+ // parse expression" is pretty generic, and we know here that we
+ // have an unexpected token.
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
+ } else {
+ pm_parser_err_previous(parser, diag_id);
}
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+ }
}
}
@@ -17455,15 +17536,12 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
*/
static pm_node_t *
parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
- pm_token_t recovery = parser->previous;
- pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call);
+ pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, diag_id);
switch (PM_NODE_TYPE(node)) {
case PM_MISSING_NODE:
// If we found a syntax error, then the type of node returned by
- // parse_expression_prefix is going to be a missing node. In that
- // case we need to add the error message to the parser's error list.
- pm_parser_err(parser, recovery.end, recovery.end, diag_id);
+ // parse_expression_prefix is going to be a missing node.
return node;
case PM_PRE_EXECUTION_NODE:
case PM_POST_EXECUTION_NODE:
diff --git a/prism/prism.h b/prism/prism.h
index 45bfff7a11..08d216cbb5 100644
--- a/prism/prism.h
+++ b/prism/prism.h
@@ -168,7 +168,15 @@ PRISM_EXPORTED_FUNCTION bool pm_parse_success_p(const uint8_t *source, size_t si
* @param token_type The token type to convert to a string.
* @return A string representation of the given token type.
*/
-PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type);
+PRISM_EXPORTED_FUNCTION const char * pm_token_type_name(pm_token_type_t token_type);
+
+/**
+ * Returns the human name of the given token type.
+ *
+ * @param token_type The token type to convert to a human name.
+ * @return The human name of the given token type.
+ */
+const char * pm_token_type_human(pm_token_type_t token_type);
/**
* Format the errors on the parser into the given buffer.
diff --git a/prism/templates/ext/prism/api_node.c.erb b/prism/templates/ext/prism/api_node.c.erb
index 93f67f6551..20b3810715 100644
--- a/prism/templates/ext/prism/api_node.c.erb
+++ b/prism/templates/ext/prism/api_node.c.erb
@@ -19,7 +19,7 @@ pm_location_new(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, V
VALUE
pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source) {
- ID type = rb_intern(pm_token_type_to_str(token->type));
+ ID type = rb_intern(pm_token_type_name(token->type));
VALUE location = pm_location_new(parser, token->start, token->end, source);
VALUE argv[] = {
diff --git a/prism/templates/src/token_type.c.erb b/prism/templates/src/token_type.c.erb
index d3c1c3f1b8..99f5d1b254 100644
--- a/prism/templates/src/token_type.c.erb
+++ b/prism/templates/src/token_type.c.erb
@@ -6,15 +6,364 @@
* Returns a string representation of the given token type.
*/
PRISM_EXPORTED_FUNCTION const char *
-pm_token_type_to_str(pm_token_type_t token_type)
-{
+pm_token_type_name(pm_token_type_t token_type) {
switch (token_type) {
<%- tokens.each do |token| -%>
case PM_TOKEN_<%= token.name %>:
return "<%= token.name %>";
<%- end -%>
case PM_TOKEN_MAXIMUM:
- return "MAXIMUM";
+ assert(false && "unreachable");
+ return "";
}
- return "\0";
+
+ // Provide a default, because some compilers can't determine that the above
+ // switch is exhaustive.
+ assert(false && "unreachable");
+ return "";
+}
+
+/**
+ * Returns the human name of the given token type.
+ */
+const char *
+pm_token_type_human(pm_token_type_t token_type) {
+ switch (token_type) {
+ case PM_TOKEN_EOF:
+ return "end of file";
+ case PM_TOKEN_MISSING:
+ return "missing token";
+ case PM_TOKEN_NOT_PROVIDED:
+ return "not provided token";
+ case PM_TOKEN_AMPERSAND:
+ return "'&'";
+ case PM_TOKEN_AMPERSAND_AMPERSAND:
+ return "'&&'";
+ case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
+ return "'&&='";
+ case PM_TOKEN_AMPERSAND_DOT:
+ return "'&.'";
+ case PM_TOKEN_AMPERSAND_EQUAL:
+ return "'&='";
+ case PM_TOKEN_BACKTICK:
+ return "'`'";
+ case PM_TOKEN_BACK_REFERENCE:
+ return "back reference";
+ case PM_TOKEN_BANG:
+ return "'!'";
+ case PM_TOKEN_BANG_EQUAL:
+ return "'!='";
+ case PM_TOKEN_BANG_TILDE:
+ return "'!~'";
+ case PM_TOKEN_BRACE_LEFT:
+ return "'{'";
+ case PM_TOKEN_BRACE_RIGHT:
+ return "'}'";
+ case PM_TOKEN_BRACKET_LEFT:
+ return "'['";
+ case PM_TOKEN_BRACKET_LEFT_ARRAY:
+ return "'['";
+ case PM_TOKEN_BRACKET_LEFT_RIGHT:
+ return "'[]'";
+ case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL:
+ return "'[]='";
+ case PM_TOKEN_BRACKET_RIGHT:
+ return "']'";
+ case PM_TOKEN_CARET:
+ return "'^'";
+ case PM_TOKEN_CARET_EQUAL:
+ return "'^='";
+ case PM_TOKEN_CHARACTER_LITERAL:
+ return "character literal";
+ case PM_TOKEN_CLASS_VARIABLE:
+ return "class variable";
+ case PM_TOKEN_COLON:
+ return "':'";
+ case PM_TOKEN_COLON_COLON:
+ return "'::'";
+ case PM_TOKEN_COMMA:
+ return "','";
+ case PM_TOKEN_COMMENT:
+ return "comment";
+ case PM_TOKEN_CONSTANT:
+ return "constant";
+ case PM_TOKEN_DOT:
+ return "'.'";
+ case PM_TOKEN_DOT_DOT:
+ return "'..'";
+ case PM_TOKEN_DOT_DOT_DOT:
+ return "'...'";
+ case PM_TOKEN_EMBDOC_BEGIN:
+ return "'=begin'";
+ case PM_TOKEN_EMBDOC_END:
+ return "'=end'";
+ case PM_TOKEN_EMBDOC_LINE:
+ return "embedded documentation line";
+ case PM_TOKEN_EMBEXPR_BEGIN:
+ return "'#{'";
+ case PM_TOKEN_EMBEXPR_END:
+ return "'}'";
+ case PM_TOKEN_EMBVAR:
+ return "'#'";
+ case PM_TOKEN_EQUAL:
+ return "'='";
+ case PM_TOKEN_EQUAL_EQUAL:
+ return "'=='";
+ case PM_TOKEN_EQUAL_EQUAL_EQUAL:
+ return "'==='";
+ case PM_TOKEN_EQUAL_GREATER:
+ return "'=>'";
+ case PM_TOKEN_EQUAL_TILDE:
+ return "'=~'";
+ case PM_TOKEN_FLOAT:
+ return "float";
+ case PM_TOKEN_FLOAT_IMAGINARY:
+ return "imaginary";
+ case PM_TOKEN_FLOAT_RATIONAL:
+ return "rational";
+ case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
+ return "imaginary";
+ case PM_TOKEN_GLOBAL_VARIABLE:
+ return "global variable";
+ case PM_TOKEN_GREATER:
+ return "'>'";
+ case PM_TOKEN_GREATER_EQUAL:
+ return "'>='";
+ case PM_TOKEN_GREATER_GREATER:
+ return "'>>'";
+ case PM_TOKEN_GREATER_GREATER_EQUAL:
+ return "'>>='";
+ case PM_TOKEN_HEREDOC_END:
+ return "heredoc ending";
+ case PM_TOKEN_HEREDOC_START:
+ return "heredoc beginning";
+ case PM_TOKEN_IDENTIFIER:
+ return "local variable or method identifier";
+ case PM_TOKEN_IGNORED_NEWLINE:
+ return "ignored newline";
+ case PM_TOKEN_INSTANCE_VARIABLE:
+ return "instance variable";
+ case PM_TOKEN_INTEGER:
+ return "integer";
+ case PM_TOKEN_INTEGER_IMAGINARY:
+ return "imaginary";
+ case PM_TOKEN_INTEGER_RATIONAL:
+ return "rational";
+ case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY:
+ return "imaginary";
+ case PM_TOKEN_KEYWORD_ALIAS:
+ return "'alias'";
+ case PM_TOKEN_KEYWORD_AND:
+ return "'and'";
+ case PM_TOKEN_KEYWORD_BEGIN:
+ return "'begin'";
+ case PM_TOKEN_KEYWORD_BEGIN_UPCASE:
+ return "'BEGIN'";
+ case PM_TOKEN_KEYWORD_BREAK:
+ return "'break'";
+ case PM_TOKEN_KEYWORD_CASE:
+ return "'case'";
+ case PM_TOKEN_KEYWORD_CLASS:
+ return "'class'";
+ case PM_TOKEN_KEYWORD_DEF:
+ return "'def'";
+ case PM_TOKEN_KEYWORD_DEFINED:
+ return "'defined?'";
+ case PM_TOKEN_KEYWORD_DO:
+ return "'do'";
+ case PM_TOKEN_KEYWORD_DO_LOOP:
+ return "'do'";
+ case PM_TOKEN_KEYWORD_ELSE:
+ return "'else'";
+ case PM_TOKEN_KEYWORD_ELSIF:
+ return "'elsif'";
+ case PM_TOKEN_KEYWORD_END:
+ return "'end'";
+ case PM_TOKEN_KEYWORD_END_UPCASE:
+ return "'END'";
+ case PM_TOKEN_KEYWORD_ENSURE:
+ return "'ensure'";
+ case PM_TOKEN_KEYWORD_FALSE:
+ return "'false'";
+ case PM_TOKEN_KEYWORD_FOR:
+ return "'for'";
+ case PM_TOKEN_KEYWORD_IF:
+ return "'if'";
+ case PM_TOKEN_KEYWORD_IF_MODIFIER:
+ return "'if'";
+ case PM_TOKEN_KEYWORD_IN:
+ return "'in'";
+ case PM_TOKEN_KEYWORD_MODULE:
+ return "'module'";
+ case PM_TOKEN_KEYWORD_NEXT:
+ return "'next'";
+ case PM_TOKEN_KEYWORD_NIL:
+ return "'nil'";
+ case PM_TOKEN_KEYWORD_NOT:
+ return "'not'";
+ case PM_TOKEN_KEYWORD_OR:
+ return "'or'";
+ case PM_TOKEN_KEYWORD_REDO:
+ return "'redo'";
+ case PM_TOKEN_KEYWORD_RESCUE:
+ return "'rescue'";
+ case PM_TOKEN_KEYWORD_RESCUE_MODIFIER:
+ return "'rescue'";
+ case PM_TOKEN_KEYWORD_RETRY:
+ return "'retry'";
+ case PM_TOKEN_KEYWORD_RETURN:
+ return "'return'";
+ case PM_TOKEN_KEYWORD_SELF:
+ return "'self'";
+ case PM_TOKEN_KEYWORD_SUPER:
+ return "'super'";
+ case PM_TOKEN_KEYWORD_THEN:
+ return "'then'";
+ case PM_TOKEN_KEYWORD_TRUE:
+ return "'true'";
+ case PM_TOKEN_KEYWORD_UNDEF:
+ return "'undef'";
+ case PM_TOKEN_KEYWORD_UNLESS:
+ return "'unless'";
+ case PM_TOKEN_KEYWORD_UNLESS_MODIFIER:
+ return "'unless'";
+ case PM_TOKEN_KEYWORD_UNTIL:
+ return "'until'";
+ case PM_TOKEN_KEYWORD_UNTIL_MODIFIER:
+ return "'until'";
+ case PM_TOKEN_KEYWORD_WHEN:
+ return "'when'";
+ case PM_TOKEN_KEYWORD_WHILE:
+ return "'while'";
+ case PM_TOKEN_KEYWORD_WHILE_MODIFIER:
+ return "'while'";
+ case PM_TOKEN_KEYWORD_YIELD:
+ return "'yield'";
+ case PM_TOKEN_KEYWORD___ENCODING__:
+ return "'__ENCODING__'";
+ case PM_TOKEN_KEYWORD___FILE__:
+ return "'__FILE__'";
+ case PM_TOKEN_KEYWORD___LINE__:
+ return "'__LINE__'";
+ case PM_TOKEN_LABEL:
+ return "label";
+ case PM_TOKEN_LABEL_END:
+ return "':'";
+ case PM_TOKEN_LAMBDA_BEGIN:
+ return "'{'";
+ case PM_TOKEN_LESS:
+ return "'<'";
+ case PM_TOKEN_LESS_EQUAL:
+ return "'<='";
+ case PM_TOKEN_LESS_EQUAL_GREATER:
+ return "'<=>'";
+ case PM_TOKEN_LESS_LESS:
+ return "'<<'";
+ case PM_TOKEN_LESS_LESS_EQUAL:
+ return "'<<='";
+ case PM_TOKEN_METHOD_NAME:
+ return "method name";
+ case PM_TOKEN_MINUS:
+ return "'-'";
+ case PM_TOKEN_MINUS_EQUAL:
+ return "'-='";
+ case PM_TOKEN_MINUS_GREATER:
+ return "'->'";
+ case PM_TOKEN_NEWLINE:
+ return "newline";
+ case PM_TOKEN_NUMBERED_REFERENCE:
+ return "numbered reference";
+ case PM_TOKEN_PARENTHESIS_LEFT:
+ return "'('";
+ case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
+ return "'('";
+ case PM_TOKEN_PARENTHESIS_RIGHT:
+ return "')'";
+ case PM_TOKEN_PERCENT:
+ return "'%'";
+ case PM_TOKEN_PERCENT_EQUAL:
+ return "'%='";
+ case PM_TOKEN_PERCENT_LOWER_I:
+ return "'%i'";
+ case PM_TOKEN_PERCENT_LOWER_W:
+ return "'%w'";
+ case PM_TOKEN_PERCENT_LOWER_X:
+ return "'%x'";
+ case PM_TOKEN_PERCENT_UPPER_I:
+ return "'%I'";
+ case PM_TOKEN_PERCENT_UPPER_W:
+ return "'%W'";
+ case PM_TOKEN_PIPE:
+ return "'|'";
+ case PM_TOKEN_PIPE_EQUAL:
+ return "'|='";
+ case PM_TOKEN_PIPE_PIPE:
+ return "'||'";
+ case PM_TOKEN_PIPE_PIPE_EQUAL:
+ return "'||='";
+ case PM_TOKEN_PLUS:
+ return "'+'";
+ case PM_TOKEN_PLUS_EQUAL:
+ return "'+='";
+ case PM_TOKEN_QUESTION_MARK:
+ return "'?'";
+ case PM_TOKEN_REGEXP_BEGIN:
+ return "regular expression beginning";
+ case PM_TOKEN_REGEXP_END:
+ return "regular expression ending";
+ case PM_TOKEN_SEMICOLON:
+ return "';'";
+ case PM_TOKEN_SLASH:
+ return "'/'";
+ case PM_TOKEN_SLASH_EQUAL:
+ return "'/='";
+ case PM_TOKEN_STAR:
+ return "'*'";
+ case PM_TOKEN_STAR_EQUAL:
+ return "'*='";
+ case PM_TOKEN_STAR_STAR:
+ return "'**'";
+ case PM_TOKEN_STAR_STAR_EQUAL:
+ return "'**='";
+ case PM_TOKEN_STRING_BEGIN:
+ return "string beginning";
+ case PM_TOKEN_STRING_CONTENT:
+ return "string content";
+ case PM_TOKEN_STRING_END:
+ return "string ending";
+ case PM_TOKEN_SYMBOL_BEGIN:
+ return "symbol beginning";
+ case PM_TOKEN_TILDE:
+ return "'~'";
+ case PM_TOKEN_UAMPERSAND:
+ return "'&'";
+ case PM_TOKEN_UCOLON_COLON:
+ return "'::'";
+ case PM_TOKEN_UDOT_DOT:
+ return "'..'";
+ case PM_TOKEN_UDOT_DOT_DOT:
+ return "'...'";
+ case PM_TOKEN_UMINUS:
+ return "'-'";
+ case PM_TOKEN_UMINUS_NUM:
+ return "'-'";
+ case PM_TOKEN_UPLUS:
+ return "'+'";
+ case PM_TOKEN_USTAR:
+ return "'*'";
+ case PM_TOKEN_USTAR_STAR:
+ return "'**'";
+ case PM_TOKEN_WORDS_SEP:
+ return "string separator";
+ case PM_TOKEN___END__:
+ return "'__END__'";
+ case PM_TOKEN_MAXIMUM:
+ assert(false && "unreachable");
+ return "";
+ }
+
+ // Provide a default, because some compilers can't determine that the above
+ // switch is exhaustive.
+ assert(false && "unreachable");
+ return "";
}
diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb
index 2b45167bee..4518c8a65d 100644
--- a/test/prism/errors_test.rb
+++ b/test/prism/errors_test.rb
@@ -26,7 +26,8 @@ module Prism
)
assert_errors expected, "module Parent module end", [
- ["expected a constant name after `module`", 20..20]
+ ["expected a constant name after `module`", 14..20],
+ ["unexpected 'end', assuming it is closing the parent module definition", 21..24]
]
end
@@ -98,7 +99,8 @@ module Prism
)
assert_errors expected, "BEGIN { 1 + }", [
- ["expected an expression after the operator", 11..11]
+ ["expected an expression after the operator", 10..11],
+ ["unexpected '}', assuming it is closing the parent 'BEGIN' block", 12..13]
]
end
@@ -189,7 +191,7 @@ module Prism
def test_unterminated_parenthesized_expression
assert_errors expression('(1 + 2'), '(1 + 2', [
["expected a newline or semicolon after the statement", 6..6],
- ["cannot parse the expression", 6..6],
+ ["unexpected end of file, assuming it is closing the parent top level context", 6..6],
["expected a matching `)`", 6..6]
]
end
@@ -203,7 +205,8 @@ module Prism
def test_unterminated_argument_expression
assert_errors expression('a %'), 'a %', [
["invalid `%` token", 2..3],
- ["expected an expression after the operator", 3..3],
+ ["expected an expression after the operator", 2..3],
+ ["unexpected end of file, assuming it is closing the parent top level context", 3..3]
]
end
@@ -222,62 +225,62 @@ module Prism
def test_1_2_3
assert_errors expression("(1, 2, 3)"), "(1, 2, 3)", [
["expected a newline or semicolon after the statement", 2..2],
- ["cannot parse the expression", 2..2],
+ ["unexpected ',', ignoring it", 2..3],
["expected a matching `)`", 2..2],
["expected a newline or semicolon after the statement", 2..2],
- ["cannot parse the expression", 2..2],
+ ["unexpected ',', ignoring it", 2..3],
["expected a newline or semicolon after the statement", 5..5],
- ["cannot parse the expression", 5..5],
+ ["unexpected ',', ignoring it", 5..6],
["expected a newline or semicolon after the statement", 8..8],
- ["cannot parse the expression", 8..8]
+ ["unexpected ')', ignoring it", 8..9]
]
end
def test_return_1_2_3
assert_error_messages "return(1, 2, 3)", [
"expected a newline or semicolon after the statement",
- "cannot parse the expression",
+ "unexpected ',', ignoring it",
"expected a matching `)`",
"expected a newline or semicolon after the statement",
- "cannot parse the expression"
+ "unexpected ')', ignoring it"
]
end
def test_return_1
assert_errors expression("return 1,;"), "return 1,;", [
- ["expected an argument", 9..9]
+ ["expected an argument", 8..9]
]
end
def test_next_1_2_3
assert_errors expression("next(1, 2, 3)"), "next(1, 2, 3)", [
["expected a newline or semicolon after the statement", 6..6],
- ["cannot parse the expression", 6..6],
+ ["unexpected ',', ignoring it", 6..7],
["expected a matching `)`", 6..6],
["expected a newline or semicolon after the statement", 12..12],
- ["cannot parse the expression", 12..12]
+ ["unexpected ')', ignoring it", 12..13]
]
end
def test_next_1
assert_errors expression("next 1,;"), "next 1,;", [
- ["expected an argument", 7..7]
+ ["expected an argument", 6..7]
]
end
def test_break_1_2_3
assert_errors expression("break(1, 2, 3)"), "break(1, 2, 3)", [
["expected a newline or semicolon after the statement", 7..7],
- ["cannot parse the expression", 7..7],
+ ["unexpected ',', ignoring it", 7..8],
["expected a matching `)`", 7..7],
["expected a newline or semicolon after the statement", 13..13],
- ["cannot parse the expression", 13..13]
+ ["unexpected ')', ignoring it", 13..14]
]
end
def test_break_1
assert_errors expression("break 1,;"), "break 1,;", [
- ["expected an argument", 8..8]
+ ["expected an argument", 7..8]
]
end
@@ -338,22 +341,22 @@ module Prism
["expected a matching `)`", 8..8],
["expected a `.` or `::` after the receiver in a method definition", 8..8],
["expected a delimiter to close the parameters", 9..9],
- ["cannot parse the expression", 9..9],
- ["cannot parse the expression", 11..11]
+ ["unexpected ')', ignoring it", 10..11],
+ ["unexpected '.', ignoring it", 11..12]
]
end
def test_def_with_empty_expression_receiver
assert_errors expression("def ().a; end"), "def ().a; end", [
- ["expected a receiver for the method definition", 5..5]
+ ["expected a receiver for the method definition", 4..5]
]
end
def test_block_beginning_with_brace_and_ending_with_end
assert_error_messages "x.each { x end", [
"expected a newline or semicolon after the statement",
- "cannot parse the expression",
- "cannot parse the expression",
+ "unexpected 'end', ignoring it",
+ "unexpected end of file, assuming it is closing the parent top level context",
"expected a block beginning with `{` to end with `}`"
]
end
@@ -401,7 +404,7 @@ module Prism
assert_error_messages "foo(*bar and baz)", [
"expected a `)` to close the arguments",
"expected a newline or semicolon after the statement",
- "cannot parse the expression"
+ "unexpected ')', ignoring it"
]
end
@@ -1490,8 +1493,8 @@ module Prism
assert_errors expression(source), source, [
["expected a `do` keyword or a `{` to open the lambda block", 3..3],
["expected a newline or semicolon after the statement", 7..7],
- ["cannot parse the expression", 7..7],
- ["expected a lambda block beginning with `do` to end with `end`", 7..7],
+ ["unexpected end of file, assuming it is closing the parent top level context", 7..7],
+ ["expected a lambda block beginning with `do` to end with `end`", 7..7]
]
end
@@ -1546,10 +1549,11 @@ module Prism
def test_while_endless_method
source = "while def f = g do end"
+
assert_errors expression(source), source, [
- ['expected a predicate expression for the `while` statement', 22..22],
- ['cannot parse the expression', 22..22],
- ['expected an `end` to close the `while` statement', 22..22]
+ ["expected a predicate expression for the `while` statement", 22..22],
+ ["unexpected end of file, assuming it is closing the parent top level context", 22..22],
+ ["expected an `end` to close the `while` statement", 22..22]
]
end
@@ -1558,13 +1562,12 @@ module Prism
a in b + c
a => b + c
RUBY
- message1 = 'expected a newline or semicolon after the statement'
- message2 = 'cannot parse the expression'
+
assert_errors expression(source), source, [
- [message1, 6..6],
- [message2, 6..6],
- [message1, 17..17],
- [message2, 17..17],
+ ["expected a newline or semicolon after the statement", 6..6],
+ ["unexpected '+', ignoring it", 7..8],
+ ["expected a newline or semicolon after the statement", 17..17],
+ ["unexpected '+', ignoring it", 18..19]
]
end
@@ -1859,9 +1862,10 @@ module Prism
def test_non_assoc_range
source = '1....2'
+
assert_errors expression(source), source, [
- ['expected a newline or semicolon after the statement', 4..4],
- ['cannot parse the expression', 4..4],
+ ["expected a newline or semicolon after the statement", 4..4],
+ ["unexpected '.', ignoring it", 4..5]
]
end
@@ -1892,25 +1896,24 @@ module Prism
undef x + 1
undef x.z
RUBY
- message1 = 'expected a newline or semicolon after the statement'
- message2 = 'cannot parse the expression'
+
assert_errors expression(source), source, [
- [message1, 9..9],
- [message2, 9..9],
- [message1, 23..23],
- [message2, 23..23],
- [message1, 39..39],
- [message2, 39..39],
- [message1, 57..57],
- [message2, 57..57],
- [message1, 71..71],
- [message2, 71..71],
- [message1, 87..87],
- [message2, 87..87],
- [message1, 97..97],
- [message2, 97..97],
- [message1, 109..109],
- [message2, 109..109],
+ ["expected a newline or semicolon after the statement", 9..9],
+ ["unexpected '+', ignoring it", 10..11],
+ ["expected a newline or semicolon after the statement", 23..23],
+ ["unexpected '.', ignoring it", 23..24],
+ ["expected a newline or semicolon after the statement", 39..39],
+ ["unexpected '+', ignoring it", 40..41],
+ ["expected a newline or semicolon after the statement", 57..57],
+ ["unexpected '.', ignoring it", 57..58],
+ ["expected a newline or semicolon after the statement", 71..71],
+ ["unexpected '+', ignoring it", 72..73],
+ ["expected a newline or semicolon after the statement", 87..87],
+ ["unexpected '.', ignoring it", 87..88],
+ ["expected a newline or semicolon after the statement", 97..97],
+ ["unexpected '+', ignoring it", 98..99],
+ ["expected a newline or semicolon after the statement", 109..109],
+ ["unexpected '.', ignoring it", 109..110]
]
end
@@ -1934,13 +1937,12 @@ module Prism
..1..
...1..
RUBY
- message1 = 'expected a newline or semicolon after the statement'
- message2 = 'cannot parse the expression'
+
assert_errors expression(source), source, [
- [message1, 3..3],
- [message2, 3..3],
- [message1, 10..10],
- [message2, 10..10],
+ ["expected a newline or semicolon after the statement", 3..3],
+ ["unexpected '..', ignoring it", 3..5],
+ ["expected a newline or semicolon after the statement", 10..10],
+ ["unexpected '..', ignoring it", 10..12]
]
end
@@ -2047,21 +2049,20 @@ module Prism
1 !~ 2 !~ 3
1 <=> 2 <=> 3
RUBY
- message1 = 'expected a newline or semicolon after the statement'
- message2 = 'cannot parse the expression'
+
assert_errors expression(source), source, [
- [message1, 6..6],
- [message2, 6..6],
- [message1, 18..18],
- [message2, 18..18],
- [message1, 31..31],
- [message2, 31..31],
- [message1, 44..44],
- [message2, 44..44],
- [message1, 56..56],
- [message2, 56..56],
- [message1, 69..69],
- [message2, 69..69],
+ ["expected a newline or semicolon after the statement", 6..6],
+ ["unexpected '==', ignoring it", 7..9],
+ ["expected a newline or semicolon after the statement", 18..18],
+ ["unexpected '!=', ignoring it", 19..21],
+ ["expected a newline or semicolon after the statement", 31..31],
+ ["unexpected '===', ignoring it", 32..35],
+ ["expected a newline or semicolon after the statement", 44..44],
+ ["unexpected '=~', ignoring it", 45..47],
+ ["expected a newline or semicolon after the statement", 56..56],
+ ["unexpected '!~', ignoring it", 57..59],
+ ["expected a newline or semicolon after the statement", 69..69],
+ ["unexpected '<=>', ignoring it", 70..73]
]
end
diff --git a/test/prism/format_errors_test.rb b/test/prism/format_errors_test.rb
index 3533a73863..34d320a7b6 100644
--- a/test/prism/format_errors_test.rb
+++ b/test/prism/format_errors_test.rb
@@ -9,8 +9,8 @@ module Prism
def test_format_errors
assert_equal <<~ERROR, Debug.format_errors("<>", false)
> 1 | <>
- | ^ cannot parse the expression
- | ^ cannot parse the expression
+ | ^ unexpected '<', ignoring it
+ | ^ unexpected '>', ignoring it
ERROR
end
end