[ruby/prism] Better error messages for unexpected tokens in prefix

https://github.com/ruby/prism/commit/a35b8e45ee
author: Kevin Newton <kddnewton@gmail.com> 2024-01-29 17:27:45 -0500
committer: git <svn-admin@ruby-lang.org> 2024-01-30 16:10:08 +0000
commit: ba06a8259a3f21c9cbee0f4f55b82c016a45a3b9 (patch)
tree: 6bf9054c571735c63df122962f054b37bc43b04f
parent: c85e28d12a4855e64271f0be4510b63053b628b7 (diff)
9 files changed, 572 insertions, 122 deletions
diff --git a/prism/diagnostic.c b/prism/diagnostic.c
index 3ff4a933c6..bf89ca781a 100644
--- a/prism/diagnostic.c
+++ b/prism/diagnostic.c
@@ -71,6 +71,8 @@ typedef struct {
  * * `PM_WARNING_LEVEL_VERBOSE` - Warnings that appear with `-w`, as in `ruby -w -c -e 'code'`.
  */
 static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
+    [PM_ERR_CANNOT_PARSE_EXPRESSION]            = { "cannot parse the expression", PM_ERROR_LEVEL_FATAL },
+
     // Errors
     [PM_ERR_ALIAS_ARGUMENT]                     = { "invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_AMPAMPEQ_MULTI_ASSIGN]              = { "unexpected `&&=` in a multiple assignment", PM_ERROR_LEVEL_FATAL },
@@ -106,7 +108,6 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
     [PM_ERR_BLOCK_PARAM_PIPE_TERM]              = { "expected the block parameters to end with `|`", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_BLOCK_TERM_BRACE]                   = { "expected a block beginning with `{` to end with `}`", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_BLOCK_TERM_END]                     = { "expected a block beginning with `do` to end with `end`", PM_ERROR_LEVEL_FATAL },
-    [PM_ERR_CANNOT_PARSE_EXPRESSION]            = { "cannot parse the expression", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_CANNOT_PARSE_STRING_PART]           = { "cannot parse the string part", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_CASE_EXPRESSION_AFTER_CASE]         = { "expected an expression after `case`", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_CASE_EXPRESSION_AFTER_WHEN]         = { "expected an expression after `when`", PM_ERROR_LEVEL_FATAL },
@@ -277,6 +278,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
     [PM_ERR_UNARY_RECEIVER_BANG]                = { "expected a receiver for unary `!`", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_UNARY_RECEIVER_MINUS]               = { "expected a receiver for unary `-`", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_UNARY_RECEIVER_PLUS]                = { "expected a receiver for unary `+`", PM_ERROR_LEVEL_FATAL },
+    [PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT]     = { "unexpected %s, assuming it is closing the parent %s", PM_ERROR_LEVEL_FATAL },
+    [PM_ERR_UNEXPECTED_TOKEN_IGNORE]            = { "unexpected %s, ignoring it", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_UNARY_RECEIVER_TILDE]               = { "expected a receiver for unary `~`", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_UNTIL_TERM]                         = { "expected an `end` to close the `until` statement", PM_ERROR_LEVEL_FATAL },
     [PM_ERR_VOID_EXPRESSION]                    = { "unexpected void value expression", PM_ERROR_LEVEL_FATAL },
diff --git a/prism/diagnostic.h b/prism/diagnostic.h
index 9b600208ae..33123262b5 100644
--- a/prism/diagnostic.h
+++ b/prism/diagnostic.h
@@ -66,6 +66,11 @@ typedef struct {
  * of errors between the parser and the user.
  */
 typedef enum {
+    // This is a generic error that may be replaced by a more specific one. For
+    // an example of how this is used, see parse_expression_prefix.
+    PM_ERR_CANNOT_PARSE_EXPRESSION,
+
+    // These are the error codes.
     PM_ERR_ALIAS_ARGUMENT,
     PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
     PM_ERR_ARGUMENT_AFTER_BLOCK,
@@ -100,7 +105,6 @@ typedef enum {
     PM_ERR_BLOCK_PARAM_PIPE_TERM,
     PM_ERR_BLOCK_TERM_BRACE,
     PM_ERR_BLOCK_TERM_END,
-    PM_ERR_CANNOT_PARSE_EXPRESSION,
     PM_ERR_CANNOT_PARSE_STRING_PART,
     PM_ERR_CASE_EXPRESSION_AFTER_CASE,
     PM_ERR_CASE_EXPRESSION_AFTER_WHEN,
@@ -272,6 +276,8 @@ typedef enum {
     PM_ERR_UNARY_RECEIVER_MINUS,
     PM_ERR_UNARY_RECEIVER_PLUS,
     PM_ERR_UNARY_RECEIVER_TILDE,
+    PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
+    PM_ERR_UNEXPECTED_TOKEN_IGNORE,
     PM_ERR_UNDEF_ARGUMENT,
     PM_ERR_UNTIL_TERM,
     PM_ERR_VOID_EXPRESSION,
@@ -280,13 +286,15 @@ typedef enum {
     PM_ERR_WRITE_TARGET_READONLY,
     PM_ERR_WRITE_TARGET_UNEXPECTED,
     PM_ERR_XSTRING_TERM,
+
+    // These are the warning codes.
     PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS,
     PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
     PM_WARN_AMBIGUOUS_PREFIX_STAR,
     PM_WARN_AMBIGUOUS_SLASH,
     PM_WARN_END_IN_METHOD,
 
-    /* This must be the last member. */
+    // This is the number of diagnostic codes. It must be the last member.
     PM_DIAGNOSTIC_ID_LEN,
 } pm_diagnostic_id_t;
 
diff --git a/prism/parser.h b/prism/parser.h
index c7ebb64b60..6ee215c76d 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -259,6 +259,9 @@ typedef struct pm_parser pm_parser_t;
  * token that is understood by a parent context but not by the current context.
  */
 typedef enum {
+    /** a null context, returned when a token does not terminate any context */
+    PM_CONTEXT_NONE = 0,
+
     /** a begin statement */
     PM_CONTEXT_BEGIN,
 
diff --git a/prism/prism.c b/prism/prism.c
index 36699f5894..ea2723cfaf 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -164,7 +164,7 @@ debug_state(pm_parser_t *parser) {
 
 PRISM_ATTRIBUTE_UNUSED static void
 debug_token(pm_token_t * token) {
-    fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_to_str(token->type), (int) (token->end - token->start), token->start);
+    fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_human(token->type), (int) (token->end - token->start), token->start);
 }
 
 #endif
@@ -6719,21 +6719,27 @@ context_terminator(pm_context_t context, pm_token_t *token) {
             return token->type == PM_TOKEN_BRACE_RIGHT;
         case PM_CONTEXT_PREDICATE:
             return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
+        case PM_CONTEXT_NONE:
+            return false;
     }
 
     return false;
 }
 
-static bool
-context_recoverable(pm_parser_t *parser, pm_token_t *token) {
+/**
+ * Returns the first context, starting from the current one, that the given
+ * token terminates, or PM_CONTEXT_NONE if it does not terminate any of them.
+ */
+static pm_context_t
+context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
     pm_context_node_t *context_node = parser->current_context;
 
     while (context_node != NULL) {
-        if (context_terminator(context_node->context, token)) return true;
+        if (context_terminator(context_node->context, token)) return context_node->context;
         context_node = context_node->prev;
     }
 
-    return false;
+    return PM_CONTEXT_NONE;
 }
 
 static bool
@@ -6761,7 +6767,7 @@ context_pop(pm_parser_t *parser) {
 }
 
 static bool
-context_p(pm_parser_t *parser, pm_context_t context) {
+context_p(const pm_parser_t *parser, pm_context_t context) {
     pm_context_node_t *context_node = parser->current_context;
 
     while (context_node != NULL) {
@@ -6773,7 +6779,7 @@ context_p(pm_parser_t *parser, pm_context_t context) {
 }
 
 static bool
-context_def_p(pm_parser_t *parser) {
+context_def_p(const pm_parser_t *parser) {
     pm_context_node_t *context_node = parser->current_context;
 
     while (context_node != NULL) {
@@ -6796,6 +6802,55 @@ context_def_p(pm_parser_t *parser) {
     return false;
 }
 
+/**
+ * Returns a human-readable description of the given context, used in error
+ * messages. PM_CONTEXT_NONE has no description and must not be passed in.
+ */
+static const char *
+context_human(pm_context_t context) {
+    switch (context) {
+        case PM_CONTEXT_NONE:
+            assert(false && "unreachable");
+            return "";
+        case PM_CONTEXT_BEGIN: return "begin statement";
+        case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
+        case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
+        case PM_CONTEXT_CASE_WHEN: return "'when' clause";
+        case PM_CONTEXT_CASE_IN: return "'in' clause";
+        case PM_CONTEXT_CLASS: return "class definition";
+        case PM_CONTEXT_DEF: return "method definition";
+        case PM_CONTEXT_DEF_PARAMS: return "method parameters";
+        case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
+        case PM_CONTEXT_ELSE: return "'else' clause";
+        case PM_CONTEXT_ELSIF: return "'elsif' clause";
+        case PM_CONTEXT_EMBEXPR: return "embedded expression";
+        case PM_CONTEXT_ENSURE: return "'ensure' clause";
+        case PM_CONTEXT_ENSURE_DEF: return "'ensure' clause";
+        case PM_CONTEXT_FOR: return "for loop";
+        case PM_CONTEXT_FOR_INDEX: return "for loop index";
+        case PM_CONTEXT_IF: return "if statement";
+        case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
+        case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
+        case PM_CONTEXT_MAIN: return "top level context";
+        case PM_CONTEXT_MODULE: return "module definition";
+        case PM_CONTEXT_PARENS: return "parentheses";
+        case PM_CONTEXT_POSTEXE: return "'END' block";
+        case PM_CONTEXT_PREDICATE: return "predicate";
+        case PM_CONTEXT_PREEXE: return "'BEGIN' block";
+        case PM_CONTEXT_RESCUE_ELSE: return "'else' clause";
+        case PM_CONTEXT_RESCUE_ELSE_DEF: return "'else' clause";
+        case PM_CONTEXT_RESCUE: return "'rescue' clause";
+        case PM_CONTEXT_RESCUE_DEF: return "'rescue' clause";
+        case PM_CONTEXT_SCLASS: return "singleton class definition";
+        case PM_CONTEXT_UNLESS: return "unless statement";
+        case PM_CONTEXT_UNTIL: return "until statement";
+        case PM_CONTEXT_WHILE: return "while statement";
+    }
+
+    assert(false && "unreachable");
+    return "";
+}
+
 /******************************************************************************/
 /* Specific token lexers                                                      */
 /******************************************************************************/
@@ -14177,7 +14232,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
  * Parse an expression that begins with the previous node that we just lexed.
  */
 static inline pm_node_t *
-parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call) {
+parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
     switch (parser->current.type) {
         case PM_TOKEN_BRACKET_LEFT_ARRAY: {
             parser_lex(parser);
@@ -14595,30 +14650,30 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
 
             if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
                 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
-            }
-            else {
+            } else {
                 // Check if `it` is not going to be assigned.
                 switch (parser->current.type) {
-                  case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
-                  case PM_TOKEN_AMPERSAND_EQUAL:
-                  case PM_TOKEN_CARET_EQUAL:
-                  case PM_TOKEN_EQUAL:
-                  case PM_TOKEN_GREATER_GREATER_EQUAL:
-                  case PM_TOKEN_LESS_LESS_EQUAL:
-                  case PM_TOKEN_MINUS_EQUAL:
-                  case PM_TOKEN_PARENTHESIS_RIGHT:
-                  case PM_TOKEN_PERCENT_EQUAL:
-                  case PM_TOKEN_PIPE_EQUAL:
-                  case PM_TOKEN_PIPE_PIPE_EQUAL:
-                  case PM_TOKEN_PLUS_EQUAL:
-                  case PM_TOKEN_SLASH_EQUAL:
-                  case PM_TOKEN_STAR_EQUAL:
-                  case PM_TOKEN_STAR_STAR_EQUAL:
-                    break;
-                  default:
-                    // Once we know it's neither a method call nor an assignment,
-                    // we can finally create `it` default parameter.
-                    node = pm_node_check_it(parser, node);
+                    case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
+                    case PM_TOKEN_AMPERSAND_EQUAL:
+                    case PM_TOKEN_CARET_EQUAL:
+                    case PM_TOKEN_EQUAL:
+                    case PM_TOKEN_GREATER_GREATER_EQUAL:
+                    case PM_TOKEN_LESS_LESS_EQUAL:
+                    case PM_TOKEN_MINUS_EQUAL:
+                    case PM_TOKEN_PARENTHESIS_RIGHT:
+                    case PM_TOKEN_PERCENT_EQUAL:
+                    case PM_TOKEN_PIPE_EQUAL:
+                    case PM_TOKEN_PIPE_PIPE_EQUAL:
+                    case PM_TOKEN_PLUS_EQUAL:
+                    case PM_TOKEN_SLASH_EQUAL:
+                    case PM_TOKEN_STAR_EQUAL:
+                    case PM_TOKEN_STAR_STAR_EQUAL:
+                        break;
+                    default:
+                        // Once we know it's neither a method call nor an
+                        // assignment, we can finally create `it` default
+                        // parameter.
+                        node = pm_node_check_it(parser, node);
                 }
             }
 
@@ -14656,6 +14711,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
                 // If we get here, then we tried to find something in the
                 // heredoc but couldn't actually parse anything, so we'll just
                 // return a missing node.
+                //
+                // parse_string_part handles its own errors, so there is no need
+                // for us to add one here.
                 node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
             } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
                 // If we get here, then the part that we parsed was plain string
@@ -16301,6 +16359,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             // context of a multiple assignment. We enforce that here. We'll
             // still lex past it though and create a missing node place.
             if (binding_power != PM_BINDING_POWER_STATEMENT) {
+                pm_parser_err_previous(parser, diag_id);
                 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
             }
 
@@ -16487,12 +16546,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
 
             return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END);
         }
-        default:
-            if (context_recoverable(parser, &parser->current)) {
+        default: {
+            pm_context_t recoverable = context_recoverable(parser, &parser->current);
+
+            if (recoverable != PM_CONTEXT_NONE) {
                 parser->recovering = true;
+
+                // If the given error is not the generic one, then we'll add it
+                // here because it will provide more context in addition to the
+                // recoverable error that we will also add.
+                if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
+                    pm_parser_err_previous(parser, diag_id);
+                }
+
+                // If we get here, then we are assuming this token is closing a
+                // parent context, so we'll indicate that to the user so that
+                // they know how we behaved.
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
+            } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
+                // We're going to make a special case here, because "cannot
+                // parse the expression" is pretty generic, and here we know
+                // that we have an unexpected token.
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
+            } else {
+                pm_parser_err_previous(parser, diag_id);
             }
 
             return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+        }
     }
 }
 
@@ -17455,15 +17536,12 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
  */
 static pm_node_t *
 parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
-    pm_token_t recovery = parser->previous;
-    pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call);
+    pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, diag_id);
 
     switch (PM_NODE_TYPE(node)) {
         case PM_MISSING_NODE:
             // If we found a syntax error, then the type of node returned by
-            // parse_expression_prefix is going to be a missing node. In that
-            // case we need to add the error message to the parser's error list.
-            pm_parser_err(parser, recovery.end, recovery.end, diag_id);
+            // parse_expression_prefix is going to be a missing node.
             return node;
         case PM_PRE_EXECUTION_NODE:
         case PM_POST_EXECUTION_NODE:
diff --git a/prism/prism.h b/prism/prism.h
index 45bfff7a11..08d216cbb5 100644
--- a/prism/prism.h
+++ b/prism/prism.h
@@ -168,7 +168,15 @@ PRISM_EXPORTED_FUNCTION bool pm_parse_success_p(const uint8_t *source, size_t si
  * @param token_type The token type to convert to a string.
  * @return A string representation of the given token type.
  */
-PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type);
+PRISM_EXPORTED_FUNCTION const char * pm_token_type_name(pm_token_type_t token_type);
+
+/**
+ * Returns a human-readable name for the given token type.
+ *
+ * @param token_type The token type to convert to a human-readable name.
+ * @return A human-readable name for the given token type.
+ */
+const char * pm_token_type_human(pm_token_type_t token_type);
 
 /**
  * Format the errors on the parser into the given buffer.
diff --git a/prism/templates/ext/prism/api_node.c.erb b/prism/templates/ext/prism/api_node.c.erb
index 93f67f6551..20b3810715 100644
--- a/prism/templates/ext/prism/api_node.c.erb
+++ b/prism/templates/ext/prism/api_node.c.erb
@@ -19,7 +19,7 @@ pm_location_new(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, V
 
 VALUE
 pm_token_new(pm_parser_t *parser, pm_token_t *token, rb_encoding *encoding, VALUE source) {
-    ID type = rb_intern(pm_token_type_to_str(token->type));
+    ID type = rb_intern(pm_token_type_name(token->type));
     VALUE location = pm_location_new(parser, token->start, token->end, source);
 
     VALUE argv[] = {
diff --git a/prism/templates/src/token_type.c.erb b/prism/templates/src/token_type.c.erb
index d3c1c3f1b8..99f5d1b254 100644
--- a/prism/templates/src/token_type.c.erb
+++ b/prism/templates/src/token_type.c.erb
@@ -6,15 +6,364 @@
  * Returns a string representation of the given token type.
  */
 PRISM_EXPORTED_FUNCTION const char *
-pm_token_type_to_str(pm_token_type_t token_type)
-{
+pm_token_type_name(pm_token_type_t token_type) {
     switch (token_type) {
 <%- tokens.each do |token| -%>
         case PM_TOKEN_<%= token.name %>:
             return "<%= token.name %>";
 <%- end -%>
         case PM_TOKEN_MAXIMUM:
-            return "MAXIMUM";
+            assert(false && "unreachable");
+            return "";
     }
-    return "\0";
+
+    // Provide a default, because some compilers can't determine that the above
+    // switch is exhaustive.
+    assert(false && "unreachable");
+    return "";
+}
+
+/**
+ * Returns a human-readable name for the given token type, for error messages.
+ */
+const char *
+pm_token_type_human(pm_token_type_t token_type) {
+    switch (token_type) {
+        case PM_TOKEN_EOF:
+            return "end of file";
+        case PM_TOKEN_MISSING:
+            return "missing token";
+        case PM_TOKEN_NOT_PROVIDED:
+            return "not provided token";
+        case PM_TOKEN_AMPERSAND:
+            return "'&'";
+        case PM_TOKEN_AMPERSAND_AMPERSAND:
+            return "'&&'";
+        case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
+            return "'&&='";
+        case PM_TOKEN_AMPERSAND_DOT:
+            return "'&.'";
+        case PM_TOKEN_AMPERSAND_EQUAL:
+            return "'&='";
+        case PM_TOKEN_BACKTICK:
+            return "'`'";
+        case PM_TOKEN_BACK_REFERENCE:
+            return "back reference";
+        case PM_TOKEN_BANG:
+            return "'!'";
+        case PM_TOKEN_BANG_EQUAL:
+            return "'!='";
+        case PM_TOKEN_BANG_TILDE:
+            return "'!~'";
+        case PM_TOKEN_BRACE_LEFT:
+            return "'{'";
+        case PM_TOKEN_BRACE_RIGHT:
+            return "'}'";
+        case PM_TOKEN_BRACKET_LEFT:
+            return "'['";
+        case PM_TOKEN_BRACKET_LEFT_ARRAY:
+            return "'['";
+        case PM_TOKEN_BRACKET_LEFT_RIGHT:
+            return "'[]'";
+        case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL:
+            return "'[]='";
+        case PM_TOKEN_BRACKET_RIGHT:
+            return "']'";
+        case PM_TOKEN_CARET:
+            return "'^'";
+        case PM_TOKEN_CARET_EQUAL:
+            return "'^='";
+        case PM_TOKEN_CHARACTER_LITERAL:
+            return "character literal";
+        case PM_TOKEN_CLASS_VARIABLE:
+            return "class variable";
+        case PM_TOKEN_COLON:
+            return "':'";
+        case PM_TOKEN_COLON_COLON:
+            return "'::'";
+        case PM_TOKEN_COMMA:
+            return "','";
+        case PM_TOKEN_COMMENT:
+            return "comment";
+        case PM_TOKEN_CONSTANT:
+            return "constant";
+        case PM_TOKEN_DOT:
+            return "'.'";
+        case PM_TOKEN_DOT_DOT:
+            return "'..'";
+        case PM_TOKEN_DOT_DOT_DOT:
+            return "'...'";
+        case PM_TOKEN_EMBDOC_BEGIN:
+            return "'=begin'";
+        case PM_TOKEN_EMBDOC_END:
+            return "'=end'";
+        case PM_TOKEN_EMBDOC_LINE:
+            return "embedded documentation line";
+        case PM_TOKEN_EMBEXPR_BEGIN:
+            return "'#{'";
+        case PM_TOKEN_EMBEXPR_END:
+            return "'}'";
+        case PM_TOKEN_EMBVAR:
+            return "'#'";
+        case PM_TOKEN_EQUAL:
+            return "'='";
+        case PM_TOKEN_EQUAL_EQUAL:
+            return "'=='";
+        case PM_TOKEN_EQUAL_EQUAL_EQUAL:
+            return "'==='";
+        case PM_TOKEN_EQUAL_GREATER:
+            return "'=>'";
+        case PM_TOKEN_EQUAL_TILDE:
+            return "'=~'";
+        case PM_TOKEN_FLOAT:
+            return "float";
+        case PM_TOKEN_FLOAT_IMAGINARY:
+            return "imaginary";
+        case PM_TOKEN_FLOAT_RATIONAL:
+            return "rational";
+        case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
+            return "imaginary";
+        case PM_TOKEN_GLOBAL_VARIABLE:
+            return "global variable";
+        case PM_TOKEN_GREATER:
+            return "'>'";
+        case PM_TOKEN_GREATER_EQUAL:
+            return "'>='";
+        case PM_TOKEN_GREATER_GREATER:
+            return "'>>'";
+        case PM_TOKEN_GREATER_GREATER_EQUAL:
+            return "'>>='";
+        case PM_TOKEN_HEREDOC_END:
+            return "heredoc ending";
+        case PM_TOKEN_HEREDOC_START:
+            return "heredoc beginning";
+        case PM_TOKEN_IDENTIFIER:
+            return "local variable or method identifier";
+        case PM_TOKEN_IGNORED_NEWLINE:
+            return "ignored newline";
+        case PM_TOKEN_INSTANCE_VARIABLE:
+            return "instance variable";
+        case PM_TOKEN_INTEGER:
+            return "integer";
+        case PM_TOKEN_INTEGER_IMAGINARY:
+            return "imaginary";
+        case PM_TOKEN_INTEGER_RATIONAL:
+            return "rational";
+        case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY:
+            return "imaginary";
+        case PM_TOKEN_KEYWORD_ALIAS:
+            return "'alias'";
+        case PM_TOKEN_KEYWORD_AND:
+            return "'and'";
+        case PM_TOKEN_KEYWORD_BEGIN:
+            return "'begin'";
+        case PM_TOKEN_KEYWORD_BEGIN_UPCASE:
+            return "'BEGIN'";
+        case PM_TOKEN_KEYWORD_BREAK:
+            return "'break'";
+        case PM_TOKEN_KEYWORD_CASE:
+            return "'case'";
+        case PM_TOKEN_KEYWORD_CLASS:
+            return "'class'";
+        case PM_TOKEN_KEYWORD_DEF:
+            return "'def'";
+        case PM_TOKEN_KEYWORD_DEFINED:
+            return "'defined?'";
+        case PM_TOKEN_KEYWORD_DO:
+            return "'do'";
+        case PM_TOKEN_KEYWORD_DO_LOOP:
+            return "'do'";
+        case PM_TOKEN_KEYWORD_ELSE:
+            return "'else'";
+        case PM_TOKEN_KEYWORD_ELSIF:
+            return "'elsif'";
+        case PM_TOKEN_KEYWORD_END:
+            return "'end'";
+        case PM_TOKEN_KEYWORD_END_UPCASE:
+            return "'END'";
+        case PM_TOKEN_KEYWORD_ENSURE:
+            return "'ensure'";
+        case PM_TOKEN_KEYWORD_FALSE:
+            return "'false'";
+        case PM_TOKEN_KEYWORD_FOR:
+            return "'for'";
+        case PM_TOKEN_KEYWORD_IF:
+            return "'if'";
+        case PM_TOKEN_KEYWORD_IF_MODIFIER:
+            return "'if'";
+        case PM_TOKEN_KEYWORD_IN:
+            return "'in'";
+        case PM_TOKEN_KEYWORD_MODULE:
+            return "'module'";
+        case PM_TOKEN_KEYWORD_NEXT:
+            return "'next'";
+        case PM_TOKEN_KEYWORD_NIL:
+            return "'nil'";
+        case PM_TOKEN_KEYWORD_NOT:
+            return "'not'";
+        case PM_TOKEN_KEYWORD_OR:
+            return "'or'";
+        case PM_TOKEN_KEYWORD_REDO:
+            return "'redo'";
+        case PM_TOKEN_KEYWORD_RESCUE:
+            return "'rescue'";
+        case PM_TOKEN_KEYWORD_RESCUE_MODIFIER:
+            return "'rescue'";
+        case PM_TOKEN_KEYWORD_RETRY:
+            return "'retry'";
+        case PM_TOKEN_KEYWORD_RETURN:
+            return "'return'";
+        case PM_TOKEN_KEYWORD_SELF:
+            return "'self'";
+        case PM_TOKEN_KEYWORD_SUPER:
+            return "'super'";
+        case PM_TOKEN_KEYWORD_THEN:
+            return "'then'";
+        case PM_TOKEN_KEYWORD_TRUE:
+            return "'true'";
+        case PM_TOKEN_KEYWORD_UNDEF:
+            return "'undef'";
+        case PM_TOKEN_KEYWORD_UNLESS:
+            return "'unless'";
+        case PM_TOKEN_KEYWORD_UNLESS_MODIFIER:
+            return "'unless'";
+        case PM_TOKEN_KEYWORD_UNTIL:
+            return "'until'";
+        case PM_TOKEN_KEYWORD_UNTIL_MODIFIER:
+            return "'until'";
+        case PM_TOKEN_KEYWORD_WHEN:
+            return "'when'";
+        case PM_TOKEN_KEYWORD_WHILE:
+            return "'while'";
+        case PM_TOKEN_KEYWORD_WHILE_MODIFIER:
+            return "'while'";
+        case PM_TOKEN_KEYWORD_YIELD:
+            return "'yield'";
+        case PM_TOKEN_KEYWORD___ENCODING__:
+            return "'__ENCODING__'";
+        case PM_TOKEN_KEYWORD___FILE__:
+            return "'__FILE__'";
+        case PM_TOKEN_KEYWORD___LINE__:
+            return "'__LINE__'";
+        case PM_TOKEN_LABEL:
+            return "label";
+        case PM_TOKEN_LABEL_END:
+            return "':'";
+        case PM_TOKEN_LAMBDA_BEGIN:
+            return "'{'";
+        case PM_TOKEN_LESS:
+            return "'<'";
+        case PM_TOKEN_LESS_EQUAL:
+            return "'<='";
+        case PM_TOKEN_LESS_EQUAL_GREATER:
+            return "'<=>'";
+        case PM_TOKEN_LESS_LESS:
+            return "'<<'";
+        case PM_TOKEN_LESS_LESS_EQUAL:
+            return "'<<='";
+        case PM_TOKEN_METHOD_NAME:
+            return "method name";
+        case PM_TOKEN_MINUS:
+            return "'-'";
+        case PM_TOKEN_MINUS_EQUAL:
+            return "'-='";
+        case PM_TOKEN_MINUS_GREATER:
+            return "'->'";
+        case PM_TOKEN_NEWLINE:
+            return "newline";
+        case PM_TOKEN_NUMBERED_REFERENCE:
+            return "numbered reference";
+        case PM_TOKEN_PARENTHESIS_LEFT:
+            return "'('";
+        case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
+            return "'('";
+        case PM_TOKEN_PARENTHESIS_RIGHT:
+            return "')'";
+        case PM_TOKEN_PERCENT:
+            return "'%'";
+        case PM_TOKEN_PERCENT_EQUAL:
+            return "'%='";
+        case PM_TOKEN_PERCENT_LOWER_I:
+            return "'%i'";
+        case PM_TOKEN_PERCENT_LOWER_W:
+            return "'%w'";
+        case PM_TOKEN_PERCENT_LOWER_X:
+            return "'%x'";
+        case PM_TOKEN_PERCENT_UPPER_I:
+            return "'%I'";
+        case PM_TOKEN_PERCENT_UPPER_W:
+            return "'%W'";
+        case PM_TOKEN_PIPE:
+            return "'|'";
+        case PM_TOKEN_PIPE_EQUAL:
+            return "'|='";
+        case PM_TOKEN_PIPE_PIPE:
+            return "'||'";
+        case PM_TOKEN_PIPE_PIPE_EQUAL:
+            return "'||='";
+        case PM_TOKEN_PLUS:
+            return "'+'";
+        case PM_TOKEN_PLUS_EQUAL:
+            return "'+='";
+        case PM_TOKEN_QUESTION_MARK:
+            return "'?'";
+        case PM_TOKEN_REGEXP_BEGIN:
+            return "regular expression beginning";
+        case PM_TOKEN_REGEXP_END:
+            return "regular expression ending";
+        case PM_TOKEN_SEMICOLON:
+            return "';'";
+        case PM_TOKEN_SLASH:
+            return "'/'";
+        case PM_TOKEN_SLASH_EQUAL:
+            return "'/='";
+        case PM_TOKEN_STAR:
+            return "'*'";
+        case PM_TOKEN_STAR_EQUAL:
+            return "'*='";
+        case PM_TOKEN_STAR_STAR:
+            return "'**'";
+        case PM_TOKEN_STAR_STAR_EQUAL:
+            return "'**='";
+        case PM_TOKEN_STRING_BEGIN:
+            return "string beginning";
+        case PM_TOKEN_STRING_CONTENT:
+            return "string content";
+        case PM_TOKEN_STRING_END:
+            return "string ending";
+        case PM_TOKEN_SYMBOL_BEGIN:
+            return "symbol beginning";
+        case PM_TOKEN_TILDE:
+            return "'~'";
+        case PM_TOKEN_UAMPERSAND:
+            return "'&'";
+        case PM_TOKEN_UCOLON_COLON:
+            return "'::'";
+        case PM_TOKEN_UDOT_DOT:
+            return "'..'";
+        case PM_TOKEN_UDOT_DOT_DOT:
+            return "'...'";
+        case PM_TOKEN_UMINUS:
+            return "'-'";
+        case PM_TOKEN_UMINUS_NUM:
+            return "'-'";
+        case PM_TOKEN_UPLUS:
+            return "'+'";
+        case PM_TOKEN_USTAR:
+            return "'*'";
+        case PM_TOKEN_USTAR_STAR:
+            return "'**'";
+        case PM_TOKEN_WORDS_SEP:
+            return "string separator";
+        case PM_TOKEN___END__:
+            return "'__END__'";
+        case PM_TOKEN_MAXIMUM:
+            assert(false && "unreachable");
+            return "";
+    }
+
+    // Provide a default, because some compilers can't determine that the above
+    // switch is exhaustive.
+    assert(false && "unreachable");
+    return "";
 }
diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb
index 2b45167bee..4518c8a65d 100644
--- a/test/prism/errors_test.rb
+++ b/test/prism/errors_test.rb
@@ -26,7 +26,8 @@ module Prism
       )
 
       assert_errors expected, "module Parent module end", [
-        ["expected a constant name after `module`", 20..20]
+        ["expected a constant name after `module`", 14..20],
+        ["unexpected 'end', assuming it is closing the parent module definition", 21..24]
       ]
     end
 
@@ -98,7 +99,8 @@ module Prism
       )
 
       assert_errors expected, "BEGIN { 1 + }", [
-        ["expected an expression after the operator", 11..11]
+        ["expected an expression after the operator", 10..11],
+        ["unexpected '}', assuming it is closing the parent 'BEGIN' block", 12..13]
       ]
     end
 
@@ -189,7 +191,7 @@ module Prism
     def test_unterminated_parenthesized_expression
       assert_errors expression('(1 + 2'), '(1 + 2', [
         ["expected a newline or semicolon after the statement", 6..6],
-        ["cannot parse the expression", 6..6],
+        ["unexpected end of file, assuming it is closing the parent top level context", 6..6],
         ["expected a matching `)`", 6..6]
       ]
     end
@@ -203,7 +205,8 @@ module Prism
     def test_unterminated_argument_expression
       assert_errors expression('a %'), 'a %', [
         ["invalid `%` token", 2..3],
-        ["expected an expression after the operator", 3..3],
+        ["expected an expression after the operator", 2..3],
+        ["unexpected end of file, assuming it is closing the parent top level context", 3..3]
       ]
     end
 
@@ -222,62 +225,62 @@ module Prism
     def test_1_2_3
       assert_errors expression("(1, 2, 3)"), "(1, 2, 3)", [
         ["expected a newline or semicolon after the statement", 2..2],
-        ["cannot parse the expression", 2..2],
+        ["unexpected ',', ignoring it", 2..3],
         ["expected a matching `)`", 2..2],
         ["expected a newline or semicolon after the statement", 2..2],
-        ["cannot parse the expression", 2..2],
+        ["unexpected ',', ignoring it", 2..3],
         ["expected a newline or semicolon after the statement", 5..5],
-        ["cannot parse the expression", 5..5],
+        ["unexpected ',', ignoring it", 5..6],
         ["expected a newline or semicolon after the statement", 8..8],
-        ["cannot parse the expression", 8..8]
+        ["unexpected ')', ignoring it", 8..9]
       ]
     end
 
     def test_return_1_2_3
       assert_error_messages "return(1, 2, 3)", [
         "expected a newline or semicolon after the statement",
-        "cannot parse the expression",
+        "unexpected ',', ignoring it",
         "expected a matching `)`",
         "expected a newline or semicolon after the statement",
-        "cannot parse the expression"
+        "unexpected ')', ignoring it"
       ]
     end
 
     def test_return_1
       assert_errors expression("return 1,;"), "return 1,;", [
-        ["expected an argument", 9..9]
+        ["expected an argument", 8..9]
       ]
     end
 
     def test_next_1_2_3
       assert_errors expression("next(1, 2, 3)"), "next(1, 2, 3)", [
         ["expected a newline or semicolon after the statement", 6..6],
-        ["cannot parse the expression", 6..6],
+        ["unexpected ',', ignoring it", 6..7],
         ["expected a matching `)`", 6..6],
         ["expected a newline or semicolon after the statement", 12..12],
-        ["cannot parse the expression", 12..12]
+        ["unexpected ')', ignoring it", 12..13]
       ]
     end
 
     def test_next_1
       assert_errors expression("next 1,;"), "next 1,;", [
-        ["expected an argument", 7..7]
+        ["expected an argument", 6..7]
       ]
     end
 
     def test_break_1_2_3
       assert_errors expression("break(1, 2, 3)"), "break(1, 2, 3)", [
         ["expected a newline or semicolon after the statement", 7..7],
-        ["cannot parse the expression", 7..7],
+        ["unexpected ',', ignoring it", 7..8],
         ["expected a matching `)`", 7..7],
         ["expected a newline or semicolon after the statement", 13..13],
-        ["cannot parse the expression", 13..13]
+        ["unexpected ')', ignoring it", 13..14]
       ]
     end
 
     def test_break_1
       assert_errors expression("break 1,;"), "break 1,;", [
-        ["expected an argument", 8..8]
+        ["expected an argument", 7..8]
       ]
     end
 
@@ -338,22 +341,22 @@ module Prism
         ["expected a matching `)`", 8..8],
         ["expected a `.` or `::` after the receiver in a method definition", 8..8],
         ["expected a delimiter to close the parameters", 9..9],
-        ["cannot parse the expression", 9..9],
-        ["cannot parse the expression", 11..11]
+        ["unexpected ')', ignoring it", 10..11],
+        ["unexpected '.', ignoring it", 11..12]
       ]
     end
 
     def test_def_with_empty_expression_receiver
       assert_errors expression("def ().a; end"), "def ().a; end", [
-        ["expected a receiver for the method definition", 5..5]
+        ["expected a receiver for the method definition", 4..5]
       ]
     end
 
     def test_block_beginning_with_brace_and_ending_with_end
       assert_error_messages "x.each { x end", [
         "expected a newline or semicolon after the statement",
-        "cannot parse the expression",
-        "cannot parse the expression",
+        "unexpected 'end', ignoring it",
+        "unexpected end of file, assuming it is closing the parent top level context",
         "expected a block beginning with `{` to end with `}`"
       ]
     end
@@ -401,7 +404,7 @@ module Prism
       assert_error_messages "foo(*bar and baz)", [
         "expected a `)` to close the arguments",
         "expected a newline or semicolon after the statement",
-        "cannot parse the expression"
+        "unexpected ')', ignoring it"
       ]
     end
 
@@ -1490,8 +1493,8 @@ module Prism
       assert_errors expression(source), source, [
         ["expected a `do` keyword or a `{` to open the lambda block", 3..3],
         ["expected a newline or semicolon after the statement", 7..7],
-        ["cannot parse the expression", 7..7],
-        ["expected a lambda block beginning with `do` to end with `end`", 7..7],
+        ["unexpected end of file, assuming it is closing the parent top level context", 7..7],
+        ["expected a lambda block beginning with `do` to end with `end`", 7..7]
       ]
     end
 
@@ -1546,10 +1549,11 @@ module Prism
 
     def test_while_endless_method
       source = "while def f = g do end"
+
       assert_errors expression(source), source, [
-        ['expected a predicate expression for the `while` statement', 22..22],
-        ['cannot parse the expression', 22..22],
-        ['expected an `end` to close the `while` statement', 22..22]
+        ["expected a predicate expression for the `while` statement", 22..22],
+        ["unexpected end of file, assuming it is closing the parent top level context", 22..22],
+        ["expected an `end` to close the `while` statement", 22..22]
       ]
     end
 
@@ -1558,13 +1562,12 @@ module Prism
         a in b + c
         a => b + c
       RUBY
-      message1 = 'expected a newline or semicolon after the statement'
-      message2 = 'cannot parse the expression'
+
       assert_errors expression(source), source, [
-        [message1, 6..6],
-        [message2, 6..6],
-        [message1, 17..17],
-        [message2, 17..17],
+        ["expected a newline or semicolon after the statement", 6..6],
+        ["unexpected '+', ignoring it", 7..8],
+        ["expected a newline or semicolon after the statement", 17..17],
+        ["unexpected '+', ignoring it", 18..19]
       ]
     end
 
@@ -1859,9 +1862,10 @@ module Prism
 
     def test_non_assoc_range
       source = '1....2'
+
       assert_errors expression(source), source, [
-        ['expected a newline or semicolon after the statement', 4..4],
-        ['cannot parse the expression', 4..4],
+        ["expected a newline or semicolon after the statement", 4..4],
+        ["unexpected '.', ignoring it", 4..5]
       ]
     end
 
@@ -1892,25 +1896,24 @@ module Prism
         undef x + 1
         undef x.z
       RUBY
-      message1 = 'expected a newline or semicolon after the statement'
-      message2 = 'cannot parse the expression'
+
       assert_errors expression(source), source, [
-        [message1, 9..9],
-        [message2, 9..9],
-        [message1, 23..23],
-        [message2, 23..23],
-        [message1, 39..39],
-        [message2, 39..39],
-        [message1, 57..57],
-        [message2, 57..57],
-        [message1, 71..71],
-        [message2, 71..71],
-        [message1, 87..87],
-        [message2, 87..87],
-        [message1, 97..97],
-        [message2, 97..97],
-        [message1, 109..109],
-        [message2, 109..109],
+        ["expected a newline or semicolon after the statement", 9..9],
+        ["unexpected '+', ignoring it", 10..11],
+        ["expected a newline or semicolon after the statement", 23..23],
+        ["unexpected '.', ignoring it", 23..24],
+        ["expected a newline or semicolon after the statement", 39..39],
+        ["unexpected '+', ignoring it", 40..41],
+        ["expected a newline or semicolon after the statement", 57..57],
+        ["unexpected '.', ignoring it", 57..58],
+        ["expected a newline or semicolon after the statement", 71..71],
+        ["unexpected '+', ignoring it", 72..73],
+        ["expected a newline or semicolon after the statement", 87..87],
+        ["unexpected '.', ignoring it", 87..88],
+        ["expected a newline or semicolon after the statement", 97..97],
+        ["unexpected '+', ignoring it", 98..99],
+        ["expected a newline or semicolon after the statement", 109..109],
+        ["unexpected '.', ignoring it", 109..110]
       ]
     end
 
@@ -1934,13 +1937,12 @@ module Prism
         ..1..
         ...1..
       RUBY
-      message1 = 'expected a newline or semicolon after the statement'
-      message2 =  'cannot parse the expression'
+
       assert_errors expression(source), source, [
-        [message1, 3..3],
-        [message2, 3..3],
-        [message1, 10..10],
-        [message2, 10..10],
+        ["expected a newline or semicolon after the statement", 3..3],
+        ["unexpected '..', ignoring it", 3..5],
+        ["expected a newline or semicolon after the statement", 10..10],
+        ["unexpected '..', ignoring it", 10..12]
       ]
     end
 
@@ -2047,21 +2049,20 @@ module Prism
         1 !~ 2 !~ 3
         1 <=> 2 <=> 3
       RUBY
-      message1 = 'expected a newline or semicolon after the statement'
-      message2 = 'cannot parse the expression'
+
       assert_errors expression(source), source, [
-        [message1, 6..6],
-        [message2, 6..6],
-        [message1, 18..18],
-        [message2, 18..18],
-        [message1, 31..31],
-        [message2, 31..31],
-        [message1, 44..44],
-        [message2, 44..44],
-        [message1, 56..56],
-        [message2, 56..56],
-        [message1, 69..69],
-        [message2, 69..69],
+        ["expected a newline or semicolon after the statement", 6..6],
+        ["unexpected '==', ignoring it", 7..9],
+        ["expected a newline or semicolon after the statement", 18..18],
+        ["unexpected '!=', ignoring it", 19..21],
+        ["expected a newline or semicolon after the statement", 31..31],
+        ["unexpected '===', ignoring it", 32..35],
+        ["expected a newline or semicolon after the statement", 44..44],
+        ["unexpected '=~', ignoring it", 45..47],
+        ["expected a newline or semicolon after the statement", 56..56],
+        ["unexpected '!~', ignoring it", 57..59],
+        ["expected a newline or semicolon after the statement", 69..69],
+        ["unexpected '<=>', ignoring it", 70..73]
       ]
     end
 
diff --git a/test/prism/format_errors_test.rb b/test/prism/format_errors_test.rb
index 3533a73863..34d320a7b6 100644
--- a/test/prism/format_errors_test.rb
+++ b/test/prism/format_errors_test.rb
@@ -9,8 +9,8 @@ module Prism
     def test_format_errors
       assert_equal <<~ERROR, Debug.format_errors("<>", false)
         > 1 | <>
-            | ^ cannot parse the expression
-            |  ^ cannot parse the expression
+            | ^ unexpected '<', ignoring it
+            |  ^ unexpected '>', ignoring it
       ERROR
     end
   end
author	Kevin Newton <kddnewton@gmail.com>	2024-01-29 17:27:45 -0500
committer	git <svn-admin@ruby-lang.org>	2024-01-30 16:10:08 +0000
commit	ba06a8259a3f21c9cbee0f4f55b82c016a45a3b9 (patch)
tree	6bf9054c571735c63df122962f054b37bc43b04f
parent	c85e28d12a4855e64271f0be4510b63053b628b7 (diff)