summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kevin Newton <kddnewton@gmail.com> 2024-02-06 12:59:47 -0500
committer: git <svn-admin@ruby-lang.org> 2024-02-06 18:10:50 +0000
commit: f5b368df0ceb1e705cd94e39ef8459dae07e6d52 (patch)
tree: 602618ff2ff30b8ca7d188fe0b4e071ac02366a0
parent: ccec209b2cced2ddb8463c4933ef729a44d0363c (diff)
[ruby/prism] Better invalid token messages
https://github.com/ruby/prism/commit/8c9bed2a4d
-rw-r--r-- prism/diagnostic.c 4
-rw-r--r-- prism/diagnostic.h 4
-rw-r--r-- prism/prism.c 18
-rw-r--r-- test/prism/format_errors_test.rb 2
4 files changed, 21 insertions, 7 deletions
diff --git a/prism/diagnostic.c b/prism/diagnostic.c
index df7ae381ba..c718246c80 100644
--- a/prism/diagnostic.c
+++ b/prism/diagnostic.c
@@ -198,8 +198,10 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
[PM_ERR_INVALID_NUMBER_HEXADECIMAL] = { "invalid hexadecimal number", PM_ERROR_LEVEL_FATAL },
[PM_ERR_INVALID_NUMBER_OCTAL] = { "invalid octal number", PM_ERROR_LEVEL_FATAL },
[PM_ERR_INVALID_NUMBER_UNDERSCORE] = { "invalid underscore placement in number", PM_ERROR_LEVEL_FATAL },
+ [PM_ERR_INVALID_CHARACTER] = { "invalid character 0x%X", PM_ERROR_LEVEL_FATAL },
+ [PM_ERR_INVALID_MULTIBYTE_CHARACTER] = { "invalid multibyte character 0x%X", PM_ERROR_LEVEL_FATAL },
+ [PM_ERR_INVALID_PRINTABLE_CHARACTER] = { "invalid character `%c`", PM_ERROR_LEVEL_FATAL },
[PM_ERR_INVALID_PERCENT] = { "invalid `%` token", PM_ERROR_LEVEL_FATAL }, // TODO WHAT?
- [PM_ERR_INVALID_TOKEN] = { "invalid token", PM_ERROR_LEVEL_FATAL }, // TODO WHAT?
[PM_ERR_INVALID_VARIABLE_GLOBAL] = { "invalid global variable", PM_ERROR_LEVEL_FATAL },
[PM_ERR_IT_NOT_ALLOWED] = { "`it` is not allowed when an ordinary parameter is defined", PM_ERROR_LEVEL_FATAL },
[PM_ERR_LAMBDA_OPEN] = { "expected a `do` keyword or a `{` to open the lambda block", PM_ERROR_LEVEL_FATAL },
diff --git a/prism/diagnostic.h b/prism/diagnostic.h
index 35a5c88793..019afb96b3 100644
--- a/prism/diagnostic.h
+++ b/prism/diagnostic.h
@@ -196,8 +196,10 @@ typedef enum {
PM_ERR_INVALID_NUMBER_HEXADECIMAL,
PM_ERR_INVALID_NUMBER_OCTAL,
PM_ERR_INVALID_NUMBER_UNDERSCORE,
+ PM_ERR_INVALID_CHARACTER,
+ PM_ERR_INVALID_MULTIBYTE_CHARACTER,
+ PM_ERR_INVALID_PRINTABLE_CHARACTER,
PM_ERR_INVALID_PERCENT,
- PM_ERR_INVALID_TOKEN,
PM_ERR_INVALID_VARIABLE_GLOBAL,
PM_ERR_IT_NOT_ALLOWED,
PM_ERR_LAMBDA_OPEN,
diff --git a/prism/prism.c b/prism/prism.c
index 3ed55f06d8..22503fd726 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -9590,11 +9590,21 @@ parser_lex(pm_parser_t *parser) {
if (*parser->current.start != '_') {
size_t width = char_is_identifier_start(parser, parser->current.start);
- // If this isn't the beginning of an identifier, then it's an invalid
- // token as we've exhausted all of the other options. We'll skip past
- // it and return the next token.
+ // If this isn't the beginning of an identifier, then
+ // it's an invalid token as we've exhausted all of the
+ // other options. We'll skip past it and return the next
+ // token after adding an appropriate error message.
if (!width) {
- pm_parser_err_current(parser, PM_ERR_INVALID_TOKEN);
+ pm_diagnostic_id_t diag_id;
+ if (*parser->current.start >= 0x80) {
+ diag_id = PM_ERR_INVALID_MULTIBYTE_CHARACTER;
+ } else if (char_is_ascii_printable(*parser->current.start) || (*parser->current.start == '\\')) {
+ diag_id = PM_ERR_INVALID_PRINTABLE_CHARACTER;
+ } else {
+ diag_id = PM_ERR_INVALID_CHARACTER;
+ }
+
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, *parser->current.start);
goto lex_next_token;
}
diff --git a/test/prism/format_errors_test.rb b/test/prism/format_errors_test.rb
index bc0b26165d..a142e8eee1 100644
--- a/test/prism/format_errors_test.rb
+++ b/test/prism/format_errors_test.rb
@@ -16,7 +16,7 @@ module Prism
assert_equal <<~'ERROR', Debug.format_errors('"%W"\u"', false)
> 1 | "%W"\u"
| ^ expected a newline or semicolon after the statement
- | ^ invalid token
+ | ^ invalid character `\`
| ^ expected a closing delimiter for the string literal
ERROR
end