summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kevin Newton <kddnewton@gmail.com> 2024-08-15 12:24:49 -0400
committer: git <svn-admin@ruby-lang.org> 2024-08-15 16:40:05 +0000
commit: cbf508da58d50f7448f10a1a6030b3a48dfec14d (patch)
tree: 2bdd689dc6bb7f13009842a293b029283052719e
parent: 8312c5be74f6ce4ae3ebdb6efdcec1134b0c3bba (diff)
[ruby/prism] Special error for too short unicode errors
https://github.com/ruby/prism/commit/9f1f7d08bd
-rw-r--r-- prism/config.yml 1
-rw-r--r-- prism/prism.c 5
-rw-r--r-- prism/templates/src/diagnostic.c.erb 1
-rw-r--r-- test/prism/errors/regexp_unicode_too_short.txt 3
4 files changed, 9 insertions, 1 deletions
diff --git a/prism/config.yml b/prism/config.yml
index fed8265173..6ead7a9d95 100644
--- a/prism/config.yml
+++ b/prism/config.yml
@@ -83,6 +83,7 @@ errors:
- ESCAPE_INVALID_UNICODE_CM_FLAGS
- ESCAPE_INVALID_UNICODE_LITERAL
- ESCAPE_INVALID_UNICODE_LONG
+ - ESCAPE_INVALID_UNICODE_SHORT
- ESCAPE_INVALID_UNICODE_TERM
- EXPECT_ARGUMENT
- EXPECT_EOL_AFTER_STATEMENT
diff --git a/prism/prism.c b/prism/prism.c
index 30fb1ad298..a706881e3e 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -9771,7 +9771,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
} else {
size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
- if (length == 4) {
+ if (length == 0) {
+ const uint8_t *start = parser->current.end - 2;
+ PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
+ } else if (length == 4) {
uint32_t value = escape_unicode(parser, parser->current.end, 4);
if (flags & PM_ESCAPE_FLAG_REGEXP) {
diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb
index 5e6858ac61..d3fabb8449 100644
--- a/prism/templates/src/diagnostic.c.erb
+++ b/prism/templates/src/diagnostic.c.erb
@@ -167,6 +167,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
[PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS] = { "invalid Unicode escape sequence; Unicode cannot be combined with control or meta flags", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL] = { "invalid Unicode escape sequence; Multiple codepoints at single character literal are disallowed", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_ESCAPE_INVALID_UNICODE_LONG] = { "invalid Unicode escape sequence; maximum length is 6 digits", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_UNICODE_SHORT] = { "too short escape sequence: %.*s", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_ESCAPE_INVALID_UNICODE_TERM] = { "unterminated Unicode escape", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_EXPECT_ARGUMENT] = { "expected an argument", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_EXPECT_EOL_AFTER_STATEMENT] = { "unexpected %s, expecting end-of-input", PM_ERROR_LEVEL_SYNTAX },
diff --git a/test/prism/errors/regexp_unicode_too_short.txt b/test/prism/errors/regexp_unicode_too_short.txt
new file mode 100644
index 0000000000..a7638b2712
--- /dev/null
+++ b/test/prism/errors/regexp_unicode_too_short.txt
@@ -0,0 +1,3 @@
+/\u/
+ ^~ too short escape sequence: \u
+