diff options
| field | value | date |
|---|---|---|
| author | Kevin Newton <kddnewton@gmail.com> | 2024-05-01 18:05:37 -0400 |
| committer | Kevin Newton <kddnewton@gmail.com> | 2024-05-01 19:19:07 -0400 |
| commit | b64fd2f5c5b45b7caf29c44794118fc0f49d2bdd (patch) | |
| tree | 13cc8109d05b6f3d8efb6b4c2e0bfb3bebca97f3 | |
| parent | 1be5ede766d93d0d5056a99773c166987c5d5235 (diff) | |
[PRISM] Closer error messages on invalid unicode escapes in character literals
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | prism/prism.c | 33 |
| -rw-r--r-- | prism/templates/src/diagnostic.c.erb | 2 |
2 files changed, 17 insertions, 18 deletions
```diff
diff --git a/prism/prism.c b/prism/prism.c
index 0caa6bc802..df5fc92fd2 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -9526,22 +9526,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
             const uint8_t *start = parser->current.end - 1;
             parser->current.end++;
 
-            if (
-                (parser->current.end + 4 <= parser->end) &&
-                pm_char_is_hexadecimal_digit(parser->current.end[0]) &&
-                pm_char_is_hexadecimal_digit(parser->current.end[1]) &&
-                pm_char_is_hexadecimal_digit(parser->current.end[2]) &&
-                pm_char_is_hexadecimal_digit(parser->current.end[3])
-            ) {
-                uint32_t value = escape_unicode(parser->current.end, 4);
-
-                if (flags & PM_ESCAPE_FLAG_REGEXP) {
-                    pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
-                }
-                escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
-
-                parser->current.end += 4;
-            } else if (peek(parser) == '{') {
+            if (peek(parser) == '{') {
                 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
                 parser->current.end++;
 
@@ -9591,7 +9576,21 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                     pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
                 }
             } else {
-                pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
+                size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
+
+                if (length == 4) {
+                    uint32_t value = escape_unicode(parser->current.end, 4);
+
+                    if (flags & PM_ESCAPE_FLAG_REGEXP) {
+                        pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
+                    }
+
+                    escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
+                    parser->current.end += 4;
+                } else {
+                    parser->current.end += length;
+                    pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
+                }
             }
 
             return;
diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb
index 910b03c9f9..5b0adcb9b1 100644
--- a/prism/templates/src/diagnostic.c.erb
+++ b/prism/templates/src/diagnostic.c.erb
@@ -163,7 +163,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
     [PM_ERR_ESCAPE_INVALID_META_REPEAT] = { "invalid meta escape sequence; meta cannot be repeated", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_ESCAPE_INVALID_UNICODE] = { "invalid Unicode escape sequence", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS] = { "invalid Unicode escape sequence; Unicode cannot be combined with control or meta flags", PM_ERROR_LEVEL_SYNTAX },
-    [PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL] = { "invalid Unicode escape sequence; multiple codepoints are not allowed in a character literal", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL] = { "invalid Unicode escape sequence; Multiple codepoints at single character literal are disallowed", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_ESCAPE_INVALID_UNICODE_LONG] = { "invalid Unicode escape sequence; maximum length is 6 digits", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_ESCAPE_INVALID_UNICODE_TERM] = { "invalid Unicode escape sequence; needs closing `}`", PM_ERROR_LEVEL_SYNTAX },
     [PM_ERR_EXPECT_ARGUMENT] = { "expected an argument", PM_ERROR_LEVEL_SYNTAX },
```
