summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEarlopain <14981592+Earlopain@users.noreply.github.com>2025-12-03 09:05:13 +0100
committergit <svn-admin@ruby-lang.org>2025-12-03 18:24:14 +0000
commited31a0caa88006afa507fd387e3f84ad8b8ddb00 (patch)
tree8ae1621e117aec25b2b3c64ba7c56cfea4d78d48
parentf9cd94f17d6fef49f1ee5cbb8f66839f0d7a5db9 (diff)
[ruby/prism] Correctly handle line continuations in %w/i% interrupted by heredocs
See https://bugs.ruby-lang.org/issues/21756. Ripper fails to parse this, but prism actually also doesn't handle it correctly. When heredocs are used, even in lowercase percent arays there can be multiple `STRING_CONTENT` tokens. We need to concat them. Luckily we don't need to handle as many cases as in uppercase arrays where interpolation is allowed. https://github.com/ruby/prism/commit/211677000e
-rw-r--r--prism/prism.c71
1 files changed, 62 insertions, 9 deletions
diff --git a/prism/prism.c b/prism/prism.c
index cd4d166a12..291d1d8521 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -19299,18 +19299,52 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_token_t opening = parser->previous;
pm_array_node_t *array = pm_array_node_create(parser, &opening);
+ pm_node_t *current = NULL;
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
accept1(parser, PM_TOKEN_WORDS_SEP);
if (match1(parser, PM_TOKEN_STRING_END)) break;
- if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ // Interpolation is not possible but nested heredocs can still lead to
+ // consecutive (disjoint) string tokens when the final newline is escaped.
+ while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
pm_token_t opening = not_provided(parser);
pm_token_t closing = not_provided(parser);
- pm_array_node_elements_append(array, UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing)));
+
+ // Record the string node, moving to interpolation if needed.
+ if (current == NULL) {
+ current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
+ parser_lex(parser);
+ } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+ pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
+ parser_lex(parser);
+ pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
+ } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+ pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
+ pm_token_t bounds = not_provided(parser);
+
+ pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
+ pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped));
+ pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing));
+ parser_lex(parser);
+
+ pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
+ pm_interpolated_symbol_node_append(interpolated, first_string);
+ pm_interpolated_symbol_node_append(interpolated, second_string);
+
+ xfree(current);
+ current = UP(interpolated);
+ } else {
+ assert(false && "unreachable");
+ }
}
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
+ if (current) {
+ pm_array_node_elements_append(array, current);
+ current = NULL;
+ } else {
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
+ }
}
pm_token_t closing = parser->current;
@@ -19489,23 +19523,42 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_token_t opening = parser->previous;
pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
- // skip all leading whitespaces
- accept1(parser, PM_TOKEN_WORDS_SEP);
+ pm_node_t *current = NULL;
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
accept1(parser, PM_TOKEN_WORDS_SEP);
if (match1(parser, PM_TOKEN_STRING_END)) break;
- if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ // Interpolation is not possible but nested heredocs can still lead to
+ // consecutive (disjoint) string tokens when the final newline is escaped.
+ while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
pm_token_t opening = not_provided(parser);
pm_token_t closing = not_provided(parser);
pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
- pm_array_node_elements_append(array, string);
+
+ // Record the string node, moving to interpolation if needed.
+ if (current == NULL) {
+ current = string;
+ } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
+ } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+ pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
+ pm_interpolated_string_node_append(interpolated, current);
+ pm_interpolated_string_node_append(interpolated, string);
+ current = UP(interpolated);
+ } else {
+ assert(false && "unreachable");
+ }
+ parser_lex(parser);
}
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
+ if (current) {
+ pm_array_node_elements_append(array, current);
+ current = NULL;
+ } else {
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
+ }
}
pm_token_t closing = parser->current;