[ruby/prism] Correctly handle line continuations in %w/%i interrupted by heredocs

See https://bugs.ruby-lang.org/issues/21756. Ripper fails to parse this, but prism does not actually handle it correctly either. When heredocs are used, there can be multiple `STRING_CONTENT` tokens even in lowercase percent arrays, and they need to be concatenated. Luckily we don't need to handle as many cases as in uppercase arrays, where interpolation is allowed. https://github.com/ruby/prism/commit/211677000e
author: Earlopain <14981592+Earlopain@users.noreply.github.com> 2025-12-03 09:05:13 +0100
committer: git <svn-admin@ruby-lang.org> 2025-12-03 18:24:14 +0000
commit: ed31a0caa88006afa507fd387e3f84ad8b8ddb00 (patch)
tree: 8ae1621e117aec25b2b3c64ba7c56cfea4d78d48
parent: f9cd94f17d6fef49f1ee5cbb8f66839f0d7a5db9 (diff)
1 files changed, 62 insertions, 9 deletions
diff --git a/prism/prism.c b/prism/prism.c
index cd4d166a12..291d1d8521 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -19299,18 +19299,52 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             parser_lex(parser);
             pm_token_t opening = parser->previous;
             pm_array_node_t *array = pm_array_node_create(parser, &opening);
+            pm_node_t *current = NULL;
 
             while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
                 accept1(parser, PM_TOKEN_WORDS_SEP);
                 if (match1(parser, PM_TOKEN_STRING_END)) break;
 
-                if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+                // Interpolation is not possible but nested heredocs can still lead to
+                // consecutive (disjoint) string tokens when the final newline is escaped.
+                while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
                     pm_token_t opening = not_provided(parser);
                     pm_token_t closing = not_provided(parser);
-                    pm_array_node_elements_append(array, UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing)));
+
+                    // Record the string node, moving to interpolation if needed.
+                    if (current == NULL) {
+                        current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
+                        parser_lex(parser);
+                    } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+                        pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
+                        parser_lex(parser);
+                        pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
+                    } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+                        pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
+                        pm_token_t bounds = not_provided(parser);
+
+                        pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
+                        pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped));
+                        pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing));
+                        parser_lex(parser);
+
+                        pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
+                        pm_interpolated_symbol_node_append(interpolated, first_string);
+                        pm_interpolated_symbol_node_append(interpolated, second_string);
+
+                        xfree(current);
+                        current = UP(interpolated);
+                    } else {
+                        assert(false && "unreachable");
+                    }
                 }
 
-                expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
+                if (current) {
+                    pm_array_node_elements_append(array, current);
+                    current = NULL;
+                } else {
+                    expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
+                }
             }
 
             pm_token_t closing = parser->current;
@@ -19489,23 +19523,42 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
             parser_lex(parser);
             pm_token_t opening = parser->previous;
             pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
-            // skip all leading whitespaces
-            accept1(parser, PM_TOKEN_WORDS_SEP);
+            pm_node_t *current = NULL;
 
             while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
                 accept1(parser, PM_TOKEN_WORDS_SEP);
                 if (match1(parser, PM_TOKEN_STRING_END)) break;
 
-                if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+                // Interpolation is not possible but nested heredocs can still lead to
+                // consecutive (disjoint) string tokens when the final newline is escaped.
+                while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
                     pm_token_t opening = not_provided(parser);
                     pm_token_t closing = not_provided(parser);
 
                     pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
-                    pm_array_node_elements_append(array, string);
+
+                    // Record the string node, moving to interpolation if needed.
+                    if (current == NULL) {
+                        current = string;
+                    } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+                        pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
+                    } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+                        pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
+                        pm_interpolated_string_node_append(interpolated, current);
+                        pm_interpolated_string_node_append(interpolated, string);
+                        current = UP(interpolated);
+                    } else {
+                        assert(false && "unreachable");
+                    }
+                    parser_lex(parser);
                 }
 
-                expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
+                if (current) {
+                    pm_array_node_elements_append(array, current);
+                    current = NULL;
+                } else {
+                    expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
+                }
             }
 
             pm_token_t closing = parser->current;
author	Earlopain <14981592+Earlopain@users.noreply.github.com>	2025-12-03 09:05:13 +0100
committer	git <svn-admin@ruby-lang.org>	2025-12-03 18:24:14 +0000
commit	ed31a0caa88006afa507fd387e3f84ad8b8ddb00 (patch)
tree	8ae1621e117aec25b2b3c64ba7c56cfea4d78d48
parent	f9cd94f17d6fef49f1ee5cbb8f66839f0d7a5db9 (diff)