summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--test/yarp/snapshots/unparser/corpus/semantic/dstr.txt10
-rw-r--r--yarp/yarp.c230
2 files changed, 128 insertions, 112 deletions
diff --git a/test/yarp/snapshots/unparser/corpus/semantic/dstr.txt b/test/yarp/snapshots/unparser/corpus/semantic/dstr.txt
index b3ea49149c..10345e1db5 100644
--- a/test/yarp/snapshots/unparser/corpus/semantic/dstr.txt
+++ b/test/yarp/snapshots/unparser/corpus/semantic/dstr.txt
@@ -198,11 +198,11 @@ ProgramNode(0...608)(
)
),
StringConcatNode(552...560)(
- StringNode(552...554)((552...553), (553...553), (553...554), ""),
- StringConcatNode(555...560)(
- StringNode(555...557)((555...556), (556...556), (556...557), ""),
- StringNode(558...560)((558...559), (559...559), (559...560), "")
- )
+ StringConcatNode(552...557)(
+ StringNode(552...554)((552...553), (553...553), (553...554), ""),
+ StringNode(555...557)((555...556), (556...556), (556...557), "")
+ ),
+ StringNode(558...560)((558...559), (559...559), (559...560), "")
),
StringConcatNode(562...574)(
InterpolatedStringNode(562...570)(
diff --git a/yarp/yarp.c b/yarp/yarp.c
index 6573a8ef40..22b1f9ab48 100644
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@@ -11950,95 +11950,115 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
return (yp_node_t *) node;
}
case YP_TOKEN_STRING_BEGIN: {
- assert(parser->lex_modes.current->mode == YP_LEX_STRING);
- bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
+ yp_node_t *result = NULL;
- yp_token_t opening = parser->current;
- parser_lex(parser);
-
- yp_node_t *node;
+ while (match_type_p(parser, YP_TOKEN_STRING_BEGIN)) {
+ assert(parser->lex_modes.current->mode == YP_LEX_STRING);
+ bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
- if (accept(parser, YP_TOKEN_STRING_END)) {
- // If we get here, then we have an end immediately after a start. In
- // that case we'll create an empty content token and return an
- // uninterpolated string.
- yp_token_t content = (yp_token_t) {
- .type = YP_TOKEN_STRING_CONTENT,
- .start = parser->previous.start,
- .end = parser->previous.start
- };
+ yp_node_t *node = NULL;
+ yp_token_t opening = parser->current;
+ parser_lex(parser);
- node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE);
- } else if (accept(parser, YP_TOKEN_LABEL_END)) {
- // If we get here, then we have an end of a label immediately after a
- // start. In that case we'll create an empty symbol node.
- yp_token_t opening = not_provided(parser);
- yp_token_t content = (yp_token_t) {
- .type = YP_TOKEN_STRING_CONTENT,
- .start = parser->previous.start,
- .end = parser->previous.start
- };
+ if (accept(parser, YP_TOKEN_STRING_END)) {
+ // If we get here, then we have an end immediately after a
+ // start. In that case we'll create an empty content token
+ // and return an uninterpolated string.
+ yp_token_t content = (yp_token_t) {
+ .type = YP_TOKEN_STRING_CONTENT,
+ .start = parser->previous.start,
+ .end = parser->previous.start
+ };
+
+ node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE);
+ } else if (accept(parser, YP_TOKEN_LABEL_END)) {
+ // If we get here, then we have an end of a label
+ // immediately after a start. In that case we'll create an
+ // empty symbol node.
+ yp_token_t opening = not_provided(parser);
+ yp_token_t content = (yp_token_t) {
+ .type = YP_TOKEN_STRING_CONTENT,
+ .start = parser->previous.start,
+ .end = parser->previous.start
+ };
+
+ node = (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
+ } else if (!lex_interpolation) {
+ // If we don't accept interpolation then we expect the
+ // string to start with a single string content node.
+ expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter.");
+ yp_token_t content = parser->previous;
+
+ // It is unfortunately possible to have multiple string
+ // content nodes in a row in the case that there's heredoc
+ // content in the middle of the string, like this cursed
+ // example:
+ //
+ // <<-END+'b
+ // a
+ // END
+ // c'+'d'
+ //
+ // In that case we need to switch to an interpolated string
+ // to be able to contain all of the parts.
+ if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
+ yp_node_list_t parts = YP_EMPTY_NODE_LIST;
- return (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
- } else if (!lex_interpolation) {
- // If we don't accept interpolation then we expect the string to start
- // with a single string content node.
- expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter.");
- yp_token_t content = parser->previous;
+ yp_token_t delimiters = not_provided(parser);
+ yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, YP_UNESCAPE_MINIMAL);
+ yp_node_list_append(&parts, part);
- // It is unfortunately possible to have multiple string content nodes in
- // a row in the case that there's heredoc content in the middle of the
- // string, like this cursed example:
- //
- // <<-END+'b
- // a
- // END
- // c'+'d'
- //
- // In that case we need to switch to an interpolated string to be able
- // to contain all of the parts.
- if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
- yp_node_list_t parts = YP_EMPTY_NODE_LIST;
+ while (accept(parser, YP_TOKEN_STRING_CONTENT)) {
+ part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, YP_UNESCAPE_MINIMAL);
+ yp_node_list_append(&parts, part);
+ }
- yp_token_t delimiters = not_provided(parser);
- yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, YP_UNESCAPE_MINIMAL);
- yp_node_list_append(&parts, part);
+ expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
+ node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
+ } else if (accept(parser, YP_TOKEN_LABEL_END)) {
+ node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
+ } else {
+ expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
+ node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_MINIMAL);
+ }
+ } else if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
+ // In this case we've hit string content so we know the string at
+ // least has something in it. We'll need to check if the following
+ // token is the end (in which case we can return a plain string) or if
+ // it's not then it has interpolation.
+ yp_token_t content = parser->current;
+ parser_lex(parser);
- while (accept(parser, YP_TOKEN_STRING_CONTENT)) {
- part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, YP_UNESCAPE_MINIMAL);
+ if (accept(parser, YP_TOKEN_STRING_END)) {
+ node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
+ } else if (accept(parser, YP_TOKEN_LABEL_END)) {
+ node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
+ } else {
+ // If we get here, then we have interpolation so we'll need to create
+ // a string or symbol node with interpolation.
+ yp_node_list_t parts = YP_EMPTY_NODE_LIST;
+ yp_token_t string_opening = not_provided(parser);
+ yp_token_t string_closing = not_provided(parser);
+ yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, YP_UNESCAPE_ALL);
yp_node_list_append(&parts, part);
- }
-
- expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
- return (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
- }
- if (accept(parser, YP_TOKEN_LABEL_END)) {
- return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
- }
-
- expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
- node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_MINIMAL);
- } else if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
- // In this case we've hit string content so we know the string at
- // least has something in it. We'll need to check if the following
- // token is the end (in which case we can return a plain string) or if
- // it's not then it has interpolation.
- yp_token_t content = parser->current;
- parser_lex(parser);
+ while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
+ yp_node_t *part = parse_string_part(parser);
+ if (part != NULL) yp_node_list_append(&parts, part);
+ }
- if (accept(parser, YP_TOKEN_STRING_END)) {
- node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
- } else if (accept(parser, YP_TOKEN_LABEL_END)) {
- return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
+ if (accept(parser, YP_TOKEN_LABEL_END)) {
+ node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
+ } else {
+ expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
+ node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
+ }
+ }
} else {
- // If we get here, then we have interpolation so we'll need to create
- // a string or symbol node with interpolation.
+ // If we get here, then the first part of the string is not plain string
+ // content, in which case we need to parse the string as an interpolated
+ // string.
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
- yp_token_t string_opening = not_provided(parser);
- yp_token_t string_closing = not_provided(parser);
- yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, YP_UNESCAPE_ALL);
- yp_node_list_append(&parts, part);
while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
yp_node_t *part = parse_string_part(parser);
@@ -12046,43 +12066,39 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
}
if (accept(parser, YP_TOKEN_LABEL_END)) {
- return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
+ node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
+ } else {
+ expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
+ node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
}
-
- expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
- node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
}
- } else {
- // If we get here, then the first part of the string is not plain string
- // content, in which case we need to parse the string as an interpolated
- // string.
- yp_node_list_t parts = YP_EMPTY_NODE_LIST;
- while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
- yp_node_t *part = parse_string_part(parser);
- if (part != NULL) yp_node_list_append(&parts, part);
- }
+ if (result == NULL) {
+ // If the node we just parsed is a symbol node, then we
+ // can't concatenate it with anything else, so we can now
+ // return that node.
+ if (YP_NODE_TYPE_P(node, YP_NODE_SYMBOL_NODE) || YP_NODE_TYPE_P(node, YP_NODE_INTERPOLATED_SYMBOL_NODE)) {
+ return node;
+ }
- if (accept(parser, YP_TOKEN_LABEL_END)) {
- return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
- }
+ // If we don't already have a node, then it's fine and we
+ // can just set the result to be the node we just parsed.
+ result = node;
+ } else {
+ // Otherwise we need to check the type of the node we just
+ // parsed. If it cannot be concatenated with the previous
+ // node, then we'll need to add a syntax error.
+ if (!YP_NODE_TYPE_P(node, YP_NODE_STRING_NODE) && !YP_NODE_TYPE_P(node, YP_NODE_INTERPOLATED_STRING_NODE)) {
+ yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, "Unexpected string concatenation.");
+ }
- expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
- node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
+ // Either way we will create a concat node to hold the
+ // strings together.
+ result = (yp_node_t *) yp_string_concat_node_create(parser, result, node);
+ }
}
- // If there's a string immediately following this string, then it's a
- // concatenatation. In this case we'll parse the next string and create a
- // node in the tree that concatenates the two strings.
- if (parser->current.type == YP_TOKEN_STRING_BEGIN) {
- return (yp_node_t *) yp_string_concat_node_create(
- parser,
- node,
- parse_expression(parser, YP_BINDING_POWER_CALL, "Expected string on the right side of concatenation.")
- );
- } else {
- return node;
- }
+ return result;
}
case YP_TOKEN_SYMBOL_BEGIN: {
yp_lex_mode_t lex_mode = *parser->lex_modes.current;