diff options
| author | Kevin Newton <kddnewton@gmail.com> | 2023-09-13 15:39:22 -0400 |
|---|---|---|
| committer | git <svn-admin@ruby-lang.org> | 2023-09-14 13:58:12 +0000 |
| commit | 72d008d88d32fe3eb3f7033d93c90a00cb7d7c61 (patch) | |
| tree | 6a75f4ae86a2f1591c74187275d4f007e34d7c0d | |
| parent | b85bb90c3464bca8d19b73247ace809b44f35679 (diff) | |
[ruby/yarp] Extract out parse_strings
https://github.com/ruby/yarp/commit/55446f7a43
| -rw-r--r-- | yarp/yarp.c | 308 |
1 file changed, 157 insertions, 151 deletions
diff --git a/yarp/yarp.c b/yarp/yarp.c index 643f795c74..d7eec49d92 100644 --- a/yarp/yarp.c +++ b/yarp/yarp.c @@ -10906,6 +10906,161 @@ parse_negative_numeric(yp_node_t *node) { } } +// Parse a set of strings that could be concatenated together. +static inline yp_node_t * +parse_strings(yp_parser_t *parser) { + assert(parser->current.type == YP_TOKEN_STRING_BEGIN); + yp_node_t *result = NULL; + + while (match_type_p(parser, YP_TOKEN_STRING_BEGIN)) { + assert(parser->lex_modes.current->mode == YP_LEX_STRING); + bool lex_interpolation = parser->lex_modes.current->as.string.interpolation; + + yp_node_t *node = NULL; + yp_token_t opening = parser->current; + parser_lex(parser); + + if (accept(parser, YP_TOKEN_STRING_END)) { + // If we get here, then we have an end immediately after a + // start. In that case we'll create an empty content token + // and return an uninterpolated string. + yp_token_t content = (yp_token_t) { + .type = YP_TOKEN_STRING_CONTENT, + .start = parser->previous.start, + .end = parser->previous.start + }; + + node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE); + } else if (accept(parser, YP_TOKEN_LABEL_END)) { + // If we get here, then we have an end of a label + // immediately after a start. In that case we'll create an + // empty symbol node. + yp_token_t opening = not_provided(parser); + yp_token_t content = (yp_token_t) { + .type = YP_TOKEN_STRING_CONTENT, + .start = parser->previous.start, + .end = parser->previous.start + }; + + node = (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous); + } else if (!lex_interpolation) { + // If we don't accept interpolation then we expect the + // string to start with a single string content node. 
+ expect(parser, YP_TOKEN_STRING_CONTENT, YP_ERR_EXPECT_STRING_CONTENT); + yp_token_t content = parser->previous; + + // It is unfortunately possible to have multiple string + // content nodes in a row in the case that there's heredoc + // content in the middle of the string, like this cursed + // example: + // + // <<-END+'b + // a + // END + // c'+'d' + // + // In that case we need to switch to an interpolated string + // to be able to contain all of the parts. + if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) { + yp_node_list_t parts = YP_EMPTY_NODE_LIST; + + yp_token_t delimiters = not_provided(parser); + yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, YP_UNESCAPE_MINIMAL); + yp_node_list_append(&parts, part); + + while (accept(parser, YP_TOKEN_STRING_CONTENT)) { + part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, YP_UNESCAPE_MINIMAL); + yp_node_list_append(&parts, part); + } + + expect(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_LITERAL_TERM); + node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous); + } else if (accept(parser, YP_TOKEN_LABEL_END)) { + node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL); + } else { + expect(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_LITERAL_TERM); + node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_MINIMAL); + } + } else if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) { + // In this case we've hit string content so we know the string at + // least has something in it. We'll need to check if the following + // token is the end (in which case we can return a plain string) or if + // it's not then it has interpolation. 
+ yp_token_t content = parser->current; + parser_lex(parser); + + if (accept(parser, YP_TOKEN_STRING_END)) { + node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL); + } else if (accept(parser, YP_TOKEN_LABEL_END)) { + node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL); + } else { + // If we get here, then we have interpolation so we'll need to create + // a string or symbol node with interpolation. + yp_node_list_t parts = YP_EMPTY_NODE_LIST; + yp_token_t string_opening = not_provided(parser); + yp_token_t string_closing = not_provided(parser); + yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, YP_UNESCAPE_ALL); + yp_node_list_append(&parts, part); + + while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) { + yp_node_t *part = parse_string_part(parser); + if (part != NULL) yp_node_list_append(&parts, part); + } + + if (accept(parser, YP_TOKEN_LABEL_END)) { + node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous); + } else { + expect(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_INTERPOLATED_TERM); + node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous); + } + } + } else { + // If we get here, then the first part of the string is not plain string + // content, in which case we need to parse the string as an interpolated + // string. 
+ yp_node_list_t parts = YP_EMPTY_NODE_LIST; + + while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) { + yp_node_t *part = parse_string_part(parser); + if (part != NULL) yp_node_list_append(&parts, part); + } + + if (accept(parser, YP_TOKEN_LABEL_END)) { + node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous); + } else { + expect(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_INTERPOLATED_TERM); + node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous); + } + } + + if (result == NULL) { + // If the node we just parsed is a symbol node, then we + // can't concatenate it with anything else, so we can now + // return that node. + if (YP_NODE_TYPE_P(node, YP_SYMBOL_NODE) || YP_NODE_TYPE_P(node, YP_INTERPOLATED_SYMBOL_NODE)) { + return node; + } + + // If we don't already have a node, then it's fine and we + // can just set the result to be the node we just parsed. + result = node; + } else { + // Otherwise we need to check the type of the node we just + // parsed. If it cannot be concatenated with the previous + // node, then we'll need to add a syntax error. + if (!YP_NODE_TYPE_P(node, YP_STRING_NODE) && !YP_NODE_TYPE_P(node, YP_INTERPOLATED_STRING_NODE)) { + yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, YP_ERR_STRING_CONCATENATION); + } + + // Either way we will create a concat node to hold the + // strings together. + result = (yp_node_t *) yp_string_concat_node_create(parser, result, node); + } + } + + return result; +} + // Parse an expression that begins with the previous node that we just lexed. 
static inline yp_node_t * parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { @@ -12888,157 +13043,8 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { return (yp_node_t *) node; } - case YP_TOKEN_STRING_BEGIN: { - yp_node_t *result = NULL; - - while (match_type_p(parser, YP_TOKEN_STRING_BEGIN)) { - assert(parser->lex_modes.current->mode == YP_LEX_STRING); - bool lex_interpolation = parser->lex_modes.current->as.string.interpolation; - - yp_node_t *node = NULL; - yp_token_t opening = parser->current; - parser_lex(parser); - - if (accept(parser, YP_TOKEN_STRING_END)) { - // If we get here, then we have an end immediately after a - // start. In that case we'll create an empty content token - // and return an uninterpolated string. - yp_token_t content = (yp_token_t) { - .type = YP_TOKEN_STRING_CONTENT, - .start = parser->previous.start, - .end = parser->previous.start - }; - - node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE); - } else if (accept(parser, YP_TOKEN_LABEL_END)) { - // If we get here, then we have an end of a label - // immediately after a start. In that case we'll create an - // empty symbol node. - yp_token_t opening = not_provided(parser); - yp_token_t content = (yp_token_t) { - .type = YP_TOKEN_STRING_CONTENT, - .start = parser->previous.start, - .end = parser->previous.start - }; - - node = (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous); - } else if (!lex_interpolation) { - // If we don't accept interpolation then we expect the - // string to start with a single string content node. 
- expect(parser, YP_TOKEN_STRING_CONTENT, YP_ERR_EXPECT_STRING_CONTENT); - yp_token_t content = parser->previous; - - // It is unfortunately possible to have multiple string - // content nodes in a row in the case that there's heredoc - // content in the middle of the string, like this cursed - // example: - // - // <<-END+'b - // a - // END - // c'+'d' - // - // In that case we need to switch to an interpolated string - // to be able to contain all of the parts. - if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) { - yp_node_list_t parts = YP_EMPTY_NODE_LIST; - - yp_token_t delimiters = not_provided(parser); - yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, YP_UNESCAPE_MINIMAL); - yp_node_list_append(&parts, part); - - while (accept(parser, YP_TOKEN_STRING_CONTENT)) { - part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, YP_UNESCAPE_MINIMAL); - yp_node_list_append(&parts, part); - } - - expect(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_LITERAL_TERM); - node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous); - } else if (accept(parser, YP_TOKEN_LABEL_END)) { - node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL); - } else { - expect(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_LITERAL_TERM); - node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_MINIMAL); - } - } else if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) { - // In this case we've hit string content so we know the string at - // least has something in it. We'll need to check if the following - // token is the end (in which case we can return a plain string) or if - // it's not then it has interpolation. 
- yp_token_t content = parser->current; - parser_lex(parser); - - if (accept(parser, YP_TOKEN_STRING_END)) { - node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL); - } else if (accept(parser, YP_TOKEN_LABEL_END)) { - node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL); - } else { - // If we get here, then we have interpolation so we'll need to create - // a string or symbol node with interpolation. - yp_node_list_t parts = YP_EMPTY_NODE_LIST; - yp_token_t string_opening = not_provided(parser); - yp_token_t string_closing = not_provided(parser); - yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, YP_UNESCAPE_ALL); - yp_node_list_append(&parts, part); - - while (!match_any_3_type_p(parser, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) { - yp_node_t *part = parse_string_part(parser); - if (part != NULL) yp_node_list_append(&parts, part); - } - - if (accept(parser, YP_TOKEN_LABEL_END)) { - node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous); - } else { - expect(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_INTERPOLATED_TERM); - node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous); - } - } - } else { - // If we get here, then the first part of the string is not plain string - // content, in which case we need to parse the string as an interpolated - // string. 
- yp_node_list_t parts = YP_EMPTY_NODE_LIST; - - while (!match_any_3_type_p(parser, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) { - yp_node_t *part = parse_string_part(parser); - if (part != NULL) yp_node_list_append(&parts, part); - } - - if (accept(parser, YP_TOKEN_LABEL_END)) { - node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous); - } else { - expect(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_INTERPOLATED_TERM); - node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous); - } - } - - if (result == NULL) { - // If the node we just parsed is a symbol node, then we - // can't concatenate it with anything else, so we can now - // return that node. - if (YP_NODE_TYPE_P(node, YP_SYMBOL_NODE) || YP_NODE_TYPE_P(node, YP_INTERPOLATED_SYMBOL_NODE)) { - return node; - } - - // If we don't already have a node, then it's fine and we - // can just set the result to be the node we just parsed. - result = node; - } else { - // Otherwise we need to check the type of the node we just - // parsed. If it cannot be concatenated with the previous - // node, then we'll need to add a syntax error. - if (!YP_NODE_TYPE_P(node, YP_STRING_NODE) && !YP_NODE_TYPE_P(node, YP_INTERPOLATED_STRING_NODE)) { - yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, YP_ERR_STRING_CONCATENATION); - } - - // Either way we will create a concat node to hold the - // strings together. - result = (yp_node_t *) yp_string_concat_node_create(parser, result, node); - } - } - - return result; - } + case YP_TOKEN_STRING_BEGIN: + return parse_strings(parser); case YP_TOKEN_SYMBOL_BEGIN: { yp_lex_mode_t lex_mode = *parser->lex_modes.current; parser_lex(parser); |
