diff options
| -rw-r--r-- | prism/prism.c | 300 | ||||
| -rw-r--r-- | test/prism/snapshots/seattlerb/qw_escape.txt | 2 | ||||
| -rw-r--r-- | test/prism/snapshots/seattlerb/qw_escape_term.txt | 2 | ||||
| -rw-r--r-- | test/prism/snapshots/spanning_heredoc.txt | 26 | ||||
| -rw-r--r-- | test/prism/unescape_test.rb | 10 |
5 files changed, 223 insertions, 117 deletions
diff --git a/prism/prism.c b/prism/prism.c index 77a813138b..161d5cc66b 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -4369,9 +4369,9 @@ pm_string_concat_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *ri return node; } -// Allocate a new StringNode node. -static pm_string_node_t * -pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) { +// Allocate a new StringNode node with the current string on the parser. +static inline pm_string_node_t * +pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) { pm_string_node_t *node = PM_ALLOC_NODE(parser, pm_string_node_t); pm_node_flags_t flags = 0; @@ -4391,12 +4391,27 @@ pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), .content_loc = PM_LOCATION_TOKEN_VALUE(content), .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), - .unescaped = PM_EMPTY_STRING + .unescaped = *string }; return node; } +// Allocate a new StringNode node. +static pm_string_node_t * +pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) { + return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_EMPTY_STRING); +} + +// Allocate a new StringNode node and create it using the current string on the +// parser. +static pm_string_node_t * +pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) { + pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string); + parser->current_string = PM_EMPTY_STRING; + return node; +} + // Allocate and initialize a new SuperNode node. static pm_super_node_t * pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) { @@ -7790,7 +7805,7 @@ parser_lex(pm_parser_t *parser) { } } } - case PM_LEX_LIST: + case PM_LEX_LIST: { if (parser->next_start != NULL) { parser->current.end = parser->next_start; parser->next_start = NULL; @@ -7939,6 +7954,7 @@ parser_lex(pm_parser_t *parser) { // flush the heredoc and continue parsing after // heredoc_end. parser_flush_heredoc_end(parser); + pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length); LEX(PM_TOKEN_STRING_CONTENT); } else { // ... else track the newline. @@ -8003,7 +8019,7 @@ parser_lex(pm_parser_t *parser) { // If we were unable to find a breakpoint, then this token hits the end of // the file. LEX(PM_TOKEN_EOF); - + } case PM_LEX_REGEXP: { // First, we'll set to start of this token to be the current end. if (parser->next_start == NULL) { @@ -8159,17 +8175,20 @@ parser_lex(pm_parser_t *parser) { // These are the places where we need to split up the content of the // string. We'll use strpbrk to find the first of these characters. - const uint8_t *breakpoints = parser->lex_modes.current->as.string.breakpoints; + pm_lex_mode_t *lex_mode = parser->lex_modes.current; + const uint8_t *breakpoints = lex_mode->as.string.breakpoints; const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end); + // If we haven't found an escape yet, then this buffer will be + // unallocated since we can refer directly to the source string. + pm_buffer_t buffer = (pm_buffer_t) { .value = NULL, .length = 0, .capacity = 0 }; + const uint8_t *buffer_cursor = NULL; + while (breakpoint != NULL) { // If we hit the incrementor, then we'll increment then nesting and // continue lexing. - if ( - parser->lex_modes.current->as.string.incrementor != '\0' && - *breakpoint == parser->lex_modes.current->as.string.incrementor - ) { - parser->lex_modes.current->as.string.nesting++; + if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) { + lex_mode->as.string.nesting++; breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); continue; } @@ -8177,12 +8196,12 @@ parser_lex(pm_parser_t *parser) { // Note that we have to check the terminator here first because we could // potentially be parsing a % string that has a # character as the // terminator. - if (*breakpoint == parser->lex_modes.current->as.string.terminator) { + if (*breakpoint == lex_mode->as.string.terminator) { // If this terminator doesn't actually close the string, then we need // to continue on past it. - if (parser->lex_modes.current->as.string.nesting > 0) { + if (lex_mode->as.string.nesting > 0) { breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); - parser->lex_modes.current->as.string.nesting--; + lex_mode->as.string.nesting--; continue; } @@ -8190,6 +8209,14 @@ parser_lex(pm_parser_t *parser) { // then we need to return that content as string content first. if (breakpoint > parser->current.start) { parser->current.end = breakpoint; + + if (buffer_cursor == NULL) { + pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end); + } else { + pm_buffer_append_bytes(&buffer, buffer_cursor, (size_t) (parser->current.end - buffer_cursor)); + pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length); + } + LEX(PM_TOKEN_STRING_CONTENT); } @@ -8203,11 +8230,7 @@ parser_lex(pm_parser_t *parser) { parser->current.end = breakpoint + 1; } - if ( - parser->lex_modes.current->as.string.label_allowed && - (peek(parser) == ':') && - (peek_offset(parser, 1) != ':') - ) { + if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) { parser->current.end++; lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED); lex_mode_pop(parser); @@ -8230,6 +8253,14 @@ parser_lex(pm_parser_t *parser) { } else { parser->current.end = breakpoint + 1; parser_flush_heredoc_end(parser); + + if (buffer_cursor == NULL) { + pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end); + } else { + pm_buffer_append_bytes(&buffer, buffer_cursor, (size_t) (parser->current.end - buffer_cursor)); + pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length); + } + LEX(PM_TOKEN_STRING_CONTENT); } } @@ -8240,46 +8271,103 @@ parser_lex(pm_parser_t *parser) { breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); break; case '\\': { - // If we hit escapes, then we need to treat the next token - // literally. In this case we'll skip past the next character and - // find the next breakpoint. - pm_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? PM_UNESCAPE_ALL : PM_UNESCAPE_MINIMAL; - size_t difference = pm_unescape_calculate_difference(parser, breakpoint, unescape_type); - if (difference == 0) { - // we're at the end of the file + // Here we hit escapes. + if (buffer_cursor == NULL) { + pm_buffer_init_capacity(&buffer, 16); + pm_buffer_append_bytes(&buffer, parser->current.start, (size_t) (breakpoint - parser->current.start)); + } else { + pm_buffer_append_bytes(&buffer, buffer_cursor, (size_t) (breakpoint - buffer_cursor)); + } + + parser->current.end = breakpoint + 1; + + // If we've hit the end of the file, then break out of + // the loop by setting the breakpoint to NULL. + if (parser->current.end == parser->end) { breakpoint = NULL; - break; + continue; } - // If the result is an escaped newline ... - if (breakpoint[difference - 1] == '\n') { - if (parser->heredoc_end) { - // ... if we are on the same line as a heredoc, flush the heredoc and - // continue parsing after heredoc_end. - parser->current.end = breakpoint + difference; - parser_flush_heredoc_end(parser); - LEX(PM_TOKEN_STRING_CONTENT); - } else { - // ... else track the newline. - pm_newline_list_append(&parser->newline_list, breakpoint + difference - 1); - } + uint8_t peeked = peek(parser); + switch (peeked) { + case '\\': + pm_buffer_append_u8(&buffer, '\\'); + parser->current.end++; + break; + case '\r': + parser->current.end++; + if (peek(parser) != '\n') { + if (!lex_mode->as.string.interpolation) { + pm_buffer_append_u8(&buffer, '\\'); + } + pm_buffer_append_u8(&buffer, '\r'); + break; + } + /* fallthrough */ + case '\n': + if (!lex_mode->as.string.interpolation) { + pm_buffer_append_u8(&buffer, '\\'); + pm_buffer_append_u8(&buffer, '\n'); + } + + if (parser->heredoc_end) { + // ... if we are on the same line as a heredoc, + // flush the heredoc and continue parsing after + // heredoc_end. + parser_flush_heredoc_end(parser); + pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length); + LEX(PM_TOKEN_STRING_CONTENT); + } else { + // ... else track the newline. + pm_newline_list_append(&parser->newline_list, parser->current.end); + } + + parser->current.end++; + break; + default: + if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) { + pm_buffer_append_u8(&buffer, peeked); + parser->current.end++; + } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) { + pm_buffer_append_u8(&buffer, peeked); + parser->current.end++; + } else if (lex_mode->as.string.interpolation) { + escape_read(parser, &buffer, PM_ESCAPE_FLAG_NONE); + } else { + pm_buffer_append_u8(&buffer, '\\'); + pm_buffer_append_u8(&buffer, peeked); + parser->current.end++; + } + + break; } - breakpoint = pm_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference)); + buffer_cursor = parser->current.end; + breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end); break; } case '#': { pm_token_type_t type = lex_interpolation(parser, breakpoint); - if (type != PM_TOKEN_NOT_PROVIDED) { - LEX(type); + + if (type == PM_TOKEN_NOT_PROVIDED) { + // If we haven't returned at this point then we had something that + // looked like an interpolated class or instance variable like "#@" + // but wasn't actually. In this case we'll just skip to the next + // breakpoint. + breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end); + break; } - // If we haven't returned at this point then we had something that - // looked like an interpolated class or instance variable like "#@" - // but wasn't actually. In this case we'll just skip to the next - // breakpoint. - breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end); - break; + if (type == PM_TOKEN_STRING_CONTENT) { + if (buffer_cursor == NULL) { + pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end); + } else { + pm_buffer_append_bytes(&buffer, buffer_cursor, (size_t) (parser->current.end - buffer_cursor)); + pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length); + } + } + + LEX(type); } default: assert(false && "unreachable"); @@ -11807,17 +11895,30 @@ parse_strings(pm_parser_t *parser) { // start. In that case we'll create an empty content token and // return an uninterpolated string. pm_token_t content = parse_strings_empty_content(parser->previous.start); - node = (pm_node_t *) pm_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_NONE); + pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous); + + pm_string_shared_init(&string->unescaped, content.start, content.end); + node = (pm_node_t *) string; } else if (accept1(parser, PM_TOKEN_LABEL_END)) { // If we get here, then we have an end of a label immediately // after a start. In that case we'll create an empty symbol // node. pm_token_t opening = not_provided(parser); pm_token_t content = parse_strings_empty_content(parser->previous.start); - node = (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &parser->previous); + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous); + + pm_string_shared_init(&symbol->unescaped, content.start, content.end); + node = (pm_node_t *) symbol; } else if (!lex_interpolation) { // If we don't accept interpolation then we expect the string to // start with a single string content node. + pm_string_t unescaped; + if (match1(parser, PM_TOKEN_EOF)) { + unescaped = PM_EMPTY_STRING; + } else { + unescaped = parser->current_string; + } + expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT); pm_token_t content = parser->previous; @@ -11836,13 +11937,14 @@ parse_strings(pm_parser_t *parser) { pm_node_list_t parts = PM_EMPTY_NODE_LIST; pm_token_t delimiters = not_provided(parser); - pm_node_t *part = (pm_node_t *) pm_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, PM_UNESCAPE_MINIMAL); + pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped); pm_node_list_append(&parts, part); - while (accept1(parser, PM_TOKEN_STRING_CONTENT)) { - part = (pm_node_t *) pm_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, PM_UNESCAPE_MINIMAL); + do { + part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters); pm_node_list_append(&parts, part); - } + parser_lex(parser); + } while (match1(parser, PM_TOKEN_STRING_CONTENT)); expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM); node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous); @@ -11850,7 +11952,7 @@ parse_strings(pm_parser_t *parser) { node = (pm_node_t *) pm_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_ALL); } else { expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM); - node = (pm_node_t *) pm_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_MINIMAL); + node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &parser->previous); } } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) { // In this case we've hit string content so we know the string @@ -11858,10 +11960,12 @@ parse_strings(pm_parser_t *parser) { // following token is the end (in which case we can return a // plain string) or if it's not then it has interpolation. pm_token_t content = parser->current; + pm_string_t unescaped = parser->current_string; parser_lex(parser); - if (accept1(parser, PM_TOKEN_STRING_END)) { - node = (pm_node_t *) pm_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_ALL); + if (match1(parser, PM_TOKEN_STRING_END)) { + node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &parser->current); + parser_lex(parser); } else if (accept1(parser, PM_TOKEN_LABEL_END)) { node = (pm_node_t *) pm_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_ALL); } else { @@ -11871,11 +11975,14 @@ parse_strings(pm_parser_t *parser) { pm_token_t string_opening = not_provided(parser); pm_token_t string_closing = not_provided(parser); - pm_node_t *part = (pm_node_t *) pm_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, PM_UNESCAPE_ALL); + pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped); pm_node_list_append(&parts, part); while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) { - if ((part = parse_string_part(parser)) != NULL) { + if (match1(parser, PM_TOKEN_STRING_CONTENT)) { + pm_node_list_append(&parts, (pm_node_t *) pm_string_node_create_current_string(parser, &string_opening, &parser->current, &string_closing)); + parser_lex(parser); + } else if ((part = parse_string_part(parser)) != NULL) { pm_node_list_append(&parts, part); } } @@ -11888,14 +11995,19 @@ parse_strings(pm_parser_t *parser) { } } } else { - // If we get here, then the first part of the string is not - // plain string content, in which case we need to parse the - // string as an interpolated string. + // If we get here, then the first part of the string is not plain + // string content, in which case we need to parse the string as an + // interpolated string. pm_node_list_t parts = PM_EMPTY_NODE_LIST; + pm_token_t string_opening = not_provided(parser); + pm_token_t string_closing = not_provided(parser); pm_node_t *part = NULL; while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) { - if ((part = parse_string_part(parser)) != NULL) { + if (match1(parser, PM_TOKEN_STRING_CONTENT)) { + pm_node_list_append(&parts, (pm_node_t *) pm_string_node_create_current_string(parser, &string_opening, &parser->current, &string_closing)); + parser_lex(parser); + } else if ((part = parse_string_part(parser)) != NULL) { pm_node_list_append(&parts, part); } } @@ -12171,8 +12283,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) { content.start = content.start + 1; pm_token_t closing = not_provided(parser); - pm_string_node_t *node = (pm_string_node_t *) pm_string_node_create(parser, &opening, &content, &closing); - node->unescaped = parser->current_string; + pm_string_node_t *node = (pm_string_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing); // Characters can be followed by strings in which case they are // automatically concatenated. @@ -13397,15 +13508,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) { accept1(parser, PM_TOKEN_WORDS_SEP); if (match1(parser, PM_TOKEN_STRING_END)) break; - expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT); + if (match1(parser, PM_TOKEN_STRING_CONTENT)) { + pm_token_t opening = not_provided(parser); + pm_token_t closing = not_provided(parser); - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); + pm_symbol_node_t *symbol = (pm_symbol_node_t *) pm_symbol_node_create(parser, &opening, &parser->current, &closing); + symbol->unescaped = parser->current_string; - pm_symbol_node_t *symbol = (pm_symbol_node_t *) pm_symbol_node_create(parser, &opening, &parser->previous, &closing); - symbol->unescaped = parser->current_string; + pm_array_node_elements_append(array, (pm_node_t *) symbol); + } - pm_array_node_elements_append(array, (pm_node_t *) symbol); + expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT); } expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM); @@ -13445,35 +13558,29 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) { // If we hit content and the current node is NULL, then this is // the first string content we've seen. In that case we're going // to create a new string node and set that to the current. - parser_lex(parser); - - pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing); + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->current, &closing); symbol->unescaped = parser->current_string; + parser_lex(parser); current = (pm_node_t *) symbol; } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { // If we hit string content and the current node is an // interpolated string, then we need to append the string content // to the list of child nodes. + pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing); parser_lex(parser); - pm_string_node_t *string = pm_string_node_create(parser, &opening, &parser->previous, &closing); - string->unescaped = parser->current_string; - - pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, (pm_node_t *) string); + pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string); } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { // If we hit string content and the current node is a string node, // then we need to convert the current node into an interpolated // string and add the string content to the list of child nodes. + pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing); parser_lex(parser); pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing); pm_interpolated_symbol_node_append(interpolated, current); - - pm_string_node_t *string = pm_string_node_create(parser, &opening, &parser->previous, &closing); - string->unescaped = parser->current_string; - - pm_interpolated_symbol_node_append(interpolated, (pm_node_t *) string); + pm_interpolated_symbol_node_append(interpolated, string); current = (pm_node_t *) interpolated; } else { assert(false && "unreachable"); @@ -13580,15 +13687,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) { accept1(parser, PM_TOKEN_WORDS_SEP); if (match1(parser, PM_TOKEN_STRING_END)) break; - expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT); - - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); + if (match1(parser, PM_TOKEN_STRING_CONTENT)) { + pm_token_t opening = not_provided(parser); + pm_token_t closing = not_provided(parser); - pm_string_node_t *string = (pm_string_node_t *) pm_string_node_create(parser, &opening, &parser->previous, &closing); - string->unescaped = parser->current_string; + pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing); + pm_array_node_elements_append(array, string); + } - pm_array_node_elements_append(array, (pm_node_t *) string); + expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT); } expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM); @@ -13621,30 +13728,29 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) { break; } case PM_TOKEN_STRING_CONTENT: { - parser_lex(parser); pm_token_t opening = not_provided(parser); pm_token_t closing = not_provided(parser); - pm_string_node_t *string = (pm_string_node_t *) pm_string_node_create(parser, &opening, &parser->previous, &closing); - string->unescaped = parser->current_string; + pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing); + parser_lex(parser); if (current == NULL) { // If we hit content and the current node is NULL, then this is // the first string content we've seen. In that case we're going // to create a new string node and set that to the current. - current = (pm_node_t *) string; + current = string; } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { // If we hit string content and the current node is an // interpolated string, then we need to append the string content // to the list of child nodes. - pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, (pm_node_t *) string); + pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string); } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { // If we hit string content and the current node is a string node, // then we need to convert the current node into an interpolated // string and add the string content to the list of child nodes. pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing); pm_interpolated_string_node_append(interpolated, current); - pm_interpolated_string_node_append(interpolated, (pm_node_t *) string); + pm_interpolated_string_node_append(interpolated, string); current = (pm_node_t *) interpolated; } else { assert(false && "unreachable"); diff --git a/test/prism/snapshots/seattlerb/qw_escape.txt b/test/prism/snapshots/seattlerb/qw_escape.txt index ef60ee32c4..d92c1da7a6 100644 --- a/test/prism/snapshots/seattlerb/qw_escape.txt +++ b/test/prism/snapshots/seattlerb/qw_escape.txt @@ -8,4 +8,4 @@ ├── opening_loc: (1,0)-(1,3) = "%q(" ├── content_loc: (1,3)-(1,6) = "\u0001\\'" ├── closing_loc: (1,6)-(1,7) = ")" - └── unescaped: "\u0001'" + └── unescaped: "\u0001\\'" diff --git a/test/prism/snapshots/seattlerb/qw_escape_term.txt b/test/prism/snapshots/seattlerb/qw_escape_term.txt index 0677055ad6..e935b7eb68 100644 --- a/test/prism/snapshots/seattlerb/qw_escape_term.txt +++ b/test/prism/snapshots/seattlerb/qw_escape_term.txt @@ -8,4 +8,4 @@ ├── opening_loc: (1,0)-(1,3) = "%q|" ├── content_loc: (1,3)-(1,25) = "blah blah \\| blah blah" ├── closing_loc: (1,25)-(1,26) = "|" - └── unescaped: "blah blah \\| blah blah" + └── unescaped: "blah blah | blah blah" diff --git a/test/prism/snapshots/spanning_heredoc.txt b/test/prism/snapshots/spanning_heredoc.txt index 884028f054..2c59cb4368 100644 --- a/test/prism/snapshots/spanning_heredoc.txt +++ b/test/prism/snapshots/spanning_heredoc.txt @@ -73,10 +73,10 @@ │ │ └── @ InterpolatedStringNode (location: (10,9)-(13,2)) │ │ ├── opening_loc: (10,9)-(10,10) = "\"" │ │ ├── parts: (length: 2) - │ │ │ ├── @ StringNode (location: (10,10)-(10,0)) + │ │ │ ├── @ StringNode (location: (10,10)-(10,12)) │ │ │ │ ├── flags: ∅ │ │ │ │ ├── opening_loc: ∅ - │ │ │ │ ├── content_loc: (10,10)-(10,0) = "d\\\n" + │ │ │ │ ├── content_loc: (10,10)-(10,12) = "d\\" │ │ │ │ ├── closing_loc: ∅ │ │ │ │ └── unescaped: "d" │ │ │ └── @ StringNode (location: (13,0)-(13,1)) @@ -107,10 +107,10 @@ │ │ └── @ InterpolatedStringNode (location: (16,9)-(19,2)) │ │ ├── opening_loc: (16,9)-(16,12) = "%q[" │ │ ├── parts: (length: 2) - │ │ │ ├── @ StringNode (location: (16,12)-(16,0)) + │ │ │ ├── @ StringNode (location: (16,12)-(16,14)) │ │ │ │ ├── flags: ∅ │ │ │ │ ├── opening_loc: ∅ - │ │ │ │ ├── content_loc: (16,12)-(16,0) = "f\\\n" + │ │ │ │ ├── content_loc: (16,12)-(16,14) = "f\\" │ │ │ │ ├── closing_loc: ∅ │ │ │ │ └── unescaped: "f\\\n" │ │ │ └── @ StringNode (location: (19,0)-(19,1)) @@ -141,10 +141,10 @@ │ │ └── @ InterpolatedStringNode (location: (22,9)-(25,2)) │ │ ├── opening_loc: (22,9)-(22,12) = "%Q[" │ │ ├── parts: (length: 2) - │ │ │ ├── @ StringNode (location: (22,12)-(22,0)) + │ │ │ ├── @ StringNode (location: (22,12)-(22,14)) │ │ │ │ ├── flags: ∅ │ │ │ │ ├── opening_loc: ∅ - │ │ │ │ ├── content_loc: (22,12)-(22,0) = "h\\\n" + │ │ │ │ ├── content_loc: (22,12)-(22,14) = "h\\" │ │ │ │ ├── closing_loc: ∅ │ │ │ │ └── unescaped: "h" │ │ │ └── @ StringNode (location: (25,0)-(25,1)) @@ -179,7 +179,7 @@ │ │ │ │ ├── opening_loc: ∅ │ │ │ │ ├── content_loc: (28,12)-(28,14) = "j\\" │ │ │ │ ├── closing_loc: ∅ - │ │ │ │ └── unescaped: "j" + │ │ │ │ └── unescaped: "j\n" │ │ │ └── @ StringNode (location: (31,0)-(31,1)) │ │ │ ├── flags: ∅ │ │ │ ├── opening_loc: ∅ @@ -216,7 +216,7 @@ │ │ │ │ │ ├── opening_loc: ∅ │ │ │ │ │ ├── content_loc: (35,12)-(35,14) = "l\\" │ │ │ │ │ ├── closing_loc: ∅ - │ │ │ │ │ └── unescaped: "l" + │ │ │ │ │ └── unescaped: "l\n" │ │ │ │ └── @ StringNode (location: (38,0)-(38,1)) │ │ │ │ ├── flags: ∅ │ │ │ │ ├── opening_loc: ∅ @@ -250,7 +250,7 @@ │ │ │ │ ├── opening_loc: ∅ │ │ │ │ ├── value_loc: (41,12)-(41,14) = "n\\" │ │ │ │ ├── closing_loc: ∅ - │ │ │ │ └── unescaped: "n" + │ │ │ │ └── unescaped: "n\n" │ │ │ └── @ SymbolNode (location: (44,0)-(44,1)) │ │ │ ├── opening_loc: ∅ │ │ │ ├── value_loc: (44,0)-(44,1) = "n" @@ -278,18 +278,18 @@ │ │ └── unescaped: "o\n" │ └── @ ArrayNode (location: (48,9)-(51,2)) │ ├── elements: (length: 1) - │ │ └── @ InterpolatedSymbolNode (location: (48,12)-(51,1)) + │ │ └── @ InterpolatedSymbolNode (location: (48,12)-(48,14)) │ │ ├── opening_loc: ∅ │ │ ├── parts: (length: 2) │ │ │ ├── @ SymbolNode (location: (48,12)-(48,14)) │ │ │ │ ├── opening_loc: ∅ │ │ │ │ ├── value_loc: (48,12)-(48,14) = "p\\" │ │ │ │ ├── closing_loc: ∅ - │ │ │ │ └── unescaped: "p" - │ │ │ └── @ StringNode (location: (51,0)-(51,1)) + │ │ │ │ └── unescaped: "p\n" + │ │ │ └── @ StringNode (location: (48,12)-(48,14)) │ │ │ ├── flags: ∅ │ │ │ ├── opening_loc: ∅ - │ │ │ ├── content_loc: (51,0)-(51,1) = "p" + │ │ │ ├── content_loc: (48,12)-(48,14) = "p\\" │ │ │ ├── closing_loc: ∅ │ │ │ └── unescaped: "p" │ │ └── closing_loc: ∅ diff --git a/test/prism/unescape_test.rb b/test/prism/unescape_test.rb index 0053fdd364..95b3675e2e 100644 --- a/test/prism/unescape_test.rb +++ b/test/prism/unescape_test.rb @@ -95,11 +95,11 @@ module Prism contexts = [ [Context::String.new("?", ""), escapes], - # [Context::String.new("'", "'"), escapes], - # [Context::String.new("\"", "\""), escapes], - # [Context::String.new("%q[", "]"), escapes], - # [Context::String.new("%Q[", "]"), escapes], - # [Context::String.new("%[", "]"), escapes], + [Context::String.new("'", "'"), escapes], + [Context::String.new("\"", "\""), escapes], + [Context::String.new("%q[", "]"), escapes], + [Context::String.new("%Q[", "]"), escapes], + [Context::String.new("%[", "]"), escapes], # [Context::String.new("`", "`"), escapes], # [Context::String.new("<<~H\n", "\nH"), escapes], # [Context::String.new("<<~'H'\n", "\nH"), escapes], |
