summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- prism/prism.c 300
-rw-r--r-- test/prism/snapshots/seattlerb/qw_escape.txt 2
-rw-r--r-- test/prism/snapshots/seattlerb/qw_escape_term.txt 2
-rw-r--r-- test/prism/snapshots/spanning_heredoc.txt 26
-rw-r--r-- test/prism/unescape_test.rb 10
5 files changed, 223 insertions, 117 deletions
diff --git a/prism/prism.c b/prism/prism.c
index 77a813138b..161d5cc66b 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -4369,9 +4369,9 @@ pm_string_concat_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *ri
return node;
}
-// Allocate a new StringNode node.
-static pm_string_node_t *
-pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
+// Allocate a new StringNode node with the given unescaped string.
+static inline pm_string_node_t *
+pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
pm_string_node_t *node = PM_ALLOC_NODE(parser, pm_string_node_t);
pm_node_flags_t flags = 0;
@@ -4391,12 +4391,27 @@ pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
.opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
.content_loc = PM_LOCATION_TOKEN_VALUE(content),
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .unescaped = PM_EMPTY_STRING
+ .unescaped = *string
};
return node;
}
+// Allocate a new StringNode node.
+static pm_string_node_t *
+pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
+ return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_EMPTY_STRING);
+}
+
+// Allocate a new StringNode node whose unescaped value is the parser's current
+// string, then reset the parser's current string to empty.
+static pm_string_node_t *
+pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
+ pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
+ parser->current_string = PM_EMPTY_STRING;
+ return node;
+}
+
// Allocate and initialize a new SuperNode node.
static pm_super_node_t *
pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
@@ -7790,7 +7805,7 @@ parser_lex(pm_parser_t *parser) {
}
}
}
- case PM_LEX_LIST:
+ case PM_LEX_LIST: {
if (parser->next_start != NULL) {
parser->current.end = parser->next_start;
parser->next_start = NULL;
@@ -7939,6 +7954,7 @@ parser_lex(pm_parser_t *parser) {
// flush the heredoc and continue parsing after
// heredoc_end.
parser_flush_heredoc_end(parser);
+ pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
LEX(PM_TOKEN_STRING_CONTENT);
} else {
// ... else track the newline.
@@ -8003,7 +8019,7 @@ parser_lex(pm_parser_t *parser) {
// If we were unable to find a breakpoint, then this token hits the end of
// the file.
LEX(PM_TOKEN_EOF);
-
+ }
case PM_LEX_REGEXP: {
// First, we'll set to start of this token to be the current end.
if (parser->next_start == NULL) {
@@ -8159,17 +8175,20 @@ parser_lex(pm_parser_t *parser) {
// These are the places where we need to split up the content of the
// string. We'll use strpbrk to find the first of these characters.
- const uint8_t *breakpoints = parser->lex_modes.current->as.string.breakpoints;
+ pm_lex_mode_t *lex_mode = parser->lex_modes.current;
+ const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ // If we haven't found an escape yet, then this buffer will be
+ // unallocated since we can refer directly to the source string.
+ pm_buffer_t buffer = (pm_buffer_t) { .value = NULL, .length = 0, .capacity = 0 };
+ const uint8_t *buffer_cursor = NULL;
+
while (breakpoint != NULL) {
// If we hit the incrementor, then we'll increment then nesting and
// continue lexing.
- if (
- parser->lex_modes.current->as.string.incrementor != '\0' &&
- *breakpoint == parser->lex_modes.current->as.string.incrementor
- ) {
- parser->lex_modes.current->as.string.nesting++;
+ if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
+ lex_mode->as.string.nesting++;
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
continue;
}
@@ -8177,12 +8196,12 @@ parser_lex(pm_parser_t *parser) {
// Note that we have to check the terminator here first because we could
// potentially be parsing a % string that has a # character as the
// terminator.
- if (*breakpoint == parser->lex_modes.current->as.string.terminator) {
+ if (*breakpoint == lex_mode->as.string.terminator) {
// If this terminator doesn't actually close the string, then we need
// to continue on past it.
- if (parser->lex_modes.current->as.string.nesting > 0) {
+ if (lex_mode->as.string.nesting > 0) {
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
- parser->lex_modes.current->as.string.nesting--;
+ lex_mode->as.string.nesting--;
continue;
}
@@ -8190,6 +8209,14 @@ parser_lex(pm_parser_t *parser) {
// then we need to return that content as string content first.
if (breakpoint > parser->current.start) {
parser->current.end = breakpoint;
+
+ if (buffer_cursor == NULL) {
+ pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
+ } else {
+ pm_buffer_append_bytes(&buffer, buffer_cursor, (size_t) (parser->current.end - buffer_cursor));
+ pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
+ }
+
LEX(PM_TOKEN_STRING_CONTENT);
}
@@ -8203,11 +8230,7 @@ parser_lex(pm_parser_t *parser) {
parser->current.end = breakpoint + 1;
}
- if (
- parser->lex_modes.current->as.string.label_allowed &&
- (peek(parser) == ':') &&
- (peek_offset(parser, 1) != ':')
- ) {
+ if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
parser->current.end++;
lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
lex_mode_pop(parser);
@@ -8230,6 +8253,14 @@ parser_lex(pm_parser_t *parser) {
} else {
parser->current.end = breakpoint + 1;
parser_flush_heredoc_end(parser);
+
+ if (buffer_cursor == NULL) {
+ pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
+ } else {
+ pm_buffer_append_bytes(&buffer, buffer_cursor, (size_t) (parser->current.end - buffer_cursor));
+ pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
+ }
+
LEX(PM_TOKEN_STRING_CONTENT);
}
}
@@ -8240,46 +8271,103 @@ parser_lex(pm_parser_t *parser) {
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break;
case '\\': {
- // If we hit escapes, then we need to treat the next token
- // literally. In this case we'll skip past the next character and
- // find the next breakpoint.
- pm_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? PM_UNESCAPE_ALL : PM_UNESCAPE_MINIMAL;
- size_t difference = pm_unescape_calculate_difference(parser, breakpoint, unescape_type);
- if (difference == 0) {
- // we're at the end of the file
+ // Here we hit an escape, so first copy the pending content into the buffer.
+ if (buffer_cursor == NULL) {
+ pm_buffer_init_capacity(&buffer, 16);
+ pm_buffer_append_bytes(&buffer, parser->current.start, (size_t) (breakpoint - parser->current.start));
+ } else {
+ pm_buffer_append_bytes(&buffer, buffer_cursor, (size_t) (breakpoint - buffer_cursor));
+ }
+
+ parser->current.end = breakpoint + 1;
+
+ // If we've hit the end of the file, then break out of
+ // the loop by setting the breakpoint to NULL.
+ if (parser->current.end == parser->end) {
breakpoint = NULL;
- break;
+ continue;
}
- // If the result is an escaped newline ...
- if (breakpoint[difference - 1] == '\n') {
- if (parser->heredoc_end) {
- // ... if we are on the same line as a heredoc, flush the heredoc and
- // continue parsing after heredoc_end.
- parser->current.end = breakpoint + difference;
- parser_flush_heredoc_end(parser);
- LEX(PM_TOKEN_STRING_CONTENT);
- } else {
- // ... else track the newline.
- pm_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
- }
+ uint8_t peeked = peek(parser);
+ switch (peeked) {
+ case '\\':
+ pm_buffer_append_u8(&buffer, '\\');
+ parser->current.end++;
+ break;
+ case '\r':
+ parser->current.end++;
+ if (peek(parser) != '\n') {
+ if (!lex_mode->as.string.interpolation) {
+ pm_buffer_append_u8(&buffer, '\\');
+ }
+ pm_buffer_append_u8(&buffer, '\r');
+ break;
+ }
+ /* fallthrough */
+ case '\n':
+ if (!lex_mode->as.string.interpolation) {
+ pm_buffer_append_u8(&buffer, '\\');
+ pm_buffer_append_u8(&buffer, '\n');
+ }
+
+ if (parser->heredoc_end) {
+ // We hit an escaped newline: if we are on the same
+ // line as a heredoc, flush the heredoc and continue
+ // parsing after heredoc_end.
+ parser_flush_heredoc_end(parser);
+ pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
+ LEX(PM_TOKEN_STRING_CONTENT);
+ } else {
+ // Otherwise, track the newline.
+ pm_newline_list_append(&parser->newline_list, parser->current.end);
+ }
+
+ parser->current.end++;
+ break;
+ default:
+ if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
+ pm_buffer_append_u8(&buffer, peeked);
+ parser->current.end++;
+ } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
+ pm_buffer_append_u8(&buffer, peeked);
+ parser->current.end++;
+ } else if (lex_mode->as.string.interpolation) {
+ escape_read(parser, &buffer, PM_ESCAPE_FLAG_NONE);
+ } else {
+ pm_buffer_append_u8(&buffer, '\\');
+ pm_buffer_append_u8(&buffer, peeked);
+ parser->current.end++;
+ }
+
+ break;
}
- breakpoint = pm_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
+ buffer_cursor = parser->current.end;
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
break;
}
case '#': {
pm_token_type_t type = lex_interpolation(parser, breakpoint);
- if (type != PM_TOKEN_NOT_PROVIDED) {
- LEX(type);
+
+ if (type == PM_TOKEN_NOT_PROVIDED) {
+ // If no token type was provided, then we had something that
+ // looked like an interpolated class or instance variable like "#@"
+ // but wasn't actually. In this case we'll just skip to the next
+ // breakpoint.
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ break;
}
- // If we haven't returned at this point then we had something that
- // looked like an interpolated class or instance variable like "#@"
- // but wasn't actually. In this case we'll just skip to the next
- // breakpoint.
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
- break;
+ if (type == PM_TOKEN_STRING_CONTENT) {
+ if (buffer_cursor == NULL) {
+ pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
+ } else {
+ pm_buffer_append_bytes(&buffer, buffer_cursor, (size_t) (parser->current.end - buffer_cursor));
+ pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
+ }
+ }
+
+ LEX(type);
}
default:
assert(false && "unreachable");
@@ -11807,17 +11895,30 @@ parse_strings(pm_parser_t *parser) {
// start. In that case we'll create an empty content token and
// return an uninterpolated string.
pm_token_t content = parse_strings_empty_content(parser->previous.start);
- node = (pm_node_t *) pm_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_NONE);
+ pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
+
+ pm_string_shared_init(&string->unescaped, content.start, content.end);
+ node = (pm_node_t *) string;
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
// If we get here, then we have an end of a label immediately
// after a start. In that case we'll create an empty symbol
// node.
pm_token_t opening = not_provided(parser);
pm_token_t content = parse_strings_empty_content(parser->previous.start);
- node = (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &parser->previous);
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
+
+ pm_string_shared_init(&symbol->unescaped, content.start, content.end);
+ node = (pm_node_t *) symbol;
} else if (!lex_interpolation) {
// If we don't accept interpolation then we expect the string to
// start with a single string content node.
+ pm_string_t unescaped;
+ if (match1(parser, PM_TOKEN_EOF)) {
+ unescaped = PM_EMPTY_STRING;
+ } else {
+ unescaped = parser->current_string;
+ }
+
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
pm_token_t content = parser->previous;
@@ -11836,13 +11937,14 @@ parse_strings(pm_parser_t *parser) {
pm_node_list_t parts = PM_EMPTY_NODE_LIST;
pm_token_t delimiters = not_provided(parser);
- pm_node_t *part = (pm_node_t *) pm_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, PM_UNESCAPE_MINIMAL);
+ pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
pm_node_list_append(&parts, part);
- while (accept1(parser, PM_TOKEN_STRING_CONTENT)) {
- part = (pm_node_t *) pm_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, PM_UNESCAPE_MINIMAL);
+ do {
+ part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
pm_node_list_append(&parts, part);
- }
+ parser_lex(parser);
+ } while (match1(parser, PM_TOKEN_STRING_CONTENT));
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
@@ -11850,7 +11952,7 @@ parse_strings(pm_parser_t *parser) {
node = (pm_node_t *) pm_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_ALL);
} else {
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
- node = (pm_node_t *) pm_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_MINIMAL);
+ node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &parser->previous);
}
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
// In this case we've hit string content so we know the string
@@ -11858,10 +11960,12 @@ parse_strings(pm_parser_t *parser) {
// following token is the end (in which case we can return a
// plain string) or if it's not then it has interpolation.
pm_token_t content = parser->current;
+ pm_string_t unescaped = parser->current_string;
parser_lex(parser);
- if (accept1(parser, PM_TOKEN_STRING_END)) {
- node = (pm_node_t *) pm_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_ALL);
+ if (match1(parser, PM_TOKEN_STRING_END)) {
+ node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &parser->current);
+ parser_lex(parser);
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
node = (pm_node_t *) pm_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_ALL);
} else {
@@ -11871,11 +11975,14 @@ parse_strings(pm_parser_t *parser) {
pm_token_t string_opening = not_provided(parser);
pm_token_t string_closing = not_provided(parser);
- pm_node_t *part = (pm_node_t *) pm_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, PM_UNESCAPE_ALL);
+ pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
pm_node_list_append(&parts, part);
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
- if ((part = parse_string_part(parser)) != NULL) {
+ if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ pm_node_list_append(&parts, (pm_node_t *) pm_string_node_create_current_string(parser, &string_opening, &parser->current, &string_closing));
+ parser_lex(parser);
+ } else if ((part = parse_string_part(parser)) != NULL) {
pm_node_list_append(&parts, part);
}
}
@@ -11888,14 +11995,19 @@ parse_strings(pm_parser_t *parser) {
}
}
} else {
- // If we get here, then the first part of the string is not
- // plain string content, in which case we need to parse the
- // string as an interpolated string.
+ // If we get here, then the first part of the string is not plain
+ // string content, in which case we need to parse the string as an
+ // interpolated string.
pm_node_list_t parts = PM_EMPTY_NODE_LIST;
+ pm_token_t string_opening = not_provided(parser);
+ pm_token_t string_closing = not_provided(parser);
pm_node_t *part = NULL;
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
- if ((part = parse_string_part(parser)) != NULL) {
+ if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ pm_node_list_append(&parts, (pm_node_t *) pm_string_node_create_current_string(parser, &string_opening, &parser->current, &string_closing));
+ parser_lex(parser);
+ } else if ((part = parse_string_part(parser)) != NULL) {
pm_node_list_append(&parts, part);
}
}
@@ -12171,8 +12283,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
content.start = content.start + 1;
pm_token_t closing = not_provided(parser);
- pm_string_node_t *node = (pm_string_node_t *) pm_string_node_create(parser, &opening, &content, &closing);
- node->unescaped = parser->current_string;
+ pm_string_node_t *node = (pm_string_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
// Characters can be followed by strings in which case they are
// automatically concatenated.
@@ -13397,15 +13508,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
accept1(parser, PM_TOKEN_WORDS_SEP);
if (match1(parser, PM_TOKEN_STRING_END)) break;
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
+ if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ pm_token_t opening = not_provided(parser);
+ pm_token_t closing = not_provided(parser);
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
+ pm_symbol_node_t *symbol = (pm_symbol_node_t *) pm_symbol_node_create(parser, &opening, &parser->current, &closing);
+ symbol->unescaped = parser->current_string;
- pm_symbol_node_t *symbol = (pm_symbol_node_t *) pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
- symbol->unescaped = parser->current_string;
+ pm_array_node_elements_append(array, (pm_node_t *) symbol);
+ }
- pm_array_node_elements_append(array, (pm_node_t *) symbol);
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
}
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
@@ -13445,35 +13558,29 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
// If we hit content and the current node is NULL, then this is
// the first string content we've seen. In that case we're going
// to create a new string node and set that to the current.
- parser_lex(parser);
-
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->current, &closing);
symbol->unescaped = parser->current_string;
+ parser_lex(parser);
current = (pm_node_t *) symbol;
} else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
// If we hit string content and the current node is an
// interpolated string, then we need to append the string content
// to the list of child nodes.
+ pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
parser_lex(parser);
- pm_string_node_t *string = pm_string_node_create(parser, &opening, &parser->previous, &closing);
- string->unescaped = parser->current_string;
-
- pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, (pm_node_t *) string);
+ pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
} else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
// If we hit string content and the current node is a string node,
// then we need to convert the current node into an interpolated
// string and add the string content to the list of child nodes.
+ pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
parser_lex(parser);
pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
pm_interpolated_symbol_node_append(interpolated, current);
-
- pm_string_node_t *string = pm_string_node_create(parser, &opening, &parser->previous, &closing);
- string->unescaped = parser->current_string;
-
- pm_interpolated_symbol_node_append(interpolated, (pm_node_t *) string);
+ pm_interpolated_symbol_node_append(interpolated, string);
current = (pm_node_t *) interpolated;
} else {
assert(false && "unreachable");
@@ -13580,15 +13687,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
accept1(parser, PM_TOKEN_WORDS_SEP);
if (match1(parser, PM_TOKEN_STRING_END)) break;
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
-
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
+ if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ pm_token_t opening = not_provided(parser);
+ pm_token_t closing = not_provided(parser);
- pm_string_node_t *string = (pm_string_node_t *) pm_string_node_create(parser, &opening, &parser->previous, &closing);
- string->unescaped = parser->current_string;
+ pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
+ pm_array_node_elements_append(array, string);
+ }
- pm_array_node_elements_append(array, (pm_node_t *) string);
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
}
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
@@ -13621,30 +13728,29 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
break;
}
case PM_TOKEN_STRING_CONTENT: {
- parser_lex(parser);
pm_token_t opening = not_provided(parser);
pm_token_t closing = not_provided(parser);
- pm_string_node_t *string = (pm_string_node_t *) pm_string_node_create(parser, &opening, &parser->previous, &closing);
- string->unescaped = parser->current_string;
+ pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
+ parser_lex(parser);
if (current == NULL) {
// If we hit content and the current node is NULL, then this is
// the first string content we've seen. In that case we're going
// to create a new string node and set that to the current.
- current = (pm_node_t *) string;
+ current = string;
} else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
// If we hit string content and the current node is an
// interpolated string, then we need to append the string content
// to the list of child nodes.
- pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, (pm_node_t *) string);
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
} else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
// If we hit string content and the current node is a string node,
// then we need to convert the current node into an interpolated
// string and add the string content to the list of child nodes.
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
pm_interpolated_string_node_append(interpolated, current);
- pm_interpolated_string_node_append(interpolated, (pm_node_t *) string);
+ pm_interpolated_string_node_append(interpolated, string);
current = (pm_node_t *) interpolated;
} else {
assert(false && "unreachable");
diff --git a/test/prism/snapshots/seattlerb/qw_escape.txt b/test/prism/snapshots/seattlerb/qw_escape.txt
index ef60ee32c4..d92c1da7a6 100644
--- a/test/prism/snapshots/seattlerb/qw_escape.txt
+++ b/test/prism/snapshots/seattlerb/qw_escape.txt
@@ -8,4 +8,4 @@
├── opening_loc: (1,0)-(1,3) = "%q("
├── content_loc: (1,3)-(1,6) = "\u0001\\'"
├── closing_loc: (1,6)-(1,7) = ")"
- └── unescaped: "\u0001'"
+ └── unescaped: "\u0001\\'"
diff --git a/test/prism/snapshots/seattlerb/qw_escape_term.txt b/test/prism/snapshots/seattlerb/qw_escape_term.txt
index 0677055ad6..e935b7eb68 100644
--- a/test/prism/snapshots/seattlerb/qw_escape_term.txt
+++ b/test/prism/snapshots/seattlerb/qw_escape_term.txt
@@ -8,4 +8,4 @@
├── opening_loc: (1,0)-(1,3) = "%q|"
├── content_loc: (1,3)-(1,25) = "blah blah \\| blah blah"
├── closing_loc: (1,25)-(1,26) = "|"
- └── unescaped: "blah blah \\| blah blah"
+ └── unescaped: "blah blah | blah blah"
diff --git a/test/prism/snapshots/spanning_heredoc.txt b/test/prism/snapshots/spanning_heredoc.txt
index 884028f054..2c59cb4368 100644
--- a/test/prism/snapshots/spanning_heredoc.txt
+++ b/test/prism/snapshots/spanning_heredoc.txt
@@ -73,10 +73,10 @@
│ │ └── @ InterpolatedStringNode (location: (10,9)-(13,2))
│ │ ├── opening_loc: (10,9)-(10,10) = "\""
│ │ ├── parts: (length: 2)
- │ │ │ ├── @ StringNode (location: (10,10)-(10,0))
+ │ │ │ ├── @ StringNode (location: (10,10)-(10,12))
│ │ │ │ ├── flags: ∅
│ │ │ │ ├── opening_loc: ∅
- │ │ │ │ ├── content_loc: (10,10)-(10,0) = "d\\\n"
+ │ │ │ │ ├── content_loc: (10,10)-(10,12) = "d\\"
│ │ │ │ ├── closing_loc: ∅
│ │ │ │ └── unescaped: "d"
│ │ │ └── @ StringNode (location: (13,0)-(13,1))
@@ -107,10 +107,10 @@
│ │ └── @ InterpolatedStringNode (location: (16,9)-(19,2))
│ │ ├── opening_loc: (16,9)-(16,12) = "%q["
│ │ ├── parts: (length: 2)
- │ │ │ ├── @ StringNode (location: (16,12)-(16,0))
+ │ │ │ ├── @ StringNode (location: (16,12)-(16,14))
│ │ │ │ ├── flags: ∅
│ │ │ │ ├── opening_loc: ∅
- │ │ │ │ ├── content_loc: (16,12)-(16,0) = "f\\\n"
+ │ │ │ │ ├── content_loc: (16,12)-(16,14) = "f\\"
│ │ │ │ ├── closing_loc: ∅
│ │ │ │ └── unescaped: "f\\\n"
│ │ │ └── @ StringNode (location: (19,0)-(19,1))
@@ -141,10 +141,10 @@
│ │ └── @ InterpolatedStringNode (location: (22,9)-(25,2))
│ │ ├── opening_loc: (22,9)-(22,12) = "%Q["
│ │ ├── parts: (length: 2)
- │ │ │ ├── @ StringNode (location: (22,12)-(22,0))
+ │ │ │ ├── @ StringNode (location: (22,12)-(22,14))
│ │ │ │ ├── flags: ∅
│ │ │ │ ├── opening_loc: ∅
- │ │ │ │ ├── content_loc: (22,12)-(22,0) = "h\\\n"
+ │ │ │ │ ├── content_loc: (22,12)-(22,14) = "h\\"
│ │ │ │ ├── closing_loc: ∅
│ │ │ │ └── unescaped: "h"
│ │ │ └── @ StringNode (location: (25,0)-(25,1))
@@ -179,7 +179,7 @@
│ │ │ │ ├── opening_loc: ∅
│ │ │ │ ├── content_loc: (28,12)-(28,14) = "j\\"
│ │ │ │ ├── closing_loc: ∅
- │ │ │ │ └── unescaped: "j"
+ │ │ │ │ └── unescaped: "j\n"
│ │ │ └── @ StringNode (location: (31,0)-(31,1))
│ │ │ ├── flags: ∅
│ │ │ ├── opening_loc: ∅
@@ -216,7 +216,7 @@
│ │ │ │ │ ├── opening_loc: ∅
│ │ │ │ │ ├── content_loc: (35,12)-(35,14) = "l\\"
│ │ │ │ │ ├── closing_loc: ∅
- │ │ │ │ │ └── unescaped: "l"
+ │ │ │ │ │ └── unescaped: "l\n"
│ │ │ │ └── @ StringNode (location: (38,0)-(38,1))
│ │ │ │ ├── flags: ∅
│ │ │ │ ├── opening_loc: ∅
@@ -250,7 +250,7 @@
│ │ │ │ ├── opening_loc: ∅
│ │ │ │ ├── value_loc: (41,12)-(41,14) = "n\\"
│ │ │ │ ├── closing_loc: ∅
- │ │ │ │ └── unescaped: "n"
+ │ │ │ │ └── unescaped: "n\n"
│ │ │ └── @ SymbolNode (location: (44,0)-(44,1))
│ │ │ ├── opening_loc: ∅
│ │ │ ├── value_loc: (44,0)-(44,1) = "n"
@@ -278,18 +278,18 @@
│ │ └── unescaped: "o\n"
│ └── @ ArrayNode (location: (48,9)-(51,2))
│ ├── elements: (length: 1)
- │ │ └── @ InterpolatedSymbolNode (location: (48,12)-(51,1))
+ │ │ └── @ InterpolatedSymbolNode (location: (48,12)-(48,14))
│ │ ├── opening_loc: ∅
│ │ ├── parts: (length: 2)
│ │ │ ├── @ SymbolNode (location: (48,12)-(48,14))
│ │ │ │ ├── opening_loc: ∅
│ │ │ │ ├── value_loc: (48,12)-(48,14) = "p\\"
│ │ │ │ ├── closing_loc: ∅
- │ │ │ │ └── unescaped: "p"
- │ │ │ └── @ StringNode (location: (51,0)-(51,1))
+ │ │ │ │ └── unescaped: "p\n"
+ │ │ │ └── @ StringNode (location: (48,12)-(48,14))
│ │ │ ├── flags: ∅
│ │ │ ├── opening_loc: ∅
- │ │ │ ├── content_loc: (51,0)-(51,1) = "p"
+ │ │ │ ├── content_loc: (48,12)-(48,14) = "p\\"
│ │ │ ├── closing_loc: ∅
│ │ │ └── unescaped: "p"
│ │ └── closing_loc: ∅
diff --git a/test/prism/unescape_test.rb b/test/prism/unescape_test.rb
index 0053fdd364..95b3675e2e 100644
--- a/test/prism/unescape_test.rb
+++ b/test/prism/unescape_test.rb
@@ -95,11 +95,11 @@ module Prism
contexts = [
[Context::String.new("?", ""), escapes],
- # [Context::String.new("'", "'"), escapes],
- # [Context::String.new("\"", "\""), escapes],
- # [Context::String.new("%q[", "]"), escapes],
- # [Context::String.new("%Q[", "]"), escapes],
- # [Context::String.new("%[", "]"), escapes],
+ [Context::String.new("'", "'"), escapes],
+ [Context::String.new("\"", "\""), escapes],
+ [Context::String.new("%q[", "]"), escapes],
+ [Context::String.new("%Q[", "]"), escapes],
+ [Context::String.new("%[", "]"), escapes],
# [Context::String.new("`", "`"), escapes],
# [Context::String.new("<<~H\n", "\nH"), escapes],
# [Context::String.new("<<~'H'\n", "\nH"), escapes],