summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- prism/prism.c 132
-rw-r--r-- test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt 2
-rw-r--r-- test/prism/snapshots/whitequark/ruby_bug_11989.txt 2
3 files changed, 101 insertions, 35 deletions
diff --git a/prism/prism.c b/prism/prism.c
index 55d0c92760..c9cfe65d76 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -8426,14 +8426,15 @@ parser_lex(pm_parser_t *parser) {
// Now let's grab the information about the identifier off of the current
// lex mode.
- const uint8_t *ident_start = parser->lex_modes.current->as.heredoc.ident_start;
- size_t ident_length = parser->lex_modes.current->as.heredoc.ident_length;
+ pm_lex_mode_t *lex_mode = parser->lex_modes.current;
+ const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
+ size_t ident_length = lex_mode->as.heredoc.ident_length;
// If we are immediately following a newline and we have hit the
// terminator, then we need to return the ending of the heredoc.
if (current_token_starts_line(parser)) {
const uint8_t *start = parser->current.start;
- if (parser->lex_modes.current->as.heredoc.indent != PM_HEREDOC_INDENT_NONE) {
+ if (lex_mode->as.heredoc.indent != PM_HEREDOC_INDENT_NONE) {
start += pm_strspn_inline_whitespace(start, parser->end - start);
}
@@ -8453,10 +8454,10 @@ parser_lex(pm_parser_t *parser) {
}
if (matched) {
- if (*parser->lex_modes.current->as.heredoc.next_start == '\\') {
+ if (*lex_mode->as.heredoc.next_start == '\\') {
parser->next_start = NULL;
} else {
- parser->next_start = parser->lex_modes.current->as.heredoc.next_start;
+ parser->next_start = lex_mode->as.heredoc.next_start;
parser->heredoc_end = parser->current.end;
}
@@ -8469,17 +8470,18 @@ parser_lex(pm_parser_t *parser) {
}
}
- // Otherwise we'll be parsing string content. These are the places where
- // we need to split up the content of the heredoc. We'll use strpbrk to
- // find the first of these characters.
+ // Otherwise we'll be parsing string content. These are the places
+ // where we need to split up the content of the heredoc. We'll use
+ // strpbrk to find the first of these characters.
uint8_t breakpoints[] = "\n\\#";
- pm_heredoc_quote_t quote = parser->lex_modes.current->as.heredoc.quote;
+ pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote;
if (quote == PM_HEREDOC_QUOTE_SINGLE) {
breakpoints[2] = '\0';
}
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ pm_token_buffer_t token_buffer = { 0 };
while (breakpoint != NULL) {
switch (*breakpoint) {
@@ -8491,13 +8493,14 @@ parser_lex(pm_parser_t *parser) {
if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
parser_flush_heredoc_end(parser);
parser->current.end = breakpoint + 1;
+ pm_token_buffer_flush(parser, &token_buffer);
LEX(PM_TOKEN_STRING_CONTENT);
}
pm_newline_list_append(&parser->newline_list, breakpoint);
const uint8_t *start = breakpoint + 1;
- if (parser->lex_modes.current->as.heredoc.indent != PM_HEREDOC_INDENT_NONE) {
+ if (lex_mode->as.heredoc.indent != PM_HEREDOC_INDENT_NONE) {
start += pm_strspn_inline_whitespace(start, parser->end - start);
}
@@ -8515,6 +8518,7 @@ parser_lex(pm_parser_t *parser) {
match_eol_at(parser, start + ident_length)
) {
parser->current.end = breakpoint + 1;
+ pm_token_buffer_flush(parser, &token_buffer);
LEX(PM_TOKEN_STRING_CONTENT);
}
}
@@ -8531,37 +8535,83 @@ parser_lex(pm_parser_t *parser) {
// stop looping before the newline and not after the
// newline so that we can still potentially find the
// terminator of the heredoc.
- size_t eol_length = match_eol_at(parser, breakpoint + 1);
- if (eol_length) {
- breakpoint += eol_length;
- } else {
- pm_unescape_type_t unescape_type = (quote == PM_HEREDOC_QUOTE_SINGLE) ? PM_UNESCAPE_MINIMAL : PM_UNESCAPE_ALL;
- size_t difference = pm_unescape_calculate_difference(parser, breakpoint, unescape_type);
- if (difference == 0) {
- // we're at the end of the file
- breakpoint = NULL;
- break;
- }
+ parser->current.end = breakpoint + 1;
+ pm_token_buffer_escape(parser, &token_buffer);
- pm_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
+ // If we've hit the end of the file, then break out of
+ // the loop by setting the breakpoint to NULL.
+ if (parser->current.end == parser->end) {
+ breakpoint = NULL;
+ continue;
+ }
- breakpoint = pm_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
+ uint8_t peeked = peek(parser);
+ switch (peeked) {
+ case '\r':
+ parser->current.end++;
+ if (peek(parser) != '\n') {
+ pm_token_buffer_push(&token_buffer, '\r');
+ break;
+ }
+ /* fallthrough */
+ case '\n':
+ // If this is a dedenting heredoc then we need
+ // to leave the escaped newline in place so that
+ // it can be removed later when we dedent the
+ // heredoc.
+ if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
+ pm_token_buffer_push(&token_buffer, '\\');
+ pm_token_buffer_push(&token_buffer, '\n');
+ }
+
+ if (parser->heredoc_end) {
+ // ... if we are on the same line as a heredoc,
+ // flush the heredoc and continue parsing after
+ // heredoc_end.
+ parser_flush_heredoc_end(parser);
+ pm_token_buffer_copy(parser, &token_buffer);
+ LEX(PM_TOKEN_STRING_CONTENT);
+ } else {
+ // ... else track the newline.
+ pm_newline_list_append(&parser->newline_list, parser->current.end);
+ }
+
+ parser->current.end++;
+ break;
+ default:
+ if (quote == PM_HEREDOC_QUOTE_SINGLE) {
+ pm_token_buffer_push(&token_buffer, '\\');
+ pm_token_buffer_push(&token_buffer, peeked);
+ parser->current.end++;
+ } else {
+ escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
+ }
+
+ break;
}
+ token_buffer.cursor = parser->current.end;
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
break;
}
case '#': {
pm_token_type_t type = lex_interpolation(parser, breakpoint);
- if (type != PM_TOKEN_NOT_PROVIDED) {
- LEX(type);
+
+ if (type == PM_TOKEN_NOT_PROVIDED) {
+ // If we haven't returned at this point then we had
+ // something that looked like an interpolated class
+ // or instance variable like "#@" but wasn't
+ // actually. In this case we'll just skip to the
+ // next breakpoint.
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+ break;
}
- // If we haven't returned at this point then we had something
- // that looked like an interpolated class or instance variable
- // like "#@" but wasn't actually. In this case we'll just skip
- // to the next breakpoint.
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
- break;
+ if (type == PM_TOKEN_STRING_CONTENT) {
+ pm_token_buffer_flush(parser, &token_buffer);
+ }
+
+ LEX(type);
}
default:
assert(false && "unreachable");
@@ -12499,7 +12549,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
node->location.end = opening.end;
} else {
- part = parse_string_part(parser);
+ if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ pm_token_t opening = not_provided(parser);
+ pm_token_t closing = not_provided(parser);
+ part = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
+ parser_lex(parser);
+ } else {
+ part = parse_string_part(parser);
+ }
if (part == NULL) {
// If we get here, then we tried to find something in the
@@ -12539,7 +12596,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
pm_node_list_append(&parts, part);
while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
- if ((part = parse_string_part(parser)) != NULL) {
+ if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ pm_token_t opening = not_provided(parser);
+ pm_token_t closing = not_provided(parser);
+ part = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
+ parser_lex(parser);
+ } else {
+ part = parse_string_part(parser);
+ }
+
+ if (part != NULL) {
pm_node_list_append(&parts, part);
}
}
diff --git a/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt b/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt
index 20496c3cfc..ad38f53cee 100644
--- a/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt
+++ b/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt
@@ -114,7 +114,7 @@
│ ├── opening_loc: (51,0)-(51,9) = "<<-'HERE'"
│ ├── content_loc: (52,0)-(53,0) = "a\\\nb\n"
│ ├── closing_loc: (54,0)-(54,0) = "HERE\n"
- │ └── unescaped: "a\\\nb\n"
+ │ └── unescaped: "ab\n"
├── @ XStringNode (location: (56,0)-(56,9))
│ ├── opening_loc: (56,0)-(56,9) = "<<-`HERE`"
│ ├── content_loc: (57,0)-(58,0) = "a\\\nb\n"
diff --git a/test/prism/snapshots/whitequark/ruby_bug_11989.txt b/test/prism/snapshots/whitequark/ruby_bug_11989.txt
index 2d56025693..27ec4058af 100644
--- a/test/prism/snapshots/whitequark/ruby_bug_11989.txt
+++ b/test/prism/snapshots/whitequark/ruby_bug_11989.txt
@@ -16,7 +16,7 @@
│ ├── opening_loc: (1,2)-(1,8) = "<<~\"E\""
│ ├── content_loc: (2,0)-(2,0) = " x\\n y\n"
│ ├── closing_loc: (3,0)-(3,0) = "E\n"
- │ └── unescaped: "x\n y\n"
+ │ └── unescaped: "x\n y\n"
├── closing_loc: ∅
├── block: ∅
├── flags: ∅