summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kevin Newton <kddnewton@gmail.com> 2024-03-07 14:45:32 -0500
committer: git <svn-admin@ruby-lang.org> 2024-03-07 20:25:24 +0000
commit: 76e11595e28e258f4a4187a6d3eaccc9ca752e10 (patch)
tree: 042ed2e64c01ebbbe98ba5762814486b660a056e
parent: 18ee7c9a108bf3424814565377c8796e5e455cf7 (diff)
[ruby/prism] Fix up tilde heredoc line continuations
https://github.com/ruby/prism/commit/15e74b2f65
-rw-r--r-- prism/parser.h 3
-rw-r--r-- prism/prism.c 38
-rw-r--r-- test/prism/ruby_parser_test.rb 2
-rw-r--r-- test/prism/snapshots/heredocs_with_ignored_newlines.txt 14
-rw-r--r-- test/prism/snapshots/whitequark/parser_bug_640.txt 20
-rw-r--r-- test/prism/snapshots/whitequark/slash_newline_in_heredocs.txt 14
-rw-r--r-- test/prism/unescape_test.rb 2
7 files changed, 72 insertions, 21 deletions
diff --git a/prism/parser.h b/prism/parser.h
index 80521e4ad9..02f60192d5 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -234,6 +234,9 @@ typedef struct pm_lex_mode {
* a tilde heredoc.
*/
size_t common_whitespace;
+
+ /** True if the previous token ended with a line continuation. */
+ bool line_continuation;
} heredoc;
} as;
diff --git a/prism/prism.c b/prism/prism.c
index 6717488882..d7ee5ac7db 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -9450,7 +9450,8 @@ parser_lex(pm_parser_t *parser) {
.next_start = parser->current.end,
.quote = quote,
.indent = indent,
- .common_whitespace = (size_t) -1
+ .common_whitespace = (size_t) -1,
+ .line_continuation = false
}
});
@@ -10719,6 +10720,9 @@ parser_lex(pm_parser_t *parser) {
// current lex mode.
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
+ bool line_continuation = lex_mode->as.heredoc.line_continuation;
+ lex_mode->as.heredoc.line_continuation = false;
+
// We'll check if we're at the end of the file. If we are, then we
// will add an error (because we weren't able to find the
// terminator) but still continue parsing so that content after the
@@ -10736,7 +10740,7 @@ parser_lex(pm_parser_t *parser) {
// If we are immediately following a newline and we have hit the
// terminator, then we need to return the ending of the heredoc.
- if (current_token_starts_line(parser)) {
+ if (!line_continuation && current_token_starts_line(parser)) {
const uint8_t *start = parser->current.start;
if (start + ident_length <= parser->end) {
const uint8_t *newline = next_newline(start, parser->end - start);
@@ -10808,7 +10812,7 @@ parser_lex(pm_parser_t *parser) {
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
pm_token_buffer_t token_buffer = { { 0 }, 0 };
- bool was_escaped_newline = false;
+ bool was_line_continuation = false;
while (breakpoint != NULL) {
switch (*breakpoint) {
@@ -10831,7 +10835,7 @@ parser_lex(pm_parser_t *parser) {
// some leading whitespace.
const uint8_t *start = breakpoint + 1;
- if (!was_escaped_newline && (start + ident_length <= parser->end)) {
+ if (!was_line_continuation && (start + ident_length <= parser->end)) {
// We want to match the terminator starting from the end of the line in case
// there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
const uint8_t *newline = next_newline(start, parser->end - start);
@@ -10873,7 +10877,6 @@ parser_lex(pm_parser_t *parser) {
// heredoc here as string content. Then, the next time a
// token is lexed, it will match again and return the
// end of the heredoc.
-
if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
if ((lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
lex_mode->as.heredoc.common_whitespace = whitespace;
@@ -10881,7 +10884,7 @@ parser_lex(pm_parser_t *parser) {
parser->current.end = breakpoint + 1;
- if (!was_escaped_newline) {
+ if (!was_line_continuation) {
pm_token_buffer_flush(parser, &token_buffer);
LEX(PM_TOKEN_STRING_CONTENT);
}
@@ -10943,7 +10946,26 @@ parser_lex(pm_parser_t *parser) {
}
/* fallthrough */
case '\n':
- was_escaped_newline = true;
+ // If we are in a tilde heredoc, we should
+ // break out of the loop and return the
+ // string content.
+ if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
+ const uint8_t *end = parser->current.end;
+ pm_newline_list_append(&parser->newline_list, end);
+
+ // Here we want the buffer to only
+ // include up to the backslash.
+ parser->current.end = breakpoint;
+ pm_token_buffer_flush(parser, &token_buffer);
+
+ // Now we can advance the end of the
+ // token past the newline.
+ parser->current.end = end + 1;
+ lex_mode->as.heredoc.line_continuation = true;
+ LEX(PM_TOKEN_STRING_CONTENT);
+ }
+
+ was_line_continuation = true;
token_buffer.cursor = parser->current.end + 1;
breakpoint = parser->current.end;
continue;
@@ -10980,7 +11002,7 @@ parser_lex(pm_parser_t *parser) {
assert(false && "unreachable");
}
- was_escaped_newline = false;
+ was_line_continuation = false;
}
if (parser->current.end > parser->current.start) {
diff --git a/test/prism/ruby_parser_test.rb b/test/prism/ruby_parser_test.rb
index 89150b2faa..1d22f0e7b8 100644
--- a/test/prism/ruby_parser_test.rb
+++ b/test/prism/ruby_parser_test.rb
@@ -71,6 +71,7 @@ module Prism
# https://github.com/seattlerb/ruby_parser/issues/344
failures = crlf | %w[
alias.txt
+ heredocs_with_ignored_newlines.txt
method_calls.txt
methods.txt
multi_write.txt
@@ -94,6 +95,7 @@ module Prism
whitequark/lvar_injecting_match.txt
whitequark/not.txt
whitequark/op_asgn_cmd.txt
+ whitequark/parser_bug_640.txt
whitequark/parser_slash_slash_n_escaping_in_literals.txt
whitequark/pattern_matching_single_line_allowed_omission_of_parentheses.txt
whitequark/pattern_matching_single_line.txt
diff --git a/test/prism/snapshots/heredocs_with_ignored_newlines.txt b/test/prism/snapshots/heredocs_with_ignored_newlines.txt
index 00111b1ca5..cdc0b4faab 100644
--- a/test/prism/snapshots/heredocs_with_ignored_newlines.txt
+++ b/test/prism/snapshots/heredocs_with_ignored_newlines.txt
@@ -11,7 +11,7 @@
│ └── unescaped: ""
└── @ InterpolatedStringNode (location: (4,0)-(4,8))
├── opening_loc: (4,0)-(4,8) = "<<~THERE"
- ├── parts: (length: 8)
+ ├── parts: (length: 9)
│ ├── @ StringNode (location: (5,0)-(6,0))
│ │ ├── flags: ∅
│ │ ├── opening_loc: ∅
@@ -42,12 +42,18 @@
│ │ ├── content_loc: (9,0)-(10,0) = "\n"
│ │ ├── closing_loc: ∅
│ │ └── unescaped: "\n"
- │ ├── @ StringNode (location: (10,0)-(12,0))
+ │ ├── @ StringNode (location: (10,0)-(11,0))
│ │ ├── flags: ∅
│ │ ├── opening_loc: ∅
- │ │ ├── content_loc: (10,0)-(12,0) = " <<~BUT\\\n but\n"
+ │ │ ├── content_loc: (10,0)-(11,0) = " <<~BUT\\\n"
│ │ ├── closing_loc: ∅
- │ │ └── unescaped: "<<~BUT but\n"
+ │ │ └── unescaped: "<<~BUT"
+ │ ├── @ StringNode (location: (11,0)-(12,0))
+ │ │ ├── flags: ∅
+ │ │ ├── opening_loc: ∅
+ │ │ ├── content_loc: (11,0)-(12,0) = " but\n"
+ │ │ ├── closing_loc: ∅
+ │ │ └── unescaped: " but\n"
│ ├── @ StringNode (location: (12,0)-(13,0))
│ │ ├── flags: ∅
│ │ ├── opening_loc: ∅
diff --git a/test/prism/snapshots/whitequark/parser_bug_640.txt b/test/prism/snapshots/whitequark/parser_bug_640.txt
index 0320011e2e..a9d3f957e8 100644
--- a/test/prism/snapshots/whitequark/parser_bug_640.txt
+++ b/test/prism/snapshots/whitequark/parser_bug_640.txt
@@ -3,9 +3,19 @@
└── statements:
@ StatementsNode (location: (1,0)-(1,6))
└── body: (length: 1)
- └── @ StringNode (location: (1,0)-(1,6))
- ├── flags: ∅
+ └── @ InterpolatedStringNode (location: (1,0)-(1,6))
├── opening_loc: (1,0)-(1,6) = "<<~FOO"
- ├── content_loc: (2,0)-(4,0) = " baz\\\n qux\n"
- ├── closing_loc: (4,0)-(5,0) = "FOO\n"
- └── unescaped: "baz qux\n"
+ ├── parts: (length: 2)
+ │ ├── @ StringNode (location: (2,0)-(3,0))
+ │ │ ├── flags: ∅
+ │ │ ├── opening_loc: ∅
+ │ │ ├── content_loc: (2,0)-(3,0) = " baz\\\n"
+ │ │ ├── closing_loc: ∅
+ │ │ └── unescaped: "baz"
+ │ └── @ StringNode (location: (3,0)-(4,0))
+ │ ├── flags: ∅
+ │ ├── opening_loc: ∅
+ │ ├── content_loc: (3,0)-(4,0) = " qux\n"
+ │ ├── closing_loc: ∅
+ │ └── unescaped: "qux\n"
+ └── closing_loc: (4,0)-(5,0) = "FOO\n"
diff --git a/test/prism/snapshots/whitequark/slash_newline_in_heredocs.txt b/test/prism/snapshots/whitequark/slash_newline_in_heredocs.txt
index 58a134dd62..8d6fce2ba9 100644
--- a/test/prism/snapshots/whitequark/slash_newline_in_heredocs.txt
+++ b/test/prism/snapshots/whitequark/slash_newline_in_heredocs.txt
@@ -11,13 +11,19 @@
│ └── unescaped: " 1 2\n 3\n"
└── @ InterpolatedStringNode (location: (8,0)-(8,4))
├── opening_loc: (8,0)-(8,4) = "<<~E"
- ├── parts: (length: 2)
- │ ├── @ StringNode (location: (9,0)-(11,0))
+ ├── parts: (length: 3)
+ │ ├── @ StringNode (location: (9,0)-(10,0))
│ │ ├── flags: ∅
│ │ ├── opening_loc: ∅
- │ │ ├── content_loc: (9,0)-(11,0) = " 1 \\\n 2\n"
+ │ │ ├── content_loc: (9,0)-(10,0) = " 1 \\\n"
│ │ ├── closing_loc: ∅
- │ │ └── unescaped: "1 2\n"
+ │ │ └── unescaped: "1 "
+ │ ├── @ StringNode (location: (10,0)-(11,0))
+ │ │ ├── flags: ∅
+ │ │ ├── opening_loc: ∅
+ │ │ ├── content_loc: (10,0)-(11,0) = " 2\n"
+ │ │ ├── closing_loc: ∅
+ │ │ └── unescaped: "2\n"
│ └── @ StringNode (location: (11,0)-(12,0))
│ ├── flags: ∅
│ ├── opening_loc: ∅
diff --git a/test/prism/unescape_test.rb b/test/prism/unescape_test.rb
index 2a352c5234..72ad780d8b 100644
--- a/test/prism/unescape_test.rb
+++ b/test/prism/unescape_test.rb
@@ -230,6 +230,8 @@ module Prism
else
assert_equal expected.bytes, actual.bytes, message
end
+ rescue Exception
+ binding.irb
end
end
end