summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kevin Newton <kddnewton@gmail.com> 2024-03-25 08:32:58 -0400
committer Kevin Newton <kddnewton@gmail.com> 2024-03-25 11:52:09 -0400
commit 14ab698967cdaedc0a922a2bdf30dfc69bdba7eb (patch)
tree 8917fec07e109a3d077e3bea25c5d77d59cc172d
parent a31ca3500d995b6706f94ff72166d699c5faeb27 (diff)
[ruby/prism] Handle CRLF inside heredoc contents
https://github.com/ruby/prism/commit/1fbac72485
-rw-r--r-- prism/prism.c 19
-rw-r--r-- test/prism/ruby_parser_test.rb 21
-rw-r--r-- test/prism/snapshots/dos_endings.txt 4
-rw-r--r-- test/prism/snapshots/heredoc_with_comment.txt 2
-rw-r--r-- test/prism/snapshots/seattlerb/heredoc__backslash_dos_format.txt 2
-rw-r--r-- test/prism/snapshots/seattlerb/heredoc_with_carriage_return_escapes_windows.txt 2
-rw-r--r-- test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_horrible_mix.txt 2
-rw-r--r-- test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns.txt 2
-rw-r--r-- test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns_windows.txt 2
-rw-r--r-- test/prism/snapshots/seattlerb/heredoc_with_interpolation_and_carriage_return_escapes_windows.txt 2
-rw-r--r-- test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns.txt 2
-rw-r--r-- test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns_windows.txt 2
12 files changed, 32 insertions, 30 deletions
diff --git a/prism/prism.c b/prism/prism.c
index 77cbcea2fe..a140dc734f 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -11267,11 +11267,11 @@ parser_lex(pm_parser_t *parser) {
// Otherwise we'll be parsing string content. These are the places
// where we need to split up the content of the heredoc. We'll use
// strpbrk to find the first of these characters.
- uint8_t breakpoints[] = "\n\\#";
+ uint8_t breakpoints[] = "\r\n\\#";
pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote;
if (quote == PM_HEREDOC_QUOTE_SINGLE) {
- breakpoints[2] = '\0';
+ breakpoints[3] = '\0';
}
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
@@ -11285,6 +11285,21 @@ parser_lex(pm_parser_t *parser) {
parser->current.end = breakpoint + 1;
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
break;
+ case '\r':
+ parser->current.end = breakpoint + 1;
+
+ if (peek_at(parser, breakpoint + 1) != '\n') {
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+ break;
+ }
+
+ // If we hit a \r\n sequence, then we want to replace it
+ // with a single \n character in the final string.
+ pm_token_buffer_escape(parser, &token_buffer);
+ breakpoint++;
+ token_buffer.cursor = breakpoint;
+
+ /* fallthrough */
case '\n': {
if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
parser_flush_heredoc_end(parser);
diff --git a/test/prism/ruby_parser_test.rb b/test/prism/ruby_parser_test.rb
index 952e493af9..e06b7ae438 100644
--- a/test/prism/ruby_parser_test.rb
+++ b/test/prism/ruby_parser_test.rb
@@ -52,25 +52,10 @@ module Prism
whitequark/string_concat.txt
]
- # These files contain CRLF line endings, which ruby_parser translates into
- # LF before it gets back to the node. This means the node actually has the
- # wrong contents.
- crlf = %w[
- dos_endings.txt
- heredoc_with_comment.txt
- seattlerb/heredoc__backslash_dos_format.txt
- seattlerb/heredoc_with_carriage_return_escapes_windows.txt
- seattlerb/heredoc_with_extra_carriage_horrible_mix.txt
- seattlerb/heredoc_with_extra_carriage_returns_windows.txt
- seattlerb/heredoc_with_extra_carriage_returns.txt
- seattlerb/heredoc_with_interpolation_and_carriage_return_escapes_windows.txt
- seattlerb/heredoc_with_only_carriage_returns_windows.txt
- seattlerb/heredoc_with_only_carriage_returns.txt
- ]
-
# https://github.com/seattlerb/ruby_parser/issues/344
- failures = crlf | %w[
+ failures = %w[
alias.txt
+ dos_endings.txt
heredocs_with_ignored_newlines.txt
method_calls.txt
methods.txt
@@ -79,8 +64,10 @@ module Prism
patterns.txt
regex.txt
seattlerb/and_multi.txt
+ seattlerb/heredoc__backslash_dos_format.txt
seattlerb/heredoc_bad_hex_escape.txt
seattlerb/heredoc_bad_oct_escape.txt
+ seattlerb/heredoc_with_extra_carriage_horrible_mix.txt
spanning_heredoc_newlines.txt
spanning_heredoc.txt
tilde_heredocs.txt
diff --git a/test/prism/snapshots/dos_endings.txt b/test/prism/snapshots/dos_endings.txt
index ed75b8a52f..c5b962f218 100644
--- a/test/prism/snapshots/dos_endings.txt
+++ b/test/prism/snapshots/dos_endings.txt
@@ -48,7 +48,7 @@
│ ├── opening_loc: (7,0)-(7,4) = "<<-E"
│ ├── content_loc: (8,0)-(11,0) = " 1 \\\r\n 2\r\n 3\r\n"
│ ├── closing_loc: (11,0)-(12,0) = "E\r\n"
- │ └── unescaped: " 1 2\r\n 3\r\n"
+ │ └── unescaped: " 1 2\n 3\n"
├── @ LocalVariableWriteNode (location: (13,0)-(15,0))
│ ├── name: :x
│ ├── depth: 0
@@ -94,7 +94,7 @@
│ │ │ │ ├── opening_loc: ∅
│ │ │ │ ├── content_loc: (19,0)-(20,0) = " baz\r\n"
│ │ │ │ ├── closing_loc: ∅
- │ │ │ │ └── unescaped: "baz\r\n"
+ │ │ │ │ └── unescaped: "baz\n"
│ │ │ └── closing_loc: (20,0)-(21,0) = " EOF\r\n"
│ │ ├── call_operator_loc: (17,14)-(17,15) = "."
│ │ ├── name: :chop
diff --git a/test/prism/snapshots/heredoc_with_comment.txt b/test/prism/snapshots/heredoc_with_comment.txt
index 117fdc117a..f2225ca981 100644
--- a/test/prism/snapshots/heredoc_with_comment.txt
+++ b/test/prism/snapshots/heredoc_with_comment.txt
@@ -11,7 +11,7 @@
│ ├── opening_loc: (1,0)-(1,9) = "<<-TARGET"
│ ├── content_loc: (2,0)-(3,0) = " content makes for an obvious error\r\n"
│ ├── closing_loc: (3,0)-(3,6) = "TARGET"
- │ └── unescaped: " content makes for an obvious error\r\n"
+ │ └── unescaped: " content makes for an obvious error\n"
├── call_operator_loc: (1,9)-(1,10) = "."
├── name: :chomp
├── message_loc: (1,10)-(1,15) = "chomp"
diff --git a/test/prism/snapshots/seattlerb/heredoc__backslash_dos_format.txt b/test/prism/snapshots/seattlerb/heredoc__backslash_dos_format.txt
index 6ba437e36a..353e4c6964 100644
--- a/test/prism/snapshots/seattlerb/heredoc__backslash_dos_format.txt
+++ b/test/prism/snapshots/seattlerb/heredoc__backslash_dos_format.txt
@@ -13,5 +13,5 @@
│ ├── opening_loc: (1,6)-(1,12) = "<<-XXX"
│ ├── content_loc: (2,0)-(4,0) = "before\\\r\nafter\r\n"
│ ├── closing_loc: (4,0)-(5,0) = "XXX\r\n"
- │ └── unescaped: "beforeafter\r\n"
+ │ └── unescaped: "beforeafter\n"
└── operator_loc: (1,4)-(1,5) = "="
diff --git a/test/prism/snapshots/seattlerb/heredoc_with_carriage_return_escapes_windows.txt b/test/prism/snapshots/seattlerb/heredoc_with_carriage_return_escapes_windows.txt
index 21802c5707..2ef6763389 100644
--- a/test/prism/snapshots/seattlerb/heredoc_with_carriage_return_escapes_windows.txt
+++ b/test/prism/snapshots/seattlerb/heredoc_with_carriage_return_escapes_windows.txt
@@ -8,4 +8,4 @@
├── opening_loc: (1,0)-(1,5) = "<<EOS"
├── content_loc: (2,0)-(4,0) = "foo\\rbar\r\nbaz\\r\r\n"
├── closing_loc: (4,0)-(5,0) = "EOS\r\n"
- └── unescaped: "foo\rbar\r\nbaz\r\r\n"
+ └── unescaped: "foo\rbar\nbaz\r\n"
diff --git a/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_horrible_mix.txt b/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_horrible_mix.txt
index dca03e71b0..fbee030100 100644
--- a/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_horrible_mix.txt
+++ b/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_horrible_mix.txt
@@ -8,4 +8,4 @@
├── opening_loc: (1,0)-(1,7) = "<<'eot'"
├── content_loc: (2,0)-(3,0) = "body\r\n"
├── closing_loc: (3,0)-(4,0) = "eot\n"
- └── unescaped: "body\r\n"
+ └── unescaped: "body\n"
diff --git a/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns.txt b/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns.txt
index 93fa1a1687..b59203bc4e 100644
--- a/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns.txt
+++ b/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns.txt
@@ -8,4 +8,4 @@
├── opening_loc: (1,0)-(1,5) = "<<EOS"
├── content_loc: (2,0)-(4,0) = "foo\rbar\r\nbaz\n"
├── closing_loc: (4,0)-(5,0) = "EOS\n"
- └── unescaped: "foo\rbar\r\nbaz\n"
+ └── unescaped: "foo\rbar\nbaz\n"
diff --git a/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns_windows.txt b/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns_windows.txt
index 98b6e3fe11..36bc4c6560 100644
--- a/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns_windows.txt
+++ b/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns_windows.txt
@@ -8,4 +8,4 @@
├── opening_loc: (1,0)-(1,5) = "<<EOS"
├── content_loc: (2,0)-(4,0) = "foo\rbar\r\r\nbaz\r\n"
├── closing_loc: (4,0)-(5,0) = "EOS\r\n"
- └── unescaped: "foo\rbar\r\r\nbaz\r\n"
+ └── unescaped: "foo\rbar\r\nbaz\n"
diff --git a/test/prism/snapshots/seattlerb/heredoc_with_interpolation_and_carriage_return_escapes_windows.txt b/test/prism/snapshots/seattlerb/heredoc_with_interpolation_and_carriage_return_escapes_windows.txt
index dc8f8ae6d6..7eb04bdbd5 100644
--- a/test/prism/snapshots/seattlerb/heredoc_with_interpolation_and_carriage_return_escapes_windows.txt
+++ b/test/prism/snapshots/seattlerb/heredoc_with_interpolation_and_carriage_return_escapes_windows.txt
@@ -22,5 +22,5 @@
│ ├── opening_loc: ∅
│ ├── content_loc: (2,10)-(3,0) = "\r\n"
│ ├── closing_loc: ∅
- │ └── unescaped: "\r\n"
+ │ └── unescaped: "\n"
└── closing_loc: (3,0)-(4,0) = "EOS\r\n"
diff --git a/test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns.txt b/test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns.txt
index 6771f4afd7..6a535c6472 100644
--- a/test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns.txt
+++ b/test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns.txt
@@ -8,4 +8,4 @@
├── opening_loc: (1,0)-(1,5) = "<<EOS"
├── content_loc: (2,0)-(5,0) = "\r\n\r\r\n\\r\n"
├── closing_loc: (5,0)-(6,0) = "EOS\n"
- └── unescaped: "\r\n\r\r\n\r\n"
+ └── unescaped: "\n\r\n\r\n"
diff --git a/test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns_windows.txt b/test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns_windows.txt
index b0f5d369dc..6539846ff1 100644
--- a/test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns_windows.txt
+++ b/test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns_windows.txt
@@ -8,4 +8,4 @@
├── opening_loc: (1,0)-(1,5) = "<<EOS"
├── content_loc: (2,0)-(5,0) = "\r\r\n\r\r\r\n\\r\r\n"
├── closing_loc: (5,0)-(6,0) = "EOS\r\n"
- └── unescaped: "\r\r\n\r\r\r\n\r\r\n"
+ └── unescaped: "\r\n\r\r\n\r\n"