diff options
author | Kevin Newton <kddnewton@gmail.com> | 2024-03-25 08:32:58 -0400 |
---|---|---|
committer | Kevin Newton <kddnewton@gmail.com> | 2024-03-25 11:52:09 -0400 |
commit | 14ab698967cdaedc0a922a2bdf30dfc69bdba7eb (patch) | |
tree | 8917fec07e109a3d077e3bea25c5d77d59cc172d | |
parent | a31ca3500d995b6706f94ff72166d699c5faeb27 (diff) |
[ruby/prism] Handle CRLF inside heredoc contents
https://github.com/ruby/prism/commit/1fbac72485
12 files changed, 32 insertions, 30 deletions
diff --git a/prism/prism.c b/prism/prism.c index 77cbcea2fe..a140dc734f 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -11267,11 +11267,11 @@ parser_lex(pm_parser_t *parser) { // Otherwise we'll be parsing string content. These are the places // where we need to split up the content of the heredoc. We'll use // strpbrk to find the first of these characters. - uint8_t breakpoints[] = "\n\\#"; + uint8_t breakpoints[] = "\r\n\\#"; pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote; if (quote == PM_HEREDOC_QUOTE_SINGLE) { - breakpoints[2] = '\0'; + breakpoints[3] = '\0'; } const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true); @@ -11285,6 +11285,21 @@ parser_lex(pm_parser_t *parser) { parser->current.end = breakpoint + 1; breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true); break; + case '\r': + parser->current.end = breakpoint + 1; + + if (peek_at(parser, breakpoint + 1) != '\n') { + breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true); + break; + } + + // If we hit a \r\n sequence, then we want to replace it + // with a single \n character in the final string. + pm_token_buffer_escape(parser, &token_buffer); + breakpoint++; + token_buffer.cursor = breakpoint; + + /* fallthrough */ case '\n': { if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) { parser_flush_heredoc_end(parser); diff --git a/test/prism/ruby_parser_test.rb b/test/prism/ruby_parser_test.rb index 952e493af9..e06b7ae438 100644 --- a/test/prism/ruby_parser_test.rb +++ b/test/prism/ruby_parser_test.rb @@ -52,25 +52,10 @@ module Prism whitequark/string_concat.txt ] - # These files contain CRLF line endings, which ruby_parser translates into - # LF before it gets back to the node. This means the node actually has the - # wrong contents. 
- crlf = %w[ - dos_endings.txt - heredoc_with_comment.txt - seattlerb/heredoc__backslash_dos_format.txt - seattlerb/heredoc_with_carriage_return_escapes_windows.txt - seattlerb/heredoc_with_extra_carriage_horrible_mix.txt - seattlerb/heredoc_with_extra_carriage_returns_windows.txt - seattlerb/heredoc_with_extra_carriage_returns.txt - seattlerb/heredoc_with_interpolation_and_carriage_return_escapes_windows.txt - seattlerb/heredoc_with_only_carriage_returns_windows.txt - seattlerb/heredoc_with_only_carriage_returns.txt - ] - # https://github.com/seattlerb/ruby_parser/issues/344 - failures = crlf | %w[ + failures = %w[ alias.txt + dos_endings.txt heredocs_with_ignored_newlines.txt method_calls.txt methods.txt @@ -79,8 +64,10 @@ module Prism patterns.txt regex.txt seattlerb/and_multi.txt + seattlerb/heredoc__backslash_dos_format.txt seattlerb/heredoc_bad_hex_escape.txt seattlerb/heredoc_bad_oct_escape.txt + seattlerb/heredoc_with_extra_carriage_horrible_mix.txt spanning_heredoc_newlines.txt spanning_heredoc.txt tilde_heredocs.txt diff --git a/test/prism/snapshots/dos_endings.txt b/test/prism/snapshots/dos_endings.txt index ed75b8a52f..c5b962f218 100644 --- a/test/prism/snapshots/dos_endings.txt +++ b/test/prism/snapshots/dos_endings.txt @@ -48,7 +48,7 @@ │ ├── opening_loc: (7,0)-(7,4) = "<<-E" │ ├── content_loc: (8,0)-(11,0) = " 1 \\\r\n 2\r\n 3\r\n" │ ├── closing_loc: (11,0)-(12,0) = "E\r\n" - │ └── unescaped: " 1 2\r\n 3\r\n" + │ └── unescaped: " 1 2\n 3\n" ├── @ LocalVariableWriteNode (location: (13,0)-(15,0)) │ ├── name: :x │ ├── depth: 0 @@ -94,7 +94,7 @@ │ │ │ │ ├── opening_loc: ∅ │ │ │ │ ├── content_loc: (19,0)-(20,0) = " baz\r\n" │ │ │ │ ├── closing_loc: ∅ - │ │ │ │ └── unescaped: "baz\r\n" + │ │ │ │ └── unescaped: "baz\n" │ │ │ └── closing_loc: (20,0)-(21,0) = " EOF\r\n" │ │ ├── call_operator_loc: (17,14)-(17,15) = "." 
│ │ ├── name: :chop diff --git a/test/prism/snapshots/heredoc_with_comment.txt b/test/prism/snapshots/heredoc_with_comment.txt index 117fdc117a..f2225ca981 100644 --- a/test/prism/snapshots/heredoc_with_comment.txt +++ b/test/prism/snapshots/heredoc_with_comment.txt @@ -11,7 +11,7 @@ │ ├── opening_loc: (1,0)-(1,9) = "<<-TARGET" │ ├── content_loc: (2,0)-(3,0) = " content makes for an obvious error\r\n" │ ├── closing_loc: (3,0)-(3,6) = "TARGET" - │ └── unescaped: " content makes for an obvious error\r\n" + │ └── unescaped: " content makes for an obvious error\n" ├── call_operator_loc: (1,9)-(1,10) = "." ├── name: :chomp ├── message_loc: (1,10)-(1,15) = "chomp" diff --git a/test/prism/snapshots/seattlerb/heredoc__backslash_dos_format.txt b/test/prism/snapshots/seattlerb/heredoc__backslash_dos_format.txt index 6ba437e36a..353e4c6964 100644 --- a/test/prism/snapshots/seattlerb/heredoc__backslash_dos_format.txt +++ b/test/prism/snapshots/seattlerb/heredoc__backslash_dos_format.txt @@ -13,5 +13,5 @@ │ ├── opening_loc: (1,6)-(1,12) = "<<-XXX" │ ├── content_loc: (2,0)-(4,0) = "before\\\r\nafter\r\n" │ ├── closing_loc: (4,0)-(5,0) = "XXX\r\n" - │ └── unescaped: "beforeafter\r\n" + │ └── unescaped: "beforeafter\n" └── operator_loc: (1,4)-(1,5) = "=" diff --git a/test/prism/snapshots/seattlerb/heredoc_with_carriage_return_escapes_windows.txt b/test/prism/snapshots/seattlerb/heredoc_with_carriage_return_escapes_windows.txt index 21802c5707..2ef6763389 100644 --- a/test/prism/snapshots/seattlerb/heredoc_with_carriage_return_escapes_windows.txt +++ b/test/prism/snapshots/seattlerb/heredoc_with_carriage_return_escapes_windows.txt @@ -8,4 +8,4 @@ ├── opening_loc: (1,0)-(1,5) = "<<EOS" ├── content_loc: (2,0)-(4,0) = "foo\\rbar\r\nbaz\\r\r\n" ├── closing_loc: (4,0)-(5,0) = "EOS\r\n" - └── unescaped: "foo\rbar\r\nbaz\r\r\n" + └── unescaped: "foo\rbar\nbaz\r\n" diff --git a/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_horrible_mix.txt 
b/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_horrible_mix.txt index dca03e71b0..fbee030100 100644 --- a/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_horrible_mix.txt +++ b/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_horrible_mix.txt @@ -8,4 +8,4 @@ ├── opening_loc: (1,0)-(1,7) = "<<'eot'" ├── content_loc: (2,0)-(3,0) = "body\r\n" ├── closing_loc: (3,0)-(4,0) = "eot\n" - └── unescaped: "body\r\n" + └── unescaped: "body\n" diff --git a/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns.txt b/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns.txt index 93fa1a1687..b59203bc4e 100644 --- a/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns.txt +++ b/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns.txt @@ -8,4 +8,4 @@ ├── opening_loc: (1,0)-(1,5) = "<<EOS" ├── content_loc: (2,0)-(4,0) = "foo\rbar\r\nbaz\n" ├── closing_loc: (4,0)-(5,0) = "EOS\n" - └── unescaped: "foo\rbar\r\nbaz\n" + └── unescaped: "foo\rbar\nbaz\n" diff --git a/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns_windows.txt b/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns_windows.txt index 98b6e3fe11..36bc4c6560 100644 --- a/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns_windows.txt +++ b/test/prism/snapshots/seattlerb/heredoc_with_extra_carriage_returns_windows.txt @@ -8,4 +8,4 @@ ├── opening_loc: (1,0)-(1,5) = "<<EOS" ├── content_loc: (2,0)-(4,0) = "foo\rbar\r\r\nbaz\r\n" ├── closing_loc: (4,0)-(5,0) = "EOS\r\n" - └── unescaped: "foo\rbar\r\r\nbaz\r\n" + └── unescaped: "foo\rbar\r\nbaz\n" diff --git a/test/prism/snapshots/seattlerb/heredoc_with_interpolation_and_carriage_return_escapes_windows.txt b/test/prism/snapshots/seattlerb/heredoc_with_interpolation_and_carriage_return_escapes_windows.txt index dc8f8ae6d6..7eb04bdbd5 100644 --- a/test/prism/snapshots/seattlerb/heredoc_with_interpolation_and_carriage_return_escapes_windows.txt +++ 
b/test/prism/snapshots/seattlerb/heredoc_with_interpolation_and_carriage_return_escapes_windows.txt @@ -22,5 +22,5 @@ │ ├── opening_loc: ∅ │ ├── content_loc: (2,10)-(3,0) = "\r\n" │ ├── closing_loc: ∅ - │ └── unescaped: "\r\n" + │ └── unescaped: "\n" └── closing_loc: (3,0)-(4,0) = "EOS\r\n" diff --git a/test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns.txt b/test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns.txt index 6771f4afd7..6a535c6472 100644 --- a/test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns.txt +++ b/test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns.txt @@ -8,4 +8,4 @@ ├── opening_loc: (1,0)-(1,5) = "<<EOS" ├── content_loc: (2,0)-(5,0) = "\r\n\r\r\n\\r\n" ├── closing_loc: (5,0)-(6,0) = "EOS\n" - └── unescaped: "\r\n\r\r\n\r\n" + └── unescaped: "\n\r\n\r\n" diff --git a/test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns_windows.txt b/test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns_windows.txt index b0f5d369dc..6539846ff1 100644 --- a/test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns_windows.txt +++ b/test/prism/snapshots/seattlerb/heredoc_with_only_carriage_returns_windows.txt @@ -8,4 +8,4 @@ ├── opening_loc: (1,0)-(1,5) = "<<EOS" ├── content_loc: (2,0)-(5,0) = "\r\r\n\r\r\r\n\\r\r\n" ├── closing_loc: (5,0)-(6,0) = "EOS\r\n" - └── unescaped: "\r\r\n\r\r\r\n\r\r\n" + └── unescaped: "\r\n\r\r\n\r\n" |