[ruby/prism] Enable remaining heredoc unescape tests

https://github.com/ruby/prism/commit/c7ea4941c5
author: Kevin Newton <kddnewton@gmail.com> 2023-10-11 11:12:41 -0400
committer: Kevin Newton <kddnewton@gmail.com> 2023-10-13 15:31:30 -0400
commit: d6424453dbb8cd2794e22a7d3ba400732e709ec2 (patch)
tree: 0f74de41136def37a174d19fa72ee68244422346
parent: 24768d8a5786fe801b346b690a43bf256a380835 (diff)
3 files changed, 23 insertions, 14 deletions
diff --git a/prism/prism.c b/prism/prism.c
index c9cfe65d76..0dbe408659 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -8482,6 +8482,7 @@ parser_lex(pm_parser_t *parser) {
 
             const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
             pm_token_buffer_t token_buffer = { 0 };
+            bool was_escaped_newline = false;
 
             while (breakpoint != NULL) {
                 switch (*breakpoint) {
@@ -8509,6 +8510,7 @@ parser_lex(pm_parser_t *parser) {
                         // content. Then, the next time a token is lexed, it will match
                         // again and return the end of the heredoc.
                         if (
+                            !was_escaped_newline &&
                             (start + ident_length <= parser->end) &&
                             (memcmp(start, ident_start, ident_length) == 0)
                         ) {
@@ -8550,6 +8552,9 @@ parser_lex(pm_parser_t *parser) {
                             case '\r':
                                 parser->current.end++;
                                 if (peek(parser) != '\n') {
+                                    if (quote == PM_HEREDOC_QUOTE_SINGLE) {
+                                        pm_token_buffer_push(&token_buffer, '\\');
+                                    }
                                     pm_token_buffer_push(&token_buffer, '\r');
                                     break;
                                 }
@@ -8559,25 +8564,19 @@ parser_lex(pm_parser_t *parser) {
                                 // to leave the escaped newline in place so that
                                 // it can be removed later when we dedent the
                                 // heredoc.
-                                if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
+                                if (quote == PM_HEREDOC_QUOTE_SINGLE || lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
                                     pm_token_buffer_push(&token_buffer, '\\');
                                     pm_token_buffer_push(&token_buffer, '\n');
                                 }
 
-                                if (parser->heredoc_end) {
-                                    // ... if we are on the same line as a heredoc,
-                                    // flush the heredoc and continue parsing after
-                                    // heredoc_end.
-                                    parser_flush_heredoc_end(parser);
-                                    pm_token_buffer_copy(parser, &token_buffer);
-                                    LEX(PM_TOKEN_STRING_CONTENT);
-                                } else {
-                                    // ... else track the newline.
-                                    pm_newline_list_append(&parser->newline_list, parser->current.end);
+                                token_buffer.cursor = parser->current.end + 1;
+                                breakpoint = parser->current.end;
+
+                                if (quote != PM_HEREDOC_QUOTE_SINGLE) {
+                                    was_escaped_newline = true;
                                 }
 
-                                parser->current.end++;
-                                break;
+                                continue;
                             default:
                                 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
                                     pm_token_buffer_push(&token_buffer, '\\');
@@ -8616,6 +8615,8 @@ parser_lex(pm_parser_t *parser) {
                     default:
                         assert(false && "unreachable");
                 }
+
+                was_escaped_newline = false;
             }
 
             // If we've hit the end of the string, then this is an unterminated
diff --git a/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt b/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt
index ad38f53cee..20496c3cfc 100644
--- a/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt
+++ b/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt
@@ -114,7 +114,7 @@
         │   ├── opening_loc: (51,0)-(51,9) = "<<-'HERE'"
         │   ├── content_loc: (52,0)-(53,0) = "a\\\nb\n"
         │   ├── closing_loc: (54,0)-(54,0) = "HERE\n"
-        │   └── unescaped: "ab\n"
+        │   └── unescaped: "a\\\nb\n"
         ├── @ XStringNode (location: (56,0)-(56,9))
         │   ├── opening_loc: (56,0)-(56,9) = "<<-`HERE`"
         │   ├── content_loc: (57,0)-(58,0) = "a\\\nb\n"
diff --git a/test/prism/unescape_test.rb b/test/prism/unescape_test.rb
index d431fe9afd..de4d1bf1b9 100644
--- a/test/prism/unescape_test.rb
+++ b/test/prism/unescape_test.rb
@@ -102,6 +102,14 @@ module Prism
       [Context::String.new("%[", "]"),           escapes],
       [Context::String.new("`", "`"),            escapes],
       [Context::String.new("%x[", "]"),          escapes],
+      [Context::String.new("<<H\n", "\nH"),      escapes],
+      [Context::String.new("<<'H'\n", "\nH"),    escapes],
+      [Context::String.new("<<\"H\"\n", "\nH"),  escapes],
+      [Context::String.new("<<`H`\n", "\nH"),    escapes],
+      [Context::String.new("<<-H\n", "\nH"),      escapes],
+      [Context::String.new("<<-'H'\n", "\nH"),    escapes],
+      [Context::String.new("<<-\"H\"\n", "\nH"),  escapes],
+      [Context::String.new("<<-`H`\n", "\nH"),    escapes],
       # [Context::String.new("<<~H\n", "\nH"),     escapes],
       # [Context::String.new("<<~'H'\n", "\nH"),   escapes],
       # [Context::String.new("<<~\"H\"\n", "\nH"), escapes],
author	Kevin Newton <kddnewton@gmail.com>	2023-10-11 11:12:41 -0400
committer	Kevin Newton <kddnewton@gmail.com>	2023-10-13 15:31:30 -0400
commit	d6424453dbb8cd2794e22a7d3ba400732e709ec2 (patch)
tree	0f74de41136def37a174d19fa72ee68244422346
parent	24768d8a5786fe801b346b690a43bf256a380835 (diff)