[ruby/yarp] Extract out heredoc parsing into parse_strings

https://github.com/ruby/yarp/commit/c5a1094988
author: Kevin Newton <kddnewton@gmail.com> 2023-09-13 21:02:16 -0400
committer: git <svn-admin@ruby-lang.org> 2023-09-14 13:58:12 +0000
commit: 57745450dd85567cbdce703f12c9825fd81e52a2 (patch)
tree: ac7e572fdb2ab8513272f439918579b507106a78
parent: 72d008d88d32fe3eb3f7033d93c90a00cb7d7c61 (diff)
10 files changed, 225 insertions, 228 deletions
diff --git a/test/yarp/snapshots/dash_heredocs.txt b/test/yarp/snapshots/dash_heredocs.txt
index 0a3c43cac7..a4af04d892 100644
--- a/test/yarp/snapshots/dash_heredocs.txt
+++ b/test/yarp/snapshots/dash_heredocs.txt
@@ -142,7 +142,7 @@
         │   └── unescaped: "  a\n  b\n"
         ├── @ StringNode (location: (201...206))
         │   ├── opening_loc: (201...206) = "<<-''"
-        │   ├── content_loc: (206...207) = "\n"
+        │   ├── content_loc: (207...207) = ""
         │   ├── closing_loc: (207...208) = "\n"
         │   └── unescaped: ""
         ├── @ StringNode (location: (209...217))
diff --git a/test/yarp/snapshots/heredoc_with_escaped_newline_at_start.txt b/test/yarp/snapshots/heredoc_with_escaped_newline_at_start.txt
index 0672ef2587..38bf2549e5 100644
--- a/test/yarp/snapshots/heredoc_with_escaped_newline_at_start.txt
+++ b/test/yarp/snapshots/heredoc_with_escaped_newline_at_start.txt
@@ -7,7 +7,7 @@
         │   ├── receiver:
         │   │   @ StringNode (location: (0...9))
         │   │   ├── opening_loc: (0...9) = "<<-TARGET"
-        │   │   ├── content_loc: (9...27) = ".gsub /^\\s{/, ''\\\n"
+        │   │   ├── content_loc: (27...27) = ""
         │   │   ├── closing_loc: (27...34) = "TARGET\n"
         │   │   └── unescaped: ""
         │   ├── call_operator_loc: (9...10) = "."
@@ -35,7 +35,7 @@
             ├── receiver:
             │   @ StringNode (location: (37...46))
             │   ├── opening_loc: (37...46) = "<<-TARGET"
-            │   ├── content_loc: (46...65) = ".gsub /^\\s{/, ''\\\r\n"
+            │   ├── content_loc: (65...65) = ""
             │   ├── closing_loc: (65...73) = "TARGET\r\n"
             │   └── unescaped: ""
             ├── call_operator_loc: (46...47) = "."
diff --git a/test/yarp/snapshots/heredoc_with_trailing_newline.txt b/test/yarp/snapshots/heredoc_with_trailing_newline.txt
index 077e9703af..60ffcb2a03 100644
--- a/test/yarp/snapshots/heredoc_with_trailing_newline.txt
+++ b/test/yarp/snapshots/heredoc_with_trailing_newline.txt
@@ -5,6 +5,6 @@
     └── body: (length: 1)
         └── @ StringNode (location: (0...6))
             ├── opening_loc: (0...6) = "<<-END"
-            ├── content_loc: (6...7) = "\n"
+            ├── content_loc: (7...7) = ""
             ├── closing_loc: (7...10) = "END"
             └── unescaped: ""
diff --git a/test/yarp/snapshots/heredocs_with_ignored_newlines.txt b/test/yarp/snapshots/heredocs_with_ignored_newlines.txt
index c0e84a874b..7eac804242 100644
--- a/test/yarp/snapshots/heredocs_with_ignored_newlines.txt
+++ b/test/yarp/snapshots/heredocs_with_ignored_newlines.txt
@@ -5,7 +5,7 @@
     └── body: (length: 2)
         ├── @ StringNode (location: (0...7))
         │   ├── opening_loc: (0...7) = "<<-HERE"
-        │   ├── content_loc: (7...9) = "\\\n"
+        │   ├── content_loc: (9...9) = ""
         │   ├── closing_loc: (9...14) = "HERE\n"
         │   └── unescaped: ""
         └── @ StringNode (location: (15...23))
diff --git a/test/yarp/snapshots/seattlerb/heredoc_squiggly_empty.txt b/test/yarp/snapshots/seattlerb/heredoc_squiggly_empty.txt
index 78e9731fdc..6fee4e8819 100644
--- a/test/yarp/snapshots/seattlerb/heredoc_squiggly_empty.txt
+++ b/test/yarp/snapshots/seattlerb/heredoc_squiggly_empty.txt
@@ -5,6 +5,6 @@
     └── body: (length: 1)
         └── @ StringNode (location: (0...4))
             ├── opening_loc: (0...4) = "<<~A"
-            ├── content_loc: (4...5) = "\n"
+            ├── content_loc: (5...5) = ""
             ├── closing_loc: (5...7) = "A\n"
             └── unescaped: ""
diff --git a/test/yarp/snapshots/unparser/corpus/semantic/dstr.txt b/test/yarp/snapshots/unparser/corpus/semantic/dstr.txt
index 5534951350..6efc2012af 100644
--- a/test/yarp/snapshots/unparser/corpus/semantic/dstr.txt
+++ b/test/yarp/snapshots/unparser/corpus/semantic/dstr.txt
@@ -5,22 +5,22 @@
     └── body: (length: 33)
         ├── @ StringNode (location: (0...5))
         │   ├── opening_loc: (0...5) = "<<DOC"
-        │   ├── content_loc: (5...6) = "\n"
+        │   ├── content_loc: (6...6) = ""
         │   ├── closing_loc: (6...10) = "DOC\n"
         │   └── unescaped: ""
         ├── @ StringNode (location: (11...18))
         │   ├── opening_loc: (11...18) = "<<'DOC'"
-        │   ├── content_loc: (18...19) = "\n"
+        │   ├── content_loc: (19...19) = ""
         │   ├── closing_loc: (19...23) = "DOC\n"
         │   └── unescaped: ""
         ├── @ StringNode (location: (24...30))
         │   ├── opening_loc: (24...30) = "<<~DOC"
-        │   ├── content_loc: (30...31) = "\n"
+        │   ├── content_loc: (31...31) = ""
         │   ├── closing_loc: (31...35) = "DOC\n"
         │   └── unescaped: ""
         ├── @ StringNode (location: (36...44))
         │   ├── opening_loc: (36...44) = "<<~'DOC'"
-        │   ├── content_loc: (44...45) = "\n"
+        │   ├── content_loc: (45...45) = ""
         │   ├── closing_loc: (45...49) = "DOC\n"
         │   └── unescaped: ""
         ├── @ StringNode (location: (50...55))
diff --git a/test/yarp/snapshots/unparser/corpus/semantic/while.txt b/test/yarp/snapshots/unparser/corpus/semantic/while.txt
index c23ec9d13f..532d9201e3 100644
--- a/test/yarp/snapshots/unparser/corpus/semantic/while.txt
+++ b/test/yarp/snapshots/unparser/corpus/semantic/while.txt
@@ -187,7 +187,7 @@
         │   │   │   └── arguments: (length: 1)
         │   │   │       └── @ StringNode (location: (108...114))
         │   │   │           ├── opening_loc: (108...114) = "<<-FOO"
-        │   │   │           ├── content_loc: (114...119) = ") do\n"
+        │   │   │           ├── content_loc: (119...119) = ""
         │   │   │           ├── closing_loc: (119...123) = "FOO\n"
         │   │   │           └── unescaped: ""
         │   │   ├── closing_loc: (114...115) = ")"
diff --git a/test/yarp/snapshots/whitequark/bug_heredoc_do.txt b/test/yarp/snapshots/whitequark/bug_heredoc_do.txt
index d5c2381c9d..0342bd4bac 100644
--- a/test/yarp/snapshots/whitequark/bug_heredoc_do.txt
+++ b/test/yarp/snapshots/whitequark/bug_heredoc_do.txt
@@ -13,7 +13,7 @@
             │   └── arguments: (length: 1)
             │       └── @ StringNode (location: (2...10))
             │           ├── opening_loc: (2...10) = "<<-TABLE"
-            │           ├── content_loc: (10...14) = " do\n"
+            │           ├── content_loc: (14...14) = ""
             │           ├── closing_loc: (14...20) = "TABLE\n"
             │           └── unescaped: ""
             ├── closing_loc: ∅
diff --git a/test/yarp/snapshots/whitequark/dedenting_heredoc.txt b/test/yarp/snapshots/whitequark/dedenting_heredoc.txt
index b5cb19c475..5ef33b83cf 100644
--- a/test/yarp/snapshots/whitequark/dedenting_heredoc.txt
+++ b/test/yarp/snapshots/whitequark/dedenting_heredoc.txt
@@ -194,7 +194,7 @@
         │   │   └── arguments: (length: 1)
         │   │       └── @ StringNode (location: (196...200))
         │   │           ├── opening_loc: (196...200) = "<<~E"
-        │   │           ├── content_loc: (200...201) = "\n"
+        │   │           ├── content_loc: (201...201) = ""
         │   │           ├── closing_loc: (201...205) = "  E\n"
         │   │           └── unescaped: ""
         │   ├── closing_loc: ∅
@@ -296,7 +296,7 @@
         │   │   └── arguments: (length: 1)
         │   │       └── @ StringNode (location: (297...301))
         │   │           ├── opening_loc: (297...301) = "<<~E"
-        │   │           ├── content_loc: (301...302) = "\n"
+        │   │           ├── content_loc: (302...302) = ""
         │   │           ├── closing_loc: (302...304) = "E\n"
         │   │           └── unescaped: ""
         │   ├── closing_loc: ∅
diff --git a/yarp/yarp.c b/yarp/yarp.c
index d7eec49d92..d34f24d7ad 100644
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@@ -10906,105 +10906,126 @@ parse_negative_numeric(yp_node_t *node) {
     }
 }
 
+// Returns a string content token at a particular location that is empty.
+static yp_token_t
+parse_strings_empty_content(const uint8_t *location) {
+    return (yp_token_t) { .type = YP_TOKEN_STRING_CONTENT, .start = location, .end = location };
+}
+
 // Parse a set of strings that could be concatenated together.
 static inline yp_node_t *
 parse_strings(yp_parser_t *parser) {
-    assert(parser->current.type == YP_TOKEN_STRING_BEGIN);
+    assert(parser->current.type == YP_TOKEN_STRING_BEGIN || parser->current.type == YP_TOKEN_HEREDOC_START);
     yp_node_t *result = NULL;
 
-    while (match_type_p(parser, YP_TOKEN_STRING_BEGIN)) {
-        assert(parser->lex_modes.current->mode == YP_LEX_STRING);
-        bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
-
+    while (true) {
         yp_node_t *node = NULL;
-        yp_token_t opening = parser->current;
-        parser_lex(parser);
 
-        if (accept(parser, YP_TOKEN_STRING_END)) {
-            // If we get here, then we have an end immediately after a
-            // start. In that case we'll create an empty content token
-            // and return an uninterpolated string.
-            yp_token_t content = (yp_token_t) {
-                .type = YP_TOKEN_STRING_CONTENT,
-                .start = parser->previous.start,
-                .end = parser->previous.start
-            };
-
-            node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE);
-        } else if (accept(parser, YP_TOKEN_LABEL_END)) {
-            // If we get here, then we have an end of a label
-            // immediately after a start. In that case we'll create an
-            // empty symbol node.
-            yp_token_t opening = not_provided(parser);
-            yp_token_t content = (yp_token_t) {
-                .type = YP_TOKEN_STRING_CONTENT,
-                .start = parser->previous.start,
-                .end = parser->previous.start
-            };
-
-            node = (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
-        } else if (!lex_interpolation) {
-            // If we don't accept interpolation then we expect the
-            // string to start with a single string content node.
-            expect(parser, YP_TOKEN_STRING_CONTENT, YP_ERR_EXPECT_STRING_CONTENT);
-            yp_token_t content = parser->previous;
+        if (parser->current.type == YP_TOKEN_STRING_BEGIN) {
+            // Here we have found a string literal. We'll parse it and add it to
+            // the list of strings.
+            assert(parser->lex_modes.current->mode == YP_LEX_STRING);
+            bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
 
-            // It is unfortunately possible to have multiple string
-            // content nodes in a row in the case that there's heredoc
-            // content in the middle of the string, like this cursed
-            // example:
-            //
-            // <<-END+'b
-            //  a
-            // END
-            //  c'+'d'
-            //
-            // In that case we need to switch to an interpolated string
-            // to be able to contain all of the parts.
-            if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
-                yp_node_list_t parts = YP_EMPTY_NODE_LIST;
+            yp_token_t opening = parser->current;
+            parser_lex(parser);
 
-                yp_token_t delimiters = not_provided(parser);
-                yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, YP_UNESCAPE_MINIMAL);
-                yp_node_list_append(&parts, part);
+            if (accept(parser, YP_TOKEN_STRING_END)) {
+                // If we get here, then we have an end immediately after a
+                // start. In that case we'll create an empty content token and
+                // return an uninterpolated string.
+                yp_token_t content = parse_strings_empty_content(parser->previous.start);
+                node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE);
+            } else if (accept(parser, YP_TOKEN_LABEL_END)) {
+                // If we get here, then we have an end of a label immediately
+                // after a start. In that case we'll create an empty symbol
+                // node.
+                yp_token_t opening = not_provided(parser);
+                yp_token_t content = parse_strings_empty_content(parser->previous.start);
+                node = (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
+            } else if (!lex_interpolation) {
+                // If we don't accept interpolation then we expect the string to
+                // start with a single string content node.
+                expect(parser, YP_TOKEN_STRING_CONTENT, YP_ERR_EXPECT_STRING_CONTENT);
+                yp_token_t content = parser->previous;
+
+                // It is unfortunately possible to have multiple string content
+                // nodes in a row in the case that there's heredoc content in
+                // the middle of the string, like this cursed example:
+                //
+                // <<-END+'b
+                //  a
+                // END
+                //  c'+'d'
+                //
+                // In that case we need to switch to an interpolated string to
+                // be able to contain all of the parts.
+                if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
+                    yp_node_list_t parts = YP_EMPTY_NODE_LIST;
 
-                while (accept(parser, YP_TOKEN_STRING_CONTENT)) {
-                    part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, YP_UNESCAPE_MINIMAL);
+                    yp_token_t delimiters = not_provided(parser);
+                    yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, YP_UNESCAPE_MINIMAL);
                     yp_node_list_append(&parts, part);
-                }
 
-                expect(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_LITERAL_TERM);
-                node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
-            } else if (accept(parser, YP_TOKEN_LABEL_END)) {
-                node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
-            } else {
-                expect(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_LITERAL_TERM);
-                node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_MINIMAL);
-            }
-        } else if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
-            // In this case we've hit string content so we know the string at
-            // least has something in it. We'll need to check if the following
-            // token is the end (in which case we can return a plain string) or if
-            // it's not then it has interpolation.
-            yp_token_t content = parser->current;
-            parser_lex(parser);
+                    while (accept(parser, YP_TOKEN_STRING_CONTENT)) {
+                        part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, YP_UNESCAPE_MINIMAL);
+                        yp_node_list_append(&parts, part);
+                    }
 
-            if (accept(parser, YP_TOKEN_STRING_END)) {
-                node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
-            } else if (accept(parser, YP_TOKEN_LABEL_END)) {
-                node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
+                    expect(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_LITERAL_TERM);
+                    node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
+                } else if (accept(parser, YP_TOKEN_LABEL_END)) {
+                    node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
+                } else {
+                    expect(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_LITERAL_TERM);
+                    node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_MINIMAL);
+                }
+            } else if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
+                // In this case we've hit string content so we know the string
+                // at least has something in it. We'll need to check if the
+                // following token is the end (in which case we can return a
+                // plain string) or if it's not then it has interpolation.
+                yp_token_t content = parser->current;
+                parser_lex(parser);
+
+                if (accept(parser, YP_TOKEN_STRING_END)) {
+                    node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
+                } else if (accept(parser, YP_TOKEN_LABEL_END)) {
+                    node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
+                } else {
+                    // If we get here, then we have interpolation so we'll need
+                    // to create a string or symbol node with interpolation.
+                    yp_node_list_t parts = YP_EMPTY_NODE_LIST;
+                    yp_token_t string_opening = not_provided(parser);
+                    yp_token_t string_closing = not_provided(parser);
+
+                    yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, YP_UNESCAPE_ALL);
+                    yp_node_list_append(&parts, part);
+
+                    while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
+                        if ((part = parse_string_part(parser)) != NULL) {
+                            yp_node_list_append(&parts, part);
+                        }
+                    }
+
+                    if (accept(parser, YP_TOKEN_LABEL_END)) {
+                        node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
+                    } else {
+                        expect(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_INTERPOLATED_TERM);
+                        node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
+                    }
+                }
             } else {
-                // If we get here, then we have interpolation so we'll need to create
-                // a string or symbol node with interpolation.
+                // If we get here, then the first part of the string is not
+                // plain string content, in which case we need to parse the
+                // string as an interpolated string.
                 yp_node_list_t parts = YP_EMPTY_NODE_LIST;
-                yp_token_t string_opening = not_provided(parser);
-                yp_token_t string_closing = not_provided(parser);
-                yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, YP_UNESCAPE_ALL);
-                yp_node_list_append(&parts, part);
+                yp_node_t *part = NULL;
 
                 while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
-                    yp_node_t *part = parse_string_part(parser);
-                    if (part != NULL) yp_node_list_append(&parts, part);
+                    if ((part = parse_string_part(parser)) != NULL) {
+                        yp_node_list_append(&parts, part);
+                    }
                 }
 
                 if (accept(parser, YP_TOKEN_LABEL_END)) {
@@ -11014,46 +11035,129 @@ parse_strings(yp_parser_t *parser) {
                     node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
                 }
             }
-        } else {
-            // If we get here, then the first part of the string is not plain string
-            // content, in which case we need to parse the string as an interpolated
-            // string.
-            yp_node_list_t parts = YP_EMPTY_NODE_LIST;
+        } else if (parser->current.type == YP_TOKEN_HEREDOC_START) {
+            // Here we have found a heredoc. We'll parse it and add it to the
+            // list of strings.
+            assert(parser->lex_modes.current->mode == YP_LEX_HEREDOC);
 
-            while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
-                yp_node_t *part = parse_string_part(parser);
-                if (part != NULL) yp_node_list_append(&parts, part);
-            }
+            yp_heredoc_quote_t quote = parser->lex_modes.current->as.heredoc.quote;
+            yp_heredoc_indent_t indent = parser->lex_modes.current->as.heredoc.indent;
 
-            if (accept(parser, YP_TOKEN_LABEL_END)) {
-                node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
+            parser_lex(parser);
+            yp_token_t opening = parser->previous;
+            yp_node_t *part;
+
+            if (match_any_type_p(parser, 2, YP_TOKEN_HEREDOC_END, YP_TOKEN_EOF)) {
+                // If we get here, then we have an empty heredoc. We'll create
+                // an empty content token and return an empty string node.
+                lex_state_set(parser, YP_LEX_STATE_END);
+                expect(parser, YP_TOKEN_HEREDOC_END, YP_ERR_HEREDOC_TERM);
+                yp_token_t content = parse_strings_empty_content(parser->previous.start);
+
+                if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
+                    node = (yp_node_t *) yp_xstring_node_create_and_unescape(parser, &opening, &content, &parser->previous);
+                } else {
+                    node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE);
+                }
+
+                node->location.end = opening.end;
+            } else if ((part = parse_string_part(parser)) == NULL) {
+                // If we get here, then we tried to find something in the
+                // heredoc but couldn't actually parse anything, so we'll just
+                // return a missing node.
+                node = (yp_node_t *) yp_missing_node_create(parser, parser->previous.start, parser->previous.end);
+            } else if (YP_NODE_TYPE_P(part, YP_STRING_NODE) && match_any_type_p(parser, 2, YP_TOKEN_HEREDOC_END, YP_TOKEN_EOF)) {
+                // If we get here, then the part that we parsed was plain string
+                // content and we're at the end of the heredoc, so we can return
+                // just a string node with the heredoc opening and closing as
+                // its opening and closing.
+                yp_string_node_t *cast = (yp_string_node_t *) part;
+
+                cast->opening_loc = YP_LOCATION_TOKEN_VALUE(&opening);
+                cast->closing_loc = YP_LOCATION_TOKEN_VALUE(&parser->current);
+                cast->base.location = cast->opening_loc;
+
+                if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
+                    assert(sizeof(yp_string_node_t) == sizeof(yp_x_string_node_t));
+                    cast->base.type = YP_X_STRING_NODE;
+                }
+
+                lex_state_set(parser, YP_LEX_STATE_END);
+                expect(parser, YP_TOKEN_HEREDOC_END, YP_ERR_HEREDOC_TERM);
+
+                node = (yp_node_t *) cast;
+
+                if (indent == YP_HEREDOC_INDENT_TILDE) {
+                    int common_whitespace = parse_heredoc_common_whitespace_for_single_node(parser, node, -1);
+                    parse_heredoc_dedent_single_node(parser, &cast->unescaped, true, common_whitespace, quote);
+                }
             } else {
-                expect(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_INTERPOLATED_TERM);
-                node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
+                // If we get here, then we have multiple parts in the heredoc,
+                // so we'll need to create an interpolated string node to hold
+                // them all.
+                yp_node_list_t parts = YP_EMPTY_NODE_LIST;
+                yp_node_list_append(&parts, part);
+
+                while (!match_any_type_p(parser, 2, YP_TOKEN_HEREDOC_END, YP_TOKEN_EOF)) {
+                    if ((part = parse_string_part(parser)) != NULL) {
+                        yp_node_list_append(&parts, part);
+                    }
+                }
+
+                // Now that we have all of the parts, create the correct type of
+                // interpolated node.
+                if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
+                    yp_interpolated_x_string_node_t *cast = yp_interpolated_xstring_node_create(parser, &opening, &opening);
+                    cast->parts = parts;
+
+                    lex_state_set(parser, YP_LEX_STATE_END);
+                    expect(parser, YP_TOKEN_HEREDOC_END, YP_ERR_HEREDOC_TERM);
+
+                    yp_interpolated_xstring_node_closing_set(cast, &parser->previous);
+                    cast->base.location = cast->opening_loc;
+                    node = (yp_node_t *) cast;
+                } else {
+                    yp_interpolated_string_node_t *cast = yp_interpolated_string_node_create(parser, &opening, &parts, &opening);
+
+                    lex_state_set(parser, YP_LEX_STATE_END);
+                    expect(parser, YP_TOKEN_HEREDOC_END, YP_ERR_HEREDOC_TERM);
+
+                    yp_interpolated_string_node_closing_set(cast, &parser->previous);
+                    cast->base.location = cast->opening_loc;
+                    node = (yp_node_t *) cast;
+                }
+
+                // If this is a heredoc that is indented with a ~, then we need
+                // to dedent each line by the common leading whitespace.
+                if (indent == YP_HEREDOC_INDENT_TILDE) {
+                    parse_heredoc_dedent(parser, node, quote);
+                }
             }
+        } else {
+            break;
         }
 
         if (result == NULL) {
-            // If the node we just parsed is a symbol node, then we
-            // can't concatenate it with anything else, so we can now
-            // return that node.
+            // If the node we just parsed is a symbol node, then we can't
+            // concatenate it with anything else, so we can now return that
+            // node.
             if (YP_NODE_TYPE_P(node, YP_SYMBOL_NODE) || YP_NODE_TYPE_P(node, YP_INTERPOLATED_SYMBOL_NODE)) {
                 return node;
             }
 
-            // If we don't already have a node, then it's fine and we
-            // can just set the result to be the node we just parsed.
+            // If we don't already have a node, then it's fine and we can just
+            // set the result to be the node we just parsed.
             result = node;
         } else {
-            // Otherwise we need to check the type of the node we just
-            // parsed. If it cannot be concatenated with the previous
-            // node, then we'll need to add a syntax error.
+            // Otherwise we need to check the type of the node we just parsed.
+            // If it cannot be concatenated with the previous node, then we'll
+            // need to add a syntax error.
             if (!YP_NODE_TYPE_P(node, YP_STRING_NODE) && !YP_NODE_TYPE_P(node, YP_INTERPOLATED_STRING_NODE)) {
                 yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, YP_ERR_STRING_CONCATENATION);
             }
 
-            // Either way we will create a concat node to hold the
-            // strings together.
+            // Either way we will create a concat node to hold the strings
+            // together.
             result = (yp_node_t *) yp_string_concat_node_create(parser, result, node);
         }
     }
@@ -11430,115 +11534,8 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
 
             return node;
         }
-        case YP_TOKEN_HEREDOC_START: {
-            assert(parser->lex_modes.current->mode == YP_LEX_HEREDOC);
-            yp_heredoc_quote_t quote = parser->lex_modes.current->as.heredoc.quote;
-            yp_heredoc_indent_t indent = parser->lex_modes.current->as.heredoc.indent;
-
-            yp_node_t *node;
-
-            parser_lex(parser);
-
-            if (parser->current.type == YP_TOKEN_HEREDOC_END) {
-                if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
-                    node = (yp_node_t *) yp_xstring_node_create(
-                            parser,
-                            &parser->previous,
-                            &((yp_token_t) { .type = YP_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->current.start }),
-                            &parser->current);
-
-                } else {
-                    node = (yp_node_t *)yp_string_node_create(
-                            parser,
-                            &parser->previous,
-                            &((yp_token_t) { .type = YP_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->current.start }),
-                            &parser->current);
-                }
-                node->location.end = parser->previous.end;
-                lex_state_set(parser, YP_LEX_STATE_END);
-                expect(parser, YP_TOKEN_HEREDOC_END, YP_ERR_HEREDOC_TERM);
-                return node;
-            }
-
-            yp_token_t opening_token = parser->previous;
-
-            yp_node_t *part = parse_string_part(parser);
-
-            if (part == NULL) {
-                // We couldn't parse anything, so return a missing node
-                return (yp_node_t *) yp_missing_node_create(parser, parser->previous.start, parser->previous.end);
-            }
-
-            if (YP_NODE_TYPE_P(part, YP_STRING_NODE) && match_any_2_type_p(parser, YP_TOKEN_HEREDOC_END, YP_TOKEN_EOF)) {
-                // We only have a single string, so we can return it
-                yp_string_node_t *str_part = (yp_string_node_t *)part;
-                str_part->opening_loc = YP_LOCATION_TOKEN_VALUE(&opening_token);
-                str_part->closing_loc = YP_LOCATION_TOKEN_VALUE(&parser->current);
-                str_part->base.location = str_part->opening_loc;
-                if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
-                    part->type = YP_X_STRING_NODE;
-                }
-                lex_state_set(parser, YP_LEX_STATE_END);
-                expect(parser, YP_TOKEN_HEREDOC_END, YP_ERR_HEREDOC_TERM);
-                node = part;
-                if (indent == YP_HEREDOC_INDENT_TILDE) {
-                    int common_whitespace = parse_heredoc_common_whitespace_for_single_node(parser, node, -1);
-                    parse_heredoc_dedent_single_node(parser, &str_part->unescaped, true, common_whitespace, quote);
-                }
-            }
-            else {
-                // We have multiple parts, continue parsing them
-                yp_node_list_t parts = YP_EMPTY_NODE_LIST;
-                yp_node_list_append(&parts, part);
-
-                while (!match_any_2_type_p(parser, YP_TOKEN_HEREDOC_END, YP_TOKEN_EOF)) {
-                    if ((part = parse_string_part(parser)) != NULL) {
-                        yp_node_list_append(&parts, part);
-                    }
-                }
-
-                if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
-                    node = (yp_node_t *) yp_interpolated_xstring_node_create(parser, &opening_token, &opening_token);
-                    ((yp_interpolated_x_string_node_t *)node)->parts = parts;
-                } else {
-                    node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening_token, NULL, &opening_token);
-                    ((yp_interpolated_string_node_t *)node)->parts = parts;
-                }
-
-
-                lex_state_set(parser, YP_LEX_STATE_END);
-                expect(parser, YP_TOKEN_HEREDOC_END, YP_ERR_HEREDOC_TERM);
-
-                if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
-                    assert(YP_NODE_TYPE_P(node, YP_INTERPOLATED_X_STRING_NODE));
-                    yp_interpolated_xstring_node_closing_set(((yp_interpolated_x_string_node_t *) node), &parser->previous);
-                    node->location = ((yp_interpolated_x_string_node_t *) node)->opening_loc;
-                } else {
-                    assert(YP_NODE_TYPE_P(node, YP_INTERPOLATED_STRING_NODE));
-                    yp_interpolated_string_node_closing_set((yp_interpolated_string_node_t *) node, &parser->previous);
-                    node->location = ((yp_interpolated_string_node_t *) node)->opening_loc;
-                }
-
-                // If this is a heredoc that is indented with a ~, then we need to dedent
-                // each line by the common leading whitespace.
-                if (indent == YP_HEREDOC_INDENT_TILDE) {
-                    parse_heredoc_dedent(parser, node, quote);
-                }
-            }
-
-            // If there's a string immediately following this heredoc, then it's a
-            // concatenatation. In this case we'll parse the next string and create a
-            // node in the tree that concatenates the two strings.
-            if (parser->current.type == YP_TOKEN_STRING_BEGIN) {
-                return (yp_node_t *) yp_string_concat_node_create(
-                    parser,
-                    node,
-                    parse_expression(parser, YP_BINDING_POWER_CALL, YP_ERR_CANNOT_PARSE_EXPRESSION)
-                );
-            } else {
-                return node;
-            }
-        }
+        case YP_TOKEN_HEREDOC_START:
+            return parse_strings(parser);
         case YP_TOKEN_INSTANCE_VARIABLE: {
             parser_lex(parser);
             yp_node_t *node = (yp_node_t *) yp_instance_variable_read_node_create(parser, &parser->previous);
author	Kevin Newton <kddnewton@gmail.com>	2023-09-13 21:02:16 -0400
committer	git <svn-admin@ruby-lang.org>	2023-09-14 13:58:12 +0000
commit	57745450dd85567cbdce703f12c9825fd81e52a2 (patch)
tree	ac7e572fdb2ab8513272f439918579b507106a78
parent	72d008d88d32fe3eb3f7033d93c90a00cb7d7c61 (diff)