diff options
author | Kevin Newton <kddnewton@gmail.com> | 2024-01-09 10:01:20 -0500 |
---|---|---|
committer | git <svn-admin@ruby-lang.org> | 2024-01-09 15:15:52 +0000 |
commit | 88d7838445ec84b1cc630ce3bd97bb71cd0aefd4 (patch) | |
tree | 250dce4cad8ecfde3bc8a18b6f1128796be67c87 | |
parent | 02d8bad6e12b8614f007c8c30eb50aff4bddcfb4 (diff) |
[ruby/prism] Fix assertion on spanning heredocs
https://github.com/ruby/prism/commit/e190308845
-rw-r--r-- | prism/prism.c | 70 | ||||
-rw-r--r-- | prism/util/pm_newline_list.c | 12 | ||||
-rw-r--r-- | prism/util/pm_newline_list.h | 11 | ||||
-rw-r--r-- | test/prism/fixtures/spanning_heredoc_newlines.txt | 23 | ||||
-rw-r--r-- | test/prism/parse_test.rb | 2 | ||||
-rw-r--r-- | test/prism/snapshots/spanning_heredoc_newlines.txt | 155 |
6 files changed, 223 insertions, 50 deletions
diff --git a/prism/prism.c b/prism/prism.c index 398288e641..7ff50630ce 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -8094,6 +8094,34 @@ pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, } /** + * Lex past the delimiter of a percent literal. Handle newlines and heredocs + * appropriately. + */ +static uint8_t +pm_lex_percent_delimiter(pm_parser_t *parser) { + size_t eol_length = match_eol(parser); + + if (eol_length) { + if (parser->heredoc_end) { + // If we have already lexed a heredoc, then the newline has already + // been added to the list. In this case we want to just flush the + // heredoc end. + parser_flush_heredoc_end(parser); + } else { + // Otherwise, we'll add the newline to the list of newlines. + pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1); + } + + const uint8_t delimiter = *parser->current.end; + parser->current.end += eol_length; + + return delimiter; + } + + return *parser->current.end++; +} + +/** * This is a convenience macro that will set the current token type, call the * lex callback, and then return from the parser_lex function. */ @@ -9049,15 +9077,8 @@ parser_lex(pm_parser_t *parser) { pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT); } - lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end)); - - size_t eol_length = match_eol(parser); - if (eol_length) { - parser->current.end += eol_length; - pm_newline_list_append(&parser->newline_list, parser->current.end - 1); - } else { - parser->current.end++; - } + const uint8_t delimiter = pm_lex_percent_delimiter(parser); + lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter)); if (parser->current.end < parser->end) { LEX(PM_TOKEN_STRING_BEGIN); @@ -9077,7 +9098,7 @@ parser_lex(pm_parser_t *parser) { parser->current.end++; if (parser->current.end < parser->end) { - lex_mode_push_list(parser, false, *parser->current.end++); + lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser)); } else { lex_mode_push_list_eof(parser); } @@ -9088,7 +9109,7 @@ parser_lex(pm_parser_t *parser) { parser->current.end++; if (parser->current.end < parser->end) { - lex_mode_push_list(parser, true, *parser->current.end++); + lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser)); } else { lex_mode_push_list_eof(parser); } @@ -9099,9 +9120,8 @@ parser_lex(pm_parser_t *parser) { parser->current.end++; if (parser->current.end < parser->end) { - lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end)); - pm_newline_list_check_append(&parser->newline_list, parser->current.end); - parser->current.end++; + const uint8_t delimiter = pm_lex_percent_delimiter(parser); + lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter)); } else { lex_mode_push_regexp(parser, '\0', '\0'); } @@ -9112,9 +9132,8 @@ parser_lex(pm_parser_t *parser) { parser->current.end++; if (parser->current.end < parser->end) { - lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end)); - pm_newline_list_check_append(&parser->newline_list, parser->current.end); - parser->current.end++; + const uint8_t delimiter = pm_lex_percent_delimiter(parser); + lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter)); } else { lex_mode_push_string_eof(parser); } @@ -9125,9 +9144,8 @@ parser_lex(pm_parser_t *parser) { parser->current.end++; if (parser->current.end < parser->end) { - lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end)); - pm_newline_list_check_append(&parser->newline_list, parser->current.end); - parser->current.end++; + const uint8_t delimiter = pm_lex_percent_delimiter(parser); + lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter)); } else { lex_mode_push_string_eof(parser); } @@ -9138,9 +9156,9 @@ parser_lex(pm_parser_t *parser) { parser->current.end++; if (parser->current.end < parser->end) { - lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end)); + const uint8_t delimiter = pm_lex_percent_delimiter(parser); + lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter)); lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM); - parser->current.end++; } else { lex_mode_push_string_eof(parser); } @@ -9151,7 +9169,7 @@ parser_lex(pm_parser_t *parser) { parser->current.end++; if (parser->current.end < parser->end) { - lex_mode_push_list(parser, false, *parser->current.end++); + lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser)); } else { lex_mode_push_list_eof(parser); } @@ -9162,7 +9180,7 @@ parser_lex(pm_parser_t *parser) { parser->current.end++; if (parser->current.end < parser->end) { - lex_mode_push_list(parser, true, *parser->current.end++); + lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser)); } else { lex_mode_push_list_eof(parser); } @@ -9173,8 +9191,8 @@ parser_lex(pm_parser_t *parser) { parser->current.end++; if (parser->current.end < parser->end) { - lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end)); - parser->current.end++; + const uint8_t delimiter = pm_lex_percent_delimiter(parser); + lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter)); } else { lex_mode_push_string_eof(parser); } diff --git a/prism/util/pm_newline_list.c b/prism/util/pm_newline_list.c index f27bb75b63..32a4a050fe 100644 --- a/prism/util/pm_newline_list.c +++ b/prism/util/pm_newline_list.c @@ -46,18 +46,6 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) { } /** - * Conditionally append a new offset to the newline list, if the value passed in - * is a newline. - */ -bool -pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor) { - if (*cursor != '\n') { - return true; - } - return pm_newline_list_append(list, cursor); -} - -/** * Returns the line and column of the given offset. If the offset is not in the * list, the line and column of the closest offset less than the given offset * are returned. diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h index a31051f4e0..181283644f 100644 --- a/prism/util/pm_newline_list.h +++ b/prism/util/pm_newline_list.h @@ -73,17 +73,6 @@ bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor); /** - * Conditionally append a new offset to the newline list, if the value passed in - * is a newline. - * - * @param list The list to append to. - * @param cursor A pointer to the offset to append. - * @return True if the reallocation of the offsets succeeds (if one was - * necessary), otherwise false. - */ -bool pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor); - -/** * Returns the line and column of the given offset. If the offset is not in the * list, the line and column of the closest offset less than the given offset * are returned. diff --git a/test/prism/fixtures/spanning_heredoc_newlines.txt b/test/prism/fixtures/spanning_heredoc_newlines.txt new file mode 100644 index 0000000000..32c9943aeb --- /dev/null +++ b/test/prism/fixtures/spanning_heredoc_newlines.txt @@ -0,0 +1,23 @@ +<<A+% +A + + +<<A+%r +A + + +<<A+%q +A + + +<<A+%Q +A + + +<<A+%s +A + + +<<A+%x +A + diff --git a/test/prism/parse_test.rb b/test/prism/parse_test.rb index e2de55463c..6c5d125c3d 100644 --- a/test/prism/parse_test.rb +++ b/test/prism/parse_test.rb @@ -211,7 +211,7 @@ module Prism end Dir["*.txt", base: base].each do |relative| - next if relative == "newline_terminated.txt" + next if relative == "newline_terminated.txt" || relative == "spanning_heredoc_newlines.txt" # We test every snippet (separated by \n\n) in isolation # to ensure the parser does not try to read bytes further than the end of each snippet diff --git a/test/prism/snapshots/spanning_heredoc_newlines.txt b/test/prism/snapshots/spanning_heredoc_newlines.txt new file mode 100644 index 0000000000..0753089082 --- /dev/null +++ b/test/prism/snapshots/spanning_heredoc_newlines.txt @@ -0,0 +1,155 @@ +@ ProgramNode (location: (1,0)-(24,0)) +├── locals: [] +└── statements: + @ StatementsNode (location: (1,0)-(24,0)) + └── body: (length: 6) + ├── @ CallNode (location: (1,0)-(4,0)) + │ ├── flags: ∅ + │ ├── receiver: + │ │ @ StringNode (location: (1,0)-(1,3)) + │ │ ├── flags: ∅ + │ │ ├── opening_loc: (1,0)-(1,3) = "<<A" + │ │ ├── content_loc: (2,0)-(2,0) = "" + │ │ ├── closing_loc: (2,0)-(3,0) = "A\n" + │ │ └── unescaped: "" + │ ├── call_operator_loc: ∅ + │ ├── name: :+ + │ ├── message_loc: (1,3)-(1,4) = "+" + │ ├── opening_loc: ∅ + │ ├── arguments: + │ │ @ ArgumentsNode (location: (1,4)-(4,0)) + │ │ ├── flags: ∅ + │ │ └── arguments: (length: 1) + │ │ └── @ StringNode (location: (1,4)-(4,0)) + │ │ ├── flags: ∅ + │ │ ├── opening_loc: (1,4)-(2,0) = "%\n" + │ │ ├── content_loc: (3,0)-(3,0) = "" + │ │ ├── closing_loc: (3,0)-(4,0) = "\n" + │ │ └── unescaped: "" + │ ├── closing_loc: ∅ + │ └── block: ∅ + ├── @ CallNode (location: (5,0)-(8,0)) + │ ├── flags: ∅ + │ ├── receiver: + │ │ @ StringNode (location: (5,0)-(5,3)) + │ │ ├── flags: ∅ + │ │ ├── opening_loc: (5,0)-(5,3) = "<<A" + │ │ ├── content_loc: (6,0)-(6,0) = "" + │ │ ├── closing_loc: (6,0)-(7,0) = "A\n" + │ │ └── unescaped: "" + │ ├── call_operator_loc: ∅ + │ ├── name: :+ + │ ├── message_loc: (5,3)-(5,4) = "+" + │ ├── opening_loc: ∅ + │ ├── arguments: + │ │ @ ArgumentsNode (location: (5,4)-(8,0)) + │ │ ├── flags: ∅ + │ │ └── arguments: (length: 1) + │ │ └── @ RegularExpressionNode (location: (5,4)-(8,0)) + │ │ ├── flags: ∅ + │ │ ├── opening_loc: (5,4)-(6,0) = "%r\n" + │ │ ├── content_loc: (6,0)-(6,0) = "" + │ │ ├── closing_loc: (7,0)-(8,0) = "\n" + │ │ └── unescaped: "" + │ ├── closing_loc: ∅ + │ └── block: ∅ + ├── @ CallNode (location: (9,0)-(12,0)) + │ ├── flags: ∅ + │ ├── receiver: + │ │ @ StringNode (location: (9,0)-(9,3)) + │ │ ├── flags: ∅ + │ │ ├── opening_loc: (9,0)-(9,3) = "<<A" + │ │ ├── content_loc: (10,0)-(10,0) = "" + │ │ ├── closing_loc: (10,0)-(11,0) = "A\n" + │ │ └── unescaped: "" + │ ├── call_operator_loc: ∅ + │ ├── name: :+ + │ ├── message_loc: (9,3)-(9,4) = "+" + │ ├── opening_loc: ∅ + │ ├── arguments: + │ │ @ ArgumentsNode (location: (9,4)-(12,0)) + │ │ ├── flags: ∅ + │ │ └── arguments: (length: 1) + │ │ └── @ StringNode (location: (9,4)-(12,0)) + │ │ ├── flags: ∅ + │ │ ├── opening_loc: (9,4)-(10,0) = "%q\n" + │ │ ├── content_loc: (11,0)-(11,0) = "" + │ │ ├── closing_loc: (11,0)-(12,0) = "\n" + │ │ └── unescaped: "" + │ ├── closing_loc: ∅ + │ └── block: ∅ + ├── @ CallNode (location: (13,0)-(16,0)) + │ ├── flags: ∅ + │ ├── receiver: + │ │ @ StringNode (location: (13,0)-(13,3)) + │ │ ├── flags: ∅ + │ │ ├── opening_loc: (13,0)-(13,3) = "<<A" + │ │ ├── content_loc: (14,0)-(14,0) = "" + │ │ ├── closing_loc: (14,0)-(15,0) = "A\n" + │ │ └── unescaped: "" + │ ├── call_operator_loc: ∅ + │ ├── name: :+ + │ ├── message_loc: (13,3)-(13,4) = "+" + │ ├── opening_loc: ∅ + │ ├── arguments: + │ │ @ ArgumentsNode (location: (13,4)-(16,0)) + │ │ ├── flags: ∅ + │ │ └── arguments: (length: 1) + │ │ └── @ StringNode (location: (13,4)-(16,0)) + │ │ ├── flags: ∅ + │ │ ├── opening_loc: (13,4)-(14,0) = "%Q\n" + │ │ ├── content_loc: (15,0)-(15,0) = "" + │ │ ├── closing_loc: (15,0)-(16,0) = "\n" + │ │ └── unescaped: "" + │ ├── closing_loc: ∅ + │ └── block: ∅ + ├── @ CallNode (location: (17,0)-(20,0)) + │ ├── flags: ∅ + │ ├── receiver: + │ │ @ StringNode (location: (17,0)-(17,3)) + │ │ ├── flags: ∅ + │ │ ├── opening_loc: (17,0)-(17,3) = "<<A" + │ │ ├── content_loc: (18,0)-(18,0) = "" + │ │ ├── closing_loc: (18,0)-(19,0) = "A\n" + │ │ └── unescaped: "" + │ ├── call_operator_loc: ∅ + │ ├── name: :+ + │ ├── message_loc: (17,3)-(17,4) = "+" + │ ├── opening_loc: ∅ + │ ├── arguments: + │ │ @ ArgumentsNode (location: (17,4)-(20,0)) + │ │ ├── flags: ∅ + │ │ └── arguments: (length: 1) + │ │ └── @ SymbolNode (location: (17,4)-(20,0)) + │ │ ├── flags: ∅ + │ │ ├── opening_loc: (17,4)-(18,0) = "%s\n" + │ │ ├── value_loc: (18,0)-(18,0) = "" + │ │ ├── closing_loc: (19,0)-(20,0) = "\n" + │ │ └── unescaped: "" + │ ├── closing_loc: ∅ + │ └── block: ∅ + └── @ CallNode (location: (21,0)-(24,0)) + ├── flags: ∅ + ├── receiver: + │ @ StringNode (location: (21,0)-(21,3)) + │ ├── flags: ∅ + │ ├── opening_loc: (21,0)-(21,3) = "<<A" + │ ├── content_loc: (22,0)-(22,0) = "" + │ ├── closing_loc: (22,0)-(23,0) = "A\n" + │ └── unescaped: "" + ├── call_operator_loc: ∅ + ├── name: :+ + ├── message_loc: (21,3)-(21,4) = "+" + ├── opening_loc: ∅ + ├── arguments: + │ @ ArgumentsNode (location: (21,4)-(24,0)) + │ ├── flags: ∅ + │ └── arguments: (length: 1) + │ └── @ XStringNode (location: (21,4)-(24,0)) + │ ├── flags: ∅ + │ ├── opening_loc: (21,4)-(22,0) = "%x\n" + │ ├── content_loc: (22,0)-(22,0) = "" + │ ├── closing_loc: (23,0)-(24,0) = "\n" + │ └── unescaped: "" + ├── closing_loc: ∅ + └── block: ∅ |