diff options
| -rw-r--r-- | prism/prism.c | 16 | ||||
| -rw-r--r-- | test/prism/errors/unterminated_heredoc_and_embexpr.txt | 11 | ||||
| -rw-r--r-- | test/prism/errors/unterminated_heredoc_and_embexpr_2.txt | 9 | ||||
| -rw-r--r-- | test/prism/errors_test.rb | 22 | ||||
| -rw-r--r-- | test/prism/lex_test.rb | 53 |
5 files changed, 107 insertions, 4 deletions
diff --git a/prism/prism.c b/prism/prism.c index 34e5d38b0a..ca0f2a55aa 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -9783,6 +9783,12 @@ parser_lex(pm_parser_t *parser) { unsigned int semantic_token_seen = parser->semantic_token_seen; parser->semantic_token_seen = true; + // We'll jump to this label when we are about to encounter an EOF. + // If we still have lex_modes on the stack, we pop them so that cleanup + // can happen. For example, we should still continue parsing after a heredoc + // identifier, even if the heredoc body was syntax invalid. + switch_lex_modes: + switch (parser->lex_modes.current->mode) { case PM_LEX_DEFAULT: case PM_LEX_EMBEXPR: @@ -9856,6 +9862,14 @@ parser_lex(pm_parser_t *parser) { // We'll check if we're at the end of the file. If we are, then we // need to return the EOF token. if (parser->current.end >= parser->end) { + // We may be missing closing tokens. We should pop modes one by one + // to do the appropriate cleanup like moving next_start for heredocs. + // Only when no mode is remaining will we actually emit the EOF token. + if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) { + lex_mode_pop(parser); + goto switch_lex_modes; + } + // If we hit EOF, but the EOF came immediately after a newline, // set the start of the token to the newline. 
This way any EOF // errors will be reported as happening on that line rather than @@ -15433,7 +15447,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) { pm_token_t opening = parser->previous; pm_statements_node_t *statements = NULL; - if (!match1(parser, PM_TOKEN_EMBEXPR_END)) { + if (!match3(parser, PM_TOKEN_EMBEXPR_END, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) { pm_accepts_block_stack_push(parser, true); statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1)); pm_accepts_block_stack_pop(parser); diff --git a/test/prism/errors/unterminated_heredoc_and_embexpr.txt b/test/prism/errors/unterminated_heredoc_and_embexpr.txt new file mode 100644 index 0000000000..bed7fcd24e --- /dev/null +++ b/test/prism/errors/unterminated_heredoc_and_embexpr.txt @@ -0,0 +1,11 @@ +<<A+B + ^ unterminated heredoc; can't find string "A" anywhere before EOF + ^ unexpected '+', ignoring it + ^ unterminated heredoc; can't find string "A" anywhere before EOF +#{C + ^ unexpected heredoc ending; expected an argument + ^ unexpected heredoc ending, expecting end-of-input + ^ unexpected heredoc ending, ignoring it + ^ unexpected end-of-input, assuming it is closing the parent top level context +^ expected a `}` to close the embedded expression + diff --git a/test/prism/errors/unterminated_heredoc_and_embexpr_2.txt b/test/prism/errors/unterminated_heredoc_and_embexpr_2.txt new file mode 100644 index 0000000000..a03ff1d212 --- /dev/null +++ b/test/prism/errors/unterminated_heredoc_and_embexpr_2.txt @@ -0,0 +1,9 @@ +<<A+B + ^ unterminated heredoc; can't find string "A" anywhere before EOF +#{C + "#{"} + ^ unterminated string meets end of file + ^ unexpected end-of-input, assuming it is closing the parent top level context + ^ expected a `}` to close the embedded expression + ^ unterminated string; expected a closing delimiter for the interpolated string + ^ expected a `}` to close the embedded expression + diff --git a/test/prism/errors_test.rb 
b/test/prism/errors_test.rb index b30a0f304d..27610e89d3 100644 --- a/test/prism/errors_test.rb +++ b/test/prism/errors_test.rb @@ -87,6 +87,28 @@ module Prism assert_nil(statement.end_keyword) end + def test_unclosed_interpolation + statement = Prism.parse_statement("\"\#{") + assert_equal('"', statement.opening) + assert_nil(statement.closing) + + assert_equal(1, statement.parts.count) + assert_equal('#{', statement.parts[0].opening) + assert_equal("", statement.parts[0].closing) + assert_nil(statement.parts[0].statements) + end + + def test_unclosed_heredoc_and_interpolation + statement = Prism.parse_statement("<<D\n\#{") + assert_equal("<<D", statement.opening) + assert_nil(statement.closing) + + assert_equal(1, statement.parts.count) + assert_equal('#{', statement.parts[0].opening) + assert_equal("", statement.parts[0].closing) + assert_nil(statement.parts[0].statements) + end + private def assert_errors(filepath, version) diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb index 9a9f203c28..8ea7ce7e9b 100644 --- a/test/prism/lex_test.rb +++ b/test/prism/lex_test.rb @@ -48,11 +48,58 @@ module Prism end if RUBY_VERSION >= "3.3" - def test_lex_compare - prism = Prism.lex_compat(File.read(__FILE__), version: "current").value - ripper = Ripper.lex(File.read(__FILE__)) + def test_lex_compat + source = "foo bar" + prism = Prism.lex_compat(source, version: "current").value + ripper = Ripper.lex(source) assert_equal(ripper, prism) end end + + def test_lex_interpolation_unterminated + assert_equal( + %i[STRING_BEGIN EMBEXPR_BEGIN EOF], + token_types('"#{') + ) + + assert_equal( + %i[STRING_BEGIN EMBEXPR_BEGIN IGNORED_NEWLINE EOF], + token_types('"#{' + "\n") + ) + end + + def test_lex_interpolation_unterminated_with_content + # FIXME: Emits EOF twice. 
+ assert_equal( + %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT EOF EOF], + token_types('"#{C') + ) + + assert_equal( + %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT NEWLINE EOF], + token_types('"#{C' + "\n") + ) + end + + def test_lex_heredoc_unterminated + code = <<~'RUBY'.strip + <<A+B + #{C + RUBY + + assert_equal( + %i[HEREDOC_START EMBEXPR_BEGIN CONSTANT HEREDOC_END PLUS CONSTANT NEWLINE EOF], + token_types(code) + ) + + assert_equal( + %i[HEREDOC_START EMBEXPR_BEGIN CONSTANT NEWLINE HEREDOC_END PLUS CONSTANT NEWLINE EOF], + token_types(code + "\n") + ) + end + + def token_types(code) + Prism.lex(code).value.map { |token, _state| token.type } + end end end |
