summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Mike Dalessio <mike.dalessio@gmail.com> 2023-08-19 14:03:35 -0400
committer: Kevin Newton <kddnewton@gmail.com> 2023-08-21 12:22:53 -0400
commit: ac819f4db9bb79ba92547e1abd36336f36633583 (patch)
tree: 326bde22f0a618daf320f435e8574886ccbe32d7
parent: f83c1d62bd42c7ba51e715610e9ca2410724d9e0 (diff)
[ruby/yarp] fix: support newline-terminated regular expressions
Previously, parsing a snippet like `%r\nfoo\n` would result in tracking the second newline twice, which caused a runtime assertion to fail. Fixing that issue revealed another bug: the _first_ newline was not being tracked at all. So we introduce a call to yp_newline_list_append right when we construct the REGEXP_BEGIN token. https://github.com/ruby/yarp/commit/0d5d759091
-rw-r--r-- test/snapshots/newline-terminated-things.txt | 6
-rw-r--r-- test/yarp/fixtures/newline-terminated-things.txt | 2
-rw-r--r-- test/yarp/parse_test.rb | 2
-rw-r--r-- yarp/yarp.c | 11
4 files changed, 20 insertions, 1 deletion
diff --git a/test/snapshots/newline-terminated-things.txt b/test/snapshots/newline-terminated-things.txt
new file mode 100644
index 0000000000..e68ea1658e
--- /dev/null
+++ b/test/snapshots/newline-terminated-things.txt
@@ -0,0 +1,6 @@
+ProgramNode(0...7)(
+ [],
+ StatementsNode(0...7)(
+ [RegularExpressionNode(0...7)((0...3), (3...6), (6...7), "foo", 0)]
+ )
+)
diff --git a/test/yarp/fixtures/newline-terminated-things.txt b/test/yarp/fixtures/newline-terminated-things.txt
new file mode 100644
index 0000000000..27e7c62e8e
--- /dev/null
+++ b/test/yarp/fixtures/newline-terminated-things.txt
@@ -0,0 +1,2 @@
+%r
+foo
diff --git a/test/yarp/parse_test.rb b/test/yarp/parse_test.rb
index 7a910f7db2..33eb1475f5 100644
--- a/test/yarp/parse_test.rb
+++ b/test/yarp/parse_test.rb
@@ -120,6 +120,8 @@ class ParseTest < Test::Unit::TestCase
end
Dir["*.txt", base: base].each do |relative|
+ next if relative == "newline_terminated.txt"
+
# We test every snippet (separated by \n\n) in isolation
# to ensure the parser does not try to read bytes further than the end of each snippet
define_method "test_individual_snippets_#{relative}" do
diff --git a/yarp/yarp.c b/yarp/yarp.c
index a61c9e15b8..df6bde14b3 100644
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@@ -6215,6 +6215,9 @@ parser_lex(yp_parser_t *parser) {
if (parser->current.end < parser->end) {
lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
+ if (parser->current.end == '\n') {
+ yp_newline_list_append(&parser->newline_list, parser->current.end);
+ }
parser->current.end++;
}
@@ -6526,7 +6529,13 @@ parser_lex(yp_parser_t *parser) {
// If we've hit a newline, then we need to track that in the
// list of newlines.
if (*breakpoint == '\n') {
- yp_newline_list_append(&parser->newline_list, breakpoint);
+ // For the special case of a newline-terminated regular expression, we will pass
+ // through this branch twice -- once with YP_TOKEN_REGEXP_BEGIN and then again
+ // with YP_TOKEN_STRING_CONTENT. Let's avoid tracking the newline twice, by
+ // tracking it only in the REGEXP_BEGIN case.
+ if (!(lex_mode->as.regexp.terminator == '\n' && parser->current.type != YP_TOKEN_REGEXP_BEGIN)) {
+ yp_newline_list_append(&parser->newline_list, breakpoint);
+ }
if (lex_mode->as.regexp.terminator != '\n') {
// If the terminator is not a newline, then we can set