summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Aaron Patterson <tenderlove@ruby-lang.org> 2024-12-11 15:54:56 -0800
committer: git <svn-admin@ruby-lang.org> 2024-12-12 00:42:44 +0000
commit: 9181e8bc87dd167673bc6cfc255c7003e4c6b05e (patch)
tree: 3ef2de341178b5e1af3a63558414bbd4839616d8
parent: 0a1fa994820c3373c538376fcee82565135c7e77 (diff)
[ruby/prism] Decode %r like % strings
%r regular expressions need to be decoded like strings. This commit fixes %r decoding so it works like strings. https://github.com/ruby/prism/commit/85bfd9c0cd
-rw-r--r-- prism/prism.c 32
-rw-r--r-- test/prism/percent_delimiter_string_test.rb 48
2 files changed, 62 insertions, 18 deletions
diff --git a/prism/prism.c b/prism/prism.c
index baa81e94e0..bfc420ec00 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -12110,9 +12110,28 @@ parser_lex(pm_parser_t *parser) {
pm_regexp_token_buffer_t token_buffer = { 0 };
while (breakpoint != NULL) {
+ uint8_t term = lex_mode->as.regexp.terminator;
+ bool is_terminator = (*breakpoint == term);
+
+ // If the terminator is newline, we need to consider \r\n _also_ a newline
+ // For example: `%\nfoo\r\n`
+ // The string should be "foo", not "foo\r"
+ if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
+ if (term == '\n') {
+ is_terminator = true;
+ }
+
+ // If the terminator is a CR, but we see a CRLF, we need to
+ // treat the CRLF as a newline, meaning this is _not_ the
+ // terminator
+ if (term == '\r') {
+ is_terminator = false;
+ }
+ }
+
// If we hit the terminator, we need to determine what kind of
// token to return.
- if (*breakpoint == lex_mode->as.regexp.terminator) {
+ if (is_terminator) {
if (lex_mode->as.regexp.nesting > 0) {
parser->current.end = breakpoint + 1;
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
@@ -12342,20 +12361,21 @@ parser_lex(pm_parser_t *parser) {
continue;
}
- bool is_terminator = (*breakpoint == lex_mode->as.string.terminator);
+ uint8_t term = lex_mode->as.string.terminator;
+ bool is_terminator = (*breakpoint == term);
// If the terminator is newline, we need to consider \r\n _also_ a newline
- // For example: `%\nfoo\r\n`
- // The string should be "foo", not "foo\r"
+ // For example: `%r\nfoo\r\n`
+ // The string should be /foo/, not /foo\r/
if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
- if (lex_mode->as.string.terminator == '\n') {
+ if (term == '\n') {
is_terminator = true;
}
// If the terminator is a CR, but we see a CRLF, we need to
// treat the CRLF as a newline, meaning this is _not_ the
// terminator
- if (lex_mode->as.string.terminator == '\r') {
+ if (term == '\r') {
is_terminator = false;
}
}
diff --git a/test/prism/percent_delimiter_string_test.rb b/test/prism/percent_delimiter_string_test.rb
index 4cf5990dcf..6fd825ad06 100644
--- a/test/prism/percent_delimiter_string_test.rb
+++ b/test/prism/percent_delimiter_string_test.rb
@@ -3,56 +3,80 @@
require_relative "test_helper"
module Prism
- class PercentDelimiterStringTest < TestCase
+ module PercentDelimiterTests
def test_newline_terminator_with_lf_crlf
- str = "%\n123456\r\n"
+ str = l "\n123456\r\n"
assert_parse "123456", str
end
def test_newline_terminator_with_lf_crlf_with_extra_cr
- str = "%\n123456\r\r\n"
+ str = l "\n123456\r\r\n"
assert_parse "123456\r", str
end
def test_newline_terminator_with_crlf_pair
- str = "%\r\n123456\r\n"
+ str = l "\r\n123456\r\n"
assert_parse "123456", str
end
def test_newline_terminator_with_crlf_crlf_with_extra_cr
- str = "%\r\n123456\r\r\n"
+ str = l "\r\n123456\r\r\n"
assert_parse "123456\r", str
end
def test_newline_terminator_with_cr_cr
- str = "%\r123456\r;\n"
+ str = l "\r123456\r;\n"
assert_parse "123456", str
end
def test_newline_terminator_with_crlf_lf
- str = "%\r\n123456\n;\n"
+ str = l "\r\n123456\n;\n"
assert_parse "123456", str
end
def test_cr_crlf
- str = "%\r1\r\n \r"
+ str = l "\r1\r\n \r"
assert_parse "1\n ", str
end
def test_lf_crlf
- str = "%\n1\r\n \n"
+ str = l "\n1\r\n \n"
assert_parse "1", str
end
def test_lf_lf
- str = "%\n1\n \n"
+ str = l "\n1\n \n"
assert_parse "1", str
end
def assert_parse(expected, str)
+ assert_equal expected, find_node(str).unescaped
+ end
+ end
+
+ class PercentDelimiterStringTest < TestCase
+ include PercentDelimiterTests
+
+ def find_node(str)
+ tree = Prism.parse str
+ tree.value.breadth_first_search { |x| Prism::StringNode === x }
+ end
+
+ def l(str)
+ "%" + str
+ end
+ end
+
+ class PercentDelimiterRegexpTest < TestCase
+ include PercentDelimiterTests
+
+ def l(str)
+ "%r" + str
+ end
+
+ def find_node(str)
tree = Prism.parse str
- node = tree.value.breadth_first_search { |x| Prism::StringNode === x }
- assert_equal expected, node.unescaped
+ tree.value.breadth_first_search { |x| Prism::RegularExpressionNode === x }
end
end
end