[ruby/prism] Decode %r like % strings

%r regular expressions need to be decoded like strings. This commit fixes %r decoding so it works like strings. https://github.com/ruby/prism/commit/85bfd9c0cd
author: Aaron Patterson <tenderlove@ruby-lang.org> 2024-12-11 15:54:56 -0800
committer: git <svn-admin@ruby-lang.org> 2024-12-12 00:42:44 +0000
commit: 9181e8bc87dd167673bc6cfc255c7003e4c6b05e (patch)
tree: 3ef2de341178b5e1af3a63558414bbd4839616d8
parent: 0a1fa994820c3373c538376fcee82565135c7e77 (diff)
2 files changed, 62 insertions, 18 deletions
diff --git a/prism/prism.c b/prism/prism.c
index baa81e94e0..bfc420ec00 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -12110,9 +12110,28 @@ parser_lex(pm_parser_t *parser) {
             pm_regexp_token_buffer_t token_buffer = { 0 };
 
             while (breakpoint != NULL) {
+                uint8_t term = lex_mode->as.regexp.terminator;
+                bool is_terminator = (*breakpoint == term);
+
+                // If the terminator is newline, we need to consider \r\n _also_ a newline
+                // For example: `%r\nfoo\r\n`
+                // The regexp should be /foo/, not /foo\r/
+                if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
+                    if (term == '\n') {
+                        is_terminator = true;
+                    }
+
+                    // If the terminator is a CR, but we see a CRLF, we need to
+                    // treat the CRLF as a newline, meaning this is _not_ the
+                    // terminator
+                    if (term == '\r') {
+                        is_terminator = false;
+                    }
+                }
+
                 // If we hit the terminator, we need to determine what kind of
                 // token to return.
-                if (*breakpoint == lex_mode->as.regexp.terminator) {
+                if (is_terminator) {
                     if (lex_mode->as.regexp.nesting > 0) {
                         parser->current.end = breakpoint + 1;
                         breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
@@ -12342,20 +12361,21 @@ parser_lex(pm_parser_t *parser) {
                     continue;
                 }
 
-                bool is_terminator = (*breakpoint == lex_mode->as.string.terminator);
+                uint8_t term = lex_mode->as.string.terminator;
+                bool is_terminator = (*breakpoint == term);
 
                 // If the terminator is newline, we need to consider \r\n _also_ a newline
-                // For example: `%\nfoo\r\n`
-                // The string should be "foo", not "foo\r"
+                // For example, the % string literal `%\nfoo\r\n`
+                // The string should be "foo", not "foo\r"
                 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
-                    if (lex_mode->as.string.terminator == '\n') {
+                    if (term == '\n') {
                         is_terminator = true;
                     }
 
                     // If the terminator is a CR, but we see a CRLF, we need to
                     // treat the CRLF as a newline, meaning this is _not_ the
                     // terminator
-                    if (lex_mode->as.string.terminator == '\r') {
+                    if (term == '\r') {
                         is_terminator = false;
                     }
                 }
diff --git a/test/prism/percent_delimiter_string_test.rb b/test/prism/percent_delimiter_string_test.rb
index 4cf5990dcf..6fd825ad06 100644
--- a/test/prism/percent_delimiter_string_test.rb
+++ b/test/prism/percent_delimiter_string_test.rb
@@ -3,56 +3,80 @@
 require_relative "test_helper"
 
 module Prism
-  class PercentDelimiterStringTest < TestCase
+  module PercentDelimiterTests
     def test_newline_terminator_with_lf_crlf
-      str = "%\n123456\r\n"
+      str = l "\n123456\r\n"
       assert_parse "123456", str
     end
 
     def test_newline_terminator_with_lf_crlf_with_extra_cr
-      str = "%\n123456\r\r\n"
+      str = l "\n123456\r\r\n"
       assert_parse "123456\r", str
     end
 
     def test_newline_terminator_with_crlf_pair
-      str = "%\r\n123456\r\n"
+      str = l "\r\n123456\r\n"
       assert_parse "123456", str
     end
 
     def test_newline_terminator_with_crlf_crlf_with_extra_cr
-      str = "%\r\n123456\r\r\n"
+      str = l "\r\n123456\r\r\n"
       assert_parse "123456\r", str
     end
 
     def test_newline_terminator_with_cr_cr
-      str = "%\r123456\r;\n"
+      str = l "\r123456\r;\n"
       assert_parse "123456", str
     end
 
     def test_newline_terminator_with_crlf_lf
-      str = "%\r\n123456\n;\n"
+      str = l "\r\n123456\n;\n"
       assert_parse "123456", str
     end
 
     def test_cr_crlf
-      str = "%\r1\r\n \r"
+      str = l "\r1\r\n \r"
       assert_parse "1\n ", str
     end
 
     def test_lf_crlf
-      str = "%\n1\r\n \n"
+      str = l "\n1\r\n \n"
       assert_parse "1", str
     end
 
     def test_lf_lf
-      str = "%\n1\n \n"
+      str = l "\n1\n \n"
       assert_parse "1", str
     end
 
     def assert_parse(expected, str)
+      assert_equal expected, find_node(str).unescaped
+    end
+  end
+
+  class PercentDelimiterStringTest < TestCase
+    include PercentDelimiterTests
+
+    def find_node(str)
+      tree = Prism.parse str
+      tree.value.breadth_first_search { |x| Prism::StringNode === x }
+    end
+
+    def l(str)
+      "%" + str
+    end
+  end
+
+  class PercentDelimiterRegexpTest < TestCase
+    include PercentDelimiterTests
+
+    def l(str)
+      "%r" + str
+    end
+
+    def find_node(str)
       tree = Prism.parse str
-      node = tree.value.breadth_first_search { |x| Prism::StringNode === x }
-      assert_equal expected, node.unescaped
+      tree.value.breadth_first_search { |x| Prism::RegularExpressionNode === x }
     end
   end
 end
author	Aaron Patterson <tenderlove@ruby-lang.org>	2024-12-11 15:54:56 -0800
committer	git <svn-admin@ruby-lang.org>	2024-12-12 00:42:44 +0000
commit	9181e8bc87dd167673bc6cfc255c7003e4c6b05e (patch)
tree	3ef2de341178b5e1af3a63558414bbd4839616d8
parent	0a1fa994820c3373c538376fcee82565135c7e77 (diff)