Fix handling of control/meta escapes in literal regexps

Ruby uses a recursive algorithm for handling control/meta escapes in strings (read_escape). However, the equivalent code for regexps (tokadd_escape) in did not use a recursive algorithm. Due to this, Handling of control/meta escapes in regexp did not have the same behavior as in strings, leading to behavior such as the following returning nil: ```ruby /\c\xFF/ =~ "\c\xFF" ``` Switch the code for handling \c, \C and \M in literal regexps to use the same code as for strings (read_escape), to keep behavior consistent between the two. Fixes [Bug #14367]
author: Jeremy Evans <code@jeremyevans.net> 2021-05-12 12:37:55 -0700
committer: Jeremy Evans <code@jeremyevans.net> 2021-05-12 18:55:43 -0700
commit: 11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 (patch)
tree: f250c8418f155fb0ee7f578085e2354bdbb7036a
parent: 9484f9ebdf675f71811a5583c1af2415b26c932f (diff)
3 files changed, 36 insertions, 34 deletions
diff --git a/parse.y b/parse.y
index fc1fbd7fd9..66813e5ecb 100644
--- a/parse.y
+++ b/parse.y
@@ -6902,10 +6902,8 @@ static int
 tokadd_escape(struct parser_params *p, rb_encoding **encp)
 {
     int c;
-    int flags = 0;
     size_t numlen;
 
-  first:
     switch (c = nextc(p)) {
       case '\n':
 	return 0;		/* just ignore */
@@ -6928,37 +6926,6 @@ tokadd_escape(struct parser_params *p, rb_encoding **encp)
 	}
 	return 0;
 
-      case 'M':
-	if (flags & ESCAPE_META) goto eof;
-	if ((c = nextc(p)) != '-') {
-	    pushback(p, c);
-	    goto eof;
-	}
-	tokcopy(p, 3);
-	flags |= ESCAPE_META;
-	goto escaped;
-
-      case 'C':
-	if (flags & ESCAPE_CONTROL) goto eof;
-	if ((c = nextc(p)) != '-') {
-	    pushback(p, c);
-	    goto eof;
-	}
-	tokcopy(p, 3);
-	goto escaped;
-
-      case 'c':
-	if (flags & ESCAPE_CONTROL) goto eof;
-	tokcopy(p, 2);
-	flags |= ESCAPE_CONTROL;
-      escaped:
-	if ((c = nextc(p)) == '\\') {
-	    goto first;
-	}
-	else if (c == -1) goto eof;
-	tokadd(p, c);
-	return 0;
-
       eof:
       case -1:
         yyerror0("Invalid escape character syntax");
@@ -7151,6 +7118,23 @@ tokadd_string(struct parser_params *p,
 		    goto non_ascii;
 		}
 		if (func & STR_FUNC_REGEXP) {
+                    switch (c) {
+                      case 'c':
+                      case 'C':
+                      case 'M': {
+                        pushback(p, c);
+                        c = read_escape(p, 0, enc);
+
+                        int i;
+                        char escbuf[5];
+                        snprintf(escbuf, sizeof(escbuf), "\\x%02X", c);
+                        for(i = 0; i < 4; i++) {
+                            tokadd(p, escbuf[i]);
+                        }
+                        continue;
+                      }
+                    }
+
 		    if (c == term && !simple_re_meta(c)) {
 			tokadd(p, c);
 			continue;
diff --git a/spec/ruby/language/regexp/interpolation_spec.rb b/spec/ruby/language/regexp/interpolation_spec.rb
index ed0b724763..6951fd38ca 100644
--- a/spec/ruby/language/regexp/interpolation_spec.rb
+++ b/spec/ruby/language/regexp/interpolation_spec.rb
@@ -36,7 +36,7 @@ describe "Regexps with interpolation" do
 
   it "gives precedence to escape sequences over substitution" do
     str = "J"
-    /\c#{str}/.to_s.should == '(?-mix:\c#' + '{str})'
+    /\c#{str}/.to_s.should include('{str}')
   end
 
   it "throws RegexpError for malformed interpolation" do
diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb
index a9e0cdff0e..5ba50b3517 100644
--- a/test/ruby/test_regexp.rb
+++ b/test/ruby/test_regexp.rb
@@ -496,6 +496,24 @@ class TestRegexp < Test::Unit::TestCase
     assert_raise(RegexpError) { Regexp.new("((?<v>))\\g<0>") }
   end
 
+  def test_match_control_meta_escape
+    assert_equal(0, /\c\xFF/ =~ "\c\xFF")
+    assert_equal(0, /\c\M-\xFF/ =~ "\c\M-\xFF")
+    assert_equal(0, /\C-\xFF/ =~ "\C-\xFF")
+    assert_equal(0, /\C-\M-\xFF/ =~ "\C-\M-\xFF")
+    assert_equal(0, /\M-\xFF/ =~ "\M-\xFF")
+    assert_equal(0, /\M-\C-\xFF/ =~ "\M-\C-\xFF")
+    assert_equal(0, /\M-\c\xFF/ =~ "\M-\c\xFF")
+
+    assert_nil(/\c\xFE/ =~ "\c\xFF")
+    assert_nil(/\c\M-\xFE/ =~ "\c\M-\xFF")
+    assert_nil(/\C-\xFE/ =~ "\C-\xFF")
+    assert_nil(/\C-\M-\xFE/ =~ "\C-\M-\xFF")
+    assert_nil(/\M-\xFE/ =~ "\M-\xFF")
+    assert_nil(/\M-\C-\xFE/ =~ "\M-\C-\xFF")
+    assert_nil(/\M-\c\xFE/ =~ "\M-\c\xFF")
+  end
+
   def test_unescape
     assert_raise(ArgumentError) { s = '\\'; /#{ s }/ }
     assert_equal(/\xFF/n, /#{ s="\\xFF" }/n)
author	Jeremy Evans <code@jeremyevans.net>	2021-05-12 12:37:55 -0700
committer	Jeremy Evans <code@jeremyevans.net>	2021-05-12 18:55:43 -0700
commit	11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 (patch)
tree	f250c8418f155fb0ee7f578085e2354bdbb7036a
parent	9484f9ebdf675f71811a5583c1af2415b26c932f (diff)