summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeremy Evans <code@jeremyevans.net>2021-05-12 12:37:55 -0700
committerJeremy Evans <code@jeremyevans.net>2021-05-12 18:55:43 -0700
commit11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 (patch)
treef250c8418f155fb0ee7f578085e2354bdbb7036a
parent9484f9ebdf675f71811a5583c1af2415b26c932f (diff)
Fix handling of control/meta escapes in literal regexps
Ruby uses a recursive algorithm for handling control/meta escapes in strings (read_escape). However, the equivalent code for regexps (tokadd_escape) did not use a recursive algorithm. Due to this, handling of control/meta escapes in regexps did not have the same behavior as in strings, leading to behavior such as the following returning nil: ```ruby /\c\xFF/ =~ "\c\xFF" ``` Switch the code for handling \c, \C and \M in literal regexps to use the same code as for strings (read_escape), to keep behavior consistent between the two. Fixes [Bug #14367]
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/4495
-rw-r--r--parse.y50
-rw-r--r--spec/ruby/language/regexp/interpolation_spec.rb2
-rw-r--r--test/ruby/test_regexp.rb18
3 files changed, 36 insertions, 34 deletions
diff --git a/parse.y b/parse.y
index fc1fbd7fd9..66813e5ecb 100644
--- a/parse.y
+++ b/parse.y
@@ -6902,10 +6902,8 @@ static int
tokadd_escape(struct parser_params *p, rb_encoding **encp)
{
int c;
- int flags = 0;
size_t numlen;
- first:
switch (c = nextc(p)) {
case '\n':
return 0; /* just ignore */
@@ -6928,37 +6926,6 @@ tokadd_escape(struct parser_params *p, rb_encoding **encp)
}
return 0;
- case 'M':
- if (flags & ESCAPE_META) goto eof;
- if ((c = nextc(p)) != '-') {
- pushback(p, c);
- goto eof;
- }
- tokcopy(p, 3);
- flags |= ESCAPE_META;
- goto escaped;
-
- case 'C':
- if (flags & ESCAPE_CONTROL) goto eof;
- if ((c = nextc(p)) != '-') {
- pushback(p, c);
- goto eof;
- }
- tokcopy(p, 3);
- goto escaped;
-
- case 'c':
- if (flags & ESCAPE_CONTROL) goto eof;
- tokcopy(p, 2);
- flags |= ESCAPE_CONTROL;
- escaped:
- if ((c = nextc(p)) == '\\') {
- goto first;
- }
- else if (c == -1) goto eof;
- tokadd(p, c);
- return 0;
-
eof:
case -1:
yyerror0("Invalid escape character syntax");
@@ -7151,6 +7118,23 @@ tokadd_string(struct parser_params *p,
goto non_ascii;
}
if (func & STR_FUNC_REGEXP) {
+ switch (c) {
+ case 'c':
+ case 'C':
+ case 'M': {
+ pushback(p, c);
+ c = read_escape(p, 0, enc);
+
+ int i;
+ char escbuf[5];
+ snprintf(escbuf, sizeof(escbuf), "\\x%02X", c);
+ for(i = 0; i < 4; i++) {
+ tokadd(p, escbuf[i]);
+ }
+ continue;
+ }
+ }
+
if (c == term && !simple_re_meta(c)) {
tokadd(p, c);
continue;
diff --git a/spec/ruby/language/regexp/interpolation_spec.rb b/spec/ruby/language/regexp/interpolation_spec.rb
index ed0b724763..6951fd38ca 100644
--- a/spec/ruby/language/regexp/interpolation_spec.rb
+++ b/spec/ruby/language/regexp/interpolation_spec.rb
@@ -36,7 +36,7 @@ describe "Regexps with interpolation" do
it "gives precedence to escape sequences over substitution" do
str = "J"
- /\c#{str}/.to_s.should == '(?-mix:\c#' + '{str})'
+ /\c#{str}/.to_s.should include('{str}')
end
it "throws RegexpError for malformed interpolation" do
diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb
index a9e0cdff0e..5ba50b3517 100644
--- a/test/ruby/test_regexp.rb
+++ b/test/ruby/test_regexp.rb
@@ -496,6 +496,24 @@ class TestRegexp < Test::Unit::TestCase
assert_raise(RegexpError) { Regexp.new("((?<v>))\\g<0>") }
end
+ def test_match_control_meta_escape
+ assert_equal(0, /\c\xFF/ =~ "\c\xFF")
+ assert_equal(0, /\c\M-\xFF/ =~ "\c\M-\xFF")
+ assert_equal(0, /\C-\xFF/ =~ "\C-\xFF")
+ assert_equal(0, /\C-\M-\xFF/ =~ "\C-\M-\xFF")
+ assert_equal(0, /\M-\xFF/ =~ "\M-\xFF")
+ assert_equal(0, /\M-\C-\xFF/ =~ "\M-\C-\xFF")
+ assert_equal(0, /\M-\c\xFF/ =~ "\M-\c\xFF")
+
+ assert_nil(/\c\xFE/ =~ "\c\xFF")
+ assert_nil(/\c\M-\xFE/ =~ "\c\M-\xFF")
+ assert_nil(/\C-\xFE/ =~ "\C-\xFF")
+ assert_nil(/\C-\M-\xFE/ =~ "\C-\M-\xFF")
+ assert_nil(/\M-\xFE/ =~ "\M-\xFF")
+ assert_nil(/\M-\C-\xFE/ =~ "\M-\C-\xFF")
+ assert_nil(/\M-\c\xFE/ =~ "\M-\c\xFF")
+ end
+
def test_unescape
assert_raise(ArgumentError) { s = '\\'; /#{ s }/ }
assert_equal(/\xFF/n, /#{ s="\\xFF" }/n)