diff options
author | Jeremy Evans <code@jeremyevans.net> | 2021-05-12 12:37:55 -0700 |
---|---|---|
committer | Jeremy Evans <code@jeremyevans.net> | 2021-05-12 18:55:43 -0700 |
commit | 11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 (patch) | |
tree | f250c8418f155fb0ee7f578085e2354bdbb7036a | |
parent | 9484f9ebdf675f71811a5583c1af2415b26c932f (diff) |
Fix handling of control/meta escapes in literal regexps
Ruby uses a recursive algorithm for handling control/meta escapes
in strings (read_escape). However, the equivalent code for regexps
(tokadd_escape) did not use a recursive algorithm. Due to this,
handling of control/meta escapes in regexps did not have the same
behavior as in strings, leading to behavior such as the following
returning nil:
```ruby
/\c\xFF/ =~ "\c\xFF"
```
Switch the code for handling \c, \C and \M in literal regexps to
use the same code as for strings (read_escape), to keep behavior
consistent between the two.
Fixes [Bug #14367]
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/4495
-rw-r--r-- | parse.y | 50 | ||||
-rw-r--r-- | spec/ruby/language/regexp/interpolation_spec.rb | 2 | ||||
-rw-r--r-- | test/ruby/test_regexp.rb | 18 |
3 files changed, 36 insertions, 34 deletions
@@ -6902,10 +6902,8 @@ static int tokadd_escape(struct parser_params *p, rb_encoding **encp) { int c; - int flags = 0; size_t numlen; - first: switch (c = nextc(p)) { case '\n': return 0; /* just ignore */ @@ -6928,37 +6926,6 @@ tokadd_escape(struct parser_params *p, rb_encoding **encp) } return 0; - case 'M': - if (flags & ESCAPE_META) goto eof; - if ((c = nextc(p)) != '-') { - pushback(p, c); - goto eof; - } - tokcopy(p, 3); - flags |= ESCAPE_META; - goto escaped; - - case 'C': - if (flags & ESCAPE_CONTROL) goto eof; - if ((c = nextc(p)) != '-') { - pushback(p, c); - goto eof; - } - tokcopy(p, 3); - goto escaped; - - case 'c': - if (flags & ESCAPE_CONTROL) goto eof; - tokcopy(p, 2); - flags |= ESCAPE_CONTROL; - escaped: - if ((c = nextc(p)) == '\\') { - goto first; - } - else if (c == -1) goto eof; - tokadd(p, c); - return 0; - eof: case -1: yyerror0("Invalid escape character syntax"); @@ -7151,6 +7118,23 @@ tokadd_string(struct parser_params *p, goto non_ascii; } if (func & STR_FUNC_REGEXP) { + switch (c) { + case 'c': + case 'C': + case 'M': { + pushback(p, c); + c = read_escape(p, 0, enc); + + int i; + char escbuf[5]; + snprintf(escbuf, sizeof(escbuf), "\\x%02X", c); + for(i = 0; i < 4; i++) { + tokadd(p, escbuf[i]); + } + continue; + } + } + if (c == term && !simple_re_meta(c)) { tokadd(p, c); continue; diff --git a/spec/ruby/language/regexp/interpolation_spec.rb b/spec/ruby/language/regexp/interpolation_spec.rb index ed0b724763..6951fd38ca 100644 --- a/spec/ruby/language/regexp/interpolation_spec.rb +++ b/spec/ruby/language/regexp/interpolation_spec.rb @@ -36,7 +36,7 @@ describe "Regexps with interpolation" do it "gives precedence to escape sequences over substitution" do str = "J" - /\c#{str}/.to_s.should == '(?-mix:\c#' + '{str})' + /\c#{str}/.to_s.should include('{str}') end it "throws RegexpError for malformed interpolation" do diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index a9e0cdff0e..5ba50b3517 100644 --- 
a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -496,6 +496,24 @@ class TestRegexp < Test::Unit::TestCase assert_raise(RegexpError) { Regexp.new("((?<v>))\\g<0>") } end + def test_match_control_meta_escape + assert_equal(0, /\c\xFF/ =~ "\c\xFF") + assert_equal(0, /\c\M-\xFF/ =~ "\c\M-\xFF") + assert_equal(0, /\C-\xFF/ =~ "\C-\xFF") + assert_equal(0, /\C-\M-\xFF/ =~ "\C-\M-\xFF") + assert_equal(0, /\M-\xFF/ =~ "\M-\xFF") + assert_equal(0, /\M-\C-\xFF/ =~ "\M-\C-\xFF") + assert_equal(0, /\M-\c\xFF/ =~ "\M-\c\xFF") + + assert_nil(/\c\xFE/ =~ "\c\xFF") + assert_nil(/\c\M-\xFE/ =~ "\c\M-\xFF") + assert_nil(/\C-\xFE/ =~ "\C-\xFF") + assert_nil(/\C-\M-\xFE/ =~ "\C-\M-\xFF") + assert_nil(/\M-\xFE/ =~ "\M-\xFF") + assert_nil(/\M-\C-\xFE/ =~ "\M-\C-\xFF") + assert_nil(/\M-\c\xFE/ =~ "\M-\c\xFF") + end + def test_unescape assert_raise(ArgumentError) { s = '\\'; /#{ s }/ } assert_equal(/\xFF/n, /#{ s="\\xFF" }/n) |