From 11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Wed, 12 May 2021 12:37:55 -0700 Subject: Fix handling of control/meta escapes in literal regexps Ruby uses a recursive algorithm for handling control/meta escapes in strings (read_escape). However, the equivalent code for regexps (tokadd_escape) did not use a recursive algorithm. Due to this, handling of control/meta escapes in regexps did not have the same behavior as in strings, leading to behavior such as the following returning nil: ```ruby /\c\xFF/ =~ "\c\xFF" ``` Switch the code for handling \c, \C and \M in literal regexps to use the same code as for strings (read_escape), to keep behavior consistent between the two. Fixes [Bug #14367] --- parse.y | 50 +++++++++++++++++--------------------------------- 1 file changed, 17 insertions(+), 33 deletions(-) (limited to 'parse.y') diff --git a/parse.y b/parse.y index fc1fbd7fd9..66813e5ecb 100644 --- a/parse.y +++ b/parse.y @@ -6902,10 +6902,8 @@ static int tokadd_escape(struct parser_params *p, rb_encoding **encp) { int c; - int flags = 0; size_t numlen; - first: switch (c = nextc(p)) { case '\n': return 0; /* just ignore */ @@ -6928,37 +6926,6 @@ tokadd_escape(struct parser_params *p, rb_encoding **encp) } return 0; - case 'M': - if (flags & ESCAPE_META) goto eof; - if ((c = nextc(p)) != '-') { - pushback(p, c); - goto eof; - } - tokcopy(p, 3); - flags |= ESCAPE_META; - goto escaped; - - case 'C': - if (flags & ESCAPE_CONTROL) goto eof; - if ((c = nextc(p)) != '-') { - pushback(p, c); - goto eof; - } - tokcopy(p, 3); - goto escaped; - - case 'c': - if (flags & ESCAPE_CONTROL) goto eof; - tokcopy(p, 2); - flags |= ESCAPE_CONTROL; - escaped: - if ((c = nextc(p)) == '\\') { - goto first; - } - else if (c == -1) goto eof; - tokadd(p, c); - return 0; - eof: case -1: yyerror0("Invalid escape character syntax"); @@ -7151,6 +7118,23 @@ tokadd_string(struct parser_params *p, goto non_ascii; } if (func & STR_FUNC_REGEXP) { + 
switch (c) { + case 'c': + case 'C': + case 'M': { + pushback(p, c); + c = read_escape(p, 0, enc); + + int i; + char escbuf[5]; + snprintf(escbuf, sizeof(escbuf), "\\x%02X", c); + for(i = 0; i < 4; i++) { + tokadd(p, escbuf[i]); + } + continue; + } + } + if (c == term && !simple_re_meta(c)) { tokadd(p, c); continue; -- cgit v1.2.3