diff options
author | Jeremy Evans <code@jeremyevans.net> | 2021-08-11 13:50:59 -0700 |
---|---|---|
committer | Jeremy Evans <code@jeremyevans.net> | 2021-10-01 19:50:19 -0900 |
commit | abc0304cb28cb9dcc3476993bc487884c139fd11 (patch) | |
tree | a1d1c3551f806222bc07d5d53356cc53368f7fd8 /re.c | |
parent | d08721465850a6e6954b43bbfebe2ed5a7256dec (diff) |
Avoid race condition in Regexp#match
In certain conditions, Regexp#match could return a MatchData with
missing captures. This seems to require, at the least, multiple
threads calling a method that calls the same block/proc/lambda
which calls Regexp#match.
The race condition happens because the MatchData is passed
indirectly via the backref, and other threads can modify the
backref.
Fix the issue by:
1. Not reusing the existing MatchData from the backref, and always
allocating a new MatchData.
2. Passing the MatchData directly to the caller using a VALUE*,
instead of indirectly through the backref.
It's likely that variants of this issue exist for other Regexp
methods. Anywhere that MatchData is passed implicitly through
the backref is probably vulnerable to this issue.
Fixes [Bug #17507]
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/4734
Diffstat (limited to 're.c')
-rw-r--r-- | re.c | 46 |
1 files changed, 19 insertions, 27 deletions
@@ -1615,8 +1615,8 @@ rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int reverse) } /* returns byte offset */ -long -rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str) +static long +rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match) { long result; VALUE match; @@ -1638,18 +1638,7 @@ rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str) tmpreg = reg != RREGEXP_PTR(re); if (!tmpreg) RREGEXP(re)->usecnt++; - match = rb_backref_get(); - if (!NIL_P(match)) { - if (FL_TEST(match, MATCH_BUSY)) { - match = Qnil; - } - else { - regs = RMATCH_REGS(match); - } - } - if (NIL_P(match)) { - MEMZERO(regs, struct re_registers, 1); - } + MEMZERO(regs, struct re_registers, 1); if (!reverse) { range += len; } @@ -1682,13 +1671,10 @@ rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str) } } - if (NIL_P(match)) { - int err; - match = match_alloc(rb_cMatch); - err = rb_reg_region_copy(RMATCH_REGS(match), regs); - onig_region_free(regs, 0); - if (err) rb_memerror(); - } + match = match_alloc(rb_cMatch); + int copy_err = rb_reg_region_copy(RMATCH_REGS(match), regs); + onig_region_free(regs, 0); + if (copy_err) rb_memerror(); if (set_backref_str) { RMATCH(match)->str = rb_str_new4(str); @@ -1696,11 +1682,18 @@ rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str) RMATCH(match)->regexp = re; rb_backref_set(match); + if (set_match) *set_match = match; return result; } long +rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str) +{ + return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str, NULL); +} + +long rb_reg_search(VALUE re, VALUE str, long pos, int reverse) { return rb_reg_search0(re, str, pos, reverse, 1); @@ -3193,7 +3186,7 @@ reg_operand(VALUE s, int check) } static long -reg_match_pos(VALUE re, VALUE *strp, long pos) +reg_match_pos(VALUE re, VALUE *strp, long pos, VALUE* 
set_match) { VALUE str = *strp; @@ -3212,7 +3205,7 @@ reg_match_pos(VALUE re, VALUE *strp, long pos) } pos = rb_str_offset(str, pos); } - return rb_reg_search(re, str, pos, 0); + return rb_reg_search_set_match(re, str, pos, 0, 1, set_match); } /* @@ -3266,7 +3259,7 @@ reg_match_pos(VALUE re, VALUE *strp, long pos) VALUE rb_reg_match(VALUE re, VALUE str) { - long pos = reg_match_pos(re, &str, 0); + long pos = reg_match_pos(re, &str, 0, NULL); if (pos < 0) return Qnil; pos = rb_str_sublen(str, pos); return LONG2FIX(pos); @@ -3377,7 +3370,7 @@ rb_reg_match2(VALUE re) static VALUE rb_reg_match_m(int argc, VALUE *argv, VALUE re) { - VALUE result, str, initpos; + VALUE result = Qnil, str, initpos; long pos; if (rb_scan_args(argc, argv, "11", &str, &initpos) == 2) { @@ -3387,12 +3380,11 @@ rb_reg_match_m(int argc, VALUE *argv, VALUE re) pos = 0; } - pos = reg_match_pos(re, &str, pos); + pos = reg_match_pos(re, &str, pos, &result); if (pos < 0) { rb_backref_set(Qnil); return Qnil; } - result = rb_backref_get(); rb_match_busy(result); if (!NIL_P(result) && rb_block_given_p()) { return rb_yield(result); |