diff options
author | Peter Zhu <peter@peterzhu.ca> | 2023-07-26 15:57:03 -0400 |
---|---|---|
committer | Peter Zhu <peter@peterzhu.ca> | 2023-07-27 13:33:40 -0400 |
commit | 7193b404a1a56e50f8046d0382914907020c1559 (patch) | |
tree | f0d17c30d36039eab9b8c421e24d412535e05d5e /re.c | |
parent | e5effa4bd063f454f9f304e6f9fbf9dd8b353a76 (diff) |
Add function rb_reg_onig_match
rb_reg_onig_match performs preparation, error handling, and cleanup for
matching a regex against a string. This reduces repetitive code and
removes the need for StringScanner to access internal data of regex.
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/8123
Diffstat (limited to 're.c')
-rw-r--r-- | re.c | 242 |
1 files changed, 99 insertions, 143 deletions
@@ -1575,8 +1575,8 @@ rb_reg_prepare_enc(VALUE re, VALUE str, int warn) return enc; } -regex_t * -rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err) +static regex_t * +rb_reg_prepare_re(VALUE re, VALUE str, onig_errmsg_buffer err) { regex_t *reg = RREGEXP_PTR(re); int r; @@ -1620,11 +1620,40 @@ rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err) return reg; } -regex_t * -rb_reg_prepare_re(VALUE re, VALUE str) +OnigPosition +rb_reg_onig_match(VALUE re, VALUE str, + OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args), + void *args, struct re_registers *regs) { onig_errmsg_buffer err = ""; - return rb_reg_prepare_re0(re, str, err); + regex_t *reg = rb_reg_prepare_re(re, str, err); + + bool tmpreg = reg != RREGEXP_PTR(re); + if (!tmpreg) RREGEXP(re)->usecnt++; + + OnigPosition result = match(reg, str, regs, args); + + if (!tmpreg) RREGEXP(re)->usecnt--; + if (tmpreg) { + if (RREGEXP(re)->usecnt) { + onig_free(reg); + } + else { + onig_free(RREGEXP_PTR(re)); + RREGEXP_PTR(re) = reg; + } + } + + if (result < 0) { + onig_region_free(regs, 0); + + if (result != ONIG_MISMATCH) { + onig_error_code_to_str((UChar*)err, (int)result); + rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re); + } + } + + return result; } long @@ -1658,65 +1687,52 @@ rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int reverse) return pos; } +struct reg_onig_search_args { + long pos; + long range; +}; + +static OnigPosition +reg_onig_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr) +{ + struct reg_onig_search_args *args = (struct reg_onig_search_args *)args_ptr; + const char *ptr; + long len; + RSTRING_GETMEM(str, ptr, len); + + return onig_search( + reg, + (UChar *)ptr, + (UChar *)(ptr + len), + (UChar *)(ptr + args->pos), + (UChar *)(ptr + args->range), + regs, + ONIG_OPTION_NONE); +} + /* returns byte offset */ static long rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match) { - long result; - VALUE match; - struct re_registers regi, *regs = ®i; - char *start, *range; - long len; - regex_t *reg; - int tmpreg; - onig_errmsg_buffer err = ""; - - RSTRING_GETMEM(str, start, len); - range = start; + long len = RSTRING_LEN(str); if (pos > len || pos < 0) { rb_backref_set(Qnil); return -1; } - reg = rb_reg_prepare_re0(re, str, err); - tmpreg = reg != RREGEXP_PTR(re); - if (!tmpreg) RREGEXP(re)->usecnt++; + struct reg_onig_search_args args = { + .pos = pos, + .range = reverse ? 0 : len, + }; - MEMZERO(regs, struct re_registers, 1); - if (!reverse) { - range += len; - } - result = onig_search(reg, - (UChar*)start, - ((UChar*)(start + len)), - ((UChar*)(start + pos)), - ((UChar*)range), - regs, ONIG_OPTION_NONE); - if (!tmpreg) RREGEXP(re)->usecnt--; - if (tmpreg) { - if (RREGEXP(re)->usecnt) { - onig_free(reg); - } - else { - onig_free(RREGEXP_PTR(re)); - RREGEXP_PTR(re) = reg; - } - } - if (result < 0) { - if (regs == ®i) - onig_region_free(regs, 0); - if (result == ONIG_MISMATCH) { - rb_backref_set(Qnil); - return result; - } - else { - onig_error_code_to_str((UChar*)err, (int)result); - rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re); - } - } + VALUE match = match_alloc(rb_cMatch); + struct re_registers *regs = RMATCH_REGS(match); - match = match_alloc(rb_cMatch); - memcpy(RMATCH_REGS(match), regs, sizeof(struct re_registers)); + OnigPosition result = rb_reg_onig_match(re, str, reg_onig_search, &args, regs); + if (result == ONIG_MISMATCH) { + rb_backref_set(Qnil); + return ONIG_MISMATCH; + } if (set_backref_str) { RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str)); @@ -1748,69 +1764,35 @@ rb_reg_search(VALUE re, VALUE str, long pos, int reverse) return rb_reg_search0(re, str, pos, reverse, 1); } -bool -rb_reg_start_with_p(VALUE re, VALUE str) +static OnigPosition +reg_onig_match(regex_t *reg, VALUE str, struct re_registers *regs, void *_) { - long result; - VALUE match; - struct re_registers regi, *regs = ®i; - regex_t *reg; - int tmpreg; - onig_errmsg_buffer err = ""; - - reg = rb_reg_prepare_re0(re, str, err); - tmpreg = reg != RREGEXP_PTR(re); - if (!tmpreg) RREGEXP(re)->usecnt++; - - match = rb_backref_get(); - if (!NIL_P(match)) { - if (FL_TEST(match, MATCH_BUSY)) { - match = Qnil; - } - else { - regs = RMATCH_REGS(match); - } - } - if (NIL_P(match)) { - MEMZERO(regs, struct re_registers, 1); - } const char *ptr; long len; RSTRING_GETMEM(str, ptr, len); - result = onig_match(reg, - (UChar*)(ptr), - ((UChar*)(ptr + len)), - (UChar*)(ptr), - regs, ONIG_OPTION_NONE); - if (!tmpreg) RREGEXP(re)->usecnt--; - if (tmpreg) { - if (RREGEXP(re)->usecnt) { - onig_free(reg); - } - else { - onig_free(RREGEXP_PTR(re)); - RREGEXP_PTR(re) = reg; - } - } - if (result < 0) { - if (regs == ®i) - onig_region_free(regs, 0); - if (result == ONIG_MISMATCH) { - rb_backref_set(Qnil); - return false; - } - else { - onig_error_code_to_str((UChar*)err, (int)result); - rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re); - } - } - if (NIL_P(match)) { - int err; + return onig_match( + reg, + (UChar *)ptr, + (UChar *)(ptr + len), + (UChar *)ptr, + regs, + ONIG_OPTION_NONE); +} + +bool +rb_reg_start_with_p(VALUE re, VALUE str) +{ + VALUE match = rb_backref_get(); + if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) { match = match_alloc(rb_cMatch); - err = rb_reg_region_copy(RMATCH_REGS(match), regs); - onig_region_free(regs, 0); - if (err) rb_memerror(); + } + + struct re_registers *regs = RMATCH_REGS(match); + + if (rb_reg_onig_match(re, str, reg_onig_match, NULL, regs) == ONIG_MISMATCH) { + rb_backref_set(Qnil); + return false; } RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str)); @@ -3784,12 +3766,6 @@ rb_reg_match_m_p(int argc, VALUE *argv, VALUE re) VALUE rb_reg_match_p(VALUE re, VALUE str, long pos) { - regex_t *reg; - onig_errmsg_buffer err = ""; - OnigPosition result; - const UChar *start, *end; - int tmpreg; - if (NIL_P(str)) return Qfalse; str = SYMBOL_P(str) ? rb_sym2str(str) : StringValue(str); if (pos) { @@ -3804,33 +3780,13 @@ rb_reg_match_p(VALUE re, VALUE str, long pos) pos = beg - RSTRING_PTR(str); } } - reg = rb_reg_prepare_re0(re, str, err); - tmpreg = reg != RREGEXP_PTR(re); - if (!tmpreg) RREGEXP(re)->usecnt++; - start = ((UChar*)RSTRING_PTR(str)); - end = start + RSTRING_LEN(str); - result = onig_search(reg, start, end, start + pos, end, - NULL, ONIG_OPTION_NONE); - if (!tmpreg) RREGEXP(re)->usecnt--; - if (tmpreg) { - if (RREGEXP(re)->usecnt) { - onig_free(reg); - } - else { - onig_free(RREGEXP_PTR(re)); - RREGEXP_PTR(re) = reg; - } - } - if (result < 0) { - if (result == ONIG_MISMATCH) { - return Qfalse; - } - else { - onig_error_code_to_str((UChar*)err, (int)result); - rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re); - } - } - return Qtrue; + + struct reg_onig_search_args args = { + .pos = pos, + .range = RSTRING_LEN(str), + }; + + return rb_reg_onig_match(re, str, reg_onig_search, &args, NULL) == ONIG_MISMATCH ? Qfalse : Qtrue; } /* |