diff options
author | Peter Zhu <peter@peterzhu.ca> | 2023-07-26 15:57:03 -0400 |
---|---|---|
committer | Peter Zhu <peter@peterzhu.ca> | 2023-07-27 13:33:40 -0400 |
commit | 7193b404a1a56e50f8046d0382914907020c1559 (patch) | |
tree | f0d17c30d36039eab9b8c421e24d412535e05d5e /ext | |
parent | e5effa4bd063f454f9f304e6f9fbf9dd8b353a76 (diff) |
Add function rb_reg_onig_match
rb_reg_onig_match performs preparation, error handling, and cleanup for
matching a regex against a string. This reduces repetitive code and
removes the need for StringScanner to access internal data of regex.
Notes:
Merged: https://github.com/ruby/ruby/pull/8123
Diffstat (limited to 'ext')
-rw-r--r-- | ext/strscan/extconf.rb | 1 | ||||
-rw-r--r-- | ext/strscan/strscan.c | 107 |
2 files changed, 69 insertions, 39 deletions
```diff
diff --git a/ext/strscan/extconf.rb b/ext/strscan/extconf.rb
index b53b63e455..bd65606a4e 100644
--- a/ext/strscan/extconf.rb
+++ b/ext/strscan/extconf.rb
@@ -3,6 +3,7 @@ require 'mkmf'
 if RUBY_ENGINE == 'ruby'
   $INCFLAGS << " -I$(top_srcdir)" if $extmk
   have_func("onig_region_memsize", "ruby.h")
+  have_func("rb_reg_onig_match", "ruby.h")
   create_makefile 'strscan'
 else
   File.write('Makefile', dummy_makefile("").join)
diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c
index 9f24287e35..c8e8ef6be9 100644
--- a/ext/strscan/strscan.c
+++ b/ext/strscan/strscan.c
@@ -539,6 +539,68 @@ adjust_register_position(struct strscanner *p, long position)
     }
 }
 
+/* rb_reg_onig_match is available in Ruby 3.3 and later. */
+#ifndef HAVE_RB_REG_ONIG_MATCH
+static OnigPosition
+rb_reg_onig_match(VALUE re, VALUE str,
+                  OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
+                  void *args, struct re_registers *regs)
+{
+    regex_t *reg = rb_reg_prepare_re(re, str);
+
+    bool tmpreg = reg != RREGEXP_PTR(re);
+    if (!tmpreg) RREGEXP(re)->usecnt++;
+
+    OnigPosition result = match(reg, str, regs, args);
+
+    if (!tmpreg) RREGEXP(re)->usecnt--;
+    if (tmpreg) {
+        if (RREGEXP(re)->usecnt) {
+            onig_free(reg);
+        }
+        else {
+            onig_free(RREGEXP_PTR(re));
+            RREGEXP_PTR(re) = reg;
+        }
+    }
+
+    if (result < 0) {
+        if (result != ONIG_MISMATCH) {
+            rb_raise(ScanError, "regexp buffer overflow");
+        }
+    }
+
+    return result;
+}
+#endif
+
+static OnigPosition
+strscan_match(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr)
+{
+    struct strscanner *p = (struct strscanner *)args_ptr;
+
+    return onig_match(reg,
+                      match_target(p),
+                      (UChar* )(CURPTR(p) + S_RESTLEN(p)),
+                      (UChar* )CURPTR(p),
+                      regs,
+                      ONIG_OPTION_NONE);
+}
+
+static OnigPosition
+strscan_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr)
+{
+    struct strscanner *p = (struct strscanner *)args_ptr;
+
+    return onig_search(reg,
+                       match_target(p),
+                       (UChar *)(CURPTR(p) + S_RESTLEN(p)),
+                       (UChar *)CURPTR(p),
+                       (UChar *)(CURPTR(p) + S_RESTLEN(p)),
+                       regs,
+                       ONIG_OPTION_NONE);
+}
+
 static VALUE
 strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
 {
@@ -560,47 +622,14 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
     }
 
     if (RB_TYPE_P(pattern, T_REGEXP)) {
-        regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
-        regex_t *re;
-        long ret;
-        int tmpreg;
-
         p->regex = pattern;
-        re = rb_reg_prepare_re(pattern, p->str);
-        tmpreg = re != RREGEXP_PTR(pattern);
-        if (!tmpreg) RREGEXP(pattern)->usecnt++;
-
-        if (headonly) {
-            ret = onig_match(re,
-                             match_target(p),
-                             (UChar* )(CURPTR(p) + S_RESTLEN(p)),
-                             (UChar* )CURPTR(p),
-                             &(p->regs),
-                             ONIG_OPTION_NONE);
-        }
-        else {
-            ret = onig_search(re,
-                              match_target(p),
-                              (UChar* )(CURPTR(p) + S_RESTLEN(p)),
-                              (UChar* )CURPTR(p),
-                              (UChar* )(CURPTR(p) + S_RESTLEN(p)),
-                              &(p->regs),
-                              ONIG_OPTION_NONE);
-        }
-        if (!tmpreg) RREGEXP(pattern)->usecnt--;
-        if (tmpreg) {
-            if (RREGEXP(pattern)->usecnt) {
-                onig_free(re);
-            }
-            else {
-                onig_free(RREGEXP_PTR(pattern));
-                RREGEXP_PTR(pattern) = re;
-            }
-        }
+        OnigPosition ret = rb_reg_onig_match(pattern,
+                                             p->str,
+                                             headonly ? strscan_match : strscan_search,
+                                             (void *)p,
+                                             &(p->regs));
 
-        if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
-        if (ret < 0) {
-            /* not matched */
+        if (ret == ONIG_MISMATCH) {
             return Qnil;
         }
     }
```