diff options
author | Peter Zhu <peter@peterzhu.ca> | 2023-07-28 11:28:44 -0400 |
---|---|---|
committer | Peter Zhu <peter@peterzhu.ca> | 2023-07-31 09:17:18 -0400 |
commit | d42b9ffb20658b6c0e3f75d7a3f3917d976abb1f (patch) | |
tree | 0269d5c909f789bc7b5e5fc910a599782e164382 /re.c | |
parent | a542512b7c394847a488e9b94d9defebe26003ce (diff) |
Reuse Regexp ptr when recompiling
When matching an incompatible encoding, the Regexp needs to recompile.
If `usecnt == 0`, then we can reuse the `ptr` because nothing else is
using it. This avoids allocating another `regex_t`.
This speeds up matches that switch to incompatible encodings by 15%.
Branch:
```
Regex#match? with different encoding
1.431M (± 1.3%) i/s - 7.264M in 5.076153s
Regex#match? with same encoding
16.858M (± 1.1%) i/s - 85.347M in 5.063279s
```
Base:
```
Regex#match? with different encoding
1.248M (± 2.0%) i/s - 6.342M in 5.083151s
Regex#match? with same encoding
16.377M (± 1.1%) i/s - 82.519M in 5.039504s
```
Script:
```
regex = /foo/
str1 = "日本語"
str2 = "English".force_encoding("ASCII-8BIT")
Benchmark.ips do |x|
x.report("Regex#match? with different encoding") do |times|
i = 0
while i < times
regex.match?(str1)
regex.match?(str2)
i += 1
end
end
x.report("Regex#match? with same encoding") do |times|
i = 0
while i < times
regex.match?(str1)
i += 1
end
end
end
```
Diffstat (limited to 're.c')
-rw-r--r-- | re.c | 35 |
1 files changed, 25 insertions, 10 deletions
@@ -1606,9 +1606,30 @@ rb_reg_prepare_re(VALUE re, VALUE str)
     const char *ptr;
     long len;
     RSTRING_GETMEM(unescaped, ptr, len);
-    r = onig_new(&reg, (UChar *)ptr, (UChar *)(ptr + len),
-                 reg->options, enc,
-                 OnigDefaultSyntax, &einfo);
+
+    /* If there are no other users of this regex, then we can directly overwrite it. */
+    if (RREGEXP(re)->usecnt == 0) {
+        regex_t tmp_reg;
+        r = onig_new_without_alloc(&tmp_reg, (UChar *)ptr, (UChar *)(ptr + len),
+                                   reg->options, enc,
+                                   OnigDefaultSyntax, &einfo);
+
+        if (r) {
+            /* There was an error so perform cleanups. */
+            onig_free_body(&tmp_reg);
+        }
+        else {
+            onig_free_body(reg);
+            /* There are no errors so set reg to tmp_reg. */
+            *reg = tmp_reg;
+        }
+    }
+    else {
+        r = onig_new(&reg, (UChar *)ptr, (UChar *)(ptr + len),
+                     reg->options, enc,
+                     OnigDefaultSyntax, &einfo);
+    }
+
     if (r) {
         onig_error_code_to_str((UChar*)err, r, &einfo);
         rb_reg_raise(pattern, RREGEXP_SRC_LEN(re), err, re);
@@ -1634,13 +1655,7 @@ rb_reg_onig_match(VALUE re, VALUE str,
 
     if (!tmpreg) RREGEXP(re)->usecnt--;
     if (tmpreg) {
-        if (RREGEXP(re)->usecnt) {
-            onig_free(reg);
-        }
-        else {
-            onig_free(RREGEXP_PTR(re));
-            RREGEXP_PTR(re) = reg;
-        }
+        onig_free(reg);
     }
 
     if (result < 0) {