summary | refs | log | tree | commit | diff
path: root/re.c
diff options
context:
space:
mode:
author: Peter Zhu <peter@peterzhu.ca> 2023-07-28 11:28:44 -0400
committer: Peter Zhu <peter@peterzhu.ca> 2023-07-31 09:17:18 -0400
commit: d42b9ffb20658b6c0e3f75d7a3f3917d976abb1f (patch)
tree: 0269d5c909f789bc7b5e5fc910a599782e164382 /re.c
parent: a542512b7c394847a488e9b94d9defebe26003ce (diff)
Reuse Regexp ptr when recompiling
When matching an incompatible encoding, the Regexp needs to recompile. If `usecnt == 0`, then we can reuse the `ptr` because nothing else is using it. This avoids allocating another `regex_t`.

This speeds up matches that switch to incompatible encodings by 15%.

Branch:

```
Regex#match? with different encoding
    1.431M (± 1.3%) i/s - 7.264M in 5.076153s
Regex#match? with same encoding
    16.858M (± 1.1%) i/s - 85.347M in 5.063279s
```

Base:

```
Regex#match? with different encoding
    1.248M (± 2.0%) i/s - 6.342M in 5.083151s
Regex#match? with same encoding
    16.377M (± 1.1%) i/s - 82.519M in 5.039504s
```

Script:

```
regex = /foo/
str1 = "日本語"
str2 = "English".force_encoding("ASCII-8BIT")

Benchmark.ips do |x|
  x.report("Regex#match? with different encoding") do |times|
    i = 0
    while i < times
      regex.match?(str1)
      regex.match?(str2)
      i += 1
    end
  end

  x.report("Regex#match? with same encoding") do |times|
    i = 0
    while i < times
      regex.match?(str1)
      i += 1
    end
  end
end
```
Diffstat (limited to 're.c')
-rw-r--r-- re.c | 35
1 files changed, 25 insertions, 10 deletions
diff --git a/re.c b/re.c
index abab264f7d..da7434f421 100644
--- a/re.c
+++ b/re.c
@@ -1606,9 +1606,30 @@ rb_reg_prepare_re(VALUE re, VALUE str)
const char *ptr;
long len;
RSTRING_GETMEM(unescaped, ptr, len);
- r = onig_new(&reg, (UChar *)ptr, (UChar *)(ptr + len),
- reg->options, enc,
- OnigDefaultSyntax, &einfo);
+
+ /* If there are no other users of this regex, then we can directly overwrite it. */
+ if (RREGEXP(re)->usecnt == 0) {
+ regex_t tmp_reg;
+ r = onig_new_without_alloc(&tmp_reg, (UChar *)ptr, (UChar *)(ptr + len),
+ reg->options, enc,
+ OnigDefaultSyntax, &einfo);
+
+ if (r) {
+ /* There was an error so perform cleanups. */
+ onig_free_body(&tmp_reg);
+ }
+ else {
+ onig_free_body(reg);
+ /* There are no errors so set reg to tmp_reg. */
+ *reg = tmp_reg;
+ }
+ }
+ else {
+ r = onig_new(&reg, (UChar *)ptr, (UChar *)(ptr + len),
+ reg->options, enc,
+ OnigDefaultSyntax, &einfo);
+ }
+
if (r) {
onig_error_code_to_str((UChar*)err, r, &einfo);
rb_reg_raise(pattern, RREGEXP_SRC_LEN(re), err, re);
@@ -1634,13 +1655,7 @@ rb_reg_onig_match(VALUE re, VALUE str,
if (!tmpreg) RREGEXP(re)->usecnt--;
if (tmpreg) {
- if (RREGEXP(re)->usecnt) {
- onig_free(reg);
- }
- else {
- onig_free(RREGEXP_PTR(re));
- RREGEXP_PTR(re) = reg;
- }
+ onig_free(reg);
}
if (result < 0) {