diff options
author | Daniel Colson <danieljamescolson@gmail.com> | 2022-11-22 21:16:11 -0500 |
---|---|---|
committer | John Hawthorn <john@hawthorn.email> | 2022-12-06 12:37:23 -0800 |
commit | e69b91fae4602b69c5ef45fcf82932adde8b31d8 (patch) | |
tree | 7b3466a4199eadf6bb00773145895697b1455748 /enum.c | |
parent | c43951e60eed0b01f464cd25441b81751d2d5087 (diff) |
Introduce BOP_CMP for optimized comparison
Prior to this commit the `OPTIMIZED_CMP` macro relied on a method lookup
to determine whether `<=>` was overridden. The result of the lookup was
cached, but only for the duration of the specific method that
initialized the cmp_opt_data cache structure.
With this method lookup, `[x,y].max` is slower than doing `x > y ?
x : y` even though there's an optimized instruction for "new array max".
(John noticed somebody a proposed micro-optimization based on this fact
in https://github.com/mastodon/mastodon/pull/19903.)
```rb
a, b = 1, 2
Benchmark.ips do |bm|
bm.report('conditional') { a > b ? a : b }
bm.report('method') { [a, b].max }
bm.compare!
end
```
Before:
```
Comparison:
conditional: 22603733.2 i/s
method: 19820412.7 i/s - 1.14x (± 0.00) slower
```
This commit replaces the method lookup with a new CMP basic op, which
gives the examples above equivalent performance.
After:
```
Comparison:
method: 24022466.5 i/s
conditional: 23851094.2 i/s - same-ish: difference falls within
error
```
Relevant benchmarks show an improvement to Array#max and Array#min when
not using the optimized newarray_max instruction as well. They are
noticeably faster for small arrays with the relevant types, and the same
or maybe a touch faster on larger arrays.
```
$ make benchmark COMPARE_RUBY=<master@5958c305> ITEM=array_min
$ make benchmark COMPARE_RUBY=<master@5958c305> ITEM=array_max
```
The benchmarks added in this commit also look generally improved.
Co-authored-by: John Hawthorn <jhawthorn@github.com>
Diffstat (limited to 'enum.c')
-rw-r--r-- | enum.c | 49 |
1 files changed, 16 insertions, 33 deletions
@@ -1373,7 +1373,6 @@ sort_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, _data)) static int sort_by_cmp(const void *ap, const void *bp, void *data) { - struct cmp_opt_data cmp_opt = { 0, 0 }; VALUE a; VALUE b; VALUE ary = (VALUE)data; @@ -1385,7 +1384,7 @@ sort_by_cmp(const void *ap, const void *bp, void *data) a = *(VALUE *)ap; b = *(VALUE *)bp; - return OPTIMIZED_CMP(a, b, cmp_opt); + return OPTIMIZED_CMP(a, b); } /* @@ -1713,11 +1712,10 @@ cmpint_reenter_check(struct nmin_data *data, VALUE val) static int nmin_cmp(const void *ap, const void *bp, void *_data) { - struct cmp_opt_data cmp_opt = { 0, 0 }; struct nmin_data *data = (struct nmin_data *)_data; VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp; #define rb_cmpint(cmp, a, b) rb_cmpint(cmpint_reenter_check(data, (cmp)), a, b) - return OPTIMIZED_CMP(a, b, cmp_opt); + return OPTIMIZED_CMP(a, b); #undef rb_cmpint } @@ -2027,7 +2025,6 @@ enum_none(int argc, VALUE *argv, VALUE obj) struct min_t { VALUE min; - struct cmp_opt_data cmp_opt; }; static VALUE @@ -2041,7 +2038,7 @@ min_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args)) memo->min = i; } else { - if (OPTIMIZED_CMP(i, memo->min, memo->cmp_opt) < 0) { + if (OPTIMIZED_CMP(i, memo->min) < 0) { memo->min = i; } } @@ -2130,7 +2127,7 @@ static VALUE enum_min(int argc, VALUE *argv, VALUE obj) { VALUE memo; - struct min_t *m = NEW_CMP_OPT_MEMO(struct min_t, memo); + struct min_t *m = NEW_MEMO_FOR(struct min_t, memo); VALUE result; VALUE num; @@ -2138,8 +2135,6 @@ enum_min(int argc, VALUE *argv, VALUE obj) return rb_nmin_run(obj, num, 0, 0, 0); m->min = Qundef; - m->cmp_opt.opt_methods = 0; - m->cmp_opt.opt_inited = 0; if (rb_block_given_p()) { rb_block_call(obj, id_each, 0, 0, min_ii, memo); } @@ -2153,7 +2148,6 @@ enum_min(int argc, VALUE *argv, VALUE obj) struct max_t { VALUE max; - struct cmp_opt_data cmp_opt; }; static VALUE @@ -2167,7 +2161,7 @@ max_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args)) memo->max = i; } else { - if (OPTIMIZED_CMP(i, memo->max, memo->cmp_opt) > 0) { + if (OPTIMIZED_CMP(i, memo->max) > 0) { memo->max = i; } } @@ -2255,7 +2249,7 @@ static VALUE enum_max(int argc, VALUE *argv, VALUE obj) { VALUE memo; - struct max_t *m = NEW_CMP_OPT_MEMO(struct max_t, memo); + struct max_t *m = NEW_MEMO_FOR(struct max_t, memo); VALUE result; VALUE num; @@ -2263,8 +2257,6 @@ enum_max(int argc, VALUE *argv, VALUE obj) return rb_nmin_run(obj, num, 0, 1, 0); m->max = Qundef; - m->cmp_opt.opt_methods = 0; - m->cmp_opt.opt_inited = 0; if (rb_block_given_p()) { rb_block_call(obj, id_each, 0, 0, max_ii, (VALUE)memo); } @@ -2280,7 +2272,6 @@ struct minmax_t { VALUE min; VALUE max; VALUE last; - struct cmp_opt_data cmp_opt; }; static void @@ -2293,11 +2284,11 @@ minmax_i_update(VALUE i, VALUE j, struct minmax_t *memo) memo->max = j; } else { - n = OPTIMIZED_CMP(i, memo->min, memo->cmp_opt); + n = OPTIMIZED_CMP(i, memo->min); if (n < 0) { memo->min = i; } - n = OPTIMIZED_CMP(j, memo->max, memo->cmp_opt); + n = OPTIMIZED_CMP(j, memo->max); if (n > 0) { memo->max = j; } @@ -2320,7 +2311,7 @@ minmax_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, _memo)) j = memo->last; memo->last = Qundef; - n = OPTIMIZED_CMP(j, i, memo->cmp_opt); + n = OPTIMIZED_CMP(j, i); if (n == 0) i = j; else if (n < 0) { @@ -2422,12 +2413,10 @@ static VALUE enum_minmax(VALUE obj) { VALUE memo; - struct minmax_t *m = NEW_CMP_OPT_MEMO(struct minmax_t, memo); + struct minmax_t *m = NEW_MEMO_FOR(struct minmax_t, memo); m->min = Qundef; m->last = Qundef; - m->cmp_opt.opt_methods = 0; - m->cmp_opt.opt_inited = 0; if (rb_block_given_p()) { rb_block_call(obj, id_each, 0, 0, minmax_ii, memo); if (!UNDEF_P(m->last)) @@ -2447,7 +2436,6 @@ enum_minmax(VALUE obj) static VALUE min_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args)) { - struct cmp_opt_data cmp_opt = { 0, 0 }; struct MEMO *memo = MEMO_CAST(args); VALUE v; @@ -2458,7 +2446,7 @@ min_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args)) MEMO_V1_SET(memo, v); MEMO_V2_SET(memo, i); } - else if (OPTIMIZED_CMP(v, memo->v1, cmp_opt) < 0) { + else if (OPTIMIZED_CMP(v, memo->v1) < 0) { MEMO_V1_SET(memo, v); MEMO_V2_SET(memo, i); } @@ -2522,7 +2510,6 @@ enum_min_by(int argc, VALUE *argv, VALUE obj) static VALUE max_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args)) { - struct cmp_opt_data cmp_opt = { 0, 0 }; struct MEMO *memo = MEMO_CAST(args); VALUE v; @@ -2533,7 +2520,7 @@ max_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args)) MEMO_V1_SET(memo, v); MEMO_V2_SET(memo, i); } - else if (OPTIMIZED_CMP(v, memo->v1, cmp_opt) > 0) { + else if (OPTIMIZED_CMP(v, memo->v1) > 0) { MEMO_V1_SET(memo, v); MEMO_V2_SET(memo, i); } @@ -2606,8 +2593,6 @@ struct minmax_by_t { static void minmax_by_i_update(VALUE v1, VALUE v2, VALUE i1, VALUE i2, struct minmax_by_t *memo) { - struct cmp_opt_data cmp_opt = { 0, 0 }; - if (UNDEF_P(memo->min_bv)) { memo->min_bv = v1; memo->max_bv = v2; @@ -2615,11 +2600,11 @@ minmax_by_i_update(VALUE v1, VALUE v2, VALUE i1, VALUE i2, struct minmax_by_t *m memo->max = i2; } else { - if (OPTIMIZED_CMP(v1, memo->min_bv, cmp_opt) < 0) { + if (OPTIMIZED_CMP(v1, memo->min_bv) < 0) { memo->min_bv = v1; memo->min = i1; } - if (OPTIMIZED_CMP(v2, memo->max_bv, cmp_opt) > 0) { + if (OPTIMIZED_CMP(v2, memo->max_bv) > 0) { memo->max_bv = v2; memo->max = i2; } @@ -2629,7 +2614,6 @@ minmax_by_i_update(VALUE v1, VALUE v2, VALUE i1, VALUE i2, struct minmax_by_t *m static VALUE minmax_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, _memo)) { - struct cmp_opt_data cmp_opt = { 0, 0 }; struct minmax_by_t *memo = MEMO_FOR(struct minmax_by_t, _memo); VALUE vi, vj, j; int n; @@ -2647,7 +2631,7 @@ minmax_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, _memo)) j = memo->last; memo->last_bv = Qundef; - n = OPTIMIZED_CMP(vj, vi, cmp_opt); + n = OPTIMIZED_CMP(vj, vi); if (n == 0) { i = j; vi = vj; @@ -3033,7 +3017,6 @@ each_cons_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, args)) static VALUE enum_each_cons_size(VALUE obj, VALUE args, VALUE eobj) { - struct cmp_opt_data cmp_opt = { 0, 0 }; const VALUE zero = LONG2FIX(0); VALUE n, size; long cons_size = NUM2LONG(RARRAY_AREF(args, 0)); @@ -3043,7 +3026,7 @@ enum_each_cons_size(VALUE obj, VALUE args, VALUE eobj) if (NIL_P(size)) return Qnil; n = add_int(size, 1 - cons_size); - return (OPTIMIZED_CMP(n, zero, cmp_opt) == -1) ? zero : n; + return (OPTIMIZED_CMP(n, zero) == -1) ? zero : n; } /* |