diff options
author | Jeremy Evans <code@jeremyevans.net> | 2023-03-23 14:39:31 -0700 |
---|---|---|
committer | Jeremy Evans <code@jeremyevans.net> | 2023-04-25 08:06:16 -0700 |
commit | f6254f77f7a7c4d1f11180b3b382680868bd9ee4 (patch) | |
tree | abfd87315df6e5742fc71ee6ea9e1bd45fed5654 /vm_insnhelper.c | |
parent | 99c6d19e502b5fdadbd367ae4b6bb3fab850fddc (diff) |
Speed up calling iseq bmethods
Currently, bmethod arguments are copied from the VM stack to the
C stack in vm_call_bmethod, then copied from the C stack to the VM
stack later in invoke_iseq_block_from_c. This is inefficient.
This adds vm_call_iseq_bmethod and vm_call_noniseq_bmethod.
vm_call_iseq_bmethod is an optimized method that skips stack
copies (though there is one copy to remove the receiver from
the stack), and avoids calling vm_call_bmethod_body,
rb_vm_invoke_bmethod, invoke_block_from_c_proc,
invoke_iseq_block_from_c, and vm_yield_setup_args.
The vm_call_iseq_bmethod argument handling is similar to the
way normal iseq methods are called, and allows for similar
performance optimizations when using splats or keywords.
However, even in the no argument case it's still significantly
faster.
A benchmark is added for bmethod calling. In my environment,
it improves bmethod calling performance by 38-59% for simple
bmethod calls, and up to 180% for bmethod calls passing
literal keywords on both sides.
```
bmethod_simple_0
./miniruby-iseq-bmethod: 18159792.6 i/s
./miniruby-m: 13174419.1 i/s - 1.38x slower
bmethod_simple_1
./miniruby-iseq-bmethod: 15890745.4 i/s
./miniruby-m: 10008972.7 i/s - 1.59x slower
bmethod_simple_0_splat
./miniruby-iseq-bmethod: 13142804.3 i/s
./miniruby-m: 11168595.2 i/s - 1.18x slower
bmethod_simple_1_splat
./miniruby-iseq-bmethod: 12375791.0 i/s
./miniruby-m: 8491140.1 i/s - 1.46x slower
bmethod_no_splat
./miniruby-iseq-bmethod: 10151258.8 i/s
./miniruby-m: 8716664.1 i/s - 1.16x slower
bmethod_0_splat
./miniruby-iseq-bmethod: 8138802.5 i/s
./miniruby-m: 7515600.2 i/s - 1.08x slower
bmethod_1_splat
./miniruby-iseq-bmethod: 8028372.7 i/s
./miniruby-m: 5947658.6 i/s - 1.35x slower
bmethod_10_splat
./miniruby-iseq-bmethod: 6953514.1 i/s
./miniruby-m: 4840132.9 i/s - 1.44x slower
bmethod_100_splat
./miniruby-iseq-bmethod: 5287288.4 i/s
./miniruby-m: 2243218.4 i/s - 2.36x slower
bmethod_kw
./miniruby-iseq-bmethod: 8931358.2 i/s
./miniruby-m: 3185818.6 i/s - 2.80x slower
bmethod_no_kw
./miniruby-iseq-bmethod: 12281287.4 i/s
./miniruby-m: 10041727.9 i/s - 1.22x slower
bmethod_kw_splat
./miniruby-iseq-bmethod: 5618956.8 i/s
./miniruby-m: 3657549.5 i/s - 1.54x slower
```
Notes:
Merged: https://github.com/ruby/ruby/pull/7522
Diffstat (limited to 'vm_insnhelper.c')
-rw-r--r-- | vm_insnhelper.c | 79 |
1 file changed, 74 insertions, 5 deletions
diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 0bbae89498..264dedecb5 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -3555,16 +3555,61 @@ vm_call_bmethod_body(rb_execution_context_t *ec, struct rb_calling_info *calling return val; } +static int vm_callee_setup_block_arg(rb_execution_context_t *ec, struct rb_calling_info *calling, const struct rb_callinfo *ci, const rb_iseq_t *iseq, VALUE *argv, const enum arg_setup_type arg_setup_type); +static VALUE invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, const struct rb_captured_block *captured, const rb_callable_method_entry_t *me, VALUE type, int opt_pc); + static VALUE -vm_call_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling) +vm_call_iseq_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling) { - RB_DEBUG_COUNTER_INC(ccf_bmethod); + RB_DEBUG_COUNTER_INC(ccf_iseq_bmethod); + + const struct rb_callcache *cc = calling->cc; + const rb_callable_method_entry_t *cme = vm_cc_cme(cc); + VALUE procv = cme->def->body.bmethod.proc; + + if (!RB_OBJ_SHAREABLE_P(procv) && + cme->def->body.bmethod.defined_ractor != rb_ractor_self(rb_ec_ractor_ptr(ec))) { + rb_raise(rb_eRuntimeError, "defined with an un-shareable Proc in a different Ractor"); + } + + rb_proc_t *proc; + GetProcPtr(procv, proc); + const struct rb_block *block = &proc->block; + + while (vm_block_type(block) == block_type_proc) { + block = vm_proc_block(block->as.proc); + } + VM_ASSERT(vm_block_type(block) == block_type_iseq); + + const struct rb_captured_block *captured = &block->as.captured; + const rb_iseq_t *iseq = rb_iseq_check(captured->code.iseq); + int i, opt_pc; + + VALUE *sp = cfp->sp - calling->argc - 1; + for (i = 0; i < calling->argc; i++) { + sp[i] = sp[i+1]; + } + + if (vm_ci_flag(calling->ci) & VM_CALL_ARGS_SIMPLE) { + opt_pc = vm_callee_setup_block_arg(ec, calling, calling->ci, iseq, sp, arg_setup_method); + } + else { + opt_pc = 
setup_parameters_complex(ec, iseq, calling, calling->ci, sp, arg_setup_method); + } + + cfp->sp = sp; + return invoke_bmethod(ec, iseq, calling->recv, captured, cme, + VM_FRAME_MAGIC_BLOCK | VM_FRAME_FLAG_LAMBDA, opt_pc); +} + +static VALUE +vm_call_noniseq_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling) +{ + RB_DEBUG_COUNTER_INC(ccf_noniseq_bmethod); VALUE *argv; int argc; - const struct rb_callinfo *ci = calling->ci; - - CALLER_SETUP_ARG(cfp, calling, ci, ALLOW_HEAP_ARGV); + CALLER_SETUP_ARG(cfp, calling, calling->ci, ALLOW_HEAP_ARGV); if (UNLIKELY(calling->heap_argv)) { argv = RARRAY_PTR(calling->heap_argv); cfp->sp -= 2; @@ -3579,6 +3624,30 @@ vm_call_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_c return vm_call_bmethod_body(ec, calling, argv); } +static VALUE +vm_call_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling) +{ + RB_DEBUG_COUNTER_INC(ccf_bmethod); + + const struct rb_callcache *cc = calling->cc; + const rb_callable_method_entry_t *cme = vm_cc_cme(cc); + VALUE procv = cme->def->body.bmethod.proc; + rb_proc_t *proc; + GetProcPtr(procv, proc); + const struct rb_block *block = &proc->block; + + while (vm_block_type(block) == block_type_proc) { + block = vm_proc_block(block->as.proc); + } + if (vm_block_type(block) == block_type_iseq) { + CC_SET_FASTPATH(cc, vm_call_iseq_bmethod, TRUE); + return vm_call_iseq_bmethod(ec, cfp, calling); + } + + CC_SET_FASTPATH(cc, vm_call_noniseq_bmethod, TRUE); + return vm_call_noniseq_bmethod(ec, cfp, calling); +} + VALUE rb_find_defined_class_by_owner(VALUE current_class, VALUE target_owner) { |