diff options
author | Jeremy Evans <code@jeremyevans.net> | 2023-04-01 09:19:35 -0700 |
---|---|---|
committer | Jeremy Evans <code@jeremyevans.net> | 2023-04-25 08:06:16 -0700 |
commit | af2da6419aba1e242e851664b4e6816aeb27f8cb (patch) | |
tree | 6c9a085aa6ad05f1134a0c52fd6e58f5c3bf39fd /vm_insnhelper.c | |
parent | f6254f77f7a7c4d1f11180b3b382680868bd9ee4 (diff) |
Optimize cfunc calls for f(*a) and f(*a, **kw) if kw is empty
This optimizes the following calls:
* ~10-15% for f(*a) when a does not end with a flagged keywords hash
* ~10-15% for f(*a) when a ends with an empty flagged keywords hash
* ~35-40% for f(*a, **kw) if kw is empty
This still copies the array contents to the VM stack, but avoids some
overhead. It would be faster to use the array pointer directly,
but that could cause problems if the array was modified during
the call to the function. You could do that optimization for frozen
arrays, but as splatting frozen arrays is uncommon, and the speedup
is minimal (<5%), it doesn't seem worth it.
The vm_send_cfunc benchmark has been updated to test additional cfunc
call types, and the numbers above were taken from the benchmark results.
Notes:
Merged: https://github.com/ruby/ruby/pull/7522
Diffstat (limited to 'vm_insnhelper.c')
-rw-r--r-- | vm_insnhelper.c | 85 |
1 files changed, 83 insertions, 2 deletions
diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 264dedecb5..d66f1e833a 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -3462,10 +3462,10 @@ vm_call_cfunc_with_frame(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp } static VALUE -vm_call_cfunc(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling) +vm_call_cfunc_other(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling) { const struct rb_callinfo *ci = calling->ci; - RB_DEBUG_COUNTER_INC(ccf_cfunc); + RB_DEBUG_COUNTER_INC(ccf_cfunc_other); CALLER_SETUP_ARG(reg_cfp, calling, ci, ALLOW_HEAP_ARGV_KEEP_KWSPLAT); VALUE argv_ary; @@ -3488,6 +3488,87 @@ vm_call_cfunc(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb } } +static inline VALUE +vm_call_cfunc_array_argv(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling, int stack_offset, int argc_offset) +{ + VALUE argv_ary = reg_cfp->sp[-1 - stack_offset]; + int argc = RARRAY_LENINT(argv_ary) - argc_offset; + + if (UNLIKELY(argc > VM_ARGC_STACK_MAX)) { + return vm_call_cfunc_other(ec, reg_cfp, calling); + } + + VALUE *argv = (VALUE *)RARRAY_CONST_PTR(argv_ary); + calling->kw_splat = 0; + int i; + VALUE *stack_bottom = reg_cfp->sp - 2 - stack_offset; + VALUE *sp = stack_bottom; + CHECK_VM_STACK_OVERFLOW(reg_cfp, argc); + for(i = 0; i < argc; i++) { + *++sp = argv[i]; + } + reg_cfp->sp = sp+1; + + return vm_call_cfunc_with_frame_(ec, reg_cfp, calling, argc, stack_bottom+1, stack_bottom); +} + +static inline VALUE +vm_call_cfunc_only_splat(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling) +{ + RB_DEBUG_COUNTER_INC(ccf_cfunc_only_splat); + VALUE argv_ary = reg_cfp->sp[-1]; + int argc = RARRAY_LENINT(argv_ary); + VALUE *argv = (VALUE *)RARRAY_CONST_PTR(argv_ary); + VALUE last_hash; + int argc_offset = 0; + + if (UNLIKELY(argc > 0 && + RB_TYPE_P((last_hash = argv[argc-1]), T_HASH) && + (((struct RHash *)last_hash)->basic.flags & RHASH_PASS_AS_KEYWORDS))) { + if (!RHASH_EMPTY_P(last_hash)) { + return vm_call_cfunc_other(ec, reg_cfp, calling); + } + argc_offset++; + } + return vm_call_cfunc_array_argv(ec, reg_cfp, calling, 0, argc_offset); +} + +static inline VALUE +vm_call_cfunc_only_splat_kw(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling) +{ + RB_DEBUG_COUNTER_INC(ccf_cfunc_only_splat_kw); + VALUE keyword_hash = reg_cfp->sp[-1]; + + if (RB_TYPE_P(keyword_hash, T_HASH) && RHASH_EMPTY_P(keyword_hash)) { + return vm_call_cfunc_array_argv(ec, reg_cfp, calling, 1, 0); + } + + return vm_call_cfunc_other(ec, reg_cfp, calling); +} + +static VALUE +vm_call_cfunc(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling) +{ + const struct rb_callinfo *ci = calling->ci; + RB_DEBUG_COUNTER_INC(ccf_cfunc); + + if (IS_ARGS_SPLAT(ci)) { + if (!IS_ARGS_KW_SPLAT(ci) && vm_ci_argc(ci) == 1) { + // f(*a) + CC_SET_FASTPATH(calling->cc, vm_call_cfunc_only_splat, TRUE); + return vm_call_cfunc_only_splat(ec, reg_cfp, calling); + } + if (IS_ARGS_KW_SPLAT(ci) && vm_ci_argc(ci) == 2) { + // f(*a, **kw) + CC_SET_FASTPATH(calling->cc, vm_call_cfunc_only_splat_kw, TRUE); + return vm_call_cfunc_only_splat_kw(ec, reg_cfp, calling); + } + } + + CC_SET_FASTPATH(calling->cc, vm_call_cfunc_other, TRUE); + return vm_call_cfunc_other(ec, reg_cfp, calling); +} + static VALUE vm_call_ivar(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling) { |