diff options
author | Alan Wu <XrXr@users.noreply.github.com> | 2023-07-17 13:57:58 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-17 13:57:58 -0400 |
commit | f302e725e10ae05e613e2c24cae0741f65f2db91 (patch) | |
tree | f539d3b5a27636dde99dfe96ab07e917b4f62740 /vm_insnhelper.c | |
parent | 105bdba899fbb10aa51115c4cd074ea42eb9e3e6 (diff) |
Remove __bp__ and speed-up bmethod calls (#8060)
Remove rb_control_frame_t::__bp__ and optimize bmethod calls
This commit removes the __bp__ field from rb_control_frame_t. It was
introduced to help MJIT, but since MJIT was replaced by RJIT, we can use
vm_base_ptr() to compute it from the SP of the previous control frame
instead. Removing the field avoids needing to set it up when pushing new
frames.
Simply removing __bp__ would cause crashes since RJIT and YJIT used a
slightly different stack layout for bmethod calls than the interpreter.
At the moment of the call, the two layouts looked as follows:
               ┌────────────┐    ┌────────────┐
               │ frame_base │    │ frame_base │
               ├────────────┤    ├────────────┤
               │    ...     │    │    ...     │
               ├────────────┤    ├────────────┤
               │    args    │    │    args    │
               ├────────────┤    └────────────┘<─prev_frame_sp
               │  receiver  │
prev_frame_sp─>└────────────┘
                RJIT & YJIT       interpreter
Essentially, vm_base_ptr() needs to compute the address of frame_base
given prev_frame_sp in the diagrams. The presence of the receiver below
the args in the RJIT & YJIT layout created an off-by-one situation.
Make the interpreter use the layout the JITs use for iseq-to-iseq
bmethod calls. Doing so removes unnecessary argument shifting and
vm_exec_core() re-entry from the interpreter, yielding a speed
improvement visible through `benchmark/vm_defined_method.yml`:
patched: 7578743.1 i/s
master: 4796596.3 i/s - 1.58x slower
C-to-iseq bmethod calls now store one more VALUE than before, but that
should have negligible impact on overall performance.
Note that re-entering vm_exec_core() used to be necessary for firing
TracePoint events, but that's no longer the case since
9121e57a5f50bc91bae48b3b91edb283bf96cb6b.
Closes ruby/ruby#6428
Diffstat (limited to 'vm_insnhelper.c')
-rw-r--r-- | vm_insnhelper.c | 37 |
1 files changed, 20 insertions, 17 deletions
diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 9a96fc3b93..d33fdb8fa7 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -381,7 +381,6 @@ vm_push_frame(rb_execution_context_t *ec, .self = self, .ep = sp - 1, .block_code = NULL, - .__bp__ = sp, /* Store initial value of ep as bp to skip calculation cost of bp on JIT cancellation. */ #if VM_DEBUG_BP_CHECK .bp_check = sp, #endif @@ -2455,15 +2454,15 @@ double_cmp_ge(double a, double b) return RBOOL(a >= b); } +// Copied by vm_dump.c static inline VALUE * vm_base_ptr(const rb_control_frame_t *cfp) { -#if 0 // we may optimize and use this once we confirm it does not spoil performance on JIT. const rb_control_frame_t *prev_cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp); if (cfp->iseq && VM_FRAME_RUBYFRAME_P(cfp)) { VALUE *bp = prev_cfp->sp + ISEQ_BODY(cfp->iseq)->local_table_size + VM_ENV_DATA_SIZE; - if (ISEQ_BODY(cfp->iseq)->type == ISEQ_TYPE_METHOD) { + if (ISEQ_BODY(cfp->iseq)->type == ISEQ_TYPE_METHOD || VM_FRAME_BMETHOD_P(cfp)) { /* adjust `self' */ bp += 1; } @@ -2480,9 +2479,6 @@ vm_base_ptr(const rb_control_frame_t *cfp) else { return NULL; } -#else - return cfp->__bp__; -#endif } /* method call processes with call_info */ @@ -3693,23 +3689,30 @@ vm_call_iseq_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct const struct rb_captured_block *captured = &block->as.captured; const rb_iseq_t *iseq = rb_iseq_check(captured->code.iseq); - int i, opt_pc; - - VALUE *sp = cfp->sp - calling->argc - 1; - for (i = 0; i < calling->argc; i++) { - sp[i] = sp[i+1]; - } + VALUE * const argv = cfp->sp - calling->argc; + const int arg_size = ISEQ_BODY(iseq)->param.size; + int opt_pc; if (vm_ci_flag(calling->ci) & VM_CALL_ARGS_SIMPLE) { - opt_pc = vm_callee_setup_block_arg(ec, calling, calling->ci, iseq, sp, arg_setup_method); + opt_pc = vm_callee_setup_block_arg(ec, calling, calling->ci, iseq, argv, arg_setup_method); } else { - opt_pc = setup_parameters_complex(ec, iseq, calling, calling->ci, sp, 
arg_setup_method); + opt_pc = setup_parameters_complex(ec, iseq, calling, calling->ci, argv, arg_setup_method); } - cfp->sp = sp; - return invoke_bmethod(ec, iseq, calling->recv, captured, cme, - VM_FRAME_MAGIC_BLOCK | VM_FRAME_FLAG_LAMBDA, opt_pc); + cfp->sp = argv - 1; // -1 for the receiver + + vm_push_frame(ec, iseq, + VM_FRAME_MAGIC_BLOCK | VM_FRAME_FLAG_BMETHOD | VM_FRAME_FLAG_LAMBDA, + calling->recv, + VM_GUARDED_PREV_EP(captured->ep), + (VALUE)cme, + ISEQ_BODY(iseq)->iseq_encoded + opt_pc, + argv + arg_size, + ISEQ_BODY(iseq)->local_table_size - arg_size, + ISEQ_BODY(iseq)->stack_max); + + return Qundef; } static VALUE |