diff options
author | Alan Wu <XrXr@users.noreply.github.com> | 2023-07-17 13:57:58 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-17 13:57:58 -0400 |
commit | f302e725e10ae05e613e2c24cae0741f65f2db91 (patch) | |
tree | f539d3b5a27636dde99dfe96ab07e917b4f62740 /vm.c | |
parent | 105bdba899fbb10aa51115c4cd074ea42eb9e3e6 (diff) |
Remove __bp__ and speed up bmethod calls (#8060)
Remove rb_control_frame_t::__bp__ and optimize bmethod calls
This commit removes the __bp__ field from rb_control_frame_t. It was
introduced to help MJIT, but since MJIT was replaced by RJIT, we can use
vm_base_ptr() to compute it from the SP of the previous control frame
instead. Removing the field avoids needing to set it up when pushing new
frames.
Simply removing __bp__ would cause crashes since RJIT and YJIT used a
slightly different stack layout for bmethod calls than the interpreter.
At the moment of the call, the two layouts looked as follows:
               ┌────────────┐    ┌────────────┐
               │ frame_base │    │ frame_base │
               ├────────────┤    ├────────────┤
               │    ...     │    │    ...     │
               ├────────────┤    ├────────────┤
               │    args    │    │    args    │
               ├────────────┤    └────────────┘<─prev_frame_sp
               │  receiver  │
prev_frame_sp─>└────────────┘
                RJIT & YJIT       interpreter
Essentially, vm_base_ptr() needs to compute the address of frame_base
given prev_frame_sp in the diagrams. The presence of the receiver in the
RJIT & YJIT layout created an off-by-one situation.
Make the interpreter use the layout the JITs use for iseq-to-iseq
bmethod calls. Doing so removes unnecessary argument shifting and
vm_exec_core() re-entry from the interpreter, yielding a speed
improvement visible through `benchmark/vm_defined_method.yml`:
patched: 7578743.1 i/s
master: 4796596.3 i/s - 1.58x slower
C-to-iseq bmethod calls now store one more VALUE than before, but that
should have negligible impact on overall performance.
Note that re-entering vm_exec_core() used to be necessary for firing
TracePoint events, but that's no longer the case since
9121e57a5f50bc91bae48b3b91edb283bf96cb6b.
Closes ruby/ruby#6428
Diffstat (limited to 'vm.c')
-rw-r--r-- | vm.c | 22 |
1 files changed, 13 insertions, 9 deletions
@@ -198,7 +198,7 @@ VM_CAPTURED_BLOCK_TO_CFP(const struct rb_captured_block *captured) { rb_control_frame_t *cfp = ((rb_control_frame_t *)((VALUE *)(captured) - 3)); VM_ASSERT(!VM_CFP_IN_HEAP_P(GET_EC(), cfp)); - VM_ASSERT(sizeof(rb_control_frame_t)/sizeof(VALUE) == 8 + VM_DEBUG_BP_CHECK ? 1 : 0); + VM_ASSERT(sizeof(rb_control_frame_t)/sizeof(VALUE) == 7 + VM_DEBUG_BP_CHECK ? 1 : 0); return cfp; } @@ -1398,7 +1398,7 @@ invoke_block(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, cons static VALUE invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, const struct rb_captured_block *captured, const rb_callable_method_entry_t *me, VALUE type, int opt_pc) { - /* bmethod */ + /* bmethod call from outside the VM */ int arg_size = ISEQ_BODY(iseq)->param.size; VALUE ret; @@ -1408,7 +1408,7 @@ invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, co VM_GUARDED_PREV_EP(captured->ep), (VALUE)me, ISEQ_BODY(iseq)->iseq_encoded + opt_pc, - ec->cfp->sp + arg_size, + ec->cfp->sp + 1 /* self */ + arg_size, ISEQ_BODY(iseq)->local_table_size - arg_size, ISEQ_BODY(iseq)->stack_max); @@ -1429,7 +1429,7 @@ invoke_iseq_block_from_c(rb_execution_context_t *ec, const struct rb_captured_bl const rb_cref_t *cref, int is_lambda, const rb_callable_method_entry_t *me) { const rb_iseq_t *iseq = rb_iseq_check(captured->code.iseq); - int i, opt_pc; + int opt_pc; VALUE type = VM_FRAME_MAGIC_BLOCK | (is_lambda ? 
VM_FRAME_FLAG_LAMBDA : 0); rb_control_frame_t *cfp = ec->cfp; VALUE *sp = cfp->sp; @@ -1448,14 +1448,18 @@ invoke_iseq_block_from_c(rb_execution_context_t *ec, const struct rb_captured_bl use_argv = vm_argv_ruby_array(av, argv, &flags, &argc, kw_splat); } - CHECK_VM_STACK_OVERFLOW(cfp, argc); + CHECK_VM_STACK_OVERFLOW(cfp, argc + 1); vm_check_canary(ec, sp); - cfp->sp = sp + argc; - for (i=0; i<argc; i++) { - sp[i] = use_argv[i]; + + VALUE *stack_argv = sp; + if (me) { + *sp = self; // bemthods need `self` on the VM stack + stack_argv++; } + cfp->sp = stack_argv + argc; + MEMCPY(stack_argv, use_argv, VALUE, argc); // restrict: new stack space - opt_pc = vm_yield_setup_args(ec, iseq, argc, sp, flags, passed_block_handler, + opt_pc = vm_yield_setup_args(ec, iseq, argc, stack_argv, flags, passed_block_handler, (is_lambda ? arg_setup_method : arg_setup_block)); cfp->sp = sp; |