summaryrefslogtreecommitdiff
path: root/vm.c
diff options
context:
space:
mode:
author: Alan Wu <XrXr@users.noreply.github.com> 2023-07-17 13:57:58 -0400
committer: GitHub <noreply@github.com> 2023-07-17 13:57:58 -0400
commit: f302e725e10ae05e613e2c24cae0741f65f2db91 (patch)
tree: f539d3b5a27636dde99dfe96ab07e917b4f62740 /vm.c
parent: 105bdba899fbb10aa51115c4cd074ea42eb9e3e6 (diff)
Remove __bp__ and speed-up bmethod calls (#8060)
Remove rb_control_frame_t::__bp__ and optimize bmethod calls

This commit removes the __bp__ field from rb_control_frame_t. It was
introduced to help MJIT, but since MJIT was replaced by RJIT, we can use
vm_base_ptr() to compute it from the SP of the previous control frame
instead. Removing the field avoids needing to set it up when pushing new
frames.

Simply removing __bp__ would cause crashes since RJIT and YJIT used a
slightly different stack layout for bmethod calls than the interpreter.
At the moment of the call, the two layouts looked as follows:

                   ┌────────────┐    ┌────────────┐
                   │ frame_base │    │ frame_base │
                   ├────────────┤    ├────────────┤
                   │    ...     │    │    ...     │
                   ├────────────┤    ├────────────┤
                   │    args    │    │    args    │
                   ├────────────┤    └────────────┘<─prev_frame_sp
                   │  receiver  │
    prev_frame_sp─>└────────────┘
                     RJIT & YJIT      interpreter

Essentially, vm_base_ptr() needs to compute the address to frame_base
given prev_frame_sp in the diagrams. The presence of the receiver
created an off-by-one situation.

Make the interpreter use the layout the JITs use for iseq-to-iseq
bmethod calls. Doing so removes unnecessary argument shifting and
vm_exec_core() re-entry from the interpreter, yielding a speed
improvement visible through `benchmark/vm_defined_method.yml`:

     patched:   7578743.1 i/s
      master:   4796596.3 i/s - 1.58x slower

C-to-iseq bmethod calls now store one more VALUE than before, but that
should have negligible impact on overall performance.

Note that re-entering vm_exec_core() used to be necessary for firing
TracePoint events, but that's no longer the case since
9121e57a5f50bc91bae48b3b91edb283bf96cb6b.

Closes ruby/ruby#6428
Diffstat (limited to 'vm.c')
-rw-r--r-- vm.c 22
1 files changed, 13 insertions, 9 deletions
diff --git a/vm.c b/vm.c
index f8cdc5a5ea..c132f9a377 100644
--- a/vm.c
+++ b/vm.c
@@ -198,7 +198,7 @@ VM_CAPTURED_BLOCK_TO_CFP(const struct rb_captured_block *captured)
{
rb_control_frame_t *cfp = ((rb_control_frame_t *)((VALUE *)(captured) - 3));
VM_ASSERT(!VM_CFP_IN_HEAP_P(GET_EC(), cfp));
- VM_ASSERT(sizeof(rb_control_frame_t)/sizeof(VALUE) == 8 + VM_DEBUG_BP_CHECK ? 1 : 0);
+ VM_ASSERT(sizeof(rb_control_frame_t)/sizeof(VALUE) == 7 + VM_DEBUG_BP_CHECK ? 1 : 0);
return cfp;
}
@@ -1398,7 +1398,7 @@ invoke_block(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, cons
static VALUE
invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, const struct rb_captured_block *captured, const rb_callable_method_entry_t *me, VALUE type, int opt_pc)
{
- /* bmethod */
+ /* bmethod call from outside the VM */
int arg_size = ISEQ_BODY(iseq)->param.size;
VALUE ret;
@@ -1408,7 +1408,7 @@ invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, co
VM_GUARDED_PREV_EP(captured->ep),
(VALUE)me,
ISEQ_BODY(iseq)->iseq_encoded + opt_pc,
- ec->cfp->sp + arg_size,
+ ec->cfp->sp + 1 /* self */ + arg_size,
ISEQ_BODY(iseq)->local_table_size - arg_size,
ISEQ_BODY(iseq)->stack_max);
@@ -1429,7 +1429,7 @@ invoke_iseq_block_from_c(rb_execution_context_t *ec, const struct rb_captured_bl
const rb_cref_t *cref, int is_lambda, const rb_callable_method_entry_t *me)
{
const rb_iseq_t *iseq = rb_iseq_check(captured->code.iseq);
- int i, opt_pc;
+ int opt_pc;
VALUE type = VM_FRAME_MAGIC_BLOCK | (is_lambda ? VM_FRAME_FLAG_LAMBDA : 0);
rb_control_frame_t *cfp = ec->cfp;
VALUE *sp = cfp->sp;
@@ -1448,14 +1448,18 @@ invoke_iseq_block_from_c(rb_execution_context_t *ec, const struct rb_captured_bl
use_argv = vm_argv_ruby_array(av, argv, &flags, &argc, kw_splat);
}
- CHECK_VM_STACK_OVERFLOW(cfp, argc);
+ CHECK_VM_STACK_OVERFLOW(cfp, argc + 1);
vm_check_canary(ec, sp);
- cfp->sp = sp + argc;
- for (i=0; i<argc; i++) {
- sp[i] = use_argv[i];
+
+ VALUE *stack_argv = sp;
+ if (me) {
+ *sp = self; // bmethods need `self` on the VM stack
+ stack_argv++;
}
+ cfp->sp = stack_argv + argc;
+ MEMCPY(stack_argv, use_argv, VALUE, argc); // restrict: new stack space
- opt_pc = vm_yield_setup_args(ec, iseq, argc, sp, flags, passed_block_handler,
+ opt_pc = vm_yield_setup_args(ec, iseq, argc, stack_argv, flags, passed_block_handler,
(is_lambda ? arg_setup_method : arg_setup_block));
cfp->sp = sp;