summaryrefslogtreecommitdiff
path: root/zjit/src/codegen.rs
diff options
context:
space:
mode:
Diffstat (limited to 'zjit/src/codegen.rs')
-rw-r--r--zjit/src/codegen.rs142
1 files changed, 90 insertions, 52 deletions
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index c53e0a0f9b..bb21c3dda2 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -626,9 +626,6 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
Insn::ObjectAlloc { val, state } => gen_object_alloc(jit, asm, opnd!(val), &function.frame_state(*state)),
&Insn::ObjectAllocClass { class, state } => gen_object_alloc_class(asm, class, &function.frame_state(state)),
Insn::StringCopy { val, chilled, state } => gen_string_copy(asm, opnd!(val), *chilled, &function.frame_state(*state)),
- // concatstrings shouldn't have 0 strings
- // If it happens we abort the compilation for now
- Insn::StringConcat { strings, state, .. } if strings.is_empty() => return Err(*state),
Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state)),
&Insn::StringGetbyte { string, index } => gen_string_getbyte(asm, opnd!(string), opnd!(index)),
Insn::StringSetbyteFixnum { string, index, value } => gen_string_setbyte_fixnum(asm, opnd!(string), opnd!(index), opnd!(value)),
@@ -659,9 +656,6 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
&Insn::InvokeBlock { cd, state, reason, .. } => gen_invokeblock(jit, asm, cd, &function.frame_state(state), reason),
Insn::InvokeBlockIfunc { cd, block_handler, args, state, .. } => gen_invokeblock_ifunc(jit, asm, *cd, opnd!(block_handler), opnds!(args), &function.frame_state(*state)),
Insn::InvokeProc { recv, args, state, kw_splat } => gen_invokeproc(jit, asm, opnd!(recv), opnds!(args), *kw_splat, &function.frame_state(*state)),
- // Ensure we have enough room fit ec, self, and arguments
- // TODO remove this check when we have stack args (we can use Time.new to test it)
- Insn::InvokeBuiltin { bf, state, .. } if bf.argc + 2 > (C_ARG_OPNDS.len() as i32) => return Err(*state),
Insn::InvokeBuiltin { bf, leaf, args, state, .. } => gen_invokebuiltin(jit, asm, &function.frame_state(*state), bf, *leaf, opnds!(args)),
&Insn::EntryPoint { jit_entry_idx } => no_output!(gen_entry_point(jit, asm, jit_entry_idx)),
Insn::Return { val } => no_output!(gen_return(asm, opnd!(val))),
@@ -722,12 +716,6 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
&Insn::GuardGreaterEq { left, right, state, .. } => gen_guard_greater_eq(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)),
Insn::PatchPoint { invariant, state } => no_output!(gen_patch_point(jit, asm, invariant, &function.frame_state(*state))),
Insn::CCall { cfunc, recv, args, name, owner: _, return_type: _, elidable: _ } => gen_ccall(asm, *cfunc, *name, opnd!(recv), opnds!(args)),
- // Give up CCallWithFrame for 7+ args since asm.ccall() supports at most 6 args (recv + args).
- // We're currently emitting a CCallWithFrame for `super` in to a cfunction.
- // We can't lower to `gen_send_without_block` because the
- // source opcode isn't necessarily `opt_send_without_block`
- // and so the interpreter stack layout may be incompatible.
- Insn::CCallWithFrame { cd, state, args, block, .. } if args.len() + 1 > C_ARG_OPNDS.len() => return Err(*state),
Insn::CCallWithFrame { cfunc, recv, name, args, cme, state, block, .. } =>
gen_ccall_with_frame(jit, asm, *cfunc, *name, opnd!(recv), opnds!(args), *cme, *block, &function.frame_state(*state)),
Insn::CCallVariadic { cfunc, recv, name, args, cme, state, block, return_type: _, elidable: _ } => {
@@ -770,7 +758,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
Insn::LoadEC => gen_load_ec(),
Insn::LoadSP => gen_load_sp(),
&Insn::GetEP { level } => gen_get_ep(asm, level),
- Insn::LoadSelf => gen_load_self(),
+ Insn::LoadSelf => gen_load_self(asm),
&Insn::LoadField { recv, id, offset, return_type } => gen_load_field(asm, opnd!(recv), id, offset, return_type),
&Insn::StoreField { recv, id, offset, val } => no_output!(gen_store_field(asm, opnd!(recv), id, offset, opnd!(val), function.type_of(val))),
&Insn::WriteBarrier { recv, val } => no_output!(gen_write_barrier(jit, asm, opnd!(recv), opnd!(val), function.type_of(val))),
@@ -967,6 +955,7 @@ fn gen_fixnum_bit_check(asm: &mut Assembler, val: Opnd, index: u8) -> Opnd {
}
fn gen_invokebuiltin(jit: &JITState, asm: &mut Assembler, state: &FrameState, bf: &rb_builtin_function, leaf: bool, args: Vec<Opnd>) -> lir::Opnd {
+ // +2 for ec, self
assert!(bf.argc + 2 <= C_ARG_OPNDS.len() as i32,
"gen_invokebuiltin should not be called for builtin function {} with too many arguments: {}",
unsafe { std::ffi::CStr::from_ptr(bf.name).to_str().unwrap() },
@@ -1052,11 +1041,14 @@ fn gen_ccall_with_frame(
gen_stack_overflow_check(jit, asm, state, state.stack_size());
let args_with_recv_len = args.len() + 1;
+ if args_with_recv_len > C_ARG_OPNDS.len() {
+ unimplemented!("Passing C call arguments on the stack");
+ }
let caller_stack_size = state.stack().len() - args_with_recv_len;
// Can't use gen_prepare_non_leaf_call() because we need to adjust the SP
// to account for the receiver and arguments (and block arguments if any)
- gen_save_pc_for_gc(asm, state);
+ gen_save_pc_for_gc(asm, state, 0);
gen_save_sp(asm, caller_stack_size);
gen_spill_stack(jit, asm, state);
gen_spill_locals(jit, asm, state);
@@ -1089,7 +1081,7 @@ fn gen_ccall_with_frame(
asm_comment!(asm, "switch to new CFP");
let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into());
asm.mov(CFP, new_cfp);
- asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP);
+ asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP);
let mut cfunc_args = vec![recv];
cfunc_args.extend(args);
@@ -1099,7 +1091,7 @@ fn gen_ccall_with_frame(
asm_comment!(asm, "pop C frame");
let new_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into());
asm.mov(CFP, new_cfp);
- asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP);
+ asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP);
asm_comment!(asm, "restore SP register for the caller");
let new_sp = asm.sub(SP, sp_offset.into());
@@ -1150,7 +1142,7 @@ fn gen_ccall_variadic(
// Can't use gen_prepare_non_leaf_call() because we need to adjust the SP
// to account for the receiver and arguments (like gen_ccall_with_frame does)
- gen_save_pc_for_gc(asm, state);
+ gen_save_pc_for_gc(asm, state, 0);
gen_save_sp(asm, caller_stack_size);
gen_spill_stack(jit, asm, state);
gen_spill_locals(jit, asm, state);
@@ -1178,7 +1170,7 @@ fn gen_ccall_variadic(
asm_comment!(asm, "switch to new CFP");
let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into());
asm.mov(CFP, new_cfp);
- asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP);
+ asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP);
let argv_ptr = gen_push_opnds(jit, asm, &args);
asm.count_call_to(&name.contents_lossy());
@@ -1187,7 +1179,7 @@ fn gen_ccall_variadic(
asm_comment!(asm, "pop C frame");
let new_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into());
asm.mov(CFP, new_cfp);
- asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP);
+ asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP);
asm_comment!(asm, "restore SP register for the caller");
let new_sp = asm.sub(SP, sp_offset.into());
@@ -1312,7 +1304,7 @@ fn gen_check_interrupts(jit: &mut JITState, asm: &mut Assembler, state: &FrameSt
asm_comment!(asm, "RUBY_VM_CHECK_INTS(ec)");
// Not checking interrupt_mask since it's zero outside finalize_deferred_heap_pages,
// signal_exec, or rb_postponed_job_flush.
- let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG as i32));
+ let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG));
asm.test(interrupt_flag, interrupt_flag);
asm.jnz(jit, side_exit(jit, state, SideExitReason::Interrupt));
}
@@ -1382,8 +1374,8 @@ fn gen_load_sp() -> Opnd {
SP
}
-fn gen_load_self() -> Opnd {
- Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)
+fn gen_load_self(asm: &mut Assembler) -> Opnd {
+ asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF))
}
fn gen_load_field(asm: &mut Assembler, recv: Opnd, id: FieldName, offset: i32, return_type: Type) -> Opnd {
@@ -1491,7 +1483,7 @@ fn gen_send(
) -> lir::Opnd {
gen_incr_send_fallback_counter(asm, reason);
- gen_prepare_non_leaf_call(jit, asm, state);
+ gen_prepare_fallback_call(jit, asm, state);
asm_comment!(asm, "call #{} with dynamic dispatch", ruby_call_method_name(cd));
unsafe extern "C" {
fn rb_vm_send(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE;
@@ -1514,7 +1506,7 @@ fn gen_send_forward(
) -> lir::Opnd {
gen_incr_send_fallback_counter(asm, reason);
- gen_prepare_non_leaf_call(jit, asm, state);
+ gen_prepare_fallback_call(jit, asm, state);
asm_comment!(asm, "call #{} with dynamic dispatch", ruby_call_method_name(cd));
unsafe extern "C" {
@@ -1537,7 +1529,7 @@ fn gen_send_without_block(
) -> lir::Opnd {
gen_incr_send_fallback_counter(asm, reason);
- gen_prepare_non_leaf_call(jit, asm, state);
+ gen_prepare_fallback_call(jit, asm, state);
asm_comment!(asm, "call #{} with dynamic dispatch", ruby_call_method_name(cd));
unsafe extern "C" {
fn rb_vm_opt_send_without_block(ec: EcPtr, cfp: CfpPtr, cd: VALUE) -> VALUE;
@@ -1568,8 +1560,9 @@ fn gen_push_inline_frame(
// Save cfp->pc and cfp->sp for the caller frame.
// Cannot use gen_prepare_non_leaf_call because we need special SP math.
- gen_save_pc_for_gc(asm, state);
- gen_save_sp(asm, state.stack().len() - args.len() - 1); // -1 for receiver
+ let stack_size = state.stack().len() - args.len() - 1; // -1 for receiver
+ gen_save_pc_for_gc(asm, state, 0);
+ gen_save_sp(asm, stack_size);
gen_spill_locals(jit, asm, state);
gen_spill_stack(jit, asm, state);
@@ -1636,7 +1629,7 @@ fn gen_push_inline_frame(
}
let callee_depth = state.depth + 1;
let callee_entry_pc = unsafe { rb_iseq_pc_at_idx(iseq, 0) };
- let callee_entry_frame = JITFrame::new_iseq(callee_entry_pc, iseq);
+ let callee_entry_frame = JITFrame::new_iseq(callee_entry_pc, iseq, 0);
asm_comment!(asm, "install entry JITFrame for inlined callee");
asm.mov(Opnd::mem(64, NATIVE_BASE_PTR, jit_frame_slot_offset(callee_depth)), Opnd::const_ptr(callee_entry_frame));
let callee_jit_return = cfp_jit_return_for_depth(asm, callee_depth);
@@ -1704,11 +1697,12 @@ fn gen_send_iseq_direct(
// Save cfp->pc and cfp->sp for the caller frame
// Can't use gen_prepare_non_leaf_call because we need special SP math.
- gen_save_pc_for_gc(asm, state);
- gen_save_sp(asm, state.stack().len() - args.len() - 1); // -1 for receiver
+ let stack_size = state.stack().len() - args.len() - 1; // -1 for receiver
+ let jit_frame = gen_save_pc_for_gc(asm, state, stack_size);
+ gen_save_sp(asm, stack_size);
gen_spill_locals(jit, asm, state);
- gen_spill_stack(jit, asm, state);
+ gen_stack_map(jit, asm, state, stack_size, jit_frame);
// This mirrors vm_caller_setup_arg_block() in for the `blockiseq != NULL` case.
// The HIR specialization guards ensure we will only reach here for literal blocks,
@@ -1767,7 +1761,7 @@ fn gen_send_iseq_direct(
asm_comment!(asm, "switch to new CFP");
let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into());
asm.mov(CFP, new_cfp);
- asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP);
+ asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP);
let params = unsafe { iseq.params() };
@@ -1843,7 +1837,7 @@ fn gen_invokeblock(
) -> lir::Opnd {
gen_incr_send_fallback_counter(asm, reason);
- gen_prepare_non_leaf_call(jit, asm, state);
+ gen_prepare_fallback_call(jit, asm, state);
asm_comment!(asm, "call invokeblock");
unsafe extern "C" {
@@ -1868,7 +1862,7 @@ fn gen_invokeblock_ifunc(
) -> lir::Opnd {
let _ = cd; // cd is not needed for the direct call
- gen_prepare_non_leaf_call(jit, asm, state);
+ gen_prepare_fallback_call(jit, asm, state);
// Push args to memory so we can pass argv pointer
let argv_ptr = gen_push_opnds(jit, asm, &args);
@@ -1898,7 +1892,7 @@ fn gen_invokeproc(
kw_splat: bool,
state: &FrameState,
) -> lir::Opnd {
- gen_prepare_non_leaf_call(jit, asm, state);
+ gen_prepare_fallback_call(jit, asm, state);
asm_comment!(asm, "call invokeproc");
@@ -1927,7 +1921,7 @@ fn gen_invokesuper(
) -> lir::Opnd {
gen_incr_send_fallback_counter(asm, reason);
- gen_prepare_non_leaf_call(jit, asm, state);
+ gen_prepare_fallback_call(jit, asm, state);
asm_comment!(asm, "call super with dynamic dispatch");
unsafe extern "C" {
fn rb_vm_invokesuper(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE;
@@ -1950,7 +1944,7 @@ fn gen_invokesuperforward(
) -> lir::Opnd {
gen_incr_send_fallback_counter(asm, reason);
- gen_prepare_non_leaf_call(jit, asm, state);
+ gen_prepare_fallback_call(jit, asm, state);
asm_comment!(asm, "call super with dynamic dispatch (forwarding)");
unsafe extern "C" {
fn rb_vm_invokesuperforward(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE;
@@ -2072,7 +2066,7 @@ fn gen_opt_newarray_hash(
state: &FrameState,
) -> lir::Opnd {
// `Array#hash` will hash the elements of the array.
- gen_prepare_non_leaf_call(jit, asm, state);
+ gen_prepare_fallback_call(jit, asm, state);
let array_len: c_long = elements.len().try_into().expect("Unable to fit length of elements into c_long");
@@ -2098,7 +2092,7 @@ fn gen_array_max(
elements: Vec<Opnd>,
state: &FrameState,
) -> lir::Opnd {
- gen_prepare_non_leaf_call(jit, asm, state);
+ gen_prepare_fallback_call(jit, asm, state);
let array_len: u32 = elements.len().try_into().expect("Unable to fit length of elements into u32");
@@ -2124,7 +2118,7 @@ fn gen_array_min(
elements: Vec<Opnd>,
state: &FrameState,
) -> lir::Opnd {
- gen_prepare_non_leaf_call(jit, asm, state);
+ gen_prepare_fallback_call(jit, asm, state);
let array_len: u32 = elements.len().try_into().expect("Unable to fit length of elements into u32");
@@ -2150,7 +2144,7 @@ fn gen_array_include(
target: Opnd,
state: &FrameState,
) -> lir::Opnd {
- gen_prepare_non_leaf_call(jit, asm, state);
+ gen_prepare_fallback_call(jit, asm, state);
let array_len: c_long = elements.len().try_into().expect("Unable to fit length of elements into c_long");
@@ -2178,7 +2172,7 @@ fn gen_array_pack_buffer(
buffer: Option<Opnd>,
state: &FrameState,
) -> lir::Opnd {
- gen_prepare_non_leaf_call(jit, asm, state);
+ gen_prepare_fallback_call(jit, asm, state);
let array_len: c_long = elements.len().try_into().expect("Unable to fit length of elements into c_long");
@@ -2365,7 +2359,7 @@ fn gen_entry_point(jit: &mut JITState, asm: &mut Assembler, jit_entry_idx: Optio
// Publish a valid entry JITFrame before setting cfp->jit_return. The entry point is
// always the top-level frame (depth 0). Inlined frames get their own deeper
// slots in gen_push_lightweight_frame().
- let jit_frame = JITFrame::new_iseq(entry_pc(jit.iseq(), jit_entry_idx), jit.iseq());
+ let jit_frame = JITFrame::new_iseq(entry_pc(jit.iseq(), jit_entry_idx), jit.iseq(), 0);
asm.mov(Opnd::mem(64, NATIVE_BASE_PTR, -SIZEOF_VALUE_I32), Opnd::const_ptr(jit_frame));
asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), NATIVE_BASE_PTR);
}
@@ -2377,7 +2371,7 @@ fn gen_return(asm: &mut Assembler, val: lir::Opnd) {
asm_comment!(asm, "pop stack frame");
let incr_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into());
asm.mov(CFP, incr_cfp);
- asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP);
+ asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP);
// Order here is important. Because we're about to tear down the frame,
// we need to load the return value, which might be part of the frame.
@@ -2891,13 +2885,13 @@ fn cfp_jit_return_for_depth(asm: &mut Assembler, depth: InlineDepth) -> Opnd {
/// Save only the PC to CFP. Use this when you need to call gen_save_sp()
/// immediately after with a custom stack size (e.g., gen_ccall_with_frame
/// adjusts SP to exclude receiver and arguments).
-fn gen_save_pc_for_gc(asm: &mut Assembler, state: &FrameState) {
+fn gen_save_pc_for_gc(asm: &mut Assembler, state: &FrameState, stack_map_size: usize) -> *const zjit_jit_frame {
let opcode: usize = state.get_opcode().try_into().unwrap();
let next_pc: *const VALUE = unsafe { state.pc.offset(insn_len(opcode) as isize) };
gen_incr_counter(asm, Counter::vm_write_jit_frame_count);
asm_comment!(asm, "save JITFrame to CFP");
- let jit_frame = JITFrame::new_iseq(next_pc, state.iseq);
+ let jit_frame = JITFrame::new_iseq(next_pc, state.iseq, stack_map_size);
asm.mov(Opnd::mem(64, NATIVE_BASE_PTR, jit_frame_slot_offset(state.depth)), Opnd::const_ptr(jit_frame));
// CFP_PC for a live JIT frame routes through the JITFrame on the native
@@ -2907,6 +2901,7 @@ fn gen_save_pc_for_gc(asm: &mut Assembler, state: &FrameState) {
// jit_frame->pc into cfp->pc and cleared cfp->jit_return: the JIT keeps
// running, lands on this routine again, and the poison would replace
// the valid materialized pc behind the GC's back.
+ jit_frame
}
/// Save the current PC on the CFP as a preparation for calling a C function
@@ -2917,12 +2912,13 @@ fn gen_save_pc_for_gc(asm: &mut Assembler, state: &FrameState) {
/// because the backend spills all live registers onto the C stack on CCall.
/// However, to avoid marking uninitialized stack slots, this also updates SP,
/// which may have cfp->sp for a past frame or a past non-leaf call.
-fn gen_prepare_call_with_gc(asm: &mut Assembler, state: &FrameState, leaf: bool) {
- gen_save_pc_for_gc(asm, state);
+fn gen_prepare_call_with_gc(asm: &mut Assembler, state: &FrameState, leaf: bool, stack_map_size: usize) -> *const zjit_jit_frame {
+ let jit_frame = gen_save_pc_for_gc(asm, state, stack_map_size);
gen_save_sp(asm, state.stack_size());
if leaf {
asm.expect_leaf_ccall(state.stack_size());
}
+ jit_frame
}
fn gen_prepare_leaf_call_with_gc(asm: &mut Assembler, state: &FrameState) {
@@ -2939,7 +2935,7 @@ fn gen_prepare_leaf_call_with_gc(asm: &mut Assembler, state: &FrameState) {
// We use state.without_stack() to pass stack_size=0 to gen_save_sp() because we don't write
// VM stack slots on leaf calls, which leaves those stack slots uninitialized. ZJIT keeps
// live objects on the C stack, so they are protected from GC properly.
- gen_prepare_call_with_gc(asm, &state.without_stack(), true);
+ gen_prepare_call_with_gc(asm, &state.without_stack(), true, 0);
}
/// Save the current SP on the CFP
@@ -2976,17 +2972,43 @@ fn gen_spill_stack(jit: &JITState, asm: &mut Assembler, state: &FrameState) {
}
}
+/// Prepare for VM fallback helpers that read arguments from the VM stack.
+///
+/// Saves the PC with a stack-map size of 0, publishes cfp->sp for the full stack, then spills
+/// locals and stack slots. Direct JIT-to-JIT calls keep cfp->sp lazy, so SP must be published
+/// before writing stack slots; otherwise the spill can overwrite frame metadata below the real VM-stack base.
+fn gen_prepare_fallback_call(jit: &JITState, asm: &mut Assembler, state: &FrameState) {
+ gen_save_pc_for_gc(asm, state, 0);
+ gen_save_sp(asm, state.stack_size());
+ gen_spill_locals(jit, asm, state);
+ gen_spill_stack(jit, asm, state);
+}
+
+/// Record the first `stack_size` Ruby stack values of `state`, paired with `jit_frame`, so this frame
+/// can be materialized after the next non-leaf C call. The actual JITFrame entries are encoded by the
+/// register allocator, where VReg locations on the native stack are known.
+fn gen_stack_map(jit: &JITState, asm: &mut Assembler, state: &FrameState, stack_size: usize, jit_frame: *const zjit_jit_frame) {
+ let mut stack = Vec::new();
+ for &insn_id in state.stack().take(stack_size) {
+ let opnd = jit.get_opnd(insn_id);
+ // JITFrame can only materialize Opnd::Value or Opnd::VReg, so reject any other operand kind up front
+ assert!(matches!(opnd, Opnd::Value(_) | Opnd::VReg { .. }), "FrameState should only reference Opnd::Value or Opnd::VReg, but got: {opnd:?}");
+ stack.push(opnd);
+ }
+ asm.stack_map(stack, jit_frame);
+}
+
/// Prepare for calling a C function that may call an arbitrary method.
/// Use gen_prepare_leaf_call_with_gc() if the method is leaf but allocates objects.
fn gen_prepare_non_leaf_call(jit: &JITState, asm: &mut Assembler, state: &FrameState) {
// TODO: Lazily materialize caller frames when needed
// Save PC for backtraces and allocation tracing
// and SP to avoid marking uninitialized stack slots
- gen_prepare_call_with_gc(asm, state, false);
+ let jit_frame = gen_prepare_call_with_gc(asm, state, false, state.stack_size());
// Spill the virtual stack in case it raises an exception
// and the interpreter uses the stack for handling the exception
- gen_spill_stack(jit, asm, state);
+ gen_stack_map(jit, asm, state, state.stack_size(), jit_frame);
// Spill locals in case the method looks at caller Bindings
gen_spill_locals(jit, asm, state);
@@ -3436,6 +3458,20 @@ fn gen_function_stub(cb: &mut CodeBlock, iseq_call: IseqCallRef) -> Result<CodeP
asm.new_block_without_id("gen_function_stub");
asm_comment!(asm, "Stub: {}", iseq_get_location(iseq_call.iseq.get(), 0));
+ // If the stubbed ISEQ fails to compile, function_stub_hit exits to the
+ // interpreter with this callee frame. Direct JIT-to-JIT calls pass arguments
+ // in C argument registers, so spill the packed argument locals first. The
+ // fallback path will reshape these around any optional positional gaps.
+ let argc = iseq_call.argc.to_usize();
+ assert!(argc < C_ARG_OPNDS.len(), "SendDirect must fit receiver plus arguments in C argument registers");
+ let local_size = unsafe { get_iseq_body_local_table_size(iseq_call.iseq.get()) }.to_usize();
+ for arg_idx in 0..argc {
+ asm.store(
+ Opnd::mem(64, SP, -local_size_and_idx_to_bp_offset(local_size, arg_idx) * SIZEOF_VALUE_I32),
+ C_ARG_OPNDS[arg_idx + 1],
+ );
+ }
+
// Call function_stub_hit using the shared trampoline. See `gen_function_stub_hit_trampoline`.
// Use load_into instead of mov, which is split on arm64, to avoid clobbering ALLOC_REGS.
asm.load_into(scratch_reg, Opnd::const_ptr(Rc::into_raw(iseq_call)));
@@ -3541,8 +3577,10 @@ pub fn gen_materialize_exit_trampoline(cb: &mut CodeBlock, exit_trampoline: Code
let mut asm = Assembler::new();
asm.new_block_without_id("materialize_exit_trampoline");
- asm_comment!(asm, "materialize ZJIT frames");
+ asm_comment!(asm, "clear JITFrame materialized by exit code");
asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), 0.into());
+
+ asm_comment!(asm, "materialize ZJIT frames");
asm_ccall!(asm, rb_zjit_materialize_frames, EC, CFP);
asm.jmp(Target::CodePtr(exit_trampoline));