diff options
Diffstat (limited to 'zjit/src')
| -rw-r--r-- | zjit/src/backend/arm64/mod.rs | 24 | ||||
| -rw-r--r-- | zjit/src/backend/lir.rs | 129 | ||||
| -rw-r--r-- | zjit/src/backend/tests.rs | 4 | ||||
| -rw-r--r-- | zjit/src/backend/x86_64/mod.rs | 2 | ||||
| -rw-r--r-- | zjit/src/codegen.rs | 142 | ||||
| -rw-r--r-- | zjit/src/codegen_tests.rs | 63 | ||||
| -rw-r--r-- | zjit/src/cruby_bindings.inc.rs | 7 | ||||
| -rw-r--r-- | zjit/src/cruby_methods.rs | 12 | ||||
| -rw-r--r-- | zjit/src/hir.rs | 83 | ||||
| -rw-r--r-- | zjit/src/hir/opt_tests.rs | 292 | ||||
| -rw-r--r-- | zjit/src/jit_frame.rs | 66 | ||||
| -rw-r--r-- | zjit/src/stats.rs | 2 |
12 files changed, 636 insertions, 190 deletions
diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index ff1661f6fc..87b388eefa 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -263,6 +263,9 @@ impl Assembler { Opnd::Mem(mem) => { if mem_disp_fits_bits(mem.disp) { opnd + } else if asm.accept_scratch_reg { + asm.lea_into(SCRATCH1_OPND, Opnd::Mem(Mem { num_bits: 64, ..mem })); + Opnd::mem(mem.num_bits, SCRATCH1_OPND, 0) } else { let base = asm.lea(Opnd::Mem(Mem { num_bits: 64, ..mem })); Opnd::mem(mem.num_bits, base, 0) @@ -627,6 +630,12 @@ impl Assembler { *opnd = split_load_operand(asm, *opnd); asm.push_insn(insn); }, + Insn::Store { dest, .. } => { + if asm.accept_scratch_reg && matches!(dest, Opnd::Mem(_)) { + *dest = split_memory_address(asm, *dest); + } + asm.push_insn(insn); + }, Insn::Mul { left, right, .. } => { *left = split_load_operand(asm, *left); *right = split_load_operand(asm, *right); @@ -1673,7 +1682,7 @@ impl Assembler { }); trace_compile_phase("resolve_ssa", || { - asm.handle_caller_saved_regs(&intervals, &assignments, &C_ARG_REGREGS); + asm.handle_caller_saved_regs(&intervals, &assignments, &C_ARG_REGREGS, total_stack_slots); asm.resolve_ssa(&intervals, &assignments); }); @@ -2193,6 +2202,19 @@ mod tests { } #[test] + fn test_store_with_scratch_reg_and_large_displacement() { + let (mut asm, mut cb, _) = setup_asm_with_scratch_reg(); + asm.store(Opnd::mem(64, SP, -0x140), C_RET_OPND); + + asm.compile_with_num_regs(&mut cb, 0); + assert_disasm_snapshot!(cb.disasm(), @" + 0x0: sub x17, x21, #0x140 + 0x4: stur x0, [x17] + "); + assert_snapshot!(cb.hexdump(), @"b10205d1200200f8"); + } + + #[test] #[should_panic] fn test_store_with_invalid_scratch_reg() { let (_, scratch_reg) = Assembler::new_with_scratch_reg(); diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index f394062d51..84c8c8ba0f 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -4,11 +4,10 @@ use std::mem::take; use std::rc::Rc; use crate::bitset::BitSet; use crate::codegen::{local_size_and_idx_to_ep_offset, perf_symbol_range_start, perf_symbol_range_end}; -use crate::cruby::{IseqPtr, RUBY_OFFSET_CFP_ISEQ, RUBY_OFFSET_CFP_JIT_RETURN, RUBY_OFFSET_CFP_PC, RUBY_OFFSET_CFP_SP, SIZEOF_VALUE_I32, vm_stack_canary, YarvInsnIdx }; +use crate::cruby::{IseqPtr, RUBY_OFFSET_CFP_ISEQ, RUBY_OFFSET_CFP_JIT_RETURN, RUBY_OFFSET_CFP_PC, RUBY_OFFSET_CFP_SP, SIZEOF_VALUE_I32, VALUE, ZJIT_STACK_MAP_SHIFT, ZJIT_STACK_MAP_VREG_TAG, vm_stack_canary, YarvInsnIdx, zjit_jit_frame}; use crate::hir::{Invariant, SideExitReason}; use crate::hir; use crate::options::{TraceExits, PerfMap, get_option}; -use crate::cruby::VALUE; use crate::payload::{IseqVersionRef, get_or_create_iseq_payload}; use crate::stats::{exit_counter_ptr, exit_counter_ptr_for_opcode, side_exit_counter, CompileError}; use crate::virtualmem::CodePtr; @@ -698,6 +697,7 @@ pub enum Insn { /// that are split from this CCall during register assignment. end_marker: Option<PosMarkerFn>, out: Opnd, + stack_map: Option<StackMap>, }, // C function return @@ -940,8 +940,11 @@ macro_rules! for_each_operand_impl { visit_one!(opnd0); visit_one!(opnd1); } - Insn::CCall { opnds, .. } => { + Insn::CCall { opnds, stack_map, .. } => { visit_many!(opnds); + if let Some(StackMap { stack, .. }) = stack_map { + visit_many!(stack); + } } // only iterate over preserved in the const iterator #[allow(unused_variables)] @@ -1387,6 +1390,18 @@ impl StackState { } } +/// Stack map to materialize Ruby stack slots from JIT-kept values. +#[derive(Clone, Debug)] +pub struct StackMap { + /// Ruby stack slots to reconstruct if this frame is materialized. + /// Each operand must be either an immediate Ruby VALUE or a VReg whose + /// final register/spill location will be encoded after register allocation. + stack: Vec<Opnd>, + /// Heap-allocated JITFrame whose trailing stack map storage receives the + /// encoded entries once this CCall's register allocation is known. + jit_frame: *const zjit_jit_frame, +} + /// Initial capacity for asm.insns vector const ASSEMBLER_INSNS_CAPACITY: usize = 256; @@ -1420,6 +1435,11 @@ pub struct Assembler { /// Current instruction index, incremented for each instruction pushed idx: usize, + + /// Pending stack map to attach to the next CCall. The register allocator + /// consumes this through Insn::CCall, after it knows whether each live VReg + /// is in a saved register or an allocator spill slot. + stack_map: Option<StackMap>, } impl Assembler @@ -1435,6 +1455,7 @@ impl Assembler current_block_id: BlockId(0), num_vregs: 0, idx: 0, + stack_map: None, } } @@ -2103,6 +2124,7 @@ impl Assembler intervals: &[Interval], assignments: &[Option<Allocation>], regs: &[Reg], + total_stack_slots: usize, ) { use crate::backend::parcopy; use crate::backend::current::{C_RET_OPND, SCRATCH_REG, ALLOC_REGS}; @@ -2116,7 +2138,7 @@ impl Assembler let mut new_ids = Vec::with_capacity(old_ids.len()); for (insn, insn_id) in old_insns.into_iter().zip(old_ids.into_iter()) { - if let Insn::CCall { opnds, out, start_marker, end_marker, fptr } = insn { + if let Insn::CCall { opnds, out, start_marker, end_marker, fptr, stack_map } = insn { let insn_number = insn_id.map(|id| id.0).unwrap_or(0); // Do we have a case where a ccall is emitted, but nobody // uses the result? @@ -2126,15 +2148,29 @@ impl Assembler .end .is_some_and(|end| end > insn_number); + // Build a set of VRegIds that can be referenced by JITFrame for materializing the VM stack + let stack_vreg_ids: HashSet<usize> = if let Some(StackMap { stack, .. }) = &stack_map { + stack.iter().filter_map(|opnd| match opnd { + Opnd::VReg { idx: VRegId(vreg_id), .. } => Some(*vreg_id), + _ => None, + }).collect() + } else { + HashSet::default() + }; + // Find survivors: intervals that survive this Call instruction // We need to preserve the "surviving" registers past the ccall, // so we're going to push them all on the stack, then pop // after we make the ccall let survivors: Vec<usize> = intervals.iter() .filter(|interval| { - interval.has_bounds() - && interval.survives(insn_number) - && assignments[interval.id].and_then(|alloc| alloc.alloc_pool_index(ALLOC_REGS.len())).is_some() + // We need to spill register intervals on this CCall in two cases: + // 1) The VReg is referenced in an instruction after the CCall + let survives_call = interval.has_bounds() && interval.survives(insn_number); + // 2) The VReg is referenced by the stack map for the CCall + let stack_map_reg = stack_vreg_ids.contains(&interval.id); + let is_register = assignments[interval.id].and_then(|alloc| alloc.alloc_pool_index(ALLOC_REGS.len())).is_some(); + is_register && (survives_call || stack_map_reg) }) .map(|interval| interval.id) .collect(); @@ -2156,6 +2192,59 @@ impl Assembler } new_ids.push(None); } + + if let Some(StackMap { stack, jit_frame }) = stack_map { + assert_eq!(unsafe { (*jit_frame).stack_size } as usize, stack.len()); + for (idx, stack_opnd) in stack.iter().enumerate() { + let entry = match stack_opnd { + Opnd::UImm(value) => { + let value = VALUE(*value as usize); + // TODO: Investigate using a constant pool to track any value reference in the stack map + assert!(value.special_const_p(), "StackMap should only materialize immediate VALUEs, but got: {value:?}"); + value + } + Opnd::VReg { idx: VRegId(vreg_id), .. } => { + // Calculate the offset from NATIVE_BASE_PTR to the stack slot for this VReg. + let vreg_stack_index = match assignments[*vreg_id].expect("StackMap VReg should have an allocation") { + Allocation::Reg(_) | Allocation::Fixed(_) => { + let position = survivors.iter().position(|&survivor_id| survivor_id == *vreg_id).unwrap(); + // See Assembler::alloc_stack's native stack diagram. rb_zjit_materialize_frames() + // reads vreg_stack_index as cfp->jit_return[-vreg_stack_index]. + // For both arches, FrameSetup may add one native stack slot for + // alignment before these CPushPairs. + // TODO: Centralize stack slot offset calculation in StackState. + let frame_alignment_slots = if total_stack_slots % 2 == 1 { + 1 + } else { + 0 + }; + (total_stack_slots + frame_alignment_slots) + .checked_add(1) + .expect("StackMap requires a JITFrame slot") + + position + } + Allocation::Stack(stack_idx) => { + // StackState places allocator spills at: + // cfp->jit_return[-(self.stack_base_idx + stack_idx + 1)] + // so encode the matching materializer index. + self.stack_base_idx + .checked_add(1) + .expect("StackMap requires a JITFrame slot") + + stack_idx + } + }; + + // Encode the offset as a shifted-and-tagged integer. + let encoded = (vreg_stack_index << ZJIT_STACK_MAP_SHIFT) | ZJIT_STACK_MAP_VREG_TAG as usize; + debug_assert!(!VALUE(encoded).special_const_p(), "encoded StackMap VReg should not look like an immediate VALUE"); + VALUE(encoded) + } + _ => unreachable!("unexpected operand in StackMap: {stack_opnd:?}"), + }; + unsafe { (*jit_frame.cast_mut()).stack.as_mut_ptr().add(idx).write(entry); } + } + } + // Extract arguments from CCall, clear opnds assert!(opnds.len() <= regs.len()); @@ -2198,7 +2287,8 @@ impl Assembler // be empty now start_marker: None, end_marker: None, - fptr + fptr, + stack_map: None, }); new_ids.push(insn_id); @@ -3228,7 +3318,8 @@ impl Assembler { let canary_opnd = self.set_stack_canary(); let out = self.new_vreg(Opnd::match_num_bits(&opnds)); let fptr = Opnd::const_ptr(fptr); - self.push_insn(Insn::CCall { fptr, opnds, start_marker: None, end_marker: None, out }); + let stack_map = self.stack_map.take(); + self.push_insn(Insn::CCall { fptr, opnds, start_marker: None, end_marker: None, out, stack_map }); self.clear_stack_canary(canary_opnd); out } @@ -3237,14 +3328,16 @@ impl Assembler { /// new vreg for the result. pub fn ccall_into(&mut self, out: Opnd, fptr: *const u8, opnds: Vec<Opnd>) { let fptr = Opnd::const_ptr(fptr); - self.push_insn(Insn::CCall { fptr, opnds, start_marker: None, end_marker: None, out }); + let stack_map = self.stack_map.take(); + self.push_insn(Insn::CCall { fptr, opnds, start_marker: None, end_marker: None, out, stack_map }); } /// Call a C function stored in a register pub fn ccall_reg(&mut self, fptr: Opnd, num_bits: u8) -> Opnd { assert!(matches!(fptr, Opnd::Reg(_)), "ccall_reg must be called with Opnd::Reg: {fptr:?}"); let out = self.new_vreg(num_bits); - self.push_insn(Insn::CCall { fptr, opnds: vec![], start_marker: None, end_marker: None, out }); + let stack_map = self.stack_map.take(); + self.push_insn(Insn::CCall { fptr, opnds: vec![], start_marker: None, end_marker: None, out, stack_map }); out } @@ -3258,12 +3351,14 @@ impl Assembler { end_marker: impl Fn(CodePtr, &CodeBlock) + 'static, ) -> Opnd { let out = self.new_vreg(Opnd::match_num_bits(&opnds)); + let stack_map = self.stack_map.take(); self.push_insn(Insn::CCall { fptr: Opnd::const_ptr(fptr), opnds, start_marker: Some(Rc::new(start_marker)), end_marker: Some(Rc::new(end_marker)), out, + stack_map, }); out } @@ -3492,6 +3587,15 @@ impl Assembler { out } + /// Attach a stack map to the next CCall emitted by this Assembler. + /// The map is queued here because gen_stack_map() runs before the CCall is + /// emitted, but the map is filled only after register allocation assigns + /// locations to the VRegs it references. + pub fn stack_map(&mut self, stack: Vec<Opnd>, jit_frame: *const zjit_jit_frame) { + assert!(self.stack_map.is_none()); + self.stack_map = Some(StackMap { stack, jit_frame }); + } + pub fn store(&mut self, dest: Opnd, src: Opnd) { assert!(!matches!(dest, Opnd::VReg { .. }), "Destination of store must not be Opnd::VReg, got: {dest:?}"); self.push_insn(Insn::Store { dest, src }); @@ -4405,6 +4509,7 @@ mod tests { start_marker: None, end_marker: None, out: v3, + stack_map: None, }); // v4 = Add(v3, v1) @@ -4436,7 +4541,7 @@ mod tests { "v1 should be in a register"); // Run the pipeline: handle_caller_saved_regs then resolve_ssa - asm.handle_caller_saved_regs(&intervals, &assignments, regs); + asm.handle_caller_saved_regs(&intervals, &assignments, regs, 0); asm.resolve_ssa(&intervals, &assignments); let insns = &asm.basic_blocks[b1.0].insns; diff --git a/zjit/src/backend/tests.rs b/zjit/src/backend/tests.rs index 7174ac4c80..e38dc4707f 100644 --- a/zjit/src/backend/tests.rs +++ b/zjit/src/backend/tests.rs @@ -182,9 +182,9 @@ fn test_jcc_ptr() let (mut asm, mut cb) = setup_asm(); let side_exit = Target::CodePtr(cb.get_write_ptr().add_bytes(4)); - let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK as i32)); + let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK)); asm.test( - Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG as i32), + Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG), not_mask, ); asm.push_insn(Insn::Jnz(side_exit)); diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index d3bf847ab2..0f4acf1cef 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -1195,7 +1195,7 @@ impl Assembler { }); trace_compile_phase("resolve_ssa", || { - asm.handle_caller_saved_regs(&intervals, &assignments, &C_ARG_REGREGS); + asm.handle_caller_saved_regs(&intervals, &assignments, &C_ARG_REGREGS, total_stack_slots); asm.resolve_ssa(&intervals, &assignments); }); diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index c53e0a0f9b..bb21c3dda2 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -626,9 +626,6 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::ObjectAlloc { val, state } => gen_object_alloc(jit, asm, opnd!(val), &function.frame_state(*state)), &Insn::ObjectAllocClass { class, state } => gen_object_alloc_class(asm, class, &function.frame_state(state)), Insn::StringCopy { val, chilled, state } => gen_string_copy(asm, opnd!(val), *chilled, &function.frame_state(*state)), - // concatstrings shouldn't have 0 strings - // If it happens we abort the compilation for now - Insn::StringConcat { strings, state, .. } if strings.is_empty() => return Err(*state), Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state)), &Insn::StringGetbyte { string, index } => gen_string_getbyte(asm, opnd!(string), opnd!(index)), Insn::StringSetbyteFixnum { string, index, value } => gen_string_setbyte_fixnum(asm, opnd!(string), opnd!(index), opnd!(value)), @@ -659,9 +656,6 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::InvokeBlock { cd, state, reason, .. } => gen_invokeblock(jit, asm, cd, &function.frame_state(state), reason), Insn::InvokeBlockIfunc { cd, block_handler, args, state, .. } => gen_invokeblock_ifunc(jit, asm, *cd, opnd!(block_handler), opnds!(args), &function.frame_state(*state)), Insn::InvokeProc { recv, args, state, kw_splat } => gen_invokeproc(jit, asm, opnd!(recv), opnds!(args), *kw_splat, &function.frame_state(*state)), - // Ensure we have enough room fit ec, self, and arguments - // TODO remove this check when we have stack args (we can use Time.new to test it) - Insn::InvokeBuiltin { bf, state, .. } if bf.argc + 2 > (C_ARG_OPNDS.len() as i32) => return Err(*state), Insn::InvokeBuiltin { bf, leaf, args, state, .. } => gen_invokebuiltin(jit, asm, &function.frame_state(*state), bf, *leaf, opnds!(args)), &Insn::EntryPoint { jit_entry_idx } => no_output!(gen_entry_point(jit, asm, jit_entry_idx)), Insn::Return { val } => no_output!(gen_return(asm, opnd!(val))), @@ -722,12 +716,6 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::GuardGreaterEq { left, right, state, .. } => gen_guard_greater_eq(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)), Insn::PatchPoint { invariant, state } => no_output!(gen_patch_point(jit, asm, invariant, &function.frame_state(*state))), Insn::CCall { cfunc, recv, args, name, owner: _, return_type: _, elidable: _ } => gen_ccall(asm, *cfunc, *name, opnd!(recv), opnds!(args)), - // Give up CCallWithFrame for 7+ args since asm.ccall() supports at most 6 args (recv + args). - // We're currently emitting a CCallWithFrame for `super` in to a cfunction. - // We can't lower to `gen_send_without_block` because the - // source opcode isn't necessarily `opt_send_without_block` - // and so the interpreter stack layout may be incompatible. - Insn::CCallWithFrame { cd, state, args, block, .. } if args.len() + 1 > C_ARG_OPNDS.len() => return Err(*state), Insn::CCallWithFrame { cfunc, recv, name, args, cme, state, block, .. } => gen_ccall_with_frame(jit, asm, *cfunc, *name, opnd!(recv), opnds!(args), *cme, *block, &function.frame_state(*state)), Insn::CCallVariadic { cfunc, recv, name, args, cme, state, block, return_type: _, elidable: _ } => { @@ -770,7 +758,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::LoadEC => gen_load_ec(), Insn::LoadSP => gen_load_sp(), &Insn::GetEP { level } => gen_get_ep(asm, level), - Insn::LoadSelf => gen_load_self(), + Insn::LoadSelf => gen_load_self(asm), &Insn::LoadField { recv, id, offset, return_type } => gen_load_field(asm, opnd!(recv), id, offset, return_type), &Insn::StoreField { recv, id, offset, val } => no_output!(gen_store_field(asm, opnd!(recv), id, offset, opnd!(val), function.type_of(val))), &Insn::WriteBarrier { recv, val } => no_output!(gen_write_barrier(jit, asm, opnd!(recv), opnd!(val), function.type_of(val))), @@ -967,6 +955,7 @@ fn gen_fixnum_bit_check(asm: &mut Assembler, val: Opnd, index: u8) -> Opnd { } fn gen_invokebuiltin(jit: &JITState, asm: &mut Assembler, state: &FrameState, bf: &rb_builtin_function, leaf: bool, args: Vec<Opnd>) -> lir::Opnd { + // +2 for ec, self assert!(bf.argc + 2 <= C_ARG_OPNDS.len() as i32, "gen_invokebuiltin should not be called for builtin function {} with too many arguments: {}", unsafe { std::ffi::CStr::from_ptr(bf.name).to_str().unwrap() }, @@ -1052,11 +1041,14 @@ fn gen_ccall_with_frame( gen_stack_overflow_check(jit, asm, state, state.stack_size()); let args_with_recv_len = args.len() + 1; + if args_with_recv_len > C_ARG_OPNDS.len() { + unimplemented!("Passing C call arguments on the stack"); + } let caller_stack_size = state.stack().len() - args_with_recv_len; // Can't use gen_prepare_non_leaf_call() because we need to adjust the SP // to account for the receiver and arguments (and block arguments if any) - gen_save_pc_for_gc(asm, state); + gen_save_pc_for_gc(asm, state, 0); gen_save_sp(asm, caller_stack_size); gen_spill_stack(jit, asm, state); gen_spill_locals(jit, asm, state); @@ -1089,7 +1081,7 @@ fn gen_ccall_with_frame( asm_comment!(asm, "switch to new CFP"); let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, new_cfp); - asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); let mut cfunc_args = vec![recv]; cfunc_args.extend(args); @@ -1099,7 +1091,7 @@ fn gen_ccall_with_frame( asm_comment!(asm, "pop C frame"); let new_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, new_cfp); - asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); asm_comment!(asm, "restore SP register for the caller"); let new_sp = asm.sub(SP, sp_offset.into()); @@ -1150,7 +1142,7 @@ fn gen_ccall_variadic( // Can't use gen_prepare_non_leaf_call() because we need to adjust the SP // to account for the receiver and arguments (like gen_ccall_with_frame does) - gen_save_pc_for_gc(asm, state); + gen_save_pc_for_gc(asm, state, 0); gen_save_sp(asm, caller_stack_size); gen_spill_stack(jit, asm, state); gen_spill_locals(jit, asm, state); @@ -1178,7 +1170,7 @@ fn gen_ccall_variadic( asm_comment!(asm, "switch to new CFP"); let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, new_cfp); - asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); let argv_ptr = gen_push_opnds(jit, asm, &args); asm.count_call_to(&name.contents_lossy()); @@ -1187,7 +1179,7 @@ fn gen_ccall_variadic( asm_comment!(asm, "pop C frame"); let new_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, new_cfp); - asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); asm_comment!(asm, "restore SP register for the caller"); let new_sp = asm.sub(SP, sp_offset.into()); @@ -1312,7 +1304,7 @@ fn gen_check_interrupts(jit: &mut JITState, asm: &mut Assembler, state: &FrameSt asm_comment!(asm, "RUBY_VM_CHECK_INTS(ec)"); // Not checking interrupt_mask since it's zero outside finalize_deferred_heap_pages, // signal_exec, or rb_postponed_job_flush. - let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG as i32)); + let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG)); asm.test(interrupt_flag, interrupt_flag); asm.jnz(jit, side_exit(jit, state, SideExitReason::Interrupt)); } @@ -1382,8 +1374,8 @@ fn gen_load_sp() -> Opnd { SP } -fn gen_load_self() -> Opnd { - Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF) +fn gen_load_self(asm: &mut Assembler) -> Opnd { + asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)) } fn gen_load_field(asm: &mut Assembler, recv: Opnd, id: FieldName, offset: i32, return_type: Type) -> Opnd { @@ -1491,7 +1483,7 @@ fn gen_send( ) -> lir::Opnd { gen_incr_send_fallback_counter(asm, reason); - gen_prepare_non_leaf_call(jit, asm, state); + gen_prepare_fallback_call(jit, asm, state); asm_comment!(asm, "call #{} with dynamic dispatch", ruby_call_method_name(cd)); unsafe extern "C" { fn rb_vm_send(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; @@ -1514,7 +1506,7 @@ fn gen_send_forward( ) -> lir::Opnd { gen_incr_send_fallback_counter(asm, reason); - gen_prepare_non_leaf_call(jit, asm, state); + gen_prepare_fallback_call(jit, asm, state); asm_comment!(asm, "call #{} with dynamic dispatch", ruby_call_method_name(cd)); unsafe extern "C" { @@ -1537,7 +1529,7 @@ fn gen_send_without_block( ) -> lir::Opnd { gen_incr_send_fallback_counter(asm, reason); - gen_prepare_non_leaf_call(jit, asm, state); + gen_prepare_fallback_call(jit, asm, state); asm_comment!(asm, "call #{} with dynamic dispatch", ruby_call_method_name(cd)); unsafe extern "C" { fn rb_vm_opt_send_without_block(ec: EcPtr, cfp: CfpPtr, cd: VALUE) -> VALUE; @@ -1568,8 +1560,9 @@ fn gen_push_inline_frame( // Save cfp->pc and cfp->sp for the caller frame. // Cannot use gen_prepare_non_leaf_call because we need special SP math. - gen_save_pc_for_gc(asm, state); - gen_save_sp(asm, state.stack().len() - args.len() - 1); // -1 for receiver + let stack_size = state.stack().len() - args.len() - 1; // -1 for receiver + gen_save_pc_for_gc(asm, state, 0); + gen_save_sp(asm, stack_size); gen_spill_locals(jit, asm, state); gen_spill_stack(jit, asm, state); @@ -1636,7 +1629,7 @@ fn gen_push_inline_frame( } let callee_depth = state.depth + 1; let callee_entry_pc = unsafe { rb_iseq_pc_at_idx(iseq, 0) }; - let callee_entry_frame = JITFrame::new_iseq(callee_entry_pc, iseq); + let callee_entry_frame = JITFrame::new_iseq(callee_entry_pc, iseq, 0); asm_comment!(asm, "install entry JITFrame for inlined callee"); asm.mov(Opnd::mem(64, NATIVE_BASE_PTR, jit_frame_slot_offset(callee_depth)), Opnd::const_ptr(callee_entry_frame)); let callee_jit_return = cfp_jit_return_for_depth(asm, callee_depth); @@ -1704,11 +1697,12 @@ fn gen_send_iseq_direct( // Save cfp->pc and cfp->sp for the caller frame // Can't use gen_prepare_non_leaf_call because we need special SP math. - gen_save_pc_for_gc(asm, state); - gen_save_sp(asm, state.stack().len() - args.len() - 1); // -1 for receiver + let stack_size = state.stack().len() - args.len() - 1; // -1 for receiver + let jit_frame = gen_save_pc_for_gc(asm, state, stack_size); + gen_save_sp(asm, stack_size); gen_spill_locals(jit, asm, state); - gen_spill_stack(jit, asm, state); + gen_stack_map(jit, asm, state, stack_size, jit_frame); // This mirrors vm_caller_setup_arg_block() in for the `blockiseq != NULL` case. // The HIR specialization guards ensure we will only reach here for literal blocks, @@ -1767,7 +1761,7 @@ fn gen_send_iseq_direct( asm_comment!(asm, "switch to new CFP"); let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, new_cfp); - asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); let params = unsafe { iseq.params() }; @@ -1843,7 +1837,7 @@ fn gen_invokeblock( ) -> lir::Opnd { gen_incr_send_fallback_counter(asm, reason); - gen_prepare_non_leaf_call(jit, asm, state); + gen_prepare_fallback_call(jit, asm, state); asm_comment!(asm, "call invokeblock"); unsafe extern "C" { @@ -1868,7 +1862,7 @@ fn gen_invokeblock_ifunc( ) -> lir::Opnd { let _ = cd; // cd is not needed for the direct call - gen_prepare_non_leaf_call(jit, asm, state); + gen_prepare_fallback_call(jit, asm, state); // Push args to memory so we can pass argv pointer let argv_ptr = gen_push_opnds(jit, asm, &args); @@ -1898,7 +1892,7 @@ fn gen_invokeproc( kw_splat: bool, state: &FrameState, ) -> lir::Opnd { - gen_prepare_non_leaf_call(jit, asm, state); + gen_prepare_fallback_call(jit, asm, state); asm_comment!(asm, "call invokeproc"); @@ -1927,7 +1921,7 @@ fn gen_invokesuper( ) -> lir::Opnd { gen_incr_send_fallback_counter(asm, reason); - gen_prepare_non_leaf_call(jit, asm, state); + gen_prepare_fallback_call(jit, asm, state); asm_comment!(asm, "call super with dynamic dispatch"); unsafe extern "C" { fn rb_vm_invokesuper(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; @@ -1950,7 +1944,7 @@ fn gen_invokesuperforward( ) -> lir::Opnd { gen_incr_send_fallback_counter(asm, reason); - gen_prepare_non_leaf_call(jit, asm, state); + gen_prepare_fallback_call(jit, asm, state); asm_comment!(asm, "call super with dynamic dispatch (forwarding)"); unsafe extern "C" { fn rb_vm_invokesuperforward(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; @@ -2072,7 +2066,7 @@ fn gen_opt_newarray_hash( state: &FrameState, ) -> lir::Opnd { // `Array#hash` will hash the elements of the array. - gen_prepare_non_leaf_call(jit, asm, state); + gen_prepare_fallback_call(jit, asm, state); let array_len: c_long = elements.len().try_into().expect("Unable to fit length of elements into c_long"); @@ -2098,7 +2092,7 @@ fn gen_array_max( elements: Vec<Opnd>, state: &FrameState, ) -> lir::Opnd { - gen_prepare_non_leaf_call(jit, asm, state); + gen_prepare_fallback_call(jit, asm, state); let array_len: u32 = elements.len().try_into().expect("Unable to fit length of elements into u32"); @@ -2124,7 +2118,7 @@ fn gen_array_min( elements: Vec<Opnd>, state: &FrameState, ) -> lir::Opnd { - gen_prepare_non_leaf_call(jit, asm, state); + gen_prepare_fallback_call(jit, asm, state); let array_len: u32 = elements.len().try_into().expect("Unable to fit length of elements into u32"); @@ -2150,7 +2144,7 @@ fn gen_array_include( target: Opnd, state: &FrameState, ) -> lir::Opnd { - gen_prepare_non_leaf_call(jit, asm, state); + gen_prepare_fallback_call(jit, asm, state); let array_len: c_long = elements.len().try_into().expect("Unable to fit length of elements into c_long"); @@ -2178,7 +2172,7 @@ fn gen_array_pack_buffer( buffer: Option<Opnd>, state: &FrameState, ) -> lir::Opnd { - gen_prepare_non_leaf_call(jit, asm, state); + gen_prepare_fallback_call(jit, asm, state); let array_len: c_long = elements.len().try_into().expect("Unable to fit length of elements into c_long"); @@ -2365,7 +2359,7 @@ fn gen_entry_point(jit: &mut JITState, asm: &mut Assembler, jit_entry_idx: Optio // Publish a valid entry JITFrame before setting cfp->jit_return. The entry point is // always the top-level frame (depth 0). Inlined frames get their own deeper // slots in gen_push_lightweight_frame(). - let jit_frame = JITFrame::new_iseq(entry_pc(jit.iseq(), jit_entry_idx), jit.iseq()); + let jit_frame = JITFrame::new_iseq(entry_pc(jit.iseq(), jit_entry_idx), jit.iseq(), 0); asm.mov(Opnd::mem(64, NATIVE_BASE_PTR, -SIZEOF_VALUE_I32), Opnd::const_ptr(jit_frame)); asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), NATIVE_BASE_PTR); } @@ -2377,7 +2371,7 @@ fn gen_return(asm: &mut Assembler, val: lir::Opnd) { asm_comment!(asm, "pop stack frame"); let incr_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); asm.mov(CFP, incr_cfp); - asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP); + asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); // Order here is important. Because we're about to tear down the frame, // we need to load the return value, which might be part of the frame. @@ -2891,13 +2885,13 @@ fn cfp_jit_return_for_depth(asm: &mut Assembler, depth: InlineDepth) -> Opnd { /// Save only the PC to CFP. Use this when you need to call gen_save_sp() /// immediately after with a custom stack size (e.g., gen_ccall_with_frame /// adjusts SP to exclude receiver and arguments). -fn gen_save_pc_for_gc(asm: &mut Assembler, state: &FrameState) { +fn gen_save_pc_for_gc(asm: &mut Assembler, state: &FrameState, stack_map_size: usize) -> *const zjit_jit_frame { let opcode: usize = state.get_opcode().try_into().unwrap(); let next_pc: *const VALUE = unsafe { state.pc.offset(insn_len(opcode) as isize) }; gen_incr_counter(asm, Counter::vm_write_jit_frame_count); asm_comment!(asm, "save JITFrame to CFP"); - let jit_frame = JITFrame::new_iseq(next_pc, state.iseq); + let jit_frame = JITFrame::new_iseq(next_pc, state.iseq, stack_map_size); asm.mov(Opnd::mem(64, NATIVE_BASE_PTR, jit_frame_slot_offset(state.depth)), Opnd::const_ptr(jit_frame)); // CFP_PC for a live JIT frame routes through the JITFrame on the native @@ -2907,6 +2901,7 @@ fn gen_save_pc_for_gc(asm: &mut Assembler, state: &FrameState) { // jit_frame->pc into cfp->pc and cleared cfp->jit_return: the JIT keeps // running, lands on this routine again, and the poison would replace // the valid materialized pc behind the GC's back. + jit_frame } /// Save the current PC on the CFP as a preparation for calling a C function @@ -2917,12 +2912,13 @@ fn gen_save_pc_for_gc(asm: &mut Assembler, state: &FrameState) { /// because the backend spills all live registers onto the C stack on CCall. /// However, to avoid marking uninitialized stack slots, this also updates SP, /// which may have cfp->sp for a past frame or a past non-leaf call. -fn gen_prepare_call_with_gc(asm: &mut Assembler, state: &FrameState, leaf: bool) { - gen_save_pc_for_gc(asm, state); +fn gen_prepare_call_with_gc(asm: &mut Assembler, state: &FrameState, leaf: bool, stack_map_size: usize) -> *const zjit_jit_frame { + let jit_frame = gen_save_pc_for_gc(asm, state, stack_map_size); gen_save_sp(asm, state.stack_size()); if leaf { asm.expect_leaf_ccall(state.stack_size()); } + jit_frame } fn gen_prepare_leaf_call_with_gc(asm: &mut Assembler, state: &FrameState) { @@ -2939,7 +2935,7 @@ fn gen_prepare_leaf_call_with_gc(asm: &mut Assembler, state: &FrameState) { // We use state.without_stack() to pass stack_size=0 to gen_save_sp() because we don't write // VM stack slots on leaf calls, which leaves those stack slots uninitialized. ZJIT keeps // live objects on the C stack, so they are protected from GC properly. - gen_prepare_call_with_gc(asm, &state.without_stack(), true); + gen_prepare_call_with_gc(asm, &state.without_stack(), true, 0); } /// Save the current SP on the CFP @@ -2976,17 +2972,43 @@ fn gen_spill_stack(jit: &JITState, asm: &mut Assembler, state: &FrameState) { } } +/// Prepare for VM fallback helpers that read arguments from the VM stack. +/// +/// Direct JIT-to-JIT calls keep cfp->sp lazy, so this must publish SP before +/// writing stack slots. Otherwise spilling the stack can overwrite frame +/// metadata below the real VM-stack base. +fn gen_prepare_fallback_call(jit: &JITState, asm: &mut Assembler, state: &FrameState) { + gen_save_pc_for_gc(asm, state, 0); + gen_save_sp(asm, state.stack_size()); + gen_spill_locals(jit, asm, state); + gen_spill_stack(jit, asm, state); +} + +/// Record the Ruby stack values needed to materialize this frame after the next +/// non-leaf C call. The actual JITFrame entries are encoded by the register +/// allocator, where VReg locations on the native stack are known. +fn gen_stack_map(jit: &JITState, asm: &mut Assembler, state: &FrameState, stack_size: usize, jit_frame: *const zjit_jit_frame) { + let mut stack = Vec::new(); + for &insn_id in state.stack().take(stack_size) { + let opnd = jit.get_opnd(insn_id); + // JITFrame only supports materializing Opnd::Value or Opnd::VReg out of the frame + assert!(matches!(opnd, Opnd::Value(_) | Opnd::VReg { .. }), "FrameState should only reference Opnd::Value or Opnd::VReg, but got: {opnd:?}"); + stack.push(opnd); + } + asm.stack_map(stack, jit_frame); +} + /// Prepare for calling a C function that may call an arbitrary method. /// Use gen_prepare_leaf_call_with_gc() if the method is leaf but allocates objects. fn gen_prepare_non_leaf_call(jit: &JITState, asm: &mut Assembler, state: &FrameState) { // TODO: Lazily materialize caller frames when needed // Save PC for backtraces and allocation tracing // and SP to avoid marking uninitialized stack slots - gen_prepare_call_with_gc(asm, state, false); + let jit_frame = gen_prepare_call_with_gc(asm, state, false, state.stack_size()); // Spill the virtual stack in case it raises an exception // and the interpreter uses the stack for handling the exception - gen_spill_stack(jit, asm, state); + gen_stack_map(jit, asm, state, state.stack_size(), jit_frame); // Spill locals in case the method looks at caller Bindings gen_spill_locals(jit, asm, state); @@ -3436,6 +3458,20 @@ fn gen_function_stub(cb: &mut CodeBlock, iseq_call: IseqCallRef) -> Result<CodeP asm.new_block_without_id("gen_function_stub"); asm_comment!(asm, "Stub: {}", iseq_get_location(iseq_call.iseq.get(), 0)); + // If the stubbed ISEQ fails to compile, function_stub_hit exits to the + // interpreter with this callee frame. Direct JIT-to-JIT calls pass arguments + // in C argument registers, so spill the packed argument locals first. The + // fallback path will reshape these around any optional positional gaps. + let argc = iseq_call.argc.to_usize(); + assert!(argc < C_ARG_OPNDS.len(), "SendDirect must fit receiver plus arguments in C argument registers"); + let local_size = unsafe { get_iseq_body_local_table_size(iseq_call.iseq.get()) }.to_usize(); + for arg_idx in 0..argc { + asm.store( + Opnd::mem(64, SP, -local_size_and_idx_to_bp_offset(local_size, arg_idx) * SIZEOF_VALUE_I32), + C_ARG_OPNDS[arg_idx + 1], + ); + } + // Call function_stub_hit using the shared trampoline. See `gen_function_stub_hit_trampoline`. // Use load_into instead of mov, which is split on arm64, to avoid clobbering ALLOC_REGS. asm.load_into(scratch_reg, Opnd::const_ptr(Rc::into_raw(iseq_call))); @@ -3541,8 +3577,10 @@ pub fn gen_materialize_exit_trampoline(cb: &mut CodeBlock, exit_trampoline: Code let mut asm = Assembler::new(); asm.new_block_without_id("materialize_exit_trampoline"); - asm_comment!(asm, "materialize ZJIT frames"); + asm_comment!(asm, "clear JITFrame materialized by exit code"); asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), 0.into()); + + asm_comment!(asm, "materialize ZJIT frames"); asm_ccall!(asm, rb_zjit_materialize_frames, EC, CFP); asm.jmp(Target::CodePtr(exit_trampoline)); diff --git a/zjit/src/codegen_tests.rs b/zjit/src/codegen_tests.rs index b052147add..99d5f90516 100644 --- a/zjit/src/codegen_tests.rs +++ b/zjit/src/codegen_tests.rs @@ -1294,6 +1294,69 @@ fn test_invokesuper_to_cfunc_with_too_many_args_exits() { "#), @"[1, 2, 3, 4, 5, 6]"); } +// Repro for the production "Failed to get_opnd(vN)" panic +// (PriceRs::PricingService#build_rust_adjustment_from_row, introduced by #17186). +// +// A regular send to a C method with 7 fixed args is reduced to a CCallWithFrame +// with recv + 7 = 8 operands, which exceeds C_ARG_OPNDS.len() (6). gen_insn bails +// with `return Err(*state)`; the caller emits a side exit and `break`s out of the +// block. But the call's *result* is stored in a local and used in a *later* basic +// block (the `if` arm here). Because codegen bailed before assigning a LIR operand +// to the result, compiling that later block calls get_opnd(result) on a None entry +// and panics. The existing `test_invokesuper_to_cfunc_with_too_many_args_exits` does +// not catch this because there the call result is the method's tail value and is not +// referenced past the bailed block. +// +// NOTE: This currently ABORTS with `Failed to get_opnd(vN)` (the bug). The snapshot +// below is the expected behavior once the backend exits cleanly: `flag` is true so +// `test` returns the cfunc's result, the array [1, 2, 3, 4, 5, 6, 7]. +#[test] +fn test_ccall_with_frame_too_many_args_result_used_in_later_block() { + unsafe extern "C" fn test_seven_args( + _self: VALUE, + a: VALUE, + b: VALUE, + c: VALUE, + d: VALUE, + e: VALUE, + f: VALUE, + g: VALUE, + ) -> VALUE { + unsafe { rb_ary_new_from_args(7, a, b, c, d, e, f, g) } + } + + with_rubyvm(|| { + let klass = define_class("ZJITSevenArgs", unsafe { rb_cObject }); + unsafe { + rb_define_method( + klass, + c"seven".as_ptr(), + Some(std::mem::transmute::< + unsafe extern "C" fn(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) -> VALUE, + unsafe extern "C" fn(VALUE) -> VALUE, + >(test_seven_args)), + 7, + ); + } + }); + + assert_snapshot!(assert_compiles_allowing_exits(r#" + def test(obj, flag) + priceable = obj.seven(1, 2, 3, 4, 5, 6, 7) + if flag + priceable + else + nil + end + end + + obj = ZJITSevenArgs.new + test(obj, true) # profile receiver class + test(obj, true) # compile -> currently panics: Failed to get_opnd(vN) + test(obj, true) + "#), @"[1, 2, 3, 4, 5, 6, 7]"); +} + #[test] fn test_string_new_preserves_string_arg() { assert_snapshot!(inspect(r#" diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 75535272af..261cdf2c51 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -234,6 +234,8 @@ pub const VM_ENV_DATA_INDEX_SPECVAL: i32 = -1; pub const VM_ENV_DATA_INDEX_FLAGS: u32 = 0; pub const VM_BLOCK_HANDLER_NONE: u32 = 0; pub const SHAPE_ID_NUM_BITS: u32 = 32; +pub const ZJIT_STACK_MAP_VREG_TAG: u32 = 8; +pub const ZJIT_STACK_MAP_SHIFT: u32 = 8; pub const ZJIT_JIT_RETURN_C_FRAME: u32 = 1; pub type rb_alloc_func_t = ::std::option::Option<unsafe extern "C" fn(klass: VALUE) -> VALUE>; pub const RUBY_Qfalse: ruby_special_consts = 0; @@ -1921,11 +1923,12 @@ pub const DEFINED_FUNC: defined_type = 16; pub const DEFINED_CONST_FROM: defined_type = 17; pub type defined_type = u32; #[repr(C)] -#[derive(Debug, Copy, Clone)] pub struct zjit_jit_frame { pub pc: *const VALUE, pub iseq: *const rb_iseq_t, pub materialize_block_code: bool, + pub stack_size: u32, + pub stack: __IncompleteArrayField<VALUE>, } pub const ISEQ_BODY_OFFSET_PARAM: zjit_struct_offsets = 16; pub type zjit_struct_offsets = u32; @@ -1940,7 +1943,7 @@ pub const RUBY_OFFSET_EC_INTERRUPT_FLAG: jit_bindgen_constants = 32; pub const RUBY_OFFSET_EC_INTERRUPT_MASK: jit_bindgen_constants = 36; pub const RUBY_OFFSET_EC_THREAD_PTR: jit_bindgen_constants = 48; pub const RUBY_OFFSET_EC_RACTOR_ID: jit_bindgen_constants = 64; -pub type jit_bindgen_constants = u32; +pub type jit_bindgen_constants = i32; pub const rb_invalid_shape_id: shape_id_t = 524287; pub type rb_iseq_param_keyword_struct = rb_iseq_constant_body_rb_iseq_parameters_rb_iseq_param_keyword; diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index b8d314b858..68c0ea4d90 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -321,13 +321,13 @@ fn inline_thread_current(fun: &mut hir::Function, block: hir::BlockId, _recv: hi let thread_ptr = fun.push_insn(block, hir::Insn::LoadField { recv: ec, id: FieldName::thread_ptr, - offset: RUBY_OFFSET_EC_THREAD_PTR as i32, + offset: RUBY_OFFSET_EC_THREAD_PTR, return_type: types::CPtr, }); let thread_self = fun.push_insn(block, hir::Insn::LoadField { recv: thread_ptr, id: FieldName::SelfParam, - offset: RUBY_OFFSET_THREAD_SELF as i32, + offset: RUBY_OFFSET_THREAD_SELF, // TODO(max): Add Thread type. But Thread.current is not guaranteed to be an exact Thread. // You can make subclasses... return_type: types::BasicObject, @@ -467,7 +467,7 @@ fn inline_string_bytesize(fun: &mut hir::Function, block: hir::BlockId, recv: hi let len = fun.push_insn(block, hir::Insn::LoadField { recv, id: FieldName::len, - offset: RUBY_OFFSET_RSTRING_LEN as i32, + offset: RUBY_OFFSET_RSTRING_LEN, return_type: types::CInt64, }); @@ -491,7 +491,7 @@ fn inline_string_getbyte(fun: &mut hir::Function, block: hir::BlockId, recv: hir let len = fun.push_insn(block, hir::Insn::LoadField { recv, id: FieldName::len, - offset: RUBY_OFFSET_RSTRING_LEN as i32, + offset: RUBY_OFFSET_RSTRING_LEN, return_type: types::CInt64, }); // TODO(max): Find a way to mark these guards as not needed for correctness... as in, once @@ -519,7 +519,7 @@ fn inline_string_setbyte(fun: &mut hir::Function, block: hir::BlockId, recv: hir let len = fun.push_insn(block, hir::Insn::LoadField { recv, id: FieldName::len, - offset: RUBY_OFFSET_RSTRING_LEN as i32, + offset: RUBY_OFFSET_RSTRING_LEN, return_type: types::CInt64, }); let unboxed_index = fun.push_insn(block, hir::Insn::GuardLess { left: unboxed_index, right: len, reason: SideExitReason::GuardLess, state }); @@ -542,7 +542,7 @@ fn inline_string_empty_p(fun: &mut hir::Function, block: hir::BlockId, recv: hir let len = fun.push_insn(block, hir::Insn::LoadField { recv, id: FieldName::len, - offset: RUBY_OFFSET_RSTRING_LEN as i32, + offset: RUBY_OFFSET_RSTRING_LEN, return_type: types::CInt64, }); let zero = fun.push_insn(block, hir::Insn::Const { val: hir::Const::CInt64(0) }); diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index bd2b4e29d6..013996ba9f 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -531,6 +531,7 @@ pub enum SideExitReason { UnhandledCallType(CallType), UnhandledBlockArg, TooManyKeywordParameters, + TooManyArgsForLir, FixnumAddOverflow, FixnumSubOverflow, FixnumMultOverflow, @@ -2578,6 +2579,7 @@ fn can_direct_send(function: &mut Function, block: BlockId, iseq: *const rb_iseq // See: https://github.com/ruby/ruby/pull/15911#discussion_r2710544982 let block_arg = if 0 != params.flags.has_block() { 1 } else { 0 }; let final_argc = caller_positional + kw_total_num as usize + block_arg; + // TODO: Support passing arguments on the stack in C calls if final_argc + 1 > C_ARG_OPNDS.len() { // +1 for self function.set_dynamic_send_reason(send_insn, TooManyArgsForLir); return false; @@ -4257,6 +4259,13 @@ impl Function { self.set_dynamic_send_reason(insn_id, ArgcParamMismatch); continue; } + // TODO: Support passing arguments on the stack in C calls + // +1 for self + if args.len()+1 > C_ARG_OPNDS.len() { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, TooManyArgsForLir); + continue; + } emit_super_call_guards(self, block, super_cme, current_cme, mid, state, frame_state.iseq); @@ -4841,7 +4850,7 @@ impl Function { // See ROBJECT_FIELDS() from include/ruby/internal/core/robject.h let ptr = self.push_insn(block, Insn::LoadField { recv, id: FieldName::as_heap, - offset: ROBJECT_OFFSET_AS_HEAP_FIELDS as i32, + offset: ROBJECT_OFFSET_AS_HEAP_FIELDS, return_type: types::CPtr, }); let offset = SIZEOF_VALUE_I32 * ivar_index as i32; @@ -4853,7 +4862,7 @@ impl Function { fn load_ivar_embedded(&mut self, block: BlockId, recv: InsnId, id: ID, ivar_index: attr_index_t) -> InsnId { // See ROBJECT_FIELDS() from include/ruby/internal/core/robject.h - let offset = ROBJECT_OFFSET_AS_ARY as i32 + let offset = ROBJECT_OFFSET_AS_ARY + (SIZEOF_VALUE * ivar_index.to_usize()) as i32; self.push_insn(block, Insn::LoadField { recv, id: id.into(), offset, @@ -4883,9 +4892,9 @@ impl Function { match layout { ShapeLayout::RClass | ShapeLayout::RData => { let offset = if layout == ShapeLayout::RClass { - RCLASS_OFFSET_PRIME_FIELDS_OBJ as i32 + RCLASS_OFFSET_PRIME_FIELDS_OBJ } else { - TDATA_OFFSET_FIELDS_OBJ as i32 + TDATA_OFFSET_FIELDS_OBJ }; let fields_obj = self.push_insn(block, Insn::LoadField { @@ -5061,10 +5070,10 @@ impl Function { // Current shape contains this ivar let (ivar_storage, offset) = if recv_type.flags().is_embedded() { // See ROBJECT_FIELDS() from include/ruby/internal/core/robject.h - let offset = ROBJECT_OFFSET_AS_ARY as i32 + (SIZEOF_VALUE * ivar_index.to_usize()) as i32; + let offset = ROBJECT_OFFSET_AS_ARY + (SIZEOF_VALUE * ivar_index.to_usize()) as i32; (self_val, offset) } else { - let as_heap = self.push_insn(block, Insn::LoadField { recv: self_val, id: FieldName::as_heap, offset: ROBJECT_OFFSET_AS_HEAP_FIELDS as i32, return_type: types::CPtr }); + let as_heap = self.push_insn(block, Insn::LoadField { recv: self_val, id: FieldName::as_heap, offset: ROBJECT_OFFSET_AS_HEAP_FIELDS, return_type: types::CPtr }); let offset = SIZEOF_VALUE_I32 * ivar_index as i32; (as_heap, offset) }; @@ -5236,6 +5245,13 @@ impl Function { return Err(()); } + // TODO: Support passing arguments on the stack in C calls + // +1 for self + if (argc as usize)+1 > C_ARG_OPNDS.len() { + fun.set_dynamic_send_reason(send_insn_id, TooManyArgsForLir); + return Err(()); + } + // Check singleton class assumption first, before emitting other patchpoints if !fun.assume_no_singleton_classes(block, recv_class, state) { fun.set_dynamic_send_reason(send_insn_id, SingletonClassSeen); @@ -7626,6 +7642,7 @@ fn add_iseq_to_hir( } YARVINSN_concatstrings => { let count = get_arg(pc, 0).as_u32(); + debug_assert!(count > 0, "concatstrings should have arguments"); let strings = state.stack_pop_n(count as usize)?; let insn_id = fun.push_insn(block, Insn::StringConcat { strings, state: exit_id }); state.stack_push(insn_id); @@ -9136,6 +9153,12 @@ fn add_iseq_to_hir( } YARVINSN_invokebuiltin => { let bf: rb_builtin_function = unsafe { *get_arg(pc, 0).as_ptr() }; + // TODO: Support passing arguments on the stack in C calls + // +2 for ec, self + if (bf.argc + 2) as usize > C_ARG_OPNDS.len() { + fun.push_insn(block, Insn::SideExit { state: exit_id, reason: SideExitReason::TooManyArgsForLir, recompile: None }); + break; // End the block + } let mut args = vec![]; for _ in 0..bf.argc { @@ -9165,6 +9188,13 @@ fn add_iseq_to_hir( YARVINSN_opt_invokebuiltin_delegate | YARVINSN_opt_invokebuiltin_delegate_leave => { let bf: rb_builtin_function = unsafe { *get_arg(pc, 0).as_ptr() }; + // TODO: Support passing arguments on the stack in C calls + // +2 for ec, self + if (bf.argc + 2) as usize > C_ARG_OPNDS.len() { + fun.push_insn(block, Insn::SideExit { state: exit_id, reason: SideExitReason::TooManyArgsForLir, recompile: None }); + break; // End the block + } + let index = get_arg(pc, 1).as_usize(); let argc = bf.argc as usize; @@ -9396,6 +9426,10 @@ fn compile_jit_entry_state(fun: &mut Function, jit_entry_block: BlockId, jit_ent let opt_num: usize = params.opt_num.try_into().expect("iseq param opt_num >= 0"); let lead_num: usize = params.lead_num.try_into().expect("iseq param lead_num >= 0"); let passed_opt_num = jit_entry_idx; + // We don't need to check crate::cruby::iseq_escapes_ep because we + // don't enter ISEQ_TYPE_MAIN/ISEQ_TYPE_EVAL using JIT-to-JIT calls. + // TODO: Stop compiling such JIT entries (Shopify/ruby#992) + let iseq_escapes_ep = crate::invariants::iseq_escapes_ep(iseq); // If the iseq has keyword parameters, the keyword bits local will be appended to the local table. let kw_bits_idx: Option<usize> = if unsafe { rb_get_iseq_flags_has_kw(iseq) } { @@ -9418,9 +9452,9 @@ fn compile_jit_entry_state(fun: &mut Function, jit_entry_block: BlockId, jit_ent let mut entry_state = FrameState::new(iseq); let mut ep: Option<InsnId> = None; for local_idx in 0..num_locals(iseq) { - if (lead_num + passed_opt_num..lead_num + opt_num).contains(&local_idx) { + let local = if (lead_num + passed_opt_num..lead_num + opt_num).contains(&local_idx) { // Omitted optionals are locals, so they start as nils before their code run - entry_state.locals.push(fun.push_insn(jit_entry_block, Insn::Const { val: Const::Value(Qnil) })); + fun.push_insn(jit_entry_block, Insn::Const { val: Const::Value(Qnil) }) } else if Some(local_idx) == kw_bits_idx { // Read the kw_bits value written by the caller to the callee frame. // This tells us which optional keywords were NOT provided and need their defaults evaluated. @@ -9430,20 +9464,37 @@ fn compile_jit_entry_state(fun: &mut Function, jit_entry_block: BlockId, jit_ent let ep_offset_u32 = u32::try_from(ep_offset) .unwrap_or_else(|_| panic!("Could not convert ep_offset {ep_offset} to u32")); let ep = *ep.get_or_insert_with(|| fun.push_insn(jit_entry_block, Insn::GetEP { level: 0 })); - entry_state.locals.push(fun.get_local_from_ep( + fun.get_local_from_ep( jit_entry_block, iseq, ep, ep_offset_u32, 0, types::BasicObject, - )); + ) } else if local_idx < param_size { let id = unsafe { rb_zjit_local_id(iseq, local_idx.try_into().unwrap()) }; - entry_state.locals.push(fun.push_insn(jit_entry_block, Insn::LoadArg { idx: arg_idx, id: id.into(), val_type: types::BasicObject })); + let local = fun.push_insn(jit_entry_block, Insn::LoadArg { idx: arg_idx, id: id.into(), val_type: types::BasicObject }); arg_idx += 1; + local } else { - entry_state.locals.push(fun.push_insn(jit_entry_block, Insn::Const { val: Const::Value(Qnil) })); + fun.push_insn(jit_entry_block, Insn::Const { val: Const::Value(Qnil) }) + }; + entry_state.locals.push(local); + + // Once an ISEQ has escaped EP, HIR getlocal may need to read from the + // VM frame instead of FrameState. Direct JIT-to-JIT entry passes locals + // as C arguments, so initialize the frame slots here before such reads. + if iseq_escapes_ep { + let ep_offset = local_idx_to_ep_offset(iseq, local_idx); + let local_id = unsafe { rb_zjit_local_id(iseq, local_idx.try_into().unwrap()) }; + let ep = *ep.get_or_insert_with(|| fun.push_insn(jit_entry_block, Insn::GetEP { level: 0 })); + fun.push_insn(jit_entry_block, Insn::StoreField { + recv: ep, + id: local_id.into(), + offset: -(SIZEOF_VALUE_I32 * ep_offset), + val: local, + }); } } (self_param, entry_state) @@ -10040,13 +10091,13 @@ mod validation_tests { function.push_insn(entry, Insn::LoadField { recv, id: FieldName::as_heap, - offset: ROBJECT_OFFSET_AS_HEAP_FIELDS as i32, + offset: ROBJECT_OFFSET_AS_HEAP_FIELDS, return_type: types::CPtr, }); let ivar = function.push_insn(entry, Insn::LoadField { recv, id: FieldName::Id(ID(1)), - offset: ROBJECT_OFFSET_AS_ARY as i32, + offset: ROBJECT_OFFSET_AS_ARY, return_type: types::BasicObject, }); function.push_insn(entry, Insn::Return { val: ivar }); @@ -10073,13 +10124,13 @@ mod validation_tests { function.push_insn(entry, Insn::LoadField { recv, id: FieldName::as_heap, - offset: ROBJECT_OFFSET_AS_HEAP_FIELDS as i32, + offset: ROBJECT_OFFSET_AS_HEAP_FIELDS, return_type: types::BasicObject, }); let ivar = function.push_insn(entry, Insn::LoadField { recv, id: FieldName::Id(ID(1)), - offset: ROBJECT_OFFSET_AS_ARY as i32, + offset: ROBJECT_OFFSET_AS_ARY, return_type: types::Array, }); function.push_insn(entry, Insn::Return { val: ivar }); diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 5b94aed90e..0953905c19 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -3849,20 +3849,22 @@ mod hir_opt_tests { EntryPoint JIT(0) v5:BasicObject = LoadArg :self@0 v6:NilClass = Const Value(nil) + v7:CPtr = GetEP 0 + StoreField v7, :a@0x1000, v6 Jump bb3(v5, v6) - bb3(v8:BasicObject, v9:NilClass): - v13:Fixnum[1] = Const Value(1) - SetLocal :a, l0, EP@3, v13 - PatchPoint MethodRedefined(Object@0x1000, lambda@0x1008, cme:0x1010) - v45:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v8, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] recompile - v46:BasicObject = CCallWithFrame v45, :Kernel#lambda@0x1038, block=0x1040 - v20:CPtr = GetEP 0 - v21:BasicObject = LoadField v20, :a@0x1048 - PatchPoint MethodRedefined(Object@0x1000, foo@0x1049, cme:0x1050) - v32:CPtr = GetEP 0 - v33:BasicObject = LoadField v32, :a@0x1048 + bb3(v10:BasicObject, v11:NilClass): + v15:Fixnum[1] = Const Value(1) + SetLocal :a, l0, EP@3, v15 + PatchPoint MethodRedefined(Object@0x1008, lambda@0x1010, cme:0x1018) + v47:ObjectSubclass[class_exact*:Object@VALUE(0x1008)] = GuardType v10, ObjectSubclass[class_exact*:Object@VALUE(0x1008)] recompile + v48:BasicObject = CCallWithFrame v47, :Kernel#lambda@0x1040, block=0x1048 + v22:CPtr = GetEP 0 + v23:BasicObject = LoadField v22, :a@0x1000 + PatchPoint MethodRedefined(Object@0x1008, foo@0x1050, cme:0x1058) + v34:CPtr = GetEP 0 + v35:BasicObject = LoadField v34, :a@0x1000 CheckInterrupts - Return v33 + Return v35 "); } @@ -14486,25 +14488,30 @@ mod hir_opt_tests { EntryPoint JIT(0) v9:BasicObject = LoadArg :self@0 v10:BasicObject = LoadArg :a@1 - v11:BasicObject = LoadArg :_b@2 - v12:BasicObject = LoadArg :_c@3 - v13:NilClass = Const Value(nil) - Jump bb3(v9, v10, v11, v12, v13) - bb3(v15:BasicObject, v16:BasicObject, v17:BasicObject, v18:BasicObject, v19:NilClass): + v11:CPtr = GetEP 0 + StoreField v11, :a@0x1001, v10 + v13:BasicObject = LoadArg :_b@2 + StoreField v11, :_b@0x1002, v13 + v15:BasicObject = LoadArg :_c@3 + StoreField v11, :_c@0x1003, v15 + v17:NilClass = Const Value(nil) + StoreField v11, :formatted@0x1004, v17 + Jump bb3(v9, v10, v13, v15, v17) + bb3(v20:BasicObject, v21:BasicObject, v22:BasicObject, v23:BasicObject, v24:NilClass): CheckInterrupts - SetLocal :formatted, l0, EP@3, v16 + SetLocal :formatted, l0, EP@3, v21 PatchPoint SingleRactorMode - SetIvar v15, :@formatted, v16 - v47:ClassSubclass[VMFrozenCore] = Const Value(VALUE(0x1008)) + SetIvar v20, :@formatted, v21 + v52:ClassSubclass[VMFrozenCore] = Const Value(VALUE(0x1008)) PatchPoint MethodRedefined(Class@0x1010, lambda@0x1018, cme:0x1020) - v63:BasicObject = CCallWithFrame v47, :RubyVM::FrozenCore.lambda@0x1048, block=0x1050 - v50:CPtr = GetEP 0 - v51:BasicObject = LoadField v50, :a@0x1001 - v52:BasicObject = LoadField v50, :_b@0x1002 - v53:BasicObject = LoadField v50, :_c@0x1058 - v54:BasicObject = LoadField v50, :formatted@0x1059 + v68:BasicObject = CCallWithFrame v52, :RubyVM::FrozenCore.lambda@0x1048, block=0x1050 + v55:CPtr = GetEP 0 + v56:BasicObject = LoadField v55, :a@0x1001 + v57:BasicObject = LoadField v55, :_b@0x1002 + v58:BasicObject = LoadField v55, :_c@0x1003 + v59:BasicObject = LoadField v55, :formatted@0x1004 CheckInterrupts - Return v63 + Return v68 "); } @@ -15729,21 +15736,24 @@ mod hir_opt_tests { EntryPoint JIT(0) v7:HeapBasicObject = LoadArg :self@0 v8:BasicObject = LoadArg :blk@1 - v9:NilClass = Const Value(nil) - Jump bb3(v7, v8, v9) - bb3(v11:HeapBasicObject, v12:BasicObject, v13:NilClass): + v9:CPtr = GetEP 0 + StoreField v9, :blk@0x1001, v8 + v11:NilClass = Const Value(nil) + StoreField v9, :other_block@0x1002, v11 + Jump bb3(v7, v8, v11) + bb3(v14:HeapBasicObject, v15:BasicObject, v16:NilClass): PatchPoint NoSingletonClass(B@0x1008) PatchPoint MethodRedefined(B@0x1008, proc@0x1010, cme:0x1018) - v39:ObjectSubclass[class_exact:B] = GuardType v11, ObjectSubclass[class_exact:B] recompile - v40:BasicObject = CCallWithFrame v39, :Kernel#proc@0x1040, block=0x1048 - v19:CPtr = GetEP 0 - v20:BasicObject = LoadField v19, :blk@0x1050 - SetLocal :other_block, l0, EP@3, v40 - v27:CPtr = GetEP 0 - v28:BasicObject = LoadField v27, :other_block@0x1051 - v30:BasicObject = InvokeSuper v39, 0x1058, v28 # SendFallbackReason: super: complex argument passing to `super` call + v42:ObjectSubclass[class_exact:B] = GuardType v14, ObjectSubclass[class_exact:B] recompile + v43:BasicObject = CCallWithFrame v42, :Kernel#proc@0x1040, block=0x1048 + v22:CPtr = GetEP 0 + v23:BasicObject = LoadField v22, :blk@0x1001 + SetLocal :other_block, l0, EP@3, v43 + v30:CPtr = GetEP 0 + v31:BasicObject = LoadField v30, :other_block@0x1002 + v33:BasicObject = InvokeSuper v42, 0x1050, v31 # SendFallbackReason: super: complex argument passing to `super` call CheckInterrupts - Return v30 + Return v33 "); } @@ -16328,67 +16338,82 @@ mod hir_opt_tests { EntryPoint JIT(0) v16:BasicObject = LoadArg :self@0 v17:BasicObject = LoadArg :list@1 - v18:NilClass = Const Value(nil) - v19:NilClass = Const Value(nil) + v18:CPtr = GetEP 0 + StoreField v18, :list@0x1001, v17 v20:NilClass = Const Value(nil) - Jump bb3(v16, v17, v18, v19, v20) - bb3(v36:BasicObject, v37:BasicObject, v38:BasicObject, v39:BasicObject, v40:NilClass): - v43:NilClass = Const Value(nil) - SetLocal :sep, l0, EP@5, v43 - Jump bb5(v36, v37, v43, v39, v40) + StoreField v18, :sep@0x1002, v20 + v22:NilClass = Const Value(nil) + StoreField v18, :iter_method@0x1005, v22 + v24:NilClass = Const Value(nil) + StoreField v18, :kwsplat@0x1006, v24 + Jump bb3(v16, v17, v20, v22, v24) + bb3(v51:BasicObject, v52:BasicObject, v53:BasicObject, v54:BasicObject, v55:NilClass): + v58:NilClass = Const Value(nil) + SetLocal :sep, l0, EP@5, v58 + Jump bb5(v51, v52, v58, v54, v55) bb4(): EntryPoint JIT(1) - v23:BasicObject = LoadArg :self@0 - v24:BasicObject = LoadArg :list@1 - v25:BasicObject = LoadArg :sep@2 - v26:NilClass = Const Value(nil) - v27:NilClass = Const Value(nil) - Jump bb5(v23, v24, v25, v26, v27) - bb5(v47:BasicObject, v48:BasicObject, v49:BasicObject, v50:BasicObject, v51:NilClass): - v54:StaticSymbol[:each] = Const Value(VALUE(0x1008)) - SetLocal :iter_method, l0, EP@4, v54 - Jump bb7(v47, v48, v49, v54, v51) - bb6(): - EntryPoint JIT(2) - v30:BasicObject = LoadArg :self@0 - v31:BasicObject = LoadArg :list@1 + v28:BasicObject = LoadArg :self@0 + v29:BasicObject = LoadArg :list@1 + v30:CPtr = GetEP 0 + StoreField v30, :list@0x1001, v29 v32:BasicObject = LoadArg :sep@2 - v33:BasicObject = LoadArg :iter_method@3 + StoreField v30, :sep@0x1002, v32 v34:NilClass = Const Value(nil) - Jump bb7(v30, v31, v32, v33, v34) - bb7(v58:BasicObject, v59:BasicObject, v60:BasicObject, v61:BasicObject, v62:NilClass): - CheckInterrupts - v68:CBool = Test v60 - v69:Truthy = RefineType v60, Truthy - CondBranch v68, bb8(v58, v59, v69, v61, v62), bb11() + StoreField v30, :iter_method@0x1005, v34 + v36:NilClass = Const Value(nil) + StoreField v30, :kwsplat@0x1006, v36 + Jump bb5(v28, v29, v32, v34, v36) + bb5(v62:BasicObject, v63:BasicObject, v64:BasicObject, v65:BasicObject, v66:NilClass): + v69:StaticSymbol[:each] = Const Value(VALUE(0x1008)) + SetLocal :iter_method, l0, EP@4, v69 + Jump bb7(v62, v63, v64, v69, v66) + bb6(): + EntryPoint JIT(2) + v40:BasicObject = LoadArg :self@0 + v41:BasicObject = LoadArg :list@1 + v42:CPtr = GetEP 0 + StoreField v42, :list@0x1001, v41 + v44:BasicObject = LoadArg :sep@2 + StoreField v42, :sep@0x1002, v44 + v46:BasicObject = LoadArg :iter_method@3 + StoreField v42, :iter_method@0x1005, v46 + v48:NilClass = Const Value(nil) + StoreField v42, :kwsplat@0x1006, v48 + Jump bb7(v40, v41, v44, v46, v48) + bb7(v73:BasicObject, v74:BasicObject, v75:BasicObject, v76:BasicObject, v77:NilClass): + CheckInterrupts + v83:CBool = Test v75 + v84:Truthy = RefineType v75, Truthy + CondBranch v83, bb8(v73, v74, v84, v76, v77), bb11() bb11(): - v71:Falsy = RefineType v60, Falsy + v86:Falsy = RefineType v75, Falsy PatchPoint MethodRedefined(Object@0x1010, lambda@0x1018, cme:0x1020) - v118:ObjectSubclass[class_exact*:Object@VALUE(0x1010)] = GuardType v58, ObjectSubclass[class_exact*:Object@VALUE(0x1010)] recompile - v119:BasicObject = CCallWithFrame v118, :Kernel#lambda@0x1048, block=0x1050 - v75:CPtr = GetEP 0 - v76:BasicObject = LoadField v75, :list@0x1001 - v78:BasicObject = LoadField v75, :iter_method@0x1058 - v79:BasicObject = LoadField v75, :kwsplat@0x1059 - SetLocal :sep, l0, EP@5, v119 - Jump bb8(v118, v76, v119, v78, v79) - bb8(v83:BasicObject, v84:BasicObject, v85:BasicObject, v86:BasicObject, v87:BasicObject): + v133:ObjectSubclass[class_exact*:Object@VALUE(0x1010)] = GuardType v73, ObjectSubclass[class_exact*:Object@VALUE(0x1010)] recompile + v134:BasicObject = CCallWithFrame v133, :Kernel#lambda@0x1048, block=0x1050 + v90:CPtr = GetEP 0 + v91:BasicObject = LoadField v90, :list@0x1001 + v93:BasicObject = LoadField v90, :iter_method@0x1005 + v94:BasicObject = LoadField v90, :kwsplat@0x1006 + SetLocal :sep, l0, EP@5, v134 + Jump bb8(v133, v91, v134, v93, v94) + bb8(v98:BasicObject, v99:BasicObject, v100:BasicObject, v101:BasicObject, v102:BasicObject): PatchPoint SingleRactorMode - PatchPoint StableConstantNames(0x1060, CONST) - v115:HashExact[VALUE(0x1068)] = Const Value(VALUE(0x1068)) - SetLocal :kwsplat, l0, EP@3, v115 - v96:CPtr = GetEP 0 - v97:BasicObject = LoadField v96, :list@0x1001 - v99:CPtr = GetEP 0 - v100:BasicObject = LoadField v99, :iter_method@0x1058 - v102:BasicObject = Send v97, 0x1070, :__send__, v100 # SendFallbackReason: Send: unsupported method type Optimized - v103:CPtr = GetEP 0 - v104:BasicObject = LoadField v103, :list@0x1001 - v105:BasicObject = LoadField v103, :sep@0x1002 - v106:BasicObject = LoadField v103, :iter_method@0x1058 - v107:BasicObject = LoadField v103, :kwsplat@0x1059 + PatchPoint StableConstantNames(0x1058, CONST) + v130:HashExact[VALUE(0x1060)] = Const Value(VALUE(0x1060)) + SetLocal :kwsplat, l0, EP@3, v130 + v111:CPtr = GetEP 0 + v112:BasicObject = LoadField v111, :list@0x1001 + v114:CPtr = GetEP 0 + v115:BasicObject = LoadField v114, :iter_method@0x1005 + v117:BasicObject = Send v112, 0x1068, :__send__, v115 # SendFallbackReason: Send: unsupported method type Optimized + v118:CPtr = GetEP 0 + v119:BasicObject = LoadField v118, :list@0x1001 + v120:BasicObject = LoadField v118, :sep@0x1002 + v121:BasicObject = LoadField v118, :iter_method@0x1005 + v122:BasicObject = LoadField v118, :kwsplat@0x1006 CheckInterrupts - Return v102 + Return v117 "); } @@ -18779,4 +18804,89 @@ mod hir_opt_tests { Return v200 "); } + + #[test] + fn test_ccall_with_frame_too_many_args_result_used_in_later_block() { + unsafe extern "C" fn test_seven_args( + _self: VALUE, + a: VALUE, + b: VALUE, + c: VALUE, + d: VALUE, + e: VALUE, + f: VALUE, + g: VALUE, + ) -> VALUE { + unsafe { rb_ary_new_from_args(7, a, b, c, d, e, f, g) } + } + + with_rubyvm(|| { + let klass = define_class("ZJITSevenArgs", unsafe { rb_cObject }); + unsafe { + rb_define_method( + klass, + c"seven".as_ptr(), + Some(std::mem::transmute::< + unsafe extern "C" fn(VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE, VALUE) -> VALUE, + unsafe extern "C" fn(VALUE) -> VALUE, + >(test_seven_args)), + 7, + ); + } + }); + + eval(r#" + def test(obj, flag) + priceable = obj.seven(1, 2, 3, 4, 5, 6, 7) + if flag + priceable + else + nil + end + end + + obj = ZJITSevenArgs.new + test(obj, true) # profile receiver class + "#); + assert_snapshot!(hir_string("test"), @" + fn test@<compiled>:3: + bb1(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:CPtr = LoadSP + v3:BasicObject = LoadField v2, :obj@0x1000 + v4:BasicObject = LoadField v2, :flag@0x1001 + v5:NilClass = Const Value(nil) + Jump bb3(v1, v3, v4, v5) + bb2(): + EntryPoint JIT(0) + v8:BasicObject = LoadArg :self@0 + v9:BasicObject = LoadArg :obj@1 + v10:BasicObject = LoadArg :flag@2 + v11:NilClass = Const Value(nil) + Jump bb3(v8, v9, v10, v11) + bb3(v13:BasicObject, v14:BasicObject, v15:BasicObject, v16:NilClass): + v21:Fixnum[1] = Const Value(1) + v23:Fixnum[2] = Const Value(2) + v25:Fixnum[3] = Const Value(3) + v27:Fixnum[4] = Const Value(4) + v29:Fixnum[5] = Const Value(5) + v31:Fixnum[6] = Const Value(6) + v33:Fixnum[7] = Const Value(7) + v35:BasicObject = Send v14, :seven, v21, v23, v25, v27, v29, v31, v33 # SendFallbackReason: Too many arguments for LIR + PatchPoint NoEPEscape(test) + CheckInterrupts + v43:CBool = Test v15 + v44:Falsy = RefineType v15, Falsy + CondBranch v43, bb5(), bb4(v13, v14, v44, v35) + bb5(): + v46:Truthy = RefineType v15, Truthy + CheckInterrupts + Return v35 + bb4(v53:BasicObject, v54:BasicObject, v55:Falsy, v56:BasicObject): + v60:NilClass = Const Value(nil) + CheckInterrupts + Return v60 + "); + } } diff --git a/zjit/src/jit_frame.rs b/zjit/src/jit_frame.rs index 3dafd385be..eaedcd0a60 100644 --- a/zjit/src/jit_frame.rs +++ b/zjit/src/jit_frame.rs @@ -1,4 +1,8 @@ -use crate::cruby::{IseqPtr, VALUE, rb_gc_mark_movable, rb_gc_location}; +use std::alloc::{alloc, handle_alloc_error, Layout}; +use std::mem::{align_of, size_of}; +use std::ptr; + +use crate::cruby::{__IncompleteArrayField, IseqPtr, VALUE, rb_gc_mark_movable, rb_gc_location}; use crate::cruby::zjit_jit_frame; use crate::codegen::iseq_may_write_block_code; use crate::state::ZJITState; @@ -8,18 +12,43 @@ use crate::state::ZJITState; pub type JITFrame = zjit_jit_frame; impl JITFrame { - /// Allocate a JITFrame on the heap, register it with ZJITState, and return - /// a raw pointer that remains valid for the lifetime of the process. - fn alloc(jit_frame: JITFrame) -> *const Self { - let raw_ptr = Box::into_raw(Box::new(jit_frame)); + /// Allocate a JITFrame and its trailing stack map on the heap, register it + /// with ZJITState, and return a raw pointer that remains valid for the + /// lifetime of the process. + fn alloc( + pc: *const VALUE, + iseq: IseqPtr, + materialize_block_code: bool, + stack_size: usize, + ) -> *const Self { + // JITFrame ends with a flexible stack[] array, so allocate enough + // space for the fixed fields plus the requested stack map entries. + let frame_size = size_of::<JITFrame>() + .checked_add(stack_size.checked_mul(size_of::<VALUE>()).unwrap()) + .unwrap(); + let layout = Layout::from_size_align(frame_size, align_of::<JITFrame>()).unwrap(); + let raw_ptr = unsafe { alloc(layout) as *mut JITFrame }; + if raw_ptr.is_null() { + handle_alloc_error(layout); + } + + unsafe { + ptr::write(raw_ptr, JITFrame { + pc, + iseq, + materialize_block_code, + stack_size: stack_size.try_into().unwrap(), + stack: __IncompleteArrayField::new(), + }); + } ZJITState::get_jit_frames().push(raw_ptr); raw_ptr as *const _ } /// Create a JITFrame for an ISEQ frame. - pub fn new_iseq(pc: *const VALUE, iseq: IseqPtr) -> *const Self { + pub fn new_iseq(pc: *const VALUE, iseq: IseqPtr, stack_size: usize) -> *const Self { let materialize_block_code = !iseq_may_write_block_code(iseq); - Self::alloc(JITFrame { pc, iseq, materialize_block_code }) + Self::alloc(pc, iseq, materialize_block_code, stack_size) } /// Mark the iseq pointer for GC. Called from rb_zjit_root_mark. @@ -94,6 +123,29 @@ mod tests { "), @"1"); } + // Direct JIT-to-JIT entry passes callee locals as native arguments. If the + // callee ISEQ has already escaped EP, later getlocal reads use EP memory, + // so JIT entry must materialize those locals into the callee frame. + #[test] + fn test_jit_entry_materializes_ep_escaped_locals() { + assert_snapshot!(inspect(" + def poison(*) = nil + + def victim(a, b, c) + lambda { a } + a + end + + def jit_entry + poison([], [], [], []) + victim(:expected, 1, 2) + end + + jit_entry + Array.new(100) { jit_entry }.uniq + "), @"[:expected]"); + } + // Materialize frames on side exit: a type guard triggers a side exit with // multiple JIT frames on the stack. All frames must be materialized before // the interpreter resumes. diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index b019ba0b87..a769005fb9 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -234,6 +234,7 @@ make_counters! { exit_block_param_proxy_profile_not_covered, exit_block_param_wb_required, exit_too_many_keyword_parameters, + exit_too_many_args_for_lir, exit_no_profile_send, exit_splatkw_not_nil_or_hash, exit_splatkw_polymorphic, @@ -630,6 +631,7 @@ pub fn side_exit_counter(reason: crate::hir::SideExitReason) -> Counter { BlockParamProxyProfileNotCovered => exit_block_param_proxy_profile_not_covered, BlockParamWbRequired => exit_block_param_wb_required, TooManyKeywordParameters => exit_too_many_keyword_parameters, + TooManyArgsForLir => exit_too_many_args_for_lir, SplatKwNotNilOrHash => exit_splatkw_not_nil_or_hash, SplatKwPolymorphic => exit_splatkw_polymorphic, SplatKwNotProfiled => exit_splatkw_not_profiled, |
