author     Alan Wu <XrXr@users.noreply.github.com>  2025-07-25 22:09:51 -0400
committer  Alan Wu <XrXr@users.noreply.github.com>  2025-07-28 15:30:50 -0400
commit     ff428b4dd0c5f0a07abbd8f8520d8d1e4bff8d66 (patch)
tree       3a374489edb8903846e08651f5ad81b5d82a3e69
parent     5ca71364ff6cf4a9c445cb2701b1eb4770e79579 (diff)
ZJIT: Keep a frame pointer and use it for memory params
Previously, ZJIT miscompiled the following because of native SP interference:

    def a(n1,n2,n3,n4,n5,n6,n7,n8) = [n8]
    a(0,0,0,0,0,0,0, :ok)

Commented problematic disassembly:

    ; call rb_ary_new_capa
    mov x0, #1
    mov x16, #0x1278
    movk x16, #0x4bc, lsl #16
    movk x16, #1, lsl #32
    blr x16
    ; call rb_ary_push
    mov x1, x0
    str x1, [sp, #-0x10]!   ; c_push() from alloc_regs()
    mov x0, x1              ; arg0, the array
    ldur x1, [sp]           ; meant to be arg1=n8, but sp just moved!
    mov x16, #0x3968
    movk x16, #0x4bc, lsl #16
    movk x16, #1, lsl #32
    blr x16

Since the frame pointer stays constant in the body of the function, static offsets based on it don't run the risk of being invalidated by SP movements.

Pass the registers to preserve through Insn::FrameSetup. This allows ARM to use STP and waste no gaps between EC, SP, and CFP. x86 now preserves and restores RBP since we use it as the frame pointer.

Since all arches now have a frame pointer, remove the offset-based SP movement in the epilogue and restore registers using the frame pointer.
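For illustration, a minimal sketch of the corrected shape (the offset is hypothetical, not taken from the actual generated code): once x29 holds the frame base, a spilled parameter keeps one fixed address for the whole body, so a later SP push can no longer invalidate the load:

    str x1, [sp, #-0x10]!   ; c_push() still moves SP...
    ldur x1, [x29, #-0x28]  ; ...but the x29-relative slot is unaffected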
-rw-r--r--  test/ruby/test_zjit.rb          7
-rw-r--r--  zjit/src/asm/arm64/opnd.rs      3
-rw-r--r--  zjit/src/backend/arm64/mod.rs   117
-rw-r--r--  zjit/src/backend/lir.rs         41
-rw-r--r--  zjit/src/backend/x86_64/mod.rs  76
-rw-r--r--  zjit/src/codegen.rs             87
6 files changed, 220 insertions(+), 111 deletions(-)
diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb
index 0dcdb8e4cb..fc085d2e93 100644
--- a/test/ruby/test_zjit.rb
+++ b/test/ruby/test_zjit.rb
@@ -819,6 +819,13 @@ class TestZJIT < Test::Unit::TestCase
}
end
+ def test_spilled_param_new_array
+ assert_compiles '[:ok]', %q{
+ def a(n1,n2,n3,n4,n5,n6,n7,n8) = [n8]
+ a(0,0,0,0,0,0,0, :ok)
+ }
+ end
+
def test_opt_aref_with
assert_compiles ':ok', %q{
def aref_with(hash) = hash["key"]
diff --git a/zjit/src/asm/arm64/opnd.rs b/zjit/src/asm/arm64/opnd.rs
index 28422b7476..a77958f7e6 100644
--- a/zjit/src/asm/arm64/opnd.rs
+++ b/zjit/src/asm/arm64/opnd.rs
@@ -119,6 +119,9 @@ pub const X20_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 20 };
pub const X21_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 21 };
pub const X22_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 22 };
+// frame pointer (base pointer)
+pub const X29_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 29 };
+
// link register
pub const X30_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 30 };
diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs
index 88ccad8e09..42dc31c90f 100644
--- a/zjit/src/backend/arm64/mod.rs
+++ b/zjit/src/backend/arm64/mod.rs
@@ -29,6 +29,7 @@ pub const C_ARG_OPNDS: [Opnd; 6] = [
pub const C_RET_REG: Reg = X0_REG;
pub const C_RET_OPND: Opnd = Opnd::Reg(X0_REG);
pub const NATIVE_STACK_PTR: Opnd = Opnd::Reg(XZR_REG);
+pub const NATIVE_BASE_PTR: Opnd = Opnd::Reg(X29_REG);
// These constants define the way we work with Arm64's stack pointer. The stack
// pointer always needs to be aligned to a 16-byte boundary.
@@ -911,18 +912,54 @@ impl Assembler
cb.write_byte(0);
}
},
- Insn::FrameSetup => {
+ &Insn::FrameSetup { preserved, mut slot_count } => {
+ const { assert!(SIZEOF_VALUE == 8, "alignment logic relies on SIZEOF_VALUE == 8"); }
+ // Preserve X29 and set up frame record
stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16));
-
- // X29 (frame_pointer) = SP
mov(cb, X29, C_SP_REG);
- },
- Insn::FrameTeardown => {
+
+ for regs in preserved.chunks(2) {
+ // For the body, store pairs and move SP
+ if let [reg0, reg1] = regs {
+ stp_pre(cb, reg1.into(), reg0.into(), A64Opnd::new_mem(128, C_SP_REG, -16));
+ } else if let [reg] = regs {
+ // For the odd overhang, store but don't move SP. Combine its SP
+ // movement with the movement for the slots below.
+ stur(cb, reg.into(), A64Opnd::new_mem(64, C_SP_REG, -8));
+ slot_count += 1;
+ } else {
+ unreachable!("chunks(2)");
+ }
+ }
+ // Round slot_count up to an even number to keep SP 16-byte aligned
+ if slot_count % 2 == 1 {
+ slot_count += 1;
+ }
+ if slot_count > 0 {
+ let slot_offset = (slot_count * SIZEOF_VALUE) as u64;
+ // Bail when asked to reserve too many slots in one instruction.
+ ShiftedImmediate::try_from(slot_offset).ok()?;
+ sub(cb, C_SP_REG, C_SP_REG, A64Opnd::new_uimm(slot_offset));
+ }
+ }
+ Insn::FrameTeardown { preserved } => {
+ // Restore preserved registers below frame pointer.
+ let mut base_offset = 0;
+ for regs in preserved.chunks(2) {
+ if let [reg0, reg1] = regs {
+ base_offset -= 16;
+ ldp(cb, reg1.into(), reg0.into(), A64Opnd::new_mem(128, X29, base_offset));
+ } else if let [reg] = regs {
+ ldur(cb, reg.into(), A64Opnd::new_mem(64, X29, base_offset - 8));
+ } else {
+ unreachable!("chunks(2)");
+ }
+ }
+
// SP = X29 (frame pointer)
mov(cb, C_SP_REG, X29);
-
ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16));
- },
+ }
Insn::Add { left, right, out } => {
// Usually, we issue ADDS, so you could branch on overflow, but ADDS with
// out=31 refers to out=XZR, which discards the sum. So, instead of ADDS
@@ -1482,12 +1519,74 @@ mod tests {
fn test_emit_frame() {
let (mut asm, mut cb) = setup_asm();
- asm.frame_setup();
- asm.frame_teardown();
+ asm.frame_setup(&[], 0);
+ asm.frame_teardown(&[]);
asm.compile_with_num_regs(&mut cb, 0);
}
#[test]
+ fn frame_setup_and_teardown() {
+ const THREE_REGS: &'static [Opnd] = &[Opnd::Reg(X19_REG), Opnd::Reg(X20_REG), Opnd::Reg(X21_REG)];
+ // Test 3 preserved regs (odd), odd slot_count
+ {
+ let (mut asm, mut cb) = setup_asm();
+ asm.frame_setup(THREE_REGS, 3);
+ asm.frame_teardown(THREE_REGS);
+ asm.compile_with_num_regs(&mut cb, 0);
+ assert_disasm!(cb, "fd7bbfa9fd030091f44fbfa9f5831ff8ff8300d1b44f7fa9b5835ef8bf030091fd7bc1a8", "
+ 0x0: stp x29, x30, [sp, #-0x10]!
+ 0x4: mov x29, sp
+ 0x8: stp x20, x19, [sp, #-0x10]!
+ 0xc: stur x21, [sp, #-8]
+ 0x10: sub sp, sp, #0x20
+ 0x14: ldp x20, x19, [x29, #-0x10]
+ 0x18: ldur x21, [x29, #-0x18]
+ 0x1c: mov sp, x29
+ 0x20: ldp x29, x30, [sp], #0x10
+ ");
+ }
+
+ // Test 3 preserved regs (odd), even slot_count
+ {
+ let (mut asm, mut cb) = setup_asm();
+ asm.frame_setup(THREE_REGS, 4);
+ asm.frame_teardown(THREE_REGS);
+ asm.compile_with_num_regs(&mut cb, 0);
+ assert_disasm!(cb, "fd7bbfa9fd030091f44fbfa9f5831ff8ffc300d1b44f7fa9b5835ef8bf030091fd7bc1a8", "
+ 0x0: stp x29, x30, [sp, #-0x10]!
+ 0x4: mov x29, sp
+ 0x8: stp x20, x19, [sp, #-0x10]!
+ 0xc: stur x21, [sp, #-8]
+ 0x10: sub sp, sp, #0x30
+ 0x14: ldp x20, x19, [x29, #-0x10]
+ 0x18: ldur x21, [x29, #-0x18]
+ 0x1c: mov sp, x29
+ 0x20: ldp x29, x30, [sp], #0x10
+ ");
+ }
+
+ // Test 4 preserved regs (even), odd slot_count
+ {
+ static FOUR_REGS: &'static [Opnd] = &[Opnd::Reg(X19_REG), Opnd::Reg(X20_REG), Opnd::Reg(X21_REG), Opnd::Reg(X22_REG)];
+ let (mut asm, mut cb) = setup_asm();
+ asm.frame_setup(FOUR_REGS, 3);
+ asm.frame_teardown(FOUR_REGS);
+ asm.compile_with_num_regs(&mut cb, 0);
+ assert_disasm!(cb, "fd7bbfa9fd030091f44fbfa9f657bfa9ff8300d1b44f7fa9b6577ea9bf030091fd7bc1a8", "
+ 0x0: stp x29, x30, [sp, #-0x10]!
+ 0x4: mov x29, sp
+ 0x8: stp x20, x19, [sp, #-0x10]!
+ 0xc: stp x22, x21, [sp, #-0x10]!
+ 0x10: sub sp, sp, #0x20
+ 0x14: ldp x20, x19, [x29, #-0x10]
+ 0x18: ldp x22, x21, [x29, #-0x20]
+ 0x1c: mov sp, x29
+ 0x20: ldp x29, x30, [sp], #0x10
+ ");
+ }
+ }
+
+ #[test]
fn test_emit_je_fits_into_bcond() {
let (mut asm, mut cb) = setup_asm();
diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs
index 7bac210bee..36e783bd4e 100644
--- a/zjit/src/backend/lir.rs
+++ b/zjit/src/backend/lir.rs
@@ -12,10 +12,12 @@ use crate::asm::{CodeBlock, Label};
pub use crate::backend::current::{
Reg,
EC, CFP, SP,
- NATIVE_STACK_PTR,
+ NATIVE_STACK_PTR, NATIVE_BASE_PTR,
C_ARG_OPNDS, C_RET_REG, C_RET_OPND,
};
+pub static JIT_PRESERVED_REGS: &'static [Opnd] = &[CFP, SP, EC];
+
// Memory operand base
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum MemBase
@@ -291,8 +293,6 @@ pub enum Target
context: Option<SideExitContext>,
/// We use this to enrich asm comments.
reason: SideExitReason,
- /// The number of bytes we need to adjust the C stack pointer by.
- c_stack_bytes: usize,
/// Some if the side exit should write this label. We use it for patch points.
label: Option<Label>,
},
@@ -404,10 +404,10 @@ pub enum Insn {
CSelZ { truthy: Opnd, falsy: Opnd, out: Opnd },
/// Set up the frame stack as necessary per the architecture.
- FrameSetup,
+ FrameSetup { preserved: &'static [Opnd], slot_count: usize },
/// Tear down the frame stack as necessary per the architecture.
- FrameTeardown,
+ FrameTeardown { preserved: &'static [Opnd] },
// Atomically increment a counter
// Input: memory operand, increment value
@@ -598,8 +598,8 @@ impl Insn {
Insn::CSelNE { .. } => "CSelNE",
Insn::CSelNZ { .. } => "CSelNZ",
Insn::CSelZ { .. } => "CSelZ",
- Insn::FrameSetup => "FrameSetup",
- Insn::FrameTeardown => "FrameTeardown",
+ Insn::FrameSetup { .. } => "FrameSetup",
+ Insn::FrameTeardown { .. } => "FrameTeardown",
Insn::IncrCounter { .. } => "IncrCounter",
Insn::Jbe(_) => "Jbe",
Insn::Jb(_) => "Jb",
@@ -823,8 +823,8 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
Insn::CPop { .. } |
Insn::CPopAll |
Insn::CPushAll |
- Insn::FrameSetup |
- Insn::FrameTeardown |
+ Insn::FrameSetup { .. } |
+ Insn::FrameTeardown { .. } |
Insn::PadPatchPoint |
Insn::PosMarker(_) => None,
@@ -979,8 +979,8 @@ impl<'a> InsnOpndMutIterator<'a> {
Insn::CPop { .. } |
Insn::CPopAll |
Insn::CPushAll |
- Insn::FrameSetup |
- Insn::FrameTeardown |
+ Insn::FrameSetup { .. } |
+ Insn::FrameTeardown { .. } |
Insn::PadPatchPoint |
Insn::PosMarker(_) => None,
@@ -1813,7 +1813,7 @@ impl Assembler
for (idx, target) in targets {
// Compile a side exit. Note that this is past the split pass and alloc_regs(),
// so you can't use a VReg or an instruction that needs to be split.
- if let Target::SideExit { context, reason, c_stack_bytes, label } = target {
+ if let Target::SideExit { context, reason, label } = target {
asm_comment!(self, "Exit: {reason}");
let side_exit_label = if let Some(label) = label {
Target::Label(label)
@@ -1858,13 +1858,8 @@ impl Assembler
self.store(cfp_sp, Opnd::Reg(Assembler::SCRATCH_REG));
}
- if c_stack_bytes > 0 {
- asm_comment!(self, "restore C stack pointer");
- self.add_into(NATIVE_STACK_PTR, c_stack_bytes.into());
- }
-
asm_comment!(self, "exit to the interpreter");
- self.frame_teardown();
+ self.frame_teardown(&[]); // matching the setup in :bb0-prologue:
self.mov(C_RET_OPND, Opnd::UImm(Qundef.as_u64()));
self.cret(C_RET_OPND);
@@ -2065,12 +2060,14 @@ impl Assembler {
out
}
- pub fn frame_setup(&mut self) {
- self.push_insn(Insn::FrameSetup);
+ pub fn frame_setup(&mut self, preserved_regs: &'static [Opnd], slot_count: usize) {
+ self.push_insn(Insn::FrameSetup { preserved: preserved_regs, slot_count });
}
- pub fn frame_teardown(&mut self) {
- self.push_insn(Insn::FrameTeardown);
+ /// The inverse of [Self::frame_setup], used before returning. A `reserve_bytes`
+ /// parameter is not necessary since we use a base pointer register.
+ pub fn frame_teardown(&mut self, preserved_regs: &'static [Opnd]) {
+ self.push_insn(Insn::FrameTeardown { preserved: preserved_regs });
}
pub fn incr_counter(&mut self, mem: Opnd, value: Opnd) {
diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs
index 942705fb95..4543252573 100644
--- a/zjit/src/backend/x86_64/mod.rs
+++ b/zjit/src/backend/x86_64/mod.rs
@@ -29,6 +29,7 @@ pub const C_ARG_OPNDS: [Opnd; 6] = [
pub const C_RET_REG: Reg = RAX_REG;
pub const C_RET_OPND: Opnd = Opnd::Reg(RAX_REG);
pub const NATIVE_STACK_PTR: Opnd = Opnd::Reg(RSP_REG);
+pub const NATIVE_BASE_PTR: Opnd = Opnd::Reg(RBP_REG);
impl CodeBlock {
// The number of bytes that are generated by jmp_ptr
@@ -110,9 +111,9 @@ impl Assembler
vec![RAX_REG, RCX_REG, RDX_REG, RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG, R11_REG]
}
- /// How many bytes a call and a [Self::frame_setup] would change native SP
+ /// How many bytes a call and a bare-bones [Self::frame_setup] would change native SP
pub fn frame_size() -> i32 {
- 0x8
+ 0x10
}
// These are the callee-saved registers in the x86-64 SysV ABI
@@ -476,22 +477,34 @@ impl Assembler
cb.write_byte(0);
},
- // Set up RBP to work with frame pointer unwinding
+ // Set up RBP as frame pointer to work with unwinding
// (e.g. with Linux `perf record --call-graph fp`)
- Insn::FrameSetup => {
- if false { // We don't support --zjit-perf yet
- // TODO(alan): Change Assembler::frame_size() when adding --zjit-perf support
- push(cb, RBP);
- mov(cb, RBP, RSP);
- push(cb, RBP);
+ // and to allow pushes and pops in the function.
+ &Insn::FrameSetup { preserved, mut slot_count } => {
+ // Bump slot_count for alignment if necessary
+ const { assert!(SIZEOF_VALUE == 8, "alignment logic relies on SIZEOF_VALUE == 8"); }
+ let total_slots = 2 /* rbp and return address */ + slot_count + preserved.len();
+ if total_slots % 2 == 1 {
+ slot_count += 1;
}
- },
- Insn::FrameTeardown => {
- if false { // We don't support --zjit-perf yet
- pop(cb, RBP);
- pop(cb, RBP);
+ push(cb, RBP);
+ mov(cb, RBP, RSP);
+ for reg in preserved {
+ push(cb, reg.into());
}
- },
+ if slot_count > 0 {
+ sub(cb, RSP, uimm_opnd((slot_count * SIZEOF_VALUE) as u64));
+ }
+ }
+ &Insn::FrameTeardown { preserved } => {
+ let mut preserved_offset = -8;
+ for reg in preserved {
+ mov(cb, reg.into(), mem_opnd(64, RBP, preserved_offset));
+ preserved_offset -= 8;
+ }
+ mov(cb, RSP, RBP);
+ pop(cb, RBP);
+ }
Insn::Add { left, right, .. } => {
let opnd1 = emit_64bit_immediate(cb, right);
@@ -1306,4 +1319,37 @@ mod tests {
0x6: mov dword ptr [rax], 0x80000001
"});
}
+
+ #[test]
+ fn frame_setup_teardown() {
+ let (mut asm, mut cb) = setup_asm();
+ asm.frame_setup(JIT_PRESERVED_REGS, 0);
+ asm.frame_teardown(JIT_PRESERVED_REGS);
+
+ asm.cret(C_RET_OPND);
+
+ asm.frame_setup(&[], 5);
+ asm.frame_teardown(&[]);
+
+ asm.compile_with_num_regs(&mut cb, 0);
+ assert_disasm!(cb, "554889e541555341544883ec084c8b6df8488b5df04c8b65e84889ec5dc3554889e54883ec304889ec5d", {"
+ 0x0: push rbp
+ 0x1: mov rbp, rsp
+ 0x4: push r13
+ 0x6: push rbx
+ 0x7: push r12
+ 0x9: sub rsp, 8
+ 0xd: mov r13, qword ptr [rbp - 8]
+ 0x11: mov rbx, qword ptr [rbp - 0x10]
+ 0x15: mov r12, qword ptr [rbp - 0x18]
+ 0x19: mov rsp, rbp
+ 0x1c: pop rbp
+ 0x1d: ret
+ 0x1e: push rbp
+ 0x1f: mov rbp, rsp
+ 0x22: sub rsp, 0x30
+ 0x26: mov rsp, rbp
+ 0x29: pop rbp
+ "});
+ }
}
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index c460dddfb8..1d694bffd9 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -8,7 +8,7 @@ use crate::invariants::{track_bop_assumption, track_cme_assumption};
use crate::gc::{get_or_create_iseq_payload, append_gc_offsets};
use crate::state::ZJITState;
use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr};
-use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, SideExitContext, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, SP};
+use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, SideExitContext, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SP};
use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, CallInfo, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SELF_PARAM_IDX};
use crate::hir::{Const, FrameState, Function, Insn, InsnId};
use crate::hir_type::{types::Fixnum, Type};
@@ -29,18 +29,18 @@ struct JITState {
branch_iseqs: Vec<(Rc<Branch>, IseqPtr)>,
- /// The number of bytes allocated for basic block arguments spilled onto the C stack
- c_stack_bytes: usize,
+ /// The number of slots allocated for basic block arguments spilled onto the C stack
+ c_stack_slots: usize,
}
impl JITState {
/// Create a new JITState instance
- fn new(iseq: IseqPtr, num_insns: usize, num_blocks: usize, c_stack_bytes: usize) -> Self {
+ fn new(iseq: IseqPtr, num_insns: usize, num_blocks: usize, c_stack_slots: usize) -> Self {
JITState {
iseq,
opnds: vec![None; num_insns],
labels: vec![None; num_blocks],
branch_iseqs: Vec::default(),
- c_stack_bytes,
+ c_stack_slots,
}
}
@@ -128,7 +128,7 @@ fn gen_iseq_entry_point_body(cb: &mut CodeBlock, iseq: IseqPtr) -> *const u8 {
append_gc_offsets(iseq, &gc_offsets);
// Compile an entry point to the JIT code
- (gen_entry(cb, iseq, &function, start_ptr, jit.c_stack_bytes), jit.branch_iseqs)
+ (gen_entry(cb, iseq, &function, start_ptr), jit.branch_iseqs)
},
None => (None, vec![]),
};
@@ -170,21 +170,18 @@ fn register_with_perf(iseq_name: String, start_ptr: usize, code_size: usize) {
}
/// Compile a JIT entry
-fn gen_entry(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function, function_ptr: CodePtr, c_stack_bytes: usize) -> Option<CodePtr> {
+fn gen_entry(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function, function_ptr: CodePtr) -> Option<CodePtr> {
// Set up registers for CFP, EC, SP, and basic block arguments
let mut asm = Assembler::new();
gen_entry_prologue(&mut asm, iseq);
- gen_entry_params(&mut asm, iseq, function.block(BlockId(0)), c_stack_bytes);
+ gen_entry_params(&mut asm, iseq, function.block(BlockId(0)));
// Jump to the first block using a call instruction
asm.ccall(function_ptr.raw_ptr(cb) as *const u8, vec![]);
// Restore registers for CFP, EC, and SP after use
- asm_comment!(asm, "exit to the interpreter");
- asm.cpop_into(SP);
- asm.cpop_into(EC);
- asm.cpop_into(CFP);
- asm.frame_teardown();
+ asm_comment!(asm, "return to the interpreter");
+ asm.frame_teardown(lir::JIT_PRESERVED_REGS);
asm.cret(C_RET_OPND);
if get_option!(dump_lir) {
@@ -231,8 +228,8 @@ fn gen_iseq(cb: &mut CodeBlock, iseq: IseqPtr) -> Option<(CodePtr, Vec<(Rc<Branc
/// Compile a function
fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function) -> Option<(CodePtr, Vec<CodePtr>, JITState)> {
- let c_stack_bytes = aligned_stack_bytes(max_num_params(function).saturating_sub(ALLOC_REGS.len()));
- let mut jit = JITState::new(iseq, function.num_insns(), function.num_blocks(), c_stack_bytes);
+ let c_stack_slots = max_num_params(function).saturating_sub(ALLOC_REGS.len());
+ let mut jit = JITState::new(iseq, function.num_insns(), function.num_blocks(), c_stack_slots);
let mut asm = Assembler::new();
// Compile each basic block
@@ -245,16 +242,9 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function) -> Optio
let label = jit.get_label(&mut asm, block_id);
asm.write_label(label);
- // Set up the frame at the first block
+ // Set up the frame at the first block. :bb0-prologue:
if block_id == BlockId(0) {
- asm.frame_setup();
-
- // Bump the C stack pointer for basic block arguments
- if jit.c_stack_bytes > 0 {
- asm_comment!(asm, "bump C stack pointer");
- let new_sp = asm.sub(NATIVE_STACK_PTR, jit.c_stack_bytes.into());
- asm.mov(NATIVE_STACK_PTR, new_sp);
- }
+ asm.frame_setup(&[], jit.c_stack_slots);
}
// Compile all parameters
@@ -335,7 +325,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
gen_send_without_block(jit, asm, call_info, *cd, &function.frame_state(*state), opnd!(self_val), opnds!(args))?,
Insn::SendWithoutBlockDirect { cme, iseq, self_val, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(self_val), opnds!(args), &function.frame_state(*state))?,
Insn::InvokeBuiltin { bf, args, state } => gen_invokebuiltin(asm, &function.frame_state(*state), bf, opnds!(args))?,
- Insn::Return { val } => return Some(gen_return(jit, asm, opnd!(val))?),
+ Insn::Return { val } => return Some(gen_return(asm, opnd!(val))?),
Insn::FixnumAdd { left, right, state } => gen_fixnum_add(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?,
Insn::FixnumSub { left, right, state } => gen_fixnum_sub(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?,
Insn::FixnumMult { left, right, state } => gen_fixnum_mult(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?,
@@ -569,12 +559,8 @@ fn gen_putspecialobject(asm: &mut Assembler, value_type: SpecialObjectType) -> O
/// Compile an interpreter entry block to be inserted into an ISEQ
fn gen_entry_prologue(asm: &mut Assembler, iseq: IseqPtr) {
asm_comment!(asm, "ZJIT entry point: {}", iseq_get_location(iseq, 0));
- asm.frame_setup();
-
// Save the registers we'll use for CFP, EC, SP
- asm.cpush(CFP);
- asm.cpush(EC);
- asm.cpush(SP);
+ asm.frame_setup(lir::JIT_PRESERVED_REGS, 0);
// EC and CFP are passed as arguments
asm.mov(EC, C_ARG_OPNDS[0]);
@@ -587,7 +573,7 @@ fn gen_entry_prologue(asm: &mut Assembler, iseq: IseqPtr) {
}
/// Assign method arguments to basic block arguments at JIT entry
-fn gen_entry_params(asm: &mut Assembler, iseq: IseqPtr, entry_block: &Block, c_stack_bytes: usize) {
+fn gen_entry_params(asm: &mut Assembler, iseq: IseqPtr, entry_block: &Block) {
let num_params = entry_block.params().len() - 1; // -1 to exclude self
if num_params > 0 {
asm_comment!(asm, "set method params: {num_params}");
@@ -616,8 +602,8 @@ fn gen_entry_params(asm: &mut Assembler, iseq: IseqPtr, entry_block: &Block, c_s
// the HIR function ────────────► └────────────┘
// is running
match param {
- Opnd::Mem(lir::Mem { base, disp, num_bits }) => {
- let param_slot = Opnd::Mem(lir::Mem { num_bits, base, disp: disp - c_stack_bytes as i32 - Assembler::frame_size() });
+ Opnd::Mem(lir::Mem { base: _, disp, num_bits }) => {
+ let param_slot = Opnd::mem(num_bits, NATIVE_STACK_PTR, disp - Assembler::frame_size());
asm.mov(param_slot, local);
}
// Prepare for parallel move for locals in registers
@@ -848,7 +834,7 @@ fn gen_send_without_block_direct(
asm_comment!(asm, "side-exit if callee side-exits");
asm.cmp(ret, Qundef.into());
// Restore the C stack pointer on exit
- asm.je(Target::SideExit { context: None, reason: CalleeSideExit, c_stack_bytes: jit.c_stack_bytes, label: None });
+ asm.je(Target::SideExit { context: None, reason: CalleeSideExit, label: None });
asm_comment!(asm, "restore SP register for the caller");
let new_sp = asm.sub(SP, sp_offset.into());
@@ -912,7 +898,7 @@ fn gen_new_range(
}
/// Compile code that exits from JIT code with a return value
-fn gen_return(jit: &JITState, asm: &mut Assembler, val: lir::Opnd) -> Option<()> {
+fn gen_return(asm: &mut Assembler, val: lir::Opnd) -> Option<()> {
// Pop the current frame (ec->cfp++)
// Note: the return PC is already in the previous CFP
asm_comment!(asm, "pop stack frame");
@@ -924,16 +910,8 @@ fn gen_return(jit: &JITState, asm: &mut Assembler, val: lir::Opnd) -> Option<()>
// we need to load the return value, which might be part of the frame.
asm.load_into(C_RET_OPND, val);
- // Restore the C stack pointer bumped for basic block arguments
- if jit.c_stack_bytes > 0 {
- asm_comment!(asm, "restore C stack pointer");
- let new_sp = asm.add(NATIVE_STACK_PTR, jit.c_stack_bytes.into());
- asm.mov(NATIVE_STACK_PTR, new_sp);
- }
-
- asm.frame_teardown();
-
// Return from the function
+ asm.frame_teardown(&[]); // matching the setup in :bb0-prologue:
asm.cret(C_RET_OPND);
Some(())
}
@@ -1140,7 +1118,7 @@ fn param_opnd(idx: usize) -> Opnd {
if idx < ALLOC_REGS.len() {
Opnd::Reg(ALLOC_REGS[idx])
} else {
- Opnd::mem(64, NATIVE_STACK_PTR, (idx - ALLOC_REGS.len()) as i32 * SIZEOF_VALUE_I32)
+ Opnd::mem(64, NATIVE_BASE_PTR, (idx - ALLOC_REGS.len() + 1) as i32 * -SIZEOF_VALUE_I32)
}
}
@@ -1196,7 +1174,6 @@ fn build_side_exit(jit: &mut JITState, state: &FrameState, reason: SideExitReaso
locals,
}),
reason,
- c_stack_bytes: jit.c_stack_bytes,
label,
};
Some(target)
@@ -1225,26 +1202,6 @@ fn max_num_params(function: &Function) -> usize {
}).max().unwrap_or(0)
}
-/// Given the number of spill slots needed for a function, return the number of bytes
-/// the function needs to allocate on the stack for the stack frame.
-fn aligned_stack_bytes(num_slots: usize) -> usize {
- // Both x86_64 and arm64 require the stack to be aligned to 16 bytes.
- let num_slots = if cfg!(target_arch = "x86_64") && num_slots % 2 == 0 {
- // On x86_64, since the call instruction bumps the stack pointer by 8 bytes on entry,
- // we need to round up `num_slots` to an odd number.
- num_slots + 1
- } else if cfg!(target_arch = "aarch64") && num_slots % 2 == 1 {
- // On arm64, the stack pointer is always aligned to 16 bytes, so we need to round up
- // `num_slots`` to an even number.
- num_slots + 1
- } else {
- num_slots
- };
-
- const { assert!(SIZEOF_VALUE == 8, "aligned_stack_bytes() assumes SIZEOF_VALUE == 8"); }
- num_slots * SIZEOF_VALUE
-}
-
impl Assembler {
/// Make a C call while marking the start and end positions of it
fn ccall_with_branch(&mut self, fptr: *const u8, opnds: Vec<Opnd>, branch: &Rc<Branch>) -> Opnd {