author     Takashi Kokubun <takashi.kokubun@shopify.com>  2025-11-03 16:49:25 -0800
committer  GitHub <noreply@github.com>  2025-11-03 16:49:25 -0800
commit     9ca940757315e53d1eaddc83071b1b4581f8f578 (patch)
tree       82429b9d0516f431f07207a1c820e9045226f3b3
parent     0d210f4d39c1cff71f34dbe9938345897f3e5eb1 (diff)
ZJIT: Implement register spill (#14936)
-rw-r--r--  zjit/src/asm/x86_64/mod.rs       7
-rw-r--r--  zjit/src/asm/x86_64/tests.rs    18
-rw-r--r--  zjit/src/backend/arm64/mod.rs  187
-rw-r--r--  zjit/src/backend/lir.rs        213
-rw-r--r--  zjit/src/backend/x86_64/mod.rs 157
5 files changed, 431 insertions(+), 151 deletions(-)
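
At a high level, the commit teaches the linear-scan register allocator to fall back to stack slots instead of aborting compilation when the register pool runs dry. A minimal sketch of that fallback, simplified from the RegisterPool::alloc_opnd change in the lir.rs hunk below (field and constant names follow the diff; this is an illustration, not the backend verbatim):

fn alloc_opnd(&mut self, vreg_idx: usize) -> Opnd {
    // Hand out the first free physical register, if any.
    for (reg_idx, reg) in self.regs.iter().enumerate() {
        if self.pool[reg_idx].is_none() {
            self.pool[reg_idx] = Some(vreg_idx);
            self.live_regs += 1;
            return Opnd::Reg(*reg);
        }
    }
    // Every register is live: spill the VReg to a slot below NATIVE_BASE_PTR instead.
    self.stack_state.alloc_stack(vreg_idx)
}
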
diff --git a/zjit/src/asm/x86_64/mod.rs b/zjit/src/asm/x86_64/mod.rs
index 9d3bf18dcd..cfedca4540 100644
--- a/zjit/src/asm/x86_64/mod.rs
+++ b/zjit/src/asm/x86_64/mod.rs
@@ -779,13 +779,6 @@ pub fn imul(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
write_rm(cb, false, true, opnd0, opnd1, None, &[0x0F, 0xAF]);
}
- // Flip the operands to handle this case. This instruction has weird encoding restrictions.
- (X86Opnd::Mem(_), X86Opnd::Reg(_)) => {
- //REX.W + 0F AF /rIMUL r64, r/m64
- // Quadword register := Quadword register * r/m64.
- write_rm(cb, false, true, opnd1, opnd0, None, &[0x0F, 0xAF]);
- }
-
_ => unreachable!()
}
}
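
Dropping the (Mem, Reg) arm means the assembler never silently changes which operand receives the result; the backend performs the swap itself before emitting. A hedged sketch of that normalization, mirroring the mem::swap in the x86_64 backend hunk further down (simplified, not the committed code):

use std::mem;

fn normalize_imul_operands(left: &mut Opnd, right: &mut Opnd) {
    // imul has no (Mem, Reg) encoding, but multiplication is commutative,
    // so swapping to the (Reg, Mem) form encodes the same product.
    if let (Opnd::Mem(_), Opnd::Reg(_)) = (&*left, &*right) {
        mem::swap(left, right);
    }
}
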
diff --git a/zjit/src/asm/x86_64/tests.rs b/zjit/src/asm/x86_64/tests.rs
index 0f86725946..d574bdb034 100644
--- a/zjit/src/asm/x86_64/tests.rs
+++ b/zjit/src/asm/x86_64/tests.rs
@@ -228,23 +228,29 @@ fn test_cqo() {
fn test_imul() {
let cb1 = compile(|cb| imul(cb, RAX, RBX));
let cb2 = compile(|cb| imul(cb, RDX, mem_opnd(64, RAX, 0)));
- // Operands flipped for encoding since multiplication is commutative
- let cb3 = compile(|cb| imul(cb, mem_opnd(64, RAX, 0), RDX));
- assert_disasm_snapshot!(disasms!(cb1, cb2, cb3), @r"
+ assert_disasm_snapshot!(disasms!(cb1, cb2), @r"
0x0: imul rax, rbx
0x0: imul rdx, qword ptr [rax]
- 0x0: imul rdx, qword ptr [rax]
");
- assert_snapshot!(hexdumps!(cb1, cb2, cb3), @r"
+ assert_snapshot!(hexdumps!(cb1, cb2), @r"
480fafc3
480faf10
- 480faf10
");
}
#[test]
+#[should_panic]
+fn test_imul_mem_reg() {
+ // imul doesn't have a (Mem, Reg) encoding. Since multiplication is commutative, imul() could
+ // swap the operands itself. However, x86_scratch_split may need to move the result to the output
+ // operand, which gets complicated if the assembler sometimes changes the result operand.
+ // So x86_scratch_split should be responsible for that swap, not the assembler.
+ compile(|cb| imul(cb, mem_opnd(64, RAX, 0), RDX));
+}
+
+#[test]
fn test_jge_label() {
let cb = compile(|cb| {
let label_idx = cb.new_label("loop".to_owned());
diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs
index d762b14c91..acf0576f9c 100644
--- a/zjit/src/backend/arm64/mod.rs
+++ b/zjit/src/backend/arm64/mod.rs
@@ -79,6 +79,9 @@ impl From<Opnd> for A64Opnd {
Opnd::Mem(Mem { base: MemBase::VReg(_), .. }) => {
panic!("attempted to lower an Opnd::Mem with a MemBase::VReg base")
},
+ Opnd::Mem(Mem { base: MemBase::Stack { .. }, .. }) => {
+ panic!("attempted to lower an Opnd::Mem with a MemBase::Stack base")
+ },
Opnd::VReg { .. } => panic!("attempted to lower an Opnd::VReg"),
Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"),
Opnd::None => panic!(
@@ -203,6 +206,7 @@ pub const ALLOC_REGS: &[Reg] = &[
/// [`Assembler::arm64_scratch_split`] or [`Assembler::new_with_scratch_reg`].
const SCRATCH0_OPND: Opnd = Opnd::Reg(X15_REG);
const SCRATCH1_OPND: Opnd = Opnd::Reg(X17_REG);
+const SCRATCH2_OPND: Opnd = Opnd::Reg(X14_REG);
impl Assembler {
/// Special register for intermediate processing in arm64_emit. It should be used only by arm64_emit.
@@ -690,22 +694,129 @@ impl Assembler {
/// need to be split with registers after `alloc_regs`, e.g. for `compile_exits`, so this
/// splits them and uses scratch registers for it.
fn arm64_scratch_split(self) -> Assembler {
- let mut asm = Assembler::new_with_asm(&self);
+ /// If opnd is Opnd::Mem with a too large disp, make the disp smaller using lea.
+ fn split_large_disp(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd) -> Opnd {
+ match opnd {
+ Opnd::Mem(Mem { num_bits, disp, .. }) if !mem_disp_fits_bits(disp) => {
+ asm.lea_into(scratch_opnd, opnd);
+ Opnd::mem(num_bits, scratch_opnd, 0)
+ }
+ _ => opnd,
+ }
+ }
+
+ /// If opnd is Opnd::Mem with MemBase::Stack, lower it to Opnd::Mem with MemBase::Reg, and split a large disp.
+ fn split_stack_membase(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd, stack_state: &StackState) -> Opnd {
+ let opnd = split_only_stack_membase(asm, opnd, scratch_opnd, stack_state);
+ split_large_disp(asm, opnd, scratch_opnd)
+ }
+
+ /// Like split_stack_membase, but without split_large_disp. This should be used only by lea.
+ fn split_only_stack_membase(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd, stack_state: &StackState) -> Opnd {
+ if let Opnd::Mem(Mem { base: stack_membase @ MemBase::Stack { .. }, disp: opnd_disp, num_bits: opnd_num_bits }) = opnd {
+ let base = Opnd::Mem(stack_state.stack_membase_to_mem(stack_membase));
+ let base = split_large_disp(asm, base, scratch_opnd);
+ asm.load_into(scratch_opnd, base);
+ Opnd::Mem(Mem { base: MemBase::Reg(scratch_opnd.unwrap_reg().reg_no), disp: opnd_disp, num_bits: opnd_num_bits })
+ } else {
+ opnd
+ }
+ }
+
+ /// If opnd is Opnd::Mem, lower it to scratch_opnd. You should use this when `opnd` is read by the instruction, not written.
+ fn split_memory_read(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd) -> Opnd {
+ if let Opnd::Mem(_) = opnd {
+ let opnd = split_large_disp(asm, opnd, scratch_opnd);
+ let scratch_opnd = opnd.num_bits().map(|num_bits| scratch_opnd.with_num_bits(num_bits)).unwrap_or(scratch_opnd);
+ asm.load_into(scratch_opnd, opnd);
+ scratch_opnd
+ } else {
+ opnd
+ }
+ }
+
+ /// If opnd is Opnd::Mem, replace it with scratch_opnd. Return Some(the original Opnd::Mem) if the result needs to be written back from scratch_opnd.
+ fn split_memory_write(opnd: &mut Opnd, scratch_opnd: Opnd) -> Option<Opnd> {
+ if let Opnd::Mem(_) = opnd {
+ let mem_opnd = opnd.clone();
+ *opnd = opnd.num_bits().map(|num_bits| scratch_opnd.with_num_bits(num_bits)).unwrap_or(scratch_opnd);
+ Some(mem_opnd)
+ } else {
+ None
+ }
+ }
+
+ // Prepare StackState to lower MemBase::Stack
+ let stack_state = StackState::new(self.stack_base_idx);
+
+ let mut asm_local = Assembler::new_with_asm(&self);
+ let asm = &mut asm_local;
asm.accept_scratch_reg = true;
let mut iterator = self.insns.into_iter().enumerate().peekable();
while let Some((_, mut insn)) = iterator.next() {
match &mut insn {
- &mut Insn::Mul { out, .. } => {
+ Insn::Add { left, right, out } |
+ Insn::Sub { left, right, out } |
+ Insn::And { left, right, out } |
+ Insn::Or { left, right, out } |
+ Insn::Xor { left, right, out } |
+ Insn::CSelZ { truthy: left, falsy: right, out } |
+ Insn::CSelNZ { truthy: left, falsy: right, out } |
+ Insn::CSelE { truthy: left, falsy: right, out } |
+ Insn::CSelNE { truthy: left, falsy: right, out } |
+ Insn::CSelL { truthy: left, falsy: right, out } |
+ Insn::CSelLE { truthy: left, falsy: right, out } |
+ Insn::CSelG { truthy: left, falsy: right, out } |
+ Insn::CSelGE { truthy: left, falsy: right, out } => {
+ *left = split_memory_read(asm, *left, SCRATCH0_OPND);
+ *right = split_memory_read(asm, *right, SCRATCH1_OPND);
+ let mem_out = split_memory_write(out, SCRATCH0_OPND);
+
+ asm.push_insn(insn);
+
+ if let Some(mem_out) = mem_out {
+ let mem_out = split_large_disp(asm, mem_out, SCRATCH1_OPND);
+ asm.store(mem_out, SCRATCH0_OPND);
+ }
+ }
+ Insn::Mul { left, right, out } => {
+ *left = split_memory_read(asm, *left, SCRATCH0_OPND);
+ *right = split_memory_read(asm, *right, SCRATCH1_OPND);
+ let mem_out = split_memory_write(out, SCRATCH0_OPND);
+ let reg_out = out.clone();
+
asm.push_insn(insn);
+ if let Some(mem_out) = mem_out {
+ let mem_out = split_large_disp(asm, mem_out, SCRATCH1_OPND);
+ asm.store(mem_out, SCRATCH0_OPND);
+ };
+
// If the next instruction is JoMul
if matches!(iterator.peek(), Some((_, Insn::JoMul(_)))) {
// Produce a register that is all zeros or all ones
// Based on the sign bit of the 64-bit mul result
- asm.push_insn(Insn::RShift { out: SCRATCH0_OPND, opnd: out, shift: Opnd::UImm(63) });
+ asm.push_insn(Insn::RShift { out: SCRATCH0_OPND, opnd: reg_out, shift: Opnd::UImm(63) });
+ }
+ }
+ Insn::RShift { opnd, out, .. } => {
+ *opnd = split_memory_read(asm, *opnd, SCRATCH0_OPND);
+ let mem_out = split_memory_write(out, SCRATCH0_OPND);
+
+ asm.push_insn(insn);
+
+ if let Some(mem_out) = mem_out {
+ let mem_out = split_large_disp(asm, mem_out, SCRATCH1_OPND);
+ asm.store(mem_out, SCRATCH0_OPND);
}
}
+ Insn::Cmp { left, right } |
+ Insn::Test { left, right } => {
+ *left = split_memory_read(asm, *left, SCRATCH0_OPND);
+ *right = split_memory_read(asm, *right, SCRATCH1_OPND);
+ asm.push_insn(insn);
+ }
// For compile_exits, support splitting simple C arguments here
Insn::CCall { opnds, .. } if !opnds.is_empty() => {
for (i, opnd) in opnds.iter().enumerate() {
@@ -714,16 +825,32 @@ impl Assembler {
*opnds = vec![];
asm.push_insn(insn);
}
- &mut Insn::Lea { opnd, out } => {
- match (opnd, out) {
- // Split here for compile_exits
- (Opnd::Mem(_), Opnd::Mem(_)) => {
- asm.lea_into(SCRATCH0_OPND, opnd);
- asm.store(out, SCRATCH0_OPND);
- }
- _ => {
- asm.push_insn(insn);
+ Insn::Lea { opnd, out } => {
+ *opnd = split_only_stack_membase(asm, *opnd, SCRATCH0_OPND, &stack_state);
+ let mem_out = split_memory_write(out, SCRATCH0_OPND);
+
+ asm.push_insn(insn);
+
+ if let Some(mem_out) = mem_out {
+ let mem_out = split_large_disp(asm, mem_out, SCRATCH1_OPND);
+ asm.store(mem_out, SCRATCH0_OPND);
+ }
+ }
+ Insn::Load { opnd, out } |
+ Insn::LoadInto { opnd, dest: out } => {
+ *opnd = split_stack_membase(asm, *opnd, SCRATCH0_OPND, &stack_state);
+ *out = split_stack_membase(asm, *out, SCRATCH1_OPND, &stack_state);
+
+ if let Opnd::Mem(_) = out {
+ // If NATIVE_STACK_PTR is used as a source for Store, it's handled as xzr, storing zero.
+ // To save the content of NATIVE_STACK_PTR, we need to load it into another register first.
+ if *opnd == NATIVE_STACK_PTR {
+ asm.load_into(SCRATCH0_OPND, NATIVE_STACK_PTR);
+ *opnd = SCRATCH0_OPND;
}
+ asm.store(*out, *opnd);
+ } else {
+ asm.push_insn(insn);
}
}
&mut Insn::IncrCounter { mem, value } => {
@@ -741,31 +868,24 @@ impl Assembler {
asm.cmp(SCRATCH1_OPND, 0.into());
asm.jne(label);
}
- &mut Insn::Store { dest, src } => {
- let Opnd::Mem(Mem { num_bits: dest_num_bits, disp: dest_disp, .. }) = dest else {
- panic!("Insn::Store destination must be Opnd::Mem: {dest:?}, {src:?}");
- };
-
- // Split dest using a scratch register if necessary.
- let dest = if mem_disp_fits_bits(dest_disp) {
- dest
- } else {
- asm.lea_into(SCRATCH0_OPND, dest);
- Opnd::mem(dest_num_bits, SCRATCH0_OPND, 0)
- };
-
- asm.store(dest, src);
+ Insn::Store { dest, .. } => {
+ *dest = split_stack_membase(asm, *dest, SCRATCH0_OPND, &stack_state);
+ asm.push_insn(insn);
}
- &mut Insn::Mov { dest, src } => {
+ Insn::Mov { dest, src } => {
+ *src = split_stack_membase(asm, *src, SCRATCH0_OPND, &stack_state);
+ *dest = split_large_disp(asm, *dest, SCRATCH1_OPND);
match dest {
- Opnd::Reg(_) => asm.load_into(dest, src),
- Opnd::Mem(_) => asm.store(dest, src),
+ Opnd::Reg(_) => asm.load_into(*dest, *src),
+ Opnd::Mem(_) => asm.store(*dest, *src),
_ => asm.push_insn(insn),
}
}
// Resolve ParallelMov that couldn't be handled without a scratch register.
Insn::ParallelMov { moves } => {
for (dst, src) in Self::resolve_parallel_moves(moves, Some(SCRATCH0_OPND)).unwrap() {
+ let src = split_stack_membase(asm, src, SCRATCH1_OPND, &stack_state);
+ let dst = split_large_disp(asm, dst, SCRATCH2_OPND);
match dst {
Opnd::Reg(_) => asm.load_into(dst, src),
Opnd::Mem(_) => asm.store(dst, src),
@@ -779,7 +899,7 @@ impl Assembler {
}
}
- asm
+ asm_local
}
/// Emit platform-specific machine code
@@ -1157,10 +1277,11 @@ impl Assembler {
load_effective_address(cb, Self::EMIT_OPND, src_base_reg_no, src_disp);
A64Opnd::new_mem(dest.rm_num_bits(), Self::EMIT_OPND, 0)
};
+ let dst = A64Opnd::Reg(Self::EMIT_REG.with_num_bits(src_num_bits));
match src_num_bits {
- 64 | 32 => ldur(cb, Self::EMIT_OPND, src_mem),
- 16 => ldurh(cb, Self::EMIT_OPND, src_mem),
- 8 => ldurb(cb, Self::EMIT_OPND, src_mem),
+ 64 | 32 => ldur(cb, dst, src_mem),
+ 16 => ldurh(cb, dst, src_mem),
+ 8 => ldurb(cb, dst, src_mem),
num_bits => panic!("unexpected num_bits: {num_bits}")
};
Self::EMIT_REG
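
The arm64 split applies one pattern to arithmetic and conditional-select instructions: load any memory operands into the scratch registers, run the instruction register-to-register, then store the result back if the output was spilled. A simplified sketch of that shape for Add (SCRATCH0_OPND/SCRATCH1_OPND follow the constants above; an illustration, not the backend verbatim):

fn split_spilled_add(asm: &mut Assembler, mut insn: Insn) {
    if let Insn::Add { left, right, out } = &mut insn {
        // Spilled inputs are read into the scratch registers first.
        *left = split_memory_read(asm, *left, SCRATCH0_OPND);
        *right = split_memory_read(asm, *right, SCRATCH1_OPND);
        // A spilled output is redirected to SCRATCH0 and written back afterwards.
        let mem_out = split_memory_write(out, SCRATCH0_OPND);
        asm.push_insn(insn);
        if let Some(mem_out) = mem_out {
            asm.store(mem_out, SCRATCH0_OPND);
        }
    }
}
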
diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs
index 584251de80..66e89a1304 100644
--- a/zjit/src/backend/lir.rs
+++ b/zjit/src/backend/lir.rs
@@ -28,8 +28,12 @@ pub static JIT_PRESERVED_REGS: &[Opnd] = &[CFP, SP, EC];
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum MemBase
{
+ /// Register: Every Opnd::Mem should have MemBase::Reg as of emit.
Reg(u8),
+ /// Virtual register: Lowered to MemBase::Reg or MemBase::Stack in alloc_regs.
VReg(usize),
+ /// Stack slot: Lowered to MemBase::Reg in scratch_split.
+ Stack { stack_idx: usize, num_bits: u8 },
}
// Memory location
@@ -55,6 +59,8 @@ impl fmt::Display for Mem {
match self.base {
MemBase::Reg(reg_no) => write!(f, "{}", mem_base_reg(reg_no))?,
MemBase::VReg(idx) => write!(f, "v{idx}")?,
+ MemBase::Stack { stack_idx, num_bits } if num_bits == 64 => write!(f, "Stack[{stack_idx}]")?,
+ MemBase::Stack { stack_idx, num_bits } => write!(f, "Stack{num_bits}[{stack_idx}]")?,
}
if self.disp != 0 {
let sign = if self.disp > 0 { '+' } else { '-' };
@@ -1143,6 +1149,81 @@ impl LiveRange {
}
}
+/// StackState manages which stack slots are used by which VReg
+pub struct StackState {
+ /// The maximum number of spilled VRegs at a time
+ stack_size: usize,
+ /// Map from stack slot index (on the C stack, used for spilled VRegs) to Some(vreg_idx) if allocated
+ stack_slots: Vec<Option<usize>>,
+ /// Copy of Assembler::stack_base_idx. Used for calculating stack slot offsets.
+ stack_base_idx: usize,
+}
+
+impl StackState {
+ /// Initialize a stack allocator
+ pub(super) fn new(stack_base_idx: usize) -> Self {
+ StackState {
+ stack_size: 0,
+ stack_slots: vec![],
+ stack_base_idx,
+ }
+ }
+
+ /// Allocate a stack slot for a given vreg_idx
+ fn alloc_stack(&mut self, vreg_idx: usize) -> Opnd {
+ for stack_idx in 0..self.stack_size {
+ if self.stack_slots[stack_idx].is_none() {
+ self.stack_slots[stack_idx] = Some(vreg_idx);
+ return Opnd::mem(64, NATIVE_BASE_PTR, self.stack_idx_to_disp(stack_idx));
+ }
+ }
+ // Every stack slot is in use. Allocate a new stack slot.
+ self.stack_size += 1;
+ self.stack_slots.push(Some(vreg_idx));
+ Opnd::mem(64, NATIVE_BASE_PTR, self.stack_idx_to_disp(self.stack_slots.len() - 1))
+ }
+
+ /// Deallocate a stack slot for a given disp
+ fn dealloc_stack(&mut self, disp: i32) {
+ let stack_idx = self.disp_to_stack_idx(disp);
+ if self.stack_slots[stack_idx].is_some() {
+ self.stack_slots[stack_idx] = None;
+ }
+ }
+
+ /// Convert the `disp` of a stack slot operand to the stack index
+ fn disp_to_stack_idx(&self, disp: i32) -> usize {
+ (-disp / SIZEOF_VALUE_I32) as usize - self.stack_base_idx - 1
+ }
+
+ /// Convert a stack index to the `disp` of the stack slot
+ fn stack_idx_to_disp(&self, stack_idx: usize) -> i32 {
+ (self.stack_base_idx + stack_idx + 1) as i32 * -SIZEOF_VALUE_I32
+ }
+
+ /// Convert Mem to MemBase::Stack
+ fn mem_to_stack_membase(&self, mem: Mem) -> MemBase {
+ match mem {
+ Mem { base: MemBase::Reg(reg_no), disp, num_bits } if NATIVE_BASE_PTR.unwrap_reg().reg_no == reg_no => {
+ let stack_idx = self.disp_to_stack_idx(disp);
+ MemBase::Stack { stack_idx, num_bits }
+ }
+ _ => unreachable!(),
+ }
+ }
+
+ /// Convert MemBase::Stack to Mem
+ pub(super) fn stack_membase_to_mem(&self, membase: MemBase) -> Mem {
+ match membase {
+ MemBase::Stack { stack_idx, num_bits } => {
+ let disp = self.stack_idx_to_disp(stack_idx);
+ Mem { base: MemBase::Reg(NATIVE_BASE_PTR.unwrap_reg().reg_no), disp, num_bits }
+ }
+ _ => unreachable!(),
+ }
+ }
+}
+
/// RegisterPool manages which registers are used by which VReg
struct RegisterPool {
/// List of registers that can be allocated
@@ -1155,45 +1236,54 @@ struct RegisterPool {
/// The number of live registers.
/// Provides a quick way to query `pool.filter(|r| r.is_some()).count()`
live_regs: usize,
+
+ /// Fallback to let StackState allocate stack slots when RegisterPool runs out of registers.
+ stack_state: StackState,
}
impl RegisterPool {
/// Initialize a register pool
- fn new(regs: Vec<Reg>) -> Self {
+ fn new(regs: Vec<Reg>, stack_base_idx: usize) -> Self {
let pool = vec![None; regs.len()];
RegisterPool {
regs,
pool,
live_regs: 0,
+ stack_state: StackState::new(stack_base_idx),
}
}
/// Mutate the pool to indicate that the register at the index
/// has been allocated and is live.
- fn alloc_reg(&mut self, vreg_idx: usize) -> Option<Reg> {
+ fn alloc_opnd(&mut self, vreg_idx: usize) -> Opnd {
for (reg_idx, reg) in self.regs.iter().enumerate() {
if self.pool[reg_idx].is_none() {
self.pool[reg_idx] = Some(vreg_idx);
self.live_regs += 1;
- return Some(*reg);
+ return Opnd::Reg(*reg);
}
}
- None
+ self.stack_state.alloc_stack(vreg_idx)
}
/// Allocate a specific register
- fn take_reg(&mut self, reg: &Reg, vreg_idx: usize) -> Reg {
+ fn take_reg(&mut self, reg: &Reg, vreg_idx: usize) -> Opnd {
let reg_idx = self.regs.iter().position(|elem| elem.reg_no == reg.reg_no)
.unwrap_or_else(|| panic!("Unable to find register: {}", reg.reg_no));
assert_eq!(self.pool[reg_idx], None, "register already allocated for VReg({:?})", self.pool[reg_idx]);
self.pool[reg_idx] = Some(vreg_idx);
self.live_regs += 1;
- *reg
+ Opnd::Reg(*reg)
}
// Mutate the pool to indicate that the given register is being returned
// as it is no longer used by the instruction that previously held it.
- fn dealloc_reg(&mut self, reg: &Reg) {
+ fn dealloc_opnd(&mut self, opnd: &Opnd) {
+ if let Opnd::Mem(Mem { disp, .. }) = *opnd {
+ return self.stack_state.dealloc_stack(disp);
+ }
+
+ let reg = opnd.unwrap_reg();
let reg_idx = self.regs.iter().position(|elem| elem.reg_no == reg.reg_no)
.unwrap_or_else(|| panic!("Unable to find register: {}", reg.reg_no));
if self.pool[reg_idx].is_some() {
@@ -1428,61 +1518,40 @@ impl Assembler
/// registers because their output is used as the operand on a subsequent
/// instruction. This is our implementation of the linear scan algorithm.
pub(super) fn alloc_regs(mut self, regs: Vec<Reg>) -> Result<Assembler, CompileError> {
- // Dump live registers for register spill debugging.
- fn dump_live_regs(insns: Vec<Insn>, live_ranges: Vec<LiveRange>, num_regs: usize, spill_index: usize) {
- // Convert live_ranges to live_regs: the number of live registers at each index
- let mut live_regs: Vec<usize> = vec![];
- for insn_idx in 0..insns.len() {
- let live_count = live_ranges.iter().filter(|range|
- match (range.start, range.end) {
- (Some(start), Some(end)) => start <= insn_idx && insn_idx <= end,
- _ => false,
- }
- ).count();
- live_regs.push(live_count);
- }
-
- // Dump insns along with live registers
- for (insn_idx, insn) in insns.iter().enumerate() {
- eprint!("{:3} ", if spill_index == insn_idx { "==>" } else { "" });
- for reg in 0..=num_regs {
- eprint!("{:1}", if reg < live_regs[insn_idx] { "|" } else { "" });
- }
- eprintln!(" [{:3}] {:?}", insn_idx, insn);
- }
- }
-
// First, create the pool of registers.
- let mut pool = RegisterPool::new(regs.clone());
+ let mut pool = RegisterPool::new(regs.clone(), self.stack_base_idx);
- // Mapping between VReg and allocated VReg for each VReg index.
- // None if no register has been allocated for the VReg.
- let mut reg_mapping: Vec<Option<Reg>> = vec![None; self.live_ranges.len()];
+ // Mapping between VReg and register or stack slot for each VReg index.
+ // None if no register or stack slot has been allocated for the VReg.
+ let mut vreg_opnd: Vec<Option<Opnd>> = vec![None; self.live_ranges.len()];
// List of registers saved before a C call, paired with the VReg index.
let mut saved_regs: Vec<(Reg, usize)> = vec![];
+ // Remember the indexes of Insn::FrameSetup to update the stack size later
+ let mut frame_setup_idxs: Vec<usize> = vec![];
+
// live_ranges is indexed by original `index` given by the iterator.
let mut asm = Assembler::new_with_asm(&self);
let live_ranges: Vec<LiveRange> = take(&mut self.live_ranges);
let mut iterator = self.insns.into_iter().enumerate().peekable();
while let Some((index, mut insn)) = iterator.next() {
+ // Remember the index of FrameSetup to bump slot_count when we know the max number of spilled VRegs.
+ if let Insn::FrameSetup { .. } = insn {
+ frame_setup_idxs.push(asm.insns.len());
+ }
+
let before_ccall = match (&insn, iterator.peek().map(|(_, insn)| insn)) {
(Insn::ParallelMov { .. }, Some(Insn::CCall { .. })) |
(Insn::CCall { .. }, _) if !pool.is_empty() => {
// If C_RET_REG is in use, move it to another register.
// This must happen before last-use registers are deallocated.
if let Some(vreg_idx) = pool.vreg_for(&C_RET_REG) {
- let new_reg = if let Some(new_reg) = pool.alloc_reg(vreg_idx) {
- new_reg
- } else {
- debug!("spilling VReg is not implemented yet, can't evacuate C_RET_REG on CCall");
- return Err(CompileError::RegisterSpillOnCCall);
- };
- asm.mov(Opnd::Reg(new_reg), C_RET_OPND);
- pool.dealloc_reg(&C_RET_REG);
- reg_mapping[vreg_idx] = Some(new_reg);
+ let new_opnd = pool.alloc_opnd(vreg_idx);
+ asm.mov(new_opnd, C_RET_OPND);
+ pool.dealloc_opnd(&Opnd::Reg(C_RET_REG));
+ vreg_opnd[vreg_idx] = Some(new_opnd);
}
true
@@ -1501,8 +1570,8 @@ impl Assembler
// uses this operand. If it is, we can return the allocated
// register to the pool.
if live_ranges[idx].end() == index {
- if let Some(reg) = reg_mapping[idx] {
- pool.dealloc_reg(&reg);
+ if let Some(opnd) = vreg_opnd[idx] {
+ pool.dealloc_opnd(&opnd);
} else {
unreachable!("no register allocated for insn {:?}", insn);
}
@@ -1520,7 +1589,7 @@ impl Assembler
// Save live registers
for &(reg, _) in saved_regs.iter() {
asm.cpush(Opnd::Reg(reg));
- pool.dealloc_reg(&reg);
+ pool.dealloc_opnd(&Opnd::Reg(reg));
}
// On x86_64, maintain 16-byte stack alignment
if cfg!(target_arch = "x86_64") && saved_regs.len() % 2 == 1 {
@@ -1560,7 +1629,7 @@ impl Assembler
if let Some(Opnd::VReg{ idx, .. }) = opnd_iter.next() {
if live_ranges[*idx].end() == index {
- if let Some(reg) = reg_mapping[*idx] {
+ if let Some(Opnd::Reg(reg)) = vreg_opnd[*idx] {
out_reg = Some(pool.take_reg(&reg, vreg_idx));
}
}
@@ -1569,23 +1638,7 @@ impl Assembler
// Allocate a new register for this instruction if one is not
// already allocated.
- if out_reg.is_none() {
- out_reg = match pool.alloc_reg(vreg_idx) {
- Some(reg) => Some(reg),
- None => {
- if get_option!(debug) {
- let mut insns = asm.insns;
- insns.push(insn);
- for (_, insn) in iterator.by_ref() {
- insns.push(insn);
- }
- dump_live_regs(insns, live_ranges, regs.len(), index);
- }
- debug!("Register spill not supported");
- return Err(CompileError::RegisterSpillOnAlloc);
- }
- };
- }
+ let out_opnd = out_reg.unwrap_or_else(|| pool.alloc_opnd(vreg_idx));
// Set the output operand on the instruction
let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter());
@@ -1594,9 +1647,9 @@ impl Assembler
// output operand on this instruction because the live range
// extends beyond the index of the instruction.
let out = insn.out_opnd_mut().unwrap();
- let reg = out_reg.unwrap().with_num_bits(out_num_bits);
- reg_mapping[out.vreg_idx()] = Some(reg);
- *out = Opnd::Reg(reg);
+ let out_opnd = out_opnd.with_num_bits(out_num_bits);
+ vreg_opnd[out.vreg_idx()] = Some(out_opnd);
+ *out = out_opnd;
}
// Replace VReg and Param operands by their corresponding register
@@ -1604,11 +1657,15 @@ impl Assembler
while let Some(opnd) = opnd_iter.next() {
match *opnd {
Opnd::VReg { idx, num_bits } => {
- *opnd = Opnd::Reg(reg_mapping[idx].unwrap()).with_num_bits(num_bits);
+ *opnd = vreg_opnd[idx].unwrap().with_num_bits(num_bits);
},
Opnd::Mem(Mem { base: MemBase::VReg(idx), disp, num_bits }) => {
- let base = MemBase::Reg(reg_mapping[idx].unwrap().reg_no);
- *opnd = Opnd::Mem(Mem { base, disp, num_bits });
+ *opnd = match vreg_opnd[idx].unwrap() {
+ Opnd::Reg(reg) => Opnd::Mem(Mem { base: MemBase::Reg(reg.reg_no), disp, num_bits }),
+ // If the base is spilled, lower it to MemBase::Stack, which scratch_split will lower to MemBase::Reg.
+ Opnd::Mem(mem) => Opnd::Mem(Mem { base: pool.stack_state.mem_to_stack_membase(mem), disp, num_bits }),
+ _ => unreachable!(),
+ }
}
_ => {},
}
@@ -1618,8 +1675,8 @@ impl Assembler
// register
if let Some(idx) = vreg_idx {
if live_ranges[idx].end() == index {
- if let Some(reg) = reg_mapping[idx] {
- pool.dealloc_reg(&reg);
+ if let Some(opnd) = vreg_opnd[idx] {
+ pool.dealloc_opnd(&opnd);
} else {
unreachable!("no register allocated for insn {:?}", insn);
}
@@ -1671,6 +1728,16 @@ impl Assembler
}
}
+ // Extend the stack space for spilled operands
+ for frame_setup_idx in frame_setup_idxs {
+ match &mut asm.insns[frame_setup_idx] {
+ Insn::FrameSetup { slot_count, .. } => {
+ *slot_count += pool.stack_state.stack_size;
+ }
+ _ => unreachable!(),
+ }
+ }
+
assert!(pool.is_empty(), "Expected all registers to be returned to the pool");
Ok(asm)
}
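
The slot-offset arithmetic in StackState is easiest to check with concrete numbers. Assuming SIZEOF_VALUE_I32 is 8 and a stack_base_idx of 2 (both assumptions for illustration), stack_idx_to_disp and disp_to_stack_idx round-trip as in this hypothetical unit test (not part of the commit):

#[test]
fn stack_slot_offsets_round_trip() {
    // disp = (stack_base_idx + stack_idx + 1) * -SIZEOF_VALUE_I32
    let state = StackState::new(2);
    assert_eq!(state.stack_idx_to_disp(0), -24); // first spill slot: [NATIVE_BASE_PTR - 24]
    assert_eq!(state.stack_idx_to_disp(1), -32); // each later slot sits one VALUE lower
    // disp_to_stack_idx inverts the mapping.
    assert_eq!(state.disp_to_stack_idx(-24), 0);
    assert_eq!(state.disp_to_stack_idx(-32), 1);
}
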
diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs
index 14c0df8dd0..1d5d90a856 100644
--- a/zjit/src/backend/x86_64/mod.rs
+++ b/zjit/src/backend/x86_64/mod.rs
@@ -1,4 +1,4 @@
-use std::mem::take;
+use std::mem::{self, take};
use crate::asm::*;
use crate::asm::x86_64::*;
@@ -97,13 +97,13 @@ pub const ALLOC_REGS: &[Reg] = &[
RCX_REG,
R8_REG,
R9_REG,
- R10_REG,
RAX_REG,
];
/// Special scratch register for intermediate processing. It should be used only by
/// [`Assembler::x86_scratch_split`] or [`Assembler::new_with_scratch_reg`].
const SCRATCH0_OPND: Opnd = Opnd::Reg(R11_REG);
+const SCRATCH1_OPND: Opnd = Opnd::Reg(R10_REG);
impl Assembler {
/// Return an Assembler with scratch registers disabled in the backend, and a scratch register.
@@ -395,13 +395,13 @@ impl Assembler {
/// without requiring more registers to be available in the register
/// allocator. So we just use the SCRATCH0_OPND register temporarily to hold
/// the value before we immediately use it.
- fn split_64bit_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd {
+ fn split_64bit_immediate(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd) -> Opnd {
match opnd {
Opnd::Imm(value) => {
// 32-bit values will be sign-extended
if imm_num_bits(value) > 32 {
- asm.mov(SCRATCH0_OPND, opnd);
- SCRATCH0_OPND
+ asm.mov(scratch_opnd, opnd);
+ scratch_opnd
} else {
opnd
}
@@ -409,8 +409,8 @@ impl Assembler {
Opnd::UImm(value) => {
// 32-bit values will be sign-extended
if imm_num_bits(value as i64) > 32 {
- asm.mov(SCRATCH0_OPND, opnd);
- SCRATCH0_OPND
+ asm.mov(scratch_opnd, opnd);
+ scratch_opnd
} else {
Opnd::Imm(value as i64)
}
@@ -419,23 +419,102 @@ impl Assembler {
}
}
- let mut asm = Assembler::new_with_asm(&self);
+ /// If a given operand is Opnd::Mem and it uses MemBase::Stack, lower it to MemBase::Reg using a scratch register.
+ fn split_stack_membase(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd, stack_state: &StackState) -> Opnd {
+ if let Opnd::Mem(Mem { base: stack_membase @ MemBase::Stack { .. }, disp, num_bits }) = opnd {
+ let base = Opnd::Mem(stack_state.stack_membase_to_mem(stack_membase));
+ asm.load_into(scratch_opnd, base);
+ Opnd::Mem(Mem { base: MemBase::Reg(scratch_opnd.unwrap_reg().reg_no), disp, num_bits })
+ } else {
+ opnd
+ }
+ }
+
+ /// If opnd is Opnd::Mem, replace it with scratch_opnd. Return Some(the original Opnd::Mem) if the result needs to be written back from scratch_opnd.
+ fn split_memory_write(opnd: &mut Opnd, scratch_opnd: Opnd) -> Option<Opnd> {
+ if let Opnd::Mem(_) = opnd {
+ let mem_opnd = opnd.clone();
+ *opnd = opnd.num_bits().map(|num_bits| scratch_opnd.with_num_bits(num_bits)).unwrap_or(scratch_opnd);
+ Some(mem_opnd)
+ } else {
+ None
+ }
+ }
+
+ /// If both opnd and other are Opnd::Mem, split opnd with scratch_opnd.
+ fn split_if_both_memory(asm: &mut Assembler, opnd: Opnd, other: Opnd, scratch_opnd: Opnd) -> Opnd {
+ if let (Opnd::Mem(_), Opnd::Mem(_)) = (opnd, other) {
+ asm.load_into(scratch_opnd.with_num_bits(opnd.rm_num_bits()), opnd);
+ scratch_opnd.with_num_bits(opnd.rm_num_bits())
+ } else {
+ opnd
+ }
+ }
+
+ /// Move src to dst, splitting it with scratch_opnd if it's a Mem-to-Mem move. Skip it if dst == src.
+ fn asm_mov(asm: &mut Assembler, dst: Opnd, src: Opnd, scratch_opnd: Opnd) {
+ if dst != src {
+ if let (Opnd::Mem(_), Opnd::Mem(_)) = (dst, src) {
+ asm.mov(scratch_opnd, src);
+ asm.mov(dst, scratch_opnd);
+ } else {
+ asm.mov(dst, src);
+ }
+ }
+ }
+
+ // Prepare StackState to lower MemBase::Stack
+ let stack_state = StackState::new(self.stack_base_idx);
+
+ let mut asm_local = Assembler::new_with_asm(&self);
+ let asm = &mut asm_local;
asm.accept_scratch_reg = true;
let mut iterator = self.insns.into_iter().enumerate().peekable();
while let Some((_, mut insn)) = iterator.next() {
match &mut insn {
- Insn::Add { right, .. } |
- Insn::Sub { right, .. } |
- Insn::Mul { right, .. } |
- Insn::And { right, .. } |
- Insn::Or { right, .. } |
- Insn::Xor { right, .. } |
- Insn::Test { right, .. } => {
- *right = split_64bit_immediate(&mut asm, *right);
+ Insn::Add { left, right, out } |
+ Insn::Sub { left, right, out } |
+ Insn::And { left, right, out } |
+ Insn::Or { left, right, out } |
+ Insn::Xor { left, right, out } => {
+ *left = split_stack_membase(asm, *left, SCRATCH0_OPND, &stack_state);
+ *left = split_if_both_memory(asm, *left, *right, SCRATCH0_OPND);
+ *right = split_stack_membase(asm, *right, SCRATCH1_OPND, &stack_state);
+ *right = split_64bit_immediate(asm, *right, SCRATCH1_OPND);
+
+ let (out, left) = (*out, *left);
+ asm.push_insn(insn);
+ asm_mov(asm, out, left, SCRATCH0_OPND);
+ }
+ Insn::Mul { left, right, out } => {
+ *left = split_stack_membase(asm, *left, SCRATCH0_OPND, &stack_state);
+ *left = split_if_both_memory(asm, *left, *right, SCRATCH0_OPND);
+ *right = split_stack_membase(asm, *right, SCRATCH1_OPND, &stack_state);
+ *right = split_64bit_immediate(asm, *right, SCRATCH1_OPND);
+
+ // imul doesn't have (Mem, Reg) encoding. Swap left and right in that case.
+ if let (Opnd::Mem(_), Opnd::Reg(_)) = (&left, &right) {
+ mem::swap(left, right);
+ }
+
+ let (out, left) = (*out, *left);
asm.push_insn(insn);
+ asm_mov(asm, out, left, SCRATCH0_OPND);
}
+ &mut Insn::Not { opnd, out } |
+ &mut Insn::LShift { opnd, out, .. } |
+ &mut Insn::RShift { opnd, out, .. } |
+ &mut Insn::URShift { opnd, out, .. } => {
+ asm.push_insn(insn);
+ asm_mov(asm, out, opnd, SCRATCH0_OPND);
+ }
+ Insn::Test { left, right } |
Insn::Cmp { left, right } => {
+ *left = split_stack_membase(asm, *left, SCRATCH1_OPND, &stack_state);
+ *right = split_stack_membase(asm, *right, SCRATCH0_OPND, &stack_state);
+ *right = split_if_both_memory(asm, *right, *left, SCRATCH0_OPND);
+
let num_bits = match right {
Opnd::Imm(value) => Some(imm_num_bits(*value)),
Opnd::UImm(value) => Some(uimm_num_bits(*value)),
@@ -450,7 +529,7 @@ impl Assembler {
// directly in the instruction.
let use_imm = num_bits.is_some() && left.num_bits() == num_bits && num_bits.unwrap() < 64;
if !use_imm {
- *right = split_64bit_immediate(&mut asm, *right);
+ *right = split_64bit_immediate(asm, *right, SCRATCH0_OPND);
}
asm.push_insn(insn);
}
@@ -462,16 +541,19 @@ impl Assembler {
*opnds = vec![];
asm.push_insn(insn);
}
- &mut Insn::Lea { opnd, out } => {
- match (opnd, out) {
- // Split here for compile_exits
- (Opnd::Mem(_), Opnd::Mem(_)) => {
- asm.lea_into(SCRATCH0_OPND, opnd);
- asm.store(out, SCRATCH0_OPND);
- }
- _ => {
- asm.push_insn(insn);
- }
+ Insn::CSelZ { out, .. } |
+ Insn::CSelNZ { out, .. } |
+ Insn::CSelE { out, .. } |
+ Insn::CSelNE { out, .. } |
+ Insn::CSelL { out, .. } |
+ Insn::CSelLE { out, .. } |
+ Insn::CSelG { out, .. } |
+ Insn::CSelGE { out, .. } |
+ Insn::Lea { out, .. } => {
+ let mem_out = split_memory_write(out, SCRATCH0_OPND);
+ asm.push_insn(insn);
+ if let Some(mem_out) = mem_out {
+ asm.store(mem_out, SCRATCH0_OPND);
}
}
Insn::LeaJumpTarget { target, out } => {
@@ -480,6 +562,15 @@ impl Assembler {
asm.mov(*out, SCRATCH0_OPND);
}
}
+ Insn::Load { out, opnd } |
+ Insn::LoadInto { dest: out, opnd } => {
+ *opnd = split_stack_membase(asm, *opnd, SCRATCH0_OPND, &stack_state);
+ let mem_out = split_memory_write(out, SCRATCH0_OPND);
+ asm.push_insn(insn);
+ if let Some(mem_out) = mem_out {
+ asm.store(mem_out, SCRATCH0_OPND.with_num_bits(mem_out.rm_num_bits()));
+ }
+ }
// Convert Opnd::const_ptr into Opnd::Mem. This split is done here to give
// a register for compile_exits.
&mut Insn::IncrCounter { mem, value } => {
@@ -487,17 +578,19 @@ impl Assembler {
asm.load_into(SCRATCH0_OPND, mem);
asm.incr_counter(Opnd::mem(64, SCRATCH0_OPND, 0), value);
}
+ &mut Insn::Mov { dest, src } => {
+ asm_mov(asm, dest, src, SCRATCH0_OPND);
+ }
// Resolve ParallelMov that couldn't be handled without a scratch register.
Insn::ParallelMov { moves } => {
for (dst, src) in Self::resolve_parallel_moves(&moves, Some(SCRATCH0_OPND)).unwrap() {
- asm.mov(dst, src)
+ asm_mov(asm, dst, src, SCRATCH0_OPND);
}
}
// Handle various operand combinations for spills on compile_exits.
&mut Insn::Store { dest, src } => {
- let Opnd::Mem(Mem { num_bits, .. }) = dest else {
- panic!("Unexpected Insn::Store destination in x86_scratch_split: {dest:?}");
- };
+ let num_bits = dest.rm_num_bits();
+ let dest = split_stack_membase(asm, dest, SCRATCH1_OPND, &stack_state);
let src = match src {
Opnd::Reg(_) => src,
@@ -541,7 +634,7 @@ impl Assembler {
}
}
- asm
+ asm_local
}
/// Emit platform-specific machine code
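
Two details drive the x86_64 side: instructions are two-address (the destination doubles as the left source), so after emitting the LIR instruction the split copies left into out; and x86-64 has no memory-to-memory mov, so that copy bounces through the scratch register when both sides are memory. A minimal sketch of the move helper, simplified from asm_mov in the hunk above (not the committed code verbatim):

fn mov_with_scratch(asm: &mut Assembler, dst: Opnd, src: Opnd) {
    if dst == src {
        return; // the value is already in place
    }
    if let (Opnd::Mem(_), Opnd::Mem(_)) = (dst, src) {
        // No mem-to-mem mov on x86-64: bounce through R11 (SCRATCH0_OPND).
        asm.mov(SCRATCH0_OPND, src);
        asm.mov(dst, SCRATCH0_OPND);
    } else {
        asm.mov(dst, src);
    }
}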