diff options
| author | Takashi Kokubun <takashi.kokubun@shopify.com> | 2025-10-28 13:18:05 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-10-28 13:18:05 -0700 |
| commit | d0c7234bc79e9b0e415c29ae3250bde12d791b4c (patch) | |
| tree | cd27ae20fc2219d7a0cd45d826f9788ca9c50133 | |
| parent | 599de290a030927734eac93db66de18c653b6ed2 (diff) | |
ZJIT: Support ParallelMov into memory (#14975)
| -rw-r--r-- | zjit/src/backend/arm64/mod.rs | 35 | ||||
| -rw-r--r-- | zjit/src/backend/lir.rs | 155 | ||||
| -rw-r--r-- | zjit/src/backend/x86_64/mod.rs | 26 | ||||
| -rw-r--r-- | zjit/src/codegen.rs | 19 |
4 files changed, 169 insertions, 66 deletions
diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index 867ad493ec..e090d8ce44 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -212,13 +212,7 @@ impl Assembler { /// Return true if opnd contains a scratch reg pub fn has_scratch_reg(opnd: Opnd) -> bool { - match opnd { - Opnd::Reg(_) => opnd == SCRATCH_OPND, - Opnd::Mem(Mem { base: MemBase::Reg(reg_no), .. }) => { - reg_no == SCRATCH_OPND.unwrap_reg().reg_no - } - _ => false, - } + Self::has_reg(opnd, SCRATCH_OPND.unwrap_reg()) } /// Get the list of registers from which we will allocate on this platform @@ -492,7 +486,7 @@ impl Assembler { // Note: the iteration order is reversed to avoid corrupting x0, // which is both the return value and first argument register if !opnds.is_empty() { - let mut args: Vec<(Reg, Opnd)> = vec![]; + let mut args: Vec<(Opnd, Opnd)> = vec![]; for (idx, opnd) in opnds.iter_mut().enumerate().rev() { // If the value that we're sending is 0, then we can use // the zero register, so in this case we'll just send @@ -502,7 +496,7 @@ impl Assembler { Opnd::Mem(_) => split_memory_address(asm, *opnd), _ => *opnd }; - args.push((C_ARG_OPNDS[idx].unwrap_reg(), value)); + args.push((C_ARG_OPNDS[idx], value)); } asm.parallel_mov(args); } @@ -725,10 +719,21 @@ impl Assembler { asm.lea_into(SCRATCH_OPND, Opnd::mem(64, SCRATCH_OPND, 0)); asm.incr_counter(SCRATCH_OPND, value); } + &mut Insn::Mov { dest, src } => { + match dest { + Opnd::Reg(_) => asm.load_into(dest, src), + Opnd::Mem(_) => asm.store(dest, src), + _ => asm.push_insn(insn), + } + } // Resolve ParallelMov that couldn't be handled without a scratch register. Insn::ParallelMov { moves } => { - for (reg, opnd) in Self::resolve_parallel_moves(moves, Some(SCRATCH_OPND)).unwrap() { - asm.load_into(Opnd::Reg(reg), opnd); + for (dst, src) in Self::resolve_parallel_moves(moves, Some(SCRATCH_OPND)).unwrap() { + match dst { + Opnd::Reg(_) => asm.load_into(dst, src), + Opnd::Mem(_) => asm.store(dst, src), + _ => asm.mov(dst, src), + } } } _ => { @@ -2385,7 +2390,7 @@ mod tests { } #[test] - fn test_reorder_c_args_no_cycle() { + fn test_ccall_resolve_parallel_moves_no_cycle() { crate::options::rb_zjit_prepare_options(); let (mut asm, mut cb) = setup_asm(); @@ -2403,7 +2408,7 @@ mod tests { } #[test] - fn test_reorder_c_args_single_cycle() { + fn test_ccall_resolve_parallel_moves_single_cycle() { crate::options::rb_zjit_prepare_options(); let (mut asm, mut cb) = setup_asm(); @@ -2426,7 +2431,7 @@ mod tests { } #[test] - fn test_reorder_c_args_two_cycles() { + fn test_ccall_resolve_parallel_moves_two_cycles() { crate::options::rb_zjit_prepare_options(); let (mut asm, mut cb) = setup_asm(); @@ -2453,7 +2458,7 @@ mod tests { } #[test] - fn test_reorder_c_args_large_cycle() { + fn test_ccall_resolve_parallel_moves_large_cycle() { crate::options::rb_zjit_prepare_options(); let (mut asm, mut cb) = setup_asm(); diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index eb49b419d6..72ebeaf11b 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -454,9 +454,9 @@ pub enum Insn { /// Shift a value left by a certain amount. LShift { opnd: Opnd, shift: Opnd, out: Opnd }, - /// A set of parallel moves into registers. + /// A set of parallel moves into registers or memory. /// The backend breaks cycles if there are any cycles between moves. - ParallelMov { moves: Vec<(Reg, Opnd)> }, + ParallelMov { moves: Vec<(Opnd, Opnd)> }, // A low-level mov instruction. It accepts two operands. Mov { dest: Opnd, src: Opnd }, @@ -1227,6 +1227,15 @@ impl Assembler asm } + /// Return true if `opnd` is or depends on `reg` + pub fn has_reg(opnd: Opnd, reg: Reg) -> bool { + match opnd { + Opnd::Reg(opnd_reg) => opnd_reg == reg, + Opnd::Mem(Mem { base: MemBase::Reg(reg_no), .. }) => reg_no == reg.reg_no, + _ => false, + } + } + pub fn expect_leaf_ccall(&mut self, stack_size: usize) { self.leaf_ccall_stack_size = Some(stack_size); } @@ -1307,17 +1316,23 @@ impl Assembler // Shuffle register moves, sometimes adding extra moves using scratch_reg, // so that they will not rewrite each other before they are used. - pub fn resolve_parallel_moves(old_moves: &[(Reg, Opnd)], scratch_reg: Option<Opnd>) -> Option<Vec<(Reg, Opnd)>> { + pub fn resolve_parallel_moves(old_moves: &[(Opnd, Opnd)], scratch_opnd: Option<Opnd>) -> Option<Vec<(Opnd, Opnd)>> { // Return the index of a move whose destination is not used as a source if any. - fn find_safe_move(moves: &[(Reg, Opnd)]) -> Option<usize> { - moves.iter().enumerate().find(|&(_, &(dest_reg, _))| { - moves.iter().all(|&(_, src_opnd)| src_opnd != Opnd::Reg(dest_reg)) + fn find_safe_move(moves: &[(Opnd, Opnd)]) -> Option<usize> { + moves.iter().enumerate().find(|&(_, &(dst, src))| { + // Check if `dst` is used in other moves. If `dst` is not used elsewhere, it's safe to write into `dst` now. + moves.iter().filter(|&&other_move| other_move != (dst, src)).all(|&(other_dst, other_src)| + match dst { + Opnd::Reg(reg) => !Assembler::has_reg(other_dst, reg) && !Assembler::has_reg(other_src, reg), + _ => other_dst != dst && other_src != dst, + } + ) }).map(|(index, _)| index) } // Remove moves whose source and destination are the same - let mut old_moves: Vec<(Reg, Opnd)> = old_moves.iter().copied() - .filter(|&(reg, opnd)| Opnd::Reg(reg) != opnd).collect(); + let mut old_moves: Vec<(Opnd, Opnd)> = old_moves.iter().copied() + .filter(|&(dst, src)| dst != src).collect(); let mut new_moves = vec![]; while !old_moves.is_empty() { @@ -1326,18 +1341,19 @@ impl Assembler new_moves.push(old_moves.remove(index)); } - // No safe move. Load the source of one move into scratch_reg, and - // then load scratch_reg into the destination when it's safe. + // No safe move. Load the source of one move into scratch_opnd, and + // then load scratch_opnd into the destination when it's safe. if !old_moves.is_empty() { - // If scratch_reg is None, return None and leave it to *_split_with_scratch_regs to resolve it. - let scratch_reg = scratch_reg?.unwrap_reg(); + // If scratch_opnd is None, return None and leave it to *_split_with_scratch_regs to resolve it. + let scratch_opnd = scratch_opnd?; + let scratch_reg = scratch_opnd.unwrap_reg(); // Make sure it's safe to use scratch_reg - assert!(old_moves.iter().all(|&(_, opnd)| opnd != Opnd::Reg(scratch_reg))); + assert!(old_moves.iter().all(|&(dst, src)| !Self::has_reg(dst, scratch_reg) && !Self::has_reg(src, scratch_reg))); - // Move scratch_reg <- opnd, and delay reg <- scratch_reg - let (reg, opnd) = old_moves.remove(0); - new_moves.push((scratch_reg, opnd)); - old_moves.push((reg, Opnd::Reg(scratch_reg))); + // Move scratch_opnd <- src, and delay dst <- scratch_opnd + let (dst, src) = old_moves.remove(0); + new_moves.push((scratch_opnd, src)); + old_moves.push((dst, scratch_opnd)); } } Some(new_moves) @@ -1551,8 +1567,8 @@ impl Assembler Insn::ParallelMov { moves } => { // For trampolines that use scratch registers, attempt to lower ParallelMov without scratch_reg. if let Some(moves) = Self::resolve_parallel_moves(&moves, None) { - for (reg, opnd) in moves { - asm.load_into(Opnd::Reg(reg), opnd); + for (dst, src) in moves { + asm.mov(dst, src); } } else { // If it needs a scratch_reg, leave it to *_split_with_scratch_regs to handle it. @@ -1980,7 +1996,7 @@ impl Assembler { out } - pub fn parallel_mov(&mut self, moves: Vec<(Reg, Opnd)>) { + pub fn parallel_mov(&mut self, moves: Vec<(Opnd, Opnd)>) { self.push_insn(Insn::ParallelMov { moves }); } @@ -2096,6 +2112,10 @@ pub(crate) use asm_ccall; mod tests { use super::*; + fn scratch_reg() -> Opnd { + Assembler::new_with_scratch_reg().1 + } + #[test] fn test_opnd_iter() { let insn = Insn::Add { left: Opnd::None, right: Opnd::None, out: Opnd::None }; @@ -2125,4 +2145,99 @@ mod tests { let mem = Opnd::mem(64, SP, 0); asm.load_into(mem, mem); } + + #[test] + fn test_resolve_parallel_moves_reorder_registers() { + let result = Assembler::resolve_parallel_moves(&[ + (C_ARG_OPNDS[0], SP), + (C_ARG_OPNDS[1], C_ARG_OPNDS[0]), + ], None); + assert_eq!(result, Some(vec![ + (C_ARG_OPNDS[1], C_ARG_OPNDS[0]), + (C_ARG_OPNDS[0], SP), + ])); + } + + #[test] + fn test_resolve_parallel_moves_give_up_register_cycle() { + // If scratch_opnd is not given, it cannot break cycles. + let result = Assembler::resolve_parallel_moves(&[ + (C_ARG_OPNDS[0], C_ARG_OPNDS[1]), + (C_ARG_OPNDS[1], C_ARG_OPNDS[0]), + ], None); + assert_eq!(result, None); + } + + #[test] + fn test_resolve_parallel_moves_break_register_cycle() { + let scratch_reg = scratch_reg(); + let result = Assembler::resolve_parallel_moves(&[ + (C_ARG_OPNDS[0], C_ARG_OPNDS[1]), + (C_ARG_OPNDS[1], C_ARG_OPNDS[0]), + ], Some(scratch_reg)); + assert_eq!(result, Some(vec![ + (scratch_reg, C_ARG_OPNDS[1]), + (C_ARG_OPNDS[1], C_ARG_OPNDS[0]), + (C_ARG_OPNDS[0], scratch_reg), + ])); + } + + #[test] + fn test_resolve_parallel_moves_break_memory_memory_cycle() { + let scratch_reg = scratch_reg(); + let result = Assembler::resolve_parallel_moves(&[ + (Opnd::mem(64, C_ARG_OPNDS[0], 0), C_ARG_OPNDS[1]), + (C_ARG_OPNDS[1], Opnd::mem(64, C_ARG_OPNDS[0], 0)), + ], Some(scratch_reg)); + assert_eq!(result, Some(vec![ + (scratch_reg, C_ARG_OPNDS[1]), + (C_ARG_OPNDS[1], Opnd::mem(64, C_ARG_OPNDS[0], 0)), + (Opnd::mem(64, C_ARG_OPNDS[0], 0), scratch_reg), + ])); + } + + #[test] + fn test_resolve_parallel_moves_break_register_memory_cycle() { + let scratch_reg = scratch_reg(); + let result = Assembler::resolve_parallel_moves(&[ + (C_ARG_OPNDS[0], C_ARG_OPNDS[1]), + (C_ARG_OPNDS[1], Opnd::mem(64, C_ARG_OPNDS[0], 0)), + ], Some(scratch_reg)); + assert_eq!(result, Some(vec![ + (scratch_reg, C_ARG_OPNDS[1]), + (C_ARG_OPNDS[1], Opnd::mem(64, C_ARG_OPNDS[0], 0)), + (C_ARG_OPNDS[0], scratch_reg), + ])); + } + + #[test] + fn test_resolve_parallel_moves_reorder_memory_destination() { + let scratch_reg = scratch_reg(); + let result = Assembler::resolve_parallel_moves(&[ + (C_ARG_OPNDS[0], SP), + (Opnd::mem(64, C_ARG_OPNDS[0], 0), CFP), + ], Some(scratch_reg)); + assert_eq!(result, Some(vec![ + (Opnd::mem(64, C_ARG_OPNDS[0], 0), CFP), + (C_ARG_OPNDS[0], SP), + ])); + } + + #[test] + #[should_panic] + fn test_resolve_parallel_moves_into_same_register() { + Assembler::resolve_parallel_moves(&[ + (C_ARG_OPNDS[0], SP), + (C_ARG_OPNDS[0], CFP), + ], Some(scratch_reg())); + } + + #[test] + #[should_panic] + fn test_resolve_parallel_moves_into_same_memory() { + Assembler::resolve_parallel_moves(&[ + (Opnd::mem(64, C_ARG_OPNDS[0], 0), SP), + (Opnd::mem(64, C_ARG_OPNDS[0], 0), CFP), + ], Some(scratch_reg())); + } } diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index bd2421823c..f76be64ec0 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -107,13 +107,7 @@ impl Assembler { /// Return true if opnd contains a scratch reg pub fn has_scratch_reg(opnd: Opnd) -> bool { - match opnd { - Opnd::Reg(_) => opnd == SCRATCH_OPND, - Opnd::Mem(Mem { base: MemBase::Reg(reg_no), .. }) => { - reg_no == SCRATCH_OPND.unwrap_reg().reg_no - } - _ => false, - } + Self::has_reg(opnd, SCRATCH_OPND.unwrap_reg()) } /// Get the list of registers from which we can allocate on this platform @@ -354,9 +348,9 @@ impl Assembler { // Load each operand into the corresponding argument register. if !opnds.is_empty() { - let mut args: Vec<(Reg, Opnd)> = vec![]; + let mut args: Vec<(Opnd, Opnd)> = vec![]; for (idx, opnd) in opnds.iter_mut().enumerate() { - args.push((C_ARG_OPNDS[idx].unwrap_reg(), *opnd)); + args.push((C_ARG_OPNDS[idx], *opnd)); } asm.parallel_mov(args); } @@ -489,8 +483,8 @@ impl Assembler { } // Resolve ParallelMov that couldn't be handled without a scratch register. Insn::ParallelMov { moves } => { - for (reg, opnd) in Self::resolve_parallel_moves(&moves, Some(SCRATCH_OPND)).unwrap() { - asm.load_into(Opnd::Reg(reg), opnd); + for (dst, src) in Self::resolve_parallel_moves(&moves, Some(SCRATCH_OPND)).unwrap() { + asm.mov(dst, src) } } // Handle various operand combinations for spills on compile_side_exits. @@ -1368,7 +1362,7 @@ mod tests { } #[test] - fn test_reorder_c_args_no_cycle() { + fn test_ccall_resolve_parallel_moves_no_cycle() { crate::options::rb_zjit_prepare_options(); let (mut asm, mut cb) = setup_asm(); @@ -1386,7 +1380,7 @@ mod tests { } #[test] - fn test_reorder_c_args_single_cycle() { + fn test_ccall_resolve_parallel_moves_single_cycle() { crate::options::rb_zjit_prepare_options(); let (mut asm, mut cb) = setup_asm(); @@ -1409,7 +1403,7 @@ mod tests { } #[test] - fn test_reorder_c_args_two_cycles() { + fn test_ccall_resolve_parallel_moves_two_cycles() { crate::options::rb_zjit_prepare_options(); let (mut asm, mut cb) = setup_asm(); @@ -1436,7 +1430,7 @@ mod tests { } #[test] - fn test_reorder_c_args_large_cycle() { + fn test_ccall_resolve_parallel_moves_large_cycle() { crate::options::rb_zjit_prepare_options(); let (mut asm, mut cb) = setup_asm(); @@ -1461,7 +1455,7 @@ mod tests { #[test] #[ignore] - fn test_reorder_c_args_with_insn_out() { + fn test_ccall_resolve_parallel_moves_with_insn_out() { let (mut asm, mut cb) = setup_asm(); let rax = asm.load(Opnd::UImm(1)); diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 63b5b6cb52..5ead4870df 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -8,7 +8,7 @@ use std::ffi::{c_int, c_long, c_void}; use std::slice; use crate::asm::Label; -use crate::backend::current::{Reg, ALLOC_REGS}; +use crate::backend::current::ALLOC_REGS; use crate::invariants::{ track_bop_assumption, track_cme_assumption, track_no_ep_escape_assumption, track_no_trace_point_assumption, track_single_ractor_assumption, track_stable_constant_names_assumption, track_no_singleton_class_assumption @@ -1024,20 +1024,9 @@ fn gen_branch_params(jit: &mut JITState, asm: &mut Assembler, branch: &BranchEdg } asm_comment!(asm, "set branch params: {}", branch.args.len()); - let mut moves: Vec<(Reg, Opnd)> = vec![]; - for (idx, &arg) in branch.args.iter().enumerate() { - match param_opnd(idx) { - Opnd::Reg(reg) => { - // If a parameter is a register, we need to parallel-move it - moves.push((reg, jit.get_opnd(arg))); - }, - param => { - // If a parameter is memory, we set it beforehand - asm.mov(param, jit.get_opnd(arg)); - } - } - } - asm.parallel_mov(moves); + asm.parallel_mov(branch.args.iter().enumerate().map(|(idx, &arg)| + (param_opnd(idx), jit.get_opnd(arg)) + ).collect()); } /// Compile a constant |
