diff options
author | Kevin Newton <kddnewton@gmail.com> | 2022-09-08 17:09:50 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-08 17:09:50 -0400 |
commit | 35cfc9a3bb0078c297eb70520216ad228f2083e1 (patch) | |
tree | e0d98f27ed1f47bb5523e34e2d7f7c2707858fd0 | |
parent | e4f5296f065110fa83eb450d3a861253e76e534f (diff) |
Remove as many unnecessary moves as possible (#6342)v3_2_0_preview2
This commit does a bunch of stuff to try to eliminate as many
unnecessary mov instructions as possible.
First, it introduces the Insn::LoadInto instruction. Previously
when we needed a value to go into a specific register (like in
Insn::CCall when we're putting values into the argument registers
or in Insn::CRet when we're putting a value into the return
register) we would first load the value and then mov it into the
correct register. This resulted in a lot of duplicated work with
short live ranges since they basically immediately became unnecessary.
The new instruction accepts a destination and does not interact
with the register allocator at all, making it much more efficient.
We then use the new instruction when we're loading values into
argument registers for AArch64 or X86_64, and when we're returning
a value from AArch64. Notably we don't do it when we're returning
a value from X86_64 because everything can be accomplished with a
single mov anyway.
A couple of unnecessary movs were also present because when we
called the split_load_opnd function in a lot of split passes we
were loading all registers and instruction outputs. We no longer do
that.
This commit also makes it so that UImm(0) passes through the
Insn::Store split without attempting to be loaded, which allows it
to take advantage of the zero register. So now instead of mov-ing
0 into a register and then calling store, it just stores XZR.
Notes
Notes:
Merged-By: maximecb <maximecb@ruby-lang.org>
-rw-r--r-- | yjit/src/backend/arm64/mod.rs | 74 | ||||
-rw-r--r-- | yjit/src/backend/ir.rs | 10 | ||||
-rw-r--r-- | yjit/src/backend/x86_64/mod.rs | 31 |
3 files changed, 76 insertions, 39 deletions
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 3e926c9387..32db0ab3dc 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -137,6 +137,7 @@ impl Assembler /// to be split in case their displacement doesn't fit into 9 bits. fn split_load_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd { match opnd { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, Opnd::Mem(_) => { let split_opnd = split_memory_address(asm, opnd); asm.load(split_opnd) @@ -235,7 +236,7 @@ impl Assembler // such that only the Op::Load instruction needs to handle that // case. If the values aren't heap objects then we'll treat them as // if they were just unsigned integer. - let is_load = matches!(insn, Insn::Load { .. }); + let is_load = matches!(insn, Insn::Load { .. } | Insn::LoadInto { .. }); let mut opnd_iter = insn.opnd_iter_mut(); while let Some(opnd) = opnd_iter.next() { @@ -284,9 +285,8 @@ impl Assembler Insn::CCall { opnds, target, .. } => { assert!(opnds.len() <= C_ARG_OPNDS.len()); - // For each of the operands we're going to first load them - // into a register and then move them into the correct - // argument register. + // Load each operand into the corresponding argument + // register. // Note: the iteration order is reversed to avoid corrupting x0, // which is both the return value and first argument register for (idx, opnd) in opnds.into_iter().enumerate().rev() { @@ -295,10 +295,11 @@ impl Assembler // a UImm of 0 along as the argument to the move. 
let value = match opnd { Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0), - _ => split_load_operand(asm, opnd) + Opnd::Mem(_) => split_memory_address(asm, opnd), + _ => opnd }; - asm.mov(C_ARG_OPNDS[idx], value); + asm.load_into(C_ARG_OPNDS[idx], value); } // Now we push the CCall without any arguments so that it @@ -306,18 +307,29 @@ impl Assembler asm.ccall(target.unwrap_fun_ptr(), vec![]); }, Insn::Cmp { left, right } => { - let opnd0 = match left { - Opnd::Reg(_) | Opnd::InsnOut { .. } => left, - _ => split_load_operand(asm, left) - }; - + let opnd0 = split_load_operand(asm, left); let opnd1 = split_shifted_immediate(asm, right); asm.cmp(opnd0, opnd1); }, Insn::CRet(opnd) => { - if opnd != Opnd::Reg(C_RET_REG) { - let value = split_load_operand(asm, opnd); - asm.mov(C_RET_OPND, value); + match opnd { + // If the value is already in the return register, then + // we don't need to do anything. + Opnd::Reg(C_RET_REG) => {}, + + // If the value is a memory address, we need to first + // make sure the displacement isn't too large and then + // load it into the return register. + Opnd::Mem(_) => { + let split = split_memory_address(asm, opnd); + asm.load_into(C_RET_OPND, split); + }, + + // Otherwise we just need to load the value into the + // return register. + _ => { + asm.load_into(C_RET_OPND, opnd); + } } asm.cret(C_RET_OPND); }, @@ -375,7 +387,20 @@ impl Assembler } }, Insn::Load { opnd, .. } => { - split_load_operand(asm, opnd); + let value = match opnd { + Opnd::Mem(_) => split_memory_address(asm, opnd), + _ => opnd + }; + + asm.load(value); + }, + Insn::LoadInto { dest, opnd } => { + let value = match opnd { + Opnd::Mem(_) => split_memory_address(asm, opnd), + _ => opnd + }; + + asm.load_into(dest, value); }, Insn::LoadSExt { opnd, .. } => { match opnd { @@ -442,28 +467,24 @@ impl Assembler // The value being stored must be in a register, so if it's // not already one we'll load it first. let opnd1 = match src { - Opnd::Reg(_) | Opnd::InsnOut { .. 
} => src, + // If the first operand is zero, then we can just use + // the zero register. + Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG), + // Otherwise we'll check if we need to load it first. _ => split_load_operand(asm, src) }; asm.store(opnd0, opnd1); }, Insn::Sub { left, right, .. } => { - let opnd0 = match left { - Opnd::Reg(_) | Opnd::InsnOut { .. } => left, - _ => split_load_operand(asm, left) - }; - + let opnd0 = split_load_operand(asm, left); let opnd1 = split_shifted_immediate(asm, right); asm.sub(opnd0, opnd1); }, Insn::Test { left, right } => { // The value being tested must be in a register, so if it's // not already one we'll load it first. - let opnd0 = match left { - Opnd::Reg(_) | Opnd::InsnOut { .. } => left, - _ => split_load_operand(asm, left) - }; + let opnd0 = split_load_operand(asm, left); // The second value must be either a register or an // unsigned immediate that can be encoded as a bitmask @@ -710,7 +731,8 @@ impl Assembler // our IR we have the address first and the register second. stur(cb, src.into(), dest.into()); }, - Insn::Load { opnd, out } => { + Insn::Load { opnd, out } | + Insn::LoadInto { opnd, dest: out } => { match *opnd { Opnd::Reg(_) | Opnd::InsnOut { .. } => { mov(cb, out.into(), opnd.into()); diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 0b96af7f62..ee6499ff64 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -401,6 +401,9 @@ pub enum Insn { // A low-level instruction that loads a value into a register. Load { opnd: Opnd, out: Opnd }, + // A low-level instruction that loads a value into a specified register. + LoadInto { dest: Opnd, opnd: Opnd }, + // A low-level instruction that loads a value into a register and // sign-extends it to a 64-bit value. LoadSExt { opnd: Opnd, out: Opnd }, @@ -502,6 +505,7 @@ impl Insn { Insn::Lea { .. } => "Lea", Insn::LiveReg { .. } => "LiveReg", Insn::Load { .. } => "Load", + Insn::LoadInto { .. } => "LoadInto", Insn::LoadSExt { .. 
} => "LoadSExt", Insn::LShift { .. } => "LShift", Insn::Mov { .. } => "Mov", @@ -675,6 +679,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> { Insn::CSelNZ { truthy: opnd0, falsy: opnd1, .. } | Insn::CSelZ { truthy: opnd0, falsy: opnd1, .. } | Insn::IncrCounter { mem: opnd0, value: opnd1, .. } | + Insn::LoadInto { dest: opnd0, opnd: opnd1 } | Insn::LShift { opnd: opnd0, shift: opnd1, .. } | Insn::Mov { dest: opnd0, src: opnd1 } | Insn::Or { left: opnd0, right: opnd1, .. } | @@ -771,6 +776,7 @@ impl<'a> InsnOpndMutIterator<'a> { Insn::CSelNZ { truthy: opnd0, falsy: opnd1, .. } | Insn::CSelZ { truthy: opnd0, falsy: opnd1, .. } | Insn::IncrCounter { mem: opnd0, value: opnd1, .. } | + Insn::LoadInto { dest: opnd0, opnd: opnd1 } | Insn::LShift { opnd: opnd0, shift: opnd1, .. } | Insn::Mov { dest: opnd0, src: opnd1 } | Insn::Or { left: opnd0, right: opnd1, .. } | @@ -1422,6 +1428,10 @@ impl Assembler { out } + pub fn load_into(&mut self, dest: Opnd, opnd: Opnd) { + self.push_insn(Insn::LoadInto { dest, opnd }); + } + #[must_use] pub fn load_sext(&mut self, opnd: Opnd) -> Opnd { let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 4e230822f1..ca07d50ffc 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -131,7 +131,7 @@ impl Assembler // VALUEs alive. This is a sort of canonicalization. let mut unmapped_opnds: Vec<Opnd> = vec![]; - let is_load = matches!(insn, Insn::Load { .. }); + let is_load = matches!(insn, Insn::Load { .. } | Insn::LoadInto { .. }); let mut opnd_iter = insn.opnd_iter_mut(); while let Some(opnd) = opnd_iter.next() { @@ -289,6 +289,19 @@ impl Assembler asm.not(opnd0); }, + Insn::CCall { opnds, target, .. } => { + assert!(opnds.len() <= C_ARG_OPNDS.len()); + + // Load each operand into the corresponding argument + // register. 
+ for (idx, opnd) in opnds.into_iter().enumerate() { + asm.load_into(C_ARG_OPNDS[idx], *opnd); + } + + // Now we push the CCall without any arguments so that it + // just performs the call. + asm.ccall(target.unwrap_fun_ptr(), vec![]); + }, _ => { if insn.out_opnd().is_some() { let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter()); @@ -421,7 +434,8 @@ impl Assembler }, // This assumes only load instructions can contain references to GC'd Value operands - Insn::Load { opnd, out } => { + Insn::Load { opnd, out } | + Insn::LoadInto { dest: out, opnd } => { mov(cb, out.into(), opnd.into()); // If the value being loaded is a heap object @@ -490,17 +504,8 @@ impl Assembler }, // C function call - Insn::CCall { opnds, target, .. } => { - // Temporary - assert!(opnds.len() <= _C_ARG_OPNDS.len()); - - // For each operand - for (idx, opnd) in opnds.iter().enumerate() { - mov(cb, X86Opnd::Reg(_C_ARG_OPNDS[idx].unwrap_reg()), opnds[idx].into()); - } - - let ptr = target.unwrap_fun_ptr(); - call_ptr(cb, RAX, ptr); + Insn::CCall { target, .. } => { + call_ptr(cb, RAX, target.unwrap_fun_ptr()); }, Insn::CRet(opnd) => { |