summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Newton <kddnewton@gmail.com>2022-09-08 17:09:50 -0400
committerGitHub <noreply@github.com>2022-09-08 17:09:50 -0400
commit35cfc9a3bb0078c297eb70520216ad228f2083e1 (patch)
treee0d98f27ed1f47bb5523e34e2d7f7c2707858fd0
parente4f5296f065110fa83eb450d3a861253e76e534f (diff)
Remove as many unnecessary moves as possible (#6342)v3_2_0_preview2
This commit does a bunch of stuff to try to eliminate as many unnecessary mov instructions as possible. First, it introduces the Insn::LoadInto instruction. Previously when we needed a value to go into a specific register (like in Insn::CCall when we're putting values into the argument registers or in Insn::CRet when we're putting a value into the return register) we would first load the value and then mov it into the correct register. This resulted in a lot of duplicated work with short live ranges since they basically immediately were unnecessary. The new instruction accepts a destination and does not interact with the register allocator at all, making it much more efficient. We then use the new instruction when we're loading values into argument registers for AArch64 or X86_64, and when we're returning a value from AArch64. Notably we don't do it when we're returning a value from X86_64 because everything can be accomplished with a single mov anyway. A couple of unnecessary movs were also present because when we called the split_load_opnd function in a lot of split passes we were loading all registers and instruction outputs. We no longer do that. This commit also makes it so that UImm(0) passes through the Insn::Store split without attempting to be loaded, which allows it to take advantage of the zero register. So now instead of mov-ing 0 into a register and then calling store, it just stores XZR.
Notes
Notes: Merged-By: maximecb <maximecb@ruby-lang.org>
-rw-r--r--yjit/src/backend/arm64/mod.rs74
-rw-r--r--yjit/src/backend/ir.rs10
-rw-r--r--yjit/src/backend/x86_64/mod.rs31
3 files changed, 76 insertions, 39 deletions
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 3e926c9387..32db0ab3dc 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -137,6 +137,7 @@ impl Assembler
/// to be split in case their displacement doesn't fit into 9 bits.
fn split_load_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd {
match opnd {
+ Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd,
Opnd::Mem(_) => {
let split_opnd = split_memory_address(asm, opnd);
asm.load(split_opnd)
@@ -235,7 +236,7 @@ impl Assembler
// such that only the Op::Load instruction needs to handle that
// case. If the values aren't heap objects then we'll treat them as
// if they were just unsigned integer.
- let is_load = matches!(insn, Insn::Load { .. });
+ let is_load = matches!(insn, Insn::Load { .. } | Insn::LoadInto { .. });
let mut opnd_iter = insn.opnd_iter_mut();
while let Some(opnd) = opnd_iter.next() {
@@ -284,9 +285,8 @@ impl Assembler
Insn::CCall { opnds, target, .. } => {
assert!(opnds.len() <= C_ARG_OPNDS.len());
- // For each of the operands we're going to first load them
- // into a register and then move them into the correct
- // argument register.
+ // Load each operand into the corresponding argument
+ // register.
// Note: the iteration order is reversed to avoid corrupting x0,
// which is both the return value and first argument register
for (idx, opnd) in opnds.into_iter().enumerate().rev() {
@@ -295,10 +295,11 @@ impl Assembler
// a UImm of 0 along as the argument to the move.
let value = match opnd {
Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0),
- _ => split_load_operand(asm, opnd)
+ Opnd::Mem(_) => split_memory_address(asm, opnd),
+ _ => opnd
};
- asm.mov(C_ARG_OPNDS[idx], value);
+ asm.load_into(C_ARG_OPNDS[idx], value);
}
// Now we push the CCall without any arguments so that it
@@ -306,18 +307,29 @@ impl Assembler
asm.ccall(target.unwrap_fun_ptr(), vec![]);
},
Insn::Cmp { left, right } => {
- let opnd0 = match left {
- Opnd::Reg(_) | Opnd::InsnOut { .. } => left,
- _ => split_load_operand(asm, left)
- };
-
+ let opnd0 = split_load_operand(asm, left);
let opnd1 = split_shifted_immediate(asm, right);
asm.cmp(opnd0, opnd1);
},
Insn::CRet(opnd) => {
- if opnd != Opnd::Reg(C_RET_REG) {
- let value = split_load_operand(asm, opnd);
- asm.mov(C_RET_OPND, value);
+ match opnd {
+ // If the value is already in the return register, then
+ // we don't need to do anything.
+ Opnd::Reg(C_RET_REG) => {},
+
+ // If the value is a memory address, we need to first
+ // make sure the displacement isn't too large and then
+ // load it into the return register.
+ Opnd::Mem(_) => {
+ let split = split_memory_address(asm, opnd);
+ asm.load_into(C_RET_OPND, split);
+ },
+
+ // Otherwise we just need to load the value into the
+ // return register.
+ _ => {
+ asm.load_into(C_RET_OPND, opnd);
+ }
}
asm.cret(C_RET_OPND);
},
@@ -375,7 +387,20 @@ impl Assembler
}
},
Insn::Load { opnd, .. } => {
- split_load_operand(asm, opnd);
+ let value = match opnd {
+ Opnd::Mem(_) => split_memory_address(asm, opnd),
+ _ => opnd
+ };
+
+ asm.load(value);
+ },
+ Insn::LoadInto { dest, opnd } => {
+ let value = match opnd {
+ Opnd::Mem(_) => split_memory_address(asm, opnd),
+ _ => opnd
+ };
+
+ asm.load_into(dest, value);
},
Insn::LoadSExt { opnd, .. } => {
match opnd {
@@ -442,28 +467,24 @@ impl Assembler
// The value being stored must be in a register, so if it's
// not already one we'll load it first.
let opnd1 = match src {
- Opnd::Reg(_) | Opnd::InsnOut { .. } => src,
+ // If the first operand is zero, then we can just use
+ // the zero register.
+ Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG),
+ // Otherwise we'll check if we need to load it first.
_ => split_load_operand(asm, src)
};
asm.store(opnd0, opnd1);
},
Insn::Sub { left, right, .. } => {
- let opnd0 = match left {
- Opnd::Reg(_) | Opnd::InsnOut { .. } => left,
- _ => split_load_operand(asm, left)
- };
-
+ let opnd0 = split_load_operand(asm, left);
let opnd1 = split_shifted_immediate(asm, right);
asm.sub(opnd0, opnd1);
},
Insn::Test { left, right } => {
// The value being tested must be in a register, so if it's
// not already one we'll load it first.
- let opnd0 = match left {
- Opnd::Reg(_) | Opnd::InsnOut { .. } => left,
- _ => split_load_operand(asm, left)
- };
+ let opnd0 = split_load_operand(asm, left);
// The second value must be either a register or an
// unsigned immediate that can be encoded as a bitmask
@@ -710,7 +731,8 @@ impl Assembler
// our IR we have the address first and the register second.
stur(cb, src.into(), dest.into());
},
- Insn::Load { opnd, out } => {
+ Insn::Load { opnd, out } |
+ Insn::LoadInto { opnd, dest: out } => {
match *opnd {
Opnd::Reg(_) | Opnd::InsnOut { .. } => {
mov(cb, out.into(), opnd.into());
diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs
index 0b96af7f62..ee6499ff64 100644
--- a/yjit/src/backend/ir.rs
+++ b/yjit/src/backend/ir.rs
@@ -401,6 +401,9 @@ pub enum Insn {
// A low-level instruction that loads a value into a register.
Load { opnd: Opnd, out: Opnd },
+ // A low-level instruction that loads a value into a specified register.
+ LoadInto { dest: Opnd, opnd: Opnd },
+
// A low-level instruction that loads a value into a register and
// sign-extends it to a 64-bit value.
LoadSExt { opnd: Opnd, out: Opnd },
@@ -502,6 +505,7 @@ impl Insn {
Insn::Lea { .. } => "Lea",
Insn::LiveReg { .. } => "LiveReg",
Insn::Load { .. } => "Load",
+ Insn::LoadInto { .. } => "LoadInto",
Insn::LoadSExt { .. } => "LoadSExt",
Insn::LShift { .. } => "LShift",
Insn::Mov { .. } => "Mov",
@@ -675,6 +679,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
Insn::CSelNZ { truthy: opnd0, falsy: opnd1, .. } |
Insn::CSelZ { truthy: opnd0, falsy: opnd1, .. } |
Insn::IncrCounter { mem: opnd0, value: opnd1, .. } |
+ Insn::LoadInto { dest: opnd0, opnd: opnd1 } |
Insn::LShift { opnd: opnd0, shift: opnd1, .. } |
Insn::Mov { dest: opnd0, src: opnd1 } |
Insn::Or { left: opnd0, right: opnd1, .. } |
@@ -771,6 +776,7 @@ impl<'a> InsnOpndMutIterator<'a> {
Insn::CSelNZ { truthy: opnd0, falsy: opnd1, .. } |
Insn::CSelZ { truthy: opnd0, falsy: opnd1, .. } |
Insn::IncrCounter { mem: opnd0, value: opnd1, .. } |
+ Insn::LoadInto { dest: opnd0, opnd: opnd1 } |
Insn::LShift { opnd: opnd0, shift: opnd1, .. } |
Insn::Mov { dest: opnd0, src: opnd1 } |
Insn::Or { left: opnd0, right: opnd1, .. } |
@@ -1422,6 +1428,10 @@ impl Assembler {
out
}
+ pub fn load_into(&mut self, dest: Opnd, opnd: Opnd) {
+ self.push_insn(Insn::LoadInto { dest, opnd });
+ }
+
#[must_use]
pub fn load_sext(&mut self, opnd: Opnd) -> Opnd {
let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd]));
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
index 4e230822f1..ca07d50ffc 100644
--- a/yjit/src/backend/x86_64/mod.rs
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -131,7 +131,7 @@ impl Assembler
// VALUEs alive. This is a sort of canonicalization.
let mut unmapped_opnds: Vec<Opnd> = vec![];
- let is_load = matches!(insn, Insn::Load { .. });
+ let is_load = matches!(insn, Insn::Load { .. } | Insn::LoadInto { .. });
let mut opnd_iter = insn.opnd_iter_mut();
while let Some(opnd) = opnd_iter.next() {
@@ -289,6 +289,19 @@ impl Assembler
asm.not(opnd0);
},
+ Insn::CCall { opnds, target, .. } => {
+ assert!(opnds.len() <= C_ARG_OPNDS.len());
+
+ // Load each operand into the corresponding argument
+ // register.
+ for (idx, opnd) in opnds.into_iter().enumerate() {
+ asm.load_into(C_ARG_OPNDS[idx], *opnd);
+ }
+
+ // Now we push the CCall without any arguments so that it
+ // just performs the call.
+ asm.ccall(target.unwrap_fun_ptr(), vec![]);
+ },
_ => {
if insn.out_opnd().is_some() {
let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter());
@@ -421,7 +434,8 @@ impl Assembler
},
// This assumes only load instructions can contain references to GC'd Value operands
- Insn::Load { opnd, out } => {
+ Insn::Load { opnd, out } |
+ Insn::LoadInto { dest: out, opnd } => {
mov(cb, out.into(), opnd.into());
// If the value being loaded is a heap object
@@ -490,17 +504,8 @@ impl Assembler
},
// C function call
- Insn::CCall { opnds, target, .. } => {
- // Temporary
- assert!(opnds.len() <= _C_ARG_OPNDS.len());
-
- // For each operand
- for (idx, opnd) in opnds.iter().enumerate() {
- mov(cb, X86Opnd::Reg(_C_ARG_OPNDS[idx].unwrap_reg()), opnds[idx].into());
- }
-
- let ptr = target.unwrap_fun_ptr();
- call_ptr(cb, RAX, ptr);
+ Insn::CCall { target, .. } => {
+ call_ptr(cb, RAX, target.unwrap_fun_ptr());
},
Insn::CRet(opnd) => {