summary refs log tree commit diff
diff options
context:
space:
mode:
author Kevin Newton <kddnewton@gmail.com> 2022-08-23 13:41:22 -0400
committer Takashi Kokubun <takashikkbn@gmail.com> 2022-08-29 09:09:41 -0700
commit 54c7bc67a2d54311b77aca9233b23a9e7a1ca581 (patch)
tree 26e2da017378285c18edecd671fbeea896178768
parent d433eb957bf3826e7aea97c12f0cdc9fcb9a1b43 (diff)
Various AArch64 optimizations (https://github.com/Shopify/ruby/pull/433)
* When we're storing an immediate 0 value at a memory address, we can use STUR XZR, Xd instead of loading 0 into a register and then storing that register.
* When we're moving 0 into an argument register, we can use MOV Xd, XZR instead of loading the value into a register first.
* In the newarray instruction, we can skip looking at the stack at all if the number of values we're using is 0.
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/6289
-rw-r--r-- yjit/src/asm/arm64/mod.rs 3
-rw-r--r-- yjit/src/asm/arm64/opnd.rs 5
-rw-r--r-- yjit/src/backend/arm64/mod.rs 14
-rw-r--r-- yjit/src/codegen.rs 11
4 files changed, 27 insertions, 6 deletions
diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs
index e5ba2f81ea..fb07498ce2 100644
--- a/yjit/src/asm/arm64/mod.rs
+++ b/yjit/src/asm/arm64/mod.rs
@@ -542,6 +542,9 @@ pub fn mov(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) {
LogicalReg::mov(rd.reg_no, rm.reg_no, rd.num_bits).into()
},
+ (A64Opnd::Reg(rd), A64Opnd::UImm(0)) => {
+ LogicalReg::mov(rd.reg_no, XZR_REG.reg_no, rd.num_bits).into()
+ },
(A64Opnd::Reg(rd), A64Opnd::UImm(imm)) => {
LogicalImm::mov(rd.reg_no, imm.try_into().unwrap(), rd.num_bits).into()
},
diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs
index a10e289455..c89481fb03 100644
--- a/yjit/src/asm/arm64/opnd.rs
+++ b/yjit/src/asm/arm64/opnd.rs
@@ -111,6 +111,9 @@ pub const X20_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 20 };
pub const X21_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 21 };
pub const X22_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 22 };
+// zero register
+pub const XZR_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 31 };
+
// 64-bit registers
pub const X0: A64Opnd = A64Opnd::Reg(X0_REG);
pub const X1: A64Opnd = A64Opnd::Reg(X1_REG);
@@ -143,7 +146,7 @@ pub const X27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 27 });
pub const X28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 28 });
pub const X29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 29 });
pub const X30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 30 });
-pub const X31: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 31 });
+pub const X31: A64Opnd = A64Opnd::Reg(XZR_REG);
// 32-bit registers
pub const W0: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 0 });
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 3f1bbf99b0..08eb5efa3f 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -289,7 +289,14 @@ impl Assembler
// Note: the iteration order is reversed to avoid corrupting x0,
// which is both the return value and first argument register
for (idx, opnd) in opnds.into_iter().enumerate().rev() {
- let value = split_load_operand(asm, opnd);
+ // If the value that we're sending is 0, then we can use
+ // the zero register, so in this case we'll just send
+ // a UImm of 0 along as the argument to the move.
+ let value = match opnd {
+ Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0),
+ _ => split_load_operand(asm, opnd)
+ };
+
asm.mov(C_ARG_OPNDS[idx], value);
}
@@ -386,7 +393,10 @@ impl Assembler
};
},
Insn::Mov { dest, src } => {
- let value = match (dest, src) {
+ let value: Opnd = match (dest, src) {
+ // If the destination is a memory operand and the source is an
+ // immediate zero, then we can just use the zero register.
+ (Opnd::Mem(_), Opnd::UImm(0) | Opnd::Imm(0)) => Opnd::Reg(XZR_REG),
// If the first operand is a memory operand, we're going
// to transform this into a store instruction, so we'll
// need to load this anyway.
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index e23171d2a0..1336fe3c57 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -1168,9 +1168,14 @@ fn gen_newarray(
// Save the PC and SP because we are allocating
jit_prepare_routine_call(jit, ctx, asm);
- let offset_magnitude = SIZEOF_VALUE as u32 * n;
- let values_opnd = ctx.sp_opnd(-(offset_magnitude as isize));
- let values_ptr = asm.lea(values_opnd);
+ // If n is 0, then elts is never going to be read, so we can just pass null
+ let values_ptr = if n == 0 {
+ Opnd::UImm(0)
+ } else {
+ let offset_magnitude = SIZEOF_VALUE as u32 * n;
+ let values_opnd = ctx.sp_opnd(-(offset_magnitude as isize));
+ asm.lea(values_opnd)
+ };
// call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts);
let new_ary = asm.ccall(