diff options
author | Kevin Newton <kddnewton@gmail.com> | 2022-08-23 13:41:22 -0400 |
---|---|---|
committer | Takashi Kokubun <takashikkbn@gmail.com> | 2022-08-29 09:09:41 -0700 |
commit | 54c7bc67a2d54311b77aca9233b23a9e7a1ca581 (patch) | |
tree | 26e2da017378285c18edecd671fbeea896178768 | |
parent | d433eb957bf3826e7aea97c12f0cdc9fcb9a1b43 (diff) |
Various AArch64 optimizations (https://github.com/Shopify/ruby/pull/433)
* When we're storing an immediate 0 value at a memory address, we
can use STUR XZR, [Xn] (storing the zero register directly) instead
of loading 0 into a register and then storing that register.
* When we're moving 0 into an argument register, we can use
MOV Xd, XZR instead of loading the value into a register first.
* In the newarray instruction, we can skip looking at the stack at
all if the number of values we're using is 0.
Notes:
Merged: https://github.com/ruby/ruby/pull/6289
-rw-r--r-- | yjit/src/asm/arm64/mod.rs | 3 | ||||
-rw-r--r-- | yjit/src/asm/arm64/opnd.rs | 5 | ||||
-rw-r--r-- | yjit/src/backend/arm64/mod.rs | 14 | ||||
-rw-r--r-- | yjit/src/codegen.rs | 11 |
4 files changed, 27 insertions, 6 deletions
diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index e5ba2f81ea..fb07498ce2 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -542,6 +542,9 @@ pub fn mov(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { LogicalReg::mov(rd.reg_no, rm.reg_no, rd.num_bits).into() }, + (A64Opnd::Reg(rd), A64Opnd::UImm(0)) => { + LogicalReg::mov(rd.reg_no, XZR_REG.reg_no, rd.num_bits).into() + }, (A64Opnd::Reg(rd), A64Opnd::UImm(imm)) => { LogicalImm::mov(rd.reg_no, imm.try_into().unwrap(), rd.num_bits).into() }, diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs index a10e289455..c89481fb03 100644 --- a/yjit/src/asm/arm64/opnd.rs +++ b/yjit/src/asm/arm64/opnd.rs @@ -111,6 +111,9 @@ pub const X20_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 20 }; pub const X21_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 21 }; pub const X22_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 22 }; +// zero register +pub const XZR_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 31 }; + // 64-bit registers pub const X0: A64Opnd = A64Opnd::Reg(X0_REG); pub const X1: A64Opnd = A64Opnd::Reg(X1_REG); @@ -143,7 +146,7 @@ pub const X27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 27 }); pub const X28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 28 }); pub const X29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 29 }); pub const X30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 30 }); -pub const X31: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 31 }); +pub const X31: A64Opnd = A64Opnd::Reg(XZR_REG); // 32-bit registers pub const W0: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 0 }); diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 3f1bbf99b0..08eb5efa3f 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -289,7 +289,14 @@ impl Assembler // Note: the iteration order is reversed to avoid corrupting x0, // which is both the return value and first 
argument register for (idx, opnd) in opnds.into_iter().enumerate().rev() { - let value = split_load_operand(asm, opnd); + // If the value that we're sending is 0, then we can use + // the zero register, so in this case we'll just send + // a UImm of 0 along as the argument to the move. + let value = match opnd { + Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0), + _ => split_load_operand(asm, opnd) + }; + asm.mov(C_ARG_OPNDS[idx], value); } @@ -386,7 +393,10 @@ impl Assembler }; }, Insn::Mov { dest, src } => { - let value = match (dest, src) { + let value: Opnd = match (dest, src) { + // If the first operand is zero, then we can just use + // the zero register. + (Opnd::Mem(_), Opnd::UImm(0) | Opnd::Imm(0)) => Opnd::Reg(XZR_REG), // If the first operand is a memory operand, we're going // to transform this into a store instruction, so we'll // need to load this anyway. diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index e23171d2a0..1336fe3c57 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1168,9 +1168,14 @@ fn gen_newarray( // Save the PC and SP because we are allocating jit_prepare_routine_call(jit, ctx, asm); - let offset_magnitude = SIZEOF_VALUE as u32 * n; - let values_opnd = ctx.sp_opnd(-(offset_magnitude as isize)); - let values_ptr = asm.lea(values_opnd); + // If n is 0, then elts is never going to be read, so we can just pass null + let values_ptr = if n == 0 { + Opnd::UImm(0) + } else { + let offset_magnitude = SIZEOF_VALUE as u32 * n; + let values_opnd = ctx.sp_opnd(-(offset_magnitude as isize)); + asm.lea(values_opnd) + }; // call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts); let new_ary = asm.ccall( |