author     Kevin Newton <kddnewton@gmail.com>    2022-09-27 16:58:01 -0400
committer  GitHub <noreply@github.com>           2022-09-27 16:58:01 -0400
commit     28433e9aa0c765c9d20bc6397439a1b12e66bcbd (patch)
tree       4b328addafdc92a8100c557cb8d21df1e7f6c36e
parent     8f7f12ad64c2f01e1fc8a75402337ceeb6607657 (diff)
Change IncrCounter lowering on AArch64 (#6455)
* Change IncrCounter lowering on AArch64

  Previously we were using LDADDAL, which is not available on Graviton 1
  chips. Instead, we're going to use an exclusive load/store group through
  the LDAXR/STLXR instructions.

* Update yjit/src/backend/arm64/mod.rs

Co-authored-by: Maxime Chevalier-Boisvert <maximechevalierb@gmail.com>
Notes:
    Merged-By: maximecb <maximecb@ruby-lang.org>
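In effect, the commit swaps a single ARMv8.1 LSE atomic (LDADDAL) for a load-linked/store-conditional retry loop that baseline ARMv8.0 cores such as Graviton 1 can execute. As a rough semantic model of what the new LDAXR/ADD/STLXR sequence guarantees (a sketch that models behavior only; the commit itself emits the raw instructions shown in the diffs below):

    use std::sync::atomic::{AtomicU64, Ordering};

    // Model of the exclusive load/store loop: read the counter with acquire
    // semantics, add the increment, and retry if another core touched the
    // location between the load and the store.
    fn incr_counter_model(counter: &AtomicU64, value: u64) {
        let mut current = counter.load(Ordering::Acquire);
        loop {
            match counter.compare_exchange_weak(
                current,
                current.wrapping_add(value),
                Ordering::AcqRel,  // success: the store releases, like STLXR
                Ordering::Acquire, // failure: reload acquires, like LDAXR
            ) {
                Ok(_) => break,
                Err(observed) => current = observed, // exclusivity lost; retry
            }
        }
    }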
-rw-r--r--  yjit/src/asm/arm64/inst/load_store_exclusive.rs  109
-rw-r--r--  yjit/src/asm/arm64/inst/mod.rs                     2
-rw-r--r--  yjit/src/asm/arm64/mod.rs                         39
-rw-r--r--  yjit/src/asm/arm64/opnd.rs                        10
-rw-r--r--  yjit/src/backend/arm64/mod.rs                     32
5 files changed, 181 insertions, 11 deletions
diff --git a/yjit/src/asm/arm64/inst/load_store_exclusive.rs b/yjit/src/asm/arm64/inst/load_store_exclusive.rs
new file mode 100644
index 0000000000..8216c2200a
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/load_store_exclusive.rs
@@ -0,0 +1,109 @@
+/// The operation being performed for this instruction.
+enum Op {
+    Store = 0,
+    Load = 1
+}
+
+/// The size of the registers being operated on.
+enum Size {
+    Size32 = 0b10,
+    Size64 = 0b11
+}
+
+/// A convenience function so that we can convert the number of bits of a
+/// register operand directly into a Size enum variant.
+impl From<u8> for Size {
+    fn from(num_bits: u8) -> Self {
+        match num_bits {
+            64 => Size::Size64,
+            32 => Size::Size32,
+            _ => panic!("Invalid number of bits: {}", num_bits)
+        }
+    }
+}
+
+/// The struct that represents an A64 load or store exclusive instruction that
+/// can be encoded.
+///
+/// LDAXR/STLXR
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// |  1     0  0    1  0  0  0    0     0                     1  1  1  1    1  1                                    |
+/// | size.                          op    rs..............                       rn.............. rt.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct LoadStoreExclusive {
+    /// The number of the register to be loaded.
+    rt: u8,
+
+    /// The base register with which to form the address.
+    rn: u8,
+
+    /// The register to be used for the status result if it applies to this
+    /// operation. Otherwise it's the zero register.
+    rs: u8,
+
+    /// The operation being performed for this instruction.
+    op: Op,
+
+    /// The size of the registers being operated on.
+    size: Size
+}
+
+impl LoadStoreExclusive {
+    /// LDAXR
+    /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LDAXR--Load-Acquire-Exclusive-Register-
+    pub fn ldaxr(rt: u8, rn: u8, num_bits: u8) -> Self {
+        Self { rt, rn, rs: 31, op: Op::Load, size: num_bits.into() }
+    }
+
+    /// STLXR
+    /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/STLXR--Store-Release-Exclusive-Register-
+    pub fn stlxr(rs: u8, rt: u8, rn: u8, num_bits: u8) -> Self {
+        Self { rt, rn, rs, op: Op::Store, size: num_bits.into() }
+    }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+const FAMILY: u32 = 0b0100;
+
+impl From<LoadStoreExclusive> for u32 {
+    /// Convert an instruction into a 32-bit value.
+    fn from(inst: LoadStoreExclusive) -> Self {
+        0
+        | ((inst.size as u32) << 30)
+        | (FAMILY << 25)
+        | ((inst.op as u32) << 22)
+        | ((inst.rs as u32) << 16)
+        | (0b111111 << 10)
+        | ((inst.rn as u32) << 5)
+        | (inst.rt as u32)
+    }
+}
+
+impl From<LoadStoreExclusive> for [u8; 4] {
+    /// Convert an instruction into a 4 byte array.
+    fn from(inst: LoadStoreExclusive) -> [u8; 4] {
+        let result: u32 = inst.into();
+        result.to_le_bytes()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_ldaxr() {
+        let inst = LoadStoreExclusive::ldaxr(16, 0, 64);
+        let result: u32 = inst.into();
+        assert_eq!(0xc85ffc10, result);
+    }
+
+    #[test]
+    fn test_stlxr() {
+        let inst = LoadStoreExclusive::stlxr(17, 16, 0, 64);
+        let result: u32 = inst.into();
+        assert_eq!(0xc811fc10, result);
+    }
+}
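As a cross-check on the layout above, the value expected by test_ldaxr can be assembled by hand; every field below comes straight from the From<LoadStoreExclusive> for u32 impl (a sketch, not part of the commit):

    // LDAXR X16, [X0]: size = 0b11 (64-bit), op = 1 (load),
    // rs = 31 (unused for loads), rn = 0, rt = 16.
    let encoded: u32 = (0b11 << 30)   // size
        | (0b0100 << 25)              // loads-and-stores family
        | (1 << 22)                   // op
        | (31 << 16)                  // rs
        | (0b111111 << 10)            // fixed bits (o0 and rt2)
        | (0 << 5)                    // rn
        | 16;                         // rt
    assert_eq!(encoded, 0xc85ffc10);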
diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs
index b3a77e73c9..9821e6a334 100644
--- a/yjit/src/asm/arm64/inst/mod.rs
+++ b/yjit/src/asm/arm64/inst/mod.rs
@@ -13,6 +13,7 @@ mod halfword_imm;
mod load_literal;
mod load_register;
mod load_store;
+mod load_store_exclusive;
mod logical_imm;
mod logical_reg;
mod mov;
@@ -36,6 +37,7 @@ pub use halfword_imm::HalfwordImm;
pub use load_literal::LoadLiteral;
pub use load_register::LoadRegister;
pub use load_store::LoadStore;
+pub use load_store_exclusive::LoadStoreExclusive;
pub use logical_imm::LogicalImm;
pub use logical_reg::LogicalReg;
pub use mov::Mov;
diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs
index d97452a045..88431ce30a 100644
--- a/yjit/src/asm/arm64/mod.rs
+++ b/yjit/src/asm/arm64/mod.rs
@@ -331,6 +331,20 @@ pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) {
    cb.write_bytes(&bytes);
}

+/// LDAXR - atomic load with acquire semantics
+pub fn ldaxr(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+    let bytes: [u8; 4] = match (rt, rn) {
+        (A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => {
+            assert_eq!(rn.num_bits, 64, "rn must be a 64-bit register.");
+
+            LoadStoreExclusive::ldaxr(rt.reg_no, rn.reg_no, rt.num_bits).into()
+        },
+        _ => panic!("Invalid operand combination to ldaxr instruction."),
+    };
+
+    cb.write_bytes(&bytes);
+}
+
/// LDP (signed offset) - load a pair of registers from memory
pub fn ldp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) {
    let bytes: [u8; 4] = match (rt1, rt2, rn) {
@@ -707,6 +721,21 @@ pub fn orr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
    cb.write_bytes(&bytes);
}

+/// STLXR - store a value to memory, release exclusive access
+pub fn stlxr(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) {
+    let bytes: [u8; 4] = match (rs, rt, rn) {
+        (A64Opnd::Reg(rs), A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => {
+            assert_eq!(rs.num_bits, 32, "rs must be a 32-bit register.");
+            assert_eq!(rn.num_bits, 64, "rn must be a 64-bit register.");
+
+            LoadStoreExclusive::stlxr(rs.reg_no, rt.reg_no, rn.reg_no, rn.num_bits).into()
+        },
+        _ => panic!("Invalid operand combination to stlxr instruction.")
+    };
+
+    cb.write_bytes(&bytes);
+}
+
/// STP (signed offset) - store a pair of registers to memory
pub fn stp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) {
    let bytes: [u8; 4] = match (rt1, rt2, rn) {
@@ -1184,6 +1213,11 @@ mod tests {
    }

    #[test]
+    fn test_ldaxr() {
+        check_bytes("6afd5fc8", |cb| ldaxr(cb, X10, X11));
+    }
+
+    #[test]
    fn test_ldp() {
        check_bytes("8a2d4da9", |cb| ldp(cb, X10, X11, A64Opnd::new_mem(64, X12, 208)));
    }
@@ -1334,6 +1368,11 @@ mod tests {
    }

    #[test]
+    fn test_stlxr() {
+        check_bytes("8bfd0ac8", |cb| stlxr(cb, W10, X11, X12));
+    }
+
+    #[test]
    fn test_stp() {
        check_bytes("8a2d0da9", |cb| stp(cb, X10, X11, A64Opnd::new_mem(64, X12, 208)));
    }
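The two new assembler functions are only meaningful as a pair: each LDAXR opens an exclusive monitor that a matching STLXR closes, with the 32-bit status register reporting whether the store won. A hypothetical pairing at this layer, reusing the register operands from the tests above (illustrative only, and omitting the retry branch the backend adds):

    // Atomically bump the counter at [X12] by one.
    ldaxr(cb, X11, X12);                      // X11 = [X12], exclusive + acquire
    add(cb, X11, X11, A64Opnd::new_uimm(1));  // X11 += 1
    stlxr(cb, W10, X11, X12);                 // [X12] = X11; W10 = 0 on success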
diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs
index 52b2a84637..0dc614ab4e 100644
--- a/yjit/src/asm/arm64/opnd.rs
+++ b/yjit/src/asm/arm64/opnd.rs
@@ -84,6 +84,14 @@ impl A64Opnd {
            _ => false
        }
    }
+
+    /// Unwrap a register from an operand.
+    pub fn unwrap_reg(&self) -> A64Reg {
+        match self {
+            A64Opnd::Reg(reg) => *reg,
+            _ => panic!("Expected register operand")
+        }
+    }
}

// argument registers
@@ -102,6 +110,8 @@ pub const X12_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 12 };
pub const X13_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 13 };
pub const X14_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 14 };
pub const X15_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 15 };
+pub const X16_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 16 };
+pub const X17_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 17 };
// callee-save registers
pub const X19_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 19 };
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 0a5068be58..79dff530d1 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -70,7 +70,8 @@ impl Assembler
{
    // A special scratch register for intermediate processing.
    // This register is caller-saved (so we don't have to save it before using it)
-    const SCRATCH0: A64Opnd = A64Opnd::Reg(X15_REG);
+    const SCRATCH0: A64Opnd = A64Opnd::Reg(X16_REG);
+    const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG);

    /// Get the list of registers from which we will allocate on this platform
    /// These are caller-saved registers
@@ -373,17 +374,12 @@ impl Assembler
                    asm.csel_ge(opnd0, opnd1);
                },
                Insn::IncrCounter { mem, value } => {
-                    // We'll use LDADD later which only works with registers
-                    // ... Load pointer into register
-                    let counter_addr = split_lea_operand(asm, mem);
-
-                    // Load immediates into a register
-                    let addend = match value {
-                        opnd @ Opnd::Imm(_) | opnd @ Opnd::UImm(_) => asm.load(opnd),
-                        opnd => opnd,
+                    let counter_addr = match mem {
+                        Opnd::Mem(_) => split_lea_operand(asm, mem),
+                        _ => mem
                    };

-                    asm.incr_counter(counter_addr, addend);
+                    asm.incr_counter(counter_addr, value);
                },
                Insn::JmpOpnd(opnd) => {
                    if let Opnd::Mem(_) = opnd {
@@ -936,7 +932,21 @@ impl Assembler
                    emit_conditional_jump::<{Condition::VS}>(cb, *target);
                },
                Insn::IncrCounter { mem, value } => {
-                    ldaddal(cb, value.into(), value.into(), mem.into());
+                    let label = cb.new_label("incr_counter_loop".to_string());
+                    cb.write_label(label);
+
+                    ldaxr(cb, Self::SCRATCH0, mem.into());
+                    add(cb, Self::SCRATCH0, Self::SCRATCH0, value.into());
+
+                    // The status register that gets used to track whether or
+                    // not the store was successful must be 32 bits. Since we
+                    // store the SCRATCH registers as their 64-bit versions, we
+                    // need to rewrap it here.
+                    let status = A64Opnd::Reg(Self::SCRATCH1.unwrap_reg().with_num_bits(32));
+                    stlxr(cb, status, Self::SCRATCH0, mem.into());
+
+                    cmp(cb, Self::SCRATCH1, A64Opnd::new_uimm(0));
+                    emit_conditional_jump::<{Condition::NE}>(cb, Target::Label(label));
                },
                Insn::Breakpoint => {
                    brk(cb, A64Opnd::None);
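One detail worth spelling out in the lowering above: STLXR writes 0 to its status register when the store completed atomically and 1 when the exclusive monitor was lost, which is why the emitted code compares SCRATCH1 against zero and branches back to the label on NE. The control flow, modeled in Rust with hypothetical closures standing in for the two instructions (a sketch, not code from the commit):

    // Shape of the emitted loop; status is 0 on success, 1 on a lost monitor.
    fn incr_counter_shape(
        mut load_exclusive: impl FnMut() -> u64,     // LDAXR into SCRATCH0
        mut store_exclusive: impl FnMut(u64) -> u32, // STLXR, status in SCRATCH1
        value: u64,
    ) {
        loop {
            let current = load_exclusive();
            let status = store_exclusive(current.wrapping_add(value));
            if status == 0 {
                break; // CMP + B.NE falls through once the store sticks
            }
        }
    }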