summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Newton <kddnewton@gmail.com>2022-06-16 14:28:51 -0400
committerTakashi Kokubun <takashikkbn@gmail.com>2022-08-29 08:46:57 -0700
commiteb4c7b4ea55c5a4c593bea4bba9aa1e9093b3447 (patch)
tree0686b856c68a722e320d45c28cc3d7dd22692b3d
parent67de662c4419d4d115f40f8d3ed0c295b581dcf0 (diff)
AND/ANDS for A64 (https://github.com/Shopify/ruby/pull/300)
-rw-r--r--yjit/src/asm/arm64/inst/bitmask_imm.rs250
-rw-r--r--yjit/src/asm/arm64/inst/logical_imm.rs97
-rw-r--r--yjit/src/asm/arm64/inst/mod.rs2
3 files changed, 349 insertions, 0 deletions
diff --git a/yjit/src/asm/arm64/inst/bitmask_imm.rs b/yjit/src/asm/arm64/inst/bitmask_imm.rs
new file mode 100644
index 0000000000..7e5a21c7b4
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/bitmask_imm.rs
@@ -0,0 +1,250 @@
+/// Immediates used by the logical immediate instructions are not actually the
+/// immediate value, but instead are encoded into a 13-bit wide mask of 3
+/// elements. This allows many more values to be represented than 13 bits would
+/// normally allow, at the expense of not being able to represent every possible
+/// value.
+///
+/// In order for a number to be encodeable in this form, the binary
+/// representation must consist of a single set of contiguous 1s. That pattern
+/// must then be replicatable across all of the bits either 1, 2, 4, 8, 16, or
+/// 32 times (rotated or not).
+///
+/// For example, 1 (0b1), 2 (0b10), 3 (0b11), and 4 (0b100) are all valid.
+/// However, 5 (0b101) is invalid, because it contains 2 sets of 1s and cannot
+/// be replicated across 64 bits.
+///
+/// Some more examples to illustrate the idea of replication:
+/// * 0x5555555555555555 is a valid value (0b0101...) because it consists of a
+/// single set of 1s which can be replicated across all of the bits 32 times.
+/// * 0xf0f0f0f0f0f0f0f0 is a valid value (0b1111000011110000...) because it
+/// consists of a single set of 1s which can be replicated across all of the
+/// bits 8 times (rotated by 4 bits).
+/// * 0x0ff00ff00ff00ff0 is a valid value (0000111111110000...) because it
+/// consists of a single set of 1s which can be replicated across all of the
+/// bits 4 times (rotated by 12 bits).
+///
+/// To encode the values, there are 3 elements:
+/// * n = 1 if the pattern is 64-bits wide, 0 otherwise
+/// * imms = the size of the pattern, a 0, and then one less than the number of
+/// sequential 1s
+/// * immr = the number of right rotations to apply to the pattern to get the
+/// target value
+///
+pub struct BitmaskImmediate {
+ n: u8,
+ imms: u8,
+ immr: u8
+}
+
+impl TryFrom<u64> for BitmaskImmediate {
+ type Error = ();
+
+ /// Attempt to convert a u64 into a BitmaskImm.
+ fn try_from(value: u64) -> Result<Self, Self::Error> {
+ /// Is this number's binary representation all 1s?
+ fn is_mask(imm: u64) -> bool {
+ if imm == u64::MAX { true } else { ((imm + 1) & imm) == 0 }
+ }
+
+ /// Is this number's binary representation one or more 1s followed by one or
+ /// more 0s?
+ fn is_shifted_mask(imm: u64) -> bool {
+ is_mask((imm - 1) | imm)
+ }
+
+ let mut imm = value;
+ let mut size = 64;
+
+ // First, determine the element size.
+ loop {
+ size >>= 1;
+ let mask = (1 << size) - 1;
+
+ if (imm & mask) != ((imm >> size) & mask) {
+ size <<= 1;
+ break;
+ }
+
+ if size <= 2 {
+ break;
+ }
+ }
+
+ // Second, determine the rotation to make the pattern be aligned such
+ // that all of the least significant bits are 1.
+ let trailing_ones: u32;
+ let left_rotations: u32;
+
+ let mask = u64::MAX >> (64 - size);
+ imm &= mask;
+
+ if is_shifted_mask(imm) {
+ left_rotations = imm.trailing_zeros();
+ assert!(left_rotations < 64);
+ trailing_ones = (imm >> left_rotations).trailing_ones();
+ } else {
+ imm |= !mask;
+ if !is_shifted_mask(!imm) {
+ return Err(());
+ }
+
+ let leading_ones = imm.leading_ones();
+ left_rotations = 64 - leading_ones;
+ trailing_ones = leading_ones + imm.trailing_ones() - (64 - size);
+ }
+
+ // immr is the number of right rotations it takes to get from the
+ // matching unrotated pattern to the target value.
+ let immr = (size - left_rotations) & (size - 1);
+ assert!(size > left_rotations);
+
+ // imms is encoded as the size of the pattern, a 0, and then one less
+ // than the number of sequential 1s. The table below shows how this is
+ // encoded. (Note that the way it works out, it's impossible for every x
+ // in a row to be 1 at the same time).
+ // +-------------+--------------+--------------+
+ // | imms | element size | number of 1s |
+ // +-------------+--------------+--------------+
+ // | 1 1 1 1 0 x | 2 bits | 1 |
+ // | 1 1 1 0 x x | 4 bits | 1-3 |
+ // | 1 1 0 x x x | 8 bits | 1-7 |
+ // | 1 0 x x x x | 16 bits | 1-15 |
+ // | 0 x x x x x | 32 bits | 1-31 |
+ // | x x x x x x | 64 bits | 1-63 |
+ // +-------------+--------------+--------------+
+ let imms = (!(size - 1) << 1) | (trailing_ones - 1);
+
+ // n is 1 if the element size is 64-bits, and 0 otherwise.
+ let n = ((imms >> 6) & 1) ^ 1;
+
+ Ok(BitmaskImmediate {
+ n: n as u8,
+ imms: (imms & 0x3f) as u8,
+ immr: (immr & 0x3f) as u8
+ })
+ }
+}
+
+impl From<BitmaskImmediate> for u32 {
+ /// Encode a bitmask immediate into a 32-bit value.
+ fn from(bitmask: BitmaskImmediate) -> Self {
+ 0
+ | (((bitmask.n as u32) & 1) << 12)
+ | (bitmask.immr << 6) as u32
+ | bitmask.imms as u32
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_failures() {
+ vec![5, 9, 10, 11, 13, 17, 18, 19].iter().for_each(|&imm| {
+ assert!(BitmaskImmediate::try_from(imm).is_err());
+ });
+ }
+
+ #[test]
+ fn test_size_2_minimum() {
+ let bitmask = BitmaskImmediate::try_from(0x5555555555555555);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b111100 })));
+ }
+
+ #[test]
+ fn test_size_2_maximum() {
+ let bitmask = BitmaskImmediate::try_from(0xaaaaaaaaaaaaaaaa);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000001, imms: 0b111100 })));
+ }
+
+ #[test]
+ fn test_size_4_minimum() {
+ let bitmask = BitmaskImmediate::try_from(0x1111111111111111);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b111000 })));
+ }
+
+ #[test]
+ fn test_size_4_rotated() {
+ let bitmask = BitmaskImmediate::try_from(0x6666666666666666);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000011, imms: 0b111001 })));
+ }
+
+ #[test]
+ fn test_size_4_maximum() {
+ let bitmask = BitmaskImmediate::try_from(0xeeeeeeeeeeeeeeee);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000011, imms: 0b111010 })));
+ }
+
+ #[test]
+ fn test_size_8_minimum() {
+ let bitmask = BitmaskImmediate::try_from(0x0101010101010101);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b110000 })));
+ }
+
+ #[test]
+ fn test_size_8_rotated() {
+ let bitmask = BitmaskImmediate::try_from(0x1818181818181818);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000101, imms: 0b110001 })));
+ }
+
+ #[test]
+ fn test_size_8_maximum() {
+ let bitmask = BitmaskImmediate::try_from(0xfefefefefefefefe);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000111, imms: 0b110110 })));
+ }
+
+ #[test]
+ fn test_size_16_minimum() {
+ let bitmask = BitmaskImmediate::try_from(0x0001000100010001);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b100000 })));
+ }
+
+ #[test]
+ fn test_size_16_rotated() {
+ let bitmask = BitmaskImmediate::try_from(0xff8fff8fff8fff8f);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b001001, imms: 0b101100 })));
+ }
+
+ #[test]
+ fn test_size_16_maximum() {
+ let bitmask = BitmaskImmediate::try_from(0xfffefffefffefffe);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b001111, imms: 0b101110 })));
+ }
+
+ #[test]
+ fn test_size_32_minimum() {
+ let bitmask = BitmaskImmediate::try_from(0x0000000100000001);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b000000 })));
+ }
+
+ #[test]
+ fn test_size_32_rotated() {
+ let bitmask = BitmaskImmediate::try_from(0x3fffff003fffff00);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b011000, imms: 0b010101 })));
+ }
+
+ #[test]
+ fn test_size_32_maximum() {
+ let bitmask = BitmaskImmediate::try_from(0xfffffffefffffffe);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b011111, imms: 0b011110 })));
+ }
+
+ #[test]
+ fn test_size_64_minimum() {
+ let bitmask = BitmaskImmediate::try_from(0x0000000000000001);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b000000, imms: 0b000000 })));
+ }
+
+ #[test]
+ fn test_size_64_rotated() {
+ let bitmask = BitmaskImmediate::try_from(0x0000001fffff0000);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b110000, imms: 0b010100 })));
+ }
+
+ #[test]
+ fn test_size_64_maximum() {
+ let bitmask = BitmaskImmediate::try_from(0xfffffffffffffffe);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b111111, imms: 0b111110 })));
+ }
+}
diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs
new file mode 100644
index 0000000000..63a4556d85
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/logical_imm.rs
@@ -0,0 +1,97 @@
+use super::bitmask_imm::BitmaskImmediate;
+use super::sf::Sf;
+
+// Which operation to perform.
+enum Opc {
+ /// The AND operation.
+ And = 0b00,
+
+ /// The ANDS operation.
+ Ands = 0b11
+}
+
+/// The struct that represents an A64 bitwise immediate instruction that can be
+/// encoded.
+///
+/// AND/ANDS (immediate)
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 1 0 0 1 0 0 |
+/// | sf opc.. N immr............... imms............... rn.............. rd.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct LogicalImm {
+ /// The register number of the destination register.
+ rd: u8,
+
+ /// The register number of the first operand register.
+ rn: u8,
+
+ /// The immediate value to test.
+ imm: BitmaskImmediate,
+
+ /// The opcode for this instruction.
+ opc: Opc,
+
+ /// Whether or not this instruction is operating on 64-bit operands.
+ sf: Sf
+}
+
+impl LogicalImm {
+ /// AND (immediate)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--immediate---Bitwise-AND--immediate--?lang=en
+ pub fn and(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self {
+ Self { rd, rn, imm, opc: Opc::And, sf: num_bits.into() }
+ }
+
+ /// ANDS (immediate)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--immediate---Bitwise-AND--immediate---setting-flags-?lang=en
+ pub fn ands(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self {
+ Self { rd, rn, imm, opc: Opc::Ands, sf: num_bits.into() }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#log_imm
+const FAMILY: u32 = 0b1001;
+
+impl From<LogicalImm> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: LogicalImm) -> Self {
+ let imm: u32 = inst.imm.into();
+
+ 0
+ | ((inst.sf as u32) << 31)
+ | ((inst.opc as u32) << 29)
+ | (FAMILY << 25)
+ | (imm << 10)
+ | ((inst.rn as u32) << 5)
+ | inst.rd as u32
+ }
+}
+
+impl From<LogicalImm> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: LogicalImm) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_and() {
+ let inst = LogicalImm::and(0, 1, 7.try_into().unwrap(), 64);
+ let result: u32 = inst.into();
+ assert_eq!(0x92400820, result);
+ }
+
+ #[test]
+ fn test_ands() {
+ let inst = LogicalImm::ands(0, 1, 7.try_into().unwrap(), 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf2400820, result);
+ }
+}
diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs
index c96e9328ff..83cdd26d1d 100644
--- a/yjit/src/asm/arm64/inst/mod.rs
+++ b/yjit/src/asm/arm64/inst/mod.rs
@@ -1,9 +1,11 @@
mod atomic;
+mod bitmask_imm;
mod branch;
mod call;
mod data_imm;
mod data_reg;
mod load;
+mod logical_imm;
mod mov;
mod sf;
mod store;