From eb4c7b4ea55c5a4c593bea4bba9aa1e9093b3447 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Thu, 16 Jun 2022 14:28:51 -0400
Subject: AND/ANDS for A64 (https://github.com/Shopify/ruby/pull/300)

---
 yjit/src/asm/arm64/inst/bitmask_imm.rs | 250 +++++++++++++++++++++++++++++++++
 yjit/src/asm/arm64/inst/logical_imm.rs |  97 +++++++++++++
 yjit/src/asm/arm64/inst/mod.rs         |   2 +
 3 files changed, 349 insertions(+)
 create mode 100644 yjit/src/asm/arm64/inst/bitmask_imm.rs
 create mode 100644 yjit/src/asm/arm64/inst/logical_imm.rs
diff --git a/yjit/src/asm/arm64/inst/bitmask_imm.rs b/yjit/src/asm/arm64/inst/bitmask_imm.rs
new file mode 100644
index 0000000000..7e5a21c7b4
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/bitmask_imm.rs
@@ -0,0 +1,250 @@
+/// Immediates used by the logical immediate instructions are not actually the
+/// immediate value, but instead are encoded into a 13-bit wide mask of 3
+/// elements. This allows many more values to be represented than 13 bits would
+/// normally allow, at the expense of not being able to represent every possible
+/// value.
+///
+/// In order for a number to be encodeable in this form, the binary
+/// representation must consist of a single set of contiguous 1s. That pattern
+/// must then be replicatable across all of the bits either 1, 2, 4, 8, 16, or
+/// 32 times (rotated or not).
+///
+/// For example, 1 (0b1), 2 (0b10), 3 (0b11), and 4 (0b100) are all valid.
+/// However, 5 (0b101) is invalid, because it contains 2 sets of 1s and cannot
+/// be replicated across 64 bits.
+///
+/// Some more examples to illustrate the idea of replication:
+/// * 0x5555555555555555 is a valid value (0b0101...) because it consists of a
+///   single set of 1s which can be replicated across all of the bits 32 times.
+/// * 0xf0f0f0f0f0f0f0f0 is a valid value (0b1111000011110000...) because it
+///   consists of a single set of 1s which can be replicated across all of the
+///   bits 8 times (rotated by 4 bits).
+/// * 0x0ff00ff00ff00ff0 is a valid value (0000111111110000...) because it
+///   consists of a single set of 1s which can be replicated across all of the
+///   bits 4 times (rotated by 12 bits).
+///
+/// To encode the values, there are 3 elements:
+/// * n = 1 if the pattern is 64-bits wide, 0 otherwise
+/// * imms = the size of the pattern, a 0, and then one less than the number of
+///   sequential 1s
+/// * immr = the number of right rotations to apply to the pattern to get the
+///   target value
+///
+pub struct BitmaskImmediate {
+    n: u8,
+    imms: u8,
+    immr: u8
+}
+
+impl TryFrom<u64> for BitmaskImmediate {
+    type Error = ();
+
+    /// Attempt to convert a u64 into a BitmaskImm.
+    fn try_from(value: u64) -> Result<Self, Self::Error> {
+        /// Is this number's binary representation all 1s?
+        fn is_mask(imm: u64) -> bool {
+            if imm == u64::MAX { true } else { ((imm + 1) & imm) == 0 }
+        }
+
+        /// Is this number's binary representation one or more 1s followed by one or
+        /// more 0s?
+        fn is_shifted_mask(imm: u64) -> bool {
+            is_mask((imm - 1) | imm)
+        }
+
+        let mut imm = value;
+        let mut size = 64;
+
+        // First, determine the element size.
+        loop {
+            size >>= 1;
+            let mask = (1 << size) - 1;
+
+            if (imm & mask) != ((imm >> size) & mask) {
+              size <<= 1;
+              break;
+            }
+
+            if size <= 2 {
+                break;
+            }
+        }
+
+        // Second, determine the rotation to make the pattern be aligned such
+        // that all of the least significant bits are 1.
+        let trailing_ones: u32;
+        let left_rotations: u32;
+
+        let mask = u64::MAX >> (64 - size);
+        imm &= mask;
+
+        if is_shifted_mask(imm) {
+            left_rotations = imm.trailing_zeros();
+            assert!(left_rotations < 64);
+            trailing_ones = (imm >> left_rotations).trailing_ones();
+        } else {
+            imm |= !mask;
+            if !is_shifted_mask(!imm) {
+                return Err(());
+            }
+
+            let leading_ones = imm.leading_ones();
+            left_rotations = 64 - leading_ones;
+            trailing_ones = leading_ones + imm.trailing_ones() - (64 - size);
+        }
+
+        // immr is the number of right rotations it takes to get from the
+        // matching unrotated pattern to the target value.
+        let immr = (size - left_rotations) & (size - 1);
+        assert!(size > left_rotations);
+
+        // imms is encoded as the size of the pattern, a 0, and then one less
+        // than the number of sequential 1s. The table below shows how this is
+        // encoded. (Note that the way it works out, it's impossible for every x
+        // in a row to be 1 at the same time).
+        // +-------------+--------------+--------------+
+        // | imms        | element size | number of 1s |
+        // +-------------+--------------+--------------+
+        // | 1 1 1 1 0 x | 2 bits       | 1            |
+        // | 1 1 1 0 x x | 4 bits       | 1-3          |
+        // | 1 1 0 x x x | 8 bits       | 1-7          |
+        // | 1 0 x x x x | 16 bits      | 1-15         |
+        // | 0 x x x x x | 32 bits      | 1-31         |
+        // | x x x x x x | 64 bits      | 1-63         |
+        // +-------------+--------------+--------------+
+        let imms = (!(size - 1) << 1) | (trailing_ones - 1);
+
+        // n is 1 if the element size is 64-bits, and 0 otherwise.
+        let n = ((imms >> 6) & 1) ^ 1;
+
+        Ok(BitmaskImmediate {
+            n: n as u8,
+            imms: (imms & 0x3f) as u8,
+            immr: (immr & 0x3f) as u8
+        })
+    }
+}
+
+impl From<BitmaskImmediate> for u32 {
+    /// Encode a bitmask immediate into a 32-bit value.
+    fn from(bitmask: BitmaskImmediate) -> Self {
+        0
+        | (((bitmask.n as u32) & 1) << 12)
+        | (bitmask.immr << 6) as u32
+        | bitmask.imms as u32
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_failures() {
+        vec![5, 9, 10, 11, 13, 17, 18, 19].iter().for_each(|&imm| {
+            assert!(BitmaskImmediate::try_from(imm).is_err());
+        });
+    }
+
+    #[test]
+    fn test_size_2_minimum() {
+        let bitmask = BitmaskImmediate::try_from(0x5555555555555555);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b111100 })));
+    }
+
+    #[test]
+    fn test_size_2_maximum() {
+        let bitmask = BitmaskImmediate::try_from(0xaaaaaaaaaaaaaaaa);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000001, imms: 0b111100 })));
+    }
+
+    #[test]
+    fn test_size_4_minimum() {
+        let bitmask = BitmaskImmediate::try_from(0x1111111111111111);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b111000 })));
+    }
+
+    #[test]
+    fn test_size_4_rotated() {
+        let bitmask = BitmaskImmediate::try_from(0x6666666666666666);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000011, imms: 0b111001 })));
+    }
+
+    #[test]
+    fn test_size_4_maximum() {
+        let bitmask = BitmaskImmediate::try_from(0xeeeeeeeeeeeeeeee);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000011, imms: 0b111010 })));
+    }
+
+    #[test]
+    fn test_size_8_minimum() {
+        let bitmask = BitmaskImmediate::try_from(0x0101010101010101);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b110000 })));
+    }
+
+    #[test]
+    fn test_size_8_rotated() {
+        let bitmask = BitmaskImmediate::try_from(0x1818181818181818);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000101, imms: 0b110001 })));
+    }
+
+    #[test]
+    fn test_size_8_maximum() {
+        let bitmask = BitmaskImmediate::try_from(0xfefefefefefefefe);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000111, imms: 0b110110 })));
+    }
+
+    #[test]
+    fn test_size_16_minimum() {
+        let bitmask = BitmaskImmediate::try_from(0x0001000100010001);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b100000 })));
+    }
+
+    #[test]
+    fn test_size_16_rotated() {
+        let bitmask = BitmaskImmediate::try_from(0xff8fff8fff8fff8f);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b001001, imms: 0b101100 })));
+    }
+
+    #[test]
+    fn test_size_16_maximum() {
+        let bitmask = BitmaskImmediate::try_from(0xfffefffefffefffe);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b001111, imms: 0b101110 })));
+    }
+
+    #[test]
+    fn test_size_32_minimum() {
+        let bitmask = BitmaskImmediate::try_from(0x0000000100000001);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b000000 })));
+    }
+
+    #[test]
+    fn test_size_32_rotated() {
+        let bitmask = BitmaskImmediate::try_from(0x3fffff003fffff00);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b011000, imms: 0b010101 })));
+    }
+
+    #[test]
+    fn test_size_32_maximum() {
+        let bitmask = BitmaskImmediate::try_from(0xfffffffefffffffe);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b011111, imms: 0b011110 })));
+    }
+
+    #[test]
+    fn test_size_64_minimum() {
+        let bitmask = BitmaskImmediate::try_from(0x0000000000000001);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b000000, imms: 0b000000 })));
+    }
+
+    #[test]
+    fn test_size_64_rotated() {
+        let bitmask = BitmaskImmediate::try_from(0x0000001fffff0000);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b110000, imms: 0b010100 })));
+    }
+
+    #[test]
+    fn test_size_64_maximum() {
+        let bitmask = BitmaskImmediate::try_from(0xfffffffffffffffe);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b111111, imms: 0b111110 })));
+    }
+}
diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs
new file mode 100644
index 0000000000..63a4556d85
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/logical_imm.rs
@@ -0,0 +1,97 @@
+use super::bitmask_imm::BitmaskImmediate;
+use super::sf::Sf;
+
+// Which operation to perform.
+enum Opc {
+    /// The AND operation.
+    And = 0b00,
+
+    /// The ANDS operation.
+    Ands = 0b11
+}
+
+/// The struct that represents an A64 bitwise immediate instruction that can be
+/// encoded.
+///
+/// AND/ANDS (immediate)
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// |           1    0  0  1  0    0                                                                                |
+/// | sf opc..                       N  immr...............   imms............... rn.............. rd.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct LogicalImm {
+    /// The register number of the destination register.
+    rd: u8,
+
+    /// The register number of the first operand register.
+    rn: u8,
+
+    /// The immediate value to test.
+    imm: BitmaskImmediate,
+
+    /// The opcode for this instruction.
+    opc: Opc,
+
+    /// Whether or not this instruction is operating on 64-bit operands.
+    sf: Sf
+}
+
+impl LogicalImm {
+    /// AND (immediate)
+    /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--immediate---Bitwise-AND--immediate--?lang=en
+    pub fn and(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self {
+        Self { rd, rn, imm, opc: Opc::And, sf: num_bits.into() }
+    }
+
+    /// ANDS (immediate)
+    /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--immediate---Bitwise-AND--immediate---setting-flags-?lang=en
+    pub fn ands(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self {
+        Self { rd, rn, imm, opc: Opc::Ands, sf: num_bits.into() }
+    }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#log_imm
+const FAMILY: u32 = 0b1001;
+
+impl From<LogicalImm> for u32 {
+    /// Convert an instruction into a 32-bit value.
+    fn from(inst: LogicalImm) -> Self {
+        let imm: u32 = inst.imm.into();
+
+        0
+        | ((inst.sf as u32) << 31)
+        | ((inst.opc as u32) << 29)
+        | (FAMILY << 25)
+        | (imm << 10)
+        | ((inst.rn as u32) << 5)
+        | inst.rd as u32
+    }
+}
+
+impl From<LogicalImm> for [u8; 4] {
+    /// Convert an instruction into a 4 byte array.
+    fn from(inst: LogicalImm) -> [u8; 4] {
+        let result: u32 = inst.into();
+        result.to_le_bytes()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_and() {
+        let inst = LogicalImm::and(0, 1, 7.try_into().unwrap(), 64);
+        let result: u32 = inst.into();
+        assert_eq!(0x92400820, result);
+    }
+
+    #[test]
+    fn test_ands() {
+        let inst = LogicalImm::ands(0, 1, 7.try_into().unwrap(), 64);
+        let result: u32 = inst.into();
+        assert_eq!(0xf2400820, result);
+    }
+}
diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs
index c96e9328ff..83cdd26d1d 100644
--- a/yjit/src/asm/arm64/inst/mod.rs
+++ b/yjit/src/asm/arm64/inst/mod.rs
@@ -1,9 +1,11 @@
 mod atomic;
+mod bitmask_imm;
 mod branch;
 mod call;
 mod data_imm;
 mod data_reg;
 mod load;
+mod logical_imm;
 mod mov;
 mod sf;
 mod store;
-- 
cgit v1.2.3