From eb4c7b4ea55c5a4c593bea4bba9aa1e9093b3447 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 16 Jun 2022 14:28:51 -0400 Subject: AND/ANDS for A64 (https://github.com/Shopify/ruby/pull/300) --- yjit/src/asm/arm64/inst/bitmask_imm.rs | 250 +++++++++++++++++++++++++++++++++ yjit/src/asm/arm64/inst/logical_imm.rs | 97 +++++++++++++ yjit/src/asm/arm64/inst/mod.rs | 2 + 3 files changed, 349 insertions(+) create mode 100644 yjit/src/asm/arm64/inst/bitmask_imm.rs create mode 100644 yjit/src/asm/arm64/inst/logical_imm.rs diff --git a/yjit/src/asm/arm64/inst/bitmask_imm.rs b/yjit/src/asm/arm64/inst/bitmask_imm.rs new file mode 100644 index 0000000000..7e5a21c7b4 --- /dev/null +++ b/yjit/src/asm/arm64/inst/bitmask_imm.rs @@ -0,0 +1,250 @@ +/// Immediates used by the logical immediate instructions are not actually the +/// immediate value, but instead are encoded into a 13-bit wide mask of 3 +/// elements. This allows many more values to be represented than 13 bits would +/// normally allow, at the expense of not being able to represent every possible +/// value. +/// +/// In order for a number to be encodeable in this form, the binary +/// representation must consist of a single set of contiguous 1s. That pattern +/// must then be replicatable across all of the bits either 1, 2, 4, 8, 16, or +/// 32 times (rotated or not). +/// +/// For example, 1 (0b1), 2 (0b10), 3 (0b11), and 4 (0b100) are all valid. +/// However, 5 (0b101) is invalid, because it contains 2 sets of 1s and cannot +/// be replicated across 64 bits. +/// +/// Some more examples to illustrate the idea of replication: +/// * 0x5555555555555555 is a valid value (0b0101...) because it consists of a +/// single set of 1s which can be replicated across all of the bits 32 times. +/// * 0xf0f0f0f0f0f0f0f0 is a valid value (0b1111000011110000...) because it +/// consists of a single set of 1s which can be replicated across all of the +/// bits 8 times (rotated by 4 bits). +/// * 0x0ff00ff00ff00ff0 is a valid value (0000111111110000...) because it +/// consists of a single set of 1s which can be replicated across all of the +/// bits 4 times (rotated by 12 bits). +/// +/// To encode the values, there are 3 elements: +/// * n = 1 if the pattern is 64-bits wide, 0 otherwise +/// * imms = the size of the pattern, a 0, and then one less than the number of +/// sequential 1s +/// * immr = the number of right rotations to apply to the pattern to get the +/// target value +/// +pub struct BitmaskImmediate { + n: u8, + imms: u8, + immr: u8 +} + +impl TryFrom for BitmaskImmediate { + type Error = (); + + /// Attempt to convert a u64 into a BitmaskImm. + fn try_from(value: u64) -> Result { + /// Is this number's binary representation all 1s? + fn is_mask(imm: u64) -> bool { + if imm == u64::MAX { true } else { ((imm + 1) & imm) == 0 } + } + + /// Is this number's binary representation one or more 1s followed by one or + /// more 0s? + fn is_shifted_mask(imm: u64) -> bool { + is_mask((imm - 1) | imm) + } + + let mut imm = value; + let mut size = 64; + + // First, determine the element size. + loop { + size >>= 1; + let mask = (1 << size) - 1; + + if (imm & mask) != ((imm >> size) & mask) { + size <<= 1; + break; + } + + if size <= 2 { + break; + } + } + + // Second, determine the rotation to make the pattern be aligned such + // that all of the least significant bits are 1. + let trailing_ones: u32; + let left_rotations: u32; + + let mask = u64::MAX >> (64 - size); + imm &= mask; + + if is_shifted_mask(imm) { + left_rotations = imm.trailing_zeros(); + assert!(left_rotations < 64); + trailing_ones = (imm >> left_rotations).trailing_ones(); + } else { + imm |= !mask; + if !is_shifted_mask(!imm) { + return Err(()); + } + + let leading_ones = imm.leading_ones(); + left_rotations = 64 - leading_ones; + trailing_ones = leading_ones + imm.trailing_ones() - (64 - size); + } + + // immr is the number of right rotations it takes to get from the + // matching unrotated pattern to the target value. + let immr = (size - left_rotations) & (size - 1); + assert!(size > left_rotations); + + // imms is encoded as the size of the pattern, a 0, and then one less + // than the number of sequential 1s. The table below shows how this is + // encoded. (Note that the way it works out, it's impossible for every x + // in a row to be 1 at the same time). + // +-------------+--------------+--------------+ + // | imms | element size | number of 1s | + // +-------------+--------------+--------------+ + // | 1 1 1 1 0 x | 2 bits | 1 | + // | 1 1 1 0 x x | 4 bits | 1-3 | + // | 1 1 0 x x x | 8 bits | 1-7 | + // | 1 0 x x x x | 16 bits | 1-15 | + // | 0 x x x x x | 32 bits | 1-31 | + // | x x x x x x | 64 bits | 1-63 | + // +-------------+--------------+--------------+ + let imms = (!(size - 1) << 1) | (trailing_ones - 1); + + // n is 1 if the element size is 64-bits, and 0 otherwise. + let n = ((imms >> 6) & 1) ^ 1; + + Ok(BitmaskImmediate { + n: n as u8, + imms: (imms & 0x3f) as u8, + immr: (immr & 0x3f) as u8 + }) + } +} + +impl From for u32 { + /// Encode a bitmask immediate into a 32-bit value. + fn from(bitmask: BitmaskImmediate) -> Self { + 0 + | (((bitmask.n as u32) & 1) << 12) + | (bitmask.immr << 6) as u32 + | bitmask.imms as u32 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_failures() { + vec![5, 9, 10, 11, 13, 17, 18, 19].iter().for_each(|&imm| { + assert!(BitmaskImmediate::try_from(imm).is_err()); + }); + } + + #[test] + fn test_size_2_minimum() { + let bitmask = BitmaskImmediate::try_from(0x5555555555555555); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b111100 }))); + } + + #[test] + fn test_size_2_maximum() { + let bitmask = BitmaskImmediate::try_from(0xaaaaaaaaaaaaaaaa); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000001, imms: 0b111100 }))); + } + + #[test] + fn test_size_4_minimum() { + let bitmask = BitmaskImmediate::try_from(0x1111111111111111); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b111000 }))); + } + + #[test] + fn test_size_4_rotated() { + let bitmask = BitmaskImmediate::try_from(0x6666666666666666); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000011, imms: 0b111001 }))); + } + + #[test] + fn test_size_4_maximum() { + let bitmask = BitmaskImmediate::try_from(0xeeeeeeeeeeeeeeee); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000011, imms: 0b111010 }))); + } + + #[test] + fn test_size_8_minimum() { + let bitmask = BitmaskImmediate::try_from(0x0101010101010101); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b110000 }))); + } + + #[test] + fn test_size_8_rotated() { + let bitmask = BitmaskImmediate::try_from(0x1818181818181818); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000101, imms: 0b110001 }))); + } + + #[test] + fn test_size_8_maximum() { + let bitmask = BitmaskImmediate::try_from(0xfefefefefefefefe); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000111, imms: 0b110110 }))); + } + + #[test] + fn test_size_16_minimum() { + let bitmask = BitmaskImmediate::try_from(0x0001000100010001); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b100000 }))); + } + + #[test] + fn test_size_16_rotated() { + let bitmask = BitmaskImmediate::try_from(0xff8fff8fff8fff8f); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b001001, imms: 0b101100 }))); + } + + #[test] + fn test_size_16_maximum() { + let bitmask = BitmaskImmediate::try_from(0xfffefffefffefffe); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b001111, imms: 0b101110 }))); + } + + #[test] + fn test_size_32_minimum() { + let bitmask = BitmaskImmediate::try_from(0x0000000100000001); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b000000 }))); + } + + #[test] + fn test_size_32_rotated() { + let bitmask = BitmaskImmediate::try_from(0x3fffff003fffff00); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b011000, imms: 0b010101 }))); + } + + #[test] + fn test_size_32_maximum() { + let bitmask = BitmaskImmediate::try_from(0xfffffffefffffffe); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b011111, imms: 0b011110 }))); + } + + #[test] + fn test_size_64_minimum() { + let bitmask = BitmaskImmediate::try_from(0x0000000000000001); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b000000, imms: 0b000000 }))); + } + + #[test] + fn test_size_64_rotated() { + let bitmask = BitmaskImmediate::try_from(0x0000001fffff0000); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b110000, imms: 0b010100 }))); + } + + #[test] + fn test_size_64_maximum() { + let bitmask = BitmaskImmediate::try_from(0xfffffffffffffffe); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b111111, imms: 0b111110 }))); + } +} diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs new file mode 100644 index 0000000000..63a4556d85 --- /dev/null +++ b/yjit/src/asm/arm64/inst/logical_imm.rs @@ -0,0 +1,97 @@ +use super::bitmask_imm::BitmaskImmediate; +use super::sf::Sf; + +// Which operation to perform. +enum Opc { + /// The AND operation. + And = 0b00, + + /// The ANDS operation. + Ands = 0b11 +} + +/// The struct that represents an A64 bitwise immediate instruction that can be +/// encoded. +/// +/// AND/ANDS (immediate) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 1 0 0 | +/// | sf opc.. N immr............... imms............... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LogicalImm { + /// The register number of the destination register. + rd: u8, + + /// The register number of the first operand register. + rn: u8, + + /// The immediate value to test. + imm: BitmaskImmediate, + + /// The opcode for this instruction. + opc: Opc, + + /// Whether or not this instruction is operating on 64-bit operands. + sf: Sf +} + +impl LogicalImm { + /// AND (immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--immediate---Bitwise-AND--immediate--?lang=en + pub fn and(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, opc: Opc::And, sf: num_bits.into() } + } + + /// ANDS (immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--immediate---Bitwise-AND--immediate---setting-flags-?lang=en + pub fn ands(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, opc: Opc::Ands, sf: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#log_imm +const FAMILY: u32 = 0b1001; + +impl From for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: LogicalImm) -> Self { + let imm: u32 = inst.imm.into(); + + 0 + | ((inst.sf as u32) << 31) + | ((inst.opc as u32) << 29) + | (FAMILY << 25) + | (imm << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: LogicalImm) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_and() { + let inst = LogicalImm::and(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0x92400820, result); + } + + #[test] + fn test_ands() { + let inst = LogicalImm::ands(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xf2400820, result); + } +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs index c96e9328ff..83cdd26d1d 100644 --- a/yjit/src/asm/arm64/inst/mod.rs +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -1,9 +1,11 @@ mod atomic; +mod bitmask_imm; mod branch; mod call; mod data_imm; mod data_reg; mod load; +mod logical_imm; mod mov; mod sf; mod store; -- cgit v1.2.3