Diffstat (limited to 'yjit/src/asm/arm64/arg')
-rw-r--r-- | yjit/src/asm/arm64/arg/bitmask_imm.rs | 255
-rw-r--r-- | yjit/src/asm/arm64/arg/condition.rs   |  52
-rw-r--r-- | yjit/src/asm/arm64/arg/inst_offset.rs |  47
-rw-r--r-- | yjit/src/asm/arm64/arg/mod.rs         |  18
-rw-r--r-- | yjit/src/asm/arm64/arg/sf.rs          |  19
-rw-r--r-- | yjit/src/asm/arm64/arg/shifted_imm.rs |  81
-rw-r--r-- | yjit/src/asm/arm64/arg/sys_reg.rs     |   6
-rw-r--r-- | yjit/src/asm/arm64/arg/truncate.rs    |  66
8 files changed, 544 insertions, 0 deletions
diff --git a/yjit/src/asm/arm64/arg/bitmask_imm.rs b/yjit/src/asm/arm64/arg/bitmask_imm.rs
new file mode 100644
index 0000000000..6b71a73d2c
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/bitmask_imm.rs
@@ -0,0 +1,255 @@
+/// Immediates used by the logical immediate instructions are not actually the
+/// immediate value, but instead are encoded into a 13-bit wide mask of 3
+/// elements. This allows many more values to be represented than 13 bits would
+/// normally allow, at the expense of not being able to represent every possible
+/// value.
+///
+/// In order for a number to be encodeable in this form, the binary
+/// representation must consist of a single set of contiguous 1s. That pattern
+/// must then be replicatable across all of the bits either 1, 2, 4, 8, 16, or
+/// 32 times (rotated or not).
+///
+/// For example, 1 (0b1), 2 (0b10), 3 (0b11), and 4 (0b100) are all valid.
+/// However, 5 (0b101) is invalid, because it contains 2 sets of 1s and cannot
+/// be replicated across 64 bits.
+///
+/// Some more examples to illustrate the idea of replication:
+/// * 0x5555555555555555 is a valid value (0b0101...) because it consists of a
+///   single set of 1s which can be replicated across all of the bits 32 times.
+/// * 0xf0f0f0f0f0f0f0f0 is a valid value (0b1111000011110000...) because it
+///   consists of a single set of 1s which can be replicated across all of the
+///   bits 8 times (rotated by 4 bits).
+/// * 0x0ff00ff00ff00ff0 is a valid value (0000111111110000...) because it
+///   consists of a single set of 1s which can be replicated across all of the
+///   bits 4 times (rotated by 12 bits).
+///
+/// To encode the values, there are 3 elements:
+/// * n = 1 if the pattern is 64-bits wide, 0 otherwise
+/// * imms = the size of the pattern, a 0, and then one less than the number of
+///   sequential 1s
+/// * immr = the number of right rotations to apply to the pattern to get the
+///   target value
+///
+pub struct BitmaskImmediate {
+    n: u8,
+    imms: u8,
+    immr: u8
+}
+
+impl TryFrom<u64> for BitmaskImmediate {
+    type Error = ();
+
+    /// Attempt to convert a u64 into a BitmaskImmediate.
+    ///
+    /// The implementation here is largely based on this blog post:
+    /// https://dougallj.wordpress.com/2021/10/30/bit-twiddling-optimising-aarch64-logical-immediate-encoding-and-decoding/
+    fn try_from(value: u64) -> Result<Self, Self::Error> {
+        if value == 0 || value == u64::MAX {
+            return Err(());
+        }
+
+        fn rotate_right(value: u64, rotations: u32) -> u64 {
+            (value >> (rotations & 0x3F)) |
+            (value << (rotations.wrapping_neg() & 0x3F))
+        }
+
+        let rotations = (value & (value + 1)).trailing_zeros();
+        let normalized = rotate_right(value, rotations & 0x3F);
+
+        let zeroes = normalized.leading_zeros();
+        let ones = (!normalized).trailing_zeros();
+        let size = zeroes + ones;
+
+        if rotate_right(value, size & 0x3F) != value {
+            return Err(());
+        }
+
+        Ok(BitmaskImmediate {
+            n: ((size >> 6) & 1) as u8,
+            imms: (((size << 1).wrapping_neg() | (ones - 1)) & 0x3F) as u8,
+            immr: ((rotations.wrapping_neg() & (size - 1)) & 0x3F) as u8
+        })
+    }
+}
+
+impl BitmaskImmediate {
+    /// Attempt to make a BitmaskImmediate for a 32 bit register.
+    /// The result has N==0, which is required for some 32-bit instructions.
+    /// Note that the exact same BitmaskImmediate produces different values
+    /// depending on the size of the target register.
+    pub fn new_32b_reg(value: u32) -> Result<Self, ()> {
+        // The same bit pattern replicated to u64
+        let value = value as u64;
+        let replicated: u64 = (value << 32) | value;
+        let converted = Self::try_from(replicated);
+        if let Ok(ref imm) = converted {
+            assert_eq!(0, imm.n);
+        }
+
+        converted
+    }
+}
+
+impl BitmaskImmediate {
+    /// Encode a bitmask immediate into a 32-bit value.
+    pub fn encode(self) -> u32 {
+        0
+        | ((self.n as u32) << 12)
+        | ((self.immr as u32) << 6)
+        | (self.imms as u32)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_failures() {
+        [5, 9, 10, 11, 13, 17, 18, 19].iter().for_each(|&imm| {
+            assert!(BitmaskImmediate::try_from(imm).is_err());
+        });
+    }
+
+    #[test]
+    fn test_negative() {
+        let bitmask: BitmaskImmediate = (-9_i64 as u64).try_into().unwrap();
+        let encoded: u32 = bitmask.encode();
+        assert_eq!(7998, encoded);
+    }
+
+    #[test]
+    fn test_size_2_minimum() {
+        let bitmask = BitmaskImmediate::try_from(0x5555555555555555);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b111100 })));
+    }
+
+    #[test]
+    fn test_size_2_maximum() {
+        let bitmask = BitmaskImmediate::try_from(0xaaaaaaaaaaaaaaaa);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000001, imms: 0b111100 })));
+    }
+
+    #[test]
+    fn test_size_4_minimum() {
+        let bitmask = BitmaskImmediate::try_from(0x1111111111111111);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b111000 })));
+    }
+
+    #[test]
+    fn test_size_4_rotated() {
+        let bitmask = BitmaskImmediate::try_from(0x6666666666666666);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000011, imms: 0b111001 })));
+    }
+
+    #[test]
+    fn test_size_4_maximum() {
+        let bitmask = BitmaskImmediate::try_from(0xeeeeeeeeeeeeeeee);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000011, imms: 0b111010 })));
+    }
+
+    #[test]
+    fn test_size_8_minimum() {
+        let bitmask = BitmaskImmediate::try_from(0x0101010101010101);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b110000 })));
+    }
+
+    #[test]
+    fn test_size_8_rotated() {
+        let bitmask = BitmaskImmediate::try_from(0x1818181818181818);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000101, imms: 0b110001 })));
+    }
+
+    #[test]
+    fn test_size_8_maximum() {
+        let bitmask = BitmaskImmediate::try_from(0xfefefefefefefefe);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000111, imms: 0b110110 })));
+    }
+
+    #[test]
+    fn test_size_16_minimum() {
+        let bitmask = BitmaskImmediate::try_from(0x0001000100010001);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b100000 })));
+    }
+
+    #[test]
+    fn test_size_16_rotated() {
+        let bitmask = BitmaskImmediate::try_from(0xff8fff8fff8fff8f);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b001001, imms: 0b101100 })));
+    }
+
+    #[test]
+    fn test_size_16_maximum() {
+        let bitmask = BitmaskImmediate::try_from(0xfffefffefffefffe);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b001111, imms: 0b101110 })));
+    }
+
+    #[test]
+    fn test_size_32_minimum() {
+        let bitmask = BitmaskImmediate::try_from(0x0000000100000001);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b000000 })));
+    }
+
+    #[test]
+    fn test_size_32_rotated() {
+        let bitmask = BitmaskImmediate::try_from(0x3fffff003fffff00);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b011000, imms: 0b010101 })));
+    }
+
+    #[test]
+    fn test_size_32_maximum() {
+        let bitmask = BitmaskImmediate::try_from(0xfffffffefffffffe);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b011111, imms: 0b011110 })));
+    }
+
+    #[test]
+    fn test_size_64_minimum() {
+        let bitmask = BitmaskImmediate::try_from(0x0000000000000001);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b000000, imms: 0b000000 })));
+    }
+
+    #[test]
+    fn test_size_64_rotated() {
+        let bitmask = BitmaskImmediate::try_from(0x0000001fffff0000);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b110000, imms: 0b010100 })));
+    }
+
+    #[test]
+    fn test_size_64_maximum() {
+        let bitmask = BitmaskImmediate::try_from(0xfffffffffffffffe);
+        assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b111111, imms: 0b111110 })));
+    }
+
+    #[test]
+    fn test_size_64_invalid() {
+        let bitmask = BitmaskImmediate::try_from(u64::MAX);
+        assert!(matches!(bitmask, Err(())));
+    }
+
+    #[test]
+    fn test_all_valid_32b_pattern() {
+        let mut patterns = vec![];
+        for pattern_size in [2, 4, 8, 16, 32_u64] {
+            for ones_count in 1..pattern_size {
+                for rotation in 0..pattern_size {
+                    let ones = (1_u64 << ones_count) - 1;
+                    let rotated = (ones >> rotation) |
+                        ((ones & ((1 << rotation) - 1)) << (pattern_size - rotation));
+                    let mut replicated = rotated;
+                    let mut shift = pattern_size;
+                    while shift < 32 {
+                        replicated |= replicated << shift;
+                        shift *= 2;
+                    }
+                    let replicated: u32 = replicated.try_into().unwrap();
+                    assert!(BitmaskImmediate::new_32b_reg(replicated).is_ok());
+                    patterns.push(replicated);
+                }
+            }
+        }
+        patterns.sort();
+        patterns.dedup();
+        // Up to {size}-1 ones, and a total of {size} possible rotations.
+        assert_eq!(1*2 + 3*4 + 7*8 + 15*16 + 31*32, patterns.len());
+    }
+}
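As a quick illustration of the bitmask-immediate encoding implemented above — a usage sketch rather than part of the patch, with the crate-internal path assumed — the replicated pattern 0x0ff00ff00ff00ff0 from the doc comment round-trips like this:

    // Sketch only: assumes the re-exports in arg/mod.rs are reachable at this path.
    use crate::asm::arm64::arg::BitmaskImmediate;

    fn bitmask_example() {
        // Eight contiguous 1s, replicated every 16 bits and rotated right by 12,
        // so the value is encodable.
        let imm = BitmaskImmediate::try_from(0x0ff00ff00ff00ff0_u64).unwrap();

        // n = 0, immr = 0b001100 (12 rotations), imms = 0b100111 (size 16, eight 1s)
        // pack into the 13-bit N:immr:imms field.
        assert_eq!(0b0_001100_100111, imm.encode());

        // 0b101 contains two runs of 1s, so it cannot be encoded.
        assert!(BitmaskImmediate::try_from(5_u64).is_err());
    }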
diff --git a/yjit/src/asm/arm64/arg/condition.rs b/yjit/src/asm/arm64/arg/condition.rs
new file mode 100644
index 0000000000..f711b8b0d8
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/condition.rs
@@ -0,0 +1,52 @@
+/// Various instructions in A64 can have condition codes attached. This enum
+/// includes all of the various kinds of conditions along with their respective
+/// encodings.
+pub struct Condition;
+
+impl Condition {
+    pub const EQ: u8 = 0b0000; // equal to
+    pub const NE: u8 = 0b0001; // not equal to
+    pub const CS: u8 = 0b0010; // carry set (alias for HS)
+    pub const CC: u8 = 0b0011; // carry clear (alias for LO)
+    pub const MI: u8 = 0b0100; // minus, negative
+    pub const PL: u8 = 0b0101; // positive or zero
+    pub const VS: u8 = 0b0110; // signed overflow
+    pub const VC: u8 = 0b0111; // no signed overflow
+    pub const HI: u8 = 0b1000; // greater than (unsigned)
+    pub const LS: u8 = 0b1001; // less than or equal to (unsigned)
+    pub const GE: u8 = 0b1010; // greater than or equal to (signed)
+    pub const LT: u8 = 0b1011; // less than (signed)
+    pub const GT: u8 = 0b1100; // greater than (signed)
+    pub const LE: u8 = 0b1101; // less than or equal to (signed)
+    pub const AL: u8 = 0b1110; // always
+
+    pub const fn inverse(condition: u8) -> u8 {
+        match condition {
+            Condition::EQ => Condition::NE,
+            Condition::NE => Condition::EQ,
+
+            Condition::CS => Condition::CC,
+            Condition::CC => Condition::CS,
+
+            Condition::MI => Condition::PL,
+            Condition::PL => Condition::MI,
+
+            Condition::VS => Condition::VC,
+            Condition::VC => Condition::VS,
+
+            Condition::HI => Condition::LS,
+            Condition::LS => Condition::HI,
+
+            Condition::LT => Condition::GE,
+            Condition::GE => Condition::LT,
+
+            Condition::GT => Condition::LE,
+            Condition::LE => Condition::GT,
+
+            Condition::AL => Condition::AL,
+
+            _ => panic!("Unknown condition")
+        }
+    }
+}
diff --git a/yjit/src/asm/arm64/arg/inst_offset.rs b/yjit/src/asm/arm64/arg/inst_offset.rs
new file mode 100644
index 0000000000..f4a6bc73a0
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/inst_offset.rs
@@ -0,0 +1,47 @@
+/// There are a lot of instructions in the AArch64 architecture that take an
+/// offset in terms of number of instructions. Usually they are jump
+/// instructions or instructions that load a value relative to the current PC.
+///
+/// This struct is used to mark those locations instead of a generic operand in
+/// order to give better clarity to the developer when reading the AArch64
+/// backend code. It also helps to clarify that everything is in terms of a
+/// number of instructions and not a number of bytes (i.e., the offset is the
+/// number of bytes divided by 4).
+#[derive(Copy, Clone)]
+pub struct InstructionOffset(i32);
+
+impl InstructionOffset {
+    /// Create a new instruction offset.
+    pub fn from_insns(insns: i32) -> Self {
+        InstructionOffset(insns)
+    }
+
+    /// Create a new instruction offset from a number of bytes.
+    pub fn from_bytes(bytes: i32) -> Self {
+        assert_eq!(bytes % 4, 0, "Byte offset must be a multiple of 4");
+        InstructionOffset(bytes / 4)
+    }
+}
+
+impl From<i32> for InstructionOffset {
+    /// Convert an i32 into an instruction offset.
+    fn from(value: i32) -> Self {
+        InstructionOffset(value)
+    }
+}
+
+impl From<InstructionOffset> for i32 {
+    /// Convert an instruction offset into a number of instructions as an i32.
+    fn from(offset: InstructionOffset) -> Self {
+        offset.0
+    }
+}
+
+impl From<InstructionOffset> for i64 {
+    /// Convert an instruction offset into a number of instructions as an i64.
+    /// This is useful for when we're checking how many bits this offset fits
+    /// into.
+    fn from(offset: InstructionOffset) -> Self {
+        offset.0.into()
+    }
+}
diff --git a/yjit/src/asm/arm64/arg/mod.rs b/yjit/src/asm/arm64/arg/mod.rs
new file mode 100644
index 0000000000..7eb37834f9
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/mod.rs
@@ -0,0 +1,18 @@
+// This module contains various A64 instruction arguments and the logic
+// necessary to encode them.
+
+mod bitmask_imm;
+mod condition;
+mod inst_offset;
+mod sf;
+mod shifted_imm;
+mod sys_reg;
+mod truncate;
+
+pub use bitmask_imm::BitmaskImmediate;
+pub use condition::Condition;
+pub use inst_offset::InstructionOffset;
+pub use sf::Sf;
+pub use shifted_imm::ShiftedImmediate;
+pub use sys_reg::SystemRegister;
+pub use truncate::{truncate_imm, truncate_uimm};
diff --git a/yjit/src/asm/arm64/arg/sf.rs b/yjit/src/asm/arm64/arg/sf.rs
new file mode 100644
index 0000000000..c2fd33302c
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/sf.rs
@@ -0,0 +1,19 @@
+/// This is commonly the top-most bit in the encoding of the instruction, and
+/// represents whether register operands should be treated as 64-bit registers
+/// or 32-bit registers.
+pub enum Sf {
+    Sf32 = 0b0,
+    Sf64 = 0b1
+}
+
+/// A convenience conversion so that we can turn the number of bits of a
+/// register operand directly into an Sf enum variant.
+impl From<u8> for Sf {
+    fn from(num_bits: u8) -> Self {
+        match num_bits {
+            64 => Sf::Sf64,
+            32 => Sf::Sf32,
+            _ => panic!("Invalid number of bits: {}", num_bits)
+        }
+    }
+}
diff --git a/yjit/src/asm/arm64/arg/shifted_imm.rs b/yjit/src/asm/arm64/arg/shifted_imm.rs
new file mode 100644
index 0000000000..4602ac64ab
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/shifted_imm.rs
@@ -0,0 +1,81 @@
+/// How much to shift the immediate by.
+pub enum Shift {
+    LSL0 = 0b0,  // no shift
+    LSL12 = 0b1  // logical shift left by 12 bits
+}
+
+/// Some instructions accept a 12-bit immediate that has an optional shift
+/// attached to it. This allows encoding larger values than would otherwise
+/// fit into 12 bits. We attempt to encode those here. If the values are too
+/// large we have to bail out.
+pub struct ShiftedImmediate {
+    shift: Shift,
+    value: u16
+}
+
+impl TryFrom<u64> for ShiftedImmediate {
+    type Error = ();
+
+    /// Attempt to convert a u64 into a ShiftedImmediate.
+    fn try_from(value: u64) -> Result<Self, Self::Error> {
+        let current = value;
+        if current < 2_u64.pow(12) {
+            return Ok(ShiftedImmediate { shift: Shift::LSL0, value: current as u16 });
+        }
+
+        if (current & (2_u64.pow(12) - 1) == 0) && ((current >> 12) < 2_u64.pow(12)) {
+            return Ok(ShiftedImmediate { shift: Shift::LSL12, value: (current >> 12) as u16 });
+        }
+
+        Err(())
+    }
+}
+
+impl From<ShiftedImmediate> for u32 {
+    /// Encode a shifted immediate into a 32-bit value.
+    fn from(imm: ShiftedImmediate) -> Self {
+        0
+        | (((imm.shift as u32) & 1) << 12)
+        | (imm.value as u32)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_no_shift() {
+        let expected_value = 256;
+        let result = ShiftedImmediate::try_from(expected_value);
+
+        match result {
+            Ok(ShiftedImmediate { shift: Shift::LSL0, value }) => assert_eq!(value as u64, expected_value),
+            _ => panic!("Unexpected shift value")
+        }
+    }
+
+    #[test]
+    fn test_maximum_no_shift() {
+        let expected_value = (1 << 12) - 1;
+        let result = ShiftedImmediate::try_from(expected_value);
+
+        match result {
+            Ok(ShiftedImmediate { shift: Shift::LSL0, value }) => assert_eq!(value as u64, expected_value),
+            _ => panic!("Unexpected shift value")
+        }
+    }
+
+    #[test]
+    fn test_with_shift() {
+        let result = ShiftedImmediate::try_from(256 << 12);
+
+        assert!(matches!(result, Ok(ShiftedImmediate { shift: Shift::LSL12, value: 256 })));
+    }
+
+    #[test]
+    fn test_unencodable() {
+        let result = ShiftedImmediate::try_from((256 << 12) + 1);
+        assert!(matches!(result, Err(())));
+    }
+}
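The add/sub-style immediate above is much simpler; here is a usage sketch (again not part of the patch, with the module path assumed) showing the three possible outcomes:

    use crate::asm::arm64::arg::ShiftedImmediate;

    fn shifted_imm_example() {
        // Anything below 2^12 is encoded directly with no shift.
        assert!(ShiftedImmediate::try_from(0xfff_u64).is_ok());

        // 0x1000 has its low 12 bits clear, so it becomes a value of 1 with
        // LSL #12: bit 12 of the encoded u32 is the shift flag.
        let imm = ShiftedImmediate::try_from(0x1000_u64).unwrap();
        assert_eq!((1 << 12) | 1, u32::from(imm));

        // 0x1001 needs more than 12 significant bits and no shift helps, so the
        // caller has to materialize the value some other way.
        assert!(ShiftedImmediate::try_from(0x1001_u64).is_err());
    }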
diff --git a/yjit/src/asm/arm64/arg/sys_reg.rs b/yjit/src/asm/arm64/arg/sys_reg.rs
new file mode 100644
index 0000000000..41d71920cb
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/sys_reg.rs
@@ -0,0 +1,6 @@
+/// The encoded representation of an A64 system register.
+/// https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/
+pub enum SystemRegister {
+    /// https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/NZCV--Condition-Flags?lang=en
+    NZCV = 0b1_011_0100_0010_000
+}
diff --git a/yjit/src/asm/arm64/arg/truncate.rs b/yjit/src/asm/arm64/arg/truncate.rs
new file mode 100644
index 0000000000..85d56ff202
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/truncate.rs
@@ -0,0 +1,66 @@
+// There are many instances in AArch64 instruction encoding where you represent
+// an integer value with a particular bit width that isn't a power of 2. These
+// functions represent truncating those integer values down to the appropriate
+// number of bits.
+
+/// Truncate a signed immediate to fit into a compile-time known width. It is
+/// assumed before calling this function that the value fits into the correct
+/// size. If it doesn't, then this function will panic.
+///
+/// When the value is positive, this should effectively be a no-op since we're
+/// just dropping leading zeroes. When the value is negative we should only be
+/// dropping leading ones.
+pub fn truncate_imm<T: Into<i32>, const WIDTH: usize>(imm: T) -> u32 {
+    let value: i32 = imm.into();
+    let masked = (value as u32) & ((1 << WIDTH) - 1);
+
+    // Assert that we didn't drop any bits by truncating.
+    if value >= 0 {
+        assert_eq!(value as u32, masked);
+    } else {
+        assert_eq!(value as u32, masked | (u32::MAX << WIDTH));
+    }
+
+    masked
+}
+
+/// Truncate an unsigned immediate to fit into a compile-time known width. It is
+/// assumed before calling this function that the value fits into the correct
+/// size. If it doesn't, then this function will panic.
+///
+/// This should effectively be a no-op since we're just dropping leading zeroes.
+pub fn truncate_uimm<T: Into<u32>, const WIDTH: usize>(uimm: T) -> u32 {
+    let value: u32 = uimm.into();
+    let masked = value & ((1 << WIDTH) - 1);
+
+    // Assert that we didn't drop any bits by truncating.
+    assert_eq!(value, masked);
+
+    masked
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_truncate_imm_positive() {
+        let inst = truncate_imm::<i32, 4>(5);
+        let result: u32 = inst;
+        assert_eq!(0b0101, result);
+    }
+
+    #[test]
+    fn test_truncate_imm_negative() {
+        let inst = truncate_imm::<i32, 4>(-5);
+        let result: u32 = inst;
+        assert_eq!(0b1011, result);
+    }
+
+    #[test]
+    fn test_truncate_uimm() {
+        let inst = truncate_uimm::<u32, 4>(5);
+        let result: u32 = inst;
+        assert_eq!(0b0101, result);
+    }
+}
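To round out the picture, a hedged sketch (not part of the patch; the 19-bit width is only an illustration, matching the field conditional branches use) of how InstructionOffset and truncate_imm combine when emitting a PC-relative offset:

    use crate::asm::arm64::arg::{truncate_imm, InstructionOffset};

    fn offset_example() {
        // -8 bytes is two instructions back.
        let offset = InstructionOffset::from_bytes(-8);
        assert_eq!(-2, i32::from(offset));

        // Truncating the signed value to 19 bits keeps only the low bits; the
        // dropped bits are all ones, so the assertion inside truncate_imm holds.
        assert_eq!(0x7fffe, truncate_imm::<i32, 19>(offset.into()));
    }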