Diffstat (limited to 'yjit/src')
57 files changed, 33906 insertions, 0 deletions
diff --git a/yjit/src/asm/arm64/README.md b/yjit/src/asm/arm64/README.md new file mode 100644 index 0000000000..edae5773e8 --- /dev/null +++ b/yjit/src/asm/arm64/README.md @@ -0,0 +1,16 @@ +# Arm64 + +This module is responsible for encoding YJIT operands into an appropriate Arm64 encoding. + +## Architecture + +Every instruction in the Arm64 instruction set is 32 bits wide and is represented in little-endian order. Because they're all going to the same size, we represent each instruction by a struct that implements `From<T> for u32`, which contains the mechanism for encoding each instruction. The encoding for each instruction is shown in the documentation for the struct that ends up being created. + +In general each set of bytes inside of the struct has either a direct value (usually a `u8`/`u16`) or some kind of `enum` that can be converted directly into a `u32`. For more complicated pieces of encoding (e.g., bitmask immediates) a corresponding module under the `arg` namespace is available. + +## Helpful links + +* [Arm A64 Instruction Set Architecture](https://developer.arm.com/documentation/ddi0596/2021-12?lang=en) Official documentation +* [armconverter.com](https://armconverter.com/) A website that encodes Arm assembly syntax +* [hatstone](https://github.com/tenderlove/hatstone) A wrapper around the Capstone disassembler written in Ruby +* [onlinedisassembler.com](https://onlinedisassembler.com/odaweb/) A web-based disassembler diff --git a/yjit/src/asm/arm64/arg/bitmask_imm.rs b/yjit/src/asm/arm64/arg/bitmask_imm.rs new file mode 100644 index 0000000000..6b71a73d2c --- /dev/null +++ b/yjit/src/asm/arm64/arg/bitmask_imm.rs @@ -0,0 +1,255 @@ +/// Immediates used by the logical immediate instructions are not actually the +/// immediate value, but instead are encoded into a 13-bit wide mask of 3 +/// elements. This allows many more values to be represented than 13 bits would +/// normally allow, at the expense of not being able to represent every possible +/// value. +/// +/// In order for a number to be encodeable in this form, the binary +/// representation must consist of a single set of contiguous 1s. That pattern +/// must then be replicatable across all of the bits either 1, 2, 4, 8, 16, or +/// 32 times (rotated or not). +/// +/// For example, 1 (0b1), 2 (0b10), 3 (0b11), and 4 (0b100) are all valid. +/// However, 5 (0b101) is invalid, because it contains 2 sets of 1s and cannot +/// be replicated across 64 bits. +/// +/// Some more examples to illustrate the idea of replication: +/// * 0x5555555555555555 is a valid value (0b0101...) because it consists of a +/// single set of 1s which can be replicated across all of the bits 32 times. +/// * 0xf0f0f0f0f0f0f0f0 is a valid value (0b1111000011110000...) because it +/// consists of a single set of 1s which can be replicated across all of the +/// bits 8 times (rotated by 4 bits). +/// * 0x0ff00ff00ff00ff0 is a valid value (0000111111110000...) because it +/// consists of a single set of 1s which can be replicated across all of the +/// bits 4 times (rotated by 12 bits). 
+/// +/// To encode the values, there are 3 elements: +/// * n = 1 if the pattern is 64-bits wide, 0 otherwise +/// * imms = the size of the pattern, a 0, and then one less than the number of +/// sequential 1s +/// * immr = the number of right rotations to apply to the pattern to get the +/// target value +/// +pub struct BitmaskImmediate { + n: u8, + imms: u8, + immr: u8 +} + +impl TryFrom<u64> for BitmaskImmediate { + type Error = (); + + /// Attempt to convert a u64 into a BitmaskImmediate. + /// + /// The implementation here is largely based on this blog post: + /// https://dougallj.wordpress.com/2021/10/30/bit-twiddling-optimising-aarch64-logical-immediate-encoding-and-decoding/ + fn try_from(value: u64) -> Result<Self, Self::Error> { + if value == 0 || value == u64::MAX { + return Err(()); + } + + fn rotate_right(value: u64, rotations: u32) -> u64 { + (value >> (rotations & 0x3F)) | + (value << (rotations.wrapping_neg() & 0x3F)) + } + + let rotations = (value & (value + 1)).trailing_zeros(); + let normalized = rotate_right(value, rotations & 0x3F); + + let zeroes = normalized.leading_zeros(); + let ones = (!normalized).trailing_zeros(); + let size = zeroes + ones; + + if rotate_right(value, size & 0x3F) != value { + return Err(()); + } + + Ok(BitmaskImmediate { + n: ((size >> 6) & 1) as u8, + imms: (((size << 1).wrapping_neg() | (ones - 1)) & 0x3F) as u8, + immr: ((rotations.wrapping_neg() & (size - 1)) & 0x3F) as u8 + }) + } +} + +impl BitmaskImmediate { + /// Attempt to make a BitmaskImmediate for a 32 bit register. + /// The result has N==0, which is required for some 32-bit instructions. + /// Note that the exact same BitmaskImmediate produces different values + /// depending on the size of the target register. + pub fn new_32b_reg(value: u32) -> Result<Self, ()> { + // The same bit pattern replicated to u64 + let value = value as u64; + let replicated: u64 = (value << 32) | value; + let converted = Self::try_from(replicated); + if let Ok(ref imm) = converted { + assert_eq!(0, imm.n); + } + + converted + } +} + +impl BitmaskImmediate { + /// Encode a bitmask immediate into a 32-bit value. 
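Aside (illustrative, not part of this diff): the reverse mapping, following the DecodeBitMasks pseudocode from the Arm ARM, makes a handy round-trip check against `try_from`. A minimal sketch, assuming it is only ever given a valid (n, immr, imms) triple:

fn decode_bitmask(n: u8, immr: u8, imms: u8) -> u64 {
    // The element width (2, 4, 8, 16, 32 or 64 bits) is derived from the
    // position of the highest set bit of N:NOT(imms).
    let combined = ((n as u32) << 6) | (((!imms) as u32) & 0x3f);
    let size = 1u32 << (31 - combined.leading_zeros());
    let levels = size - 1;

    // Build an element of (imms & levels) + 1 trailing ones and rotate it
    // right by (immr & levels) within the element.
    let s = (imms as u32) & levels;
    let r = (immr as u32) & levels;
    let element = if s == 63 { u64::MAX } else { (1u64 << (s + 1)) - 1 };
    let mask = if size == 64 { u64::MAX } else { (1u64 << size) - 1 };
    let rotated = if r == 0 { element } else { ((element >> r) | (element << (size - r))) & mask };

    // Replicate the rotated element across all 64 bits.
    let mut result = rotated;
    let mut width = size;
    while width < 64 {
        result |= result << width;
        width *= 2;
    }
    result
}

For example, decode_bitmask(0, 0, 0b111100) yields 0x5555555555555555, matching the size-2 test case further down.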
+ pub fn encode(self) -> u32 { + 0 + | ((self.n as u32) << 12) + | ((self.immr as u32) << 6) + | (self.imms as u32) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_failures() { + [5, 9, 10, 11, 13, 17, 18, 19].iter().for_each(|&imm| { + assert!(BitmaskImmediate::try_from(imm).is_err()); + }); + } + + #[test] + fn test_negative() { + let bitmask: BitmaskImmediate = (-9_i64 as u64).try_into().unwrap(); + let encoded: u32 = bitmask.encode(); + assert_eq!(7998, encoded); + } + + #[test] + fn test_size_2_minimum() { + let bitmask = BitmaskImmediate::try_from(0x5555555555555555); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b111100 }))); + } + + #[test] + fn test_size_2_maximum() { + let bitmask = BitmaskImmediate::try_from(0xaaaaaaaaaaaaaaaa); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000001, imms: 0b111100 }))); + } + + #[test] + fn test_size_4_minimum() { + let bitmask = BitmaskImmediate::try_from(0x1111111111111111); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b111000 }))); + } + + #[test] + fn test_size_4_rotated() { + let bitmask = BitmaskImmediate::try_from(0x6666666666666666); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000011, imms: 0b111001 }))); + } + + #[test] + fn test_size_4_maximum() { + let bitmask = BitmaskImmediate::try_from(0xeeeeeeeeeeeeeeee); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000011, imms: 0b111010 }))); + } + + #[test] + fn test_size_8_minimum() { + let bitmask = BitmaskImmediate::try_from(0x0101010101010101); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b110000 }))); + } + + #[test] + fn test_size_8_rotated() { + let bitmask = BitmaskImmediate::try_from(0x1818181818181818); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000101, imms: 0b110001 }))); + } + + #[test] + fn test_size_8_maximum() { + let bitmask = BitmaskImmediate::try_from(0xfefefefefefefefe); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000111, imms: 0b110110 }))); + } + + #[test] + fn test_size_16_minimum() { + let bitmask = BitmaskImmediate::try_from(0x0001000100010001); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b100000 }))); + } + + #[test] + fn test_size_16_rotated() { + let bitmask = BitmaskImmediate::try_from(0xff8fff8fff8fff8f); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b001001, imms: 0b101100 }))); + } + + #[test] + fn test_size_16_maximum() { + let bitmask = BitmaskImmediate::try_from(0xfffefffefffefffe); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b001111, imms: 0b101110 }))); + } + + #[test] + fn test_size_32_minimum() { + let bitmask = BitmaskImmediate::try_from(0x0000000100000001); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b000000 }))); + } + + #[test] + fn test_size_32_rotated() { + let bitmask = BitmaskImmediate::try_from(0x3fffff003fffff00); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b011000, imms: 0b010101 }))); + } + + #[test] + fn test_size_32_maximum() { + let bitmask = BitmaskImmediate::try_from(0xfffffffefffffffe); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b011111, imms: 0b011110 }))); + } + + #[test] + fn test_size_64_minimum() { + let bitmask = BitmaskImmediate::try_from(0x0000000000000001); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b000000, imms: 0b000000 }))); + } + + #[test] + 
fn test_size_64_rotated() { + let bitmask = BitmaskImmediate::try_from(0x0000001fffff0000); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b110000, imms: 0b010100 }))); + } + + #[test] + fn test_size_64_maximum() { + let bitmask = BitmaskImmediate::try_from(0xfffffffffffffffe); + assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b111111, imms: 0b111110 }))); + } + + #[test] + fn test_size_64_invalid() { + let bitmask = BitmaskImmediate::try_from(u64::MAX); + assert!(matches!(bitmask, Err(()))); + } + + #[test] + fn test_all_valid_32b_pattern() { + let mut patterns = vec![]; + for pattern_size in [2, 4, 8, 16, 32_u64] { + for ones_count in 1..pattern_size { + for rotation in 0..pattern_size { + let ones = (1_u64 << ones_count) - 1; + let rotated = (ones >> rotation) | + ((ones & ((1 << rotation) - 1)) << (pattern_size - rotation)); + let mut replicated = rotated; + let mut shift = pattern_size; + while shift < 32 { + replicated |= replicated << shift; + shift *= 2; + } + let replicated: u32 = replicated.try_into().unwrap(); + assert!(BitmaskImmediate::new_32b_reg(replicated).is_ok()); + patterns.push(replicated); + } + } + } + patterns.sort(); + patterns.dedup(); + // Up to {size}-1 ones, and a total of {size} possible rotations. + assert_eq!(1*2 + 3*4 + 7*8 + 15*16 + 31*32, patterns.len()); + } +} diff --git a/yjit/src/asm/arm64/arg/condition.rs b/yjit/src/asm/arm64/arg/condition.rs new file mode 100644 index 0000000000..f711b8b0d8 --- /dev/null +++ b/yjit/src/asm/arm64/arg/condition.rs @@ -0,0 +1,52 @@ +/// Various instructions in A64 can have condition codes attached. This enum +/// includes all of the various kinds of conditions along with their respective +/// encodings. +pub struct Condition; + +impl Condition { + pub const EQ: u8 = 0b0000; // equal to + pub const NE: u8 = 0b0001; // not equal to + pub const CS: u8 = 0b0010; // carry set (alias for HS) + pub const CC: u8 = 0b0011; // carry clear (alias for LO) + pub const MI: u8 = 0b0100; // minus, negative + pub const PL: u8 = 0b0101; // positive or zero + pub const VS: u8 = 0b0110; // signed overflow + pub const VC: u8 = 0b0111; // no signed overflow + pub const HI: u8 = 0b1000; // greater than (unsigned) + pub const LS: u8 = 0b1001; // less than or equal to (unsigned) + pub const GE: u8 = 0b1010; // greater than or equal to (signed) + pub const LT: u8 = 0b1011; // less than (signed) + pub const GT: u8 = 0b1100; // greater than (signed) + pub const LE: u8 = 0b1101; // less than or equal to (signed) + pub const AL: u8 = 0b1110; // always + + pub const fn inverse(condition: u8) -> u8 { + match condition { + Condition::EQ => Condition::NE, + Condition::NE => Condition::EQ, + + Condition::CS => Condition::CC, + Condition::CC => Condition::CS, + + Condition::MI => Condition::PL, + Condition::PL => Condition::MI, + + Condition::VS => Condition::VC, + Condition::VC => Condition::VS, + + Condition::HI => Condition::LS, + Condition::LS => Condition::HI, + + Condition::LT => Condition::GE, + Condition::GE => Condition::LT, + + Condition::GT => Condition::LE, + Condition::LE => Condition::GT, + + Condition::AL => Condition::AL, + + _ => panic!("Unknown condition") + + } + } +} diff --git a/yjit/src/asm/arm64/arg/inst_offset.rs b/yjit/src/asm/arm64/arg/inst_offset.rs new file mode 100644 index 0000000000..f4a6bc73a0 --- /dev/null +++ b/yjit/src/asm/arm64/arg/inst_offset.rs @@ -0,0 +1,47 @@ +/// There are a lot of instructions in the AArch64 architectrue that take an +/// offset in terms of number of instructions. 
Usually they are jump +/// instructions or instructions that load a value relative to the current PC. +/// +/// This struct is used to mark those locations instead of a generic operand in +/// order to give better clarity to the developer when reading the AArch64 +/// backend code. It also helps to clarify that everything is in terms of a +/// number of instructions and not a number of bytes (i.e., the offset is the +/// number of bytes divided by 4). +#[derive(Copy, Clone)] +pub struct InstructionOffset(i32); + +impl InstructionOffset { + /// Create a new instruction offset. + pub fn from_insns(insns: i32) -> Self { + InstructionOffset(insns) + } + + /// Create a new instruction offset from a number of bytes. + pub fn from_bytes(bytes: i32) -> Self { + assert_eq!(bytes % 4, 0, "Byte offset must be a multiple of 4"); + InstructionOffset(bytes / 4) + } +} + +impl From<i32> for InstructionOffset { + /// Convert an i64 into an instruction offset. + fn from(value: i32) -> Self { + InstructionOffset(value) + } +} + +impl From<InstructionOffset> for i32 { + /// Convert an instruction offset into a number of instructions as an i32. + fn from(offset: InstructionOffset) -> Self { + offset.0 + } +} + +impl From<InstructionOffset> for i64 { + /// Convert an instruction offset into a number of instructions as an i64. + /// This is useful for when we're checking how many bits this offset fits + /// into. + fn from(offset: InstructionOffset) -> Self { + offset.0.into() + } +} diff --git a/yjit/src/asm/arm64/arg/mod.rs b/yjit/src/asm/arm64/arg/mod.rs new file mode 100644 index 0000000000..7eb37834f9 --- /dev/null +++ b/yjit/src/asm/arm64/arg/mod.rs @@ -0,0 +1,18 @@ +// This module contains various A64 instruction arguments and the logic +// necessary to encode them. + +mod bitmask_imm; +mod condition; +mod inst_offset; +mod sf; +mod shifted_imm; +mod sys_reg; +mod truncate; + +pub use bitmask_imm::BitmaskImmediate; +pub use condition::Condition; +pub use inst_offset::InstructionOffset; +pub use sf::Sf; +pub use shifted_imm::ShiftedImmediate; +pub use sys_reg::SystemRegister; +pub use truncate::{truncate_imm, truncate_uimm}; diff --git a/yjit/src/asm/arm64/arg/sf.rs b/yjit/src/asm/arm64/arg/sf.rs new file mode 100644 index 0000000000..c2fd33302c --- /dev/null +++ b/yjit/src/asm/arm64/arg/sf.rs @@ -0,0 +1,19 @@ +/// This is commonly the top-most bit in the encoding of the instruction, and +/// represents whether register operands should be treated as 64-bit registers +/// or 32-bit registers. +pub enum Sf { + Sf32 = 0b0, + Sf64 = 0b1 +} + +/// A convenience function so that we can convert the number of bits of an +/// register operand directly into an Sf enum variant. +impl From<u8> for Sf { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Sf::Sf64, + 32 => Sf::Sf32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} diff --git a/yjit/src/asm/arm64/arg/shifted_imm.rs b/yjit/src/asm/arm64/arg/shifted_imm.rs new file mode 100644 index 0000000000..4602ac64ab --- /dev/null +++ b/yjit/src/asm/arm64/arg/shifted_imm.rs @@ -0,0 +1,81 @@ +/// How much to shift the immediate by. +pub enum Shift { + LSL0 = 0b0, // no shift + LSL12 = 0b1 // logical shift left by 12 bits +} + +/// Some instructions accept a 12-bit immediate that has an optional shift +/// attached to it. This allows encoding larger values than just fit into 12 +/// bits. We attempt to encode those here. If the values are too large we have +/// to bail out. 
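A quick illustration (hypothetical usage, not part of the diff) of what is and is not encodable: values that fit in 12 bits use no shift, values whose low 12 bits are zero and whose upper bits fit in 12 use LSL #12, and everything else is rejected.

assert!(ShiftedImmediate::try_from(0xfff).is_ok());        // 12-bit value, no shift
assert!(ShiftedImmediate::try_from(0x123 << 12).is_ok());  // 12-bit value shifted left by 12
assert!(ShiftedImmediate::try_from(0x1001).is_err());      // needs bits in both halves, not encodable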
+pub struct ShiftedImmediate { + shift: Shift, + value: u16 +} + +impl TryFrom<u64> for ShiftedImmediate { + type Error = (); + + /// Attempt to convert a u64 into a BitmaskImm. + fn try_from(value: u64) -> Result<Self, Self::Error> { + let current = value; + if current < 2_u64.pow(12) { + return Ok(ShiftedImmediate { shift: Shift::LSL0, value: current as u16 }); + } + + if (current & (2_u64.pow(12) - 1) == 0) && ((current >> 12) < 2_u64.pow(12)) { + return Ok(ShiftedImmediate { shift: Shift::LSL12, value: (current >> 12) as u16 }); + } + + Err(()) + } +} + +impl From<ShiftedImmediate> for u32 { + /// Encode a bitmask immediate into a 32-bit value. + fn from(imm: ShiftedImmediate) -> Self { + 0 + | (((imm.shift as u32) & 1) << 12) + | (imm.value as u32) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_no_shift() { + let expected_value = 256; + let result = ShiftedImmediate::try_from(expected_value); + + match result { + Ok(ShiftedImmediate { shift: Shift::LSL0, value }) => assert_eq!(value as u64, expected_value), + _ => panic!("Unexpected shift value") + } + } + + #[test] + fn test_maximum_no_shift() { + let expected_value = (1 << 12) - 1; + let result = ShiftedImmediate::try_from(expected_value); + + match result { + Ok(ShiftedImmediate { shift: Shift::LSL0, value }) => assert_eq!(value as u64, expected_value), + _ => panic!("Unexpected shift value") + } + } + + #[test] + fn test_with_shift() { + let result = ShiftedImmediate::try_from(256 << 12); + + assert!(matches!(result, Ok(ShiftedImmediate { shift: Shift::LSL12, value: 256 }))); + } + + #[test] + fn test_unencodable() { + let result = ShiftedImmediate::try_from((256 << 12) + 1); + assert!(matches!(result, Err(()))); + } +} diff --git a/yjit/src/asm/arm64/arg/sys_reg.rs b/yjit/src/asm/arm64/arg/sys_reg.rs new file mode 100644 index 0000000000..41d71920cb --- /dev/null +++ b/yjit/src/asm/arm64/arg/sys_reg.rs @@ -0,0 +1,6 @@ +/// The encoded representation of an A64 system register. +/// https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/ +pub enum SystemRegister { + /// https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/NZCV--Condition-Flags?lang=en + NZCV = 0b1_011_0100_0010_000 +} diff --git a/yjit/src/asm/arm64/arg/truncate.rs b/yjit/src/asm/arm64/arg/truncate.rs new file mode 100644 index 0000000000..85d56ff202 --- /dev/null +++ b/yjit/src/asm/arm64/arg/truncate.rs @@ -0,0 +1,66 @@ +// There are many instances in AArch64 instruction encoding where you represent +// an integer value with a particular bit width that isn't a power of 2. These +// functions represent truncating those integer values down to the appropriate +// number of bits. + +/// Truncate a signed immediate to fit into a compile-time known width. It is +/// assumed before calling this function that the value fits into the correct +/// size. If it doesn't, then this function will panic. +/// +/// When the value is positive, this should effectively be a no-op since we're +/// just dropping leading zeroes. When the value is negative we should only be +/// dropping leading ones. +pub fn truncate_imm<T: Into<i32>, const WIDTH: usize>(imm: T) -> u32 { + let value: i32 = imm.into(); + let masked = (value as u32) & ((1 << WIDTH) - 1); + + // Assert that we didn't drop any bits by truncating. 
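    // (Illustrative note, not from the original:) with WIDTH = 4, truncating 5
    // keeps 0b0101, and truncating -5 (...11111011 in two's complement) keeps
    // 0b1011; restoring the dropped leading ones with `u32::MAX << WIDTH` must
    // reproduce the original value, otherwise the immediate never fit.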
+ if value >= 0 { + assert_eq!(value as u32, masked); + } else { + assert_eq!(value as u32, masked | (u32::MAX << WIDTH)); + } + + masked +} + +/// Truncate an unsigned immediate to fit into a compile-time known width. It is +/// assumed before calling this function that the value fits into the correct +/// size. If it doesn't, then this function will panic. +/// +/// This should effectively be a no-op since we're just dropping leading zeroes. +pub fn truncate_uimm<T: Into<u32>, const WIDTH: usize>(uimm: T) -> u32 { + let value: u32 = uimm.into(); + let masked = value & ((1 << WIDTH) - 1); + + // Assert that we didn't drop any bits by truncating. + assert_eq!(value, masked); + + masked +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_truncate_imm_positive() { + let inst = truncate_imm::<i32, 4>(5); + let result: u32 = inst; + assert_eq!(0b0101, result); + } + + #[test] + fn test_truncate_imm_negative() { + let inst = truncate_imm::<i32, 4>(-5); + let result: u32 = inst; + assert_eq!(0b1011, result); + } + + #[test] + fn test_truncate_uimm() { + let inst = truncate_uimm::<u32, 4>(5); + let result: u32 = inst; + assert_eq!(0b0101, result); + } +} diff --git a/yjit/src/asm/arm64/inst/atomic.rs b/yjit/src/asm/arm64/inst/atomic.rs new file mode 100644 index 0000000000..5ce497209c --- /dev/null +++ b/yjit/src/asm/arm64/inst/atomic.rs @@ -0,0 +1,86 @@ +/// The size of the register operands to this instruction. +enum Size { + /// Using 32-bit registers. + Size32 = 0b10, + + /// Using 64-bit registers. + Size64 = 0b11 +} + +/// A convenience function so that we can convert the number of bits of an +/// register operand directly into a Size enum variant. +impl From<u8> for Size { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Size::Size64, + 32 => Size::Size32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The struct that represents an A64 atomic instruction that can be encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 1 0 0 0 1 1 1 0 0 0 0 0 0 | +/// | size rs.............. rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Atomic { + /// The register holding the value to be loaded. + rt: u8, + + /// The base register. + rn: u8, + + /// The register holding the data value to be operated on. + rs: u8, + + /// The size of the registers used in this instruction. + size: Size +} + +impl Atomic { + /// LDADDAL + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDADD--LDADDA--LDADDAL--LDADDL--Atomic-add-on-word-or-doubleword-in-memory-?lang=en + pub fn ldaddal(rs: u8, rt: u8, rn: u8, num_bits: u8) -> Self { + Self { rt, rn, rs, size: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From<Atomic> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Atomic) -> Self { + 0 + | ((inst.size as u32) << 30) + | (0b11 << 28) + | (FAMILY << 25) + | (0b111 << 21) + | ((inst.rs as u32) << 16) + | ((inst.rn as u32) << 5) + | (inst.rt as u32) + } +} + +impl From<Atomic> for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
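    // (Illustrative note, not from the original:) as the README notes, A64
    // instructions are stored little-endian, so the 32-bit encoding is emitted
    // low byte first. For the LDADDAL test value below:
    //     0xf8f402d5u32.to_le_bytes() == [0xd5, 0x02, 0xf4, 0xf8]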
+ fn from(inst: Atomic) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldaddal() { + let result: u32 = Atomic::ldaddal(20, 21, 22, 64).into(); + assert_eq!(0xf8f402d5, result); + } +} diff --git a/yjit/src/asm/arm64/inst/branch.rs b/yjit/src/asm/arm64/inst/branch.rs new file mode 100644 index 0000000000..f15ef2a9b0 --- /dev/null +++ b/yjit/src/asm/arm64/inst/branch.rs @@ -0,0 +1,100 @@ +/// Which operation to perform. +enum Op { + /// Perform a BR instruction. + BR = 0b00, + + /// Perform a BLR instruction. + BLR = 0b01, + + /// Perform a RET instruction. + RET = 0b10 +} + +/// The struct that represents an A64 branch instruction that can be encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 1 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 | +/// | op... rn.............. rm.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Branch { + /// The register holding the address to be branched to. + rn: u8, + + /// The operation to perform. + op: Op +} + +impl Branch { + /// BR + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BR--Branch-to-Register-?lang=en + pub fn br(rn: u8) -> Self { + Self { rn, op: Op::BR } + } + + /// BLR + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BLR--Branch-with-Link-to-Register-?lang=en + pub fn blr(rn: u8) -> Self { + Self { rn, op: Op::BLR } + } + + /// RET + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RET--Return-from-subroutine-?lang=en + pub fn ret(rn: u8) -> Self { + Self { rn, op: Op::RET } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +const FAMILY: u32 = 0b101; + +impl From<Branch> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Branch) -> Self { + 0 + | (0b11 << 30) + | (FAMILY << 26) + | (1 << 25) + | ((inst.op as u32) << 21) + | (0b11111 << 16) + | ((inst.rn as u32) << 5) + } +} + +impl From<Branch> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Branch) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_br() { + let result: u32 = Branch::br(0).into(); + assert_eq!(0xd61f0000, result); + } + + #[test] + fn test_blr() { + let result: u32 = Branch::blr(0).into(); + assert_eq!(0xd63f0000, result); + } + + #[test] + fn test_ret() { + let result: u32 = Branch::ret(30).into(); + assert_eq!(0xd65f03C0, result); + } + + #[test] + fn test_ret_rn() { + let result: u32 = Branch::ret(20).into(); + assert_eq!(0xd65f0280, result); + } +} diff --git a/yjit/src/asm/arm64/inst/branch_cond.rs b/yjit/src/asm/arm64/inst/branch_cond.rs new file mode 100644 index 0000000000..fcc07f69aa --- /dev/null +++ b/yjit/src/asm/arm64/inst/branch_cond.rs @@ -0,0 +1,78 @@ +use super::super::arg::{InstructionOffset, truncate_imm}; + +/// The struct that represents an A64 conditional branch instruction that can be +/// encoded. 
+/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 0 1 0 1 0 0 0 | +/// | imm19........................................................... cond....... | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct BranchCond { + /// The kind of condition to check before branching. + cond: u8, + + /// The instruction offset from this instruction to branch to. + offset: InstructionOffset +} + +impl BranchCond { + /// B.cond + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally- + pub fn bcond(cond: u8, offset: InstructionOffset) -> Self { + Self { cond, offset } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +const FAMILY: u32 = 0b101; + +impl From<BranchCond> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: BranchCond) -> Self { + 0 + | (1 << 30) + | (FAMILY << 26) + | (truncate_imm::<_, 19>(inst.offset) << 5) + | (inst.cond as u32) + } +} + +impl From<BranchCond> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: BranchCond) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use super::super::super::arg::Condition; + + #[test] + fn test_b_eq() { + let result: u32 = BranchCond::bcond(Condition::EQ, 32.into()).into(); + assert_eq!(0x54000400, result); + } + + #[test] + fn test_b_vs() { + let result: u32 = BranchCond::bcond(Condition::VS, 32.into()).into(); + assert_eq!(0x54000406, result); + } + + #[test] + fn test_b_eq_max() { + let result: u32 = BranchCond::bcond(Condition::EQ, ((1 << 18) - 1).into()).into(); + assert_eq!(0x547fffe0, result); + } + + #[test] + fn test_b_eq_min() { + let result: u32 = BranchCond::bcond(Condition::EQ, (-(1 << 18)).into()).into(); + assert_eq!(0x54800000, result); + } +} diff --git a/yjit/src/asm/arm64/inst/breakpoint.rs b/yjit/src/asm/arm64/inst/breakpoint.rs new file mode 100644 index 0000000000..be4920ac76 --- /dev/null +++ b/yjit/src/asm/arm64/inst/breakpoint.rs @@ -0,0 +1,55 @@ +/// The struct that represents an A64 breakpoint instruction that can be encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 | +/// | imm16.................................................. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Breakpoint { + /// The value to be captured by ESR_ELx.ISS + imm16: u16 +} + +impl Breakpoint { + /// BRK + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/BRK--Breakpoint-instruction- + pub fn brk(imm16: u16) -> Self { + Self { imm16 } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#control +const FAMILY: u32 = 0b101; + +impl From<Breakpoint> for u32 { + /// Convert an instruction into a 32-bit value. 
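    // (Worked example, added for illustration:) for Breakpoint::brk(7) the
    // fields assemble as (0b11 << 30) | (0b101 << 26) | (1 << 21) | (7 << 5)
    // = 0xd42000e0, which is the value checked by test_brk below.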
+ fn from(inst: Breakpoint) -> Self { + let imm16 = inst.imm16 as u32; + + 0 + | (0b11 << 30) + | (FAMILY << 26) + | (1 << 21) + | (imm16 << 5) + } +} + +impl From<Breakpoint> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Breakpoint) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_brk() { + let result: u32 = Breakpoint::brk(7).into(); + assert_eq!(0xd42000e0, result); + } +} diff --git a/yjit/src/asm/arm64/inst/call.rs b/yjit/src/asm/arm64/inst/call.rs new file mode 100644 index 0000000000..74debac7f7 --- /dev/null +++ b/yjit/src/asm/arm64/inst/call.rs @@ -0,0 +1,104 @@ +use super::super::arg::{InstructionOffset, truncate_imm}; + +/// The operation to perform for this instruction. +enum Op { + /// Branch directly, with a hint that this is not a subroutine call or + /// return. + Branch = 0, + + /// Branch directly, with a hint that this is a subroutine call or return. + BranchWithLink = 1 +} + +/// The struct that represents an A64 branch with our without link instruction +/// that can be encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 0 1 0 1 | +/// | op imm26.................................................................................... | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Call { + /// The PC-relative offset to jump to in terms of number of instructions. + offset: InstructionOffset, + + /// The operation to perform for this instruction. + op: Op +} + +impl Call { + /// B + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch- + pub fn b(offset: InstructionOffset) -> Self { + Self { offset, op: Op::Branch } + } + + /// BL + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en + pub fn bl(offset: InstructionOffset) -> Self { + Self { offset, op: Op::BranchWithLink } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +const FAMILY: u32 = 0b101; + +impl From<Call> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Call) -> Self { + 0 + | ((inst.op as u32) << 31) + | (FAMILY << 26) + | truncate_imm::<_, 26>(inst.offset) + } +} + +impl From<Call> for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
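    // (Illustrative note, not from the original:) the offset is a signed
    // 26-bit count of instructions, so B/BL can reach roughly +/-128 MiB
    // (2^25 instructions of 4 bytes each). A caller working in byte offsets
    // would typically go through InstructionOffset::from_bytes, e.g.
    //     Call::bl(InstructionOffset::from_bytes(1024))  // 256 instructions ahead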
+ fn from(inst: Call) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_bl() { + let result: u32 = Call::bl(0.into()).into(); + assert_eq!(0x94000000, result); + } + + #[test] + fn test_bl_positive() { + let result: u32 = Call::bl(256.into()).into(); + assert_eq!(0x94000100, result); + } + + #[test] + fn test_bl_negative() { + let result: u32 = Call::bl((-256).into()).into(); + assert_eq!(0x97ffff00, result); + } + + #[test] + fn test_b() { + let result: u32 = Call::b(0.into()).into(); + assert_eq!(0x14000000, result); + } + + #[test] + fn test_b_positive() { + let result: u32 = Call::b(((1 << 25) - 1).into()).into(); + assert_eq!(0x15ffffff, result); + } + + #[test] + fn test_b_negative() { + let result: u32 = Call::b((-(1 << 25)).into()).into(); + assert_eq!(0x16000000, result); + } +} diff --git a/yjit/src/asm/arm64/inst/conditional.rs b/yjit/src/asm/arm64/inst/conditional.rs new file mode 100644 index 0000000000..e1950e95b4 --- /dev/null +++ b/yjit/src/asm/arm64/inst/conditional.rs @@ -0,0 +1,73 @@ +use super::super::arg::Sf; + +/// The struct that represents an A64 conditional instruction that can be +/// encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 0 1 1 0 1 0 1 0 0 0 0 | +/// | sf rm.............. cond....... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Conditional { + /// The number of the general-purpose destination register. + rd: u8, + + /// The number of the first general-purpose source register. + rn: u8, + + /// The condition to use for the conditional instruction. + cond: u8, + + /// The number of the second general-purpose source register. + rm: u8, + + /// The size of the registers of this instruction. + sf: Sf +} + +impl Conditional { + /// CSEL + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CSEL--Conditional-Select-?lang=en + pub fn csel(rd: u8, rn: u8, rm: u8, cond: u8, num_bits: u8) -> Self { + Self { rd, rn, cond, rm, sf: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en#condsel +const FAMILY: u32 = 0b101; + +impl From<Conditional> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Conditional) -> Self { + 0 + | ((inst.sf as u32) << 31) + | (1 << 28) + | (FAMILY << 25) + | (1 << 23) + | ((inst.rm as u32) << 16) + | ((inst.cond as u32) << 12) + | ((inst.rn as u32) << 5) + | (inst.rd as u32) + } +} + +impl From<Conditional> for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
+ fn from(inst: Conditional) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use super::super::super::arg::Condition; + + #[test] + fn test_csel() { + let result: u32 = Conditional::csel(0, 1, 2, Condition::NE, 64).into(); + assert_eq!(0x9a821020, result); + } +} diff --git a/yjit/src/asm/arm64/inst/data_imm.rs b/yjit/src/asm/arm64/inst/data_imm.rs new file mode 100644 index 0000000000..b474b00a52 --- /dev/null +++ b/yjit/src/asm/arm64/inst/data_imm.rs @@ -0,0 +1,143 @@ +use super::super::arg::{Sf, ShiftedImmediate}; + +/// The operation being performed by this instruction. +enum Op { + Add = 0b0, + Sub = 0b1 +} + +// Whether or not to update the flags when this instruction is performed. +enum S { + LeaveFlags = 0b0, + UpdateFlags = 0b1 +} + +/// The struct that represents an A64 data processing -- immediate instruction +/// that can be encoded. +/// +/// Add/subtract (immediate) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 0 1 0 | +/// | sf op S sh imm12.................................... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct DataImm { + /// The register number of the destination register. + rd: u8, + + /// The register number of the first operand register. + rn: u8, + + /// How much to shift the immediate by. + imm: ShiftedImmediate, + + /// Whether or not to update the flags when this instruction is performed. + s: S, + + /// The opcode for this instruction. + op: Op, + + /// Whether or not this instruction is operating on 64-bit operands. 
+ sf: Sf +} + +impl DataImm { + /// ADD (immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--immediate---Add--immediate--?lang=en + pub fn add(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, s: S::LeaveFlags, op: Op::Add, sf: num_bits.into() } + } + + /// ADDS (immediate, set flags) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--immediate---Add--immediate---setting-flags-?lang=en + pub fn adds(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, s: S::UpdateFlags, op: Op::Add, sf: num_bits.into() } + } + + /// CMP (immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--immediate---Compare--immediate---an-alias-of-SUBS--immediate--?lang=en + pub fn cmp(rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { + Self::subs(31, rn, imm, num_bits) + } + + /// SUB (immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--immediate---Subtract--immediate--?lang=en + pub fn sub(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, s: S::LeaveFlags, op: Op::Sub, sf: num_bits.into() } + } + + /// SUBS (immediate, set flags) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--immediate---Subtract--immediate---setting-flags-?lang=en + pub fn subs(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, s: S::UpdateFlags, op: Op::Sub, sf: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en +const FAMILY: u32 = 0b1000; + +impl From<DataImm> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: DataImm) -> Self { + let imm: u32 = inst.imm.into(); + + 0 + | ((inst.sf as u32) << 31) + | ((inst.op as u32) << 30) + | ((inst.s as u32) << 29) + | (FAMILY << 25) + | (1 << 24) + | (imm << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From<DataImm> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: DataImm) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_add() { + let inst = DataImm::add(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0x91001c20, result); + } + + #[test] + fn test_adds() { + let inst = DataImm::adds(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xb1001c20, result); + } + + #[test] + fn test_cmp() { + let inst = DataImm::cmp(0, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xf1001c1f, result); + } + + #[test] + fn test_sub() { + let inst = DataImm::sub(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xd1001c20, result); + } + + #[test] + fn test_subs() { + let inst = DataImm::subs(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xf1001c20, result); + } +} diff --git a/yjit/src/asm/arm64/inst/data_reg.rs b/yjit/src/asm/arm64/inst/data_reg.rs new file mode 100644 index 0000000000..a742121f1f --- /dev/null +++ b/yjit/src/asm/arm64/inst/data_reg.rs @@ -0,0 +1,192 @@ +use super::super::arg::{Sf, truncate_uimm}; + +/// The operation being performed by this instruction. 
+enum Op { + Add = 0b0, + Sub = 0b1 +} + +// Whether or not to update the flags when this instruction is performed. +enum S { + LeaveFlags = 0b0, + UpdateFlags = 0b1 +} + +/// The type of shift to perform on the second operand register. +enum Shift { + LSL = 0b00, // logical shift left (unsigned) + LSR = 0b01, // logical shift right (unsigned) + ASR = 0b10 // arithmetic shift right (signed) +} + +/// The struct that represents an A64 data processing -- register instruction +/// that can be encoded. +/// +/// Add/subtract (shifted register) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 0 1 1 0 | +/// | sf op S shift rm.............. imm6............... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct DataReg { + /// The register number of the destination register. + rd: u8, + + /// The register number of the first operand register. + rn: u8, + + /// The amount to shift the second operand register by. + imm6: u8, + + /// The register number of the second operand register. + rm: u8, + + /// The type of shift to perform on the second operand register. + shift: Shift, + + /// Whether or not to update the flags when this instruction is performed. + s: S, + + /// The opcode for this instruction. + op: Op, + + /// Whether or not this instruction is operating on 64-bit operands. + sf: Sf +} + +impl DataReg { + /// ADD (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--shifted-register---Add--shifted-register--?lang=en + pub fn add(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { + rd, + rn, + imm6: 0, + rm, + shift: Shift::LSL, + s: S::LeaveFlags, + op: Op::Add, + sf: num_bits.into() + } + } + + /// ADDS (shifted register, set flags) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--shifted-register---Add--shifted-register---setting-flags-?lang=en + pub fn adds(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { + rd, + rn, + imm6: 0, + rm, + shift: Shift::LSL, + s: S::UpdateFlags, + op: Op::Add, + sf: num_bits.into() + } + } + + /// CMP (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--shifted-register---Compare--shifted-register---an-alias-of-SUBS--shifted-register--?lang=en + pub fn cmp(rn: u8, rm: u8, num_bits: u8) -> Self { + Self::subs(31, rn, rm, num_bits) + } + + /// SUB (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--shifted-register---Subtract--shifted-register--?lang=en + pub fn sub(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { + rd, + rn, + imm6: 0, + rm, + shift: Shift::LSL, + s: S::LeaveFlags, + op: Op::Sub, + sf: num_bits.into() + } + } + + /// SUBS (shifted register, set flags) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--shifted-register---Subtract--shifted-register---setting-flags-?lang=en + pub fn subs(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { + rd, + rn, + imm6: 0, + rm, + shift: Shift::LSL, + s: S::UpdateFlags, + op: Op::Sub, + sf: num_bits.into() + } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en +const 
FAMILY: u32 = 0b0101; + +impl From<DataReg> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: DataReg) -> Self { + 0 + | ((inst.sf as u32) << 31) + | ((inst.op as u32) << 30) + | ((inst.s as u32) << 29) + | (FAMILY << 25) + | (1 << 24) + | ((inst.shift as u32) << 22) + | ((inst.rm as u32) << 16) + | (truncate_uimm::<_, 6>(inst.imm6) << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From<DataReg> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: DataReg) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_add() { + let inst = DataReg::add(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0x8b020020, result); + } + + #[test] + fn test_adds() { + let inst = DataReg::adds(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xab020020, result); + } + + #[test] + fn test_cmp() { + let inst = DataReg::cmp(0, 1, 64); + let result: u32 = inst.into(); + assert_eq!(0xeb01001f, result); + } + + #[test] + fn test_sub() { + let inst = DataReg::sub(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xcb020020, result); + } + + #[test] + fn test_subs() { + let inst = DataReg::subs(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xeb020020, result); + } +} diff --git a/yjit/src/asm/arm64/inst/halfword_imm.rs b/yjit/src/asm/arm64/inst/halfword_imm.rs new file mode 100644 index 0000000000..0ddae8e8de --- /dev/null +++ b/yjit/src/asm/arm64/inst/halfword_imm.rs @@ -0,0 +1,179 @@ +use super::super::arg::truncate_imm; + +/// Whether this is a load or a store. +enum Op { + Load = 1, + Store = 0 +} + +/// The type of indexing to perform for this instruction. +enum Index { + /// No indexing. + None = 0b00, + + /// Mutate the register after the read. + PostIndex = 0b01, + + /// Mutate the register before the read. + PreIndex = 0b11 +} + +/// The struct that represents an A64 halfword instruction that can be encoded. +/// +/// LDRH/STRH +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 1 1 1 0 0 1 0 | +/// | op imm12.................................... rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +/// LDRH (pre-index/post-index) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 1 1 1 0 0 0 0 0 | +/// | op imm9.......................... index rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct HalfwordImm { + /// The number of the 32-bit register to be loaded. + rt: u8, + + /// The number of the 64-bit base register to calculate the memory address. + rn: u8, + + /// The type of indexing to perform for this instruction. + index: Index, + + /// The immediate offset from the base register. + imm: i16, + + /// The operation to perform. 
+ op: Op +} + +impl HalfwordImm { + /// LDRH + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- + pub fn ldrh(rt: u8, rn: u8, imm12: i16) -> Self { + Self { rt, rn, index: Index::None, imm: imm12, op: Op::Load } + } + + /// LDRH (pre-index) + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- + pub fn ldrh_pre(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, index: Index::PreIndex, imm: imm9, op: Op::Load } + } + + /// LDRH (post-index) + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate-- + pub fn ldrh_post(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, index: Index::PostIndex, imm: imm9, op: Op::Load } + } + + /// STRH + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate-- + pub fn strh(rt: u8, rn: u8, imm12: i16) -> Self { + Self { rt, rn, index: Index::None, imm: imm12, op: Op::Store } + } + + /// STRH (pre-index) + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate-- + pub fn strh_pre(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, index: Index::PreIndex, imm: imm9, op: Op::Store } + } + + /// STRH (post-index) + /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate-- + pub fn strh_post(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, index: Index::PostIndex, imm: imm9, op: Op::Store } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b111100; + +impl From<HalfwordImm> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: HalfwordImm) -> Self { + let (opc, imm) = match inst.index { + Index::None => { + assert_eq!(inst.imm & 1, 0, "immediate offset must be even"); + let imm12 = truncate_imm::<_, 12>(inst.imm / 2); + (0b100, imm12) + }, + Index::PreIndex | Index::PostIndex => { + let imm9 = truncate_imm::<_, 9>(inst.imm); + (0b000, (imm9 << 2) | (inst.index as u32)) + } + }; + + 0 + | (FAMILY << 25) + | ((opc | (inst.op as u32)) << 22) + | (imm << 10) + | ((inst.rn as u32) << 5) + | (inst.rt as u32) + } +} + +impl From<HalfwordImm> for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
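    // (Illustrative note, not from the original:) in the unsigned-offset form
    // the byte offset is scaled, i.e. stored divided by 2, which is why the
    // conversion above asserts that it is even. For example:
    //     let _: u32 = HalfwordImm::ldrh(0, 1, 8).into();  // stores imm12 = 4
    //     let _: u32 = HalfwordImm::ldrh(0, 1, 7).into();  // panics: offset must be even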
+ fn from(inst: HalfwordImm) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldrh() { + let inst = HalfwordImm::ldrh(0, 1, 8); + let result: u32 = inst.into(); + assert_eq!(0x79401020, result); + } + + #[test] + fn test_ldrh_pre() { + let inst = HalfwordImm::ldrh_pre(0, 1, 16); + let result: u32 = inst.into(); + assert_eq!(0x78410c20, result); + } + + #[test] + fn test_ldrh_post() { + let inst = HalfwordImm::ldrh_post(0, 1, 24); + let result: u32 = inst.into(); + assert_eq!(0x78418420, result); + } + + #[test] + fn test_ldrh_post_negative() { + let inst = HalfwordImm::ldrh_post(0, 1, -24); + let result: u32 = inst.into(); + assert_eq!(0x785e8420, result); + } + + #[test] + fn test_strh() { + let inst = HalfwordImm::strh(0, 1, 0); + let result: u32 = inst.into(); + assert_eq!(0x79000020, result); + } + + #[test] + fn test_strh_pre() { + let inst = HalfwordImm::strh_pre(0, 1, 0); + let result: u32 = inst.into(); + assert_eq!(0x78000c20, result); + } + + #[test] + fn test_strh_post() { + let inst = HalfwordImm::strh_post(0, 1, 0); + let result: u32 = inst.into(); + assert_eq!(0x78000420, result); + } +} diff --git a/yjit/src/asm/arm64/inst/load_literal.rs b/yjit/src/asm/arm64/inst/load_literal.rs new file mode 100644 index 0000000000..3eade205c8 --- /dev/null +++ b/yjit/src/asm/arm64/inst/load_literal.rs @@ -0,0 +1,89 @@ +use super::super::arg::{InstructionOffset, truncate_imm}; + +/// The size of the operands being operated on. +enum Opc { + Size32 = 0b00, + Size64 = 0b01, +} + +/// A convenience function so that we can convert the number of bits of an +/// register operand directly into an Sf enum variant. +impl From<u8> for Opc { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Opc::Size64, + 32 => Opc::Size32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The struct that represents an A64 load literal instruction that can be encoded. +/// +/// LDR +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 1 0 0 0 | +/// | opc.. imm19........................................................... rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LoadLiteral { + /// The number of the register to load the value into. + rt: u8, + + /// The PC-relative number of instructions to load the value from. + offset: InstructionOffset, + + /// The size of the operands being operated on. + opc: Opc +} + +impl LoadLiteral { + /// LDR (load literal) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--literal---Load-Register--literal--?lang=en + pub fn ldr_literal(rt: u8, offset: InstructionOffset, num_bits: u8) -> Self { + Self { rt, offset, opc: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From<LoadLiteral> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: LoadLiteral) -> Self { + 0 + | ((inst.opc as u32) << 30) + | (1 << 28) + | (FAMILY << 25) + | (truncate_imm::<_, 19>(inst.offset) << 5) + | (inst.rt as u32) + } +} + +impl From<LoadLiteral> for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
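    // (Illustrative note, not from the original:) the literal offset is a
    // count of instructions relative to this instruction's own address, so
    // ldr_literal(0, 5.into(), 64) in the test below loads the doubleword
    // sitting 20 bytes past the LDR itself.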
+ fn from(inst: LoadLiteral) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldr_positive() { + let inst = LoadLiteral::ldr_literal(0, 5.into(), 64); + let result: u32 = inst.into(); + assert_eq!(0x580000a0, result); + } + + #[test] + fn test_ldr_negative() { + let inst = LoadLiteral::ldr_literal(0, (-5).into(), 64); + let result: u32 = inst.into(); + assert_eq!(0x58ffff60, result); + } +} diff --git a/yjit/src/asm/arm64/inst/load_register.rs b/yjit/src/asm/arm64/inst/load_register.rs new file mode 100644 index 0000000000..3426b9ba5f --- /dev/null +++ b/yjit/src/asm/arm64/inst/load_register.rs @@ -0,0 +1,108 @@ +/// Whether or not to shift the register. +enum S { + Shift = 1, + NoShift = 0 +} + +/// The option for this instruction. +enum Option { + UXTW = 0b010, + LSL = 0b011, + SXTW = 0b110, + SXTX = 0b111 +} + +/// The size of the operands of this instruction. +enum Size { + Size32 = 0b10, + Size64 = 0b11 +} + +/// A convenience function so that we can convert the number of bits of an +/// register operand directly into a Size enum variant. +impl From<u8> for Size { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Size::Size64, + 32 => Size::Size32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The struct that represents an A64 load instruction that can be encoded. +/// +/// LDR +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 1 0 0 0 0 1 1 1 0 | +/// | size. rm.............. option.. S rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LoadRegister { + /// The number of the register to load the value into. + rt: u8, + + /// The base register with which to form the address. + rn: u8, + + /// Whether or not to shift the value of the register. + s: S, + + /// The option associated with this instruction that controls the shift. + option: Option, + + /// The number of the offset register. + rm: u8, + + /// The size of the operands. + size: Size +} + +impl LoadRegister { + /// LDR + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--register---Load-Register--register--?lang=en + pub fn ldr(rt: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rt, rn, s: S::NoShift, option: Option::LSL, rm, size: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From<LoadRegister> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: LoadRegister) -> Self { + 0 + | ((inst.size as u32) << 30) + | (0b11 << 28) + | (FAMILY << 25) + | (0b11 << 21) + | ((inst.rm as u32) << 16) + | ((inst.option as u32) << 13) + | ((inst.s as u32) << 12) + | (0b10 << 10) + | ((inst.rn as u32) << 5) + | (inst.rt as u32) + } +} + +impl From<LoadRegister> for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
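    // (Illustrative note, not from the original:) with option = LSL and
    // S = NoShift the offset register is used unscaled, so ldr(0, 1, 2, 64)
    // in the test below loads x0 from the address in x1 plus the value of x2.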
+ fn from(inst: LoadRegister) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldr() { + let inst = LoadRegister::ldr(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8626820, result); + } +} diff --git a/yjit/src/asm/arm64/inst/load_store.rs b/yjit/src/asm/arm64/inst/load_store.rs new file mode 100644 index 0000000000..b5c8a3c294 --- /dev/null +++ b/yjit/src/asm/arm64/inst/load_store.rs @@ -0,0 +1,249 @@ +use super::super::arg::truncate_imm; + +/// The size of the operands being operated on. +enum Size { + Size8 = 0b00, + Size16 = 0b01, + Size32 = 0b10, + Size64 = 0b11, +} + +/// A convenience function so that we can convert the number of bits of an +/// register operand directly into an Sf enum variant. +impl From<u8> for Size { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Size::Size64, + 32 => Size::Size32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The operation to perform for this instruction. +enum Opc { + STR = 0b00, + LDR = 0b01, + LDURSW = 0b10 +} + +/// What kind of indexing to perform for this instruction. +enum Index { + None = 0b00, + PostIndex = 0b01, + PreIndex = 0b11 +} + +/// The struct that represents an A64 load or store instruction that can be +/// encoded. +/// +/// LDR/LDUR/LDURSW/STR/STUR +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 1 0 0 0 0 | +/// | size. opc.. imm9.......................... idx.. rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LoadStore { + /// The number of the register to load the value into. + rt: u8, + + /// The base register with which to form the address. + rn: u8, + + /// What kind of indexing to perform for this instruction. + idx: Index, + + /// The optional signed immediate byte offset from the base register. + imm9: i16, + + /// The operation to perform for this instruction. + opc: Opc, + + /// The size of the operands being operated on. 
+ size: Size +} + +impl LoadStore { + /// LDR (immediate, post-index) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- + pub fn ldr_post(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::PostIndex, imm9, opc: Opc::LDR, size: num_bits.into() } + } + + /// LDR (immediate, pre-index) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- + pub fn ldr_pre(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::PreIndex, imm9, opc: Opc::LDR, size: num_bits.into() } + } + + /// LDUR (load register, unscaled) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--?lang=en + pub fn ldur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDR, size: num_bits.into() } + } + + /// LDURH Load Register Halfword (unscaled) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURH--Load-Register-Halfword--unscaled--?lang=en + pub fn ldurh(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDR, size: Size::Size16 } + } + + /// LDURB (load register, byte, unscaled) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURB--Load-Register-Byte--unscaled--?lang=en + pub fn ldurb(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDR, size: Size::Size8 } + } + + /// LDURSW (load register, unscaled, signed) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURSW--Load-Register-Signed-Word--unscaled--?lang=en + pub fn ldursw(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDURSW, size: Size::Size32 } + } + + /// STR (immediate, post-index) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STR--immediate---Store-Register--immediate-- + pub fn str_post(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::PostIndex, imm9, opc: Opc::STR, size: num_bits.into() } + } + + /// STR (immediate, pre-index) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STR--immediate---Store-Register--immediate-- + pub fn str_pre(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::PreIndex, imm9, opc: Opc::STR, size: num_bits.into() } + } + + /// STUR (store register, unscaled) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STUR--Store-Register--unscaled--?lang=en + pub fn stur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self { + Self { rt, rn, idx: Index::None, imm9, opc: Opc::STR, size: num_bits.into() } + } + + /// STURH (store register, halfword, unscaled) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STURH--Store-Register-Halfword--unscaled--?lang=en + pub fn sturh(rt: u8, rn: u8, imm9: i16) -> Self { + Self { rt, rn, idx: Index::None, imm9, opc: Opc::STR, size: Size::Size16 } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From<LoadStore> for u32 { + /// Convert an instruction into a 32-bit value. 
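Before the conversion below, a sketch of how the signed 9-bit offset is folded into the word: truncate_imm keeps the low 9 bits of the two's-complement value, so -1 becomes 0x1ff. The helper name here is invented for illustration and is not part of the patch.

```rust
// Illustrative repack of STUR (64-bit, no indexing) showing the imm9 truncation.
fn encode_stur_64(rt: u32, rn: u32, imm9: i16) -> u32 {
    let imm9 = (imm9 as u32) & 0x1ff; // 9-bit two's-complement truncation
    (0b11 << 30)         // size: 64-bit
        | (0b11 << 28)
        | (0b0100 << 25) // loads-and-stores family
        | (0b00 << 22)   // opc: STR
        | (imm9 << 12)
        | (0b00 << 10)   // idx: no pre/post indexing
        | (rn << 5)
        | rt
}

fn main() {
    // STUR X0, [X1], matching test_stur.
    assert_eq!(0xf8000020, encode_stur_64(0, 1, 0));
    // STUR X0, [X1, #-1]: -1 truncates to 0b1_1111_1111 (test_stur_negative_offset).
    assert_eq!(0xf81ff020, encode_stur_64(0, 1, -1));
}
```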
+ fn from(inst: LoadStore) -> Self { + 0 + | ((inst.size as u32) << 30) + | (0b11 << 28) + | (FAMILY << 25) + | ((inst.opc as u32) << 22) + | (truncate_imm::<_, 9>(inst.imm9) << 12) + | ((inst.idx as u32) << 10) + | ((inst.rn as u32) << 5) + | (inst.rt as u32) + } +} + +impl From<LoadStore> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: LoadStore) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldr_post() { + let inst = LoadStore::ldr_post(0, 1, 16, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8410420, result); + } + + #[test] + fn test_ldr_pre() { + let inst = LoadStore::ldr_pre(0, 1, 16, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8410c20, result); + } + + #[test] + fn test_ldur() { + let inst = LoadStore::ldur(0, 1, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8400020, result); + } + + #[test] + fn test_ldurb() { + let inst = LoadStore::ldurb(0, 1, 0); + let result: u32 = inst.into(); + assert_eq!(0x38400020, result); + } + + #[test] + fn test_ldurh() { + let inst = LoadStore::ldurh(0, 1, 0); + let result: u32 = inst.into(); + assert_eq!(0x78400020, result); + } + + #[test] + fn test_ldur_with_imm() { + let inst = LoadStore::ldur(0, 1, 123, 64); + let result: u32 = inst.into(); + assert_eq!(0xf847b020, result); + } + + #[test] + fn test_ldursw() { + let inst = LoadStore::ldursw(0, 1, 0); + let result: u32 = inst.into(); + assert_eq!(0xb8800020, result); + } + + #[test] + fn test_ldursw_with_imm() { + let inst = LoadStore::ldursw(0, 1, 123); + let result: u32 = inst.into(); + assert_eq!(0xb887b020, result); + } + + #[test] + fn test_str_post() { + let inst = LoadStore::str_post(0, 1, -16, 64); + let result: u32 = inst.into(); + assert_eq!(0xf81f0420, result); + } + + #[test] + fn test_str_pre() { + let inst = LoadStore::str_pre(0, 1, -16, 64); + let result: u32 = inst.into(); + assert_eq!(0xf81f0c20, result); + } + + #[test] + fn test_stur() { + let inst = LoadStore::stur(0, 1, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xf8000020, result); + } + + #[test] + fn test_stur_negative_offset() { + let inst = LoadStore::stur(0, 1, -1, 64); + let result: u32 = inst.into(); + assert_eq!(0xf81ff020, result); + } + + #[test] + fn test_stur_positive_offset() { + let inst = LoadStore::stur(0, 1, 255, 64); + let result: u32 = inst.into(); + assert_eq!(0xf80ff020, result); + } +} diff --git a/yjit/src/asm/arm64/inst/load_store_exclusive.rs b/yjit/src/asm/arm64/inst/load_store_exclusive.rs new file mode 100644 index 0000000000..8216c2200a --- /dev/null +++ b/yjit/src/asm/arm64/inst/load_store_exclusive.rs @@ -0,0 +1,109 @@ +/// The operation being performed for this instruction. +enum Op { + Store = 0, + Load = 1 +} + +/// The size of the registers being operated on. +enum Size { + Size32 = 0b10, + Size64 = 0b11 +} + +/// A convenience function so that we can convert the number of bits of an +/// register operand directly into a Size enum variant. +impl From<u8> for Size { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Size::Size64, + 32 => Size::Size32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The struct that represents an A64 load or store exclusive instruction that +/// can be encoded. 
+/// +/// LDAXR/STLXR +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 1 0 0 0 0 0 1 1 1 1 1 1 | +/// | size. op rs.............. rn.............. rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LoadStoreExclusive { + /// The number of the register to be loaded. + rt: u8, + + /// The base register with which to form the address. + rn: u8, + + /// The register to be used for the status result if it applies to this + /// operation. Otherwise it's the zero register. + rs: u8, + + /// The operation being performed for this instruction. + op: Op, + + /// The size of the registers being operated on. + size: Size +} + +impl LoadStoreExclusive { + /// LDAXR + /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LDAXR--Load-Acquire-Exclusive-Register- + pub fn ldaxr(rt: u8, rn: u8, num_bits: u8) -> Self { + Self { rt, rn, rs: 31, op: Op::Load, size: num_bits.into() } + } + + /// STLXR + /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/STLXR--Store-Release-Exclusive-Register- + pub fn stlxr(rs: u8, rt: u8, rn: u8, num_bits: u8) -> Self { + Self { rt, rn, rs, op: Op::Store, size: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From<LoadStoreExclusive> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: LoadStoreExclusive) -> Self { + 0 + | ((inst.size as u32) << 30) + | (FAMILY << 25) + | ((inst.op as u32) << 22) + | ((inst.rs as u32) << 16) + | (0b111111 << 10) + | ((inst.rn as u32) << 5) + | (inst.rt as u32) + } +} + +impl From<LoadStoreExclusive> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: LoadStoreExclusive) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldaxr() { + let inst = LoadStoreExclusive::ldaxr(16, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xc85ffc10, result); + } + + #[test] + fn test_stlxr() { + let inst = LoadStoreExclusive::stlxr(17, 16, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xc811fc10, result); + } +} diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs new file mode 100644 index 0000000000..b24916f8a5 --- /dev/null +++ b/yjit/src/asm/arm64/inst/logical_imm.rs @@ -0,0 +1,154 @@ +use super::super::arg::{BitmaskImmediate, Sf}; + +// Which operation to perform. +enum Opc { + /// The AND operation. + And = 0b00, + + /// The ORR operation. + Orr = 0b01, + + /// The EOR operation. + Eor = 0b10, + + /// The ANDS operation. + Ands = 0b11 +} + +/// The struct that represents an A64 bitwise immediate instruction that can be +/// encoded. +/// +/// AND/ORR/ANDS (immediate) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 1 0 0 | +/// | sf opc.. N immr............... imms............... rn.............. rd.............. 
| +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LogicalImm { + /// The register number of the destination register. + rd: u8, + + /// The register number of the first operand register. + rn: u8, + + /// The immediate value to test. + imm: BitmaskImmediate, + + /// The opcode for this instruction. + opc: Opc, + + /// Whether or not this instruction is operating on 64-bit operands. + sf: Sf +} + +impl LogicalImm { + /// AND (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--immediate---Bitwise-AND--immediate--?lang=en + pub fn and(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, opc: Opc::And, sf: num_bits.into() } + } + + /// ANDS (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--immediate---Bitwise-AND--immediate---setting-flags-?lang=en + pub fn ands(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, opc: Opc::Ands, sf: num_bits.into() } + } + + /// EOR (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/EOR--immediate---Bitwise-Exclusive-OR--immediate-- + pub fn eor(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, opc: Opc::Eor, sf: num_bits.into() } + } + + /// MOV (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--bitmask-immediate---Move--bitmask-immediate---an-alias-of-ORR--immediate--?lang=en + pub fn mov(rd: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn: 0b11111, imm, opc: Opc::Orr, sf: num_bits.into() } + } + + /// ORR (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--immediate---Bitwise-OR--immediate-- + pub fn orr(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self { rd, rn, imm, opc: Opc::Orr, sf: num_bits.into() } + } + + /// TST (bitmask immediate) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--immediate---Test-bits--immediate---an-alias-of-ANDS--immediate--?lang=en + pub fn tst(rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self { + Self::ands(31, rn, imm, num_bits) + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#log_imm +const FAMILY: u32 = 0b1001; + +impl From<LogicalImm> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: LogicalImm) -> Self { + let imm: u32 = inst.imm.encode(); + + 0 + | ((inst.sf as u32) << 31) + | ((inst.opc as u32) << 29) + | (FAMILY << 25) + | (imm << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From<LogicalImm> for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
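An aside before the byte-array conversion below: for the 64-bit value 7 (0b111), BitmaskImmediate::try_from yields N=1, immr=0, imms=0b000010, and those three fields land at bits 22, 16-21 and 10-15 of the word. A sketch with the triple spelled out; the helper name is invented for illustration.

```rust
// Illustrative AND (immediate) assembly from an explicit (N, immr, imms) triple.
fn encode_and_imm_64(rd: u32, rn: u32, n: u32, immr: u32, imms: u32) -> u32 {
    (1 << 31)            // sf: 64-bit
        | (0b00 << 29)   // opc: AND
        | (0b1001 << 25) // logical-immediate family
        | (n << 22)
        | (immr << 16)
        | (imms << 10)
        | (rn << 5)
        | rd
}

fn main() {
    // AND X0, X1, #7 matches the test_and expectation below.
    assert_eq!(0x92400820, encode_and_imm_64(0, 1, 1, 0, 0b000010));
}
```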
+ fn from(inst: LogicalImm) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_and() { + let inst = LogicalImm::and(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0x92400820, result); + } + + #[test] + fn test_ands() { + let inst = LogicalImm::ands(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xf2400820, result); + } + + #[test] + fn test_eor() { + let inst = LogicalImm::eor(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xd2400820, result); + } + + #[test] + fn test_mov() { + let inst = LogicalImm::mov(0, 0x5555555555555555.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xb200f3e0, result); + } + + #[test] + fn test_orr() { + let inst = LogicalImm::orr(0, 1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xb2400820, result); + } + + #[test] + fn test_tst() { + let inst = LogicalImm::tst(1, 7.try_into().unwrap(), 64); + let result: u32 = inst.into(); + assert_eq!(0xf240083f, result); + } +} diff --git a/yjit/src/asm/arm64/inst/logical_reg.rs b/yjit/src/asm/arm64/inst/logical_reg.rs new file mode 100644 index 0000000000..a96805c9f9 --- /dev/null +++ b/yjit/src/asm/arm64/inst/logical_reg.rs @@ -0,0 +1,207 @@ +use super::super::arg::{Sf, truncate_uimm}; + +/// Whether or not this is a NOT instruction. +enum N { + /// This is not a NOT instruction. + No = 0, + + /// This is a NOT instruction. + Yes = 1 +} + +/// The type of shift to perform on the second operand register. +enum Shift { + LSL = 0b00, // logical shift left (unsigned) + LSR = 0b01, // logical shift right (unsigned) + ASR = 0b10, // arithmetic shift right (signed) + ROR = 0b11 // rotate right (unsigned) +} + +// Which operation to perform. +enum Opc { + /// The AND operation. + And = 0b00, + + /// The ORR operation. + Orr = 0b01, + + /// The EOR operation. + Eor = 0b10, + + /// The ANDS operation. + Ands = 0b11 +} + +/// The struct that represents an A64 logical register instruction that can be +/// encoded. +/// +/// AND/ORR/ANDS (shifted register) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 0 1 0 | +/// | sf opc.. shift N rm.............. imm6............... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct LogicalReg { + /// The register number of the destination register. + rd: u8, + + /// The register number of the first operand register. + rn: u8, + + /// The amount to shift the second operand register. + imm6: u8, + + /// The register number of the second operand register. + rm: u8, + + /// Whether or not this is a NOT instruction. + n: N, + + /// The type of shift to perform on the second operand register. + shift: Shift, + + /// The opcode for this instruction. + opc: Opc, + + /// Whether or not this instruction is operating on 64-bit operands. 
+ sf: Sf +} + +impl LogicalReg { + /// AND (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--shifted-register---Bitwise-AND--shifted-register--?lang=en + pub fn and(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::And, sf: num_bits.into() } + } + + /// ANDS (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--shifted-register---Bitwise-AND--shifted-register---setting-flags-?lang=en + pub fn ands(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } + } + + /// EOR (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/EOR--shifted-register---Bitwise-Exclusive-OR--shifted-register-- + pub fn eor(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Eor, sf: num_bits.into() } + } + + /// MOV (register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--?lang=en + pub fn mov(rd: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn: 0b11111, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// MVN (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MVN--Bitwise-NOT--an-alias-of-ORN--shifted-register--?lang=en + pub fn mvn(rd: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn: 0b11111, imm6: 0, rm, n: N::Yes, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// ORN (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORN--shifted-register---Bitwise-OR-NOT--shifted-register-- + pub fn orn(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, n: N::Yes, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// ORR (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--shifted-register---Bitwise-OR--shifted-register-- + pub fn orr(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() } + } + + /// TST (shifted register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--shifted-register---Test--shifted-register---an-alias-of-ANDS--shifted-register--?lang=en + pub fn tst(rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd: 31, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en +const FAMILY: u32 = 0b0101; + +impl From<LogicalReg> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: LogicalReg) -> Self { + 0 + | ((inst.sf as u32) << 31) + | ((inst.opc as u32) << 29) + | (FAMILY << 25) + | ((inst.shift as u32) << 22) + | ((inst.n as u32) << 21) + | ((inst.rm as u32) << 16) + | (truncate_uimm::<_, 6>(inst.imm6) << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From<LogicalReg> for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
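An aside before the byte-array conversion below: MOV (register) and MVN differ only in the N bit at bit 21 (ORR versus ORN), with rn pinned to the zero register. A sketch making that single-bit difference explicit; the helper name is invented for illustration.

```rust
// Illustrative ORR/ORN (shifted register) with no shift applied.
fn encode_orr_shifted_64(rd: u32, rn: u32, rm: u32, n: u32) -> u32 {
    (1 << 31)            // sf: 64-bit
        | (0b01 << 29)   // opc: ORR
        | (0b0101 << 25) // logical shifted-register family
        | (0b00 << 22)   // shift: LSL
        | (n << 21)      // N: 0 = ORR, 1 = ORN
        | (rm << 16)
        | (0 << 10)      // imm6: shift amount of zero
        | (rn << 5)
        | rd
}

fn main() {
    assert_eq!(0xaa0103e0, encode_orr_shifted_64(0, 31, 1, 0)); // MOV X0, X1 (test_mov)
    assert_eq!(0xaa2103e0, encode_orr_shifted_64(0, 31, 1, 1)); // MVN X0, X1 (test_mvn)
}
```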
+ fn from(inst: LogicalReg) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_and() { + let inst = LogicalReg::and(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0x8a020020, result); + } + + #[test] + fn test_ands() { + let inst = LogicalReg::ands(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xea020020, result); + } + + #[test] + fn test_eor() { + let inst = LogicalReg::eor(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xca020020, result); + } + + #[test] + fn test_mov() { + let inst = LogicalReg::mov(0, 1, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa0103e0, result); + } + + #[test] + fn test_mvn() { + let inst = LogicalReg::mvn(0, 1, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa2103e0, result); + } + + #[test] + fn test_orn() { + let inst = LogicalReg::orn(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa220020, result); + } + + #[test] + fn test_orr() { + let inst = LogicalReg::orr(0, 1, 2, 64); + let result: u32 = inst.into(); + assert_eq!(0xaa020020, result); + } + + #[test] + fn test_tst() { + let inst = LogicalReg::tst(0, 1, 64); + let result: u32 = inst.into(); + assert_eq!(0xea01001f, result); + } +} diff --git a/yjit/src/asm/arm64/inst/madd.rs b/yjit/src/asm/arm64/inst/madd.rs new file mode 100644 index 0000000000..683e643189 --- /dev/null +++ b/yjit/src/asm/arm64/inst/madd.rs @@ -0,0 +1,73 @@ +use super::super::arg::Sf; + +/// The struct that represents an A64 multiply-add instruction that can be +/// encoded. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 0 1 1 0 1 1 0 0 0 0 | +/// | sf rm.............. ra.............. rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct MAdd { + /// The number of the general-purpose destination register. + rd: u8, + + /// The number of the first general-purpose source register. + rn: u8, + + /// The number of the third general-purpose source register. + ra: u8, + + /// The number of the second general-purpose source register. + rm: u8, + + /// The size of the registers of this instruction. + sf: Sf +} + +impl MAdd { + /// MUL + /// https://developer.arm.com/documentation/ddi0602/2023-06/Base-Instructions/MUL--Multiply--an-alias-of-MADD- + pub fn mul(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self { + Self { rd, rn, ra: 0b11111, rm, sf: num_bits.into() } + } +} + +impl From<MAdd> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: MAdd) -> Self { + 0 + | ((inst.sf as u32) << 31) + | (0b11011 << 24) + | ((inst.rm as u32) << 16) + | ((inst.ra as u32) << 10) + | ((inst.rn as u32) << 5) + | (inst.rd as u32) + } +} + +impl From<MAdd> for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
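An aside before the byte-array conversion below: MUL is MADD with the accumulator pinned to the zero register (ra = 31), which is why bits 10-14 of the expected values read 0b11111 (the 0x7c00 in 0x9b027c20). A sketch with ra made explicit; the helper name is invented for illustration.

```rust
// Illustrative MADD packing; MUL passes ra = 31 (XZR).
fn encode_madd_64(rd: u32, rn: u32, rm: u32, ra: u32) -> u32 {
    (1 << 31)             // sf: 64-bit
        | (0b11011 << 24)
        | (rm << 16)
        | (ra << 10)
        | (rn << 5)
        | rd
}

fn main() {
    // MUL X0, X1, X2 == MADD X0, X1, X2, XZR (test_mul_64).
    assert_eq!(0x9b027c20, encode_madd_64(0, 1, 2, 31));
}
```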
+ fn from(inst: MAdd) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_mul_32() { + let result: u32 = MAdd::mul(0, 1, 2, 32).into(); + assert_eq!(0x1B027C20, result); + } + + #[test] + fn test_mul_64() { + let result: u32 = MAdd::mul(0, 1, 2, 64).into(); + assert_eq!(0x9B027C20, result); + } +} diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs new file mode 100644 index 0000000000..bfffd914ef --- /dev/null +++ b/yjit/src/asm/arm64/inst/mod.rs @@ -0,0 +1,54 @@ +// This module contains various A64 instructions and the logic necessary to +// encode them into u32s. + +mod atomic; +mod branch; +mod branch_cond; +mod breakpoint; +mod call; +mod conditional; +mod data_imm; +mod data_reg; +mod halfword_imm; +mod load_literal; +mod load_register; +mod load_store; +mod load_store_exclusive; +mod logical_imm; +mod logical_reg; +mod madd; +mod smulh; +mod mov; +mod nop; +mod pc_rel; +mod reg_pair; +mod sbfm; +mod shift_imm; +mod sys_reg; +mod test_bit; + +pub use atomic::Atomic; +pub use branch::Branch; +pub use branch_cond::BranchCond; +pub use breakpoint::Breakpoint; +pub use call::Call; +pub use conditional::Conditional; +pub use data_imm::DataImm; +pub use data_reg::DataReg; +pub use halfword_imm::HalfwordImm; +pub use load_literal::LoadLiteral; +pub use load_register::LoadRegister; +pub use load_store::LoadStore; +pub use load_store_exclusive::LoadStoreExclusive; +pub use logical_imm::LogicalImm; +pub use logical_reg::LogicalReg; +pub use madd::MAdd; +pub use smulh::SMulH; +pub use mov::Mov; +pub use nop::Nop; +pub use pc_rel::PCRelative; +pub use reg_pair::RegisterPair; +pub use sbfm::SBFM; +pub use shift_imm::ShiftImm; +pub use sys_reg::SysReg; +pub use test_bit::TestBit; diff --git a/yjit/src/asm/arm64/inst/mov.rs b/yjit/src/asm/arm64/inst/mov.rs new file mode 100644 index 0000000000..e7cb9215b0 --- /dev/null +++ b/yjit/src/asm/arm64/inst/mov.rs @@ -0,0 +1,155 @@ +use super::super::arg::Sf; + +/// Which operation is being performed. +enum Op { + /// A movz operation which zeroes out the other bits. + MOVZ = 0b10, + + /// A movk operation which keeps the other bits in place. + MOVK = 0b11 +} + +/// How much to shift the immediate by. +enum Hw { + LSL0 = 0b00, + LSL16 = 0b01, + LSL32 = 0b10, + LSL48 = 0b11 +} + +impl From<u8> for Hw { + fn from(shift: u8) -> Self { + match shift { + 0 => Hw::LSL0, + 16 => Hw::LSL16, + 32 => Hw::LSL32, + 48 => Hw::LSL48, + _ => panic!("Invalid value for shift: {}", shift) + } + } +} + +/// The struct that represents a MOVK or MOVZ instruction. +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 1 0 1 | +/// | sf op... hw... imm16.................................................. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Mov { + /// The register number of the destination register. + rd: u8, + + /// The value to move into the register. + imm16: u16, + + /// The shift of the value to move. + hw: Hw, + + /// Which operation is being performed. + op: Op, + + /// Whether or not this instruction is operating on 64-bit operands. 
+ sf: Sf +} + +impl Mov { + /// MOVK + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVK--Move-wide-with-keep-?lang=en + pub fn movk(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self { + Self { rd, imm16, hw: hw.into(), op: Op::MOVK, sf: num_bits.into() } + } + + /// MOVZ + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVZ--Move-wide-with-zero-?lang=en + pub fn movz(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self { + Self { rd, imm16, hw: hw.into(), op: Op::MOVZ, sf: num_bits.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en +const FAMILY: u32 = 0b1000; + +impl From<Mov> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: Mov) -> Self { + 0 + | ((inst.sf as u32) << 31) + | ((inst.op as u32) << 29) + | (FAMILY << 25) + | (0b101 << 23) + | ((inst.hw as u32) << 21) + | ((inst.imm16 as u32) << 5) + | inst.rd as u32 + } +} + +impl From<Mov> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Mov) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_movk_unshifted() { + let inst = Mov::movk(0, 123, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xf2800f60, result); + } + + #[test] + fn test_movk_shifted_16() { + let inst = Mov::movk(0, 123, 16, 64); + let result: u32 = inst.into(); + assert_eq!(0xf2A00f60, result); + } + + #[test] + fn test_movk_shifted_32() { + let inst = Mov::movk(0, 123, 32, 64); + let result: u32 = inst.into(); + assert_eq!(0xf2C00f60, result); + } + + #[test] + fn test_movk_shifted_48() { + let inst = Mov::movk(0, 123, 48, 64); + let result: u32 = inst.into(); + assert_eq!(0xf2e00f60, result); + } + + #[test] + fn test_movz_unshifted() { + let inst = Mov::movz(0, 123, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xd2800f60, result); + } + + #[test] + fn test_movz_shifted_16() { + let inst = Mov::movz(0, 123, 16, 64); + let result: u32 = inst.into(); + assert_eq!(0xd2a00f60, result); + } + + #[test] + fn test_movz_shifted_32() { + let inst = Mov::movz(0, 123, 32, 64); + let result: u32 = inst.into(); + assert_eq!(0xd2c00f60, result); + } + + #[test] + fn test_movz_shifted_48() { + let inst = Mov::movz(0, 123, 48, 64); + let result: u32 = inst.into(); + assert_eq!(0xd2e00f60, result); + } +} diff --git a/yjit/src/asm/arm64/inst/nop.rs b/yjit/src/asm/arm64/inst/nop.rs new file mode 100644 index 0000000000..d58b3574a9 --- /dev/null +++ b/yjit/src/asm/arm64/inst/nop.rs @@ -0,0 +1,44 @@ +/// The struct that represents an A64 nop instruction that can be encoded. +/// +/// NOP +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 0 1 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct Nop; + +impl Nop { + /// NOP + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/NOP--No-Operation- + pub fn nop() -> Self { + Self {} + } +} + +impl From<Nop> for u32 { + /// Convert an instruction into a 32-bit value. 
+ fn from(_inst: Nop) -> Self { + 0b11010101000000110010000000011111 + } +} + +impl From<Nop> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: Nop) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_nop() { + let inst = Nop::nop(); + let result: u32 = inst.into(); + assert_eq!(0xd503201f, result); + } +} diff --git a/yjit/src/asm/arm64/inst/pc_rel.rs b/yjit/src/asm/arm64/inst/pc_rel.rs new file mode 100644 index 0000000000..bd1a2b9367 --- /dev/null +++ b/yjit/src/asm/arm64/inst/pc_rel.rs @@ -0,0 +1,107 @@ +/// Which operation to perform for the PC-relative instruction. +enum Op { + /// Form a PC-relative address. + ADR = 0, + + /// Form a PC-relative address to a 4KB page. + ADRP = 1 +} + +/// The struct that represents an A64 PC-relative address instruction that can +/// be encoded. +/// +/// ADR +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 0 0 | +/// | op immlo immhi........................................................... rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct PCRelative { + /// The number for the general-purpose register to load the address into. + rd: u8, + + /// The number of bytes to add to the PC to form the address. + imm: i32, + + /// Which operation to perform for this instruction. + op: Op +} + +impl PCRelative { + /// ADR + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/ADR--Form-PC-relative-address- + pub fn adr(rd: u8, imm: i32) -> Self { + Self { rd, imm, op: Op::ADR } + } + + /// ADRP + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/ADRP--Form-PC-relative-address-to-4KB-page- + pub fn adrp(rd: u8, imm: i32) -> Self { + Self { rd, imm: imm >> 12, op: Op::ADRP } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en +const FAMILY: u32 = 0b1000; + +impl From<PCRelative> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: PCRelative) -> Self { + let immlo = (inst.imm & 0b11) as u32; + let mut immhi = ((inst.imm >> 2) & ((1 << 18) - 1)) as u32; + + // Toggle the sign bit if necessary. + if inst.imm < 0 { + immhi |= 1 << 18; + } + + 0 + | ((inst.op as u32) << 31) + | (immlo << 29) + | (FAMILY << 25) + | (immhi << 5) + | inst.rd as u32 + } +} + +impl From<PCRelative> for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
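An aside before the byte-array conversion below: ADRP works in 4KiB pages, so one reasonable way for a caller to produce the immediate is the page-aligned delta between the target and the instruction's own address; adrp() then drops the low 12 bits before splitting into immlo/immhi. The helper name and addresses here are made up for illustration.

```rust
// Illustrative computation of the byte offset a caller might hand to adrp().
fn adrp_page_offset(pc: u64, target: u64) -> i64 {
    let pc_page = (pc & !0xfff) as i64;
    let target_page = (target & !0xfff) as i64;
    target_page - pc_page
}

fn main() {
    // One page ahead: the immediate is 0x1000, and adrp() stores 0x1000 >> 12 = 1.
    assert_eq!(0x1000, adrp_page_offset(0x1_0000, 0x1_1000));
    assert_eq!(-0x1000, adrp_page_offset(0x1_1000, 0x1_0000));
}
```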
+ fn from(inst: PCRelative) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_adr_positive() { + let inst = PCRelative::adr(0, 5); + let result: u32 = inst.into(); + assert_eq!(0x30000020, result); + } + + #[test] + fn test_adr_negative() { + let inst = PCRelative::adr(0, -5); + let result: u32 = inst.into(); + assert_eq!(0x70ffffc0, result); + } + + #[test] + fn test_adrp_positive() { + let inst = PCRelative::adrp(0, 0x4000); + let result: u32 = inst.into(); + assert_eq!(0x90000020, result); + } + + #[test] + fn test_adrp_negative() { + let inst = PCRelative::adrp(0, -0x4000); + let result: u32 = inst.into(); + assert_eq!(0x90ffffe0, result); + } +} diff --git a/yjit/src/asm/arm64/inst/reg_pair.rs b/yjit/src/asm/arm64/inst/reg_pair.rs new file mode 100644 index 0000000000..87690e3b4a --- /dev/null +++ b/yjit/src/asm/arm64/inst/reg_pair.rs @@ -0,0 +1,212 @@ +use super::super::arg::truncate_imm; + +/// The operation to perform for this instruction. +enum Opc { + /// When the registers are 32-bits wide. + Opc32 = 0b00, + + /// When the registers are 64-bits wide. + Opc64 = 0b10 +} + +/// The kind of indexing to perform for this instruction. +enum Index { + StorePostIndex = 0b010, + LoadPostIndex = 0b011, + StoreSignedOffset = 0b100, + LoadSignedOffset = 0b101, + StorePreIndex = 0b110, + LoadPreIndex = 0b111 +} + +/// A convenience function so that we can convert the number of bits of a +/// register operand directly into an Opc variant. +impl From<u8> for Opc { + fn from(num_bits: u8) -> Self { + match num_bits { + 64 => Opc::Opc64, + 32 => Opc::Opc32, + _ => panic!("Invalid number of bits: {}", num_bits) + } + } +} + +/// The struct that represents an A64 register pair instruction that can be +/// encoded. +/// +/// STP/LDP +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 0 1 0 0 | +/// | opc index..... imm7.................... rt2............. rn.............. rt1............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct RegisterPair { + /// The number of the first register to be transferred. + rt1: u8, + + /// The number of the base register. + rn: u8, + + /// The number of the second register to be transferred. + rt2: u8, + + /// The signed immediate byte offset, a multiple of 8. + imm7: i16, + + /// The kind of indexing to use for this instruction. + index: Index, + + /// The operation to be performed (in terms of size). + opc: Opc +} + +impl RegisterPair { + /// Create a register pair instruction with a given indexing mode. + fn new(rt1: u8, rt2: u8, rn: u8, disp: i16, index: Index, num_bits: u8) -> Self { + Self { rt1, rn, rt2, imm7: disp / 8, index, opc: num_bits.into() } + } + + /// LDP (signed offset) + /// LDP <Xt1>, <Xt2>, [<Xn|SP>{, #<imm>}] + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en + pub fn ldp(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::LoadSignedOffset, num_bits) + } + + /// LDP (pre-index) + /// LDP <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]! 
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en + pub fn ldp_pre(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::LoadPreIndex, num_bits) + } + + /// LDP (post-index) + /// LDP <Xt1>, <Xt2>, [<Xn|SP>], #<imm> + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en + pub fn ldp_post(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::LoadPostIndex, num_bits) + } + + /// STP (signed offset) + /// STP <Xt1>, <Xt2>, [<Xn|SP>{, #<imm>}] + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en + pub fn stp(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::StoreSignedOffset, num_bits) + } + + /// STP (pre-index) + /// STP <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]! + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en + pub fn stp_pre(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::StorePreIndex, num_bits) + } + + /// STP (post-index) + /// STP <Xt1>, <Xt2>, [<Xn|SP>], #<imm> + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en + pub fn stp_post(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self { + Self::new(rt1, rt2, rn, disp, Index::StorePostIndex, num_bits) + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en +const FAMILY: u32 = 0b0100; + +impl From<RegisterPair> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: RegisterPair) -> Self { + 0 + | ((inst.opc as u32) << 30) + | (1 << 29) + | (FAMILY << 25) + | ((inst.index as u32) << 22) + | (truncate_imm::<_, 7>(inst.imm7) << 15) + | ((inst.rt2 as u32) << 10) + | ((inst.rn as u32) << 5) + | (inst.rt1 as u32) + } +} + +impl From<RegisterPair> for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
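An aside before the byte-array conversion below: the pair offset is stored scaled by 8 in a signed 7-bit field, so with 64-bit registers the reachable displacements are -512..=504 in steps of 8. A sketch mirroring the `imm7: disp / 8` conversion in `new`; the helper name is invented for illustration.

```rust
// Illustrative range check for the scaled STP/LDP displacement.
fn stp_ldp_imm7(disp_bytes: i16) -> Option<i16> {
    if disp_bytes % 8 != 0 {
        return None; // not representable: must be a multiple of 8
    }
    let imm7 = disp_bytes / 8;
    if (-64..=63).contains(&imm7) { Some(imm7) } else { None }
}

fn main() {
    assert_eq!(Some(63), stp_ldp_imm7(504));   // maximum displacement (test_ldp_maximum_displacement)
    assert_eq!(Some(-64), stp_ldp_imm7(-512)); // minimum displacement (test_ldp_minimum_displacement)
    assert_eq!(None, stp_ldp_imm7(512));       // out of range
    assert_eq!(None, stp_ldp_imm7(12));        // not a multiple of 8
}
```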
+ fn from(inst: RegisterPair) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ldp() { + let inst = RegisterPair::ldp(0, 1, 2, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9400440, result); + } + + #[test] + fn test_ldp_maximum_displacement() { + let inst = RegisterPair::ldp(0, 1, 2, 504, 64); + let result: u32 = inst.into(); + assert_eq!(0xa95f8440, result); + } + + #[test] + fn test_ldp_minimum_displacement() { + let inst = RegisterPair::ldp(0, 1, 2, -512, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9600440, result); + } + + #[test] + fn test_ldp_pre() { + let inst = RegisterPair::ldp_pre(0, 1, 2, 256, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9d00440, result); + } + + #[test] + fn test_ldp_post() { + let inst = RegisterPair::ldp_post(0, 1, 2, 256, 64); + let result: u32 = inst.into(); + assert_eq!(0xa8d00440, result); + } + + #[test] + fn test_stp() { + let inst = RegisterPair::stp(0, 1, 2, 0, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9000440, result); + } + + #[test] + fn test_stp_maximum_displacement() { + let inst = RegisterPair::stp(0, 1, 2, 504, 64); + let result: u32 = inst.into(); + assert_eq!(0xa91f8440, result); + } + + #[test] + fn test_stp_minimum_displacement() { + let inst = RegisterPair::stp(0, 1, 2, -512, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9200440, result); + } + + #[test] + fn test_stp_pre() { + let inst = RegisterPair::stp_pre(0, 1, 2, 256, 64); + let result: u32 = inst.into(); + assert_eq!(0xa9900440, result); + } + + #[test] + fn test_stp_post() { + let inst = RegisterPair::stp_post(0, 1, 2, 256, 64); + let result: u32 = inst.into(); + assert_eq!(0xa8900440, result); + } +} diff --git a/yjit/src/asm/arm64/inst/sbfm.rs b/yjit/src/asm/arm64/inst/sbfm.rs new file mode 100644 index 0000000000..8602998980 --- /dev/null +++ b/yjit/src/asm/arm64/inst/sbfm.rs @@ -0,0 +1,103 @@ +use super::super::arg::{Sf, truncate_uimm}; + +/// The struct that represents an A64 signed bitfield move instruction that can +/// be encoded. +/// +/// SBFM +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 0 1 0 0 1 1 0 | +/// | sf N immr............... imms............... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct SBFM { + /// The number for the general-purpose register to load the value into. + rd: u8, + + /// The number for the general-purpose register to copy from. + rn: u8, + + /// The leftmost bit number to be moved from the source. + imms: u8, + + // The right rotate amount. + immr: u8, + + /// Whether or not this is a 64-bit operation. + n: bool, + + /// The size of this operation. 
+ sf: Sf +} + +impl SBFM { + /// ASR + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ASR--immediate---Arithmetic-Shift-Right--immediate---an-alias-of-SBFM-?lang=en + pub fn asr(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self { + let (imms, n) = if num_bits == 64 { + (0b111111, true) + } else { + (0b011111, false) + }; + + Self { rd, rn, immr: shift, imms, n, sf: num_bits.into() } + } + + /// SXTW + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SXTW--Sign-Extend-Word--an-alias-of-SBFM-?lang=en + pub fn sxtw(rd: u8, rn: u8) -> Self { + Self { rd, rn, immr: 0, imms: 31, n: true, sf: Sf::Sf64 } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#bitfield +const FAMILY: u32 = 0b1001; + +impl From<SBFM> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: SBFM) -> Self { + 0 + | ((inst.sf as u32) << 31) + | (FAMILY << 25) + | (1 << 24) + | ((inst.n as u32) << 22) + | (truncate_uimm::<_, 6>(inst.immr) << 16) + | (truncate_uimm::<_, 6>(inst.imms) << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From<SBFM> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: SBFM) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_asr_32_bits() { + let inst = SBFM::asr(0, 1, 2, 32); + let result: u32 = inst.into(); + assert_eq!(0x13027c20, result); + } + + #[test] + fn test_asr_64_bits() { + let inst = SBFM::asr(10, 11, 5, 64); + let result: u32 = inst.into(); + assert_eq!(0x9345fd6a, result); + } + + #[test] + fn test_sxtw() { + let inst = SBFM::sxtw(0, 1); + let result: u32 = inst.into(); + assert_eq!(0x93407c20, result); + } +} diff --git a/yjit/src/asm/arm64/inst/shift_imm.rs b/yjit/src/asm/arm64/inst/shift_imm.rs new file mode 100644 index 0000000000..3d2685a997 --- /dev/null +++ b/yjit/src/asm/arm64/inst/shift_imm.rs @@ -0,0 +1,147 @@ +use super::super::arg::Sf; + +/// The operation to perform for this instruction. +enum Opc { + /// Logical left shift + LSL, + + /// Logical shift right + LSR +} + +/// The struct that represents an A64 unsigned bitfield move instruction that +/// can be encoded. +/// +/// LSL (immediate) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 1 0 0 1 1 0 | +/// | sf N immr............... imms............... rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct ShiftImm { + /// The register number of the destination register. + rd: u8, + + /// The register number of the first operand register. + rn: u8, + + /// The immediate value to shift by. + shift: u8, + + /// The opcode for this instruction. + opc: Opc, + + /// Whether or not this instruction is operating on 64-bit operands. 
+ sf: Sf +} + +impl ShiftImm { + /// LSL (immediate) + /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LSL--immediate---Logical-Shift-Left--immediate---an-alias-of-UBFM-?lang=en + pub fn lsl(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self { + ShiftImm { rd, rn, shift, opc: Opc::LSL, sf: num_bits.into() } + } + + /// LSR (immediate) + /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LSR--immediate---Logical-Shift-Right--immediate---an-alias-of-UBFM-?lang=en + pub fn lsr(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self { + ShiftImm { rd, rn, shift, opc: Opc::LSR, sf: num_bits.into() } + } + + /// Returns a triplet of (n, immr, imms) encoded in u32s for this + /// instruction. This mirrors how they will be encoded in the actual bits. + fn bitmask(&self) -> (u32, u32, u32) { + match self.opc { + // The key insight is a little buried in the docs, but effectively: + // LSL <Wd>, <Wn>, #<shift> == UBFM <Wd>, <Wn>, #(-<shift> MOD 32), #(31-<shift>) + // LSL <Xd>, <Xn>, #<shift> == UBFM <Xd>, <Xn>, #(-<shift> MOD 64), #(63-<shift>) + Opc::LSL => { + let shift = -(self.shift as i16); + + match self.sf { + Sf::Sf32 => ( + 0, + (shift.rem_euclid(32) & 0x3f) as u32, + ((31 - self.shift) & 0x3f) as u32 + ), + Sf::Sf64 => ( + 1, + (shift.rem_euclid(64) & 0x3f) as u32, + ((63 - self.shift) & 0x3f) as u32 + ) + } + }, + // Similar to LSL: + // LSR <Wd>, <Wn>, #<shift> == UBFM <Wd>, <Wn>, #<shift>, #31 + // LSR <Xd>, <Xn>, #<shift> == UBFM <Xd>, <Xn>, #<shift>, #63 + Opc::LSR => { + match self.sf { + Sf::Sf32 => (0, (self.shift & 0x3f) as u32, 31), + Sf::Sf64 => (1, (self.shift & 0x3f) as u32, 63) + } + } + } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#bitfield +const FAMILY: u32 = 0b10011; + +impl From<ShiftImm> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: ShiftImm) -> Self { + let (n, immr, imms) = inst.bitmask(); + + 0 + | ((inst.sf as u32) << 31) + | (1 << 30) + | (FAMILY << 24) + | (n << 22) + | (immr << 16) + | (imms << 10) + | ((inst.rn as u32) << 5) + | inst.rd as u32 + } +} + +impl From<ShiftImm> for [u8; 4] { + /// Convert an instruction into a 4 byte array. 
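An aside before the byte-array conversion below: the UBFM aliasing from bitmask() worked through for a 64-bit LSL by 7. The helper name is invented for illustration.

```rust
// Illustrative computation of the UBFM (immr, imms) pair for LSL #shift, 64-bit.
fn lsl64_ubfm_fields(shift: u8) -> (u32, u32) {
    let immr = (-(shift as i16)).rem_euclid(64) as u32; // (-shift) MOD 64
    let imms = (63 - shift) as u32;                     // 63 - shift
    (immr, imms)
}

fn main() {
    // LSL X0, X1, #7 == UBFM X0, X1, #57, #56, which encodes to 0xd379e020 (test_lsl_64).
    assert_eq!((57, 56), lsl64_ubfm_fields(7));
}
```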
+ fn from(inst: ShiftImm) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_lsl_32() { + let inst = ShiftImm::lsl(0, 1, 7, 32); + let result: u32 = inst.into(); + assert_eq!(0x53196020, result); + } + + #[test] + fn test_lsl_64() { + let inst = ShiftImm::lsl(0, 1, 7, 64); + let result: u32 = inst.into(); + assert_eq!(0xd379e020, result); + } + + #[test] + fn test_lsr_32() { + let inst = ShiftImm::lsr(0, 1, 7, 32); + let result: u32 = inst.into(); + assert_eq!(0x53077c20, result); + } + + #[test] + fn test_lsr_64() { + let inst = ShiftImm::lsr(0, 1, 7, 64); + let result: u32 = inst.into(); + assert_eq!(0xd347fc20, result); + } +} diff --git a/yjit/src/asm/arm64/inst/smulh.rs b/yjit/src/asm/arm64/inst/smulh.rs new file mode 100644 index 0000000000..5e9b231fde --- /dev/null +++ b/yjit/src/asm/arm64/inst/smulh.rs @@ -0,0 +1,60 @@ +/// The struct that represents an A64 signed multiply high instruction +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 0 0 1 1 0 1 1 0 1 0 0 | +/// | rm.............. ra.............. rn.............. rd.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct SMulH { + /// The number of the general-purpose destination register. + rd: u8, + + /// The number of the first general-purpose source register. + rn: u8, + + /// The number of the third general-purpose source register. + ra: u8, + + /// The number of the second general-purpose source register. + rm: u8, +} + +impl SMulH { + /// SMULH + /// https://developer.arm.com/documentation/ddi0602/2023-06/Base-Instructions/SMULH--Signed-Multiply-High- + pub fn smulh(rd: u8, rn: u8, rm: u8) -> Self { + Self { rd, rn, ra: 0b11111, rm } + } +} + +impl From<SMulH> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: SMulH) -> Self { + 0 + | (0b10011011010 << 21) + | ((inst.rm as u32) << 16) + | ((inst.ra as u32) << 10) + | ((inst.rn as u32) << 5) + | (inst.rd as u32) + } +} + +impl From<SMulH> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: SMulH) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_smulh() { + let result: u32 = SMulH::smulh(0, 1, 2).into(); + assert_eq!(0x9b427c20, result); + } +} diff --git a/yjit/src/asm/arm64/inst/sys_reg.rs b/yjit/src/asm/arm64/inst/sys_reg.rs new file mode 100644 index 0000000000..108737a870 --- /dev/null +++ b/yjit/src/asm/arm64/inst/sys_reg.rs @@ -0,0 +1,86 @@ +use super::super::arg::SystemRegister; + +/// Which operation to perform (loading or storing the system register value). +enum L { + /// Store the value of a general-purpose register in a system register. + MSR = 0, + + /// Store the value of a system register in a general-purpose register. + MRS = 1 +} + +/// The struct that represents an A64 system register instruction that can be +/// encoded. 
+/// +/// MSR/MRS (register) +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 1 1 0 1 0 1 0 1 0 0 1 | +/// | L o0 op1..... CRn........ CRm........ op2..... rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct SysReg { + /// The register to load the system register value into. + rt: u8, + + /// Which system register to load or store. + systemreg: SystemRegister, + + /// Which operation to perform (loading or storing the system register value). + l: L +} + +impl SysReg { + /// MRS (register) + /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MRS--Move-System-Register-?lang=en + pub fn mrs(rt: u8, systemreg: SystemRegister) -> Self { + SysReg { rt, systemreg, l: L::MRS } + } + + /// MSR (register) + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MSR--register---Move-general-purpose-register-to-System-Register-?lang=en + pub fn msr(systemreg: SystemRegister, rt: u8) -> Self { + SysReg { rt, systemreg, l: L::MSR } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#systemmove +const FAMILY: u32 = 0b110101010001; + +impl From<SysReg> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: SysReg) -> Self { + 0 + | (FAMILY << 20) + | ((inst.l as u32) << 21) + | ((inst.systemreg as u32) << 5) + | inst.rt as u32 + } +} + +impl From<SysReg> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: SysReg) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_mrs() { + let inst = SysReg::mrs(0, SystemRegister::NZCV); + let result: u32 = inst.into(); + assert_eq!(0xd53b4200, result); + } + + #[test] + fn test_msr() { + let inst = SysReg::msr(SystemRegister::NZCV, 0); + let result: u32 = inst.into(); + assert_eq!(0xd51b4200, result); + } +} diff --git a/yjit/src/asm/arm64/inst/test_bit.rs b/yjit/src/asm/arm64/inst/test_bit.rs new file mode 100644 index 0000000000..c57a05ad2b --- /dev/null +++ b/yjit/src/asm/arm64/inst/test_bit.rs @@ -0,0 +1,133 @@ +use super::super::arg::truncate_imm; + +/// The upper bit of the bit number to test. +#[derive(Debug)] +enum B5 { + /// When the bit number is below 32. + B532 = 0, + + /// When the bit number is equal to or above 32. + B564 = 1 +} + +/// A convenience function so that we can convert the bit number directly into a +/// B5 variant. +impl From<u8> for B5 { + fn from(bit_num: u8) -> Self { + match bit_num { + 0..=31 => B5::B532, + 32..=63 => B5::B564, + _ => panic!("Invalid bit number: {}", bit_num) + } + } +} + +/// The operation to perform for this instruction. +enum Op { + /// The test bit zero operation. + TBZ = 0, + + /// The test bit not zero operation. + TBNZ = 1 +} + +/// The struct that represents an A64 test bit instruction that can be encoded. +/// +/// TBNZ/TBZ +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | 0 1 1 0 1 1 | +/// | b5 op b40............. 
imm14.......................................... rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// +pub struct TestBit { + /// The number of the register to test. + rt: u8, + + /// The PC-relative offset to the target instruction in term of number of + /// instructions. + imm14: i16, + + /// The lower 5 bits of the bit number to be tested. + b40: u8, + + /// The operation to perform for this instruction. + op: Op, + + /// The upper bit of the bit number to test. + b5: B5 +} + +impl TestBit { + /// TBNZ + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TBNZ--Test-bit-and-Branch-if-Nonzero-?lang=en + pub fn tbnz(rt: u8, bit_num: u8, offset: i16) -> Self { + Self { rt, imm14: offset, b40: bit_num & 0b11111, op: Op::TBNZ, b5: bit_num.into() } + } + + /// TBZ + /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TBZ--Test-bit-and-Branch-if-Zero-?lang=en + pub fn tbz(rt: u8, bit_num: u8, offset: i16) -> Self { + Self { rt, imm14: offset, b40: bit_num & 0b11111, op: Op::TBZ, b5: bit_num.into() } + } +} + +/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en +const FAMILY: u32 = 0b11011; + +impl From<TestBit> for u32 { + /// Convert an instruction into a 32-bit value. + fn from(inst: TestBit) -> Self { + let b40 = (inst.b40 & 0b11111) as u32; + let imm14 = truncate_imm::<_, 14>(inst.imm14); + + 0 + | ((inst.b5 as u32) << 31) + | (FAMILY << 25) + | ((inst.op as u32) << 24) + | (b40 << 19) + | (imm14 << 5) + | inst.rt as u32 + } +} + +impl From<TestBit> for [u8; 4] { + /// Convert an instruction into a 4 byte array. + fn from(inst: TestBit) -> [u8; 4] { + let result: u32 = inst.into(); + result.to_le_bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_tbnz() { + let inst = TestBit::tbnz(0, 0, 0); + let result: u32 = inst.into(); + assert_eq!(0x37000000, result); + } + + #[test] + fn test_tbnz_negative() { + let inst = TestBit::tbnz(0, 0, -1); + let result: u32 = inst.into(); + assert_eq!(0x3707ffe0, result); + } + + #[test] + fn test_tbz() { + let inst = TestBit::tbz(0, 0, 0); + let result: u32 = inst.into(); + assert_eq!(0x36000000, result); + } + + #[test] + fn test_tbz_negative() { + let inst = TestBit::tbz(0, 0, -1); + let result: u32 = inst.into(); + assert_eq!(0x3607ffe0, result); + } +} diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs new file mode 100644 index 0000000000..a94d435b7c --- /dev/null +++ b/yjit/src/asm/arm64/mod.rs @@ -0,0 +1,1680 @@ +#![allow(dead_code)] // For instructions and operands we're not currently using. + +use crate::asm::CodeBlock; + +mod arg; +mod inst; +mod opnd; + +use inst::*; + +// We're going to make these public to make using these things easier in the +// backend (so they don't have to have knowledge about the submodule). +pub use arg::*; +pub use opnd::*; + +/// Checks that a signed value fits within the specified number of bits. +pub const fn imm_fits_bits(imm: i64, num_bits: u8) -> bool { + let minimum = if num_bits == 64 { i64::MIN } else { -(2_i64.pow((num_bits as u32) - 1)) }; + let maximum = if num_bits == 64 { i64::MAX } else { 2_i64.pow((num_bits as u32) - 1) - 1 }; + + imm >= minimum && imm <= maximum +} + +/// Checks that an unsigned value fits within the specified number of bits. 
+pub const fn uimm_fits_bits(uimm: u64, num_bits: u8) -> bool { + let maximum = if num_bits == 64 { u64::MAX } else { 2_u64.pow(num_bits as u32) - 1 }; + + uimm <= maximum +} + +/// ADD - add rn and rm, put the result in rd, don't update flags +pub fn add(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + DataReg::add(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + DataImm::add(rd.reg_no, rn.reg_no, uimm12.try_into().unwrap(), rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + if imm12 < 0 { + DataImm::sub(rd.reg_no, rn.reg_no, (-imm12 as u64).try_into().unwrap(), rd.num_bits).into() + } else { + DataImm::add(rd.reg_no, rn.reg_no, (imm12 as u64).try_into().unwrap(), rd.num_bits).into() + } + }, + _ => panic!("Invalid operand combination to add instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// ADDS - add rn and rm, put the result in rd, update flags +pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + DataReg::adds(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + DataImm::adds(rd.reg_no, rn.reg_no, imm12.try_into().unwrap(), rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + if imm12 < 0 { + DataImm::subs(rd.reg_no, rn.reg_no, (-imm12 as u64).try_into().unwrap(), rd.num_bits).into() + } else { + DataImm::adds(rd.reg_no, rn.reg_no, (imm12 as u64).try_into().unwrap(), rd.num_bits).into() + } + }, + _ => panic!("Invalid operand combination to adds instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// ADR - form a PC-relative address and load it into a register +pub fn adr(cb: &mut CodeBlock, rd: A64Opnd, imm: A64Opnd) { + let bytes: [u8; 4] = match (rd, imm) { + (A64Opnd::Reg(rd), A64Opnd::Imm(imm)) => { + assert!(rd.num_bits == 64, "The destination register must be 64 bits."); + assert!(imm_fits_bits(imm, 21), "The immediate operand must be 21 bits or less."); + + PCRelative::adr(rd.reg_no, imm as i32).into() + }, + _ => panic!("Invalid operand combination to adr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// ADRP - form a PC-relative address to a 4KB page and load it into a register. +/// This is effectively the same as ADR except that the immediate must be a +/// multiple of 4KB. 
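+// (Editor's note: hypothetical usage sketch, not part of this commit.) A caller
+// needing a PC-relative address outside ADR's +/-1MiB reach would normally pair
+// ADRP with an ADD of the low 12 bits, along these lines, assuming the target
+// lies within ADRP's +/-4GiB range (the function and parameter names here are
+// made up for illustration; only `adrp` and `add` are defined in this file):
+//
+//     fn load_far_address(cb: &mut CodeBlock, rd: A64Opnd, pc: u64, target: u64) {
+//         // Page-aligned delta, a multiple of 4KiB as ADRP requires
+//         let page_delta = (target & !0xfff) as i64 - (pc & !0xfff) as i64;
+//         adrp(cb, rd, A64Opnd::new_imm(page_delta));
+//         // Then add the byte offset within the target page
+//         add(cb, rd, rd, A64Opnd::new_uimm(target & 0xfff));
+//     }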
+pub fn adrp(cb: &mut CodeBlock, rd: A64Opnd, imm: A64Opnd) { + let bytes: [u8; 4] = match (rd, imm) { + (A64Opnd::Reg(rd), A64Opnd::Imm(imm)) => { + assert!(rd.num_bits == 64, "The destination register must be 64 bits."); + assert!(imm_fits_bits(imm, 32), "The immediate operand must be 32 bits or less."); + + PCRelative::adrp(rd.reg_no, imm as i32).into() + }, + _ => panic!("Invalid operand combination to adr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// AND - and rn and rm, put the result in rd, don't update flags +pub fn and(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + LogicalReg::and(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + let bitmask_imm = if rd.num_bits == 32 { + BitmaskImmediate::new_32b_reg(imm.try_into().unwrap()) + } else { + imm.try_into() + }.unwrap(); + + LogicalImm::and(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to and instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// ANDS - and rn and rm, put the result in rd, update flags +pub fn ands(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + LogicalReg::ands(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + let bitmask_imm = if rd.num_bits == 32 { + BitmaskImmediate::new_32b_reg(imm.try_into().unwrap()) + } else { + imm.try_into() + }.unwrap(); + + LogicalImm::ands(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to ands instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// ASR - arithmetic shift right rn by shift, put the result in rd, don't update +/// flags +pub fn asr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, shift) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(shift)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + assert!(uimm_fits_bits(shift, 6), "The shift operand must be 6 bits or less."); + + SBFM::asr(rd.reg_no, rn.reg_no, shift.try_into().unwrap(), rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to asr instruction: asr {:?}, {:?}, {:?}", rd, rn, shift), + }; + + cb.write_bytes(&bytes); +} + +/// Whether or not the offset between two instructions fits into the branch with +/// or without link instruction. If it doesn't, then we have to load the value +/// into a register first. 
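+// (Editor's note: illustrative example only, not part of this commit.) The
+// offset is counted in 4-byte instructions, so the signed 26-bit branch field
+// reaches +/-2^25 instructions, i.e. roughly +/-128MiB of code:
+//
+//     assert!(b_offset_fits_bits((1 << 25) - 1));  // largest forward offset
+//     assert!(b_offset_fits_bits(-(1 << 25)));     // largest backward offset
+//     assert!(!b_offset_fits_bits(1 << 25));       // one instruction too far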
+pub const fn b_offset_fits_bits(offset: i64) -> bool { + imm_fits_bits(offset, 26) +} + +/// B - branch without link (offset is number of instructions to jump) +pub fn b(cb: &mut CodeBlock, offset: InstructionOffset) { + assert!(b_offset_fits_bits(offset.into()), "The immediate operand must be 26 bits or less."); + let bytes: [u8; 4] = Call::b(offset).into(); + + cb.write_bytes(&bytes); +} + +/// Whether or not the offset in number of instructions between two instructions +/// fits into the b.cond instruction. If it doesn't, then we have to load the +/// value into a register first, then use the b.cond instruction to skip past a +/// direct jump. +pub const fn bcond_offset_fits_bits(offset: i64) -> bool { + imm_fits_bits(offset, 19) +} + +/// CBZ and CBNZ also have a limit of 19 bits for the branch offset. +pub use bcond_offset_fits_bits as cmp_branch_offset_fits_bits; + +/// B.cond - branch to target if condition is true +pub fn bcond(cb: &mut CodeBlock, cond: u8, offset: InstructionOffset) { + assert!(bcond_offset_fits_bits(offset.into()), "The offset must be 19 bits or less."); + let bytes: [u8; 4] = BranchCond::bcond(cond, offset).into(); + + cb.write_bytes(&bytes); +} + +/// BL - branch with link (offset is number of instructions to jump) +pub fn bl(cb: &mut CodeBlock, offset: InstructionOffset) { + assert!(b_offset_fits_bits(offset.into()), "The offset must be 26 bits or less."); + let bytes: [u8; 4] = Call::bl(offset).into(); + + cb.write_bytes(&bytes); +} + +/// BLR - branch with link to a register +pub fn blr(cb: &mut CodeBlock, rn: A64Opnd) { + let bytes: [u8; 4] = match rn { + A64Opnd::Reg(rn) => Branch::blr(rn.reg_no).into(), + _ => panic!("Invalid operand to blr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// BR - branch to a register +pub fn br(cb: &mut CodeBlock, rn: A64Opnd) { + let bytes: [u8; 4] = match rn { + A64Opnd::Reg(rn) => Branch::br(rn.reg_no).into(), + _ => panic!("Invalid operand to br instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// BRK - create a breakpoint +pub fn brk(cb: &mut CodeBlock, imm16: A64Opnd) { + let bytes: [u8; 4] = match imm16 { + A64Opnd::None => Breakpoint::brk(0).into(), + A64Opnd::UImm(imm16) => { + assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); + Breakpoint::brk(imm16 as u16).into() + }, + _ => panic!("Invalid operand combination to brk instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// CMP - compare rn and rm, update flags +pub fn cmp(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rn, rm) { + (A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + DataReg::cmp(rn.reg_no, rm.reg_no, rn.num_bits).into() + }, + (A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { + DataImm::cmp(rn.reg_no, (imm12 as u64).try_into().unwrap(), rn.num_bits).into() + }, + (A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => { + DataImm::cmp(rn.reg_no, imm12.try_into().unwrap(), rn.num_bits).into() + }, + _ => panic!("Invalid operand combination to cmp instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// CSEL - conditionally select between two registers +pub fn csel(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd, cond: u8) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + Conditional::csel(rd.reg_no, rn.reg_no, rm.reg_no, cond, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to csel instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// EOR - perform a bitwise XOR of rn and rm, put the result in rd, don't update flags +pub fn eor(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + LogicalReg::eor(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + let bitmask_imm = if rd.num_bits == 32 { + BitmaskImmediate::new_32b_reg(imm.try_into().unwrap()) + } else { + imm.try_into() + }.unwrap(); + + LogicalImm::eor(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to eor instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// LDADDAL - atomic add with acquire and release semantics +pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rs, rt, rn) { + (A64Opnd::Reg(rs), A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => { + assert!( + rs.num_bits == rt.num_bits && rt.num_bits == rn.num_bits, + "All operands must be of the same size." + ); + + Atomic::ldaddal(rs.reg_no, rt.reg_no, rn.reg_no, rs.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldaddal instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// LDAXR - atomic load with acquire semantics +pub fn ldaxr(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => { + assert_eq!(rn.num_bits, 64, "rn must be a 64-bit register."); + + LoadStoreExclusive::ldaxr(rt.reg_no, rn.reg_no, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldaxr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// LDP (signed offset) - load a pair of registers from memory +pub fn ldp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); + + RegisterPair::ldp(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldp instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LDP (pre-index) - load a pair of registers from memory, update the base pointer before loading it +pub fn ldp_pre(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); + + RegisterPair::ldp_pre(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => 
panic!("Invalid operand combination to ldp instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LDP (post-index) - load a pair of registers from memory, update the base pointer after loading it +pub fn ldp_post(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); + + RegisterPair::ldp_post(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldp instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LDR - load a memory address into a register with a register offset +pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn, rm) { + (A64Opnd::Reg(rt), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(rn.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LoadRegister::ldr(rt.reg_no, rn.reg_no, rm.reg_no, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldr instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LDR - load a PC-relative memory address into a register +pub fn ldr_literal(cb: &mut CodeBlock, rt: A64Opnd, rn: InstructionOffset) { + let bytes: [u8; 4] = match rt { + A64Opnd::Reg(rt) => { + LoadLiteral::ldr_literal(rt.reg_no, rn, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// LDRH - load a halfword from memory +pub fn ldrh(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 12), "The displacement must be 12 bits or less."); + + HalfwordImm::ldrh(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to ldrh instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LDRH (pre-index) - load a halfword from memory, update the base pointer before loading it +pub fn ldrh_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + + HalfwordImm::ldrh_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to ldrh instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LDRH (post-index) - load a halfword from memory, update the base pointer after loading it +pub fn ldrh_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + + HalfwordImm::ldrh_post(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to ldrh instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// 
Whether or not a memory address displacement fits into the maximum number of +/// bits such that it can be used without loading it into a register first. +pub fn mem_disp_fits_bits(disp: i32) -> bool { + imm_fits_bits(disp.into(), 9) +} + +/// LDR (post-index) - load a register from memory, update the base pointer after loading it +pub fn ldr_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less."); + + LoadStore::ldr_post(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// LDR (pre-index) - load a register from memory, update the base pointer before loading it +pub fn ldr_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less."); + + LoadStore::ldr_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to ldr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// LDUR - load a memory address into a register +pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + + LoadStore::ldur(rt.reg_no, rn.reg_no, 0, rt.num_bits).into() + }, + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less"); + + LoadStore::ldur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ => panic!("Invalid operands for LDUR") + }; + + cb.write_bytes(&bytes); +} + +/// LDURH - load a byte from memory, zero-extend it, and write it to a register +pub fn ldurh(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less"); + + LoadStore::ldurh(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operands for LDURH") + }; + + cb.write_bytes(&bytes); +} + +/// LDURB - load a byte from memory, zero-extend it, and write it to a register +pub fn ldurb(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(rt.num_bits == 8, "Expected registers to have size 8"); + assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less"); + + LoadStore::ldurb(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operands for LDURB") + }; + + cb.write_bytes(&bytes); +} + +/// LDURSW - load a 32-bit memory address into a register and sign-extend it +pub fn ldursw(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "Expected registers to be the 
same size"); + assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less"); + + LoadStore::ldursw(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to ldursw instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// LSL - logical shift left a register by an immediate +pub fn lsl(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, shift) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm)) => { + assert!(rd.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(uimm_fits_bits(uimm, 6), "Expected shift to be 6 bits or less"); + + ShiftImm::lsl(rd.reg_no, rn.reg_no, uimm as u8, rd.num_bits).into() + }, + _ => panic!("Invalid operands combination to lsl instruction") + }; + + cb.write_bytes(&bytes); +} + +/// LSR - logical shift right a register by an immediate +pub fn lsr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, shift) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm)) => { + assert!(rd.num_bits == rn.num_bits, "Expected registers to be the same size"); + assert!(uimm_fits_bits(uimm, 6), "Expected shift to be 6 bits or less"); + + ShiftImm::lsr(rd.reg_no, rn.reg_no, uimm as u8, rd.num_bits).into() + }, + _ => panic!("Invalid operands combination to lsr instruction") + }; + + cb.write_bytes(&bytes); +} + +/// MOV - move a value in a register to another register +pub fn mov(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rm) { + (A64Opnd::Reg(A64Reg { reg_no: 31, num_bits: 64 }), A64Opnd::Reg(rm)) => { + assert!(rm.num_bits == 64, "Expected rm to be 64 bits"); + + DataImm::add(31, rm.reg_no, 0.try_into().unwrap(), 64).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(A64Reg { reg_no: 31, num_bits: 64 })) => { + assert!(rd.num_bits == 64, "Expected rd to be 64 bits"); + + DataImm::add(rd.reg_no, 31, 0.try_into().unwrap(), 64).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LogicalReg::mov(rd.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::UImm(0)) => { + LogicalReg::mov(rd.reg_no, XZR_REG.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::UImm(imm)) => { + let bitmask_imm = if rd.num_bits == 32 { + BitmaskImmediate::new_32b_reg(imm.try_into().unwrap()) + } else { + imm.try_into() + }.unwrap(); + + LogicalImm::mov(rd.reg_no, bitmask_imm, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to mov instruction") + }; + + cb.write_bytes(&bytes); +} + +/// MOVK - move a 16 bit immediate into a register, keep the other bits in place +pub fn movk(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { + let bytes: [u8; 4] = match (rd, imm16) { + (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => { + assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); + + Mov::movk(rd.reg_no, imm16 as u16, shift, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to movk instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// MOVZ - move a 16 bit immediate into a register, zero the other bits +pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) { + let bytes: [u8; 4] = match (rd, imm16) { + (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => { + assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less."); + + Mov::movz(rd.reg_no, imm16 as 
u16, shift, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to movz instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// MRS - move a system register into a general-purpose register +pub fn mrs(cb: &mut CodeBlock, rt: A64Opnd, systemregister: SystemRegister) { + let bytes: [u8; 4] = match rt { + A64Opnd::Reg(rt) => { + SysReg::mrs(rt.reg_no, systemregister).into() + }, + _ => panic!("Invalid operand combination to mrs instruction") + }; + + cb.write_bytes(&bytes); +} + +/// MSR - move a general-purpose register into a system register +pub fn msr(cb: &mut CodeBlock, systemregister: SystemRegister, rt: A64Opnd) { + let bytes: [u8; 4] = match rt { + A64Opnd::Reg(rt) => { + SysReg::msr(systemregister, rt.reg_no).into() + }, + _ => panic!("Invalid operand combination to msr instruction") + }; + + cb.write_bytes(&bytes); +} + +/// MUL - multiply two registers, put the result in a third register +pub fn mul(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, "Expected registers to be the same size"); + + MAdd::mul(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to mul instruction") + }; + + cb.write_bytes(&bytes); +} + +/// SMULH - multiply two 64-bit registers to produce a 128-bit result, put the high 64-bits of the result into rd +pub fn smulh(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, "Expected registers to be the same size"); + assert!(rd.num_bits == 64, "smulh only applicable to 64-bit registers"); + + SMulH::smulh(rd.reg_no, rn.reg_no, rm.reg_no).into() + }, + _ => panic!("Invalid operand combination to mul instruction") + }; + + cb.write_bytes(&bytes); +} + +/// MVN - move a value in a register to another register, negating it +pub fn mvn(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LogicalReg::mvn(rd.reg_no, rm.reg_no, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to mvn instruction") + }; + + cb.write_bytes(&bytes); +} + +/// NOP - no-operation, used for alignment purposes +pub fn nop(cb: &mut CodeBlock) { + let bytes: [u8; 4] = Nop::nop().into(); + + cb.write_bytes(&bytes); +} + +/// ORN - perform a bitwise OR of rn and NOT rm, put the result in rd, don't update flags +pub fn orn(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, "Expected registers to be the same size"); + + LogicalReg::orn(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to orn instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// ORR - perform a bitwise OR of rn and rm, put the result in rd, don't update flags +pub fn orr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == 
rm.num_bits, + "All operands must be of the same size." + ); + + LogicalReg::orr(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + let bitmask_imm = if rd.num_bits == 32 { + BitmaskImmediate::new_32b_reg(imm.try_into().unwrap()) + } else { + imm.try_into() + }.unwrap(); + + LogicalImm::orr(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into() + }, + _ => panic!("Invalid operand combination to orr instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// STLXR - store a value to memory, release exclusive access +pub fn stlxr(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rs, rt, rn) { + (A64Opnd::Reg(rs), A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => { + assert_eq!(rs.num_bits, 32, "rs must be a 32-bit register."); + assert_eq!(rn.num_bits, 64, "rn must be a 64-bit register."); + + LoadStoreExclusive::stlxr(rs.reg_no, rt.reg_no, rn.reg_no, rn.num_bits).into() + }, + _ => panic!("Invalid operand combination to stlxr instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STP (signed offset) - store a pair of registers to memory +pub fn stp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); + + RegisterPair::stp(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => panic!("Invalid operand combination to stp instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STP (pre-index) - store a pair of registers to memory, update the base pointer before loading it +pub fn stp_pre(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); + + RegisterPair::stp_pre(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => panic!("Invalid operand combination to stp instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STP (post-index) - store a pair of registers to memory, update the base pointer after loading it +pub fn stp_post(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt1, rt2, rn) { + (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => { + assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size"); + assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less."); + assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register"); + + RegisterPair::stp_post(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into() + }, + _ => panic!("Invalid operand combination to stp instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STR (post-index) - store a register to memory, update the base pointer after 
loading it +pub fn str_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less."); + + LoadStore::str_post(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to str instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// STR (pre-index) - store a register to memory, update the base pointer before loading it +pub fn str_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size."); + assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less."); + + LoadStore::str_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into() + }, + _ => panic!("Invalid operand combination to str instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// STRH - store a halfword into memory +pub fn strh(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 12), "The displacement must be 12 bits or less."); + + HalfwordImm::strh(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to strh instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STRH (pre-index) - store a halfword into memory, update the base pointer before loading it +pub fn strh_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + + HalfwordImm::strh_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to strh instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STRH (post-index) - store a halfword into memory, update the base pointer after loading it +pub fn strh_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword"); + assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less."); + + HalfwordImm::strh_post(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to strh instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STUR - store a value in a register at a memory address +pub fn stur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => { + assert!(rn.num_bits == 32 || rn.num_bits == 64); + assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less"); + + LoadStore::stur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rn.num_bits).into() + }, + _ => panic!("Invalid operand combination to stur instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// STURH - store a value in a register at a memory address +pub fn sturh(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rt, rn) { + (A64Opnd::Reg(rt), 
A64Opnd::Mem(rn)) => { + assert!(rn.num_bits == 16); + assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less"); + + LoadStore::sturh(rt.reg_no, rn.base_reg_no, rn.disp as i16).into() + }, + _ => panic!("Invalid operand combination to stur instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// SUB - subtract rm from rn, put the result in rd, don't update flags +pub fn sub(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." + ); + + DataReg::sub(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + DataImm::sub(rd.reg_no, rn.reg_no, uimm12.try_into().unwrap(), rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + if imm12 < 0 { + DataImm::add(rd.reg_no, rn.reg_no, (-imm12 as u64).try_into().unwrap(), rd.num_bits).into() + } else { + DataImm::sub(rd.reg_no, rn.reg_no, (imm12 as u64).try_into().unwrap(), rd.num_bits).into() + } + }, + _ => panic!("Invalid operand combination to sub instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// SUBS - subtract rm from rn, put the result in rd, update flags +pub fn subs(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn, rm) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!( + rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, + "All operands must be of the same size." 
+ ); + + DataReg::subs(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + DataImm::subs(rd.reg_no, rn.reg_no, uimm12.try_into().unwrap(), rd.num_bits).into() + }, + (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => { + assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size."); + + if imm12 < 0 { + DataImm::adds(rd.reg_no, rn.reg_no, (-imm12 as u64).try_into().unwrap(), rd.num_bits).into() + } else { + DataImm::subs(rd.reg_no, rn.reg_no, (imm12 as u64).try_into().unwrap(), rd.num_bits).into() + } + }, + _ => panic!("Invalid operand combination to subs instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// SXTW - sign extend a 32-bit register into a 64-bit register +pub fn sxtw(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd) { + let bytes: [u8; 4] = match (rd, rn) { + (A64Opnd::Reg(rd), A64Opnd::Reg(rn)) => { + assert_eq!(rd.num_bits, 64, "rd must be 64-bits wide."); + assert_eq!(rn.num_bits, 32, "rn must be 32-bits wide."); + + SBFM::sxtw(rd.reg_no, rn.reg_no).into() + }, + _ => panic!("Invalid operand combination to sxtw instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// RET - unconditionally return to a location in a register, defaults to X30 +pub fn ret(cb: &mut CodeBlock, rn: A64Opnd) { + let bytes: [u8; 4] = match rn { + A64Opnd::None => Branch::ret(30).into(), + A64Opnd::Reg(reg) => Branch::ret(reg.reg_no).into(), + _ => panic!("Invalid operand to ret instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// TBNZ - test bit and branch if not zero +pub fn tbnz(cb: &mut CodeBlock, rt: A64Opnd, bit_num: A64Opnd, offset: A64Opnd) { + let bytes: [u8; 4] = match (rt, bit_num, offset) { + (A64Opnd::Reg(rt), A64Opnd::UImm(bit_num), A64Opnd::Imm(offset)) => { + TestBit::tbnz(rt.reg_no, bit_num.try_into().unwrap(), offset.try_into().unwrap()).into() + }, + _ => panic!("Invalid operand combination to tbnz instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// TBZ - test bit and branch if zero +pub fn tbz(cb: &mut CodeBlock, rt: A64Opnd, bit_num: A64Opnd, offset: A64Opnd) { + let bytes: [u8; 4] = match (rt, bit_num, offset) { + (A64Opnd::Reg(rt), A64Opnd::UImm(bit_num), A64Opnd::Imm(offset)) => { + TestBit::tbz(rt.reg_no, bit_num.try_into().unwrap(), offset.try_into().unwrap()).into() + }, + _ => panic!("Invalid operand combination to tbz instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// TST - test the bits of a register against a mask, then update flags +pub fn tst(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) { + let bytes: [u8; 4] = match (rn, rm) { + (A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => { + assert!(rn.num_bits == rm.num_bits, "All operands must be of the same size."); + + LogicalReg::tst(rn.reg_no, rm.reg_no, rn.num_bits).into() + }, + (A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => { + let bitmask_imm = if rn.num_bits == 32 { + BitmaskImmediate::new_32b_reg(imm.try_into().unwrap()) + } else { + imm.try_into() + }.unwrap(); + + LogicalImm::tst(rn.reg_no, bitmask_imm, rn.num_bits).into() + }, + _ => panic!("Invalid operand combination to tst instruction."), + }; + + cb.write_bytes(&bytes); +} + +/// CBZ - branch if a register is zero +pub fn cbz(cb: &mut CodeBlock, rt: A64Opnd, offset: InstructionOffset) { + assert!(imm_fits_bits(offset.into(), 19), "jump offset for cbz must fit in 19 bits"); + let bytes: [u8; 4] = if let A64Opnd::Reg(rt) = rt { + cbz_cbnz(rt.num_bits, false, offset, rt.reg_no) 
+ } else { + panic!("Invalid operand combination to cbz instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// CBNZ - branch if a register is non-zero +pub fn cbnz(cb: &mut CodeBlock, rt: A64Opnd, offset: InstructionOffset) { + assert!(imm_fits_bits(offset.into(), 19), "jump offset for cbz must fit in 19 bits"); + let bytes: [u8; 4] = if let A64Opnd::Reg(rt) = rt { + cbz_cbnz(rt.num_bits, true, offset, rt.reg_no) + } else { + panic!("Invalid operand combination to cbnz instruction.") + }; + + cb.write_bytes(&bytes); +} + +/// Encode Compare and Branch on Zero (CBZ) with `op=0` or Compare and Branch on Nonzero (CBNZ) +/// with `op=1`. +/// +/// <https://developer.arm.com/documentation/ddi0602/2024-03/Base-Instructions/CBZ--Compare-and-Branch-on-Zero-> +/// +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 | +/// | sf 0 1 1 0 1 0 op | +/// | imm19........................................................... Rt.............. | +/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ +fn cbz_cbnz(num_bits: u8, op: bool, offset: InstructionOffset, rt: u8) -> [u8; 4] { + ((Sf::from(num_bits) as u32) << 31 | + 0b11010 << 25 | + u32::from(op) << 24 | + truncate_imm::<_, 19>(offset) << 5 | + rt as u32).to_le_bytes() +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Check that the bytes for an instruction sequence match a hex string + fn check_bytes<R>(bytes: &str, run: R) where R: FnOnce(&mut super::CodeBlock) { + let mut cb = super::CodeBlock::new_dummy(128); + run(&mut cb); + assert_eq!(format!("{:x}", cb), bytes); + } + + #[test] + fn test_imm_fits_bits() { + assert!(imm_fits_bits(i8::MAX.into(), 8)); + assert!(imm_fits_bits(i8::MIN.into(), 8)); + + assert!(imm_fits_bits(i16::MAX.into(), 16)); + assert!(imm_fits_bits(i16::MIN.into(), 16)); + + assert!(imm_fits_bits(i32::MAX.into(), 32)); + assert!(imm_fits_bits(i32::MIN.into(), 32)); + + assert!(imm_fits_bits(i64::MAX, 64)); + assert!(imm_fits_bits(i64::MIN, 64)); + } + + #[test] + fn test_uimm_fits_bits() { + assert!(uimm_fits_bits(u8::MAX.into(), 8)); + assert!(uimm_fits_bits(u16::MAX.into(), 16)); + assert!(uimm_fits_bits(u32::MAX.into(), 32)); + assert!(uimm_fits_bits(u64::MAX, 64)); + } + + #[test] + fn test_add_reg() { + check_bytes("2000028b", |cb| add(cb, X0, X1, X2)); + } + + #[test] + fn test_add_uimm() { + check_bytes("201c0091", |cb| add(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_add_imm_positive() { + check_bytes("201c0091", |cb| add(cb, X0, X1, A64Opnd::new_imm(7))); + } + + #[test] + fn test_add_imm_negative() { + check_bytes("201c00d1", |cb| add(cb, X0, X1, A64Opnd::new_imm(-7))); + } + + #[test] + fn test_adds_reg() { + check_bytes("200002ab", |cb| adds(cb, X0, X1, X2)); + } + + #[test] + fn test_adds_uimm() { + check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_adds_imm_positive() { + check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_imm(7))); + } + + #[test] + fn test_adds_imm_negative() { + check_bytes("201c00f1", |cb| adds(cb, X0, X1, A64Opnd::new_imm(-7))); + } + + #[test] + fn test_adr() { + check_bytes("aa000010", |cb| adr(cb, X10, A64Opnd::new_imm(20))); + } + + #[test] + fn test_adrp() { + check_bytes("4a000090", |cb| adrp(cb, X10, A64Opnd::new_imm(0x8000))); + } + + #[test] + fn 
test_and_register() { + check_bytes("2000028a", |cb| and(cb, X0, X1, X2)); + } + + #[test] + fn test_and_immediate() { + check_bytes("20084092", |cb| and(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_and_32b_immediate() { + check_bytes("404c0012", |cb| and(cb, W0, W2, A64Opnd::new_uimm(0xfffff))); + } + + #[test] + fn test_ands_register() { + check_bytes("200002ea", |cb| ands(cb, X0, X1, X2)); + } + + #[test] + fn test_ands_immediate() { + check_bytes("200840f2", |cb| ands(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_asr() { + check_bytes("b4fe4a93", |cb| asr(cb, X20, X21, A64Opnd::new_uimm(10))); + } + + #[test] + fn test_bcond() { + let offset = InstructionOffset::from_insns(0x100); + check_bytes("01200054", |cb| bcond(cb, Condition::NE, offset)); + } + + #[test] + fn test_b() { + let offset = InstructionOffset::from_insns((1 << 25) - 1); + check_bytes("ffffff15", |cb| b(cb, offset)); + } + + #[test] + #[should_panic] + fn test_b_too_big() { + // There are 26 bits available + let offset = InstructionOffset::from_insns(1 << 25); + check_bytes("", |cb| b(cb, offset)); + } + + #[test] + #[should_panic] + fn test_b_too_small() { + // There are 26 bits available + let offset = InstructionOffset::from_insns(-(1 << 25) - 1); + check_bytes("", |cb| b(cb, offset)); + } + + #[test] + fn test_bl() { + let offset = InstructionOffset::from_insns(-(1 << 25)); + check_bytes("00000096", |cb| bl(cb, offset)); + } + + #[test] + #[should_panic] + fn test_bl_too_big() { + // There are 26 bits available + let offset = InstructionOffset::from_insns(1 << 25); + check_bytes("", |cb| bl(cb, offset)); + } + + #[test] + #[should_panic] + fn test_bl_too_small() { + // There are 26 bits available + let offset = InstructionOffset::from_insns(-(1 << 25) - 1); + check_bytes("", |cb| bl(cb, offset)); + } + + #[test] + fn test_blr() { + check_bytes("80023fd6", |cb| blr(cb, X20)); + } + + #[test] + fn test_br() { + check_bytes("80021fd6", |cb| br(cb, X20)); + } + + #[test] + fn test_cbz() { + let offset = InstructionOffset::from_insns(-1); + check_bytes("e0ffffb4e0ffff34", |cb| { + cbz(cb, X0, offset); + cbz(cb, W0, offset); + }); + } + + #[test] + fn test_cbnz() { + let offset = InstructionOffset::from_insns(2); + check_bytes("540000b554000035", |cb| { + cbnz(cb, X20, offset); + cbnz(cb, W20, offset); + }); + } + + #[test] + fn test_brk_none() { + check_bytes("000020d4", |cb| brk(cb, A64Opnd::None)); + } + + #[test] + fn test_brk_uimm() { + check_bytes("c00120d4", |cb| brk(cb, A64Opnd::new_uimm(14))); + } + + #[test] + fn test_cmp_register() { + check_bytes("5f010beb", |cb| cmp(cb, X10, X11)); + } + + #[test] + fn test_cmp_immediate() { + check_bytes("5f3900f1", |cb| cmp(cb, X10, A64Opnd::new_uimm(14))); + } + + #[test] + fn test_csel() { + check_bytes("6a018c9a", |cb| csel(cb, X10, X11, X12, Condition::EQ)); + } + + #[test] + fn test_eor_register() { + check_bytes("6a010cca", |cb| eor(cb, X10, X11, X12)); + } + + #[test] + fn test_eor_immediate() { + check_bytes("6a0940d2", |cb| eor(cb, X10, X11, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_eor_32b_immediate() { + check_bytes("29040152", |cb| eor(cb, W9, W1, A64Opnd::new_uimm(0x80000001))); + } + + #[test] + fn test_ldaddal() { + check_bytes("8b01eaf8", |cb| ldaddal(cb, X10, X11, X12)); + } + + #[test] + fn test_ldaxr() { + check_bytes("6afd5fc8", |cb| ldaxr(cb, X10, X11)); + } + + #[test] + fn test_ldp() { + check_bytes("8a2d4da9", |cb| ldp(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + + #[test] + fn test_ldp_pre() { + 
check_bytes("8a2dcda9", |cb| ldp_pre(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + + #[test] + fn test_ldp_post() { + check_bytes("8a2dcda8", |cb| ldp_post(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + + #[test] + fn test_ldr() { + check_bytes("6a696cf8", |cb| ldr(cb, X10, X11, X12)); + } + + #[test] + fn test_ldr_literal() { + check_bytes("40010058", |cb| ldr_literal(cb, X0, 10.into())); + } + + #[test] + fn test_ldr_post() { + check_bytes("6a0541f8", |cb| ldr_post(cb, X10, A64Opnd::new_mem(64, X11, 16))); + } + + #[test] + fn test_ldr_pre() { + check_bytes("6a0d41f8", |cb| ldr_pre(cb, X10, A64Opnd::new_mem(64, X11, 16))); + } + + #[test] + fn test_ldrh() { + check_bytes("6a194079", |cb| ldrh(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + + #[test] + fn test_ldrh_pre() { + check_bytes("6acd4078", |cb| ldrh_pre(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + + #[test] + fn test_ldrh_post() { + check_bytes("6ac54078", |cb| ldrh_post(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + + #[test] + fn test_ldurh_memory() { + check_bytes("2a004078", |cb| ldurh(cb, W10, A64Opnd::new_mem(64, X1, 0))); + check_bytes("2ab04778", |cb| ldurh(cb, W10, A64Opnd::new_mem(64, X1, 123))); + } + + #[test] + fn test_ldur_memory() { + check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(64, X1, 123))); + } + + #[test] + fn test_ldur_register() { + check_bytes("200040f8", |cb| ldur(cb, X0, X1)); + } + + #[test] + fn test_ldursw() { + check_bytes("6ab187b8", |cb| ldursw(cb, X10, A64Opnd::new_mem(64, X11, 123))); + } + + #[test] + fn test_lsl() { + check_bytes("6ac572d3", |cb| lsl(cb, X10, X11, A64Opnd::new_uimm(14))); + } + + #[test] + fn test_lsr() { + check_bytes("6afd4ed3", |cb| lsr(cb, X10, X11, A64Opnd::new_uimm(14))); + } + + #[test] + fn test_mov_registers() { + check_bytes("ea030baa", |cb| mov(cb, X10, X11)); + } + + #[test] + fn test_mov_immediate() { + check_bytes("eaf300b2", |cb| mov(cb, X10, A64Opnd::new_uimm(0x5555555555555555))); + } + + #[test] + fn test_mov_32b_immediate() { + check_bytes("ea070132", |cb| mov(cb, W10, A64Opnd::new_uimm(0x80000001))); + } + #[test] + fn test_mov_into_sp() { + check_bytes("1f000091", |cb| mov(cb, X31, X0)); + } + + #[test] + fn test_mov_from_sp() { + check_bytes("e0030091", |cb| mov(cb, X0, X31)); + } + + #[test] + fn test_movk() { + check_bytes("600fa0f2", |cb| movk(cb, X0, A64Opnd::new_uimm(123), 16)); + } + + #[test] + fn test_movz() { + check_bytes("600fa0d2", |cb| movz(cb, X0, A64Opnd::new_uimm(123), 16)); + } + + #[test] + fn test_mrs() { + check_bytes("0a423bd5", |cb| mrs(cb, X10, SystemRegister::NZCV)); + } + + #[test] + fn test_msr() { + check_bytes("0a421bd5", |cb| msr(cb, SystemRegister::NZCV, X10)); + } + + #[test] + fn test_mul() { + check_bytes("6a7d0c9b", |cb| mul(cb, X10, X11, X12)); + } + + #[test] + fn test_mvn() { + check_bytes("ea032baa", |cb| mvn(cb, X10, X11)); + } + + #[test] + fn test_nop() { + check_bytes("1f2003d5", |cb| nop(cb)); + } + + #[test] + fn test_orn() { + check_bytes("6a012caa", |cb| orn(cb, X10, X11, X12)); + } + + #[test] + fn test_orr_register() { + check_bytes("6a010caa", |cb| orr(cb, X10, X11, X12)); + } + + #[test] + fn test_orr_immediate() { + check_bytes("6a0940b2", |cb| orr(cb, X10, X11, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_orr_32b_immediate() { + check_bytes("6a010032", |cb| orr(cb, W10, W11, A64Opnd::new_uimm(1))); + } + + #[test] + fn test_ret_none() { + check_bytes("c0035fd6", |cb| ret(cb, A64Opnd::None)); + } + + #[test] + fn test_ret_register() { + check_bytes("80025fd6", |cb| 
ret(cb, X20)); + } + + #[test] + fn test_stlxr() { + check_bytes("8bfd0ac8", |cb| stlxr(cb, W10, X11, X12)); + } + + #[test] + fn test_stp() { + check_bytes("8a2d0da9", |cb| stp(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + + #[test] + fn test_stp_pre() { + check_bytes("8a2d8da9", |cb| stp_pre(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + + #[test] + fn test_stp_post() { + check_bytes("8a2d8da8", |cb| stp_post(cb, X10, X11, A64Opnd::new_mem(64, X12, 208))); + } + + #[test] + fn test_str_post() { + check_bytes("6a051ff8", |cb| str_post(cb, X10, A64Opnd::new_mem(64, X11, -16))); + } + + #[test] + fn test_str_pre() { + check_bytes("6a0d1ff8", |cb| str_pre(cb, X10, A64Opnd::new_mem(64, X11, -16))); + } + + #[test] + fn test_strh() { + check_bytes("6a190079", |cb| strh(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + + #[test] + fn test_strh_pre() { + check_bytes("6acd0078", |cb| strh_pre(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + + #[test] + fn test_strh_post() { + check_bytes("6ac50078", |cb| strh_post(cb, W10, A64Opnd::new_mem(64, X11, 12))); + } + + #[test] + fn test_stur_64_bits() { + check_bytes("6a0108f8", |cb| stur(cb, X10, A64Opnd::new_mem(64, X11, 128))); + } + + #[test] + fn test_stur_32_bits() { + check_bytes("6a0108b8", |cb| stur(cb, X10, A64Opnd::new_mem(32, X11, 128))); + } + + #[test] + fn test_sub_reg() { + check_bytes("200002cb", |cb| sub(cb, X0, X1, X2)); + } + + #[test] + fn test_sub_uimm() { + check_bytes("201c00d1", |cb| sub(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_sub_imm_positive() { + check_bytes("201c00d1", |cb| sub(cb, X0, X1, A64Opnd::new_imm(7))); + } + + #[test] + fn test_sub_imm_negative() { + check_bytes("201c0091", |cb| sub(cb, X0, X1, A64Opnd::new_imm(-7))); + } + + #[test] + fn test_subs_reg() { + check_bytes("200002eb", |cb| subs(cb, X0, X1, X2)); + } + + #[test] + fn test_subs_imm_positive() { + check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_imm(7))); + } + + #[test] + fn test_subs_imm_negative() { + check_bytes("201c00b1", |cb| subs(cb, X0, X1, A64Opnd::new_imm(-7))); + } + + #[test] + fn test_subs_uimm() { + check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_sxtw() { + check_bytes("6a7d4093", |cb| sxtw(cb, X10, W11)); + } + + #[test] + fn test_tbnz() { + check_bytes("4a005037", |cb| tbnz(cb, X10, A64Opnd::UImm(10), A64Opnd::Imm(2))); + } + + #[test] + fn test_tbz() { + check_bytes("4a005036", |cb| tbz(cb, X10, A64Opnd::UImm(10), A64Opnd::Imm(2))); + } + + #[test] + fn test_tst_register() { + check_bytes("1f0001ea", |cb| tst(cb, X0, X1)); + } + + #[test] + fn test_tst_immediate() { + check_bytes("3f0840f2", |cb| tst(cb, X1, A64Opnd::new_uimm(7))); + } + + #[test] + fn test_tst_32b_immediate() { + check_bytes("1f3c0072", |cb| tst(cb, W0, A64Opnd::new_uimm(0xffff))); + } +} diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs new file mode 100644 index 0000000000..108824e08d --- /dev/null +++ b/yjit/src/asm/arm64/opnd.rs @@ -0,0 +1,195 @@ + + +/// This operand represents a register. 
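+// (Editor's note: illustrative example only, not part of this commit.) The same
+// physical register appears with different widths depending on `num_bits`: e.g.
+// `X0_REG` below is `A64Reg { num_bits: 64, reg_no: 0 }`, and
+// `X0_REG.with_num_bits(32)` yields the 32-bit view wrapped by the `W0` constant.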
+#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct A64Reg +{ + // Size in bits + pub num_bits: u8, + + // Register index number + pub reg_no: u8, +} + +impl A64Reg { + pub fn with_num_bits(&self, num_bits: u8) -> Self { + assert!(num_bits == 8 || num_bits == 16 || num_bits == 32 || num_bits == 64); + Self { num_bits, reg_no: self.reg_no } + } +} + +#[derive(Clone, Copy, Debug)] +pub struct A64Mem +{ + // Size in bits + pub num_bits: u8, + + /// Base register number + pub base_reg_no: u8, + + /// Constant displacement from the base, not scaled + pub disp: i32, +} + +impl A64Mem { + pub fn new(num_bits: u8, reg: A64Opnd, disp: i32) -> Self { + match reg { + A64Opnd::Reg(reg) => { + Self { num_bits, base_reg_no: reg.reg_no, disp } + }, + _ => panic!("Expected register operand") + } + } +} + +#[derive(Clone, Copy, Debug)] +pub enum A64Opnd +{ + // Dummy operand + None, + + // Immediate value + Imm(i64), + + // Unsigned immediate + UImm(u64), + + // Register + Reg(A64Reg), + + // Memory + Mem(A64Mem) +} + +impl A64Opnd { + /// Create a new immediate value operand. + pub fn new_imm(value: i64) -> Self { + A64Opnd::Imm(value) + } + + /// Create a new unsigned immediate value operand. + pub fn new_uimm(value: u64) -> Self { + A64Opnd::UImm(value) + } + + /// Creates a new memory operand. + pub fn new_mem(num_bits: u8, reg: A64Opnd, disp: i32) -> Self { + A64Opnd::Mem(A64Mem::new(num_bits, reg, disp)) + } + + /// Convenience function to check if this operand is a register. + pub fn is_reg(&self) -> bool { + match self { + A64Opnd::Reg(_) => true, + _ => false + } + } + + /// Unwrap a register from an operand. + pub fn unwrap_reg(&self) -> A64Reg { + match self { + A64Opnd::Reg(reg) => *reg, + _ => panic!("Expected register operand") + } + } +} + +// argument registers +pub const X0_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 0 }; +pub const X1_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 1 }; +pub const X2_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 2 }; +pub const X3_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 3 }; +pub const X4_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 4 }; +pub const X5_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 5 }; + +// caller-save registers +pub const X9_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 9 }; +pub const X10_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 10 }; +pub const X11_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 11 }; +pub const X12_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 12 }; +pub const X13_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 13 }; +pub const X14_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 14 }; +pub const X15_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 15 }; +pub const X16_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 16 }; +pub const X17_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 17 }; + +// callee-save registers +pub const X19_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 19 }; +pub const X20_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 20 }; +pub const X21_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 21 }; +pub const X22_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 22 }; + +// zero register +pub const XZR_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 31 }; + +// 64-bit registers +pub const X0: A64Opnd = A64Opnd::Reg(X0_REG); +pub const X1: A64Opnd = A64Opnd::Reg(X1_REG); +pub const X2: A64Opnd = A64Opnd::Reg(X2_REG); +pub const X3: A64Opnd = A64Opnd::Reg(X3_REG); +pub const X4: A64Opnd = A64Opnd::Reg(X4_REG); +pub const X5: A64Opnd = A64Opnd::Reg(X5_REG); +pub const X6: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 6 
}); +pub const X7: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 7 }); +pub const X8: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 8 }); +pub const X9: A64Opnd = A64Opnd::Reg(X9_REG); +pub const X10: A64Opnd = A64Opnd::Reg(X10_REG); +pub const X11: A64Opnd = A64Opnd::Reg(X11_REG); +pub const X12: A64Opnd = A64Opnd::Reg(X12_REG); +pub const X13: A64Opnd = A64Opnd::Reg(X13_REG); +pub const X14: A64Opnd = A64Opnd::Reg(X14_REG); +pub const X15: A64Opnd = A64Opnd::Reg(X15_REG); +pub const X16: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 16 }); +pub const X17: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 17 }); +pub const X18: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 18 }); +pub const X19: A64Opnd = A64Opnd::Reg(X19_REG); +pub const X20: A64Opnd = A64Opnd::Reg(X20_REG); +pub const X21: A64Opnd = A64Opnd::Reg(X21_REG); +pub const X22: A64Opnd = A64Opnd::Reg(X22_REG); +pub const X23: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 23 }); +pub const X24: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 24 }); +pub const X25: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 25 }); +pub const X26: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 26 }); +pub const X27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 27 }); +pub const X28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 28 }); +pub const X29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 29 }); +pub const X30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 30 }); +pub const X31: A64Opnd = A64Opnd::Reg(XZR_REG); + +// 32-bit registers +pub const W0: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 0 }); +pub const W1: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 1 }); +pub const W2: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 2 }); +pub const W3: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 3 }); +pub const W4: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 4 }); +pub const W5: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 5 }); +pub const W6: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 6 }); +pub const W7: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 7 }); +pub const W8: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 8 }); +pub const W9: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 9 }); +pub const W10: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 10 }); +pub const W11: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 11 }); +pub const W12: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 12 }); +pub const W13: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 13 }); +pub const W14: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 14 }); +pub const W15: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 15 }); +pub const W16: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 16 }); +pub const W17: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 17 }); +pub const W18: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 18 }); +pub const W19: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 19 }); +pub const W20: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 20 }); +pub const W21: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 21 }); +pub const W22: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 22 }); +pub const W23: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 23 }); +pub const W24: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 24 }); +pub const W25: A64Opnd = 
A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 25 }); +pub const W26: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 26 }); +pub const W27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 27 }); +pub const W28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 28 }); +pub const W29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 29 }); +pub const W30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 30 }); +pub const W31: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 31 }); + +// C argument registers +pub const C_ARG_REGS: [A64Opnd; 4] = [X0, X1, X2, X3]; +pub const C_ARG_REGREGS: [A64Reg; 4] = [X0_REG, X1_REG, X2_REG, X3_REG]; diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs new file mode 100644 index 0000000000..524d6341f5 --- /dev/null +++ b/yjit/src/asm/mod.rs @@ -0,0 +1,848 @@ +use std::cell::RefCell; +use std::fmt; +use std::mem; +use std::rc::Rc; +use crate::core::IseqPayload; +use crate::core::for_each_off_stack_iseq_payload; +use crate::core::for_each_on_stack_iseq_payload; +use crate::invariants::rb_yjit_tracing_invalidate_all; +use crate::stats::incr_counter; +use crate::virtualmem::WriteError; + +#[cfg(feature = "disasm")] +use std::collections::BTreeMap; + +use crate::codegen::CodegenGlobals; +use crate::virtualmem::{VirtualMem, CodePtr}; + +// Lots of manual vertical alignment in there that rustfmt doesn't handle well. +#[rustfmt::skip] +pub mod x86_64; + +pub mod arm64; + +// +// TODO: need a field_size_of macro, to compute the size of a struct field in bytes +// + +/// Reference to an ASM label +#[derive(Clone)] +pub struct LabelRef { + // Position in the code block where the label reference exists + pos: usize, + + // Label which this refers to + label_idx: usize, + + /// The number of bytes that this label reference takes up in the memory. + /// It's necessary to know this ahead of time so that when we come back to + /// patch it it takes the same amount of space. + num_bytes: usize, + + /// The object that knows how to encode the branch instruction. + encode: fn(&mut CodeBlock, i64, i64) +} + +/// Block of memory into which instructions can be assembled +pub struct CodeBlock { + // Memory for storing the encoded instructions + mem_block: Rc<RefCell<VirtualMem>>, + + // Size of a code page in bytes. Each code page is split into an inlined and an outlined portion. + // Code GC collects code memory at this granularity. + // Must be a multiple of the OS page size. + page_size: usize, + + // Memory block size + mem_size: usize, + + // Current writing position + write_pos: usize, + + // The index of the last page with written bytes + last_page_idx: usize, + + // Total number of bytes written to past pages + past_page_bytes: usize, + + // Size reserved for writing a jump to the next page + page_end_reserve: usize, + + // Table of registered label addresses + label_addrs: Vec<usize>, + + // Table of registered label names + label_names: Vec<String>, + + // References to labels + label_refs: Vec<LabelRef>, + + // Comments for assembly instructions, if that feature is enabled + #[cfg(feature = "disasm")] + asm_comments: BTreeMap<usize, Vec<String>>, + + // True for OutlinedCb + pub outlined: bool, + + // Set if the CodeBlock is unable to output some instructions, + // for example, when there is not enough space or when a jump + // target is too far away. + dropped_bytes: bool, + + // Keeps track of what pages we can write to after code gc. + // `None` means all pages are free. 
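+    // Shared with the outlined CodeBlock via Rc after code GC, so both blocks
+    // observe the same free list (see code_gc below).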
+ freed_pages: Rc<Option<Vec<usize>>>, +} + +/// Set of CodeBlock label states. Used for recovering the previous state. +pub struct LabelState { + label_addrs: Vec<usize>, + label_names: Vec<String>, + label_refs: Vec<LabelRef>, +} + +impl CodeBlock { + /// Works for common AArch64 systems that have 16 KiB pages and + /// common x86_64 systems that use 4 KiB pages. + const PREFERRED_CODE_PAGE_SIZE: usize = 16 * 1024; + + /// Make a new CodeBlock + pub fn new(mem_block: Rc<RefCell<VirtualMem>>, outlined: bool, freed_pages: Rc<Option<Vec<usize>>>) -> Self { + // Pick the code page size + let system_page_size = mem_block.borrow().system_page_size(); + let page_size = if 0 == Self::PREFERRED_CODE_PAGE_SIZE % system_page_size { + Self::PREFERRED_CODE_PAGE_SIZE + } else { + system_page_size + }; + + let mem_size = mem_block.borrow().virtual_region_size(); + let mut cb = Self { + mem_block, + mem_size, + page_size, + write_pos: 0, + last_page_idx: 0, + past_page_bytes: 0, + page_end_reserve: 0, + label_addrs: Vec::new(), + label_names: Vec::new(), + label_refs: Vec::new(), + #[cfg(feature = "disasm")] + asm_comments: BTreeMap::new(), + outlined, + dropped_bytes: false, + freed_pages, + }; + cb.page_end_reserve = cb.jmp_ptr_bytes(); + cb.write_pos = cb.page_start(); + + #[cfg(not(test))] + assert_eq!(0, mem_size % page_size, "partially in-bounds code pages should be impossible"); + + cb + } + + /// Move the CodeBlock to the next page. If it's on the furthest page, + /// move the other CodeBlock to the next page as well. + pub fn next_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, base_ptr: CodePtr, jmp_ptr: F) -> bool { + let old_write_ptr = self.get_write_ptr(); + self.set_write_ptr(base_ptr); + + // Use the freed_pages list if code GC has been used. Otherwise use the next page. + let next_page_idx = if let Some(freed_pages) = self.freed_pages.as_ref() { + let current_page = self.write_pos / self.page_size; + freed_pages.iter().find(|&&page| current_page < page).map(|&page| page) + } else { + Some(self.write_pos / self.page_size + 1) + }; + + // Move self to the next page + if next_page_idx.is_none() || !self.set_page(next_page_idx.unwrap(), &jmp_ptr) { + self.set_write_ptr(old_write_ptr); // rollback if there are no more pages + return false; + } + + // Move the other CodeBlock to the same page if it's on the furthest page + if cfg!(not(test)) { + self.other_cb().unwrap().set_page(next_page_idx.unwrap(), &jmp_ptr); + } + + return !self.dropped_bytes; + } + + /// Move the CodeBlock to page_idx only if it's not going backwards. + fn set_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, page_idx: usize, jmp_ptr: &F) -> bool { + // Do not move the CodeBlock if page_idx points to an old position so that this + // CodeBlock will not overwrite existing code. + // + // Let's say this is the current situation: + // cb: [page1, page2, page3 (write_pos)], ocb: [page1, page2, page3 (write_pos)] + // + // When cb needs to patch page1, this will be temporarily changed to: + // cb: [page1 (write_pos), page2, page3], ocb: [page1, page2, page3 (write_pos)] + // + // While patching page1, cb may need to jump to page2. What set_page currently does is: + // cb: [page1, page2 (write_pos), page3], ocb: [page1, page2, page3 (write_pos)] + // instead of: + // cb: [page1, page2 (write_pos), page3], ocb: [page1, page2 (write_pos), page3] + // because moving ocb's write_pos from page3 to the beginning of page2 will let ocb's + // write_pos point to existing code in page2, which might let ocb overwrite it later. 
+ // + // We could remember the last write_pos in page2 and let set_page use that position, + // but you need to waste some space for keeping write_pos for every single page. + // It doesn't seem necessary for performance either. So we're currently not doing it. + let dst_pos = self.get_page_pos(page_idx); + if self.write_pos < dst_pos { + // Fail if next page is out of bounds + if dst_pos >= self.mem_size { + return false; + } + + // Reset dropped_bytes + self.dropped_bytes = false; + + // Generate jmp_ptr from src_pos to dst_pos + let dst_ptr = self.get_ptr(dst_pos); + self.without_page_end_reserve(|cb| { + assert!(cb.has_capacity(cb.jmp_ptr_bytes())); + cb.add_comment("jump to next page"); + jmp_ptr(cb, dst_ptr); + }); + + // Update past_page_bytes for code_size() if this is a new page + if self.last_page_idx < page_idx { + self.past_page_bytes += self.current_page_bytes(); + } + + // Start the next code from dst_pos + self.write_pos = dst_pos; + // Update the last_page_idx if page_idx points to the furthest page + self.last_page_idx = usize::max(self.last_page_idx, page_idx); + } + !self.dropped_bytes + } + + /// Free the memory pages of given code page indexes + fn free_pages(&mut self, page_idxs: &Vec<usize>) { + let mut page_idxs = page_idxs.clone(); + page_idxs.reverse(); // to loop with pop() + + // Group adjacent page indexes and free them in batches to reduce the # of syscalls. + while let Some(page_idx) = page_idxs.pop() { + // Group first adjacent page indexes + let mut batch_idxs = vec![page_idx]; + while page_idxs.last() == Some(&(batch_idxs.last().unwrap() + 1)) { + batch_idxs.push(page_idxs.pop().unwrap()); + } + + // Free the grouped pages at once + let start_ptr = self.mem_block.borrow().start_ptr().add_bytes(page_idx * self.page_size); + let batch_size = self.page_size * batch_idxs.len(); + self.mem_block.borrow_mut().free_bytes(start_ptr, batch_size as u32); + } + } + + pub fn page_size(&self) -> usize { + self.page_size + } + + pub fn mapped_region_size(&self) -> usize { + self.mem_block.borrow().mapped_region_size() + } + + /// Size of the region in bytes where writes could be attempted. + #[cfg(target_arch = "aarch64")] + pub fn virtual_region_size(&self) -> usize { + self.mem_block.borrow().virtual_region_size() + } + + /// Return the number of code pages that have been mapped by the VirtualMemory. + pub fn num_mapped_pages(&self) -> usize { + // CodeBlock's page size != VirtualMem's page size on Linux, + // so mapped_region_size % self.page_size may not be 0 + ((self.mapped_region_size() - 1) / self.page_size) + 1 + } + + /// Return the number of code pages that have been reserved by the VirtualMemory. + pub fn num_virtual_pages(&self) -> usize { + let virtual_region_size = self.mem_block.borrow().virtual_region_size(); + // CodeBlock's page size != VirtualMem's page size on Linux, + // so mapped_region_size % self.page_size may not be 0 + ((virtual_region_size - 1) / self.page_size) + 1 + } + + /// Return the number of code pages that have been freed and not used yet. + pub fn num_freed_pages(&self) -> usize { + (0..self.num_mapped_pages()).filter(|&page_idx| self.has_freed_page(page_idx)).count() + } + + pub fn has_freed_page(&self, page_idx: usize) -> bool { + self.freed_pages.as_ref().as_ref().map_or(false, |pages| pages.contains(&page_idx)) && // code GCed + self.write_pos < page_idx * self.page_size // and not written yet + } + + /// Convert a page index to the write_pos for the page start. 
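+    /// For example, with a 16 KiB code page size, page_idx 2 maps to write_pos 2 * 16384 + page_start().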
+ fn get_page_pos(&self, page_idx: usize) -> usize { + self.page_size * page_idx + self.page_start() + } + + /// write_pos of the current page start + pub fn page_start_pos(&self) -> usize { + self.get_write_pos() / self.page_size * self.page_size + self.page_start() + } + + /// Offset of each page where CodeBlock should start writing + pub fn page_start(&self) -> usize { + let mut start = if self.inline() { + 0 + } else { + self.page_size / 2 + }; + if cfg!(debug_assertions) && !cfg!(test) { + // Leave illegal instructions at the beginning of each page to assert + // we're not accidentally crossing page boundaries. + start += self.jmp_ptr_bytes(); + } + start + } + + /// Offset of each page where CodeBlock should stop writing (exclusive) + pub fn page_end(&self) -> usize { + let page_end = if self.inline() { + self.page_size / 2 + } else { + self.page_size + }; + page_end - self.page_end_reserve // reserve space to jump to the next page + } + + /// Call a given function with page_end_reserve = 0 + pub fn without_page_end_reserve<F: Fn(&mut Self)>(&mut self, block: F) { + let old_page_end_reserve = self.page_end_reserve; + self.page_end_reserve = 0; + block(self); + self.page_end_reserve = old_page_end_reserve; + } + + /// Return the address ranges of a given address range that this CodeBlock can write. + #[allow(dead_code)] + pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> { + let region_start = self.get_ptr(0).raw_addr(self); + let region_end = self.get_ptr(self.get_mem_size()).raw_addr(self); + let mut start = start_ptr.raw_addr(self); + let end = std::cmp::min(end_ptr.raw_addr(self), region_end); + + let freed_pages = self.freed_pages.as_ref().as_ref(); + let mut addrs = vec![]; + while start < end { + let page_idx = start.saturating_sub(region_start) / self.page_size; + let current_page = region_start + (page_idx * self.page_size); + let page_end = std::cmp::min(end, current_page + self.page_end()); + // If code GC has been used, skip pages that are used by past on-stack code + if freed_pages.map_or(true, |pages| pages.contains(&page_idx)) { + addrs.push((start, page_end)); + } + start = current_page + self.page_size + self.page_start(); + } + addrs + } + + /// Return the number of bytes written by this CodeBlock. + pub fn code_size(&self) -> usize { + self.current_page_bytes() + self.past_page_bytes + } + + /// Return the number of bytes written to the current page. + fn current_page_bytes(&self) -> usize { + (self.write_pos % self.page_size).saturating_sub(self.page_start()) + } + + /// Check if this code block has sufficient remaining capacity + pub fn has_capacity(&self, num_bytes: usize) -> bool { + let page_offset = self.write_pos % self.page_size; + let capacity = self.page_end().saturating_sub(page_offset); + num_bytes <= capacity + } + + /// Add an assembly comment if the feature is on. + /// If not, this becomes an inline no-op. + #[cfg(feature = "disasm")] + pub fn add_comment(&mut self, comment: &str) { + let cur_ptr = self.get_write_ptr().raw_addr(self); + + // If there's no current list of comments for this line number, add one. + let this_line_comments = self.asm_comments.entry(cur_ptr).or_default(); + + // Unless this comment is the same as the last one at this same line, add it. 
+ if this_line_comments.last().map(String::as_str) != Some(comment) { + this_line_comments.push(comment.to_string()); + } + } + #[cfg(not(feature = "disasm"))] + #[inline] + pub fn add_comment(&mut self, _: &str) {} + + #[cfg(feature = "disasm")] + pub fn comments_at(&self, pos: usize) -> Option<&Vec<String>> { + self.asm_comments.get(&pos) + } + + #[allow(unused_variables)] + #[cfg(feature = "disasm")] + pub fn remove_comments(&mut self, start_addr: CodePtr, end_addr: CodePtr) { + for addr in start_addr.raw_addr(self)..end_addr.raw_addr(self) { + self.asm_comments.remove(&addr); + } + } + #[cfg(not(feature = "disasm"))] + #[inline] + pub fn remove_comments(&mut self, _: CodePtr, _: CodePtr) {} + + pub fn clear_comments(&mut self) { + #[cfg(feature = "disasm")] + self.asm_comments.clear(); + } + + pub fn get_mem_size(&self) -> usize { + self.mem_size + } + + pub fn get_write_pos(&self) -> usize { + self.write_pos + } + + pub fn write_mem(&self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> { + self.mem_block.borrow_mut().write_byte(write_ptr, byte) + } + + // Set the current write position + pub fn set_pos(&mut self, pos: usize) { + // No bounds check here since we can be out of bounds + // when the code block fills up. We want to be able to + // restore to the filled up state after patching something + // in the middle. + self.write_pos = pos; + } + + // Set the current write position from a pointer + pub fn set_write_ptr(&mut self, code_ptr: CodePtr) { + let pos = code_ptr.as_offset() - self.mem_block.borrow().start_ptr().as_offset(); + self.set_pos(pos.try_into().unwrap()); + } + + /// Get a (possibly dangling) direct pointer into the executable memory block + pub fn get_ptr(&self, offset: usize) -> CodePtr { + self.mem_block.borrow().start_ptr().add_bytes(offset) + } + + /// Convert an address range to memory page indexes against a num_pages()-sized array. + pub fn addrs_to_pages(&self, start_addr: CodePtr, end_addr: CodePtr) -> Vec<usize> { + let mem_start = self.mem_block.borrow().start_ptr().raw_addr(self); + let mem_end = self.mem_block.borrow().mapped_end_ptr().raw_addr(self); + assert!(mem_start <= start_addr.raw_addr(self)); + assert!(start_addr.raw_addr(self) <= end_addr.raw_addr(self)); + assert!(end_addr.raw_addr(self) <= mem_end); + + // Ignore empty code ranges + if start_addr == end_addr { + return vec![]; + } + + let start_page = (start_addr.raw_addr(self) - mem_start) / self.page_size; + let end_page = (end_addr.raw_addr(self) - mem_start - 1) / self.page_size; + (start_page..=end_page).collect() // TODO: consider returning an iterator + } + + /// Get a (possibly dangling) direct pointer to the current write position + pub fn get_write_ptr(&self) -> CodePtr { + self.get_ptr(self.write_pos) + } + + /// Write a single byte at the current position. + pub fn write_byte(&mut self, byte: u8) { + let write_ptr = self.get_write_ptr(); + if self.has_capacity(1) && self.mem_block.borrow_mut().write_byte(write_ptr, byte).is_ok() { + self.write_pos += 1; + } else { + self.dropped_bytes = true; + } + } + + /// Write multiple bytes starting from the current position. + pub fn write_bytes(&mut self, bytes: &[u8]) { + for byte in bytes { + self.write_byte(*byte); + } + } + + /// Write an integer over the given number of bits at the current position. 
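+    /// Bytes are emitted least-significant first (little-endian); e.g. writing 0x11223344
+    /// over 32 bits emits the bytes 0x44, 0x33, 0x22, 0x11.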
+    fn write_int(&mut self, val: u64, num_bits: u32) {
+        assert!(num_bits > 0);
+        assert!(num_bits % 8 == 0);
+
+        // Switch on the number of bits
+        match num_bits {
+            8 => self.write_byte(val as u8),
+            16 => self.write_bytes(&[(val & 0xff) as u8, ((val >> 8) & 0xff) as u8]),
+            32 => self.write_bytes(&[
+                (val & 0xff) as u8,
+                ((val >> 8) & 0xff) as u8,
+                ((val >> 16) & 0xff) as u8,
+                ((val >> 24) & 0xff) as u8,
+            ]),
+            _ => {
+                let mut cur = val;
+
+                // Write out the bytes
+                for _byte in 0..(num_bits / 8) {
+                    self.write_byte((cur & 0xff) as u8);
+                    cur >>= 8;
+                }
+            }
+        }
+    }
+
+    /// Check if bytes have been dropped (unwritten because of insufficient space)
+    pub fn has_dropped_bytes(&self) -> bool {
+        self.dropped_bytes
+    }
+
+    /// To patch code that straddles pages correctly, we need to start with
+    /// the dropped bytes flag unset so we can detect when to switch to a new page.
+    pub fn set_dropped_bytes(&mut self, dropped_bytes: bool) {
+        self.dropped_bytes = dropped_bytes;
+    }
+
+    /// Allocate a new label with a given name
+    pub fn new_label(&mut self, name: String) -> usize {
+        assert!(!name.contains(' '), "use underscores in label names, not spaces");
+
+        // This label doesn't have an address yet
+        self.label_addrs.push(0);
+        self.label_names.push(name);
+
+        return self.label_addrs.len() - 1;
+    }
+
+    /// Write a label at the current address
+    pub fn write_label(&mut self, label_idx: usize) {
+        self.label_addrs[label_idx] = self.write_pos;
+    }
+
+    // Add a label reference at the current write position
+    pub fn label_ref(&mut self, label_idx: usize, num_bytes: usize, encode: fn(&mut CodeBlock, i64, i64)) {
+        assert!(label_idx < self.label_addrs.len());
+
+        // Keep track of the reference
+        self.label_refs.push(LabelRef { pos: self.write_pos, label_idx, num_bytes, encode });
+
+        // Move past however many bytes the instruction takes up
+        if self.has_capacity(num_bytes) {
+            self.write_pos += num_bytes;
+        } else {
+            self.dropped_bytes = true; // retry emitting the Insn after next_page
+        }
+    }
+
+    // Link internal label references
+    pub fn link_labels(&mut self) {
+        let orig_pos = self.write_pos;
+
+        // For each label reference
+        for label_ref in mem::take(&mut self.label_refs) {
+            let ref_pos = label_ref.pos;
+            let label_idx = label_ref.label_idx;
+            assert!(ref_pos < self.mem_size);
+
+            let label_addr = self.label_addrs[label_idx];
+            assert!(label_addr < self.mem_size);
+
+            self.set_pos(ref_pos);
+            (label_ref.encode)(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64);
+
+            // Assert that we've written the same number of bytes that we
+            // expected to have written.
+            assert!(self.write_pos == ref_pos + label_ref.num_bytes);
+        }
+
+        self.write_pos = orig_pos;
+
+        // Clear the label positions and references
+        self.label_addrs.clear();
+        self.label_names.clear();
+        assert!(self.label_refs.is_empty());
+    }
+
+    pub fn clear_labels(&mut self) {
+        self.label_addrs.clear();
+        self.label_names.clear();
+        self.label_refs.clear();
+    }
+
+    pub fn get_label_state(&self) -> LabelState {
+        LabelState {
+            label_addrs: self.label_addrs.clone(),
+            label_names: self.label_names.clone(),
+            label_refs: self.label_refs.clone(),
+        }
+    }
+
+    pub fn set_label_state(&mut self, state: LabelState) {
+        self.label_addrs = state.label_addrs;
+        self.label_names = state.label_names;
+        self.label_refs = state.label_refs;
+    }
+
+    pub fn mark_all_executable(&mut self) {
+        self.mem_block.borrow_mut().mark_all_executable();
+    }
+
+    /// Code GC. Free code pages that are not on stack and reuse them.
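+    /// Roughly: mark the pages used by on-stack ISEQs (plus the outlined pages from
+    /// CodegenGlobals::init()), invalidate all generated code via rb_yjit_tracing_invalidate_all(),
+    /// free the unmarked pages, and record them in `freed_pages` so future writes can reuse them.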
+    pub fn code_gc(&mut self, ocb: &mut OutlinedCb) {
+        assert!(self.inline(), "must use on inline code block");
+
+        // The previous code GC failed to free any pages. Give up.
+        if self.freed_pages.as_ref() == &Some(vec![]) {
+            return;
+        }
+
+        // Check which pages are still in use
+        let mut pages_in_use = vec![false; self.num_mapped_pages()];
+        // For each ISEQ, we currently assume that only code pages used by inline code
+        // are used by outlined code, so we mark only code pages used by inlined code.
+        for_each_on_stack_iseq_payload(|iseq_payload| {
+            for page in &iseq_payload.pages {
+                pages_in_use[*page] = true;
+            }
+        });
+        // Avoid accumulating freed pages for future code GC
+        for_each_off_stack_iseq_payload(|iseq_payload: &mut IseqPayload| {
+            iseq_payload.pages = std::collections::HashSet::default();
+        });
+        // Outlined code generated by CodegenGlobals::init() should also be kept.
+        for page in CodegenGlobals::get_ocb_pages() {
+            pages_in_use[*page] = true;
+        }
+
+        // Invalidate everything to have more compact code after code GC.
+        // This currently patches every ISEQ, which works, but in the future,
+        // we could limit that to patch only on-stack ISEQs to optimize code GC.
+        rb_yjit_tracing_invalidate_all();
+
+        // Assert that all code pages are freeable
+        assert_eq!(
+            0,
+            self.mem_size % self.page_size,
+            "end of the last code page should be the end of the entire region"
+        );
+
+        // Let VirtualMem free the pages
+        let mut freed_pages: Vec<usize> = pages_in_use.iter().enumerate()
+            .filter(|&(_, &in_use)| !in_use).map(|(page, _)| page).collect();
+        // ObjectSpace API may trigger Ruby's GC, which marks gc_offsets in JIT code.
+        // So this should be called after for_each_*_iseq_payload and rb_yjit_tracing_invalidate_all.
+        self.free_pages(&freed_pages);
+
+        // Append virtual pages in case RubyVM::YJIT.code_gc is manually triggered.
+        let mut virtual_pages: Vec<usize> = (self.num_mapped_pages()..self.num_virtual_pages()).collect();
+        freed_pages.append(&mut virtual_pages);
+
+        if let Some(&first_page) = freed_pages.first() {
+            for cb in [&mut *self, ocb.unwrap()] {
+                cb.write_pos = cb.get_page_pos(first_page);
+                cb.past_page_bytes = 0;
+                cb.dropped_bytes = false;
+                cb.clear_comments();
+            }
+        }
+
+        // Track which pages are free.
+        let new_freed_pages = Rc::new(Some(freed_pages));
+        let old_freed_pages = mem::replace(&mut self.freed_pages, Rc::clone(&new_freed_pages));
+        ocb.unwrap().freed_pages = new_freed_pages;
+        assert_eq!(1, Rc::strong_count(&old_freed_pages)); // will deallocate
+
+        incr_counter!(code_gc_count);
+    }
+
+    pub fn inline(&self) -> bool {
+        !self.outlined
+    }
+
+    pub fn other_cb(&self) -> Option<&'static mut Self> {
+        if !CodegenGlobals::has_instance() {
+            None
+        } else if self.inline() {
+            Some(CodegenGlobals::get_outlined_cb().unwrap())
+        } else {
+            Some(CodegenGlobals::get_inline_cb())
+        }
+    }
+}
+
+#[cfg(test)]
+impl CodeBlock {
+    /// Stubbed CodeBlock for testing. Can't execute generated code.
+    pub fn new_dummy(mem_size: usize) -> Self {
+        use std::ptr::NonNull;
+        use crate::virtualmem::*;
+        use crate::virtualmem::tests::TestingAllocator;
+
+        let alloc = TestingAllocator::new(mem_size);
+        let mem_start: *const u8 = alloc.mem_start();
+        let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size);
+
+        Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(None))
+    }
+
+    /// Stubbed CodeBlock for testing conditions that can arise due to code GC. Can't execute generated code.
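+    /// The dummy region is sized to cover the largest page index in `freed_pages`.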
+ #[cfg(target_arch = "aarch64")] + pub fn new_dummy_with_freed_pages(mut freed_pages: Vec<usize>) -> Self { + use std::ptr::NonNull; + use crate::virtualmem::*; + use crate::virtualmem::tests::TestingAllocator; + + freed_pages.sort_unstable(); + let mem_size = Self::PREFERRED_CODE_PAGE_SIZE * + (1 + freed_pages.last().expect("freed_pages vec should not be empty")); + + let alloc = TestingAllocator::new(mem_size); + let mem_start: *const u8 = alloc.mem_start(); + let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size); + + Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(Some(freed_pages))) + } +} + +/// Produce hex string output from the bytes in a code block +impl fmt::LowerHex for CodeBlock { + fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result { + for pos in 0..self.write_pos { + let mem_block = &*self.mem_block.borrow(); + let byte = unsafe { mem_block.start_ptr().raw_ptr(mem_block).add(pos).read() }; + fmtr.write_fmt(format_args!("{:02x}", byte))?; + } + Ok(()) + } +} + +impl crate::virtualmem::CodePtrBase for CodeBlock { + fn base_ptr(&self) -> std::ptr::NonNull<u8> { + self.mem_block.borrow().base_ptr() + } +} + +/// Wrapper struct so we can use the type system to distinguish +/// Between the inlined and outlined code blocks +pub struct OutlinedCb { + // This must remain private + cb: CodeBlock, +} + +impl OutlinedCb { + pub fn wrap(cb: CodeBlock) -> Self { + OutlinedCb { cb: cb } + } + + pub fn unwrap(&mut self) -> &mut CodeBlock { + &mut self.cb + } +} + +/// Compute the number of bits needed to encode a signed value +pub fn imm_num_bits(imm: i64) -> u8 +{ + // Compute the smallest size this immediate fits in + if imm >= i8::MIN.into() && imm <= i8::MAX.into() { + return 8; + } + if imm >= i16::MIN.into() && imm <= i16::MAX.into() { + return 16; + } + if imm >= i32::MIN.into() && imm <= i32::MAX.into() { + return 32; + } + + return 64; +} + +/// Compute the number of bits needed to encode an unsigned value +pub fn uimm_num_bits(uimm: u64) -> u8 +{ + // Compute the smallest size this immediate fits in + if uimm <= u8::MAX.into() { + return 8; + } + else if uimm <= u16::MAX.into() { + return 16; + } + else if uimm <= u32::MAX.into() { + return 32; + } + + return 64; +} + +#[cfg(test)] +mod tests +{ + use super::*; + + #[test] + fn test_imm_num_bits() + { + assert_eq!(imm_num_bits(i8::MIN.into()), 8); + assert_eq!(imm_num_bits(i8::MAX.into()), 8); + + assert_eq!(imm_num_bits(i16::MIN.into()), 16); + assert_eq!(imm_num_bits(i16::MAX.into()), 16); + + assert_eq!(imm_num_bits(i32::MIN.into()), 32); + assert_eq!(imm_num_bits(i32::MAX.into()), 32); + + assert_eq!(imm_num_bits(i64::MIN), 64); + assert_eq!(imm_num_bits(i64::MAX), 64); + } + + #[test] + fn test_uimm_num_bits() { + assert_eq!(uimm_num_bits(u8::MIN.into()), 8); + assert_eq!(uimm_num_bits(u8::MAX.into()), 8); + + assert_eq!(uimm_num_bits(((u8::MAX as u16) + 1).into()), 16); + assert_eq!(uimm_num_bits(u16::MAX.into()), 16); + + assert_eq!(uimm_num_bits(((u16::MAX as u32) + 1).into()), 32); + assert_eq!(uimm_num_bits(u32::MAX.into()), 32); + + assert_eq!(uimm_num_bits((u32::MAX as u64) + 1), 64); + assert_eq!(uimm_num_bits(u64::MAX), 64); + } + + #[test] + fn test_code_size() { + // Write 4 bytes in the first page + let mut cb = CodeBlock::new_dummy(CodeBlock::PREFERRED_CODE_PAGE_SIZE * 2); + cb.write_bytes(&[0, 0, 0, 0]); + assert_eq!(cb.code_size(), 4); + + // Moving to the next page should not increase code_size + cb.next_page(cb.get_write_ptr(), |_, _| {}); + 
assert_eq!(cb.code_size(), 4); + + // Write 4 bytes in the second page + cb.write_bytes(&[0, 0, 0, 0]); + assert_eq!(cb.code_size(), 8); + + // Rewrite 4 bytes in the first page + let old_write_pos = cb.get_write_pos(); + cb.set_pos(0); + cb.write_bytes(&[1, 1, 1, 1]); + + // Moving from an old page to the next page should not increase code_size + cb.next_page(cb.get_write_ptr(), |_, _| {}); + cb.set_pos(old_write_pos); + assert_eq!(cb.code_size(), 8); + } +} diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs new file mode 100644 index 0000000000..fbbfa714d8 --- /dev/null +++ b/yjit/src/asm/x86_64/mod.rs @@ -0,0 +1,1450 @@ +#![allow(dead_code)] // For instructions we don't currently generate + +use crate::asm::*; + +// Import the assembler tests module +mod tests; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct X86Imm +{ + // Size in bits + pub num_bits: u8, + + // The value of the immediate + pub value: i64 +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct X86UImm +{ + // Size in bits + pub num_bits: u8, + + // The value of the immediate + pub value: u64 +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum RegType +{ + GP, + //FP, + //XMM, + IP, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct X86Reg +{ + // Size in bits + pub num_bits: u8, + + // Register type + pub reg_type: RegType, + + // Register index number + pub reg_no: u8, +} + +#[derive(Clone, Copy, Debug)] +pub struct X86Mem +{ + // Size in bits + pub num_bits: u8, + + /// Base register number + pub base_reg_no: u8, + + /// Index register number + pub idx_reg_no: Option<u8>, + + /// SIB scale exponent value (power of two, two bits) + pub scale_exp: u8, + + /// Constant displacement from the base, not scaled + pub disp: i32, +} + +#[derive(Clone, Copy, Debug)] +pub enum X86Opnd +{ + // Dummy operand + None, + + // Immediate value + Imm(X86Imm), + + // Unsigned immediate + UImm(X86UImm), + + // General-purpose register + Reg(X86Reg), + + // Memory location + Mem(X86Mem), + + // IP-relative memory location + IPRel(i32) +} + +impl X86Reg { + pub fn with_num_bits(&self, num_bits: u8) -> Self { + assert!( + num_bits == 8 || + num_bits == 16 || + num_bits == 32 || + num_bits == 64 + ); + Self { + num_bits, + reg_type: self.reg_type, + reg_no: self.reg_no + } + } +} + +impl X86Opnd { + fn rex_needed(&self) -> bool { + match self { + X86Opnd::None => false, + X86Opnd::Imm(_) => false, + X86Opnd::UImm(_) => false, + X86Opnd::Reg(reg) => reg.reg_no > 7 || reg.num_bits == 8 && reg.reg_no >= 4, + X86Opnd::Mem(mem) => mem.base_reg_no > 7 || (mem.idx_reg_no.unwrap_or(0) > 7), + X86Opnd::IPRel(_) => false + } + } + + // Check if an SIB byte is needed to encode this operand + fn sib_needed(&self) -> bool { + match self { + X86Opnd::Mem(mem) => { + mem.idx_reg_no.is_some() || + mem.base_reg_no == RSP_REG_NO || + mem.base_reg_no == R12_REG_NO + }, + _ => false + } + } + + fn disp_size(&self) -> u32 { + match self { + X86Opnd::IPRel(_) => 32, + X86Opnd::Mem(mem) => { + if mem.disp != 0 { + // Compute the required displacement size + let num_bits = imm_num_bits(mem.disp.into()); + if num_bits > 32 { + panic!("displacement does not fit in 32 bits"); + } + + // x86 can only encode 8-bit and 32-bit displacements + if num_bits == 16 { 32 } else { 8 } + } else if mem.base_reg_no == RBP_REG_NO || mem.base_reg_no == R13_REG_NO { + // If EBP or RBP or R13 is used as the base, displacement must be encoded + 8 + } else { + 0 + } + }, + _ => 0 + } + } + + pub fn num_bits(&self) -> u8 { + match 
self { + X86Opnd::Reg(reg) => reg.num_bits, + X86Opnd::Imm(imm) => imm.num_bits, + X86Opnd::UImm(uimm) => uimm.num_bits, + X86Opnd::Mem(mem) => mem.num_bits, + _ => unreachable!() + } + } + + pub fn is_some(&self) -> bool { + match self { + X86Opnd::None => false, + _ => true + } + } + +} + +// Instruction pointer +pub const RIP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::IP, reg_no: 5 }); + +// 64-bit GP registers +const RAX_REG_NO: u8 = 0; +const RSP_REG_NO: u8 = 4; +const RBP_REG_NO: u8 = 5; +const R12_REG_NO: u8 = 12; +const R13_REG_NO: u8 = 13; + +pub const RAX_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RAX_REG_NO }; +pub const RCX_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 1 }; +pub const RDX_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 2 }; +pub const RBX_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 3 }; +pub const RSP_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RSP_REG_NO }; +pub const RBP_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RBP_REG_NO }; +pub const RSI_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 6 }; +pub const RDI_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 7 }; +pub const R8_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 8 }; +pub const R9_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 9 }; +pub const R10_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 10 }; +pub const R11_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 11 }; +pub const R12_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R12_REG_NO }; +pub const R13_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R13_REG_NO }; +pub const R14_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 14 }; +pub const R15_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 15 }; + +pub const RAX: X86Opnd = X86Opnd::Reg(RAX_REG); +pub const RCX: X86Opnd = X86Opnd::Reg(RCX_REG); +pub const RDX: X86Opnd = X86Opnd::Reg(RDX_REG); +pub const RBX: X86Opnd = X86Opnd::Reg(RBX_REG); +pub const RSP: X86Opnd = X86Opnd::Reg(RSP_REG); +pub const RBP: X86Opnd = X86Opnd::Reg(RBP_REG); +pub const RSI: X86Opnd = X86Opnd::Reg(RSI_REG); +pub const RDI: X86Opnd = X86Opnd::Reg(RDI_REG); +pub const R8: X86Opnd = X86Opnd::Reg(R8_REG); +pub const R9: X86Opnd = X86Opnd::Reg(R9_REG); +pub const R10: X86Opnd = X86Opnd::Reg(R10_REG); +pub const R11: X86Opnd = X86Opnd::Reg(R11_REG); +pub const R12: X86Opnd = X86Opnd::Reg(R12_REG); +pub const R13: X86Opnd = X86Opnd::Reg(R13_REG); +pub const R14: X86Opnd = X86Opnd::Reg(R14_REG); +pub const R15: X86Opnd = X86Opnd::Reg(R15_REG); + +// 32-bit GP registers +pub const EAX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 0 }); +pub const ECX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 1 }); +pub const EDX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 2 }); +pub const EBX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 3 }); +pub const ESP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 4 }); +pub const EBP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 5 }); +pub const ESI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 6 }); +pub const EDI: 
X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 7 }); +pub const R8D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 8 }); +pub const R9D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 9 }); +pub const R10D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 10 }); +pub const R11D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 11 }); +pub const R12D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 12 }); +pub const R13D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 13 }); +pub const R14D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 14 }); +pub const R15D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 15 }); + +// 16-bit GP registers +pub const AX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 0 }); +pub const CX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 1 }); +pub const DX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 2 }); +pub const BX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 3 }); +//pub const SP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 4 }); +pub const BP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 5 }); +pub const SI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 6 }); +pub const DI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 7 }); +pub const R8W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 8 }); +pub const R9W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 9 }); +pub const R10W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 10 }); +pub const R11W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 11 }); +pub const R12W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 12 }); +pub const R13W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 13 }); +pub const R14W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 14 }); +pub const R15W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 15 }); + +// 8-bit GP registers +pub const AL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 0 }); +pub const CL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 1 }); +pub const DL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 2 }); +pub const BL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 3 }); +pub const SPL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 4 }); +pub const BPL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 5 }); +pub const SIL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 6 }); +pub const DIL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 7 }); +pub const R8B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 8 }); +pub const R9B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 9 }); +pub const R10B: X86Opnd = X86Opnd::Reg(X86Reg { 
num_bits: 8, reg_type: RegType::GP, reg_no: 10 }); +pub const R11B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 11 }); +pub const R12B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 12 }); +pub const R13B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 13 }); +pub const R14B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 14 }); +pub const R15B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 15 }); + +//=========================================================================== + +/// Shorthand for memory operand with base register and displacement +pub fn mem_opnd(num_bits: u8, base_reg: X86Opnd, disp: i32) -> X86Opnd +{ + let base_reg = match base_reg { + X86Opnd::Reg(reg) => reg, + _ => unreachable!() + }; + + if base_reg.reg_type == RegType::IP { + X86Opnd::IPRel(disp) + } else { + X86Opnd::Mem( + X86Mem { + num_bits: num_bits, + base_reg_no: base_reg.reg_no, + idx_reg_no: None, + scale_exp: 0, + disp: disp, + } + ) + } +} + +/// Memory operand with SIB (Scale Index Base) indexing +pub fn mem_opnd_sib(num_bits: u8, base_opnd: X86Opnd, index_opnd: X86Opnd, scale: i32, disp: i32) -> X86Opnd { + if let (X86Opnd::Reg(base_reg), X86Opnd::Reg(index_reg)) = (base_opnd, index_opnd) { + let scale_exp: u8; + + match scale { + 8 => { scale_exp = 3; }, + 4 => { scale_exp = 2; }, + 2 => { scale_exp = 1; }, + 1 => { scale_exp = 0; }, + _ => unreachable!() + }; + + X86Opnd::Mem(X86Mem { + num_bits, + base_reg_no: base_reg.reg_no, + idx_reg_no: Some(index_reg.reg_no), + scale_exp, + disp + }) + } else { + unreachable!() + } +} + +/* +// Struct member operand +#define member_opnd(base_reg, struct_type, member_name) mem_opnd( \ + 8 * sizeof(((struct_type*)0)->member_name), \ + base_reg, \ + offsetof(struct_type, member_name) \ +) + +// Struct member operand with an array index +#define member_opnd_idx(base_reg, struct_type, member_name, idx) mem_opnd( \ + 8 * sizeof(((struct_type*)0)->member_name[0]), \ + base_reg, \ + (offsetof(struct_type, member_name) + \ + sizeof(((struct_type*)0)->member_name[0]) * idx) \ +) +*/ + +/* +// TODO: this should be a method, X86Opnd.resize() or X86Opnd.subreg() +static x86opnd_t resize_opnd(x86opnd_t opnd, uint32_t num_bits) +{ + assert (num_bits % 8 == 0); + x86opnd_t sub = opnd; + sub.num_bits = num_bits; + return sub; +} +*/ + +pub fn imm_opnd(value: i64) -> X86Opnd +{ + X86Opnd::Imm(X86Imm { num_bits: imm_num_bits(value), value }) +} + +pub fn uimm_opnd(value: u64) -> X86Opnd +{ + X86Opnd::UImm(X86UImm { num_bits: uimm_num_bits(value), value }) +} + +pub fn const_ptr_opnd(ptr: *const u8) -> X86Opnd +{ + uimm_opnd(ptr as u64) +} + +/// Write the REX byte +fn write_rex(cb: &mut CodeBlock, w_flag: bool, reg_no: u8, idx_reg_no: u8, rm_reg_no: u8) { + // 0 1 0 0 w r x b + // w - 64-bit operand size flag + // r - MODRM.reg extension + // x - SIB.index extension + // b - MODRM.rm or SIB.base extension + let w: u8 = if w_flag { 1 } else { 0 }; + let r: u8 = if (reg_no & 8) > 0 { 1 } else { 0 }; + let x: u8 = if (idx_reg_no & 8) > 0 { 1 } else { 0 }; + let b: u8 = if (rm_reg_no & 8) > 0 { 1 } else { 0 }; + + // Encode and write the REX byte + cb.write_byte(0x40 + (w << 3) + (r << 2) + (x << 1) + (b)); +} + +/// Write an opcode byte with an embedded register operand +fn write_opcode(cb: &mut CodeBlock, opcode: u8, reg: X86Reg) { + let op_byte: u8 = opcode | (reg.reg_no & 7); + cb.write_byte(op_byte); +} + +/// Encode an RM 
instruction +fn write_rm(cb: &mut CodeBlock, sz_pref: bool, rex_w: bool, r_opnd: X86Opnd, rm_opnd: X86Opnd, op_ext: Option<u8>, bytes: &[u8]) { + let op_len = bytes.len(); + assert!(op_len > 0 && op_len <= 3); + assert!(matches!(r_opnd, X86Opnd::Reg(_) | X86Opnd::None), "Can only encode an RM instruction with a register or a none"); + + // Flag to indicate the REX prefix is needed + let need_rex = rex_w || r_opnd.rex_needed() || rm_opnd.rex_needed(); + + // Flag to indicate SIB byte is needed + let need_sib = r_opnd.sib_needed() || rm_opnd.sib_needed(); + + // Add the operand-size prefix, if needed + if sz_pref { + cb.write_byte(0x66); + } + + // Add the REX prefix, if needed + if need_rex { + // 0 1 0 0 w r x b + // w - 64-bit operand size flag + // r - MODRM.reg extension + // x - SIB.index extension + // b - MODRM.rm or SIB.base extension + + let w = if rex_w { 1 } else { 0 }; + let r = match r_opnd { + X86Opnd::None => 0, + X86Opnd::Reg(reg) => if (reg.reg_no & 8) > 0 { 1 } else { 0 }, + _ => unreachable!() + }; + + let x = match (need_sib, rm_opnd) { + (true, X86Opnd::Mem(mem)) => if (mem.idx_reg_no.unwrap_or(0) & 8) > 0 { 1 } else { 0 }, + _ => 0 + }; + + let b = match rm_opnd { + X86Opnd::Reg(reg) => if (reg.reg_no & 8) > 0 { 1 } else { 0 }, + X86Opnd::Mem(mem) => if (mem.base_reg_no & 8) > 0 { 1 } else { 0 }, + _ => 0 + }; + + // Encode and write the REX byte + let rex_byte: u8 = 0x40 + (w << 3) + (r << 2) + (x << 1) + (b); + cb.write_byte(rex_byte); + } + + // Write the opcode bytes to the code block + for byte in bytes { + cb.write_byte(*byte) + } + + // MODRM.mod (2 bits) + // MODRM.reg (3 bits) + // MODRM.rm (3 bits) + + assert!( + !(op_ext.is_some() && r_opnd.is_some()), + "opcode extension and register operand present" + ); + + // Encode the mod field + let rm_mod = match rm_opnd { + X86Opnd::Reg(_) => 3, + X86Opnd::IPRel(_) => 0, + X86Opnd::Mem(_mem) => { + match rm_opnd.disp_size() { + 0 => 0, + 8 => 1, + 32 => 2, + _ => unreachable!() + } + }, + _ => unreachable!() + }; + + // Encode the reg field + let reg: u8; + if let Some(val) = op_ext { + reg = val; + } else { + reg = match r_opnd { + X86Opnd::Reg(reg) => reg.reg_no & 7, + _ => 0 + }; + } + + // Encode the rm field + let rm = match rm_opnd { + X86Opnd::Reg(reg) => reg.reg_no & 7, + X86Opnd::Mem(mem) => if need_sib { 4 } else { mem.base_reg_no & 7 }, + X86Opnd::IPRel(_) => 0b101, + _ => unreachable!() + }; + + // Encode and write the ModR/M byte + let rm_byte: u8 = (rm_mod << 6) + (reg << 3) + (rm); + cb.write_byte(rm_byte); + + // Add the SIB byte, if needed + if need_sib { + // SIB.scale (2 bits) + // SIB.index (3 bits) + // SIB.base (3 bits) + + match rm_opnd { + X86Opnd::Mem(mem) => { + // Encode the scale value + let scale = mem.scale_exp; + + // Encode the index value + let index = mem.idx_reg_no.map(|no| no & 7).unwrap_or(4); + + // Encode the base register + let base = mem.base_reg_no & 7; + + // Encode and write the SIB byte + let sib_byte: u8 = (scale << 6) + (index << 3) + (base); + cb.write_byte(sib_byte); + }, + _ => panic!("Expected mem operand") + } + } + + // Add the displacement + match rm_opnd { + X86Opnd::Mem(mem) => { + let disp_size = rm_opnd.disp_size(); + if disp_size > 0 { + cb.write_int(mem.disp as u64, disp_size); + } + }, + X86Opnd::IPRel(rel) => { + cb.write_int(rel as u64, 32); + }, + _ => () + }; +} + +// Encode a mul-like single-operand RM instruction +fn write_rm_unary(cb: &mut CodeBlock, op_mem_reg_8: u8, op_mem_reg_pref: u8, op_ext: Option<u8>, opnd: X86Opnd) { + assert!(matches!(opnd, 
X86Opnd::Reg(_) | X86Opnd::Mem(_))); + + let opnd_size = opnd.num_bits(); + assert!(opnd_size == 8 || opnd_size == 16 || opnd_size == 32 || opnd_size == 64); + + if opnd_size == 8 { + write_rm(cb, false, false, X86Opnd::None, opnd, op_ext, &[op_mem_reg_8]); + } else { + let sz_pref = opnd_size == 16; + let rex_w = opnd_size == 64; + write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd, op_ext, &[op_mem_reg_pref]); + } +} + +// Encode an add-like RM instruction with multiple possible encodings +fn write_rm_multi(cb: &mut CodeBlock, op_mem_reg8: u8, op_mem_reg_pref: u8, op_reg_mem8: u8, op_reg_mem_pref: u8, op_mem_imm8: u8, op_mem_imm_sml: u8, op_mem_imm_lrg: u8, op_ext_imm: Option<u8>, opnd0: X86Opnd, opnd1: X86Opnd) { + assert!(matches!(opnd0, X86Opnd::Reg(_) | X86Opnd::Mem(_))); + + // Check the size of opnd0 + let opnd_size = opnd0.num_bits(); + assert!(opnd_size == 8 || opnd_size == 16 || opnd_size == 32 || opnd_size == 64); + + // Check the size of opnd1 + match opnd1 { + X86Opnd::Reg(reg) => assert_eq!(reg.num_bits, opnd_size), + X86Opnd::Mem(mem) => assert_eq!(mem.num_bits, opnd_size), + X86Opnd::Imm(imm) => assert!(imm.num_bits <= opnd_size), + X86Opnd::UImm(uimm) => assert!(uimm.num_bits <= opnd_size), + _ => () + }; + + let sz_pref = opnd_size == 16; + let rex_w = opnd_size == 64; + + match (opnd0, opnd1) { + // R/M + Reg + (X86Opnd::Mem(_), X86Opnd::Reg(_)) | (X86Opnd::Reg(_), X86Opnd::Reg(_)) => { + if opnd_size == 8 { + write_rm(cb, false, false, opnd1, opnd0, None, &[op_mem_reg8]); + } else { + write_rm(cb, sz_pref, rex_w, opnd1, opnd0, None, &[op_mem_reg_pref]); + } + }, + // Reg + R/M/IPRel + (X86Opnd::Reg(_), X86Opnd::Mem(_) | X86Opnd::IPRel(_)) => { + if opnd_size == 8 { + write_rm(cb, false, false, opnd0, opnd1, None, &[op_reg_mem8]); + } else { + write_rm(cb, sz_pref, rex_w, opnd0, opnd1, None, &[op_reg_mem_pref]); + } + }, + // R/M + Imm + (_, X86Opnd::Imm(imm)) => { + if imm.num_bits <= 8 { + // 8-bit immediate + + if opnd_size == 8 { + write_rm(cb, false, false, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm8]); + } else { + write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm_sml]); + } + + cb.write_int(imm.value as u64, 8); + } else if imm.num_bits <= 32 { + // 32-bit immediate + + assert!(imm.num_bits <= opnd_size); + write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm_lrg]); + cb.write_int(imm.value as u64, if opnd_size > 32 { 32 } else { opnd_size.into() }); + } else { + panic!("immediate value too large"); + } + }, + // R/M + UImm + (_, X86Opnd::UImm(uimm)) => { + // If the size of left hand operand equals the number of bits + // required to represent the right hand immediate, then we + // don't care about sign extension when calculating the immediate + let num_bits = if opnd0.num_bits() == uimm_num_bits(uimm.value) { + uimm_num_bits(uimm.value) + } else { + imm_num_bits(uimm.value.try_into().unwrap()) + }; + + if num_bits <= 8 { + // 8-bit immediate + + if opnd_size == 8 { + write_rm(cb, false, false, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm8]); + } else { + write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm_sml]); + } + + cb.write_int(uimm.value, 8); + } else if num_bits <= 32 { + // 32-bit immediate + + assert!(num_bits <= opnd_size); + write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm_lrg]); + cb.write_int(uimm.value, if opnd_size > 32 { 32 } else { opnd_size.into() }); + } else { + panic!("immediate value too large (num_bits={}, num={uimm:?})", num_bits); + } 
+ }, + _ => panic!("unknown encoding combo: {opnd0:?} {opnd1:?}") + }; +} + +// LOCK - lock prefix for atomic shared memory operations +pub fn write_lock_prefix(cb: &mut CodeBlock) { + cb.write_byte(0xf0); +} + +/// add - Integer addition +pub fn add(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) { + write_rm_multi( + cb, + 0x00, // opMemReg8 + 0x01, // opMemRegPref + 0x02, // opRegMem8 + 0x03, // opRegMemPref + 0x80, // opMemImm8 + 0x83, // opMemImmSml + 0x81, // opMemImmLrg + Some(0x00), // opExtImm + opnd0, + opnd1 + ); +} + +/// and - Bitwise AND +pub fn and(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) { + write_rm_multi( + cb, + 0x20, // opMemReg8 + 0x21, // opMemRegPref + 0x22, // opRegMem8 + 0x23, // opRegMemPref + 0x80, // opMemImm8 + 0x83, // opMemImmSml + 0x81, // opMemImmLrg + Some(0x04), // opExtImm + opnd0, + opnd1 + ); +} + +/// call - Call to a pointer with a 32-bit displacement offset +pub fn call_rel32(cb: &mut CodeBlock, rel32: i32) { + // Write the opcode + cb.write_byte(0xe8); + + // Write the relative 32-bit jump offset + cb.write_bytes(&rel32.to_le_bytes()); +} + +/// call - Call a pointer, encode with a 32-bit offset if possible +pub fn call_ptr(cb: &mut CodeBlock, scratch_opnd: X86Opnd, dst_ptr: *const u8) { + if let X86Opnd::Reg(_scratch_reg) = scratch_opnd { + use crate::stats::{incr_counter}; + + // Pointer to the end of this call instruction + let end_ptr = cb.get_ptr(cb.write_pos + 5); + + // Compute the jump offset + let rel64: i64 = dst_ptr as i64 - end_ptr.raw_ptr(cb) as i64; + + // If the offset fits in 32-bit + if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() { + incr_counter!(num_send_x86_rel32); + call_rel32(cb, rel64.try_into().unwrap()); + return; + } + + // Move the pointer into the scratch register and call + incr_counter!(num_send_x86_reg); + mov(cb, scratch_opnd, const_ptr_opnd(dst_ptr)); + call(cb, scratch_opnd); + } else { + unreachable!(); + } +} + +/// call - Call to label with 32-bit offset +pub fn call_label(cb: &mut CodeBlock, label_idx: usize) { + cb.label_ref(label_idx, 5, |cb, src_addr, dst_addr| { + cb.write_byte(0xE8); + cb.write_int((dst_addr - src_addr) as u64, 32); + }); +} + +/// call - Indirect call with an R/M operand +pub fn call(cb: &mut CodeBlock, opnd: X86Opnd) { + write_rm(cb, false, false, X86Opnd::None, opnd, Some(2), &[0xff]); +} + +/// Encode a conditional move instruction +fn write_cmov(cb: &mut CodeBlock, opcode1: u8, dst: X86Opnd, src: X86Opnd) { + if let X86Opnd::Reg(reg) = dst { + match src { + X86Opnd::Reg(_) => (), + X86Opnd::Mem(_) => (), + _ => unreachable!() + }; + + assert!(reg.num_bits >= 16); + let sz_pref = reg.num_bits == 16; + let rex_w = reg.num_bits == 64; + + write_rm(cb, sz_pref, rex_w, dst, src, None, &[0x0f, opcode1]); + } else { + unreachable!() + } +} + +// cmovcc - Conditional move +pub fn cmova(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x47, dst, src); } +pub fn cmovae(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x43, dst, src); } +pub fn cmovb(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x42, dst, src); } +pub fn cmovbe(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x46, dst, src); } +pub fn cmovc(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x42, dst, src); } +pub fn cmove(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x44, dst, src); } +pub fn cmovg(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4f, dst, src); } +pub fn cmovge(cb: 
&mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4d, dst, src); } +pub fn cmovl(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4c, dst, src); } +pub fn cmovle(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4e, dst, src); } +pub fn cmovna(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x46, dst, src); } +pub fn cmovnae(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x42, dst, src); } +pub fn cmovnb(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x43, dst, src); } +pub fn cmovnbe(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x47, dst, src); } +pub fn cmovnc(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x43, dst, src); } +pub fn cmovne(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x45, dst, src); } +pub fn cmovng(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4e, dst, src); } +pub fn cmovnge(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4c, dst, src); } +pub fn cmovnl(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4d, dst, src); } +pub fn cmovnle(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4f, dst, src); } +pub fn cmovno(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x41, dst, src); } +pub fn cmovnp(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4b, dst, src); } +pub fn cmovns(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x49, dst, src); } +pub fn cmovnz(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x45, dst, src); } +pub fn cmovo(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x40, dst, src); } +pub fn cmovp(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4a, dst, src); } +pub fn cmovpe(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4a, dst, src); } +pub fn cmovpo(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4b, dst, src); } +pub fn cmovs(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x48, dst, src); } +pub fn cmovz(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x44, dst, src); } + +/// cmp - Compare and set flags +pub fn cmp(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) { + write_rm_multi( + cb, + 0x38, // opMemReg8 + 0x39, // opMemRegPref + 0x3A, // opRegMem8 + 0x3B, // opRegMemPref + 0x80, // opMemImm8 + 0x83, // opMemImmSml + 0x81, // opMemImmLrg + Some(0x07), // opExtImm + opnd0, + opnd1 + ); +} + +/// cdq - Convert doubleword to quadword +pub fn cdq(cb: &mut CodeBlock) { + cb.write_byte(0x99); +} + +/// cqo - Convert quadword to octaword +pub fn cqo(cb: &mut CodeBlock) { + cb.write_bytes(&[0x48, 0x99]); +} + +/// imul - signed integer multiply +pub fn imul(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) { + assert!(opnd0.num_bits() == 64); + assert!(opnd1.num_bits() == 64); + assert!(matches!(opnd0, X86Opnd::Reg(_) | X86Opnd::Mem(_))); + assert!(matches!(opnd1, X86Opnd::Reg(_) | X86Opnd::Mem(_))); + + match (opnd0, opnd1) { + (X86Opnd::Reg(_), X86Opnd::Reg(_) | X86Opnd::Mem(_)) => { + //REX.W + 0F AF /rIMUL r64, r/m64 + // Quadword register := Quadword register * r/m64. + write_rm(cb, false, true, opnd0, opnd1, None, &[0x0F, 0xAF]); + } + + // Flip the operands to handle this case. This instruction has weird encoding restrictions. 
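+        // IMUL r64, r/m64 always writes the product to the register operand, and multiplication
+        // is commutative, so encoding with the operands swapped yields the same product value.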
+ (X86Opnd::Mem(_), X86Opnd::Reg(_)) => { + //REX.W + 0F AF /rIMUL r64, r/m64 + // Quadword register := Quadword register * r/m64. + write_rm(cb, false, true, opnd1, opnd0, None, &[0x0F, 0xAF]); + } + + _ => unreachable!() + } +} + +/// Interrupt 3 - trap to debugger +pub fn int3(cb: &mut CodeBlock) { + cb.write_byte(0xcc); +} + +// Encode a conditional relative jump to a label +// Note: this always encodes a 32-bit offset +fn write_jcc<const OP: u8>(cb: &mut CodeBlock, label_idx: usize) { + cb.label_ref(label_idx, 6, |cb, src_addr, dst_addr| { + cb.write_byte(0x0F); + cb.write_byte(OP); + cb.write_int((dst_addr - src_addr) as u64, 32); + }); +} + +/// jcc - relative jumps to a label +pub fn ja_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x87>(cb, label_idx); } +pub fn jae_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x83>(cb, label_idx); } +pub fn jb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x82>(cb, label_idx); } +pub fn jbe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x86>(cb, label_idx); } +pub fn jc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x82>(cb, label_idx); } +pub fn je_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x84>(cb, label_idx); } +pub fn jg_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8F>(cb, label_idx); } +pub fn jge_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8D>(cb, label_idx); } +pub fn jl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8C>(cb, label_idx); } +pub fn jle_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8E>(cb, label_idx); } +pub fn jna_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x86>(cb, label_idx); } +pub fn jnae_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x82>(cb, label_idx); } +pub fn jnb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x83>(cb, label_idx); } +pub fn jnbe_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x87>(cb, label_idx); } +pub fn jnc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x83>(cb, label_idx); } +pub fn jne_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x85>(cb, label_idx); } +pub fn jng_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8E>(cb, label_idx); } +pub fn jnge_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8C>(cb, label_idx); } +pub fn jnl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8D>(cb, label_idx); } +pub fn jnle_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8F>(cb, label_idx); } +pub fn jno_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x81>(cb, label_idx); } +pub fn jnp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8b>(cb, label_idx); } +pub fn jns_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x89>(cb, label_idx); } +pub fn jnz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x85>(cb, label_idx); } +pub fn jo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x80>(cb, label_idx); } +pub fn jp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8A>(cb, label_idx); } +pub fn jpe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8A>(cb, label_idx); } +pub fn jpo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8B>(cb, label_idx); } +pub fn js_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x88>(cb, label_idx); } +pub fn jz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x84>(cb, label_idx); } + +pub fn 
jmp_label(cb: &mut CodeBlock, label_idx: usize) { + cb.label_ref(label_idx, 5, |cb, src_addr, dst_addr| { + cb.write_byte(0xE9); + cb.write_int((dst_addr - src_addr) as u64, 32); + }); +} + +/// Encode a relative jump to a pointer at a 32-bit offset (direct or conditional) +fn write_jcc_ptr(cb: &mut CodeBlock, op0: u8, op1: u8, dst_ptr: CodePtr) { + // Write the opcode + if op0 != 0xFF { + cb.write_byte(op0); + } + + cb.write_byte(op1); + + // Pointer to the end of this jump instruction + let end_ptr = cb.get_ptr(cb.write_pos + 4); + + // Compute the jump offset + let rel64 = dst_ptr.as_offset() - end_ptr.as_offset(); + + if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() { + // Write the relative 32-bit jump offset + cb.write_int(rel64 as u64, 32); + } + else { + // Offset doesn't fit in 4 bytes. Report error. + cb.dropped_bytes = true; + } +} + +/// jcc - relative jumps to a pointer (32-bit offset) +pub fn ja_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x87, ptr); } +pub fn jae_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x83, ptr); } +pub fn jb_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x82, ptr); } +pub fn jbe_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x86, ptr); } +pub fn jc_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x82, ptr); } +pub fn je_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x84, ptr); } +pub fn jg_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8F, ptr); } +pub fn jge_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8D, ptr); } +pub fn jl_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8C, ptr); } +pub fn jle_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8E, ptr); } +pub fn jna_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x86, ptr); } +pub fn jnae_ptr(cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x82, ptr); } +pub fn jnb_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x83, ptr); } +pub fn jnbe_ptr(cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x87, ptr); } +pub fn jnc_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x83, ptr); } +pub fn jne_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x85, ptr); } +pub fn jng_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8E, ptr); } +pub fn jnge_ptr(cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8C, ptr); } +pub fn jnl_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8D, ptr); } +pub fn jnle_ptr(cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8F, ptr); } +pub fn jno_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x81, ptr); } +pub fn jnp_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8b, ptr); } +pub fn jns_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x89, ptr); } +pub fn jnz_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x85, ptr); } +pub fn jo_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x80, ptr); } +pub fn jp_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8A, ptr); } +pub fn jpe_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8A, ptr); } +pub fn jpo_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8B, ptr); } +pub fn js_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x88, ptr); } +pub 
fn jz_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x84, ptr); } +pub fn jmp_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0xFF, 0xE9, ptr); } + +/// jmp - Indirect jump near to an R/M operand. +pub fn jmp_rm(cb: &mut CodeBlock, opnd: X86Opnd) { + write_rm(cb, false, false, X86Opnd::None, opnd, Some(4), &[0xff]); +} + +// jmp - Jump with relative 32-bit offset +pub fn jmp32(cb: &mut CodeBlock, offset: i32) { + cb.write_byte(0xE9); + cb.write_int(offset as u64, 32); +} + +/// lea - Load Effective Address +pub fn lea(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { + if let X86Opnd::Reg(reg) = dst { + assert!(reg.num_bits == 64); + assert!(matches!(src, X86Opnd::Mem(_) | X86Opnd::IPRel(_))); + write_rm(cb, false, true, dst, src, None, &[0x8d]); + } else { + unreachable!(); + } +} + +/// mov - Data move operation +pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { + match (dst, src) { + // R + Imm + (X86Opnd::Reg(reg), X86Opnd::Imm(imm)) => { + assert!(imm.num_bits <= reg.num_bits); + + // In case the source immediate could be zero extended to be 64 + // bit, we can use the 32-bit operands version of the instruction. + // For example, we can turn mov(rax, 0x34) into the equivalent + // mov(eax, 0x34). + if (reg.num_bits == 64) && (imm.value > 0) && (imm.num_bits <= 32) { + if dst.rex_needed() { + write_rex(cb, false, 0, 0, reg.reg_no); + } + write_opcode(cb, 0xB8, reg); + cb.write_int(imm.value as u64, 32); + } else { + if reg.num_bits == 16 { + cb.write_byte(0x66); + } + + if dst.rex_needed() || reg.num_bits == 64 { + write_rex(cb, reg.num_bits == 64, 0, 0, reg.reg_no); + } + + write_opcode(cb, if reg.num_bits == 8 { 0xb0 } else { 0xb8 }, reg); + cb.write_int(imm.value as u64, reg.num_bits.into()); + } + }, + // R + UImm + (X86Opnd::Reg(reg), X86Opnd::UImm(uimm)) => { + assert!(uimm.num_bits <= reg.num_bits); + + // In case the source immediate could be zero extended to be 64 + // bit, we can use the 32-bit operands version of the instruction. + // For example, we can turn mov(rax, 0x34) into the equivalent + // mov(eax, 0x34). 
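+            // (In the example above this shrinks the encoding from 10 bytes for the 64-bit form, REX.W + B8 + imm64, down to 5 bytes, B8 + imm32.)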
+ if (reg.num_bits == 64) && (uimm.value <= u32::MAX.into()) { + if dst.rex_needed() { + write_rex(cb, false, 0, 0, reg.reg_no); + } + write_opcode(cb, 0xB8, reg); + cb.write_int(uimm.value, 32); + } else { + if reg.num_bits == 16 { + cb.write_byte(0x66); + } + + if dst.rex_needed() || reg.num_bits == 64 { + write_rex(cb, reg.num_bits == 64, 0, 0, reg.reg_no); + } + + write_opcode(cb, if reg.num_bits == 8 { 0xb0 } else { 0xb8 }, reg); + cb.write_int(uimm.value, reg.num_bits.into()); + } + }, + // M + Imm + (X86Opnd::Mem(mem), X86Opnd::Imm(imm)) => { + assert!(imm.num_bits <= mem.num_bits); + + if mem.num_bits == 8 { + write_rm(cb, false, false, X86Opnd::None, dst, None, &[0xc6]); + } else { + write_rm(cb, mem.num_bits == 16, mem.num_bits == 64, X86Opnd::None, dst, Some(0), &[0xc7]); + } + + let output_num_bits:u32 = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() }; + assert!(imm_num_bits(imm.value) <= (output_num_bits as u8)); + cb.write_int(imm.value as u64, output_num_bits); + }, + // M + UImm + (X86Opnd::Mem(mem), X86Opnd::UImm(uimm)) => { + assert!(uimm.num_bits <= mem.num_bits); + + if mem.num_bits == 8 { + write_rm(cb, false, false, X86Opnd::None, dst, None, &[0xc6]); + } + else { + write_rm(cb, mem.num_bits == 16, mem.num_bits == 64, X86Opnd::None, dst, Some(0), &[0xc7]); + } + + let output_num_bits = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() }; + assert!(imm_num_bits(uimm.value as i64) <= (output_num_bits as u8)); + cb.write_int(uimm.value, output_num_bits); + }, + // * + Imm/UImm + (_, X86Opnd::Imm(_) | X86Opnd::UImm(_)) => unreachable!(), + // * + * + (_, _) => { + write_rm_multi( + cb, + 0x88, // opMemReg8 + 0x89, // opMemRegPref + 0x8A, // opRegMem8 + 0x8B, // opRegMemPref + 0xC6, // opMemImm8 + 0xFF, // opMemImmSml (not available) + 0xFF, // opMemImmLrg + None, // opExtImm + dst, + src + ); + } + }; +} + +/// A variant of mov used for always writing the value in 64 bits for GC offsets. 
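+/// (Emitting the full 8-byte immediate keeps the instruction length independent of the value, which presumably is what allows the embedded pointer to be patched in place later, e.g. if the GC moves the referenced object.)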
+pub fn movabs(cb: &mut CodeBlock, dst: X86Opnd, value: u64) { + match dst { + X86Opnd::Reg(reg) => { + assert_eq!(reg.num_bits, 64); + write_rex(cb, true, 0, 0, reg.reg_no); + + write_opcode(cb, 0xb8, reg); + cb.write_int(value, 64); + }, + _ => unreachable!() + } +} + +/// movsx - Move with sign extension (signed integers) +pub fn movsx(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { + if let X86Opnd::Reg(_dst_reg) = dst { + assert!(matches!(src, X86Opnd::Reg(_) | X86Opnd::Mem(_))); + + let src_num_bits = src.num_bits(); + let dst_num_bits = dst.num_bits(); + assert!(src_num_bits < dst_num_bits); + + match src_num_bits { + 8 => write_rm(cb, dst_num_bits == 16, dst_num_bits == 64, dst, src, None, &[0x0f, 0xbe]), + 16 => write_rm(cb, dst_num_bits == 16, dst_num_bits == 64, dst, src, None, &[0x0f, 0xbf]), + 32 => write_rm(cb, false, true, dst, src, None, &[0x63]), + _ => unreachable!() + }; + } else { + unreachable!(); + } +} + +/* +/// movzx - Move with zero extension (unsigned values) +void movzx(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) +{ + cb.writeASM("movzx", dst, src); + + uint32_t dstSize; + if (dst.isReg) + dstSize = dst.reg.size; + else + assert (false, "movzx dst must be a register"); + + uint32_t srcSize; + if (src.isReg) + srcSize = src.reg.size; + else if (src.isMem) + srcSize = src.mem.size; + else + assert (false); + + assert ( + srcSize < dstSize, + "movzx: srcSize >= dstSize" + ); + + if (srcSize is 8) + { + cb.writeRMInstr!('r', 0xFF, 0x0F, 0xB6)(dstSize is 16, dstSize is 64, dst, src); + } + else if (srcSize is 16) + { + cb.writeRMInstr!('r', 0xFF, 0x0F, 0xB7)(dstSize is 16, dstSize is 64, dst, src); + } + else + { + assert (false, "invalid src operand size for movxz"); + } +} +*/ + +/// nop - Noop, one or multiple bytes long +pub fn nop(cb: &mut CodeBlock, length: u32) { + match length { + 0 => {}, + 1 => cb.write_byte(0x90), + 2 => cb.write_bytes(&[0x66, 0x90]), + 3 => cb.write_bytes(&[0x0f, 0x1f, 0x00]), + 4 => cb.write_bytes(&[0x0f, 0x1f, 0x40, 0x00]), + 5 => cb.write_bytes(&[0x0f, 0x1f, 0x44, 0x00, 0x00]), + 6 => cb.write_bytes(&[0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00]), + 7 => cb.write_bytes(&[0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00]), + 8 => cb.write_bytes(&[0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00]), + 9 => cb.write_bytes(&[0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00]), + _ => { + let mut written: u32 = 0; + while written + 9 <= length { + nop(cb, 9); + written += 9; + } + nop(cb, length - written); + } + }; +} + +/// not - Bitwise NOT +pub fn not(cb: &mut CodeBlock, opnd: X86Opnd) { + write_rm_unary( + cb, + 0xf6, // opMemReg8 + 0xf7, // opMemRegPref + Some(0x02), // opExt + opnd + ); +} + +/// or - Bitwise OR +pub fn or(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) { + write_rm_multi( + cb, + 0x08, // opMemReg8 + 0x09, // opMemRegPref + 0x0A, // opRegMem8 + 0x0B, // opRegMemPref + 0x80, // opMemImm8 + 0x83, // opMemImmSml + 0x81, // opMemImmLrg + Some(0x01), // opExtImm + opnd0, + opnd1 + ); +} + +/// pop - Pop a register off the stack +pub fn pop(cb: &mut CodeBlock, opnd: X86Opnd) { + match opnd { + X86Opnd::Reg(reg) => { + assert!(reg.num_bits == 64); + + if opnd.rex_needed() { + write_rex(cb, false, 0, 0, reg.reg_no); + } + write_opcode(cb, 0x58, reg); + }, + X86Opnd::Mem(mem) => { + assert!(mem.num_bits == 64); + + write_rm(cb, false, false, X86Opnd::None, opnd, Some(0), &[0x8f]); + }, + _ => unreachable!() + }; +} + +/// popfq - Pop the flags register (64-bit) +pub fn popfq(cb: &mut CodeBlock) { + // REX.W + 0x9D + 
cb.write_bytes(&[0x48, 0x9d]); +} + +/// push - Push an operand on the stack +pub fn push(cb: &mut CodeBlock, opnd: X86Opnd) { + match opnd { + X86Opnd::Reg(reg) => { + if opnd.rex_needed() { + write_rex(cb, false, 0, 0, reg.reg_no); + } + write_opcode(cb, 0x50, reg); + }, + X86Opnd::Mem(_mem) => { + write_rm(cb, false, false, X86Opnd::None, opnd, Some(6), &[0xff]); + }, + _ => unreachable!() + } +} + +/// pushfq - Push the flags register (64-bit) +pub fn pushfq(cb: &mut CodeBlock) { + cb.write_byte(0x9C); +} + +/// ret - Return from call, popping only the return address +pub fn ret(cb: &mut CodeBlock) { + cb.write_byte(0xC3); +} + +// Encode a bitwise shift instruction +fn write_shift(cb: &mut CodeBlock, op_mem_one_pref: u8, op_mem_cl_pref: u8, op_mem_imm_pref: u8, op_ext: u8, opnd0: X86Opnd, opnd1: X86Opnd) { + assert!(matches!(opnd0, X86Opnd::Reg(_) | X86Opnd::Mem(_))); + + // Check the size of opnd0 + let opnd_size = opnd0.num_bits(); + assert!(opnd_size == 16 || opnd_size == 32 || opnd_size == 64); + + let sz_pref = opnd_size == 16; + let rex_w = opnd_size == 64; + + match opnd1 { + X86Opnd::UImm(imm) => { + if imm.value == 1 { + write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, Some(op_ext), &[op_mem_one_pref]); + } else { + assert!(imm.num_bits <= 8); + write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, Some(op_ext), &[op_mem_imm_pref]); + cb.write_byte(imm.value as u8); + } + } + + X86Opnd::Reg(reg) => { + // We can only use CL/RCX as the shift amount + assert!(reg.reg_no == RCX_REG.reg_no); + write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, Some(op_ext), &[op_mem_cl_pref]); + } + + _ => { + unreachable!("unsupported operands: {:?}, {:?}", opnd0, opnd1); + } + } +} + +// sal - Shift arithmetic left +pub fn sal(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) { + write_shift( + cb, + 0xD1, // opMemOnePref, + 0xD3, // opMemClPref, + 0xC1, // opMemImmPref, + 0x04, + opnd0, + opnd1 + ); +} + +/// sar - Shift arithmetic right (signed) +pub fn sar(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) { + write_shift( + cb, + 0xD1, // opMemOnePref, + 0xD3, // opMemClPref, + 0xC1, // opMemImmPref, + 0x07, + opnd0, + opnd1 + ); +} + +// shl - Shift logical left +pub fn shl(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) { + write_shift( + cb, + 0xD1, // opMemOnePref, + 0xD3, // opMemClPref, + 0xC1, // opMemImmPref, + 0x04, + opnd0, + opnd1 + ); +} + +/// shr - Shift logical right (unsigned) +pub fn shr(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) { + write_shift( + cb, + 0xD1, // opMemOnePref, + 0xD3, // opMemClPref, + 0xC1, // opMemImmPref, + 0x05, + opnd0, + opnd1 + ); +} + +/// sub - Integer subtraction +pub fn sub(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) { + write_rm_multi( + cb, + 0x28, // opMemReg8 + 0x29, // opMemRegPref + 0x2A, // opRegMem8 + 0x2B, // opRegMemPref + 0x80, // opMemImm8 + 0x83, // opMemImmSml + 0x81, // opMemImmLrg + Some(0x05), // opExtImm + opnd0, + opnd1 + ); +} + +fn resize_opnd(opnd: X86Opnd, num_bits: u8) -> X86Opnd { + match opnd { + X86Opnd::Reg(reg) => { + let mut cloned = reg; + cloned.num_bits = num_bits; + X86Opnd::Reg(cloned) + }, + X86Opnd::Mem(mem) => { + let mut cloned = mem; + cloned.num_bits = num_bits; + X86Opnd::Mem(cloned) + }, + _ => unreachable!() + } +} + +/// test - Logical Compare +pub fn test(cb: &mut CodeBlock, rm_opnd: X86Opnd, test_opnd: X86Opnd) { + assert!(matches!(rm_opnd, X86Opnd::Reg(_) | X86Opnd::Mem(_))); + let rm_num_bits = rm_opnd.num_bits(); + + match test_opnd { + X86Opnd::UImm(uimm) => { + 
assert!(uimm.num_bits <= 32); + assert!(uimm.num_bits <= rm_num_bits); + + // Use the smallest operand size possible + assert!(rm_num_bits % 8 == 0); + let rm_resized = resize_opnd(rm_opnd, uimm.num_bits); + + if uimm.num_bits == 8 { + write_rm(cb, false, false, X86Opnd::None, rm_resized, Some(0x00), &[0xf6]); + cb.write_int(uimm.value, uimm.num_bits.into()); + } else { + write_rm(cb, uimm.num_bits == 16, false, X86Opnd::None, rm_resized, Some(0x00), &[0xf7]); + cb.write_int(uimm.value, uimm.num_bits.into()); + } + }, + X86Opnd::Imm(imm) => { + // This mode only applies to 64-bit R/M operands with 32-bit signed immediates + assert!(imm.num_bits <= 32); + assert!(rm_num_bits == 64); + + write_rm(cb, false, true, X86Opnd::None, rm_opnd, Some(0x00), &[0xf7]); + cb.write_int(imm.value as u64, 32); + }, + X86Opnd::Reg(reg) => { + assert!(reg.num_bits == rm_num_bits); + + if rm_num_bits == 8 { + write_rm(cb, false, false, test_opnd, rm_opnd, None, &[0x84]); + } else { + write_rm(cb, rm_num_bits == 16, rm_num_bits == 64, test_opnd, rm_opnd, None, &[0x85]); + } + }, + _ => unreachable!() + }; +} + +/// Undefined opcode +pub fn ud2(cb: &mut CodeBlock) { + cb.write_bytes(&[0x0f, 0x0b]); +} + +/// xchg - Exchange Register/Memory with Register +pub fn xchg(cb: &mut CodeBlock, rm_opnd: X86Opnd, r_opnd: X86Opnd) { + if let (X86Opnd::Reg(rm_reg), X86Opnd::Reg(r_reg)) = (rm_opnd, r_opnd) { + assert!(rm_reg.num_bits == 64); + assert!(r_reg.num_bits == 64); + + // If we're exchanging with RAX + if rm_reg.reg_no == RAX_REG_NO { + // Write the REX byte + write_rex(cb, true, 0, 0, r_reg.reg_no); + + // Write the opcode and register number + cb.write_byte(0x90 + (r_reg.reg_no & 7)); + } else { + write_rm(cb, false, true, r_opnd, rm_opnd, None, &[0x87]); + } + } else { + unreachable!(); + } +} + +/// xor - Exclusive bitwise OR +pub fn xor(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) { + write_rm_multi( + cb, + 0x30, // opMemReg8 + 0x31, // opMemRegPref + 0x32, // opRegMem8 + 0x33, // opRegMemPref + 0x80, // opMemImm8 + 0x83, // opMemImmSml + 0x81, // opMemImmLrg + Some(0x06), // opExtImm + opnd0, + opnd1 + ); +} diff --git a/yjit/src/asm/x86_64/tests.rs b/yjit/src/asm/x86_64/tests.rs new file mode 100644 index 0000000000..5ae983270f --- /dev/null +++ b/yjit/src/asm/x86_64/tests.rs @@ -0,0 +1,459 @@ +#![cfg(test)] + +use crate::asm::x86_64::*; + +/// Check that the bytes for an instruction sequence match a hex string +fn check_bytes<R>(bytes: &str, run: R) where R: FnOnce(&mut super::CodeBlock) { + let mut cb = super::CodeBlock::new_dummy(4096); + run(&mut cb); + assert_eq!(format!("{:x}", cb), bytes); +} + +#[test] +fn test_add() { + check_bytes("80c103", |cb| add(cb, CL, imm_opnd(3))); + check_bytes("00d9", |cb| add(cb, CL, BL)); + check_bytes("4000e1", |cb| add(cb, CL, SPL)); + check_bytes("6601d9", |cb| add(cb, CX, BX)); + check_bytes("4801d8", |cb| add(cb, RAX, RBX)); + check_bytes("01d1", |cb| add(cb, ECX, EDX)); + check_bytes("4c01f2", |cb| add(cb, RDX, R14)); + check_bytes("480110", |cb| add(cb, mem_opnd(64, RAX, 0), RDX)); + check_bytes("480310", |cb| add(cb, RDX, mem_opnd(64, RAX, 0))); + check_bytes("48035008", |cb| add(cb, RDX, mem_opnd(64, RAX, 8))); + check_bytes("480390ff000000", |cb| add(cb, RDX, mem_opnd(64, RAX, 255))); + check_bytes("4881407fff000000", |cb| add(cb, mem_opnd(64, RAX, 127), imm_opnd(255))); + check_bytes("0110", |cb| add(cb, mem_opnd(32, RAX, 0), EDX)); + check_bytes("4883c408", |cb| add(cb, RSP, imm_opnd(8))); + check_bytes("83c108", |cb| add(cb, ECX, imm_opnd(8))); + 
check_bytes("81c1ff000000", |cb| add(cb, ECX, imm_opnd(255))); +} + +#[test] +fn test_add_unsigned() { + // ADD r/m8, imm8 + check_bytes("4180c001", |cb| add(cb, R8B, uimm_opnd(1))); + check_bytes("4180c07f", |cb| add(cb, R8B, imm_opnd(i8::MAX.try_into().unwrap()))); + + // ADD r/m16, imm16 + check_bytes("664183c001", |cb| add(cb, R8W, uimm_opnd(1))); + check_bytes("664181c0ff7f", |cb| add(cb, R8W, uimm_opnd(i16::MAX.try_into().unwrap()))); + + // ADD r/m32, imm32 + check_bytes("4183c001", |cb| add(cb, R8D, uimm_opnd(1))); + check_bytes("4181c0ffffff7f", |cb| add(cb, R8D, uimm_opnd(i32::MAX.try_into().unwrap()))); + + // ADD r/m64, imm32 + check_bytes("4983c001", |cb| add(cb, R8, uimm_opnd(1))); + check_bytes("4981c0ffffff7f", |cb| add(cb, R8, uimm_opnd(i32::MAX.try_into().unwrap()))); +} + +#[test] +fn test_and() { + check_bytes("4421e5", |cb| and(cb, EBP, R12D)); + check_bytes("48832008", |cb| and(cb, mem_opnd(64, RAX, 0), imm_opnd(0x08))); +} + +#[test] +fn test_call_label() { + check_bytes("e8fbffffff", |cb| { + let label_idx = cb.new_label("fn".to_owned()); + call_label(cb, label_idx); + cb.link_labels(); + }); +} + +#[test] +fn test_call_ptr() { + // calling a lower address + check_bytes("e8fbffffff", |cb| { + let ptr = cb.get_write_ptr(); + call_ptr(cb, RAX, ptr.raw_ptr(cb)); + }); +} + +#[test] +fn test_call_reg() { + check_bytes("ffd0", |cb| call(cb, RAX)); +} + +#[test] +fn test_call_mem() { + check_bytes("ff542408", |cb| call(cb, mem_opnd(64, RSP, 8))); +} + +#[test] +fn test_cmovcc() { + check_bytes("0f4ff7", |cb| cmovg(cb, ESI, EDI)); + check_bytes("0f4f750c", |cb| cmovg(cb, ESI, mem_opnd(32, RBP, 12))); + check_bytes("0f4cc1", |cb| cmovl(cb, EAX, ECX)); + check_bytes("480f4cdd", |cb| cmovl(cb, RBX, RBP)); + check_bytes("0f4e742404", |cb| cmovle(cb, ESI, mem_opnd(32, RSP, 4))); +} + +#[test] +fn test_cmp() { + check_bytes("38d1", |cb| cmp(cb, CL, DL)); + check_bytes("39f9", |cb| cmp(cb, ECX, EDI)); + check_bytes("493b1424", |cb| cmp(cb, RDX, mem_opnd(64, R12, 0))); + check_bytes("4883f802", |cb| cmp(cb, RAX, imm_opnd(2))); + check_bytes("81f900000080", |cb| cmp(cb, ECX, uimm_opnd(0x8000_0000))); +} + +#[test] +fn test_cqo() { + check_bytes("4899", |cb| cqo(cb)); +} + +#[test] +fn test_imul() { + check_bytes("480fafc3", |cb| imul(cb, RAX, RBX)); + check_bytes("480faf10", |cb| imul(cb, RDX, mem_opnd(64, RAX, 0))); + + // Operands flipped for encoding since multiplication is commutative + check_bytes("480faf10", |cb| imul(cb, mem_opnd(64, RAX, 0), RDX)); +} + +#[test] +fn test_jge_label() { + check_bytes("0f8dfaffffff", |cb| { + let label_idx = cb.new_label("loop".to_owned()); + jge_label(cb, label_idx); + cb.link_labels(); + }); +} + +#[test] +fn test_jmp_label() { + // Forward jump + check_bytes("e900000000", |cb| { + let label_idx = cb.new_label("next".to_owned()); + jmp_label(cb, label_idx); + cb.write_label(label_idx); + cb.link_labels(); + }); + + // Backwards jump + check_bytes("e9fbffffff", |cb| { + let label_idx = cb.new_label("loop".to_owned()); + cb.write_label(label_idx); + jmp_label(cb, label_idx); + cb.link_labels(); + }); +} + +#[test] +fn test_jmp_rm() { + check_bytes("41ffe4", |cb| jmp_rm(cb, R12)); +} + +#[test] +fn test_jo_label() { + check_bytes("0f80faffffff", |cb| { + let label_idx = cb.new_label("loop".to_owned()); + jo_label(cb, label_idx); + cb.link_labels(); + }); +} + +#[test] +fn test_lea() { + check_bytes("488d5108", |cb| lea(cb, RDX, mem_opnd(64, RCX, 8))); + check_bytes("488d0500000000", |cb| lea(cb, RAX, mem_opnd(8, RIP, 0))); + 
check_bytes("488d0505000000", |cb| lea(cb, RAX, mem_opnd(8, RIP, 5))); + check_bytes("488d3d05000000", |cb| lea(cb, RDI, mem_opnd(8, RIP, 5))); +} + +#[test] +fn test_mov() { + check_bytes("b807000000", |cb| mov(cb, EAX, imm_opnd(7))); + check_bytes("b8fdffffff", |cb| mov(cb, EAX, imm_opnd(-3))); + check_bytes("41bf03000000", |cb| mov(cb, R15, imm_opnd(3))); + check_bytes("89d8", |cb| mov(cb, EAX, EBX)); + check_bytes("89c8", |cb| mov(cb, EAX, ECX)); + check_bytes("8b9380000000", |cb| mov(cb, EDX, mem_opnd(32, RBX, 128))); + check_bytes("488b442404", |cb| mov(cb, RAX, mem_opnd(64, RSP, 4))); + + // Test `mov rax, 3` => `mov eax, 3` optimization + check_bytes("41b834000000", |cb| mov(cb, R8, imm_opnd(0x34))); + check_bytes("49b80000008000000000", |cb| mov(cb, R8, imm_opnd(0x80000000))); + check_bytes("49b8ffffffffffffffff", |cb| mov(cb, R8, imm_opnd(-1))); + + check_bytes("b834000000", |cb| mov(cb, RAX, imm_opnd(0x34))); + check_bytes("48b8020000000000c0ff", |cb| mov(cb, RAX, imm_opnd(-18014398509481982))); + check_bytes("48b80000008000000000", |cb| mov(cb, RAX, imm_opnd(0x80000000))); + check_bytes("48b8ccffffffffffffff", |cb| mov(cb, RAX, imm_opnd(-52))); // yasm thinks this could use a dword immediate instead of qword + check_bytes("48b8ffffffffffffffff", |cb| mov(cb, RAX, imm_opnd(-1))); // yasm thinks this could use a dword immediate instead of qword + check_bytes("4488c9", |cb| mov(cb, CL, R9B)); + check_bytes("4889c3", |cb| mov(cb, RBX, RAX)); + check_bytes("4889df", |cb| mov(cb, RDI, RBX)); + check_bytes("40b60b", |cb| mov(cb, SIL, imm_opnd(11))); + + check_bytes("c60424fd", |cb| mov(cb, mem_opnd(8, RSP, 0), imm_opnd(-3))); + check_bytes("48c7470801000000", |cb| mov(cb, mem_opnd(64, RDI, 8), imm_opnd(1))); + //check_bytes("67c7400411000000", |cb| mov(cb, mem_opnd(32, EAX, 4), imm_opnd(0x34))); // We don't distinguish between EAX and RAX here - that's probably fine? 
+ check_bytes("c7400411000000", |cb| mov(cb, mem_opnd(32, RAX, 4), imm_opnd(17))); + check_bytes("41895814", |cb| mov(cb, mem_opnd(32, R8, 20), EBX)); + check_bytes("4d8913", |cb| mov(cb, mem_opnd(64, R11, 0), R10)); + check_bytes("48c742f8f4ffffff", |cb| mov(cb, mem_opnd(64, RDX, -8), imm_opnd(-12))); +} + +#[test] +fn test_movabs() { + check_bytes("49b83400000000000000", |cb| movabs(cb, R8, 0x34)); + check_bytes("49b80000008000000000", |cb| movabs(cb, R8, 0x80000000)); +} + +#[test] +fn test_mov_unsigned() { + // MOV AL, imm8 + check_bytes("b001", |cb| mov(cb, AL, uimm_opnd(1))); + check_bytes("b0ff", |cb| mov(cb, AL, uimm_opnd(u8::MAX.into()))); + + // MOV AX, imm16 + check_bytes("66b80100", |cb| mov(cb, AX, uimm_opnd(1))); + check_bytes("66b8ffff", |cb| mov(cb, AX, uimm_opnd(u16::MAX.into()))); + + // MOV EAX, imm32 + check_bytes("b801000000", |cb| mov(cb, EAX, uimm_opnd(1))); + check_bytes("b8ffffffff", |cb| mov(cb, EAX, uimm_opnd(u32::MAX.into()))); + check_bytes("41b800000000", |cb| mov(cb, R8, uimm_opnd(0))); + check_bytes("41b8ffffffff", |cb| mov(cb, R8, uimm_opnd(0xFF_FF_FF_FF))); + + // MOV RAX, imm64, will move down into EAX since it fits into 32 bits + check_bytes("b801000000", |cb| mov(cb, RAX, uimm_opnd(1))); + check_bytes("b8ffffffff", |cb| mov(cb, RAX, uimm_opnd(u32::MAX.into()))); + + // MOV RAX, imm64, will not move down into EAX since it does not fit into 32 bits + check_bytes("48b80000000001000000", |cb| mov(cb, RAX, uimm_opnd(u32::MAX as u64 + 1))); + check_bytes("48b8ffffffffffffffff", |cb| mov(cb, RAX, uimm_opnd(u64::MAX))); + check_bytes("49b8ffffffffffffffff", |cb| mov(cb, R8, uimm_opnd(u64::MAX))); + + // MOV r8, imm8 + check_bytes("41b001", |cb| mov(cb, R8B, uimm_opnd(1))); + check_bytes("41b0ff", |cb| mov(cb, R8B, uimm_opnd(u8::MAX.into()))); + + // MOV r16, imm16 + check_bytes("6641b80100", |cb| mov(cb, R8W, uimm_opnd(1))); + check_bytes("6641b8ffff", |cb| mov(cb, R8W, uimm_opnd(u16::MAX.into()))); + + // MOV r32, imm32 + check_bytes("41b801000000", |cb| mov(cb, R8D, uimm_opnd(1))); + check_bytes("41b8ffffffff", |cb| mov(cb, R8D, uimm_opnd(u32::MAX.into()))); + + // MOV r64, imm64, will move down into 32 bit since it fits into 32 bits + check_bytes("41b801000000", |cb| mov(cb, R8, uimm_opnd(1))); + + // MOV r64, imm64, will not move down into 32 bit since it does not fit into 32 bits + check_bytes("49b8ffffffffffffffff", |cb| mov(cb, R8, uimm_opnd(u64::MAX))); +} + +#[test] +fn test_mov_iprel() { + check_bytes("8b0500000000", |cb| mov(cb, EAX, mem_opnd(32, RIP, 0))); + check_bytes("8b0505000000", |cb| mov(cb, EAX, mem_opnd(32, RIP, 5))); + + check_bytes("488b0500000000", |cb| mov(cb, RAX, mem_opnd(64, RIP, 0))); + check_bytes("488b0505000000", |cb| mov(cb, RAX, mem_opnd(64, RIP, 5))); + check_bytes("488b3d05000000", |cb| mov(cb, RDI, mem_opnd(64, RIP, 5))); +} + +#[test] +fn test_movsx() { + check_bytes("660fbec0", |cb| movsx(cb, AX, AL)); + check_bytes("0fbed0", |cb| movsx(cb, EDX, AL)); + check_bytes("480fbec3", |cb| movsx(cb, RAX, BL)); + check_bytes("0fbfc8", |cb| movsx(cb, ECX, AX)); + check_bytes("4c0fbed9", |cb| movsx(cb, R11, CL)); + check_bytes("4c6354240c", |cb| movsx(cb, R10, mem_opnd(32, RSP, 12))); + check_bytes("480fbe0424", |cb| movsx(cb, RAX, mem_opnd(8, RSP, 0))); + check_bytes("490fbf5504", |cb| movsx(cb, RDX, mem_opnd(16, R13, 4))); +} + +#[test] +fn test_nop() { + check_bytes("90", |cb| nop(cb, 1)); + check_bytes("6690", |cb| nop(cb, 2)); + check_bytes("0f1f00", |cb| nop(cb, 3)); + check_bytes("0f1f4000", |cb| nop(cb, 4)); + 
check_bytes("0f1f440000", |cb| nop(cb, 5)); + check_bytes("660f1f440000", |cb| nop(cb, 6)); + check_bytes("0f1f8000000000", |cb| nop(cb, 7)); + check_bytes("0f1f840000000000", |cb| nop(cb, 8)); + check_bytes("660f1f840000000000", |cb| nop(cb, 9)); + check_bytes("660f1f84000000000090", |cb| nop(cb, 10)); + check_bytes("660f1f8400000000006690", |cb| nop(cb, 11)); + check_bytes("660f1f8400000000000f1f00", |cb| nop(cb, 12)); +} + +#[test] +fn test_not() { + check_bytes("66f7d0", |cb| not(cb, AX)); + check_bytes("f7d0", |cb| not(cb, EAX)); + check_bytes("49f71424", |cb| not(cb, mem_opnd(64, R12, 0))); + check_bytes("f794242d010000", |cb| not(cb, mem_opnd(32, RSP, 301))); + check_bytes("f71424", |cb| not(cb, mem_opnd(32, RSP, 0))); + check_bytes("f7542403", |cb| not(cb, mem_opnd(32, RSP, 3))); + check_bytes("f75500", |cb| not(cb, mem_opnd(32, RBP, 0))); + check_bytes("f7550d", |cb| not(cb, mem_opnd(32, RBP, 13))); + check_bytes("48f7d0", |cb| not(cb, RAX)); + check_bytes("49f7d3", |cb| not(cb, R11)); + check_bytes("f710", |cb| not(cb, mem_opnd(32, RAX, 0))); + check_bytes("f716", |cb| not(cb, mem_opnd(32, RSI, 0))); + check_bytes("f717", |cb| not(cb, mem_opnd(32, RDI, 0))); + check_bytes("f75237", |cb| not(cb, mem_opnd(32, RDX, 55))); + check_bytes("f79239050000", |cb| not(cb, mem_opnd(32, RDX, 1337))); + check_bytes("f752c9", |cb| not(cb, mem_opnd(32, RDX, -55))); + check_bytes("f792d5fdffff", |cb| not(cb, mem_opnd(32, RDX, -555))); +} + +#[test] +fn test_or() { + check_bytes("09f2", |cb| or(cb, EDX, ESI)); +} + +#[test] +fn test_pop() { + check_bytes("58", |cb| pop(cb, RAX)); + check_bytes("5b", |cb| pop(cb, RBX)); + check_bytes("5c", |cb| pop(cb, RSP)); + check_bytes("5d", |cb| pop(cb, RBP)); + check_bytes("415c", |cb| pop(cb, R12)); + check_bytes("8f00", |cb| pop(cb, mem_opnd(64, RAX, 0))); + check_bytes("418f00", |cb| pop(cb, mem_opnd(64, R8, 0))); + check_bytes("418f4003", |cb| pop(cb, mem_opnd(64, R8, 3))); + check_bytes("8f44c803", |cb| pop(cb, mem_opnd_sib(64, RAX, RCX, 8, 3))); + check_bytes("418f44c803", |cb| pop(cb, mem_opnd_sib(64, R8, RCX, 8, 3))); +} + +#[test] +fn test_push() { + check_bytes("50", |cb| push(cb, RAX)); + check_bytes("53", |cb| push(cb, RBX)); + check_bytes("4154", |cb| push(cb, R12)); + check_bytes("ff30", |cb| push(cb, mem_opnd(64, RAX, 0))); + check_bytes("41ff30", |cb| push(cb, mem_opnd(64, R8, 0))); + check_bytes("41ff7003", |cb| push(cb, mem_opnd(64, R8, 3))); + check_bytes("ff74c803", |cb| push(cb, mem_opnd_sib(64, RAX, RCX, 8, 3))); + check_bytes("41ff74c803", |cb| push(cb, mem_opnd_sib(64, R8, RCX, 8, 3))); +} + +#[test] +fn test_ret() { + check_bytes("c3", |cb| ret(cb)); +} + +#[test] +fn test_sal() { + check_bytes("66d1e1", |cb| sal(cb, CX, uimm_opnd(1))); + check_bytes("d1e1", |cb| sal(cb, ECX, uimm_opnd(1))); + check_bytes("c1e505", |cb| sal(cb, EBP, uimm_opnd(5))); + check_bytes("d1642444", |cb| sal(cb, mem_opnd(32, RSP, 68), uimm_opnd(1))); + check_bytes("48d3e1", |cb| sal(cb, RCX, CL)); +} + +#[test] +fn test_sar() { + check_bytes("d1fa", |cb| sar(cb, EDX, uimm_opnd(1))); +} + +#[test] +fn test_shr() { + check_bytes("49c1ee07", |cb| shr(cb, R14, uimm_opnd(7))); +} + +#[test] +fn test_sub() { + check_bytes("83e801", |cb| sub(cb, EAX, imm_opnd(1))); + check_bytes("4883e802", |cb| sub(cb, RAX, imm_opnd(2))); +} + +#[test] +#[should_panic] +fn test_sub_uimm_too_large() { + // This immediate becomes a different value after + // sign extension, so not safe to encode. 
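+    // (A 32-bit immediate is sign-extended for a 64-bit operand, so 0x8000_0000 would become 0xFFFF_FFFF_8000_0000; the encoder panics rather than emitting the wrong value.)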
+ check_bytes("ff", |cb| sub(cb, RCX, uimm_opnd(0x8000_0000))); +} + +#[test] +fn test_test() { + check_bytes("84c0", |cb| test(cb, AL, AL)); + check_bytes("6685c0", |cb| test(cb, AX, AX)); + check_bytes("f6c108", |cb| test(cb, CL, uimm_opnd(8))); + check_bytes("f6c207", |cb| test(cb, DL, uimm_opnd(7))); + check_bytes("f6c108", |cb| test(cb, RCX, uimm_opnd(8))); + check_bytes("f6420808", |cb| test(cb, mem_opnd(8, RDX, 8), uimm_opnd(8))); + check_bytes("f64208ff", |cb| test(cb, mem_opnd(8, RDX, 8), uimm_opnd(255))); + check_bytes("66f7c2ffff", |cb| test(cb, DX, uimm_opnd(0xffff))); + check_bytes("66f74208ffff", |cb| test(cb, mem_opnd(16, RDX, 8), uimm_opnd(0xffff))); + check_bytes("f60601", |cb| test(cb, mem_opnd(8, RSI, 0), uimm_opnd(1))); + check_bytes("f6461001", |cb| test(cb, mem_opnd(8, RSI, 16), uimm_opnd(1))); + check_bytes("f646f001", |cb| test(cb, mem_opnd(8, RSI, -16), uimm_opnd(1))); + check_bytes("854640", |cb| test(cb, mem_opnd(32, RSI, 64), EAX)); + check_bytes("4885472a", |cb| test(cb, mem_opnd(64, RDI, 42), RAX)); + check_bytes("4885c0", |cb| test(cb, RAX, RAX)); + check_bytes("4885f0", |cb| test(cb, RAX, RSI)); + check_bytes("48f74640f7ffffff", |cb| test(cb, mem_opnd(64, RSI, 64), imm_opnd(!0x08))); + check_bytes("48f7464008000000", |cb| test(cb, mem_opnd(64, RSI, 64), imm_opnd(0x08))); + check_bytes("48f7c108000000", |cb| test(cb, RCX, imm_opnd(0x08))); + //check_bytes("48a9f7ffff0f", |cb| test(cb, RAX, imm_opnd(0x0FFFFFF7))); +} + +#[test] +fn test_xchg() { + check_bytes("4891", |cb| xchg(cb, RAX, RCX)); + check_bytes("4995", |cb| xchg(cb, RAX, R13)); + check_bytes("4887d9", |cb| xchg(cb, RCX, RBX)); + check_bytes("4d87f9", |cb| xchg(cb, R9, R15)); +} + +#[test] +fn test_xor() { + check_bytes("31c0", |cb| xor(cb, EAX, EAX)); +} + +#[test] +#[cfg(feature = "disasm")] +fn basic_capstone_usage() -> std::result::Result<(), capstone::Error> { + // Test drive Capstone with simple input + use capstone::prelude::*; + let cs = Capstone::new() + .x86() + .mode(arch::x86::ArchMode::Mode64) + .syntax(arch::x86::ArchSyntax::Intel) + .build()?; + + let insns = cs.disasm_all(&[0xCC], 0x1000)?; + + match insns.as_ref() { + [insn] => { + assert_eq!(Some("int3"), insn.mnemonic()); + Ok(()) + } + _ => Err(capstone::Error::CustomError( + "expected to disassemble to int3", + )), + } +} + +#[test] +#[cfg(feature = "disasm")] +fn block_comments() { + let mut cb = super::CodeBlock::new_dummy(4096); + + let first_write_ptr = cb.get_write_ptr().raw_addr(&cb); + cb.add_comment("Beginning"); + xor(&mut cb, EAX, EAX); // 2 bytes long + let second_write_ptr = cb.get_write_ptr().raw_addr(&cb); + cb.add_comment("Two bytes in"); + cb.add_comment("Still two bytes in"); + cb.add_comment("Still two bytes in"); // Duplicate, should be ignored + test(&mut cb, mem_opnd(64, RSI, 64), imm_opnd(!0x08)); // 8 bytes long + let third_write_ptr = cb.get_write_ptr().raw_addr(&cb); + cb.add_comment("Ten bytes in"); + + assert_eq!(&vec!( "Beginning".to_string() ), cb.comments_at(first_write_ptr).unwrap()); + assert_eq!(&vec!( "Two bytes in".to_string(), "Still two bytes in".to_string() ), cb.comments_at(second_write_ptr).unwrap()); + assert_eq!(&vec!( "Ten bytes in".to_string() ), cb.comments_at(third_write_ptr).unwrap()); +} diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs new file mode 100644 index 0000000000..3bf949ba7d --- /dev/null +++ b/yjit/src/backend/arm64/mod.rs @@ -0,0 +1,1835 @@ +use std::mem::take; + +use crate::asm::{CodeBlock, OutlinedCb}; +use crate::asm::arm64::*; +use 
crate::cruby::*; +use crate::backend::ir::*; +use crate::virtualmem::CodePtr; +use crate::utils::*; + +// Use the arm64 register type for this platform +pub type Reg = A64Reg; + +// Callee-saved registers +pub const _CFP: Opnd = Opnd::Reg(X19_REG); +pub const _EC: Opnd = Opnd::Reg(X20_REG); +pub const _SP: Opnd = Opnd::Reg(X21_REG); + +// C argument registers on this platform +pub const _C_ARG_OPNDS: [Opnd; 6] = [ + Opnd::Reg(X0_REG), + Opnd::Reg(X1_REG), + Opnd::Reg(X2_REG), + Opnd::Reg(X3_REG), + Opnd::Reg(X4_REG), + Opnd::Reg(X5_REG) +]; + +// C return value register on this platform +pub const C_RET_REG: Reg = X0_REG; +pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG); + +// These constants define the way we work with Arm64's stack pointer. The stack +// pointer always needs to be aligned to a 16-byte boundary. +pub const C_SP_REG: A64Opnd = X31; +pub const C_SP_STEP: i32 = 16; + +impl CodeBlock { + // The maximum number of bytes that can be generated by emit_jmp_ptr. + pub fn jmp_ptr_bytes(&self) -> usize { + // b instruction's offset is encoded as imm26 times 4. It can jump to + // +/-128MiB, so this can be used when --yjit-exec-mem-size <= 128. + let num_insns = if b_offset_fits_bits(self.virtual_region_size() as i64 / 4) { + 1 // b instruction + } else { + 5 // 4 instructions to load a 64-bit absolute address + br instruction + }; + num_insns * 4 + } + + // The maximum number of instructions that can be generated by emit_conditional_jump. + fn conditional_jump_insns(&self) -> i32 { + // The worst case is instructions for a jump + bcond. + self.jmp_ptr_bytes() as i32 / 4 + 1 + } +} + +/// Map Opnd to A64Opnd +impl From<Opnd> for A64Opnd { + fn from(opnd: Opnd) -> Self { + match opnd { + Opnd::UImm(value) => A64Opnd::new_uimm(value), + Opnd::Imm(value) => A64Opnd::new_imm(value), + Opnd::Reg(reg) => A64Opnd::Reg(reg), + Opnd::Mem(Mem { base: MemBase::Reg(reg_no), num_bits, disp }) => { + A64Opnd::new_mem(num_bits, A64Opnd::Reg(A64Reg { num_bits, reg_no }), disp) + }, + Opnd::Mem(Mem { base: MemBase::InsnOut(_), .. }) => { + panic!("attempted to lower an Opnd::Mem with a MemBase::InsnOut base") + }, + Opnd::CArg(_) => panic!("attempted to lower an Opnd::CArg"), + Opnd::InsnOut { .. } => panic!("attempted to lower an Opnd::InsnOut"), + Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"), + Opnd::Stack { .. } => panic!("attempted to lower an Opnd::Stack"), + Opnd::None => panic!( + "Attempted to lower an Opnd::None. This often happens when an out operand was not allocated for an instruction because the output of the instruction was not used. Please ensure you are using the output." + ), + + } + } +} + +/// Also implement going from a reference to an operand for convenience. +impl From<&Opnd> for A64Opnd { + fn from(opnd: &Opnd) -> Self { + A64Opnd::from(*opnd) + } +} + +/// Call emit_jmp_ptr and immediately invalidate the written range. +/// This is needed when next_page also moves other_cb that is not invalidated +/// by compile_with_regs. Doing it here allows you to avoid invalidating a lot +/// more than necessary when other_cb jumps from a position early in the page. +/// This invalidates a small range of cb twice, but we accept the small cost. 
+fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) {
+    #[cfg(not(test))]
+    let start = cb.get_write_ptr();
+    emit_jmp_ptr(cb, dst_ptr, true);
+    #[cfg(not(test))]
+    {
+        let end = cb.get_write_ptr();
+        unsafe { rb_yjit_icache_invalidate(start.raw_ptr(cb) as _, end.raw_ptr(cb) as _) };
+    }
+}
+
+fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
+    let src_addr = cb.get_write_ptr().as_offset();
+    let dst_addr = dst_ptr.as_offset();
+
+    // If the offset is short enough, then we'll use the
+    // branch instruction. Otherwise, we'll move the
+    // destination into a register and use the branch
+    // register instruction.
+    let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
+        b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
+        1
+    } else {
+        let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64);
+        br(cb, Assembler::SCRATCH0);
+        num_insns + 1
+    };
+
+    if padding {
+        // Make sure it's always a consistent number of
+        // instructions in case it gets patched and has to
+        // use the other branch.
+        assert!(num_insns * 4 <= cb.jmp_ptr_bytes());
+        for _ in num_insns..(cb.jmp_ptr_bytes() / 4) {
+            nop(cb);
+        }
+    }
+}
+
+/// Emit the required instructions to load the given value into the
+/// given register. Our goal here is to use as few instructions as
+/// possible to get this value into the register.
+fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
+    let mut current = value;
+
+    if current <= 0xffff {
+        // If the value fits into a single movz
+        // instruction, then we'll use that.
+        movz(cb, rd, A64Opnd::new_uimm(current), 0);
+        return 1;
+    } else if BitmaskImmediate::try_from(current).is_ok() {
+        // Otherwise, if the immediate can be encoded
+        // with the special bitmask immediate encoding,
+        // we'll use that.
+        mov(cb, rd, A64Opnd::new_uimm(current));
+        return 1;
+    } else {
+        // Finally we'll fall back to encoding the value
+        // using movz for the first 16 bits and movk for
+        // each subsequent set of 16 bits, as long as
+        // they are necessary.
+        movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0);
+        let mut num_insns = 1;
+
+        // (We're sure this is necessary, since we already
+        // checked above whether the value fits into a single movz.)
+        current >>= 16;
+        movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16);
+        num_insns += 1;
+
+        if current > 0xffff {
+            current >>= 16;
+            movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32);
+            num_insns += 1;
+        }
+
+        if current > 0xffff {
+            current >>= 16;
+            movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48);
+            num_insns += 1;
+        }
+        return num_insns;
+    }
+}
+
+/// List of registers that can be used for stack temps.
+/// These are caller-saved registers.
+pub static TEMP_REGS: [Reg; 5] = [X1_REG, X9_REG, X10_REG, X14_REG, X15_REG];
+
+#[derive(Debug, PartialEq)]
+enum EmitError {
+    RetryOnNextPage,
+    OutOfMemory,
+}
+
+impl Assembler
+{
+    // Special scratch registers for intermediate processing.
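+    // (X16 and X17 are the AArch64 procedure call standard's intra-procedure-call scratch registers, IP0 and IP1.)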
+ // This register is caller-saved (so we don't have to save it before using it) + pub const SCRATCH_REG: Reg = X16_REG; + const SCRATCH0: A64Opnd = A64Opnd::Reg(Assembler::SCRATCH_REG); + const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG); + + /// Get the list of registers from which we will allocate on this platform + /// These are caller-saved registers + /// Note: we intentionally exclude C_RET_REG (X0) from this list + /// because of the way it's used in gen_leave() and gen_leave_exit() + pub fn get_alloc_regs() -> Vec<Reg> { + vec![X11_REG, X12_REG, X13_REG] + } + + /// Get a list of all of the caller-saved registers + pub fn get_caller_save_regs() -> Vec<Reg> { + vec![X1_REG, X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG] + } + + /// Split platform-specific instructions + /// The transformations done here are meant to make our lives simpler in later + /// stages of the compilation pipeline. + /// Here we may want to make sure that all instructions (except load and store) + /// have no memory operands. + fn arm64_split(mut self) -> Assembler + { + /// When we're attempting to load a memory address into a register, the + /// displacement must fit into the maximum number of bits for an Op::Add + /// immediate. If it doesn't, we have to load the displacement into a + /// register first. + fn split_lea_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Mem(Mem { base, disp, num_bits }) => { + if disp >= 0 && ShiftedImmediate::try_from(disp as u64).is_ok() { + asm.lea(opnd) + } else { + let disp = asm.load(Opnd::Imm(disp.into())); + let reg = match base { + MemBase::Reg(reg_no) => Opnd::Reg(Reg { reg_no, num_bits }), + MemBase::InsnOut(idx) => Opnd::InsnOut { idx, num_bits } + }; + + asm.add(reg, disp) + } + }, + _ => unreachable!("Op::Lea only accepts Opnd::Mem operands.") + } + } + + /// When you're storing a register into a memory location or loading a + /// memory location into a register, the displacement from the base + /// register of the memory location must fit into 9 bits. If it doesn't, + /// then we need to load that memory address into a register first. + fn split_memory_address(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Mem(mem) => { + if mem_disp_fits_bits(mem.disp) { + opnd + } else { + let base = split_lea_operand(asm, opnd); + Opnd::mem(64, base, 0) + } + }, + _ => unreachable!("Can only split memory addresses.") + } + } + + /// Any memory operands you're sending into an Op::Load instruction need + /// to be split in case their displacement doesn't fit into 9 bits. + fn split_load_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, + Opnd::Mem(_) => { + let split_opnd = split_memory_address(asm, opnd); + let out_opnd = asm.load(split_opnd); + // Many Arm insns support only 32-bit or 64-bit operands. asm.load with fewer + // bits zero-extends the value, so it's safe to recognize it as a 32-bit value. + if out_opnd.rm_num_bits() < 32 { + out_opnd.with_num_bits(32).unwrap() + } else { + out_opnd + } + }, + _ => asm.load(opnd) + } + } + + /// Operands that take the place of bitmask immediates must follow a + /// certain encoding. In this function we ensure that those operands + /// do follow that encoding, and if they don't then we load them first. + fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd, dest_num_bits: u8) -> Opnd { + match opnd { + Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } | Opnd::Stack { .. 
} => opnd, + Opnd::Mem(_) => split_load_operand(asm, opnd), + Opnd::Imm(imm) => { + if imm == 0 { + Opnd::Reg(XZR_REG) + } else if (dest_num_bits == 64 && + BitmaskImmediate::try_from(imm as u64).is_ok()) || + (dest_num_bits == 32 && + u32::try_from(imm).is_ok() && + BitmaskImmediate::new_32b_reg(imm as u32).is_ok()) { + Opnd::UImm(imm as u64) + } else { + asm.load(opnd).with_num_bits(dest_num_bits).unwrap() + } + }, + Opnd::UImm(uimm) => { + if (dest_num_bits == 64 && BitmaskImmediate::try_from(uimm).is_ok()) || + (dest_num_bits == 32 && + u32::try_from(uimm).is_ok() && + BitmaskImmediate::new_32b_reg(uimm as u32).is_ok()) { + opnd + } else { + asm.load(opnd).with_num_bits(dest_num_bits).unwrap() + } + }, + Opnd::None | Opnd::Value(_) => unreachable!() + } + } + + /// Operands that take the place of a shifted immediate must fit within + /// a certain size. If they don't then we need to load them first. + fn split_shifted_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } => opnd, + Opnd::Mem(_) => split_load_operand(asm, opnd), + Opnd::Imm(imm) => if ShiftedImmediate::try_from(imm as u64).is_ok() { + opnd + } else { + asm.load(opnd) + } + Opnd::UImm(uimm) => { + if ShiftedImmediate::try_from(uimm).is_ok() { + opnd + } else { + asm.load(opnd) + } + }, + Opnd::None | Opnd::Value(_) | Opnd::Stack { .. } => unreachable!() + } + } + + /// Returns the operands that should be used for a boolean logic + /// instruction. + fn split_boolean_operands(asm: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> (Opnd, Opnd) { + match (opnd0, opnd1) { + (Opnd::Reg(_), Opnd::Reg(_)) => { + (opnd0, opnd1) + }, + (reg_opnd @ Opnd::Reg(_), other_opnd) | + (other_opnd, reg_opnd @ Opnd::Reg(_)) => { + let opnd1 = split_bitmask_immediate(asm, other_opnd, reg_opnd.rm_num_bits()); + (reg_opnd, opnd1) + }, + _ => { + let opnd0 = split_load_operand(asm, opnd0); + let opnd1 = split_bitmask_immediate(asm, opnd1, opnd0.rm_num_bits()); + (opnd0, opnd1) + } + } + } + + /// Returns the operands that should be used for a csel instruction. + fn split_csel_operands(asm: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> (Opnd, Opnd) { + let opnd0 = match opnd0 { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd0, + _ => split_load_operand(asm, opnd0) + }; + + let opnd1 = match opnd1 { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd1, + _ => split_load_operand(asm, opnd1) + }; + + (opnd0, opnd1) + } + + fn split_less_than_32_cmp(asm: &mut Assembler, opnd0: Opnd) -> Opnd { + match opnd0 { + Opnd::Reg(_) | Opnd::InsnOut { .. } => { + match opnd0.rm_num_bits() { + 8 => asm.and(opnd0.with_num_bits(64).unwrap(), Opnd::UImm(0xff)), + 16 => asm.and(opnd0.with_num_bits(64).unwrap(), Opnd::UImm(0xffff)), + 32 | 64 => opnd0, + bits => unreachable!("Invalid number of bits. {}", bits) + } + } + _ => opnd0 + } + } + + let live_ranges: Vec<usize> = take(&mut self.live_ranges); + let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits)); + let asm = &mut asm_local; + let mut iterator = self.into_draining_iter(); + + while let Some((index, mut insn)) = iterator.next_mapped() { + // Here we're going to map the operands of the instruction to load + // any Opnd::Value operands into registers if they are heap objects + // such that only the Op::Load instruction needs to handle that + // case. If the values aren't heap objects then we'll treat them as + // if they were just unsigned integer. + let is_load = matches!(insn, Insn::Load { .. 
} | Insn::LoadInto { .. }); + let mut opnd_iter = insn.opnd_iter_mut(); + + while let Some(opnd) = opnd_iter.next() { + match opnd { + Opnd::Value(value) => { + if value.special_const_p() { + *opnd = Opnd::UImm(value.as_u64()); + } else if !is_load { + *opnd = asm.load(*opnd); + } + }, + Opnd::Stack { .. } => { + *opnd = asm.lower_stack_opnd(opnd); + } + _ => {} + }; + } + + // We are replacing instructions here so we know they are already + // being used. It is okay not to use their output here. + #[allow(unused_must_use)] + match &mut insn { + Insn::Add { left, right, .. } => { + match (*left, *right) { + (Opnd::Reg(_) | Opnd::InsnOut { .. }, Opnd::Reg(_) | Opnd::InsnOut { .. }) => { + asm.add(*left, *right); + }, + (reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. }), other_opnd) | + (other_opnd, reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. })) => { + let opnd1 = split_shifted_immediate(asm, other_opnd); + asm.add(reg_opnd, opnd1); + }, + _ => { + let opnd0 = split_load_operand(asm, *left); + let opnd1 = split_shifted_immediate(asm, *right); + asm.add(opnd0, opnd1); + } + } + }, + Insn::And { left, right, out } | + Insn::Or { left, right, out } | + Insn::Xor { left, right, out } => { + let (opnd0, opnd1) = split_boolean_operands(asm, *left, *right); + *left = opnd0; + *right = opnd1; + + // Since these instructions are lowered to an instruction that have 2 input + // registers and an output register, look to merge with an `Insn::Mov` that + // follows which puts the output in another register. For example: + // `Add a, b => out` followed by `Mov c, out` becomes `Add a, b => c`. + if let (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src })) = (left, right, iterator.peek()) { + if live_ranges[index] == index + 1 { + // Check after potentially lowering a stack operand to a register operand + let lowered_dest = if let Opnd::Stack { .. } = dest { + asm.lower_stack_opnd(dest) + } else { + *dest + }; + if out == src && matches!(lowered_dest, Opnd::Reg(_)) { + *out = lowered_dest; + iterator.map_insn_index(asm); + iterator.next_unmapped(); // Pop merged Insn::Mov + } + } + } + + asm.push_insn(insn); + } + // Lower to Joz and Jonz for generating CBZ/CBNZ for compare-with-0-and-branch. + ref insn @ Insn::Cmp { ref left, right: ref right @ (Opnd::UImm(0) | Opnd::Imm(0)) } | + ref insn @ Insn::Test { ref left, right: ref right @ (Opnd::InsnOut { .. } | Opnd::Reg(_)) } if { + let same_opnd_if_test = if let Insn::Test { .. } = insn { + left == right + } else { + true + }; + + same_opnd_if_test && if let Some( + Insn::Jz(target) | Insn::Je(target) | Insn::Jnz(target) | Insn::Jne(target) + ) = iterator.peek() { + matches!(target, Target::SideExit { .. }) + } else { + false + } + } => { + let reg = split_load_operand(asm, *left); + match iterator.peek() { + Some(Insn::Jz(target) | Insn::Je(target)) => asm.push_insn(Insn::Joz(reg, *target)), + Some(Insn::Jnz(target) | Insn::Jne(target)) => asm.push_insn(Insn::Jonz(reg, *target)), + _ => () + } + + iterator.map_insn_index(asm); + iterator.next_unmapped(); // Pop merged jump instruction + } + Insn::CCall { opnds, fptr, .. } => { + assert!(opnds.len() <= C_ARG_OPNDS.len()); + + // Load each operand into the corresponding argument + // register. 
+ // Note: the iteration order is reversed to avoid corrupting x0, + // which is both the return value and first argument register + for (idx, opnd) in opnds.into_iter().enumerate().rev() { + // If the value that we're sending is 0, then we can use + // the zero register, so in this case we'll just send + // a UImm of 0 along as the argument to the move. + let value = match opnd { + Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0), + Opnd::Mem(_) => split_memory_address(asm, *opnd), + _ => *opnd + }; + + asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), value); + } + + // Now we push the CCall without any arguments so that it + // just performs the call. + asm.ccall(*fptr, vec![]); + }, + Insn::Cmp { left, right } => { + let opnd0 = split_load_operand(asm, *left); + let opnd0 = split_less_than_32_cmp(asm, opnd0); + let split_right = split_shifted_immediate(asm, *right); + let opnd1 = match split_right { + Opnd::InsnOut { .. } if opnd0.num_bits() != split_right.num_bits() => { + split_right.with_num_bits(opnd0.num_bits().unwrap()).unwrap() + }, + _ => split_right + }; + + asm.cmp(opnd0, opnd1); + }, + Insn::CRet(opnd) => { + match opnd { + // If the value is already in the return register, then + // we don't need to do anything. + Opnd::Reg(C_RET_REG) => {}, + + // If the value is a memory address, we need to first + // make sure the displacement isn't too large and then + // load it into the return register. + Opnd::Mem(_) => { + let split = split_memory_address(asm, *opnd); + asm.load_into(C_RET_OPND, split); + }, + + // Otherwise we just need to load the value into the + // return register. + _ => { + asm.load_into(C_RET_OPND, *opnd); + } + } + asm.cret(C_RET_OPND); + }, + Insn::CSelZ { truthy, falsy, out } | + Insn::CSelNZ { truthy, falsy, out } | + Insn::CSelE { truthy, falsy, out } | + Insn::CSelNE { truthy, falsy, out } | + Insn::CSelL { truthy, falsy, out } | + Insn::CSelLE { truthy, falsy, out } | + Insn::CSelG { truthy, falsy, out } | + Insn::CSelGE { truthy, falsy, out } => { + let (opnd0, opnd1) = split_csel_operands(asm, *truthy, *falsy); + *truthy = opnd0; + *falsy = opnd1; + // Merge `csel` and `mov` into a single `csel` when possible + match iterator.peek() { + Some(Insn::Mov { dest: Opnd::Reg(reg), src }) + if matches!(out, Opnd::InsnOut { .. }) && *out == *src && live_ranges[index] == index + 1 => { + *out = Opnd::Reg(*reg); + asm.push_insn(insn); + iterator.map_insn_index(asm); + iterator.next_unmapped(); // Pop merged Insn::Mov + } + _ => { + asm.push_insn(insn); + } + } + }, + Insn::IncrCounter { mem, value } => { + let counter_addr = match mem { + Opnd::Mem(_) => split_lea_operand(asm, *mem), + _ => *mem + }; + + asm.incr_counter(counter_addr, *value); + }, + Insn::JmpOpnd(opnd) => { + if let Opnd::Mem(_) = opnd { + let opnd0 = split_load_operand(asm, *opnd); + asm.jmp_opnd(opnd0); + } else { + asm.jmp_opnd(*opnd); + } + }, + Insn::Load { opnd, .. } | + Insn::LoadInto { opnd, .. } => { + *opnd = match opnd { + Opnd::Mem(_) => split_memory_address(asm, *opnd), + _ => *opnd + }; + asm.push_insn(insn); + }, + Insn::LoadSExt { opnd, .. } => { + match opnd { + // We only want to sign extend if the operand is a + // register, instruction output, or memory address that + // is 32 bits. Otherwise we'll just load the value + // directly since there's no need to sign extend. + Opnd::Reg(Reg { num_bits: 32, .. }) | + Opnd::InsnOut { num_bits: 32, .. } | + Opnd::Mem(Mem { num_bits: 32, .. 
}) => { + asm.load_sext(*opnd); + }, + _ => { + asm.load(*opnd); + } + }; + }, + Insn::Mov { dest, src } => { + match (&dest, &src) { + // If we're attempting to load into a memory operand, then + // we'll switch over to the store instruction. + (Opnd::Mem(_), _) => { + let opnd0 = split_memory_address(asm, *dest); + let value = match *src { + // If the first operand is zero, then we can just use + // the zero register. + Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG), + // If the first operand is a memory operand, we're going + // to transform this into a store instruction, so we'll + // need to load this anyway. + Opnd::UImm(_) => asm.load(*src), + // The value that is being moved must be either a + // register or an immediate that can be encoded as a + // bitmask immediate. Otherwise, we'll need to split the + // move into multiple instructions. + _ => split_bitmask_immediate(asm, *src, dest.rm_num_bits()) + }; + + asm.store(opnd0, value); + }, + // If we're loading a memory operand into a register, then + // we'll switch over to the load instruction. + (Opnd::Reg(_), Opnd::Mem(_)) => { + let value = split_memory_address(asm, *src); + asm.load_into(*dest, value); + }, + // Otherwise we'll use the normal mov instruction. + (Opnd::Reg(_), _) => { + let value = match *src { + // Unlike other instructions, we can avoid splitting this case, using movz. + Opnd::UImm(uimm) if uimm <= 0xffff => *src, + _ => split_bitmask_immediate(asm, *src, dest.rm_num_bits()), + }; + asm.mov(*dest, value); + }, + _ => unreachable!() + }; + }, + Insn::Not { opnd, .. } => { + // The value that is being negated must be in a register, so + // if we get anything else we need to load it first. + let opnd0 = match opnd { + Opnd::Mem(_) => split_load_operand(asm, *opnd), + _ => *opnd + }; + + asm.not(opnd0); + }, + Insn::LShift { opnd, .. } | + Insn::RShift { opnd, .. } | + Insn::URShift { opnd, .. } => { + // The operand must be in a register, so + // if we get anything else we need to load it first. + let opnd0 = match opnd { + Opnd::Mem(_) => split_load_operand(asm, *opnd), + _ => *opnd + }; + + *opnd = opnd0; + asm.push_insn(insn); + }, + Insn::Store { dest, src } => { + // The value being stored must be in a register, so if it's + // not already one we'll load it first. + let opnd1 = match src { + // If the first operand is zero, then we can just use + // the zero register. + Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG), + // Otherwise we'll check if we need to load it first. + _ => split_load_operand(asm, *src) + }; + + match dest { + Opnd::Reg(_) => { + // Store does not support a register as a dest operand. + asm.mov(*dest, opnd1); + } + _ => { + // The displacement for the STUR instruction can't be more + // than 9 bits long. If it's longer, we need to load the + // memory address into a register first. + let opnd0 = split_memory_address(asm, *dest); + asm.store(opnd0, opnd1); + } + } + }, + Insn::Sub { left, right, .. } => { + let opnd0 = split_load_operand(asm, *left); + let opnd1 = split_shifted_immediate(asm, *right); + asm.sub(opnd0, opnd1); + }, + Insn::Mul { left, right, .. } => { + let opnd0 = split_load_operand(asm, *left); + let opnd1 = split_load_operand(asm, *right); + asm.mul(opnd0, opnd1); + }, + Insn::Test { left, right } => { + // The value being tested must be in a register, so if it's + // not already one we'll load it first. 
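+ // For example, `test(Opnd::mem(64, CFP, 0), Opnd::UImm(7))` loads the
+ // memory operand into a register first; 7 (0b111) is already a valid
+ // bitmask immediate, so the right-hand side is left untouched.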
+ let opnd0 = split_load_operand(asm, *left); + + // The second value must be either a register or an + // unsigned immediate that can be encoded as a bitmask + // immediate. If it's not one of those, we'll need to load + // it first. + let opnd1 = split_bitmask_immediate(asm, *right, opnd0.rm_num_bits()); + asm.test(opnd0, opnd1); + }, + _ => { + // If we have an output operand, then we need to replace it + // with a new output operand from the new assembler. + if insn.out_opnd().is_some() { + let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter()); + let out = insn.out_opnd_mut().unwrap(); + *out = asm.next_opnd_out(out_num_bits); + } + + asm.push_insn(insn); + } + }; + + iterator.map_insn_index(asm); + } + + asm_local + } + + /// Emit platform-specific machine code + /// Returns a list of GC offsets. Can return failure to signal caller to retry. + fn arm64_emit(&mut self, cb: &mut CodeBlock, ocb: &mut Option<&mut OutlinedCb>) -> Result<Vec<u32>, EmitError> { + /// Determine how many instructions it will take to represent moving + /// this value into a register. Note that the return value of this + /// function must correspond to how many instructions are used to + /// represent this load in the emit_load_value function. + fn emit_load_size(value: u64) -> u8 { + if BitmaskImmediate::try_from(value).is_ok() { + return 1; + } + + if value < (1 << 16) { + 1 + } else if value < (1 << 32) { + 2 + } else if value < (1 << 48) { + 3 + } else { + 4 + } + } + + /// Emit a conditional jump instruction to a specific target. This is + /// called when lowering any of the conditional jump instructions. + fn emit_conditional_jump<const CONDITION: u8>(cb: &mut CodeBlock, target: Target) { + match target { + Target::CodePtr(dst_ptr) | Target::SideExitPtr(dst_ptr) => { + let dst_addr = dst_ptr.as_offset(); + let src_addr = cb.get_write_ptr().as_offset(); + + let num_insns = if bcond_offset_fits_bits((dst_addr - src_addr) / 4) { + // If the jump offset fits into the conditional jump as + // an immediate value and it's properly aligned, then we + // can use the b.cond instruction directly. We're safe + // to use as i32 here since we already checked that it + // fits. + let bytes = (dst_addr - src_addr) as i32; + bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes)); + + // Here we're going to return 1 because we've only + // written out 1 instruction. + 1 + } else if b_offset_fits_bits((dst_addr - (src_addr + 4)) / 4) { // + 4 for bcond + // If the jump offset fits into the unconditional jump as + // an immediate value, we can use inverse b.cond + b. + // + // We're going to write out the inverse condition so + // that if it doesn't match it will skip over the + // instruction used for branching. + bcond(cb, Condition::inverse(CONDITION), 2.into()); + b(cb, InstructionOffset::from_bytes((dst_addr - (src_addr + 4)) as i32)); // + 4 for bcond + + // We've only written out 2 instructions. + 2 + } else { + // Otherwise, we need to load the address into a + // register and use the branch register instruction. + let dst_addr = (dst_ptr.raw_ptr(cb) as usize).as_u64(); + let load_insns: i32 = emit_load_size(dst_addr).into(); + + // We're going to write out the inverse condition so + // that if it doesn't match it will skip over the + // instructions used for branching. 
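+ // For example, with a 3-instruction address load the sequence is:
+ // a b.<inverse cond> past the following 4 instructions, the
+ // 3-instruction load into the scratch register, and then the br.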
+ bcond(cb, Condition::inverse(CONDITION), (load_insns + 2).into()); + emit_load_value(cb, Assembler::SCRATCH0, dst_addr); + br(cb, Assembler::SCRATCH0); + + // Here we'll return the number of instructions that it + // took to write out the destination address + 1 for the + // b.cond and 1 for the br. + load_insns + 2 + }; + + if let Target::CodePtr(_) = target { + // We need to make sure we have at least 6 instructions for + // every kind of jump for invalidation purposes, so we're + // going to write out padding nop instructions here. + assert!(num_insns <= cb.conditional_jump_insns()); + for _ in num_insns..cb.conditional_jump_insns() { nop(cb); } + } + }, + Target::Label(label_idx) => { + // Here we're going to save enough space for ourselves and + // then come back and write the instruction once we know the + // offset. We're going to assume we can fit into a single + // b.cond instruction. It will panic otherwise. + cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap(); + bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes)); + }); + }, + Target::SideExit { .. } => { + unreachable!("Target::SideExit should have been compiled by compile_side_exit") + }, + }; + } + + /// Emit a CBZ or CBNZ which branches when a register is zero or non-zero + fn emit_cmp_zero_jump(cb: &mut CodeBlock, reg: A64Opnd, branch_if_zero: bool, target: Target) { + if let Target::SideExitPtr(dst_ptr) = target { + let dst_addr = dst_ptr.as_offset(); + let src_addr = cb.get_write_ptr().as_offset(); + + if cmp_branch_offset_fits_bits((dst_addr - src_addr) / 4) { + // If the offset fits in one instruction, generate cbz or cbnz + let bytes = (dst_addr - src_addr) as i32; + if branch_if_zero { + cbz(cb, reg, InstructionOffset::from_bytes(bytes)); + } else { + cbnz(cb, reg, InstructionOffset::from_bytes(bytes)); + } + } else { + // Otherwise, we load the address into a register and + // use the branch register instruction. Note that because + // side exits should always be close, this form should be + // rare or impossible to see. + let dst_addr = dst_ptr.raw_addr(cb) as u64; + let load_insns: i32 = emit_load_size(dst_addr).into(); + + // Write out the inverse condition so that if + // it doesn't match it will skip over the + // instructions used for branching. + if branch_if_zero { + cbnz(cb, reg, InstructionOffset::from_insns(load_insns + 2)); + } else { + cbz(cb, reg, InstructionOffset::from_insns(load_insns + 2)); + } + emit_load_value(cb, Assembler::SCRATCH0, dst_addr); + br(cb, Assembler::SCRATCH0); + + } + } else { + unreachable!("We should only generate Joz/Jonz with side-exit targets"); + } + } + + /// Emit a push instruction for the given operand by adding to the stack + /// pointer and then storing the given value. + fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) { + str_pre(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, -C_SP_STEP)); + } + + /// Emit a pop instruction into the given operand by loading the value + /// and then subtracting from the stack pointer. + fn emit_pop(cb: &mut CodeBlock, opnd: A64Opnd) { + ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP)); + } + + /// Compile a side exit if Target::SideExit is given. 
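+ /// A `Target::SideExit` is turned into a `Target::SideExitPtr`
+ /// pointing at the (possibly cached) exit code in the outlined code
+ /// block; any other target is returned unchanged.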
+ fn compile_side_exit( + target: Target, + asm: &mut Assembler, + ocb: &mut Option<&mut OutlinedCb>, + ) -> Result<Target, EmitError> { + if let Target::SideExit { counter, context } = target { + let side_exit = asm.get_side_exit(&context.unwrap(), Some(counter), ocb.as_mut().unwrap()) + .ok_or(EmitError::OutOfMemory)?; + Ok(Target::SideExitPtr(side_exit)) + } else { + Ok(target) + } + } + + // dbg!(&self.insns); + + // List of GC offsets + let mut gc_offsets: Vec<u32> = Vec::new(); + + // Buffered list of PosMarker callbacks to fire if codegen is successful + let mut pos_markers: Vec<(usize, CodePtr)> = vec![]; + + // For each instruction + let start_write_pos = cb.get_write_pos(); + let mut insn_idx: usize = 0; + while let Some(insn) = self.insns.get(insn_idx) { + let src_ptr = cb.get_write_ptr(); + let had_dropped_bytes = cb.has_dropped_bytes(); + let old_label_state = cb.get_label_state(); + let mut insn_gc_offsets: Vec<u32> = Vec::new(); + + match insn { + Insn::Comment(text) => { + if cfg!(feature = "disasm") { + cb.add_comment(text); + } + }, + Insn::Label(target) => { + cb.write_label(target.unwrap_label_idx()); + }, + // Report back the current position in the generated code + Insn::PosMarker(..) => { + pos_markers.push((insn_idx, cb.get_write_ptr())) + } + Insn::BakeString(text) => { + for byte in text.as_bytes() { + cb.write_byte(*byte); + } + + // Add a null-terminator byte for safety (in case we pass + // this to C code) + cb.write_byte(0); + + // Pad out the string to the next 4-byte boundary so that + // it's easy to jump past. + for _ in 0..(4 - ((text.len() + 1) % 4)) { + cb.write_byte(0); + } + }, + Insn::FrameSetup => { + stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16)); + + // X29 (frame_pointer) = SP + mov(cb, X29, C_SP_REG); + }, + Insn::FrameTeardown => { + // SP = X29 (frame pointer) + mov(cb, C_SP_REG, X29); + + ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16)); + }, + Insn::Add { left, right, out } => { + adds(cb, out.into(), left.into(), right.into()); + }, + Insn::Sub { left, right, out } => { + subs(cb, out.into(), left.into(), right.into()); + }, + Insn::Mul { left, right, out } => { + // If the next instruction is jo (jump on overflow) + match (self.insns.get(insn_idx + 1), self.insns.get(insn_idx + 2)) { + (Some(Insn::JoMul(_)), _) | + (Some(Insn::PosMarker(_)), Some(Insn::JoMul(_))) => { + // Compute the high 64 bits + smulh(cb, Self::SCRATCH0, left.into(), right.into()); + + // Compute the low 64 bits + // This may clobber one of the input registers, + // so we do it after smulh + mul(cb, out.into(), left.into(), right.into()); + + // Produce a register that is all zeros or all ones + // Based on the sign bit of the 64-bit mul result + asr(cb, Self::SCRATCH1, out.into(), A64Opnd::UImm(63)); + + // If the high 64-bits are not all zeros or all ones, + // matching the sign bit, then we have an overflow + cmp(cb, Self::SCRATCH0, Self::SCRATCH1); + // Insn::JoMul will emit_conditional_jump::<{Condition::NE}> + } + _ => { + mul(cb, out.into(), left.into(), right.into()); + } + } + }, + Insn::And { left, right, out } => { + and(cb, out.into(), left.into(), right.into()); + }, + Insn::Or { left, right, out } => { + orr(cb, out.into(), left.into(), right.into()); + }, + Insn::Xor { left, right, out } => { + eor(cb, out.into(), left.into(), right.into()); + }, + Insn::Not { opnd, out } => { + mvn(cb, out.into(), opnd.into()); + }, + Insn::RShift { opnd, shift, out } => { + asr(cb, out.into(), opnd.into(), shift.into()); + }, + Insn::URShift { 
opnd, shift, out } => { + lsr(cb, out.into(), opnd.into(), shift.into()); + }, + Insn::LShift { opnd, shift, out } => { + lsl(cb, out.into(), opnd.into(), shift.into()); + }, + Insn::Store { dest, src } => { + // This order may be surprising but it is correct. The way + // the Arm64 assembler works, the register that is going to + // be stored is first and the address is second. However in + // our IR we have the address first and the register second. + match dest.rm_num_bits() { + 64 | 32 => stur(cb, src.into(), dest.into()), + 16 => sturh(cb, src.into(), dest.into()), + num_bits => panic!("unexpected dest num_bits: {} (src: {:#?}, dest: {:#?})", num_bits, src, dest), + } + }, + Insn::Load { opnd, out } | + Insn::LoadInto { opnd, dest: out } => { + match *opnd { + Opnd::Reg(_) | Opnd::InsnOut { .. } => { + mov(cb, out.into(), opnd.into()); + }, + Opnd::UImm(uimm) => { + emit_load_value(cb, out.into(), uimm); + }, + Opnd::Imm(imm) => { + emit_load_value(cb, out.into(), imm as u64); + }, + Opnd::Mem(_) => { + match opnd.rm_num_bits() { + 64 | 32 => ldur(cb, out.into(), opnd.into()), + 16 => ldurh(cb, out.into(), opnd.into()), + 8 => ldurb(cb, out.into(), opnd.into()), + num_bits => panic!("unexpected num_bits: {}", num_bits) + }; + }, + Opnd::Value(value) => { + // We dont need to check if it's a special const + // here because we only allow these operands to hit + // this point if they're not a special const. + assert!(!value.special_const_p()); + + // This assumes only load instructions can contain + // references to GC'd Value operands. If the value + // being loaded is a heap object, we'll report that + // back out to the gc_offsets list. + ldr_literal(cb, out.into(), 2.into()); + b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32))); + cb.write_bytes(&value.as_u64().to_le_bytes()); + + let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); + insn_gc_offsets.push(ptr_offset); + }, + Opnd::CArg { .. } => { + unreachable!("C argument operand was not lowered before arm64_emit"); + } + Opnd::Stack { .. } => { + unreachable!("Stack operand was not lowered before arm64_emit"); + } + Opnd::None => { + unreachable!("Attempted to load from None operand"); + } + }; + }, + Insn::LoadSExt { opnd, out } => { + match *opnd { + Opnd::Reg(Reg { num_bits: 32, .. }) | + Opnd::InsnOut { num_bits: 32, .. } => { + sxtw(cb, out.into(), opnd.into()); + }, + Opnd::Mem(Mem { num_bits: 32, .. }) => { + ldursw(cb, out.into(), opnd.into()); + }, + _ => unreachable!() + }; + }, + Insn::Mov { dest, src } => { + // This supports the following two kinds of immediates: + // * The value fits into a single movz instruction + // * It can be encoded with the special bitmask immediate encoding + // arm64_split() should have split other immediates that require multiple instructions. + match src { + Opnd::UImm(uimm) if *uimm <= 0xffff => { + movz(cb, dest.into(), A64Opnd::new_uimm(*uimm), 0); + }, + _ => { + mov(cb, dest.into(), src.into()); + } + } + }, + Insn::Lea { opnd, out } => { + let opnd: A64Opnd = opnd.into(); + + match opnd { + A64Opnd::Mem(mem) => { + add( + cb, + out.into(), + A64Opnd::Reg(A64Reg { reg_no: mem.base_reg_no, num_bits: 64 }), + A64Opnd::new_imm(mem.disp.into()) + ); + }, + _ => { + panic!("Op::Lea only accepts Opnd::Mem operands."); + } + }; + }, + Insn::LeaJumpTarget { out, target, .. 
} => { + if let Target::Label(label_idx) = target { + // Set output to the raw address of the label + cb.label_ref(*label_idx, 4, |cb, end_addr, dst_addr| { + adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4))); + }); + + mov(cb, out.into(), Self::SCRATCH0); + } else { + // Set output to the jump target's raw address + let target_code = target.unwrap_code_ptr(); + let target_addr = target_code.raw_addr(cb).as_u64(); + emit_load_value(cb, out.into(), target_addr); + } + }, + Insn::CPush(opnd) => { + emit_push(cb, opnd.into()); + }, + Insn::CPop { out } => { + emit_pop(cb, out.into()); + }, + Insn::CPopInto(opnd) => { + emit_pop(cb, opnd.into()); + }, + Insn::CPushAll => { + let regs = Assembler::get_caller_save_regs(); + + for reg in regs { + emit_push(cb, A64Opnd::Reg(reg)); + } + + // Push the flags/state register + mrs(cb, Self::SCRATCH0, SystemRegister::NZCV); + emit_push(cb, Self::SCRATCH0); + }, + Insn::CPopAll => { + let regs = Assembler::get_caller_save_regs(); + + // Pop the state/flags register + msr(cb, SystemRegister::NZCV, Self::SCRATCH0); + emit_pop(cb, Self::SCRATCH0); + + for reg in regs.into_iter().rev() { + emit_pop(cb, A64Opnd::Reg(reg)); + } + }, + Insn::CCall { fptr, .. } => { + // The offset to the call target in bytes + let src_addr = cb.get_write_ptr().raw_ptr(cb) as i64; + let dst_addr = *fptr as i64; + + // Use BL if the offset is short enough to encode as an immediate. + // Otherwise, use BLR with a register. + if b_offset_fits_bits((dst_addr - src_addr) / 4) { + bl(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); + } else { + emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); + blr(cb, Self::SCRATCH0); + } + }, + Insn::CRet { .. } => { + ret(cb, A64Opnd::None); + }, + Insn::Cmp { left, right } => { + cmp(cb, left.into(), right.into()); + }, + Insn::Test { left, right } => { + tst(cb, left.into(), right.into()); + }, + Insn::JmpOpnd(opnd) => { + br(cb, opnd.into()); + }, + Insn::Jmp(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(dst_ptr) => { + emit_jmp_ptr(cb, dst_ptr, true); + }, + Target::SideExitPtr(dst_ptr) => { + emit_jmp_ptr(cb, dst_ptr, false); + }, + Target::Label(label_idx) => { + // Here we're going to save enough space for + // ourselves and then come back and write the + // instruction once we know the offset. We're going + // to assume we can fit into a single b instruction. + // It will panic otherwise. + cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap(); + b(cb, InstructionOffset::from_bytes(bytes)); + }); + }, + Target::SideExit { .. 
} => { + unreachable!("Target::SideExit should have been compiled by compile_side_exit") + }, + }; + }, + Insn::Je(target) | Insn::Jz(target) => { + emit_conditional_jump::<{Condition::EQ}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jne(target) | Insn::Jnz(target) | Insn::JoMul(target) => { + emit_conditional_jump::<{Condition::NE}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jl(target) => { + emit_conditional_jump::<{Condition::LT}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jg(target) => { + emit_conditional_jump::<{Condition::GT}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jge(target) => { + emit_conditional_jump::<{Condition::GE}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jbe(target) => { + emit_conditional_jump::<{Condition::LS}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jb(target) => { + emit_conditional_jump::<{Condition::CC}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jo(target) => { + emit_conditional_jump::<{Condition::VS}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Joz(opnd, target) => { + emit_cmp_zero_jump(cb, opnd.into(), true, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jonz(opnd, target) => { + emit_cmp_zero_jump(cb, opnd.into(), false, compile_side_exit(*target, self, ocb)?); + }, + Insn::IncrCounter { mem, value } => { + let label = cb.new_label("incr_counter_loop".to_string()); + cb.write_label(label); + + ldaxr(cb, Self::SCRATCH0, mem.into()); + add(cb, Self::SCRATCH0, Self::SCRATCH0, value.into()); + + // The status register that gets used to track whether or + // not the store was successful must be 32 bytes. Since we + // store the SCRATCH registers as their 64-bit versions, we + // need to rewrap it here. + let status = A64Opnd::Reg(Self::SCRATCH1.unwrap_reg().with_num_bits(32)); + stlxr(cb, status, Self::SCRATCH0, mem.into()); + + cmp(cb, Self::SCRATCH1, A64Opnd::new_uimm(0)); + emit_conditional_jump::<{Condition::NE}>(cb, Target::Label(label)); + }, + Insn::Breakpoint => { + brk(cb, A64Opnd::None); + }, + Insn::CSelZ { truthy, falsy, out } | + Insn::CSelE { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::EQ); + }, + Insn::CSelNZ { truthy, falsy, out } | + Insn::CSelNE { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::NE); + }, + Insn::CSelL { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::LT); + }, + Insn::CSelLE { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::LE); + }, + Insn::CSelG { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GT); + }, + Insn::CSelGE { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GE); + } + Insn::LiveReg { .. } => (), // just a reg alloc signal, no code + Insn::PadInvalPatch => { + while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < cb.jmp_ptr_bytes() && !cb.has_dropped_bytes() { + nop(cb); + } + } + }; + + // On failure, jump to the next page and retry the current insn + if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr_with_invalidation) { + // Reset cb states before retrying the current Insn + cb.set_label_state(old_label_state); + + // We don't want label references to cross page boundaries. Signal caller for + // retry. 
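+ // (compile_with_regs resets the label state and retries emission on
+ // a fresh page when it sees this error.)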
+ if !self.label_names.is_empty() { + return Err(EmitError::RetryOnNextPage); + } + } else { + insn_idx += 1; + gc_offsets.append(&mut insn_gc_offsets); + } + } + + // Error if we couldn't write out everything + if cb.has_dropped_bytes() { + return Err(EmitError::OutOfMemory) + } else { + // No bytes dropped, so the pos markers point to valid code + for (insn_idx, pos) in pos_markers { + if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() { + callback(pos, &cb); + } else { + panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}"); + } + } + + return Ok(gc_offsets) + } + } + + /// Optimize and compile the stored instructions + pub fn compile_with_regs(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>, regs: Vec<Reg>) -> Option<(CodePtr, Vec<u32>)> { + let asm = self.arm64_split(); + let mut asm = asm.alloc_regs(regs); + + // Create label instances in the code block + for (idx, name) in asm.label_names.iter().enumerate() { + let label_idx = cb.new_label(name.to_string()); + assert!(label_idx == idx); + } + + let start_ptr = cb.get_write_ptr(); + let starting_label_state = cb.get_label_state(); + let mut ocb = ocb; // for &mut + let emit_result = match asm.arm64_emit(cb, &mut ocb) { + Err(EmitError::RetryOnNextPage) => { + // we want to lower jumps to labels to b.cond instructions, which have a 1 MiB + // range limit. We can easily exceed the limit in case the jump straddles two pages. + // In this case, we retry with a fresh page. + cb.set_label_state(starting_label_state); + cb.next_page(start_ptr, emit_jmp_ptr_with_invalidation); + let result = asm.arm64_emit(cb, &mut ocb); + assert_ne!( + Err(EmitError::RetryOnNextPage), + result, + "should not fail when writing to a fresh code page" + ); + result + } + result => result + }; + + if let (Ok(gc_offsets), false) = (emit_result, cb.has_dropped_bytes()) { + cb.link_labels(); + + // Invalidate icache for newly written out region so we don't run stale code. + // It should invalidate only the code ranges of the current cb because the code + // ranges of the other cb might have a memory region that is still PROT_NONE. + #[cfg(not(test))] + cb.without_page_end_reserve(|cb| { + for (start, end) in cb.writable_addrs(start_ptr, cb.get_write_ptr()) { + unsafe { rb_yjit_icache_invalidate(start as _, end as _) }; + } + }); + + Some((start_ptr, gc_offsets)) + } else { + cb.clear_labels(); + + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::disasm::*; + + fn setup_asm() -> (Assembler, CodeBlock) { + (Assembler::new(), CodeBlock::new_dummy(1024)) + } + + #[test] + fn test_emit_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.add(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_regs(&mut cb, None, vec![X3_REG]); + + // Assert that only 2 instructions were written. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_bake_string() { + let (mut asm, mut cb) = setup_asm(); + + asm.bake_string("Hello, world!"); + asm.compile_with_num_regs(&mut cb, 0); + + // Testing that we pad the string to the nearest 4-byte boundary to make + // it easier to jump over. 
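+ // "Hello, world!" is 13 bytes; the null terminator makes it 14,
+ // which is padded out to 16.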
+ assert_eq!(16, cb.get_write_pos()); + } + + #[test] + fn test_emit_cpush_all() { + let (mut asm, mut cb) = setup_asm(); + + asm.cpush_all(); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_cpop_all() { + let (mut asm, mut cb) = setup_asm(); + + asm.cpop_all(); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_frame() { + let (mut asm, mut cb) = setup_asm(); + + asm.frame_setup(); + asm.frame_teardown(); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_je_fits_into_bcond() { + let (mut asm, mut cb) = setup_asm(); + + let target: CodePtr = cb.get_write_ptr().add_bytes(80); + + asm.je(Target::CodePtr(target)); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_je_does_not_fit_into_bcond() { + let (mut asm, mut cb) = setup_asm(); + + let offset = 1 << 21; + let target: CodePtr = cb.get_write_ptr().add_bytes(offset); + + asm.je(Target::CodePtr(target)); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_lea_label() { + let (mut asm, mut cb) = setup_asm(); + + let label = asm.new_label("label"); + let opnd = asm.lea_jump_target(label); + + asm.write_label(label); + asm.bake_string("Hello, world!"); + asm.store(Opnd::mem(64, SP, 0), opnd); + + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_load_mem_disp_fits_into_load() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::mem(64, SP, 0)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that two instructions were written: LDUR and STUR. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_mem_disp_fits_into_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::mem(64, SP, 1 << 10)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that three instructions were written: ADD, LDUR, and STUR. + assert_eq!(12, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_mem_disp_does_not_fit_into_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::mem(64, SP, 1 << 12 | 1)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that three instructions were written: MOVZ, ADD, LDUR, and STUR. + assert_eq!(16, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_value_immediate() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::Value(Qnil)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that only two instructions were written since the value is an + // immediate. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_value_non_immediate() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::Value(VALUE(0xCAFECAFECAFE0000))); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that five instructions were written since the value is not an + // immediate and needs to be loaded into a register. 
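+ // 20 bytes: an LDR (literal), a B over the inline data, the 8-byte
+ // VALUE itself, and the STUR for the store.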
+ assert_eq!(20, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_32b_reg_not_bitmask_imm() { + let (mut asm, mut cb) = setup_asm(); + let w0 = Opnd::Reg(X0_REG).with_num_bits(32).unwrap(); + asm.test(w0, Opnd::UImm(u32::MAX.into())); + // All ones is not encodable with a bitmask immediate, + // so this needs one register + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_test_32b_reg_bitmask_imm() { + let (mut asm, mut cb) = setup_asm(); + let w0 = Opnd::Reg(X0_REG).with_num_bits(32).unwrap(); + asm.test(w0, Opnd::UImm(0x80000001)); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_or() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.or(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_lshift() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.lshift(Opnd::Reg(X0_REG), Opnd::UImm(5)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_rshift() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.rshift(Opnd::Reg(X0_REG), Opnd::UImm(5)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_urshift() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.urshift(Opnd::Reg(X0_REG), Opnd::UImm(5)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_test() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.compile_with_num_regs(&mut cb, 0); + + // Assert that only one instruction was written. + assert_eq!(4, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_encodable_unsigned_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::UImm(7)); + asm.compile_with_num_regs(&mut cb, 0); + + // Assert that only one instruction was written. + assert_eq!(4, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_unencodable_unsigned_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::UImm(5)); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that a load and a test instruction were written. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_encodable_signed_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::Imm(7)); + asm.compile_with_num_regs(&mut cb, 0); + + // Assert that only one instruction was written. + assert_eq!(4, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_unencodable_signed_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::Imm(5)); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that a load and a test instruction were written. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_negative_signed_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::Imm(-7)); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that a test instruction is written. 
+ assert_eq!(4, cb.get_write_pos()); + } + + #[test] + fn test_32_bit_register_with_some_number() { + let (mut asm, mut cb) = setup_asm(); + + let shape_opnd = Opnd::mem(32, Opnd::Reg(X0_REG), 6); + asm.cmp(shape_opnd, Opnd::UImm(4097)); + asm.compile_with_num_regs(&mut cb, 2); + } + + #[test] + fn test_16_bit_register_store_some_number() { + let (mut asm, mut cb) = setup_asm(); + + let shape_opnd = Opnd::mem(16, Opnd::Reg(X0_REG), 0); + asm.store(shape_opnd, Opnd::UImm(4097)); + asm.compile_with_num_regs(&mut cb, 2); + } + + #[test] + fn test_32_bit_register_store_some_number() { + let (mut asm, mut cb) = setup_asm(); + + let shape_opnd = Opnd::mem(32, Opnd::Reg(X0_REG), 6); + asm.store(shape_opnd, Opnd::UImm(4097)); + asm.compile_with_num_regs(&mut cb, 2); + } + + #[test] + fn test_bcond_straddling_code_pages() { + const LANDING_PAGE: usize = 65; + let mut asm = Assembler::new(); + let mut cb = CodeBlock::new_dummy_with_freed_pages(vec![0, LANDING_PAGE]); + + // Skip to near the end of the page. Room for two instructions. + cb.set_pos(cb.page_start_pos() + cb.page_end() - 8); + + let end = asm.new_label("end"); + // Start with a conditional jump... + asm.jz(end); + + // A few instructions, enough to cause a page switch. + let sum = asm.add(399.into(), 111.into()); + let xorred = asm.xor(sum, 859.into()); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), xorred); + asm.store(Opnd::mem(64, Opnd::Reg(X0_REG), 0), xorred); + + // The branch target. It should be in the landing page. + asm.write_label(end); + asm.cret(xorred); + + // [Bug #19385] + // This used to panic with "The offset must be 19 bits or less." + // due to attempting to lower the `asm.jz` above to a `b.e` with an offset that's > 1 MiB. + let starting_pos = cb.get_write_pos(); + asm.compile_with_num_regs(&mut cb, 2); + let gap = cb.get_write_pos() - starting_pos; + assert!(gap > 0b1111111111111111111); + + let instruction_at_starting_pos: [u8; 4] = unsafe { + std::slice::from_raw_parts(cb.get_ptr(starting_pos).raw_ptr(&cb), 4) + }.try_into().unwrap(); + assert_eq!( + 0b000101 << 26_u32, + u32::from_le_bytes(instruction_at_starting_pos) & (0b111111 << 26_u32), + "starting instruction should be an unconditional branch to the new page (B)" + ); + } + + #[test] + fn test_emit_xor() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.xor(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "0b0001ca4b0000f8", " + 0x0: eor x11, x0, x1 + 0x4: stur x11, [x2] + "); + } + + #[test] + #[cfg(feature = "disasm")] + fn test_simple_disasm() -> std::result::Result<(), capstone::Error> { + // Test drive Capstone with simple input + use capstone::prelude::*; + + let cs = Capstone::new() + .arm64() + .mode(arch::arm64::ArchMode::Arm) + .build()?; + + let insns = cs.disasm_all(&[0x60, 0x0f, 0x80, 0xF2], 0x1000)?; + + match insns.as_ref() { + [insn] => { + assert_eq!(Some("movk"), insn.mnemonic()); + Ok(()) + } + _ => Err(capstone::Error::CustomError( + "expected to disassemble to movk", + )), + } + } + + #[test] + fn test_replace_mov_with_ldur() { + let (mut asm, mut cb) = setup_asm(); + + asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::mem(64, CFP, 8)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "618240f8", {" + 0x0: ldur x1, [x19, #8] + "}); + } + + #[test] + fn test_not_split_mov() { + let (mut asm, mut cb) = setup_asm(); + + asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::UImm(0xffff)); + asm.mov(Opnd::Reg(TEMP_REGS[0]), 
Opnd::UImm(0x10000)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "e1ff9fd2e10370b2", {" + 0x0: mov x1, #0xffff + 0x4: orr x1, xzr, #0x10000 + "}); + } + + #[test] + fn test_merge_csel_mov() { + let (mut asm, mut cb) = setup_asm(); + + let out = asm.csel_l(Qtrue.into(), Qfalse.into()); + asm.mov(Opnd::Reg(TEMP_REGS[0]), out); + asm.compile_with_num_regs(&mut cb, 2); + + assert_disasm!(cb, "8b0280d20c0080d261b18c9a", {" + 0x0: mov x11, #0x14 + 0x4: mov x12, #0 + 0x8: csel x1, x11, x12, lt + "}); + } + + #[test] + fn test_add_with_immediate() { + let (mut asm, mut cb) = setup_asm(); + + let out = asm.add(Opnd::Reg(TEMP_REGS[1]), 1.into()); + let out = asm.add(out, 1_usize.into()); + asm.mov(Opnd::Reg(TEMP_REGS[0]), out); + asm.compile_with_num_regs(&mut cb, 2); + + assert_disasm!(cb, "2b0500b16b0500b1e1030baa", {" + 0x0: adds x11, x9, #1 + 0x4: adds x11, x11, #1 + 0x8: mov x1, x11 + "}); + } + + #[test] + fn test_mul_with_immediate() { + let (mut asm, mut cb) = setup_asm(); + + let out = asm.mul(Opnd::Reg(TEMP_REGS[1]), 3.into()); + asm.mov(Opnd::Reg(TEMP_REGS[0]), out); + asm.compile_with_num_regs(&mut cb, 2); + + assert_disasm!(cb, "6b0080d22b7d0b9be1030baa", {" + 0x0: mov x11, #3 + 0x4: mul x11, x9, x11 + 0x8: mov x1, x11 + "}); + } +} diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs new file mode 100644 index 0000000000..edc0eaf390 --- /dev/null +++ b/yjit/src/backend/ir.rs @@ -0,0 +1,2095 @@ +use std::collections::HashMap; +use std::fmt; +use std::convert::From; +use std::mem::take; +use crate::codegen::{gen_outlined_exit, gen_counted_exit}; +use crate::cruby::{vm_stack_canary, SIZEOF_VALUE_I32, VALUE}; +use crate::virtualmem::CodePtr; +use crate::asm::{CodeBlock, OutlinedCb}; +use crate::core::{Context, RegTemps, MAX_REG_TEMPS}; +use crate::options::*; +use crate::stats::*; + +use crate::backend::current::*; + +pub const EC: Opnd = _EC; +pub const CFP: Opnd = _CFP; +pub const SP: Opnd = _SP; + +pub const C_ARG_OPNDS: [Opnd; 6] = _C_ARG_OPNDS; +pub const C_RET_OPND: Opnd = _C_RET_OPND; +pub use crate::backend::current::{Reg, C_RET_REG}; + +// Memory operand base +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum MemBase +{ + Reg(u8), + InsnOut(usize), +} + +// Memory location +#[derive(Copy, Clone, PartialEq, Eq)] +pub struct Mem +{ + // Base register number or instruction index + pub(super) base: MemBase, + + // Offset relative to the base pointer + pub(super) disp: i32, + + // Size in bits + pub(super) num_bits: u8, +} + +impl fmt::Debug for Mem { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "Mem{}[{:?}", self.num_bits, self.base)?; + if self.disp != 0 { + let sign = if self.disp > 0 { '+' } else { '-' }; + write!(fmt, " {sign} {}", self.disp)?; + } + + write!(fmt, "]") + } +} + +/// Operand to an IR instruction +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum Opnd +{ + None, // For insns with no output + + // Immediate Ruby value, may be GC'd, movable + Value(VALUE), + + /// C argument register. The alloc_regs resolves its register dependencies. + CArg(Reg), + + // Output of a preceding instruction in this block + InsnOut{ idx: usize, num_bits: u8 }, + + /// Pointer to a slot on the VM stack + Stack { + /// Index from stack top. Used for conversion to StackOpnd. + idx: i32, + /// Number of bits for Opnd::Reg and Opnd::Mem. + num_bits: u8, + /// ctx.stack_size when this operand is made. Used with idx for Opnd::Reg. + stack_size: u8, + /// ctx.sp_offset when this operand is made. Used with idx for Opnd::Mem. 
+ sp_offset: i8, + /// ctx.reg_temps when this operand is read. Used for register allocation. + reg_temps: Option<RegTemps> + }, + + // Low-level operands, for lowering + Imm(i64), // Raw signed immediate + UImm(u64), // Raw unsigned immediate + Mem(Mem), // Memory location + Reg(Reg), // Machine register +} + +impl fmt::Debug for Opnd { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + use Opnd::*; + match self { + Self::None => write!(fmt, "None"), + Value(val) => write!(fmt, "Value({val:?})"), + CArg(reg) => write!(fmt, "CArg({reg:?})"), + Stack { idx, sp_offset, .. } => write!(fmt, "SP[{}]", *sp_offset as i32 - idx - 1), + InsnOut { idx, num_bits } => write!(fmt, "Out{num_bits}({idx})"), + Imm(signed) => write!(fmt, "{signed:x}_i64"), + UImm(unsigned) => write!(fmt, "{unsigned:x}_u64"), + // Say Mem and Reg only once + Mem(mem) => write!(fmt, "{mem:?}"), + Reg(reg) => write!(fmt, "{reg:?}"), + } + } +} + +impl Opnd +{ + /// Convenience constructor for memory operands + pub fn mem(num_bits: u8, base: Opnd, disp: i32) -> Self { + match base { + Opnd::Reg(base_reg) => { + assert!(base_reg.num_bits == 64); + Opnd::Mem(Mem { + base: MemBase::Reg(base_reg.reg_no), + disp: disp, + num_bits: num_bits, + }) + }, + + Opnd::InsnOut{idx, num_bits: out_num_bits } => { + assert!(num_bits <= out_num_bits); + Opnd::Mem(Mem { + base: MemBase::InsnOut(idx), + disp: disp, + num_bits: num_bits, + }) + }, + + _ => unreachable!("memory operand with non-register base") + } + } + + /// Constructor for constant pointer operand + pub fn const_ptr(ptr: *const u8) -> Self { + Opnd::UImm(ptr as u64) + } + + /// Constructor for a C argument operand + pub fn c_arg(reg_opnd: Opnd) -> Self { + match reg_opnd { + Opnd::Reg(reg) => Opnd::CArg(reg), + _ => unreachable!(), + } + } + + /// Unwrap a register operand + pub fn unwrap_reg(&self) -> Reg { + match self { + Opnd::Reg(reg) => *reg, + _ => unreachable!("trying to unwrap {:?} into reg", self) + } + } + + /// Get the size in bits for this operand if there is one. + pub fn num_bits(&self) -> Option<u8> { + match *self { + Opnd::Reg(Reg { num_bits, .. }) => Some(num_bits), + Opnd::Mem(Mem { num_bits, .. }) => Some(num_bits), + Opnd::InsnOut { num_bits, .. } => Some(num_bits), + _ => None + } + } + + pub fn with_num_bits(&self, num_bits: u8) -> Option<Opnd> { + assert!(num_bits == 8 || num_bits == 16 || num_bits == 32 || num_bits == 64); + match *self { + Opnd::Reg(reg) => Some(Opnd::Reg(reg.with_num_bits(num_bits))), + Opnd::Mem(Mem { base, disp, .. }) => Some(Opnd::Mem(Mem { base, disp, num_bits })), + Opnd::InsnOut { idx, .. } => Some(Opnd::InsnOut { idx, num_bits }), + Opnd::Stack { idx, stack_size, sp_offset, reg_temps, .. } => Some(Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps }), + _ => None, + } + } + + /// Get the size in bits for register/memory operands. + pub fn rm_num_bits(&self) -> u8 { + self.num_bits().unwrap() + } + + /// Maps the indices from a previous list of instructions to a new list of + /// instructions. + pub fn map_index(self, indices: &Vec<usize>) -> Opnd { + match self { + Opnd::InsnOut { idx, num_bits } => { + Opnd::InsnOut { idx: indices[idx], num_bits } + } + Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp, num_bits }) => { + Opnd::Mem(Mem { base: MemBase::InsnOut(indices[idx]), disp, num_bits }) + }, + _ => self + } + } + + /// When there aren't any operands to check against, this is the number of + /// bits that should be used for any given output variable. 
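+ /// For example, an instruction whose operands are all immediates
+ /// (which have no intrinsic width) gets a 64-bit output.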
+ const DEFAULT_NUM_BITS: u8 = 64; + + /// Determine the size in bits from the iterator of operands. If any of them + /// are different sizes this will panic. + pub fn match_num_bits_iter<'a>(opnds: impl Iterator<Item = &'a Opnd>) -> u8 { + let mut value: Option<u8> = None; + + for opnd in opnds { + if let Some(num_bits) = opnd.num_bits() { + match value { + None => { + value = Some(num_bits); + }, + Some(value) => { + assert_eq!(value, num_bits, "operands of incompatible sizes"); + } + }; + } + } + + value.unwrap_or(Self::DEFAULT_NUM_BITS) + } + + /// Determine the size in bits of the slice of the given operands. If any of + /// them are different sizes this will panic. + pub fn match_num_bits(opnds: &[Opnd]) -> u8 { + Self::match_num_bits_iter(opnds.iter()) + } + + /// Calculate Opnd::Stack's index from the stack bottom. + pub fn stack_idx(&self) -> u8 { + self.get_stack_idx().unwrap() + } + + /// Calculate Opnd::Stack's index from the stack bottom if it's Opnd::Stack. + pub fn get_stack_idx(&self) -> Option<u8> { + match self { + Opnd::Stack { idx, stack_size, .. } => { + Some((*stack_size as isize - *idx as isize - 1) as u8) + }, + _ => None + } + } + + /// Get the index for stack temp registers. + pub fn reg_idx(&self) -> usize { + match self { + Opnd::Stack { .. } => { + self.stack_idx() as usize % get_option!(num_temp_regs) + }, + _ => unreachable!(), + } + } +} + +impl From<usize> for Opnd { + fn from(value: usize) -> Self { + Opnd::UImm(value.try_into().unwrap()) + } +} + +impl From<u64> for Opnd { + fn from(value: u64) -> Self { + Opnd::UImm(value) + } +} + +impl From<i64> for Opnd { + fn from(value: i64) -> Self { + Opnd::Imm(value) + } +} + +impl From<i32> for Opnd { + fn from(value: i32) -> Self { + Opnd::Imm(value.try_into().unwrap()) + } +} + +impl From<u32> for Opnd { + fn from(value: u32) -> Self { + Opnd::UImm(value as u64) + } +} + +impl From<VALUE> for Opnd { + fn from(value: VALUE) -> Self { + Opnd::Value(value) + } +} + +/// Branch target (something that we can jump to) +/// for branch instructions +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum Target +{ + /// Pointer to a piece of YJIT-generated code + CodePtr(CodePtr), + /// Side exit with a counter + SideExit { counter: Counter, context: Option<SideExitContext> }, + /// Pointer to a side exit code + SideExitPtr(CodePtr), + /// A label within the generated code + Label(usize), +} + +impl Target +{ + pub fn side_exit(counter: Counter) -> Target { + Target::SideExit { counter, context: None } + } + + pub fn unwrap_label_idx(&self) -> usize { + match self { + Target::Label(idx) => *idx, + _ => unreachable!("trying to unwrap {:?} into label", self) + } + } + + pub fn unwrap_code_ptr(&self) -> CodePtr { + match self { + Target::CodePtr(ptr) => *ptr, + Target::SideExitPtr(ptr) => *ptr, + _ => unreachable!("trying to unwrap {:?} into code ptr", self) + } + } +} + +impl From<CodePtr> for Target { + fn from(code_ptr: CodePtr) -> Self { + Target::CodePtr(code_ptr) + } +} + +type PosMarkerFn = Box<dyn Fn(CodePtr, &CodeBlock)>; + +/// YJIT IR instruction +pub enum Insn { + /// Add two operands together, and return the result as a new operand. + Add { left: Opnd, right: Opnd, out: Opnd }, + + /// This is the same as the OP_ADD instruction, except that it performs the + /// binary AND operation. + And { left: Opnd, right: Opnd, out: Opnd }, + + /// Bake a string directly into the instruction stream. 
+ BakeString(String), + + // Trigger a debugger breakpoint + #[allow(dead_code)] + Breakpoint, + + /// Add a comment into the IR at the point that this instruction is added. + /// It won't have any impact on that actual compiled code. + Comment(String), + + /// Compare two operands + Cmp { left: Opnd, right: Opnd }, + + /// Pop a register from the C stack + CPop { out: Opnd }, + + /// Pop all of the caller-save registers and the flags from the C stack + CPopAll, + + /// Pop a register from the C stack and store it into another register + CPopInto(Opnd), + + /// Push a register onto the C stack + CPush(Opnd), + + /// Push all of the caller-save registers and the flags to the C stack + CPushAll, + + // C function call with N arguments (variadic) + CCall { opnds: Vec<Opnd>, fptr: *const u8, out: Opnd }, + + // C function return + CRet(Opnd), + + /// Conditionally select if equal + CSelE { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Conditionally select if greater + CSelG { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Conditionally select if greater or equal + CSelGE { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Conditionally select if less + CSelL { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Conditionally select if less or equal + CSelLE { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Conditionally select if not equal + CSelNE { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Conditionally select if not zero + CSelNZ { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Conditionally select if zero + CSelZ { truthy: Opnd, falsy: Opnd, out: Opnd }, + + /// Set up the frame stack as necessary per the architecture. + FrameSetup, + + /// Tear down the frame stack as necessary per the architecture. + FrameTeardown, + + // Atomically increment a counter + // Input: memory operand, increment value + // Produces no output + IncrCounter { mem: Opnd, value: Opnd }, + + /// Jump if below or equal (unsigned) + Jbe(Target), + + /// Jump if below (unsigned) + Jb(Target), + + /// Jump if equal + Je(Target), + + /// Jump if lower + Jl(Target), + + /// Jump if greater + Jg(Target), + + /// Jump if greater or equal + Jge(Target), + + // Unconditional jump to a branch target + Jmp(Target), + + // Unconditional jump which takes a reg/mem address operand + JmpOpnd(Opnd), + + /// Jump if not equal + Jne(Target), + + /// Jump if not zero + Jnz(Target), + + /// Jump if overflow + Jo(Target), + + /// Jump if overflow in multiplication + JoMul(Target), + + /// Jump if zero + Jz(Target), + + /// Jump if operand is zero (only used during lowering at the moment) + Joz(Opnd, Target), + + /// Jump if operand is non-zero (only used during lowering at the moment) + Jonz(Opnd, Target), + + // Add a label into the IR at the point that this instruction is added. + Label(Target), + + /// Get the code address of a jump target + LeaJumpTarget { target: Target, out: Opnd }, + + // Load effective address + Lea { opnd: Opnd, out: Opnd }, + + /// Take a specific register. Signal the register allocator to not use it. + LiveReg { opnd: Opnd, out: Opnd }, + + // A low-level instruction that loads a value into a register. + Load { opnd: Opnd, out: Opnd }, + + // A low-level instruction that loads a value into a specified register. + LoadInto { dest: Opnd, opnd: Opnd }, + + // A low-level instruction that loads a value into a register and + // sign-extends it to a 64-bit value. + LoadSExt { opnd: Opnd, out: Opnd }, + + /// Shift a value left by a certain amount. 
+ LShift { opnd: Opnd, shift: Opnd, out: Opnd }, + + // A low-level mov instruction. It accepts two operands. + Mov { dest: Opnd, src: Opnd }, + + // Perform the NOT operation on an individual operand, and return the result + // as a new operand. This operand can then be used as the operand on another + // instruction. + Not { opnd: Opnd, out: Opnd }, + + // This is the same as the OP_ADD instruction, except that it performs the + // binary OR operation. + Or { left: Opnd, right: Opnd, out: Opnd }, + + /// Pad nop instructions to accommodate Op::Jmp in case the block or the insn + /// is invalidated. + PadInvalPatch, + + // Mark a position in the generated code + PosMarker(PosMarkerFn), + + /// Shift a value right by a certain amount (signed). + RShift { opnd: Opnd, shift: Opnd, out: Opnd }, + + // Low-level instruction to store a value to memory. + Store { dest: Opnd, src: Opnd }, + + // This is the same as the add instruction, except for subtraction. + Sub { left: Opnd, right: Opnd, out: Opnd }, + + // Integer multiplication + Mul { left: Opnd, right: Opnd, out: Opnd }, + + // Bitwise AND test instruction + Test { left: Opnd, right: Opnd }, + + /// Shift a value right by a certain amount (unsigned). + URShift { opnd: Opnd, shift: Opnd, out: Opnd }, + + // This is the same as the OP_ADD instruction, except that it performs the + // binary XOR operation. + Xor { left: Opnd, right: Opnd, out: Opnd } +} + +impl Insn { + /// Create an iterator that will yield a non-mutable reference to each + /// operand in turn for this instruction. + pub(super) fn opnd_iter(&self) -> InsnOpndIterator { + InsnOpndIterator::new(self) + } + + /// Create an iterator that will yield a mutable reference to each operand + /// in turn for this instruction. + pub(super) fn opnd_iter_mut(&mut self) -> InsnOpndMutIterator { + InsnOpndMutIterator::new(self) + } + + /// Get a mutable reference to a Target if it exists. + pub(super) fn target_mut(&mut self) -> Option<&mut Target> { + match self { + Insn::Jbe(target) | + Insn::Jb(target) | + Insn::Je(target) | + Insn::Jl(target) | + Insn::Jg(target) | + Insn::Jge(target) | + Insn::Jmp(target) | + Insn::Jne(target) | + Insn::Jnz(target) | + Insn::Jo(target) | + Insn::Jz(target) | + Insn::Label(target) | + Insn::JoMul(target) | + Insn::Joz(_, target) | + Insn::Jonz(_, target) | + Insn::LeaJumpTarget { target, .. } => { + Some(target) + } + _ => None, + } + } + + /// Returns a string that describes which operation this instruction is + /// performing. This is used for debugging. + fn op(&self) -> &'static str { + match self { + Insn::Add { .. } => "Add", + Insn::And { .. } => "And", + Insn::BakeString(_) => "BakeString", + Insn::Breakpoint => "Breakpoint", + Insn::Comment(_) => "Comment", + Insn::Cmp { .. } => "Cmp", + Insn::CPop { .. } => "CPop", + Insn::CPopAll => "CPopAll", + Insn::CPopInto(_) => "CPopInto", + Insn::CPush(_) => "CPush", + Insn::CPushAll => "CPushAll", + Insn::CCall { .. } => "CCall", + Insn::CRet(_) => "CRet", + Insn::CSelE { .. } => "CSelE", + Insn::CSelG { .. } => "CSelG", + Insn::CSelGE { .. } => "CSelGE", + Insn::CSelL { .. } => "CSelL", + Insn::CSelLE { .. } => "CSelLE", + Insn::CSelNE { .. } => "CSelNE", + Insn::CSelNZ { .. } => "CSelNZ", + Insn::CSelZ { .. } => "CSelZ", + Insn::FrameSetup => "FrameSetup", + Insn::FrameTeardown => "FrameTeardown", + Insn::IncrCounter { .. 
} => "IncrCounter", + Insn::Jbe(_) => "Jbe", + Insn::Jb(_) => "Jb", + Insn::Je(_) => "Je", + Insn::Jl(_) => "Jl", + Insn::Jg(_) => "Jg", + Insn::Jge(_) => "Jge", + Insn::Jmp(_) => "Jmp", + Insn::JmpOpnd(_) => "JmpOpnd", + Insn::Jne(_) => "Jne", + Insn::Jnz(_) => "Jnz", + Insn::Jo(_) => "Jo", + Insn::JoMul(_) => "JoMul", + Insn::Jz(_) => "Jz", + Insn::Joz(..) => "Joz", + Insn::Jonz(..) => "Jonz", + Insn::Label(_) => "Label", + Insn::LeaJumpTarget { .. } => "LeaJumpTarget", + Insn::Lea { .. } => "Lea", + Insn::LiveReg { .. } => "LiveReg", + Insn::Load { .. } => "Load", + Insn::LoadInto { .. } => "LoadInto", + Insn::LoadSExt { .. } => "LoadSExt", + Insn::LShift { .. } => "LShift", + Insn::Mov { .. } => "Mov", + Insn::Not { .. } => "Not", + Insn::Or { .. } => "Or", + Insn::PadInvalPatch => "PadEntryExit", + Insn::PosMarker(_) => "PosMarker", + Insn::RShift { .. } => "RShift", + Insn::Store { .. } => "Store", + Insn::Sub { .. } => "Sub", + Insn::Mul { .. } => "Mul", + Insn::Test { .. } => "Test", + Insn::URShift { .. } => "URShift", + Insn::Xor { .. } => "Xor" + } + } + + /// Return a non-mutable reference to the out operand for this instruction + /// if it has one. + pub fn out_opnd(&self) -> Option<&Opnd> { + match self { + Insn::Add { out, .. } | + Insn::And { out, .. } | + Insn::CCall { out, .. } | + Insn::CPop { out, .. } | + Insn::CSelE { out, .. } | + Insn::CSelG { out, .. } | + Insn::CSelGE { out, .. } | + Insn::CSelL { out, .. } | + Insn::CSelLE { out, .. } | + Insn::CSelNE { out, .. } | + Insn::CSelNZ { out, .. } | + Insn::CSelZ { out, .. } | + Insn::Lea { out, .. } | + Insn::LeaJumpTarget { out, .. } | + Insn::LiveReg { out, .. } | + Insn::Load { out, .. } | + Insn::LoadSExt { out, .. } | + Insn::LShift { out, .. } | + Insn::Not { out, .. } | + Insn::Or { out, .. } | + Insn::RShift { out, .. } | + Insn::Sub { out, .. } | + Insn::Mul { out, .. } | + Insn::URShift { out, .. } | + Insn::Xor { out, .. } => Some(out), + _ => None + } + } + + /// Return a mutable reference to the out operand for this instruction if it + /// has one. + pub fn out_opnd_mut(&mut self) -> Option<&mut Opnd> { + match self { + Insn::Add { out, .. } | + Insn::And { out, .. } | + Insn::CCall { out, .. } | + Insn::CPop { out, .. } | + Insn::CSelE { out, .. } | + Insn::CSelG { out, .. } | + Insn::CSelGE { out, .. } | + Insn::CSelL { out, .. } | + Insn::CSelLE { out, .. } | + Insn::CSelNE { out, .. } | + Insn::CSelNZ { out, .. } | + Insn::CSelZ { out, .. } | + Insn::Lea { out, .. } | + Insn::LeaJumpTarget { out, .. } | + Insn::LiveReg { out, .. } | + Insn::Load { out, .. } | + Insn::LoadSExt { out, .. } | + Insn::LShift { out, .. } | + Insn::Not { out, .. } | + Insn::Or { out, .. } | + Insn::RShift { out, .. } | + Insn::Sub { out, .. } | + Insn::Mul { out, .. } | + Insn::URShift { out, .. } | + Insn::Xor { out, .. } => Some(out), + _ => None + } + } + + /// Returns the target for this instruction if there is one. + pub fn target(&self) -> Option<&Target> { + match self { + Insn::Jbe(target) | + Insn::Jb(target) | + Insn::Je(target) | + Insn::Jl(target) | + Insn::Jg(target) | + Insn::Jge(target) | + Insn::Jmp(target) | + Insn::Jne(target) | + Insn::Jnz(target) | + Insn::Jo(target) | + Insn::Jz(target) | + Insn::LeaJumpTarget { target, .. } => Some(target), + _ => None + } + } + + /// Returns the text associated with this instruction if there is some. 
+ pub fn text(&self) -> Option<&String> { + match self { + Insn::BakeString(text) | + Insn::Comment(text) => Some(text), + _ => None + } + } +} + +/// An iterator that will yield a non-mutable reference to each operand in turn +/// for the given instruction. +pub(super) struct InsnOpndIterator<'a> { + insn: &'a Insn, + idx: usize, +} + +impl<'a> InsnOpndIterator<'a> { + fn new(insn: &'a Insn) -> Self { + Self { insn, idx: 0 } + } +} + +impl<'a> Iterator for InsnOpndIterator<'a> { + type Item = &'a Opnd; + + fn next(&mut self) -> Option<Self::Item> { + match self.insn { + Insn::BakeString(_) | + Insn::Breakpoint | + Insn::Comment(_) | + Insn::CPop { .. } | + Insn::CPopAll | + Insn::CPushAll | + Insn::FrameSetup | + Insn::FrameTeardown | + Insn::Jbe(_) | + Insn::Jb(_) | + Insn::Je(_) | + Insn::Jl(_) | + Insn::Jg(_) | + Insn::Jge(_) | + Insn::Jmp(_) | + Insn::Jne(_) | + Insn::Jnz(_) | + Insn::Jo(_) | + Insn::JoMul(_) | + Insn::Jz(_) | + Insn::Label(_) | + Insn::LeaJumpTarget { .. } | + Insn::PadInvalPatch | + Insn::PosMarker(_) => None, + + Insn::CPopInto(opnd) | + Insn::CPush(opnd) | + Insn::CRet(opnd) | + Insn::JmpOpnd(opnd) | + Insn::Lea { opnd, .. } | + Insn::LiveReg { opnd, .. } | + Insn::Load { opnd, .. } | + Insn::LoadSExt { opnd, .. } | + Insn::Joz(opnd, _) | + Insn::Jonz(opnd, _) | + Insn::Not { opnd, .. } => { + match self.idx { + 0 => { + self.idx += 1; + Some(&opnd) + }, + _ => None + } + }, + Insn::Add { left: opnd0, right: opnd1, .. } | + Insn::And { left: opnd0, right: opnd1, .. } | + Insn::Cmp { left: opnd0, right: opnd1 } | + Insn::CSelE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelG { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelGE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelL { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelLE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelNE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelNZ { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelZ { truthy: opnd0, falsy: opnd1, .. } | + Insn::IncrCounter { mem: opnd0, value: opnd1, .. } | + Insn::LoadInto { dest: opnd0, opnd: opnd1 } | + Insn::LShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Mov { dest: opnd0, src: opnd1 } | + Insn::Or { left: opnd0, right: opnd1, .. } | + Insn::RShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Store { dest: opnd0, src: opnd1 } | + Insn::Sub { left: opnd0, right: opnd1, .. } | + Insn::Mul { left: opnd0, right: opnd1, .. } | + Insn::Test { left: opnd0, right: opnd1 } | + Insn::URShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Xor { left: opnd0, right: opnd1, .. } => { + match self.idx { + 0 => { + self.idx += 1; + Some(&opnd0) + } + 1 => { + self.idx += 1; + Some(&opnd1) + } + _ => None + } + }, + Insn::CCall { opnds, .. } => { + if self.idx < opnds.len() { + let opnd = &opnds[self.idx]; + self.idx += 1; + Some(opnd) + } else { + None + } + } + } + } +} + +/// An iterator that will yield each operand in turn for the given instruction. +pub(super) struct InsnOpndMutIterator<'a> { + insn: &'a mut Insn, + idx: usize, +} + +impl<'a> InsnOpndMutIterator<'a> { + fn new(insn: &'a mut Insn) -> Self { + Self { insn, idx: 0 } + } + + pub(super) fn next(&mut self) -> Option<&mut Opnd> { + match self.insn { + Insn::BakeString(_) | + Insn::Breakpoint | + Insn::Comment(_) | + Insn::CPop { .. 
} | + Insn::CPopAll | + Insn::CPushAll | + Insn::FrameSetup | + Insn::FrameTeardown | + Insn::Jbe(_) | + Insn::Jb(_) | + Insn::Je(_) | + Insn::Jl(_) | + Insn::Jg(_) | + Insn::Jge(_) | + Insn::Jmp(_) | + Insn::Jne(_) | + Insn::Jnz(_) | + Insn::Jo(_) | + Insn::JoMul(_) | + Insn::Jz(_) | + Insn::Label(_) | + Insn::LeaJumpTarget { .. } | + Insn::PadInvalPatch | + Insn::PosMarker(_) => None, + + Insn::CPopInto(opnd) | + Insn::CPush(opnd) | + Insn::CRet(opnd) | + Insn::JmpOpnd(opnd) | + Insn::Lea { opnd, .. } | + Insn::LiveReg { opnd, .. } | + Insn::Load { opnd, .. } | + Insn::LoadSExt { opnd, .. } | + Insn::Joz(opnd, _) | + Insn::Jonz(opnd, _) | + Insn::Not { opnd, .. } => { + match self.idx { + 0 => { + self.idx += 1; + Some(opnd) + }, + _ => None + } + }, + Insn::Add { left: opnd0, right: opnd1, .. } | + Insn::And { left: opnd0, right: opnd1, .. } | + Insn::Cmp { left: opnd0, right: opnd1 } | + Insn::CSelE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelG { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelGE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelL { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelLE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelNE { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelNZ { truthy: opnd0, falsy: opnd1, .. } | + Insn::CSelZ { truthy: opnd0, falsy: opnd1, .. } | + Insn::IncrCounter { mem: opnd0, value: opnd1, .. } | + Insn::LoadInto { dest: opnd0, opnd: opnd1 } | + Insn::LShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Mov { dest: opnd0, src: opnd1 } | + Insn::Or { left: opnd0, right: opnd1, .. } | + Insn::RShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Store { dest: opnd0, src: opnd1 } | + Insn::Sub { left: opnd0, right: opnd1, .. } | + Insn::Mul { left: opnd0, right: opnd1, .. } | + Insn::Test { left: opnd0, right: opnd1 } | + Insn::URShift { opnd: opnd0, shift: opnd1, .. } | + Insn::Xor { left: opnd0, right: opnd1, .. } => { + match self.idx { + 0 => { + self.idx += 1; + Some(opnd0) + } + 1 => { + self.idx += 1; + Some(opnd1) + } + _ => None + } + }, + Insn::CCall { opnds, .. } => { + if self.idx < opnds.len() { + let opnd = &mut opnds[self.idx]; + self.idx += 1; + Some(opnd) + } else { + None + } + } + } + } +} + +impl fmt::Debug for Insn { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}(", self.op())?; + + // Print list of operands + let mut opnd_iter = self.opnd_iter(); + if let Some(first_opnd) = opnd_iter.next() { + write!(fmt, "{first_opnd:?}")?; + } + for opnd in opnd_iter { + write!(fmt, ", {opnd:?}")?; + } + write!(fmt, ")")?; + + // Print text, target, and pos if they are present + if let Some(text) = self.text() { + write!(fmt, " {text:?}")? 
+ } + if let Some(target) = self.target() { + write!(fmt, " target={target:?}")?; + } + + write!(fmt, " -> {:?}", self.out_opnd().unwrap_or(&Opnd::None)) + } +} + +/// Set of variables used for generating side exits +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct SideExitContext { + /// PC of the instruction being compiled + pub pc: *mut VALUE, + + /// Context fields used by get_generic_ctx() + pub stack_size: u8, + pub sp_offset: i8, + pub reg_temps: RegTemps, + pub is_return_landing: bool, + pub is_deferred: bool, +} + +impl SideExitContext { + /// Convert PC and Context into SideExitContext + pub fn new(pc: *mut VALUE, ctx: Context) -> Self { + let exit_ctx = SideExitContext { + pc, + stack_size: ctx.get_stack_size(), + sp_offset: ctx.get_sp_offset(), + reg_temps: ctx.get_reg_temps(), + is_return_landing: ctx.is_return_landing(), + is_deferred: ctx.is_deferred(), + }; + if cfg!(debug_assertions) { + // Assert that we're not losing any mandatory metadata + assert_eq!(exit_ctx.get_ctx(), ctx.get_generic_ctx()); + } + exit_ctx + } + + /// Convert SideExitContext to Context + fn get_ctx(&self) -> Context { + let mut ctx = Context::default(); + ctx.set_stack_size(self.stack_size); + ctx.set_sp_offset(self.sp_offset); + ctx.set_reg_temps(self.reg_temps); + if self.is_return_landing { + ctx.set_as_return_landing(); + } + if self.is_deferred { + ctx.mark_as_deferred(); + } + ctx + } +} + +/// Initial capacity for asm.insns vector +const ASSEMBLER_INSNS_CAPACITY: usize = 256; + +/// Object into which we assemble instructions to be +/// optimized and lowered +pub struct Assembler { + pub(super) insns: Vec<Insn>, + + /// Parallel vec with insns + /// Index of the last insn using the output of this insn + pub(super) live_ranges: Vec<usize>, + + /// Names of labels + pub(super) label_names: Vec<String>, + + /// Context for generating the current insn + pub ctx: Context, + + /// Side exit caches for each SideExitContext + pub(super) side_exits: HashMap<SideExitContext, CodePtr>, + + /// PC for Target::SideExit + side_exit_pc: Option<*mut VALUE>, + + /// Stack size for Target::SideExit + side_exit_stack_size: Option<u8>, + + /// If true, the next ccall() should verify its leafness + leaf_ccall: bool, +} + +impl Assembler +{ + pub fn new() -> Self { + Self::new_with_label_names(Vec::default(), HashMap::default()) + } + + pub fn new_with_label_names(label_names: Vec<String>, side_exits: HashMap<SideExitContext, CodePtr>) -> Self { + Self { + insns: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY), + live_ranges: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY), + label_names, + ctx: Context::default(), + side_exits, + side_exit_pc: None, + side_exit_stack_size: None, + leaf_ccall: false, + } + } + + /// Get the list of registers that can be used for stack temps. + pub fn get_temp_regs() -> &'static [Reg] { + let num_regs = get_option!(num_temp_regs); + &TEMP_REGS[0..num_regs] + } + + /// Set a context for generating side exits + pub fn set_side_exit_context(&mut self, pc: *mut VALUE, stack_size: u8) { + self.side_exit_pc = Some(pc); + self.side_exit_stack_size = Some(stack_size); + } + + /// Build an Opnd::InsnOut from the current index of the assembler and the + /// given number of bits. + pub(super) fn next_opnd_out(&self, num_bits: u8) -> Opnd { + Opnd::InsnOut { idx: self.insns.len(), num_bits } + } + + /// Append an instruction onto the current list of instructions and update + /// the live ranges of any instructions whose outputs are being used as + /// operands to this instruction. 
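+    ///
+    /// A rough sketch of the live-range bookkeeping (illustrative indices only,
+    /// using the `load`/`add` helpers defined further down):
+    ///
+    /// ```text
+    /// let a = asm.load(Opnd::None);   // insns[0], live_ranges[0] == 0
+    /// let b = asm.add(a, Opnd::None); // insns[1]; `a` is Opnd::InsnOut { idx: 0 },
+    ///                                 // so live_ranges[0] is bumped to 1
+    /// ```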
+ pub fn push_insn(&mut self, mut insn: Insn) { + // Index of this instruction + let insn_idx = self.insns.len(); + + let mut opnd_iter = insn.opnd_iter_mut(); + while let Some(opnd) = opnd_iter.next() { + match opnd { + // If we find any InsnOut from previous instructions, we're going to update + // the live range of the previous instruction to point to this one. + Opnd::InsnOut { idx, .. } => { + assert!(*idx < self.insns.len()); + self.live_ranges[*idx] = insn_idx; + } + Opnd::Mem(Mem { base: MemBase::InsnOut(idx), .. }) => { + assert!(*idx < self.insns.len()); + self.live_ranges[*idx] = insn_idx; + } + // Set current ctx.reg_temps to Opnd::Stack. + Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps: None } => { + assert_eq!( + self.ctx.get_stack_size() as i16 - self.ctx.get_sp_offset() as i16, + *stack_size as i16 - *sp_offset as i16, + "Opnd::Stack (stack_size: {}, sp_offset: {}) expects a different SP position from asm.ctx (stack_size: {}, sp_offset: {})", + *stack_size, *sp_offset, self.ctx.get_stack_size(), self.ctx.get_sp_offset(), + ); + *opnd = Opnd::Stack { + idx: *idx, + num_bits: *num_bits, + stack_size: *stack_size, + sp_offset: *sp_offset, + reg_temps: Some(self.ctx.get_reg_temps()), + }; + } + _ => {} + } + } + + // Set a side exit context to Target::SideExit + if let Some(Target::SideExit { context, .. }) = insn.target_mut() { + // We should skip this when this instruction is being copied from another Assembler. + if context.is_none() { + *context = Some(SideExitContext::new( + self.side_exit_pc.unwrap(), + self.ctx.with_stack_size(self.side_exit_stack_size.unwrap()), + )); + } + } + + self.insns.push(insn); + self.live_ranges.push(insn_idx); + } + + /// Get a cached side exit, wrapping a counter if specified + pub fn get_side_exit(&mut self, side_exit_context: &SideExitContext, counter: Option<Counter>, ocb: &mut OutlinedCb) -> Option<CodePtr> { + // Get a cached side exit + let side_exit = match self.side_exits.get(&side_exit_context) { + None => { + let exit_code = gen_outlined_exit(side_exit_context.pc, &side_exit_context.get_ctx(), ocb)?; + self.side_exits.insert(*side_exit_context, exit_code); + exit_code + } + Some(code_ptr) => *code_ptr, + }; + + // Wrap a counter if needed + gen_counted_exit(side_exit_context.pc, side_exit, ocb, counter) + } + + /// Create a new label instance that we can jump to + pub fn new_label(&mut self, name: &str) -> Target + { + assert!(!name.contains(' '), "use underscores in label names, not spaces"); + + let label_idx = self.label_names.len(); + self.label_names.push(name.to_string()); + Target::Label(label_idx) + } + + /// Convert Opnd::Stack to Opnd::Mem or Opnd::Reg + pub fn lower_stack_opnd(&self, opnd: &Opnd) -> Opnd { + // Convert Opnd::Stack to Opnd::Mem + fn mem_opnd(opnd: &Opnd) -> Opnd { + if let Opnd::Stack { idx, sp_offset, num_bits, .. } = *opnd { + incr_counter!(temp_mem_opnd); + Opnd::mem(num_bits, SP, (sp_offset as i32 - idx - 1) * SIZEOF_VALUE_I32) + } else { + unreachable!() + } + } + + // Convert Opnd::Stack to Opnd::Reg + fn reg_opnd(opnd: &Opnd) -> Opnd { + let regs = Assembler::get_temp_regs(); + if let Opnd::Stack { num_bits, .. } = *opnd { + incr_counter!(temp_reg_opnd); + Opnd::Reg(regs[opnd.reg_idx()]).with_num_bits(num_bits).unwrap() + } else { + unreachable!() + } + } + + match opnd { + Opnd::Stack { reg_temps, .. 
} => { + if opnd.stack_idx() < MAX_REG_TEMPS && reg_temps.unwrap().get(opnd.stack_idx()) { + reg_opnd(opnd) + } else { + mem_opnd(opnd) + } + } + _ => unreachable!(), + } + } + + /// Allocate a register to a stack temp if available. + pub fn alloc_temp_reg(&mut self, stack_idx: u8) { + if get_option!(num_temp_regs) == 0 { + return; + } + + // Allocate a register if there's no conflict. + let mut reg_temps = self.ctx.get_reg_temps(); + if reg_temps.conflicts_with(stack_idx) { + assert!(!reg_temps.get(stack_idx)); + } else { + reg_temps.set(stack_idx, true); + self.set_reg_temps(reg_temps); + } + } + + /// Erase local variable type information + /// eg: because of a call we can't track + pub fn clear_local_types(&mut self) { + asm_comment!(self, "clear local variable types"); + self.ctx.clear_local_types(); + } + + /// Spill all live stack temps from registers to the stack + pub fn spill_temps(&mut self) { + // Forget registers above the stack top + let mut reg_temps = self.ctx.get_reg_temps(); + for stack_idx in self.ctx.get_stack_size()..MAX_REG_TEMPS { + reg_temps.set(stack_idx, false); + } + self.set_reg_temps(reg_temps); + + // Spill live stack temps + if self.ctx.get_reg_temps() != RegTemps::default() { + asm_comment!(self, "spill_temps: {:08b} -> {:08b}", self.ctx.get_reg_temps().as_u8(), RegTemps::default().as_u8()); + for stack_idx in 0..u8::min(MAX_REG_TEMPS, self.ctx.get_stack_size()) { + if self.ctx.get_reg_temps().get(stack_idx) { + let idx = self.ctx.get_stack_size() - 1 - stack_idx; + self.spill_temp(self.stack_opnd(idx.into())); + reg_temps.set(stack_idx, false); + } + } + self.ctx.set_reg_temps(reg_temps); + } + + // Every stack temp should have been spilled + assert_eq!(self.ctx.get_reg_temps(), RegTemps::default()); + } + + /// Spill a stack temp from a register to the stack + fn spill_temp(&mut self, opnd: Opnd) { + assert!(self.ctx.get_reg_temps().get(opnd.stack_idx())); + + // Use different RegTemps for dest and src operands + let reg_temps = self.ctx.get_reg_temps(); + let mut mem_temps = reg_temps; + mem_temps.set(opnd.stack_idx(), false); + + // Move the stack operand from a register to memory + match opnd { + Opnd::Stack { idx, num_bits, stack_size, sp_offset, .. } => { + self.mov( + Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps: Some(mem_temps) }, + Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps: Some(reg_temps) }, + ); + } + _ => unreachable!(), + } + incr_counter!(temp_spill); + } + + /// Update which stack temps are in a register + pub fn set_reg_temps(&mut self, reg_temps: RegTemps) { + if self.ctx.get_reg_temps() != reg_temps { + asm_comment!(self, "reg_temps: {:08b} -> {:08b}", self.ctx.get_reg_temps().as_u8(), reg_temps.as_u8()); + self.ctx.set_reg_temps(reg_temps); + self.verify_reg_temps(); + } + } + + /// Assert there's no conflict in stack temp register allocation + fn verify_reg_temps(&self) { + for stack_idx in 0..MAX_REG_TEMPS { + if self.ctx.get_reg_temps().get(stack_idx) { + assert!(!self.ctx.get_reg_temps().conflicts_with(stack_idx)); + } + } + } + + /// Sets the out field on the various instructions that require allocated + /// registers because their output is used as the operand on a subsequent + /// instruction. This is our implementation of the linear scan algorithm. + pub(super) fn alloc_regs(mut self, regs: Vec<Reg>) -> Assembler + { + //dbg!(&self); + + // First, create the pool of registers. 
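+        // (Illustrative) the pool is a bitmap over `regs`: with the x86_64 set
+        // regs == vec![RAX_REG, RCX_REG, RDX_REG], a pool of 0b011 means RAX and
+        // RCX are taken and RDX is still free; alloc_reg() below hands out the
+        // lowest-index register whose bit is clear.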
+ let mut pool: u32 = 0; + + // Mutate the pool bitmap to indicate that the register at that index + // has been allocated and is live. + fn alloc_reg(pool: &mut u32, regs: &Vec<Reg>) -> Option<Reg> { + for (index, reg) in regs.iter().enumerate() { + if (*pool & (1 << index)) == 0 { + *pool |= 1 << index; + return Some(*reg); + } + } + None + } + + // Allocate a specific register + fn take_reg(pool: &mut u32, regs: &Vec<Reg>, reg: &Reg) -> Reg { + let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no); + + if let Some(reg_index) = reg_index { + assert_eq!(*pool & (1 << reg_index), 0, "register already allocated"); + *pool |= 1 << reg_index; + } + + return *reg; + } + + // Mutate the pool bitmap to indicate that the given register is being + // returned as it is no longer used by the instruction that previously + // held it. + fn dealloc_reg(pool: &mut u32, regs: &Vec<Reg>, reg: &Reg) { + let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no); + + if let Some(reg_index) = reg_index { + *pool &= !(1 << reg_index); + } + } + + // Reorder C argument moves, sometimes adding extra moves using SCRATCH_REG, + // so that they will not rewrite each other before they are used. + fn reorder_c_args(c_args: &Vec<(Reg, Opnd)>) -> Vec<(Reg, Opnd)> { + // Return the index of a move whose destination is not used as a source if any. + fn find_safe_arg(c_args: &Vec<(Reg, Opnd)>) -> Option<usize> { + c_args.iter().enumerate().find(|(_, &(dest_reg, _))| { + c_args.iter().all(|&(_, src_opnd)| src_opnd != Opnd::Reg(dest_reg)) + }).map(|(index, _)| index) + } + + // Remove moves whose source and destination are the same + let mut c_args: Vec<(Reg, Opnd)> = c_args.clone().into_iter() + .filter(|&(reg, opnd)| Opnd::Reg(reg) != opnd).collect(); + + let mut moves = vec![]; + while c_args.len() > 0 { + // Keep taking safe moves + while let Some(index) = find_safe_arg(&c_args) { + moves.push(c_args.remove(index)); + } + + // No safe move. Load the source of one move into SCRATCH_REG, and + // then load SCRATCH_REG into the destination when it's safe. + if c_args.len() > 0 { + // Make sure it's safe to use SCRATCH_REG + assert!(c_args.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG))); + + // Move SCRATCH <- opnd, and delay reg <- SCRATCH + let (reg, opnd) = c_args.remove(0); + moves.push((Assembler::SCRATCH_REG, opnd)); + c_args.push((reg, Opnd::Reg(Assembler::SCRATCH_REG))); + } + } + moves + } + + // Adjust the number of entries in live_ranges so that it can be indexed by mapped indexes. + fn shift_live_ranges(live_ranges: &mut Vec<usize>, start_index: usize, shift_offset: isize) { + if shift_offset >= 0 { + for index in 0..(shift_offset as usize) { + live_ranges.insert(start_index + index, start_index + index); + } + } else { + for _ in 0..-shift_offset { + live_ranges.remove(start_index); + } + } + } + + // Dump live registers for register spill debugging. 
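+        // (Illustrative) each dumped row looks roughly like
+        //     ==> ||   [ 12] Add(...)
+        // where the arrow marks the instruction that failed to get a register and
+        // one `|` is printed per register still live at that index.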
+ fn dump_live_regs(insns: Vec<Insn>, live_ranges: Vec<usize>, num_regs: usize, spill_index: usize) { + // Convert live_ranges to live_regs: the number of live registers at each index + let mut live_regs: Vec<usize> = vec![]; + let mut end_idxs: Vec<usize> = vec![]; + for (cur_idx, &end_idx) in live_ranges.iter().enumerate() { + end_idxs.push(end_idx); + while let Some(end_idx) = end_idxs.iter().position(|&end_idx| cur_idx == end_idx) { + end_idxs.remove(end_idx); + } + live_regs.push(end_idxs.len()); + } + + // Dump insns along with live registers + for (insn_idx, insn) in insns.iter().enumerate() { + eprint!("{:3} ", if spill_index == insn_idx { "==>" } else { "" }); + for reg in 0..=num_regs { + eprint!("{:1}", if reg < live_regs[insn_idx] { "|" } else { "" }); + } + eprintln!(" [{:3}] {:?}", insn_idx, insn); + } + } + + // We may need to reorder LoadInto instructions with a C argument operand. + // This buffers the operands of such instructions to process them in batches. + let mut c_args: Vec<(Reg, Opnd)> = vec![]; + + // live_ranges is indexed by original `index` given by the iterator. + let live_ranges: Vec<usize> = take(&mut self.live_ranges); + // shifted_live_ranges is indexed by mapped indexes in insn operands. + let mut shifted_live_ranges: Vec<usize> = live_ranges.clone(); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits)); + let mut iterator = self.into_draining_iter(); + + while let Some((index, mut insn)) = iterator.next_mapped() { + // Check if this is the last instruction that uses an operand that + // spans more than one instruction. In that case, return the + // allocated register to the pool. + for opnd in insn.opnd_iter() { + match opnd { + Opnd::InsnOut { idx, .. } | + Opnd::Mem(Mem { base: MemBase::InsnOut(idx), .. }) => { + // Since we have an InsnOut, we know it spans more that one + // instruction. + let start_index = *idx; + + // We're going to check if this is the last instruction that + // uses this operand. If it is, we can return the allocated + // register to the pool. + if shifted_live_ranges[start_index] == index { + if let Some(Opnd::Reg(reg)) = asm.insns[start_index].out_opnd() { + dealloc_reg(&mut pool, ®s, reg); + } else { + unreachable!("no register allocated for insn {:?}", insn); + } + } + } + _ => {} + } + } + + // C return values need to be mapped to the C return register + if matches!(insn, Insn::CCall { .. }) { + assert_eq!(pool, 0, "register lives past C function call"); + } + + // If this instruction is used by another instruction, + // we need to allocate a register to it + if live_ranges[index] != index { + // If we get to this point where the end of the live range is + // not equal to the index of the instruction, then it must be + // true that we set an output operand for this instruction. If + // it's not true, something has gone wrong. + assert!( + !matches!(insn.out_opnd(), None), + "Instruction output reused but no output operand set" + ); + + // This is going to be the output operand that we will set on + // the instruction. + let mut out_reg: Option<Reg> = None; + + // C return values need to be mapped to the C return register + if matches!(insn, Insn::CCall { .. }) { + out_reg = Some(take_reg(&mut pool, ®s, &C_RET_REG)); + } + + // If this instruction's first operand maps to a register and + // this is the last use of the register, reuse the register + // We do this to improve register allocation on x86 + // e.g. 
out = add(reg0, reg1) + // reg0 = add(reg0, reg1) + if out_reg.is_none() { + let mut opnd_iter = insn.opnd_iter(); + + if let Some(Opnd::InsnOut{ idx, .. }) = opnd_iter.next() { + if shifted_live_ranges[*idx] == index { + if let Some(Opnd::Reg(reg)) = asm.insns[*idx].out_opnd() { + out_reg = Some(take_reg(&mut pool, ®s, reg)); + } + } + } + } + + // Allocate a new register for this instruction if one is not + // already allocated. + if out_reg.is_none() { + out_reg = match &insn { + Insn::LiveReg { opnd, .. } => { + // Allocate a specific register + let reg = opnd.unwrap_reg(); + Some(take_reg(&mut pool, ®s, ®)) + }, + _ => match alloc_reg(&mut pool, ®s) { + Some(reg) => Some(reg), + None => { + let mut insns = asm.insns; + insns.push(insn); + for insn in iterator.insns { + insns.push(insn); + } + dump_live_regs(insns, live_ranges, regs.len(), index); + unreachable!("Register spill not supported"); + } + } + }; + } + + // Set the output operand on the instruction + let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter()); + + // If we have gotten to this point, then we're sure we have an + // output operand on this instruction because the live range + // extends beyond the index of the instruction. + let out = insn.out_opnd_mut().unwrap(); + *out = Opnd::Reg(out_reg.unwrap().with_num_bits(out_num_bits)); + } + + // Replace InsnOut operands by their corresponding register + let mut opnd_iter = insn.opnd_iter_mut(); + while let Some(opnd) = opnd_iter.next() { + match *opnd { + Opnd::InsnOut { idx, num_bits } => { + *opnd = (*asm.insns[idx].out_opnd().unwrap()).with_num_bits(num_bits).unwrap(); + }, + Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp, num_bits }) => { + let base = MemBase::Reg(asm.insns[idx].out_opnd().unwrap().unwrap_reg().reg_no); + *opnd = Opnd::Mem(Mem { base, disp, num_bits }); + } + _ => {}, + } + } + + // Push instruction(s). Batch and reorder C argument operations if needed. + if let Insn::LoadInto { dest: Opnd::CArg(reg), opnd } = insn { + // Buffer C arguments + c_args.push((reg, opnd)); + } else { + // C arguments are buffered until CCall + if c_args.len() > 0 { + // Resolve C argument dependencies + let c_args_len = c_args.len() as isize; + let moves = reorder_c_args(&c_args.drain(..).into_iter().collect()); + shift_live_ranges(&mut shifted_live_ranges, asm.insns.len(), moves.len() as isize - c_args_len); + + // Push batched C arguments + for (reg, opnd) in moves { + asm.load_into(Opnd::Reg(reg), opnd); + } + } + // Other instructions are pushed as is + asm.push_insn(insn); + } + iterator.map_insn_index(&mut asm); + } + + assert_eq!(pool, 0, "Expected all registers to be returned to the pool"); + asm + } + + /// Compile the instructions down to machine code. + /// Can fail due to lack of code memory and inopportune code placement, among other reasons. + #[must_use] + pub fn compile(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>) -> Option<(CodePtr, Vec<u32>)> + { + #[cfg(feature = "disasm")] + let start_addr = cb.get_write_ptr(); + + let alloc_regs = Self::get_alloc_regs(); + let ret = self.compile_with_regs(cb, ocb, alloc_regs); + + #[cfg(feature = "disasm")] + if let Some(dump_disasm) = get_option_ref!(dump_disasm) { + use crate::disasm::dump_disasm_addr_range; + let end_addr = cb.get_write_ptr(); + dump_disasm_addr_range(cb, start_addr, end_addr, dump_disasm) + } + ret + } + + /// Compile with a limited number of registers. Used only for unit tests. 
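+    /// (Illustrative) e.g. `asm.compile_with_num_regs(&mut cb, 1)` limits the
+    /// allocator to the first register from get_alloc_regs(), which is how the
+    /// tests in backend/tests.rs exercise register pressure.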
+ #[cfg(test)] + pub fn compile_with_num_regs(self, cb: &mut CodeBlock, num_regs: usize) -> (CodePtr, Vec<u32>) + { + let mut alloc_regs = Self::get_alloc_regs(); + let alloc_regs = alloc_regs.drain(0..num_regs).collect(); + self.compile_with_regs(cb, None, alloc_regs).unwrap() + } + + /// Consume the assembler by creating a new draining iterator. + pub fn into_draining_iter(self) -> AssemblerDrainingIterator { + AssemblerDrainingIterator::new(self) + } + + /// Return true if the next ccall() is expected to be leaf. + pub fn get_leaf_ccall(&mut self) -> bool { + self.leaf_ccall + } + + /// Assert that the next ccall() is going to be leaf. + pub fn expect_leaf_ccall(&mut self) { + self.leaf_ccall = true; + } +} + +/// A struct that allows iterating through an assembler's instructions and +/// consuming them as it iterates. +pub struct AssemblerDrainingIterator { + insns: std::iter::Peekable<std::vec::IntoIter<Insn>>, + index: usize, + indices: Vec<usize> +} + +impl AssemblerDrainingIterator { + fn new(asm: Assembler) -> Self { + Self { + insns: asm.insns.into_iter().peekable(), + index: 0, + indices: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY), + } + } + + /// When you're working with two lists of instructions, you need to make + /// sure you do some bookkeeping to align the indices contained within the + /// operands of the two lists. + /// + /// This function accepts the assembler that is being built and tracks the + /// end of the current list of instructions in order to maintain that + /// alignment. + pub fn map_insn_index(&mut self, asm: &mut Assembler) { + self.indices.push(asm.insns.len().saturating_sub(1)); + } + + /// Map an operand by using this iterator's list of mapped indices. + #[cfg(target_arch = "x86_64")] + pub fn map_opnd(&self, opnd: Opnd) -> Opnd { + opnd.map_index(&self.indices) + } + + /// Returns the next instruction in the list with the indices corresponding + /// to the next list of instructions. + pub fn next_mapped(&mut self) -> Option<(usize, Insn)> { + self.next_unmapped().map(|(index, mut insn)| { + let mut opnd_iter = insn.opnd_iter_mut(); + while let Some(opnd) = opnd_iter.next() { + *opnd = opnd.map_index(&self.indices); + } + + (index, insn) + }) + } + + /// Returns the next instruction in the list with the indices corresponding + /// to the previous list of instructions. + pub fn next_unmapped(&mut self) -> Option<(usize, Insn)> { + let index = self.index; + self.index += 1; + self.insns.next().map(|insn| (index, insn)) + } + + /// Returns the next instruction without incrementing the iterator's index. 
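+    /// (Illustrative) the arch-specific split passes use this to look one
+    /// instruction ahead, e.g. x86_split() merges an `Add` with a following `Mov`
+    /// when the intermediate result dies immediately.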
+ pub fn peek(&mut self) -> Option<&Insn> { + self.insns.peek() + } +} + +impl fmt::Debug for Assembler { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + writeln!(fmt, "Assembler")?; + + for (idx, insn) in self.insns.iter().enumerate() { + writeln!(fmt, " {idx:03} {insn:?}")?; + } + + Ok(()) + } +} + +impl Assembler { + #[must_use] + pub fn add(&mut self, left: Opnd, right: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); + self.push_insn(Insn::Add { left, right, out }); + out + } + + #[must_use] + pub fn and(&mut self, left: Opnd, right: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); + self.push_insn(Insn::And { left, right, out }); + out + } + + pub fn bake_string(&mut self, text: &str) { + self.push_insn(Insn::BakeString(text.to_string())); + } + + #[allow(dead_code)] + pub fn breakpoint(&mut self) { + self.push_insn(Insn::Breakpoint); + } + + pub fn ccall(&mut self, fptr: *const u8, opnds: Vec<Opnd>) -> Opnd { + // Let vm_check_canary() assert this ccall's leafness if leaf_ccall is set + let canary_opnd = self.set_stack_canary(&opnds); + + let old_temps = self.ctx.get_reg_temps(); // with registers + // Spill stack temp registers since they are caller-saved registers. + // Note that this doesn't spill stack temps that are already popped + // but may still be used in the C arguments. + self.spill_temps(); + let new_temps = self.ctx.get_reg_temps(); // all spilled + + // Temporarily manipulate RegTemps so that we can use registers + // to pass stack operands that are already spilled above. + self.ctx.set_reg_temps(old_temps); + + // Call a C function + let out = self.next_opnd_out(Opnd::match_num_bits(&opnds)); + self.push_insn(Insn::CCall { fptr, opnds, out }); + + // Registers in old_temps may be clobbered by the above C call, + // so rollback the manipulated RegTemps to a spilled version. + self.ctx.set_reg_temps(new_temps); + + // Clear the canary after use + if let Some(canary_opnd) = canary_opnd { + self.mov(canary_opnd, 0.into()); + } + + out + } + + /// Let vm_check_canary() assert the leafness of this ccall if leaf_ccall is set + fn set_stack_canary(&mut self, opnds: &Vec<Opnd>) -> Option<Opnd> { + // Use the slot right above the stack top for verifying leafness. + let canary_opnd = self.stack_opnd(-1); + + // If the slot is already used, which is a valid optimization to avoid spills, + // give up the verification. + let canary_opnd = if cfg!(debug_assertions) && self.leaf_ccall && opnds.iter().all(|opnd| + opnd.get_stack_idx() != canary_opnd.get_stack_idx() + ) { + asm_comment!(self, "set stack canary"); + self.mov(canary_opnd, vm_stack_canary().into()); + Some(canary_opnd) + } else { + None + }; + + // Avoid carrying the flag to the next instruction whether we verified it or not. + self.leaf_ccall = false; + + canary_opnd + } + + pub fn cmp(&mut self, left: Opnd, right: Opnd) { + self.push_insn(Insn::Cmp { left, right }); + } + + #[must_use] + pub fn cpop(&mut self) -> Opnd { + let out = self.next_opnd_out(Opnd::DEFAULT_NUM_BITS); + self.push_insn(Insn::CPop { out }); + out + } + + pub fn cpop_all(&mut self) { + self.push_insn(Insn::CPopAll); + + // Re-enable ccall's RegTemps assertion disabled by cpush_all. + // cpush_all + cpop_all preserve all stack temp registers, so it's safe. 
+ self.set_reg_temps(self.ctx.get_reg_temps()); + } + + pub fn cpop_into(&mut self, opnd: Opnd) { + self.push_insn(Insn::CPopInto(opnd)); + } + + pub fn cpush(&mut self, opnd: Opnd) { + self.push_insn(Insn::CPush(opnd)); + } + + pub fn cpush_all(&mut self) { + self.push_insn(Insn::CPushAll); + + // Mark all temps as not being in registers. + // Temps will be marked back as being in registers by cpop_all. + // We assume that cpush_all + cpop_all are used for C functions in utils.rs + // that don't require spill_temps for GC. + self.set_reg_temps(RegTemps::default()); + } + + pub fn cret(&mut self, opnd: Opnd) { + self.push_insn(Insn::CRet(opnd)); + } + + #[must_use] + pub fn csel_e(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); + self.push_insn(Insn::CSelE { truthy, falsy, out }); + out + } + + #[must_use] + pub fn csel_g(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); + self.push_insn(Insn::CSelG { truthy, falsy, out }); + out + } + + #[must_use] + pub fn csel_ge(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); + self.push_insn(Insn::CSelGE { truthy, falsy, out }); + out + } + + #[must_use] + pub fn csel_l(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); + self.push_insn(Insn::CSelL { truthy, falsy, out }); + out + } + + #[must_use] + pub fn csel_le(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); + self.push_insn(Insn::CSelLE { truthy, falsy, out }); + out + } + + #[must_use] + pub fn csel_ne(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); + self.push_insn(Insn::CSelNE { truthy, falsy, out }); + out + } + + #[must_use] + pub fn csel_nz(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); + self.push_insn(Insn::CSelNZ { truthy, falsy, out }); + out + } + + #[must_use] + pub fn csel_z(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy])); + self.push_insn(Insn::CSelZ { truthy, falsy, out }); + out + } + + pub fn frame_setup(&mut self) { + self.push_insn(Insn::FrameSetup); + } + + pub fn frame_teardown(&mut self) { + self.push_insn(Insn::FrameTeardown); + } + + pub fn incr_counter(&mut self, mem: Opnd, value: Opnd) { + self.push_insn(Insn::IncrCounter { mem, value }); + } + + pub fn jbe(&mut self, target: Target) { + self.push_insn(Insn::Jbe(target)); + } + + pub fn jb(&mut self, target: Target) { + self.push_insn(Insn::Jb(target)); + } + + pub fn je(&mut self, target: Target) { + self.push_insn(Insn::Je(target)); + } + + pub fn jl(&mut self, target: Target) { + self.push_insn(Insn::Jl(target)); + } + + #[allow(dead_code)] + pub fn jg(&mut self, target: Target) { + self.push_insn(Insn::Jg(target)); + } + + #[allow(dead_code)] + pub fn jge(&mut self, target: Target) { + self.push_insn(Insn::Jge(target)); + } + + pub fn jmp(&mut self, target: Target) { + self.push_insn(Insn::Jmp(target)); + } + + pub fn jmp_opnd(&mut self, opnd: Opnd) { + self.push_insn(Insn::JmpOpnd(opnd)); + } + + pub fn jne(&mut self, target: Target) { + self.push_insn(Insn::Jne(target)); + } + + pub fn jnz(&mut self, target: Target) { + self.push_insn(Insn::Jnz(target)); 
+ } + + pub fn jo(&mut self, target: Target) { + self.push_insn(Insn::Jo(target)); + } + + pub fn jo_mul(&mut self, target: Target) { + self.push_insn(Insn::JoMul(target)); + } + + pub fn jz(&mut self, target: Target) { + self.push_insn(Insn::Jz(target)); + } + + #[must_use] + pub fn lea(&mut self, opnd: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); + self.push_insn(Insn::Lea { opnd, out }); + out + } + + #[must_use] + pub fn lea_jump_target(&mut self, target: Target) -> Opnd { + let out = self.next_opnd_out(Opnd::DEFAULT_NUM_BITS); + self.push_insn(Insn::LeaJumpTarget { target, out }); + out + } + + #[must_use] + pub fn live_reg_opnd(&mut self, opnd: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); + self.push_insn(Insn::LiveReg { opnd, out }); + out + } + + #[must_use] + pub fn load(&mut self, opnd: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); + self.push_insn(Insn::Load { opnd, out }); + out + } + + pub fn load_into(&mut self, dest: Opnd, opnd: Opnd) { + match (dest, opnd) { + (Opnd::Reg(dest), Opnd::Reg(opnd)) if dest == opnd => {}, // skip if noop + _ => self.push_insn(Insn::LoadInto { dest, opnd }), + } + } + + #[must_use] + pub fn load_sext(&mut self, opnd: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); + self.push_insn(Insn::LoadSExt { opnd, out }); + out + } + + #[must_use] + pub fn lshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd, shift])); + self.push_insn(Insn::LShift { opnd, shift, out }); + out + } + + pub fn mov(&mut self, dest: Opnd, src: Opnd) { + self.push_insn(Insn::Mov { dest, src }); + } + + #[must_use] + pub fn not(&mut self, opnd: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd])); + self.push_insn(Insn::Not { opnd, out }); + out + } + + #[must_use] + pub fn or(&mut self, left: Opnd, right: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); + self.push_insn(Insn::Or { left, right, out }); + out + } + + pub fn pad_inval_patch(&mut self) { + self.push_insn(Insn::PadInvalPatch); + } + + //pub fn pos_marker<F: FnMut(CodePtr)>(&mut self, marker_fn: F) + pub fn pos_marker(&mut self, marker_fn: impl Fn(CodePtr, &CodeBlock) + 'static) { + self.push_insn(Insn::PosMarker(Box::new(marker_fn))); + } + + #[must_use] + pub fn rshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd, shift])); + self.push_insn(Insn::RShift { opnd, shift, out }); + out + } + + pub fn store(&mut self, dest: Opnd, src: Opnd) { + self.push_insn(Insn::Store { dest, src }); + } + + #[must_use] + pub fn sub(&mut self, left: Opnd, right: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); + self.push_insn(Insn::Sub { left, right, out }); + out + } + + #[must_use] + pub fn mul(&mut self, left: Opnd, right: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); + self.push_insn(Insn::Mul { left, right, out }); + out + } + + pub fn test(&mut self, left: Opnd, right: Opnd) { + self.push_insn(Insn::Test { left, right }); + } + + #[must_use] + #[allow(dead_code)] + pub fn urshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd, shift])); + self.push_insn(Insn::URShift { opnd, shift, out }); + out + } + + /// Verify the leafness of the given block + pub fn with_leaf_ccall<F, R>(&mut self, 
mut block: F) -> R + where F: FnMut(&mut Self) -> R { + let old_leaf_ccall = self.leaf_ccall; + self.leaf_ccall = true; + let ret = block(self); + self.leaf_ccall = old_leaf_ccall; + ret + } + + /// Add a label at the current position + pub fn write_label(&mut self, target: Target) { + assert!(target.unwrap_label_idx() < self.label_names.len()); + self.push_insn(Insn::Label(target)); + } + + #[must_use] + pub fn xor(&mut self, left: Opnd, right: Opnd) -> Opnd { + let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right])); + self.push_insn(Insn::Xor { left, right, out }); + out + } +} + +/// Macro to use format! for Insn::Comment, which skips a format! call +/// when disasm is not supported. +macro_rules! asm_comment { + ($asm:expr, $($fmt:tt)*) => { + if cfg!(feature = "disasm") { + $asm.push_insn(Insn::Comment(format!($($fmt)*))); + } + }; +} +pub(crate) use asm_comment; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_opnd_iter() { + let insn = Insn::Add { left: Opnd::None, right: Opnd::None, out: Opnd::None }; + + let mut opnd_iter = insn.opnd_iter(); + assert!(matches!(opnd_iter.next(), Some(Opnd::None))); + assert!(matches!(opnd_iter.next(), Some(Opnd::None))); + + assert!(matches!(opnd_iter.next(), None)); + } + + #[test] + fn test_opnd_iter_mut() { + let mut insn = Insn::Add { left: Opnd::None, right: Opnd::None, out: Opnd::None }; + + let mut opnd_iter = insn.opnd_iter_mut(); + assert!(matches!(opnd_iter.next(), Some(Opnd::None))); + assert!(matches!(opnd_iter.next(), Some(Opnd::None))); + + assert!(matches!(opnd_iter.next(), None)); + } +} diff --git a/yjit/src/backend/mod.rs b/yjit/src/backend/mod.rs new file mode 100644 index 0000000000..6921244c72 --- /dev/null +++ b/yjit/src/backend/mod.rs @@ -0,0 +1,14 @@ +#[cfg(target_arch = "x86_64")] +pub mod x86_64; + +#[cfg(target_arch = "aarch64")] +pub mod arm64; + +#[cfg(target_arch = "x86_64")] +pub use x86_64 as current; + +#[cfg(target_arch = "aarch64")] +pub use arm64 as current; + +pub mod ir; +mod tests; diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs new file mode 100644 index 0000000000..01e87fe26c --- /dev/null +++ b/yjit/src/backend/tests.rs @@ -0,0 +1,330 @@ +#![cfg(test)] +use crate::asm::{CodeBlock}; +use crate::backend::ir::*; +use crate::cruby::*; +use crate::utils::c_callable; + +#[test] +fn test_add() { + let mut asm = Assembler::new(); + let out = asm.add(SP, Opnd::UImm(1)); + let _ = asm.add(out, Opnd::UImm(2)); +} + +#[test] +fn test_alloc_regs() { + let mut asm = Assembler::new(); + + // Get the first output that we're going to reuse later. + let out1 = asm.add(EC, Opnd::UImm(1)); + + // Pad some instructions in to make sure it can handle that. + let _ = asm.add(EC, Opnd::UImm(2)); + + // Get the second output we're going to reuse. + let out2 = asm.add(EC, Opnd::UImm(3)); + + // Pad another instruction. + let _ = asm.add(EC, Opnd::UImm(4)); + + // Reuse both the previously captured outputs. + let _ = asm.add(out1, out2); + + // Now get a third output to make sure that the pool has registers to + // allocate now that the previous ones have been returned. + let out3 = asm.add(EC, Opnd::UImm(5)); + let _ = asm.add(out3, Opnd::UImm(6)); + + // Here we're going to allocate the registers. + let result = asm.alloc_regs(Assembler::get_alloc_regs()); + + // Now we're going to verify that the out field has been appropriately + // updated for each of the instructions that needs it. 
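+    // Expected shape (illustrative): out1 and out2 get two distinct registers,
+    // and out3 can reuse the first one because both die at `add(out1, out2)`.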
+ let regs = Assembler::get_alloc_regs(); + let reg0 = regs[0]; + let reg1 = regs[1]; + + match result.insns[0].out_opnd() { + Some(Opnd::Reg(value)) => assert_eq!(value, ®0), + val => panic!("Unexpected register value {:?}", val), + } + + match result.insns[2].out_opnd() { + Some(Opnd::Reg(value)) => assert_eq!(value, ®1), + val => panic!("Unexpected register value {:?}", val), + } + + match result.insns[5].out_opnd() { + Some(Opnd::Reg(value)) => assert_eq!(value, ®0), + val => panic!("Unexpected register value {:?}", val), + } +} + +fn setup_asm() -> (Assembler, CodeBlock) { + return ( + Assembler::new(), + CodeBlock::new_dummy(1024) + ); +} + +// Test full codegen pipeline +#[test] +fn test_compile() +{ + let (mut asm, mut cb) = setup_asm(); + let regs = Assembler::get_alloc_regs(); + + let out = asm.add(Opnd::Reg(regs[0]), Opnd::UImm(2)); + let out2 = asm.add(out, Opnd::UImm(2)); + asm.store(Opnd::mem(64, SP, 0), out2); + + asm.compile_with_num_regs(&mut cb, 1); +} + +// Test memory-to-memory move +#[test] +fn test_mov_mem2mem() +{ + let (mut asm, mut cb) = setup_asm(); + + asm_comment!(asm, "check that comments work too"); + asm.mov(Opnd::mem(64, SP, 0), Opnd::mem(64, SP, 8)); + + asm.compile_with_num_regs(&mut cb, 1); +} + +// Test load of register into new register +#[test] +fn test_load_reg() +{ + let (mut asm, mut cb) = setup_asm(); + + let out = asm.load(SP); + asm.mov(Opnd::mem(64, SP, 0), out); + + asm.compile_with_num_regs(&mut cb, 1); +} + +// Test load of a GC'd value +#[test] +fn test_load_value() +{ + let (mut asm, mut cb) = setup_asm(); + + let gcd_value = VALUE(0xFFFFFFFFFFFF00); + assert!(!gcd_value.special_const_p()); + + let out = asm.load(Opnd::Value(gcd_value)); + asm.mov(Opnd::mem(64, SP, 0), out); + + asm.compile_with_num_regs(&mut cb, 1); +} + +// Multiple registers needed and register reuse +#[test] +fn test_reuse_reg() +{ + let (mut asm, mut cb) = setup_asm(); + + let v0 = asm.add(Opnd::mem(64, SP, 0), Opnd::UImm(1)); + let v1 = asm.add(Opnd::mem(64, SP, 8), Opnd::UImm(1)); + + let v2 = asm.add(v1, Opnd::UImm(1)); // Reuse v1 register + let v3 = asm.add(v0, v2); + + asm.store(Opnd::mem(64, SP, 0), v2); + asm.store(Opnd::mem(64, SP, 8), v3); + + asm.compile_with_num_regs(&mut cb, 2); +} + +// 64-bit values can't be written directly to memory, +// need to be split into one or more register movs first +#[test] +fn test_store_u64() +{ + let (mut asm, mut cb) = setup_asm(); + asm.store(Opnd::mem(64, SP, 0), u64::MAX.into()); + + asm.compile_with_num_regs(&mut cb, 1); +} + +// Use instruction output as base register for memory operand +#[test] +fn test_base_insn_out() +{ + let (mut asm, mut cb) = setup_asm(); + + // Forced register to be reused + // This also causes the insn sequence to change length + asm.mov( + Opnd::mem(64, SP, 8), + Opnd::mem(64, SP, 0) + ); + + // Load the pointer into a register + let ptr_reg = asm.load(Opnd::const_ptr(4351776248 as *const u8)); + let counter_opnd = Opnd::mem(64, ptr_reg, 0); + + // Increment and store the updated value + asm.incr_counter(counter_opnd, 1.into()); + + asm.compile_with_num_regs(&mut cb, 2); +} + +#[test] +fn test_c_call() +{ + c_callable! 
{ + fn dummy_c_fun(_v0: usize, _v1: usize) {} + } + + let (mut asm, mut cb) = setup_asm(); + + let ret_val = asm.ccall( + dummy_c_fun as *const u8, + vec![Opnd::mem(64, SP, 0), Opnd::UImm(1)] + ); + + // Make sure that the call's return value is usable + asm.mov(Opnd::mem(64, SP, 0), ret_val); + + asm.compile_with_num_regs(&mut cb, 1); +} + +#[test] +fn test_alloc_ccall_regs() { + let mut asm = Assembler::new(); + let out1 = asm.ccall(0 as *const u8, vec![]); + let out2 = asm.ccall(0 as *const u8, vec![out1]); + asm.mov(EC, out2); + let mut cb = CodeBlock::new_dummy(1024); + asm.compile_with_regs(&mut cb, None, Assembler::get_alloc_regs()); +} + +#[test] +fn test_lea_ret() +{ + let (mut asm, mut cb) = setup_asm(); + + let addr = asm.lea(Opnd::mem(64, SP, 0)); + asm.cret(addr); + + asm.compile_with_num_regs(&mut cb, 1); +} + +#[test] +fn test_jcc_label() +{ + let (mut asm, mut cb) = setup_asm(); + + let label = asm.new_label("foo"); + asm.cmp(EC, EC); + asm.je(label); + asm.write_label(label); + + asm.compile_with_num_regs(&mut cb, 1); +} + +#[test] +fn test_jcc_ptr() +{ + let (mut asm, mut cb) = setup_asm(); + + let side_exit = Target::CodePtr(cb.get_write_ptr().add_bytes(4)); + let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK)); + asm.test( + Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG), + not_mask, + ); + asm.jnz(side_exit); + + asm.compile_with_num_regs(&mut cb, 2); +} + +/// Direct jump to a stub e.g. for deferred compilation +#[test] +fn test_jmp_ptr() +{ + let (mut asm, mut cb) = setup_asm(); + + let stub = Target::CodePtr(cb.get_write_ptr().add_bytes(4)); + asm.jmp(stub); + + asm.compile_with_num_regs(&mut cb, 0); +} + +#[test] +fn test_jo() +{ + let (mut asm, mut cb) = setup_asm(); + + let side_exit = Target::CodePtr(cb.get_write_ptr().add_bytes(4)); + + let arg1 = Opnd::mem(64, SP, 0); + let arg0 = Opnd::mem(64, SP, 8); + + let arg0_untag = asm.sub(arg0, Opnd::Imm(1)); + let out_val = asm.add(arg0_untag, arg1); + asm.jo(side_exit); + + asm.mov(Opnd::mem(64, SP, 0), out_val); + + asm.compile_with_num_regs(&mut cb, 2); +} + +#[test] +fn test_bake_string() { + let (mut asm, mut cb) = setup_asm(); + + asm.bake_string("Hello, world!"); + asm.compile_with_num_regs(&mut cb, 0); +} + +#[test] +fn test_draining_iterator() { + + let mut asm = Assembler::new(); + + let _ = asm.load(Opnd::None); + asm.store(Opnd::None, Opnd::None); + let _ = asm.add(Opnd::None, Opnd::None); + + let mut iter = asm.into_draining_iter(); + + while let Some((index, insn)) = iter.next_unmapped() { + match index { + 0 => assert!(matches!(insn, Insn::Load { .. })), + 1 => assert!(matches!(insn, Insn::Store { .. })), + 2 => assert!(matches!(insn, Insn::Add { .. })), + _ => panic!("Unexpected instruction index"), + }; + } +} + +#[test] +fn test_cmp_8_bit() { + let (mut asm, mut cb) = setup_asm(); + let reg = Assembler::get_alloc_regs()[0]; + asm.cmp(Opnd::Reg(reg).with_num_bits(8).unwrap(), Opnd::UImm(RUBY_SYMBOL_FLAG as u64)); + + asm.compile_with_num_regs(&mut cb, 1); +} + +#[test] +fn test_no_pos_marker_callback_when_compile_fails() { + // When compilation fails (e.g. when out of memory), the code written out is malformed. + // We don't want to invoke the pos_marker callbacks with positions of malformed code. 
+ let mut asm = Assembler::new(); + + // Markers around code to exhaust memory limit + let fail_if_called = |_code_ptr, _cb: &_| panic!("pos_marker callback should not be called"); + asm.pos_marker(fail_if_called); + let zero = asm.load(0.into()); + let sum = asm.add(zero, 500.into()); + asm.store(Opnd::mem(64, SP, 8), sum); + asm.pos_marker(fail_if_called); + + let cb = &mut CodeBlock::new_dummy(8); + assert!(asm.compile(cb, None).is_none(), "should fail due to tiny size limit"); +} diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs new file mode 100644 index 0000000000..4ca5e9be9c --- /dev/null +++ b/yjit/src/backend/x86_64/mod.rs @@ -0,0 +1,1322 @@ +use std::mem::take; + +use crate::asm::*; +use crate::asm::x86_64::*; +use crate::codegen::CodePtr; +use crate::cruby::*; +use crate::backend::ir::*; +use crate::options::*; +use crate::utils::*; + +// Use the x86 register type for this platform +pub type Reg = X86Reg; + +// Callee-saved registers +pub const _CFP: Opnd = Opnd::Reg(R13_REG); +pub const _EC: Opnd = Opnd::Reg(R12_REG); +pub const _SP: Opnd = Opnd::Reg(RBX_REG); + +// C argument registers on this platform +pub const _C_ARG_OPNDS: [Opnd; 6] = [ + Opnd::Reg(RDI_REG), + Opnd::Reg(RSI_REG), + Opnd::Reg(RDX_REG), + Opnd::Reg(RCX_REG), + Opnd::Reg(R8_REG), + Opnd::Reg(R9_REG) +]; + +// C return value register on this platform +pub const C_RET_REG: Reg = RAX_REG; +pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG); + +impl CodeBlock { + // The number of bytes that are generated by jmp_ptr + pub fn jmp_ptr_bytes(&self) -> usize { 5 } +} + +/// Map Opnd to X86Opnd +impl From<Opnd> for X86Opnd { + fn from(opnd: Opnd) -> Self { + match opnd { + // NOTE: these operand types need to be lowered first + //Value(VALUE), // Immediate Ruby value, may be GC'd, movable + //InsnOut(usize), // Output of a preceding instruction in this block + + Opnd::InsnOut{..} => panic!("InsnOut operand made it past register allocation"), + + Opnd::UImm(val) => uimm_opnd(val), + Opnd::Imm(val) => imm_opnd(val), + Opnd::Value(VALUE(uimm)) => uimm_opnd(uimm as u64), + + // General-purpose register + Opnd::Reg(reg) => X86Opnd::Reg(reg), + + // Memory operand with displacement + Opnd::Mem(Mem{ base: MemBase::Reg(reg_no), num_bits, disp }) => { + let reg = X86Reg { + reg_no, + num_bits: 64, + reg_type: RegType::GP + }; + + mem_opnd(num_bits, X86Opnd::Reg(reg), disp) + } + + Opnd::None => panic!( + "Attempted to lower an Opnd::None. This often happens when an out operand was not allocated for an instruction because the output of the instruction was not used. Please ensure you are using the output." + ), + + _ => panic!("unsupported x86 operand type") + } + } +} + +/// Also implement going from a reference to an operand for convenience. +impl From<&Opnd> for X86Opnd { + fn from(opnd: &Opnd) -> Self { + X86Opnd::from(*opnd) + } +} + +/// List of registers that can be used for stack temps. +pub static TEMP_REGS: [Reg; 5] = [RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG]; + +impl Assembler +{ + // A special scratch register for intermediate processing. 
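+    // (R11 is not among get_alloc_regs() or TEMP_REGS, so lowering code can
+    // clobber it without disturbing allocated registers or stack temps.)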
+ // This register is caller-saved (so we don't have to save it before using it) + pub const SCRATCH_REG: Reg = R11_REG; + const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG); + + + /// Get the list of registers from which we can allocate on this platform + pub fn get_alloc_regs() -> Vec<Reg> + { + vec![ + RAX_REG, + RCX_REG, + RDX_REG, + ] + } + + /// Get a list of all of the caller-save registers + pub fn get_caller_save_regs() -> Vec<Reg> { + vec![RAX_REG, RCX_REG, RDX_REG, RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG, R11_REG] + } + + // These are the callee-saved registers in the x86-64 SysV ABI + // RBX, RSP, RBP, and R12–R15 + + /// Split IR instructions for the x86 platform + fn x86_split(mut self) -> Assembler + { + let live_ranges: Vec<usize> = take(&mut self.live_ranges); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits)); + let mut iterator = self.into_draining_iter(); + + while let Some((index, mut insn)) = iterator.next_unmapped() { + // When we're iterating through the instructions with x86_split, we + // need to know the previous live ranges in order to tell if a + // register lasts beyond the current instruction. So instead of + // using next_mapped, we call next_unmapped. When you're using the + // next_unmapped API, you need to make sure that you map each + // operand that could reference an old index, which means both + // Opnd::InsnOut operands and Opnd::Mem operands with a base of + // MemBase::InsnOut. + // + // You need to ensure that you only map it _once_, because otherwise + // you'll end up mapping an incorrect index which could end up being + // out of bounds of the old set of indices. + // + // We handle all of that mapping here to ensure that it's only + // mapped once. We also handle loading Opnd::Value operands into + // registers here so that all mapping happens in one place. We load + // Opnd::Value operands into registers here because: + // + // - Most instructions can't be encoded with 64-bit immediates. + // - We look for Op::Load specifically when emitting to keep GC'ed + // VALUEs alive. This is a sort of canonicalization. + let mut unmapped_opnds: Vec<Opnd> = vec![]; + + let is_load = matches!(insn, Insn::Load { .. } | Insn::LoadInto { .. }); + let mut opnd_iter = insn.opnd_iter_mut(); + + while let Some(opnd) = opnd_iter.next() { + if let Opnd::Stack { .. } = opnd { + *opnd = asm.lower_stack_opnd(opnd); + } + unmapped_opnds.push(*opnd); + + *opnd = match opnd { + Opnd::Value(value) if !is_load => { + // Since mov(mem64, imm32) sign extends, as_i64() makes sure + // we split when the extended value is different. + if !value.special_const_p() || imm_num_bits(value.as_i64()) > 32 { + asm.load(iterator.map_opnd(*opnd)) + } else { + Opnd::UImm(value.as_u64()) + } + } + _ => iterator.map_opnd(*opnd), + }; + } + + // We are replacing instructions here so we know they are already + // being used. It is okay not to use their output here. + #[allow(unused_must_use)] + match &mut insn { + Insn::Add { left, right, out } | + Insn::Sub { left, right, out } | + Insn::Mul { left, right, out } | + Insn::And { left, right, out } | + Insn::Or { left, right, out } | + Insn::Xor { left, right, out } => { + match (&left, &right, iterator.peek()) { + // Merge this insn, e.g. 
`add REG, right -> out`, and `mov REG, out` if possible + (Opnd::Reg(_), Opnd::UImm(value), Some(Insn::Mov { dest, src })) + if out == src && left == dest && live_ranges[index] == index + 1 && uimm_num_bits(*value) <= 32 => { + *out = *dest; + asm.push_insn(insn); + iterator.map_insn_index(&mut asm); + iterator.next_unmapped(); // Pop merged Insn::Mov + } + (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src })) + if out == src && live_ranges[index] == index + 1 && { + // We want to do `dest == left`, but `left` has already gone + // through lower_stack_opnd() while `dest` has not. So we + // lower `dest` before comparing. + let lowered_dest = if let Opnd::Stack { .. } = dest { + asm.lower_stack_opnd(dest) + } else { + *dest + }; + lowered_dest == *left + } => { + *out = *dest; + asm.push_insn(insn); + iterator.map_insn_index(&mut asm); + iterator.next_unmapped(); // Pop merged Insn::Mov + } + _ => { + match (unmapped_opnds[0], unmapped_opnds[1]) { + (Opnd::Mem(_), Opnd::Mem(_)) => { + *left = asm.load(*left); + *right = asm.load(*right); + }, + (Opnd::Mem(_), Opnd::UImm(_) | Opnd::Imm(_)) => { + *left = asm.load(*left); + }, + // Instruction output whose live range spans beyond this instruction + (Opnd::InsnOut { idx, .. }, _) => { + if live_ranges[idx] > index { + *left = asm.load(*left); + } + }, + // We have to load memory operands to avoid corrupting them + (Opnd::Mem(_) | Opnd::Reg(_), _) => { + *left = asm.load(*left); + }, + _ => {} + }; + + *out = asm.next_opnd_out(Opnd::match_num_bits(&[*left, *right])); + asm.push_insn(insn); + } + } + }, + Insn::Cmp { left, right } => { + // Replace `cmp REG, 0` (4 bytes) with `test REG, REG` (3 bytes) + // when next IR is `je`, `jne`, `csel_e`, or `csel_ne` + match (&left, &right, iterator.peek()) { + (Opnd::InsnOut { .. }, + Opnd::UImm(0) | Opnd::Imm(0), + Some(Insn::Je(_) | Insn::Jne(_) | Insn::CSelE { .. } | Insn::CSelNE { .. })) => { + asm.push_insn(Insn::Test { left: *left, right: *left }); + } + _ => { + if let (Opnd::Mem(_), Opnd::Mem(_)) = (&left, &right) { + let loaded = asm.load(*right); + *right = loaded; + } + asm.push_insn(insn); + } + } + }, + Insn::Test { left, right } => { + if let (Opnd::Mem(_), Opnd::Mem(_)) = (&left, &right) { + let loaded = asm.load(*right); + *right = loaded; + } + asm.push_insn(insn); + }, + // These instructions modify their input operand in-place, so we + // may need to load the input value to preserve it + Insn::LShift { opnd, shift, out } | + Insn::RShift { opnd, shift, out } | + Insn::URShift { opnd, shift, out } => { + match (&unmapped_opnds[0], &unmapped_opnds[1]) { + // Instruction output whose live range spans beyond this instruction + (Opnd::InsnOut { idx, .. }, _) => { + if live_ranges[*idx] > index { + *opnd = asm.load(*opnd); + } + }, + // We have to load memory operands to avoid corrupting them + (Opnd::Mem(_) | Opnd::Reg(_), _) => { + *opnd = asm.load(*opnd); + }, + _ => {} + }; + + *out = asm.next_opnd_out(Opnd::match_num_bits(&[*opnd, *shift])); + asm.push_insn(insn); + }, + Insn::CSelZ { truthy, falsy, out } | + Insn::CSelNZ { truthy, falsy, out } | + Insn::CSelE { truthy, falsy, out } | + Insn::CSelNE { truthy, falsy, out } | + Insn::CSelL { truthy, falsy, out } | + Insn::CSelLE { truthy, falsy, out } | + Insn::CSelG { truthy, falsy, out } | + Insn::CSelGE { truthy, falsy, out } => { + match unmapped_opnds[0] { + // If we have an instruction output whose live range + // spans beyond this instruction, we have to load it. + Opnd::InsnOut { idx, .. 
} => { + if live_ranges[idx] > index { + *truthy = asm.load(*truthy); + } + }, + Opnd::UImm(_) | Opnd::Imm(_) => { + *truthy = asm.load(*truthy); + }, + // Opnd::Value could have already been split + Opnd::Value(_) if !matches!(truthy, Opnd::InsnOut { .. }) => { + *truthy = asm.load(*truthy); + }, + _ => {} + }; + + match falsy { + Opnd::UImm(_) | Opnd::Imm(_) => { + *falsy = asm.load(*falsy); + }, + _ => {} + }; + + *out = asm.next_opnd_out(Opnd::match_num_bits(&[*truthy, *falsy])); + asm.push_insn(insn); + }, + Insn::Mov { dest, src } | Insn::Store { dest, src } => { + match (&dest, &src) { + (Opnd::Mem(_), Opnd::Mem(_)) => { + // We load opnd1 because for mov, opnd0 is the output + let opnd1 = asm.load(*src); + asm.mov(*dest, opnd1); + }, + (Opnd::Mem(_), Opnd::UImm(value)) => { + // 32-bit values will be sign-extended + if imm_num_bits(*value as i64) > 32 { + let opnd1 = asm.load(*src); + asm.mov(*dest, opnd1); + } else { + asm.mov(*dest, *src); + } + }, + (Opnd::Mem(_), Opnd::Imm(value)) => { + if imm_num_bits(*value) > 32 { + let opnd1 = asm.load(*src); + asm.mov(*dest, opnd1); + } else { + asm.mov(*dest, *src); + } + }, + _ => { + asm.mov(*dest, *src); + } + } + }, + Insn::Not { opnd, .. } => { + let opnd0 = match unmapped_opnds[0] { + // If we have an instruction output whose live range + // spans beyond this instruction, we have to load it. + Opnd::InsnOut { idx, .. } => { + if live_ranges[idx] > index { + asm.load(*opnd) + } else { + *opnd + } + }, + // We have to load memory and register operands to avoid + // corrupting them. + Opnd::Mem(_) | Opnd::Reg(_) => { + asm.load(*opnd) + }, + // Otherwise we can just reuse the existing operand. + _ => *opnd + }; + + asm.not(opnd0); + }, + Insn::CCall { opnds, fptr, .. } => { + assert!(opnds.len() <= C_ARG_OPNDS.len()); + + // Load each operand into the corresponding argument + // register. + for (idx, opnd) in opnds.into_iter().enumerate() { + asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), *opnd); + } + + // Now we push the CCall without any arguments so that it + // just performs the call. + asm.ccall(*fptr, vec![]); + }, + Insn::Lea { .. } => { + // Merge `lea` and `mov` into a single `lea` when possible + match (&insn, iterator.peek()) { + (Insn::Lea { opnd, out }, Some(Insn::Mov { dest: Opnd::Reg(reg), src })) + if matches!(out, Opnd::InsnOut { .. }) && out == src && live_ranges[index] == index + 1 => { + asm.push_insn(Insn::Lea { opnd: *opnd, out: Opnd::Reg(*reg) }); + iterator.map_insn_index(&mut asm); + iterator.next_unmapped(); // Pop merged Insn::Mov + } + _ => asm.push_insn(insn), + } + }, + _ => { + if insn.out_opnd().is_some() { + let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter()); + let out = insn.out_opnd_mut().unwrap(); + *out = asm.next_opnd_out(out_num_bits); + } + + asm.push_insn(insn); + } + }; + + iterator.map_insn_index(&mut asm); + } + + asm + } + + /// Emit platform-specific machine code + pub fn x86_emit(&mut self, cb: &mut CodeBlock, ocb: &mut Option<&mut OutlinedCb>) -> Option<Vec<u32>> + { + /// For some instructions, we want to be able to lower a 64-bit operand + /// without requiring more registers to be available in the register + /// allocator. So we just use the SCRATCH0 register temporarily to hold + /// the value before we immediately use it. 
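+        ///
+        /// (Illustrative) `add rax, 0x1_0000_0000` has no encoding with a 64-bit
+        /// immediate, so it is lowered to `mov r11, 0x1_0000_0000; add rax, r11`,
+        /// r11 being SCRATCH_REG.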
+ fn emit_64bit_immediate(cb: &mut CodeBlock, opnd: &Opnd) -> X86Opnd { + match opnd { + Opnd::Imm(value) => { + // 32-bit values will be sign-extended + if imm_num_bits(*value) > 32 { + mov(cb, Assembler::SCRATCH0, opnd.into()); + Assembler::SCRATCH0 + } else { + opnd.into() + } + }, + Opnd::UImm(value) => { + // 32-bit values will be sign-extended + if imm_num_bits(*value as i64) > 32 { + mov(cb, Assembler::SCRATCH0, opnd.into()); + Assembler::SCRATCH0 + } else { + opnd.into() + } + }, + _ => opnd.into() + } + } + + /// Compile a side exit if Target::SideExit is given. + fn compile_side_exit( + target: Target, + asm: &mut Assembler, + ocb: &mut Option<&mut OutlinedCb>, + ) -> Option<Target> { + if let Target::SideExit { counter, context } = target { + let side_exit = asm.get_side_exit(&context.unwrap(), Some(counter), ocb.as_mut().unwrap()); + Some(Target::SideExitPtr(side_exit?)) + } else { + Some(target) + } + } + + fn emit_csel( + cb: &mut CodeBlock, + truthy: Opnd, + falsy: Opnd, + out: Opnd, + cmov_fn: fn(&mut CodeBlock, X86Opnd, X86Opnd), + cmov_neg: fn(&mut CodeBlock, X86Opnd, X86Opnd)){ + + // Assert that output is a register + out.unwrap_reg(); + + // If the truthy value is a memory operand + if let Opnd::Mem(_) = truthy { + if out != falsy { + mov(cb, out.into(), falsy.into()); + } + + cmov_fn(cb, out.into(), truthy.into()); + } else { + if out != truthy { + mov(cb, out.into(), truthy.into()); + } + + cmov_neg(cb, out.into(), falsy.into()); + } + } + + //dbg!(&self.insns); + + // List of GC offsets + let mut gc_offsets: Vec<u32> = Vec::new(); + + // Buffered list of PosMarker callbacks to fire if codegen is successful + let mut pos_markers: Vec<(usize, CodePtr)> = vec![]; + + // For each instruction + let start_write_pos = cb.get_write_pos(); + let mut insn_idx: usize = 0; + while let Some(insn) = self.insns.get(insn_idx) { + let src_ptr = cb.get_write_ptr(); + let had_dropped_bytes = cb.has_dropped_bytes(); + let old_label_state = cb.get_label_state(); + let mut insn_gc_offsets: Vec<u32> = Vec::new(); + + match insn { + Insn::Comment(text) => { + if cfg!(feature = "disasm") { + cb.add_comment(text); + } + }, + + // Write the label at the current position + Insn::Label(target) => { + cb.write_label(target.unwrap_label_idx()); + }, + + // Report back the current position in the generated code + Insn::PosMarker(..) => { + pos_markers.push((insn_idx, cb.get_write_ptr())); + }, + + Insn::BakeString(text) => { + for byte in text.as_bytes() { + cb.write_byte(*byte); + } + + // Add a null-terminator byte for safety (in case we pass + // this to C code) + cb.write_byte(0); + }, + + // Set up RBP to work with frame pointer unwinding + // (e.g. with Linux `perf record --call-graph fp`) + Insn::FrameSetup => { + if get_option!(frame_pointer) { + push(cb, RBP); + mov(cb, RBP, RSP); + push(cb, RBP); + } + }, + Insn::FrameTeardown => { + if get_option!(frame_pointer) { + pop(cb, RBP); + pop(cb, RBP); + } + }, + + Insn::Add { left, right, .. } => { + let opnd1 = emit_64bit_immediate(cb, right); + add(cb, left.into(), opnd1); + }, + + Insn::Sub { left, right, .. } => { + let opnd1 = emit_64bit_immediate(cb, right); + sub(cb, left.into(), opnd1); + }, + + Insn::Mul { left, right, .. } => { + let opnd1 = emit_64bit_immediate(cb, right); + imul(cb, left.into(), opnd1); + }, + + Insn::And { left, right, .. } => { + let opnd1 = emit_64bit_immediate(cb, right); + and(cb, left.into(), opnd1); + }, + + Insn::Or { left, right, .. 
} => { + let opnd1 = emit_64bit_immediate(cb, right); + or(cb, left.into(), opnd1); + }, + + Insn::Xor { left, right, .. } => { + let opnd1 = emit_64bit_immediate(cb, right); + xor(cb, left.into(), opnd1); + }, + + Insn::Not { opnd, .. } => { + not(cb, opnd.into()); + }, + + Insn::LShift { opnd, shift , ..} => { + shl(cb, opnd.into(), shift.into()) + }, + + Insn::RShift { opnd, shift , ..} => { + sar(cb, opnd.into(), shift.into()) + }, + + Insn::URShift { opnd, shift, .. } => { + shr(cb, opnd.into(), shift.into()) + }, + + Insn::Store { dest, src } => { + mov(cb, dest.into(), src.into()); + }, + + // This assumes only load instructions can contain references to GC'd Value operands + Insn::Load { opnd, out } | + Insn::LoadInto { dest: out, opnd } => { + match opnd { + Opnd::Value(val) if val.heap_object_p() => { + // Using movabs because mov might write value in 32 bits + movabs(cb, out.into(), val.0 as _); + // The pointer immediate is encoded as the last part of the mov written out + let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); + insn_gc_offsets.push(ptr_offset); + } + _ => mov(cb, out.into(), opnd.into()) + } + }, + + Insn::LoadSExt { opnd, out } => { + movsx(cb, out.into(), opnd.into()); + }, + + Insn::Mov { dest, src } => { + mov(cb, dest.into(), src.into()); + }, + + // Load effective address + Insn::Lea { opnd, out } => { + lea(cb, out.into(), opnd.into()); + }, + + // Load address of jump target + Insn::LeaJumpTarget { target, out } => { + if let Target::Label(label_idx) = target { + // Set output to the raw address of the label + cb.label_ref(*label_idx, 7, |cb, src_addr, dst_addr| { + let disp = dst_addr - src_addr; + lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap())); + }); + + mov(cb, out.into(), Self::SCRATCH0); + } else { + // Set output to the jump target's raw address + let target_code = target.unwrap_code_ptr(); + let target_addr = target_code.raw_addr(cb).as_u64(); + // Constant encoded length important for patching + movabs(cb, out.into(), target_addr); + } + }, + + // Push and pop to/from the C stack + Insn::CPush(opnd) => { + push(cb, opnd.into()); + }, + Insn::CPop { out } => { + pop(cb, out.into()); + }, + Insn::CPopInto(opnd) => { + pop(cb, opnd.into()); + }, + + // Push and pop to the C stack all caller-save registers and the + // flags + Insn::CPushAll => { + let regs = Assembler::get_caller_save_regs(); + + for reg in regs { + push(cb, X86Opnd::Reg(reg)); + } + pushfq(cb); + }, + Insn::CPopAll => { + let regs = Assembler::get_caller_save_regs(); + + popfq(cb); + for reg in regs.into_iter().rev() { + pop(cb, X86Opnd::Reg(reg)); + } + }, + + // C function call + Insn::CCall { fptr, .. } => { + call_ptr(cb, RAX, *fptr); + }, + + Insn::CRet(opnd) => { + // TODO: bias allocation towards return register + if *opnd != Opnd::Reg(C_RET_REG) { + mov(cb, RAX, opnd.into()); + } + + ret(cb); + }, + + // Compare + Insn::Cmp { left, right } => { + let num_bits = match right { + Opnd::Imm(value) => Some(imm_num_bits(*value)), + Opnd::UImm(value) => Some(uimm_num_bits(*value)), + _ => None + }; + + // If the immediate is less than 64 bits (like 32, 16, 8), and the operand + // sizes match, then we can represent it as an immediate in the instruction + // without moving it to a register first. + // IOW, 64 bit immediates must always be moved to a register + // before comparisons, where other sizes may be encoded + // directly in the instruction. 
+ if num_bits.is_some() && left.num_bits() == num_bits && num_bits.unwrap() < 64 { + cmp(cb, left.into(), right.into()); + } else { + let emitted = emit_64bit_immediate(cb, right); + cmp(cb, left.into(), emitted); + } + } + + // Test and set flags + Insn::Test { left, right } => { + let emitted = emit_64bit_immediate(cb, right); + test(cb, left.into(), emitted); + } + + Insn::JmpOpnd(opnd) => { + jmp_rm(cb, opnd.into()); + } + + // Conditional jump to a label + Insn::Jmp(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jmp_ptr(cb, code_ptr), + Target::Label(label_idx) => jmp_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + } + + Insn::Je(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => je_ptr(cb, code_ptr), + Target::Label(label_idx) => je_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + } + + Insn::Jne(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jne_ptr(cb, code_ptr), + Target::Label(label_idx) => jne_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + } + + Insn::Jl(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jl_ptr(cb, code_ptr), + Target::Label(label_idx) => jl_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + }, + + Insn::Jg(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jg_ptr(cb, code_ptr), + Target::Label(label_idx) => jg_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + }, + + Insn::Jge(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jge_ptr(cb, code_ptr), + Target::Label(label_idx) => jge_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + }, + + Insn::Jbe(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jbe_ptr(cb, code_ptr), + Target::Label(label_idx) => jbe_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + }, + + Insn::Jb(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jb_ptr(cb, code_ptr), + Target::Label(label_idx) => jb_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + }, + + Insn::Jz(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jz_ptr(cb, code_ptr), + Target::Label(label_idx) => jz_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + } + + Insn::Jnz(target) => { + match compile_side_exit(*target, self, ocb)? 
{ + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jnz_ptr(cb, code_ptr), + Target::Label(label_idx) => jnz_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + } + + Insn::Jo(target) | + Insn::JoMul(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jo_ptr(cb, code_ptr), + Target::Label(label_idx) => jo_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + } + + Insn::Joz(..) | Insn::Jonz(..) => unreachable!("Joz/Jonz should be unused for now"), + + // Atomically increment a counter at a given memory location + Insn::IncrCounter { mem, value } => { + assert!(matches!(mem, Opnd::Mem(_))); + assert!(matches!(value, Opnd::UImm(_) | Opnd::Imm(_) ) ); + write_lock_prefix(cb); + add(cb, mem.into(), value.into()); + }, + + Insn::Breakpoint => int3(cb), + + Insn::CSelZ { truthy, falsy, out } => { + emit_csel(cb, *truthy, *falsy, *out, cmovz, cmovnz); + }, + Insn::CSelNZ { truthy, falsy, out } => { + emit_csel(cb, *truthy, *falsy, *out, cmovnz, cmovz); + }, + Insn::CSelE { truthy, falsy, out } => { + emit_csel(cb, *truthy, *falsy, *out, cmove, cmovne); + }, + Insn::CSelNE { truthy, falsy, out } => { + emit_csel(cb, *truthy, *falsy, *out, cmovne, cmove); + }, + Insn::CSelL { truthy, falsy, out } => { + emit_csel(cb, *truthy, *falsy, *out, cmovl, cmovge); + }, + Insn::CSelLE { truthy, falsy, out } => { + emit_csel(cb, *truthy, *falsy, *out, cmovle, cmovg); + }, + Insn::CSelG { truthy, falsy, out } => { + emit_csel(cb, *truthy, *falsy, *out, cmovg, cmovle); + }, + Insn::CSelGE { truthy, falsy, out } => { + emit_csel(cb, *truthy, *falsy, *out, cmovge, cmovl); + } + Insn::LiveReg { .. 
} => (), // just a reg alloc signal, no code + Insn::PadInvalPatch => { + let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos())); + if code_size < cb.jmp_ptr_bytes() { + nop(cb, (cb.jmp_ptr_bytes() - code_size) as u32); + } + } + }; + + // On failure, jump to the next page and retry the current insn + if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, jmp_ptr) { + // Reset cb states before retrying the current Insn + cb.set_label_state(old_label_state); + } else { + insn_idx += 1; + gc_offsets.append(&mut insn_gc_offsets); + } + } + + // Error if we couldn't write out everything + if cb.has_dropped_bytes() { + return None + } else { + // No bytes dropped, so the pos markers point to valid code + for (insn_idx, pos) in pos_markers { + if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() { + callback(pos, &cb); + } else { + panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}"); + } + } + + return Some(gc_offsets) + } + } + + /// Optimize and compile the stored instructions + pub fn compile_with_regs(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>, regs: Vec<Reg>) -> Option<(CodePtr, Vec<u32>)> { + let asm = self.x86_split(); + let mut asm = asm.alloc_regs(regs); + + // Create label instances in the code block + for (idx, name) in asm.label_names.iter().enumerate() { + let label_idx = cb.new_label(name.to_string()); + assert!(label_idx == idx); + } + + let mut ocb = ocb; // for &mut + let start_ptr = cb.get_write_ptr(); + let gc_offsets = asm.x86_emit(cb, &mut ocb); + + if let (Some(gc_offsets), false) = (gc_offsets, cb.has_dropped_bytes()) { + cb.link_labels(); + + Some((start_ptr, gc_offsets)) + } else { + cb.clear_labels(); + + None + } + } +} + +#[cfg(test)] +mod tests { + use crate::disasm::{assert_disasm}; + #[cfg(feature = "disasm")] + use crate::disasm::{unindent, disasm_addr_range}; + + use super::*; + + fn setup_asm() -> (Assembler, CodeBlock) { + (Assembler::new(), CodeBlock::new_dummy(1024)) + } + + #[test] + fn test_emit_add_lt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + let _ = asm.add(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4889c04881c0ff000000"); + } + + #[test] + fn test_emit_add_gt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + let _ = asm.add(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c01d8"); + } + + #[test] + fn test_emit_and_lt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + let _ = asm.and(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4889c04881e0ff000000"); + } + + #[test] + fn test_emit_and_gt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + let _ = asm.and(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c21d8"); + } + + #[test] + fn test_emit_cmp_lt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + asm.cmp(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF)); + asm.compile_with_num_regs(&mut cb, 0); + + assert_eq!(format!("{:x}", cb), "4881f8ff000000"); + } + + #[test] + fn test_emit_cmp_gt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + asm.cmp(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF)); + asm.compile_with_num_regs(&mut cb, 0); + + assert_eq!(format!("{:x}", cb), 
"49bbffffffffffff00004c39d8"); + } + + #[test] + fn test_emit_cmp_mem_16_bits_with_imm_16() { + let (mut asm, mut cb) = setup_asm(); + + let shape_opnd = Opnd::mem(16, Opnd::Reg(RAX_REG), 6); + + asm.cmp(shape_opnd, Opnd::UImm(0xF000)); + asm.compile_with_num_regs(&mut cb, 0); + + assert_eq!(format!("{:x}", cb), "6681780600f0"); + } + + #[test] + fn test_emit_cmp_mem_32_bits_with_imm_32() { + let (mut asm, mut cb) = setup_asm(); + + let shape_opnd = Opnd::mem(32, Opnd::Reg(RAX_REG), 4); + + asm.cmp(shape_opnd, Opnd::UImm(0xF000_0000)); + asm.compile_with_num_regs(&mut cb, 0); + + assert_eq!(format!("{:x}", cb), "817804000000f0"); + } + + #[test] + fn test_emit_or_lt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + let _ = asm.or(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4889c04881c8ff000000"); + } + + #[test] + fn test_emit_or_gt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + let _ = asm.or(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c09d8"); + } + + #[test] + fn test_emit_sub_lt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + let _ = asm.sub(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4889c04881e8ff000000"); + } + + #[test] + fn test_emit_sub_gt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + let _ = asm.sub(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c29d8"); + } + + #[test] + fn test_emit_test_lt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF)); + asm.compile_with_num_regs(&mut cb, 0); + + assert_eq!(format!("{:x}", cb), "f6c0ff"); + } + + #[test] + fn test_emit_test_gt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF)); + asm.compile_with_num_regs(&mut cb, 0); + + assert_eq!(format!("{:x}", cb), "49bbffffffffffff00004c85d8"); + } + + #[test] + fn test_emit_xor_lt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + let _ = asm.xor(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4889c04881f0ff000000"); + } + + #[test] + fn test_emit_xor_gt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + let _ = asm.xor(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c31d8"); + } + + #[test] + fn test_merge_lea_reg() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.lea(Opnd::mem(64, SP, 8)); + asm.mov(SP, sp); // should be merged to lea + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "488d5b08", {" + 0x0: lea rbx, [rbx + 8] + "}); + } + + #[test] + fn test_merge_lea_mem() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.lea(Opnd::mem(64, SP, 8)); + asm.mov(Opnd::mem(64, SP, 0), sp); // should NOT be merged to lea + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "488d4308488903", {" + 0x0: lea rax, [rbx + 8] + 0x4: mov qword ptr [rbx], rax + "}); + } + + #[test] + fn test_replace_cmp_0() { + let (mut asm, mut cb) = setup_asm(); + + let val = asm.load(Opnd::mem(64, SP, 8)); + asm.cmp(val, 0.into()); + let result = asm.csel_e(Qtrue.into(), Qfalse.into()); + 
asm.mov(Opnd::Reg(RAX_REG), result); + asm.compile_with_num_regs(&mut cb, 2); + + assert_eq!(format!("{:x}", cb), "488b43084885c0b814000000b900000000480f45c14889c0"); + } + + #[test] + fn test_merge_add_mov() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.add(CFP, Opnd::UImm(0x40)); + asm.mov(CFP, sp); // should be merged to add + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4983c540"); + } + + #[test] + fn test_merge_sub_mov() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.sub(CFP, Opnd::UImm(0x40)); + asm.mov(CFP, sp); // should be merged to add + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4983ed40"); + } + + #[test] + fn test_merge_and_mov() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.and(CFP, Opnd::UImm(0x40)); + asm.mov(CFP, sp); // should be merged to add + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4983e540"); + } + + #[test] + fn test_merge_or_mov() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.or(CFP, Opnd::UImm(0x40)); + asm.mov(CFP, sp); // should be merged to add + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4983cd40"); + } + + #[test] + fn test_merge_xor_mov() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.xor(CFP, Opnd::UImm(0x40)); + asm.mov(CFP, sp); // should be merged to add + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4983f540"); + } + + #[test] + fn test_reorder_c_args_no_cycle() { + let (mut asm, mut cb) = setup_asm(); + + asm.ccall(0 as _, vec![ + C_ARG_OPNDS[0], // mov rdi, rdi (optimized away) + C_ARG_OPNDS[1], // mov rsi, rsi (optimized away) + ]); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "b800000000ffd0", {" + 0x0: mov eax, 0 + 0x5: call rax + "}); + } + + #[test] + fn test_reorder_c_args_single_cycle() { + let (mut asm, mut cb) = setup_asm(); + + // rdi and rsi form a cycle + asm.ccall(0 as _, vec![ + C_ARG_OPNDS[1], // mov rdi, rsi + C_ARG_OPNDS[0], // mov rsi, rdi + C_ARG_OPNDS[2], // mov rdx, rdx (optimized away) + ]); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "4989f34889fe4c89dfb800000000ffd0", {" + 0x0: mov r11, rsi + 0x3: mov rsi, rdi + 0x6: mov rdi, r11 + 0x9: mov eax, 0 + 0xe: call rax + "}); + } + + #[test] + fn test_reorder_c_args_two_cycles() { + let (mut asm, mut cb) = setup_asm(); + + // rdi and rsi form a cycle, and rdx and rcx form another cycle + asm.ccall(0 as _, vec![ + C_ARG_OPNDS[1], // mov rdi, rsi + C_ARG_OPNDS[0], // mov rsi, rdi + C_ARG_OPNDS[3], // mov rdx, rcx + C_ARG_OPNDS[2], // mov rcx, rdx + ]); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "4989f34889fe4c89df4989cb4889d14c89dab800000000ffd0", {" + 0x0: mov r11, rsi + 0x3: mov rsi, rdi + 0x6: mov rdi, r11 + 0x9: mov r11, rcx + 0xc: mov rcx, rdx + 0xf: mov rdx, r11 + 0x12: mov eax, 0 + 0x17: call rax + "}); + } + + #[test] + fn test_reorder_c_args_large_cycle() { + let (mut asm, mut cb) = setup_asm(); + + // rdi, rsi, and rdx form a cycle + asm.ccall(0 as _, vec![ + C_ARG_OPNDS[1], // mov rdi, rsi + C_ARG_OPNDS[2], // mov rsi, rdx + C_ARG_OPNDS[0], // mov rdx, rdi + ]); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "4989f34889d64889fa4c89dfb800000000ffd0", {" + 0x0: mov r11, rsi + 0x3: mov rsi, rdx + 0x6: mov rdx, rdi + 0x9: mov rdi, r11 + 0xc: mov eax, 0 + 0x11: call rax + "}); + } + + #[test] + fn test_reorder_c_args_with_insn_out() { + let (mut asm, mut cb) = setup_asm(); + + let rax = 
asm.load(Opnd::UImm(1)); + let rcx = asm.load(Opnd::UImm(2)); + let rdx = asm.load(Opnd::UImm(3)); + // rcx and rdx form a cycle + asm.ccall(0 as _, vec![ + rax, // mov rdi, rax + rcx, // mov rsi, rcx + rcx, // mov rdx, rcx + rdx, // mov rcx, rdx + ]); + asm.compile_with_num_regs(&mut cb, 3); + + assert_disasm!(cb, "b801000000b902000000ba030000004889c74889ce4989cb4889d14c89dab800000000ffd0", {" + 0x0: mov eax, 1 + 0x5: mov ecx, 2 + 0xa: mov edx, 3 + 0xf: mov rdi, rax + 0x12: mov rsi, rcx + 0x15: mov r11, rcx + 0x18: mov rcx, rdx + 0x1b: mov rdx, r11 + 0x1e: mov eax, 0 + 0x23: call rax + "}); + } + + #[test] + fn test_cmov_mem() { + let (mut asm, mut cb) = setup_asm(); + + let top = Opnd::mem(64, SP, 0); + let ary_opnd = SP; + let array_len_opnd = Opnd::mem(64, SP, 16); + + asm.cmp(array_len_opnd, 1.into()); + let elem_opnd = asm.csel_g(Opnd::mem(64, ary_opnd, 0), Qnil.into()); + asm.mov(top, elem_opnd); + + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "48837b1001b804000000480f4f03488903", {" + 0x0: cmp qword ptr [rbx + 0x10], 1 + 0x5: mov eax, 4 + 0xa: cmovg rax, qword ptr [rbx] + 0xe: mov qword ptr [rbx], rax + "}); + } + + #[test] + fn test_csel_split() { + let (mut asm, mut cb) = setup_asm(); + + let stack_top = Opnd::mem(64, SP, 0); + let elem_opnd = asm.csel_ne(VALUE(0x7f22c88d1930).into(), Qnil.into()); + asm.mov(stack_top, elem_opnd); + + asm.compile_with_num_regs(&mut cb, 3); + + assert_disasm!(cb, "48b830198dc8227f0000b904000000480f44c1488903", {" + 0x0: movabs rax, 0x7f22c88d1930 + 0xa: mov ecx, 4 + 0xf: cmove rax, rcx + 0x13: mov qword ptr [rbx], rax + "}); + } +} diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs new file mode 100644 index 0000000000..f426dd87ca --- /dev/null +++ b/yjit/src/codegen.rs @@ -0,0 +1,10767 @@ +// We use the YARV bytecode constants which have a CRuby-style name +#![allow(non_upper_case_globals)] + +use crate::asm::*; +use crate::backend::ir::*; +use crate::core::*; +use crate::cruby::*; +use crate::invariants::*; +use crate::options::*; +use crate::stats::*; +use crate::utils::*; +use CodegenStatus::*; +use YARVOpnd::*; + +use std::cell::Cell; +use std::cmp; +use std::cmp::min; +use std::collections::HashMap; +use std::ffi::c_void; +use std::ffi::CStr; +use std::mem; +use std::os::raw::c_int; +use std::ptr; +use std::rc::Rc; +use std::cell::RefCell; +use std::slice; + +pub use crate::virtualmem::CodePtr; + +/// Status returned by code generation functions +#[derive(PartialEq, Debug)] +enum CodegenStatus { + SkipNextInsn, + KeepCompiling, + EndBlock, +} + +/// Code generation function signature +type InsnGenFn = fn( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus>; + +/// Ephemeral code generation state. +/// Represents a [core::Block] while we build it. +pub struct JITState { + /// Instruction sequence for the compiling block + pub iseq: IseqPtr, + + /// The iseq index of the first instruction in the block + starting_insn_idx: IseqIdx, + + /// The [Context] entering into the first instruction of the block + starting_ctx: Context, + + /// The placement for the machine code of the [Block] + output_ptr: CodePtr, + + /// Index of the current instruction being compiled + insn_idx: IseqIdx, + + /// Opcode for the instruction being compiled + opcode: usize, + + /// PC of the instruction being compiled + pc: *mut VALUE, + + /// stack_size when it started to compile the current instruction. 
+ stack_size_for_pc: u8, + + /// Execution context when compilation started + /// This allows us to peek at run-time values + ec: EcPtr, + + /// The outgoing branches the block will have + pub pending_outgoing: Vec<PendingBranchRef>, + + // --- Fields for block invalidation and invariants tracking below: + // Public mostly so into_block defined in the sibling module core + // can partially move out of Self. + + /// Whether we need to record the code address at + /// the end of this bytecode instruction for global invalidation + pub record_boundary_patch_point: bool, + + /// Code for immediately exiting upon entry to the block. + /// Required for invalidation. + pub block_entry_exit: Option<CodePtr>, + + /// A list of callable method entries that must be valid for the block to be valid. + pub method_lookup_assumptions: Vec<CmePtr>, + + /// A list of basic operators that not be redefined for the block to be valid. + pub bop_assumptions: Vec<(RedefinitionFlag, ruby_basic_operators)>, + + /// A list of constant expression path segments that must have + /// not been written to for the block to be valid. + pub stable_constant_names_assumption: Option<*const ID>, + + /// A list of classes that are not supposed to have a singleton class. + pub no_singleton_class_assumptions: Vec<VALUE>, + + /// When true, the block is valid only when base pointer is equal to environment pointer. + pub no_ep_escape: bool, + + /// When true, the block is valid only when there is a total of one ractor running + pub block_assumes_single_ractor: bool, + + /// Address range for Linux perf's [JIT interface](https://github.com/torvalds/linux/blob/master/tools/perf/Documentation/jit-interface.txt) + perf_map: Rc::<RefCell::<Vec<(CodePtr, Option<CodePtr>, String)>>>, + + /// Stack of symbol names for --yjit-perf + perf_stack: Vec<String>, +} + +impl JITState { + pub fn new(blockid: BlockId, starting_ctx: Context, output_ptr: CodePtr, ec: EcPtr) -> Self { + JITState { + iseq: blockid.iseq, + starting_insn_idx: blockid.idx, + starting_ctx, + output_ptr, + insn_idx: 0, + opcode: 0, + pc: ptr::null_mut::<VALUE>(), + stack_size_for_pc: starting_ctx.get_stack_size(), + pending_outgoing: vec![], + ec, + record_boundary_patch_point: false, + block_entry_exit: None, + method_lookup_assumptions: vec![], + bop_assumptions: vec![], + stable_constant_names_assumption: None, + no_singleton_class_assumptions: vec![], + no_ep_escape: false, + block_assumes_single_ractor: false, + perf_map: Rc::default(), + perf_stack: vec![], + } + } + + pub fn get_insn_idx(&self) -> IseqIdx { + self.insn_idx + } + + pub fn get_iseq(self: &JITState) -> IseqPtr { + self.iseq + } + + pub fn get_opcode(self: &JITState) -> usize { + self.opcode + } + + pub fn get_pc(self: &JITState) -> *mut VALUE { + self.pc + } + + pub fn get_starting_insn_idx(&self) -> IseqIdx { + self.starting_insn_idx + } + + pub fn get_block_entry_exit(&self) -> Option<CodePtr> { + self.block_entry_exit + } + + pub fn get_starting_ctx(&self) -> Context { + self.starting_ctx + } + + pub fn get_arg(&self, arg_idx: isize) -> VALUE { + // insn_len require non-test config + #[cfg(not(test))] + assert!(insn_len(self.get_opcode()) > (arg_idx + 1).try_into().unwrap()); + unsafe { *(self.pc.offset(arg_idx + 1)) } + } + + /// Return true if the current ISEQ could escape an environment. + /// + /// As of vm_push_frame(), EP is always equal to BP. However, after pushing + /// a frame, some ISEQ setups call vm_bind_update_env(), which redirects EP. 
+ /// Also, some method calls escape the environment to the heap. + fn escapes_ep(&self) -> bool { + match unsafe { get_iseq_body_type(self.iseq) } { + // <main> frame is always associated to TOPLEVEL_BINDING. + ISEQ_TYPE_MAIN | + // Kernel#eval uses a heap EP when a Binding argument is not nil. + ISEQ_TYPE_EVAL => true, + // If this ISEQ has previously escaped EP, give up the optimization. + _ if iseq_escapes_ep(self.iseq) => true, + _ => false, + } + } + + // Get the index of the next instruction + fn next_insn_idx(&self) -> u16 { + self.insn_idx + insn_len(self.get_opcode()) as u16 + } + + // Check if we are compiling the instruction at the stub PC + // Meaning we are compiling the instruction that is next to execute + pub fn at_current_insn(&self) -> bool { + let ec_pc: *mut VALUE = unsafe { get_cfp_pc(self.get_cfp()) }; + ec_pc == self.pc + } + + // Peek at the nth topmost value on the Ruby stack. + // Returns the topmost value when n == 0. + pub fn peek_at_stack(&self, ctx: &Context, n: isize) -> VALUE { + assert!(self.at_current_insn()); + assert!(n < ctx.get_stack_size() as isize); + + // Note: this does not account for ctx->sp_offset because + // this is only available when hitting a stub, and while + // hitting a stub, cfp->sp needs to be up to date in case + // codegen functions trigger GC. See :stub-sp-flush:. + return unsafe { + let sp: *mut VALUE = get_cfp_sp(self.get_cfp()); + + *(sp.offset(-1 - n)) + }; + } + + fn peek_at_self(&self) -> VALUE { + unsafe { get_cfp_self(self.get_cfp()) } + } + + fn peek_at_local(&self, n: i32) -> VALUE { + assert!(self.at_current_insn()); + + let local_table_size: isize = unsafe { get_iseq_body_local_table_size(self.iseq) } + .try_into() + .unwrap(); + assert!(n < local_table_size.try_into().unwrap()); + + unsafe { + let ep = get_cfp_ep(self.get_cfp()); + let n_isize: isize = n.try_into().unwrap(); + let offs: isize = -(VM_ENV_DATA_SIZE as isize) - local_table_size + n_isize + 1; + *ep.offset(offs) + } + } + + fn peek_at_block_handler(&self, level: u32) -> VALUE { + assert!(self.at_current_insn()); + + unsafe { + let ep = get_cfp_ep_level(self.get_cfp(), level); + *ep.offset(VM_ENV_DATA_INDEX_SPECVAL as isize) + } + } + + pub fn assume_expected_cfunc( + &mut self, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + class: VALUE, + method: ID, + cfunc: *mut c_void, + ) -> bool { + let cme = unsafe { rb_callable_method_entry(class, method) }; + + if cme.is_null() { + return false; + } + + let def_type = unsafe { get_cme_def_type(cme) }; + if def_type != VM_METHOD_TYPE_CFUNC { + return false; + } + if unsafe { get_mct_func(get_cme_def_body_cfunc(cme)) } != cfunc { + return false; + } + + self.assume_method_lookup_stable(asm, ocb, cme); + + true + } + + pub fn assume_method_lookup_stable(&mut self, asm: &mut Assembler, ocb: &mut OutlinedCb, cme: CmePtr) -> Option<()> { + jit_ensure_block_entry_exit(self, asm, ocb)?; + self.method_lookup_assumptions.push(cme); + + Some(()) + } + + /// Assume that objects of a given class will have no singleton class. + /// Return true if there has been no such singleton class since boot + /// and we can safely invalidate it. + pub fn assume_no_singleton_class(&mut self, asm: &mut Assembler, ocb: &mut OutlinedCb, klass: VALUE) -> bool { + if jit_ensure_block_entry_exit(self, asm, ocb).is_none() { + return false; // out of space, give up + } + if has_singleton_class_of(klass) { + return false; // we've seen a singleton class. disable the optimization to avoid an invalidation loop. 
+ } + self.no_singleton_class_assumptions.push(klass); + true + } + + /// Assume that base pointer is equal to environment pointer in the current ISEQ. + /// Return true if it's safe to assume so. + fn assume_no_ep_escape(&mut self, asm: &mut Assembler, ocb: &mut OutlinedCb) -> bool { + if jit_ensure_block_entry_exit(self, asm, ocb).is_none() { + return false; // out of space, give up + } + if self.escapes_ep() { + return false; // EP has been escaped in this ISEQ. disable the optimization to avoid an invalidation loop. + } + self.no_ep_escape = true; + true + } + + fn get_cfp(&self) -> *mut rb_control_frame_struct { + unsafe { get_ec_cfp(self.ec) } + } + + pub fn assume_stable_constant_names(&mut self, asm: &mut Assembler, ocb: &mut OutlinedCb, id: *const ID) -> Option<()> { + jit_ensure_block_entry_exit(self, asm, ocb)?; + self.stable_constant_names_assumption = Some(id); + + Some(()) + } + + pub fn queue_outgoing_branch(&mut self, branch: PendingBranchRef) { + self.pending_outgoing.push(branch) + } + + /// Push a symbol for --yjit-perf + fn perf_symbol_push(&mut self, asm: &mut Assembler, symbol_name: &str) { + if !self.perf_stack.is_empty() { + self.perf_symbol_range_end(asm); + } + self.perf_stack.push(symbol_name.to_string()); + self.perf_symbol_range_start(asm, symbol_name); + } + + /// Pop the stack-top symbol for --yjit-perf + fn perf_symbol_pop(&mut self, asm: &mut Assembler) { + self.perf_symbol_range_end(asm); + self.perf_stack.pop(); + if let Some(symbol_name) = self.perf_stack.get(0) { + self.perf_symbol_range_start(asm, symbol_name); + } + } + + /// Mark the start address of a symbol to be reported to perf + fn perf_symbol_range_start(&self, asm: &mut Assembler, symbol_name: &str) { + let symbol_name = format!("[JIT] {}", symbol_name); + let syms = self.perf_map.clone(); + asm.pos_marker(move |start, _| syms.borrow_mut().push((start, None, symbol_name.clone()))); + } + + /// Mark the end address of a symbol to be reported to perf + fn perf_symbol_range_end(&self, asm: &mut Assembler) { + let syms = self.perf_map.clone(); + asm.pos_marker(move |end, _| { + if let Some((_, ref mut end_store, _)) = syms.borrow_mut().last_mut() { + assert_eq!(None, *end_store); + *end_store = Some(end); + } + }); + } + + /// Flush addresses and symbols to /tmp/perf-{pid}.map + fn flush_perf_symbols(&self, cb: &CodeBlock) { + assert_eq!(0, self.perf_stack.len()); + let path = format!("/tmp/perf-{}.map", std::process::id()); + let mut f = std::fs::File::options().create(true).append(true).open(path).unwrap(); + for sym in self.perf_map.borrow().iter() { + if let (start, Some(end), name) = sym { + // In case the code straddles two pages, part of it belongs to the symbol. + for (inline_start, inline_end) in cb.writable_addrs(*start, *end) { + use std::io::Write; + let code_size = inline_end - inline_start; + writeln!(f, "{inline_start:x} {code_size:x} {name}").unwrap(); + } + } + } + } + + /// Return true if we're compiling a send-like instruction, not an opt_* instruction. + pub fn is_sendish(&self) -> bool { + match unsafe { rb_iseq_opcode_at_pc(self.iseq, self.pc) } as u32 { + YARVINSN_send | + YARVINSN_opt_send_without_block | + YARVINSN_invokesuper => true, + _ => false, + } + } +} + +/// Macro to call jit.perf_symbol_push() without evaluating arguments when +/// the option is turned off, which is useful for avoiding string allocation. +macro_rules! 
jit_perf_symbol_push { + ($jit:expr, $asm:expr, $symbol_name:expr, $perf_map:expr) => { + if get_option!(perf_map) == Some($perf_map) { + $jit.perf_symbol_push($asm, $symbol_name); + } + }; +} + +/// Macro to call jit.perf_symbol_pop(), for consistency with jit_perf_symbol_push!(). +macro_rules! jit_perf_symbol_pop { + ($jit:expr, $asm:expr, $perf_map:expr) => { + if get_option!(perf_map) == Some($perf_map) { + $jit.perf_symbol_pop($asm); + } + }; +} + +/// Macro to push and pop a perf symbol around a function call. +macro_rules! perf_call { + // perf_call!("prefix: ", func(...)) uses "prefix: func" as a symbol. + ($prefix:expr, $func_name:ident($jit:expr, $asm:expr$(, $arg:expr)*$(,)?) ) => { + { + jit_perf_symbol_push!($jit, $asm, &format!("{}{}", $prefix, stringify!($func_name)), PerfMap::Codegen); + let ret = $func_name($jit, $asm, $($arg),*); + jit_perf_symbol_pop!($jit, $asm, PerfMap::Codegen); + ret + } + }; + // perf_call! { func(...) } uses "func" as a symbol. + { $func_name:ident($jit:expr, $asm:expr$(, $arg:expr)*$(,)?) } => { + perf_call!("", $func_name($jit, $asm, $($arg),*)) + }; +} + +use crate::codegen::JCCKinds::*; + +#[allow(non_camel_case_types, unused)] +pub enum JCCKinds { + JCC_JNE, + JCC_JNZ, + JCC_JZ, + JCC_JE, + JCC_JB, + JCC_JBE, + JCC_JNA, + JCC_JNAE, + JCC_JO_MUL, +} + +#[inline(always)] +fn gen_counter_incr(asm: &mut Assembler, counter: Counter) { + // Assert that default counters are not incremented by generated code as this would impact performance + assert!(!DEFAULT_COUNTERS.contains(&counter), "gen_counter_incr incremented {:?}", counter); + + if get_option!(gen_stats) { + asm_comment!(asm, "increment counter {}", counter.get_name()); + let ptr = get_counter_ptr(&counter.get_name()); + let ptr_reg = asm.load(Opnd::const_ptr(ptr as *const u8)); + let counter_opnd = Opnd::mem(64, ptr_reg, 0); + + // Increment and store the updated value + asm.incr_counter(counter_opnd, Opnd::UImm(1)); + } +} + +// Save the incremented PC on the CFP +// This is necessary when callees can raise or allocate +fn jit_save_pc(jit: &JITState, asm: &mut Assembler) { + let pc: *mut VALUE = jit.get_pc(); + let ptr: *mut VALUE = unsafe { + let cur_insn_len = insn_len(jit.get_opcode()) as isize; + pc.offset(cur_insn_len) + }; + + asm_comment!(asm, "save PC to CFP"); + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), Opnd::const_ptr(ptr as *const u8)); +} + +/// Save the current SP on the CFP +/// This realigns the interpreter SP with the JIT SP +/// Note: this will change the current value of REG_SP, +/// which could invalidate memory operands +fn gen_save_sp(asm: &mut Assembler) { + gen_save_sp_with_offset(asm, 0); +} + +/// Save the current SP + offset on the CFP +fn gen_save_sp_with_offset(asm: &mut Assembler, offset: i8) { + if asm.ctx.get_sp_offset() != -offset { + asm_comment!(asm, "save SP to CFP"); + let stack_pointer = asm.ctx.sp_opnd(offset as i32); + let sp_addr = asm.lea(stack_pointer); + asm.mov(SP, sp_addr); + let cfp_sp_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP); + asm.mov(cfp_sp_opnd, SP); + asm.ctx.set_sp_offset(-offset); + } +} + +/// Basically jit_prepare_non_leaf_call(), but this registers the current PC +/// to lazily push a C method frame when it's necessary. +fn jit_prepare_lazy_frame_call( + jit: &mut JITState, + asm: &mut Assembler, + cme: *const rb_callable_method_entry_t, + recv_opnd: YARVOpnd, +) -> bool { + // We can use this only when the receiver is on stack. 
+ let recv_idx = match recv_opnd { + StackOpnd(recv_idx) => recv_idx, + _ => unreachable!("recv_opnd must be on stack, but got: {:?}", recv_opnd), + }; + + // Get the next PC. jit_save_pc() saves that PC. + let pc: *mut VALUE = unsafe { + let cur_insn_len = insn_len(jit.get_opcode()) as isize; + jit.get_pc().offset(cur_insn_len) + }; + + let pc_to_cfunc = CodegenGlobals::get_pc_to_cfunc(); + match pc_to_cfunc.get(&pc) { + Some(&(other_cme, _)) if other_cme != cme => { + // Bail out if it's not the only cme on this callsite. + incr_counter!(lazy_frame_failure); + return false; + } + _ => { + // Let rb_yjit_lazy_push_frame() lazily push a C frame on this PC. + incr_counter!(lazy_frame_count); + pc_to_cfunc.insert(pc, (cme, recv_idx)); + } + } + + // Save the PC to trigger a lazy frame push, and save the SP to get the receiver. + // The C func may call a method that doesn't raise, so prepare for invalidation too. + jit_prepare_non_leaf_call(jit, asm); + + // Make sure we're ready for calling rb_vm_push_cfunc_frame(). + let cfunc_argc = unsafe { get_mct_argc(get_cme_def_body_cfunc(cme)) }; + if cfunc_argc != -1 { + assert_eq!(recv_idx as i32, cfunc_argc); // verify the receiver index if possible + } + assert!(asm.get_leaf_ccall()); // It checks the stack canary we set for known_cfunc_codegen. + + true +} + +/// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that could: +/// - Perform GC allocation +/// - Take the VM lock through RB_VM_LOCK_ENTER() +/// - Perform Ruby method call +/// +/// If the routine doesn't call arbitrary methods, use jit_prepare_call_with_gc() instead. +fn jit_prepare_non_leaf_call( + jit: &mut JITState, + asm: &mut Assembler +) { + // Prepare for GC. Setting PC also prepares for showing a backtrace. + jit.record_boundary_patch_point = true; // VM lock could trigger invalidation + jit_save_pc(jit, asm); // for allocation tracing + gen_save_sp(asm); // protect objects from GC + + // In case the routine calls Ruby methods, it can set local variables + // through Kernel#binding, rb_debug_inspector API, and other means. + asm.clear_local_types(); +} + +/// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that could: +/// - Perform GC allocation +/// - Take the VM lock through RB_VM_LOCK_ENTER() +fn jit_prepare_call_with_gc( + jit: &mut JITState, + asm: &mut Assembler +) { + jit.record_boundary_patch_point = true; // VM lock could trigger invalidation + jit_save_pc(jit, asm); // for allocation tracing + gen_save_sp(asm); // protect objects from GC + + // Expect a leaf ccall(). You should use jit_prepare_non_leaf_call() if otherwise. + asm.expect_leaf_ccall(); +} + +/// Record the current codeblock write position for rewriting into a jump into +/// the outlined block later. Used to implement global code invalidation. +fn record_global_inval_patch(asm: &mut Assembler, outline_block_target_pos: CodePtr) { + // We add a padding before pos_marker so that the previous patch will not overlap this. + // jump_to_next_insn() puts a patch point at the end of the block in fallthrough cases. + // In the fallthrough case, the next block should start with the same Context, so the + // patch is fine, but it should not overlap another patch. + asm.pad_inval_patch(); + asm.pos_marker(move |code_ptr, cb| { + CodegenGlobals::push_global_inval_patch(code_ptr, outline_block_target_pos, cb); + }); +} + +/// Verify the ctx's types and mappings against the compile-time stack, self, +/// and locals. 
+fn verify_ctx(jit: &JITState, ctx: &Context) { + fn obj_info_str<'a>(val: VALUE) -> &'a str { + unsafe { CStr::from_ptr(rb_obj_info(val)).to_str().unwrap() } + } + + // Some types such as CString only assert the class field of the object + // when there has never been a singleton class created for objects of that class. + // Once there is a singleton class created they become their weaker + // `T*` variant, and we more objects should pass the verification. + fn relax_type_with_singleton_class_assumption(ty: Type) -> Type { + if let Type::CString | Type::CArray | Type::CHash = ty { + if has_singleton_class_of(ty.known_class().unwrap()) { + match ty { + Type::CString => return Type::TString, + Type::CArray => return Type::TArray, + Type::CHash => return Type::THash, + _ => (), + } + } + } + + ty + } + + // Only able to check types when at current insn + assert!(jit.at_current_insn()); + + let self_val = jit.peek_at_self(); + let self_val_type = Type::from(self_val); + let learned_self_type = ctx.get_opnd_type(SelfOpnd); + let learned_self_type = relax_type_with_singleton_class_assumption(learned_self_type); + + + // Verify self operand type + if self_val_type.diff(learned_self_type) == TypeDiff::Incompatible { + panic!( + "verify_ctx: ctx self type ({:?}) incompatible with actual value of self {}", + ctx.get_opnd_type(SelfOpnd), + obj_info_str(self_val) + ); + } + + // Verify stack operand types + let top_idx = cmp::min(ctx.get_stack_size(), MAX_TEMP_TYPES as u8); + for i in 0..top_idx { + let learned_mapping = ctx.get_opnd_mapping(StackOpnd(i)); + let learned_type = ctx.get_opnd_type(StackOpnd(i)); + let learned_type = relax_type_with_singleton_class_assumption(learned_type); + + let stack_val = jit.peek_at_stack(ctx, i as isize); + let val_type = Type::from(stack_val); + + match learned_mapping.get_kind() { + TempMappingKind::MapToSelf => { + if self_val != stack_val { + panic!( + "verify_ctx: stack value was mapped to self, but values did not match!\n stack: {}\n self: {}", + obj_info_str(stack_val), + obj_info_str(self_val) + ); + } + } + TempMappingKind::MapToLocal => { + let local_idx: u8 = learned_mapping.get_local_idx(); + let local_val = jit.peek_at_local(local_idx.into()); + if local_val != stack_val { + panic!( + "verify_ctx: stack value was mapped to local, but values did not match\n stack: {}\n local {}: {}", + obj_info_str(stack_val), + local_idx, + obj_info_str(local_val) + ); + } + } + TempMappingKind::MapToStack => {} + } + + // If the actual type differs from the learned type + if val_type.diff(learned_type) == TypeDiff::Incompatible { + panic!( + "verify_ctx: ctx type ({:?}) incompatible with actual value on stack: {} ({:?})", + learned_type, + obj_info_str(stack_val), + val_type, + ); + } + } + + // Verify local variable types + let local_table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) }; + let top_idx: usize = cmp::min(local_table_size as usize, MAX_TEMP_TYPES); + for i in 0..top_idx { + let learned_type = ctx.get_local_type(i); + let learned_type = relax_type_with_singleton_class_assumption(learned_type); + let local_val = jit.peek_at_local(i as i32); + let local_type = Type::from(local_val); + + if local_type.diff(learned_type) == TypeDiff::Incompatible { + panic!( + "verify_ctx: ctx type ({:?}) incompatible with actual value of local: {} (type {:?})", + learned_type, + obj_info_str(local_val), + local_type + ); + } + } +} + +// Fill code_for_exit_from_stub. 
This is used by branch_stub_hit() to exit +// to the interpreter when it cannot service a stub by generating new code. +// Before coming here, branch_stub_hit() takes care of fully reconstructing +// interpreter state. +fn gen_stub_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> { + let ocb = ocb.unwrap(); + let mut asm = Assembler::new(); + + gen_counter_incr(&mut asm, Counter::exit_from_branch_stub); + + asm_comment!(asm, "exit from branch stub"); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); + + asm.frame_teardown(); + + asm.cret(Qundef.into()); + + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) +} + +/// Generate an exit to return to the interpreter +fn gen_exit(exit_pc: *mut VALUE, asm: &mut Assembler) { + #[cfg(all(feature = "disasm", not(test)))] + { + let opcode = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) }; + asm_comment!(asm, "exit to interpreter on {}", insn_name(opcode as usize)); + } + + if asm.ctx.is_return_landing() { + asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); + let top = asm.stack_push(Type::Unknown); + asm.mov(top, C_RET_OPND); + } + + // Spill stack temps before returning to the interpreter + asm.spill_temps(); + + // Generate the code to exit to the interpreters + // Write the adjusted SP back into the CFP + if asm.ctx.get_sp_offset() != 0 { + let sp_opnd = asm.lea(asm.ctx.sp_opnd(0)); + asm.mov( + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), + sp_opnd + ); + } + + // Update CFP->PC + asm.mov( + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), + Opnd::const_ptr(exit_pc as *const u8) + ); + + // Accumulate stats about interpreter exits + if get_option!(gen_stats) { + asm.ccall( + rb_yjit_count_side_exit_op as *const u8, + vec![Opnd::const_ptr(exit_pc as *const u8)] + ); + + // If --yjit-trace-exits is enabled, record the exit stack while recording + // the side exits. TraceExits::Counter is handled by gen_counted_exit(). + if get_option!(trace_exits) == Some(TraceExits::All) { + asm.ccall( + rb_yjit_record_exit_stack as *const u8, + vec![Opnd::const_ptr(exit_pc as *const u8)] + ); + } + } + + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); + + asm.frame_teardown(); + + asm.cret(Qundef.into()); +} + +/// :side-exit: +/// Get an exit for the current instruction in the outlined block. The code +/// for each instruction often begins with several guards before proceeding +/// to do work. When guards fail, an option we have is to exit to the +/// interpreter at an instruction boundary. The piece of code that takes +/// care of reconstructing interpreter state and exiting out of generated +/// code is called the side exit. +/// +/// No guards change the logic for reconstructing interpreter state at the +/// moment, so there is one unique side exit for each context. Note that +/// it's incorrect to jump to the side exit after any ctx stack push operations +/// since they change the logic required for reconstructing interpreter state. +pub fn gen_outlined_exit(exit_pc: *mut VALUE, ctx: &Context, ocb: &mut OutlinedCb) -> Option<CodePtr> { + let mut cb = ocb.unwrap(); + let mut asm = Assembler::new(); + asm.ctx = *ctx; + asm.set_reg_temps(ctx.get_reg_temps()); + + gen_exit(exit_pc, &mut asm); + + asm.compile(&mut cb, None).map(|(code_ptr, _)| code_ptr) +} + +/// Get a side exit. Increment a counter in it if --yjit-stats is enabled. 
+pub fn gen_counted_exit(exit_pc: *mut VALUE, side_exit: CodePtr, ocb: &mut OutlinedCb, counter: Option<Counter>) -> Option<CodePtr> { + // The counter is only incremented when stats are enabled + if !get_option!(gen_stats) { + return Some(side_exit); + } + let counter = match counter { + Some(counter) => counter, + None => return Some(side_exit), + }; + + let mut asm = Assembler::new(); + + // Increment a counter + gen_counter_incr(&mut asm, counter); + + // Trace a counted exit if --yjit-trace-exits=counter is given. + // TraceExits::All is handled by gen_exit(). + if get_option!(trace_exits) == Some(TraceExits::CountedExit(counter)) { + with_caller_saved_temp_regs(&mut asm, |asm| { + asm.ccall(rb_yjit_record_exit_stack as *const u8, vec![Opnd::const_ptr(exit_pc as *const u8)]); + }); + } + + // Jump to the existing side exit + asm.jmp(Target::CodePtr(side_exit)); + + let ocb = ocb.unwrap(); + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) +} + +/// Preserve caller-saved stack temp registers during the call of a given block +fn with_caller_saved_temp_regs<F, R>(asm: &mut Assembler, block: F) -> R where F: FnOnce(&mut Assembler) -> R { + for &reg in caller_saved_temp_regs() { + asm.cpush(Opnd::Reg(reg)); // save stack temps + } + let ret = block(asm); + for &reg in caller_saved_temp_regs().rev() { + asm.cpop_into(Opnd::Reg(reg)); // restore stack temps + } + ret +} + +// Ensure that there is an exit for the start of the block being compiled. +// Block invalidation uses this exit. +#[must_use] +pub fn jit_ensure_block_entry_exit(jit: &mut JITState, asm: &mut Assembler, ocb: &mut OutlinedCb) -> Option<()> { + if jit.block_entry_exit.is_some() { + return Some(()); + } + + let block_starting_context = &jit.get_starting_ctx(); + + // If we're compiling the first instruction in the block. + if jit.insn_idx == jit.starting_insn_idx { + // Generate the exit with the cache in Assembler. + let side_exit_context = SideExitContext::new(jit.pc, *block_starting_context); + let entry_exit = asm.get_side_exit(&side_exit_context, None, ocb); + jit.block_entry_exit = Some(entry_exit?); + } else { + let block_entry_pc = unsafe { rb_iseq_pc_at_idx(jit.iseq, jit.starting_insn_idx.into()) }; + jit.block_entry_exit = Some(gen_outlined_exit(block_entry_pc, block_starting_context, ocb)?); + } + + Some(()) +} + +// Landing code for when c_return tracing is enabled. See full_cfunc_return(). +fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> Option<CodePtr> { + let ocb = ocb.unwrap(); + let mut asm = Assembler::new(); + + // This chunk of code expects REG_EC to be filled properly and + // RAX to contain the return value of the C method. + + asm_comment!(asm, "full cfunc return"); + asm.ccall( + rb_full_cfunc_return as *const u8, + vec![EC, C_RET_OPND] + ); + + // Count the exit + gen_counter_incr(&mut asm, Counter::traced_cfunc_return); + + // Return to the interpreter + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); + + asm.frame_teardown(); + + asm.cret(Qundef.into()); + + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) +} + +/// Generate a continuation for leave that exits to the interpreter at REG_CFP->pc. +/// This is used by gen_leave() and gen_entry_prologue() +fn gen_leave_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> { + let ocb = ocb.unwrap(); + let mut asm = Assembler::new(); + + // gen_leave() fully reconstructs interpreter state and leaves the + // return value in C_RET_OPND before coming here.
+ let ret_opnd = asm.live_reg_opnd(C_RET_OPND); + + // Every exit to the interpreter should be counted + gen_counter_incr(&mut asm, Counter::leave_interp_return); + + asm_comment!(asm, "exit from leave"); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); + + asm.frame_teardown(); + + asm.cret(ret_opnd); + + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) +} + +// Increment SP and transfer the execution to the interpreter after jit_exec_exception(). +// On jit_exec_exception(), you need to return Qundef to keep executing caller non-FINISH +// frames on the interpreter. You also need to increment SP to push the return value to +// the caller's stack, which is different from gen_stub_exit(). +fn gen_leave_exception(ocb: &mut OutlinedCb) -> Option<CodePtr> { + let ocb = ocb.unwrap(); + let mut asm = Assembler::new(); + + // gen_leave() leaves the return value in C_RET_OPND before coming here. + let ruby_ret_val = asm.live_reg_opnd(C_RET_OPND); + + // Every exit to the interpreter should be counted + gen_counter_incr(&mut asm, Counter::leave_interp_return); + + asm_comment!(asm, "push return value through cfp->sp"); + let cfp_sp = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP); + let sp = asm.load(cfp_sp); + asm.mov(Opnd::mem(64, sp, 0), ruby_ret_val); + let new_sp = asm.add(sp, SIZEOF_VALUE.into()); + asm.mov(cfp_sp, new_sp); + + asm_comment!(asm, "exit from exception"); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); + + asm.frame_teardown(); + + // Execute vm_exec_core + asm.cret(Qundef.into()); + + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) +} + +// Generate a runtime guard that ensures the PC is at the expected +// instruction index in the iseq, otherwise takes an entry stub +// that generates another check and entry. +// This is to handle the situation of optional parameters. +// When a function with optional parameters is called, the entry +// PC for the method isn't necessarily 0. +pub fn gen_entry_chain_guard( + asm: &mut Assembler, + ocb: &mut OutlinedCb, + iseq: IseqPtr, + insn_idx: u16, +) -> Option<PendingEntryRef> { + let entry = new_pending_entry(); + let stub_addr = gen_entry_stub(entry.uninit_entry.as_ptr() as usize, ocb)?; + + let pc_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC); + let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) }; + let expected_pc_opnd = Opnd::const_ptr(expected_pc as *const u8); + + asm_comment!(asm, "guard expected PC"); + asm.cmp(pc_opnd, expected_pc_opnd); + + asm.mark_entry_start(&entry); + asm.jne(stub_addr.into()); + asm.mark_entry_end(&entry); + return Some(entry); +} + +/// Compile an interpreter entry block to be inserted into an iseq +/// Returns None if compilation fails. +/// If jit_exception is true, compile JIT code for handling exceptions. +/// See [jit_compile_exception] for details. 
+pub fn gen_entry_prologue( + cb: &mut CodeBlock, + ocb: &mut OutlinedCb, + iseq: IseqPtr, + insn_idx: u16, + jit_exception: bool, +) -> Option<CodePtr> { + let code_ptr = cb.get_write_ptr(); + + let mut asm = Assembler::new(); + if get_option_ref!(dump_disasm).is_some() { + asm_comment!(asm, "YJIT entry point: {}", iseq_get_location(iseq, 0)); + } else { + asm_comment!(asm, "YJIT entry"); + } + + asm.frame_setup(); + + // Save the CFP, EC, SP registers to the C stack + asm.cpush(CFP); + asm.cpush(EC); + asm.cpush(SP); + + // We are passed EC and CFP as arguments + asm.mov(EC, C_ARG_OPNDS[0]); + asm.mov(CFP, C_ARG_OPNDS[1]); + + // Load the current SP from the CFP into REG_SP + asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); + + // Setup cfp->jit_return + // If this is an exception handler entry point + if jit_exception { + // On jit_exec_exception(), it's NOT safe to return a non-Qundef value + // from a non-FINISH frame. This function fixes that problem. + // See [jit_compile_exception] for details. + asm.ccall( + rb_yjit_set_exception_return as *mut u8, + vec![ + CFP, + Opnd::const_ptr(CodegenGlobals::get_leave_exit_code().raw_ptr(cb)), + Opnd::const_ptr(CodegenGlobals::get_leave_exception_code().raw_ptr(cb)), + ], + ); + } else { + // On jit_exec() or JIT_EXEC(), it's safe to return a non-Qundef value + // on the entry frame. See [jit_compile] for details. + asm.mov( + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), + Opnd::const_ptr(CodegenGlobals::get_leave_exit_code().raw_ptr(cb)), + ); + } + + // We're compiling iseqs that we *expect* to start at `insn_idx`. + // But in the case of optional parameters or when handling exceptions, + // the interpreter can set the pc to a different location. For + // such scenarios, we'll add a runtime check that the PC we've + // compiled for is the same PC that the interpreter wants us to run with. + // If they don't match, then we'll jump to an entry stub and generate + // another PC check and entry there. + let pending_entry = if unsafe { get_iseq_flags_has_opt(iseq) } || jit_exception { + Some(gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?) + } else { + None + }; + + asm.compile(cb, Some(ocb))?; + + if cb.has_dropped_bytes() { + None + } else { + // Mark code pages for code GC + let iseq_payload = get_or_create_iseq_payload(iseq); + for page in cb.addrs_to_pages(code_ptr, cb.get_write_ptr()) { + iseq_payload.pages.insert(page); + } + // Write an entry to the heap and push it to the ISEQ + if let Some(pending_entry) = pending_entry { + let pending_entry = Rc::try_unwrap(pending_entry) + .ok().expect("PendingEntry should be unique"); + iseq_payload.entries.push(pending_entry.into_entry()); + } + Some(code_ptr) + } +} + +// Generate code to check for interrupts and take a side-exit. +// Warning: this function clobbers REG0 +fn gen_check_ints( + asm: &mut Assembler, + counter: Counter, +) { + // Check for interrupts + // see RUBY_VM_CHECK_INTS(ec) macro + asm_comment!(asm, "RUBY_VM_CHECK_INTS(ec)"); + + // Not checking interrupt_mask since it's zero outside finalize_deferred_heap_pages, + // signal_exec, or rb_postponed_job_flush. + let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG)); + asm.test(interrupt_flag, interrupt_flag); + + asm.jnz(Target::side_exit(counter)); +} + +// Generate a stubbed unconditional jump to the next bytecode instruction. +// Blocks that are part of a guard chain can use this to share the same successor. 
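+// For example (illustrative): several chain-guarded versions of the same instruction
+// all call this with the chain depth reset, so they request the same successor
+// BlockId { iseq, idx: next_insn_idx } and can end up reusing one compiled block for
+// the code that follows instead of each generating its own copy.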
+fn jump_to_next_insn( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<()> { + // Reset the depth since in current usages we only ever jump to + // chain_depth > 0 from the same instruction. + let mut reset_depth = asm.ctx; + reset_depth.reset_chain_depth_and_defer(); + + let jump_block = BlockId { + iseq: jit.iseq, + idx: jit.next_insn_idx(), + }; + + // We are at the end of the current instruction. Record the boundary. + if jit.record_boundary_patch_point { + jit.record_boundary_patch_point = false; + let exit_pc = unsafe { jit.pc.offset(insn_len(jit.opcode).try_into().unwrap()) }; + let exit_pos = gen_outlined_exit(exit_pc, &reset_depth, ocb); + record_global_inval_patch(asm, exit_pos?); + } + + // Generate the jump instruction + gen_direct_jump(jit, &reset_depth, jump_block, asm); + Some(()) +} + +// Compile a sequence of bytecode instructions for a given basic block version. +// Part of gen_block_version(). +// Note: this function will mutate its context while generating code, +// but the input start_ctx argument should remain immutable. +pub fn gen_single_block( + blockid: BlockId, + start_ctx: &Context, + ec: EcPtr, + cb: &mut CodeBlock, + ocb: &mut OutlinedCb, +) -> Result<BlockRef, ()> { + // Limit the number of specialized versions for this block + let ctx = limit_block_versions(blockid, start_ctx); + + verify_blockid(blockid); + assert!(!(blockid.idx == 0 && ctx.get_stack_size() > 0)); + + // Save machine code placement of the block. `cb` might page switch when we + // generate code in `ocb`. + let block_start_addr = cb.get_write_ptr(); + + // Instruction sequence to compile + let iseq = blockid.iseq; + let iseq_size = unsafe { get_iseq_encoded_size(iseq) }; + let iseq_size: IseqIdx = if let Ok(size) = iseq_size.try_into() { + size + } else { + // ISeq too large to compile + return Err(()); + }; + let mut insn_idx: IseqIdx = blockid.idx; + + // Initialize a JIT state object + let mut jit = JITState::new(blockid, ctx, cb.get_write_ptr(), ec); + jit.iseq = blockid.iseq; + + // Create a backend assembler instance + let mut asm = Assembler::new(); + asm.ctx = ctx; + + #[cfg(feature = "disasm")] + if get_option_ref!(dump_disasm).is_some() { + let blockid_idx = blockid.idx; + let chain_depth = if asm.ctx.get_chain_depth() > 0 { format!("(chain_depth: {})", asm.ctx.get_chain_depth()) } else { "".to_string() }; + asm_comment!(asm, "Block: {} {}", iseq_get_location(blockid.iseq, blockid_idx), chain_depth); + asm_comment!(asm, "reg_temps: {:08b}", asm.ctx.get_reg_temps().as_u8()); + } + + // Mark the start of an ISEQ for --yjit-perf + jit_perf_symbol_push!(jit, &mut asm, &get_iseq_name(iseq), PerfMap::ISEQ); + + if asm.ctx.is_return_landing() { + // Continuation of the end of gen_leave(). + // Reload REG_SP for the current frame and transfer the return value + // to the stack top. + asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); + + let top = asm.stack_push(Type::Unknown); + asm.mov(top, C_RET_OPND); + + asm.ctx.clear_return_landing(); + } + + // For each instruction to compile + // NOTE: could rewrite this loop with a std::iter::Iterator + while insn_idx < iseq_size { + // Get the current pc and opcode + let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) }; + // try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes. + let opcode: usize = unsafe { rb_iseq_opcode_at_pc(iseq, pc) } + .try_into() + .unwrap(); + + // We need opt_getconstant_path to be in a block all on its own. 
Cut the block short + // if we run into it. This is necessary because we want to invalidate based on the + // instruction's index. + if opcode == YARVINSN_opt_getconstant_path.as_usize() && insn_idx > jit.starting_insn_idx { + jump_to_next_insn(&mut jit, &mut asm, ocb); + break; + } + + // Set the current instruction + jit.insn_idx = insn_idx; + jit.opcode = opcode; + jit.pc = pc; + jit.stack_size_for_pc = asm.ctx.get_stack_size(); + asm.set_side_exit_context(pc, asm.ctx.get_stack_size()); + + // stack_pop doesn't immediately deallocate a register for stack temps, + // but it's safe to do so at this instruction boundary. + for stack_idx in asm.ctx.get_stack_size()..MAX_REG_TEMPS { + asm.ctx.dealloc_temp_reg(stack_idx); + } + + // If previous instruction requested to record the boundary + if jit.record_boundary_patch_point { + // Generate an exit to this instruction and record it + let exit_pos = gen_outlined_exit(jit.pc, &asm.ctx, ocb).ok_or(())?; + record_global_inval_patch(&mut asm, exit_pos); + jit.record_boundary_patch_point = false; + } + + // In debug mode, verify our existing assumption + if cfg!(debug_assertions) && get_option!(verify_ctx) && jit.at_current_insn() { + verify_ctx(&jit, &asm.ctx); + } + + // :count-placement: + // Count bytecode instructions that execute in generated code. + // Note that the increment happens even when the output takes side exit. + gen_counter_incr(&mut asm, Counter::yjit_insns_count); + + // Lookup the codegen function for this instruction + let mut status = None; + if let Some(gen_fn) = get_gen_fn(VALUE(opcode)) { + // Add a comment for the name of the YARV instruction + asm_comment!(asm, "Insn: {:04} {} (stack_size: {})", insn_idx, insn_name(opcode), asm.ctx.get_stack_size()); + + // If requested, dump instructions for debugging + if get_option!(dump_insns) { + println!("compiling {}", insn_name(opcode)); + print_str(&mut asm, &format!("executing {}", insn_name(opcode))); + } + + // Call the code generation function + jit_perf_symbol_push!(jit, &mut asm, &insn_name(opcode), PerfMap::Codegen); + status = gen_fn(&mut jit, &mut asm, ocb); + jit_perf_symbol_pop!(jit, &mut asm, PerfMap::Codegen); + + #[cfg(debug_assertions)] + assert!(!asm.get_leaf_ccall(), "ccall() wasn't used after leaf_ccall was set in {}", insn_name(opcode)); + } + + // If we can't compile this instruction + // exit to the interpreter and stop compiling + if status == None { + if get_option!(dump_insns) { + println!("can't compile {}", insn_name(opcode)); + } + + // Rewind stack_size using ctx.with_stack_size to allow stack_size changes + // before you return None. + asm.ctx = asm.ctx.with_stack_size(jit.stack_size_for_pc); + gen_exit(jit.pc, &mut asm); + + // If this is the first instruction in the block, then + // the entry address is the address for block_entry_exit + if insn_idx == jit.starting_insn_idx { + jit.block_entry_exit = Some(jit.output_ptr); + } + + break; + } + + // For now, reset the chain depth after each instruction as only the + // first instruction in the block can concern itself with the depth. 
+ asm.ctx.reset_chain_depth_and_defer(); + + // Move to the next instruction to compile + insn_idx += insn_len(opcode) as u16; + + // Move past next instruction when instructed + if status == Some(SkipNextInsn) { + let next_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) }; + let next_opcode: usize = unsafe { rb_iseq_opcode_at_pc(iseq, next_pc) }.try_into().unwrap(); + insn_idx += insn_len(next_opcode) as u16; + } + + // If the instruction terminates this block + if status == Some(EndBlock) { + break; + } + } + let end_insn_idx = insn_idx; + + // We currently can't handle cases where the request is for a block that + // doesn't go to the next instruction in the same iseq. + assert!(!jit.record_boundary_patch_point); + + // Pad the block if it has the potential to be invalidated + if jit.block_entry_exit.is_some() { + asm.pad_inval_patch(); + } + + // Mark the end of an ISEQ for --yjit-perf + jit_perf_symbol_pop!(jit, &mut asm, PerfMap::ISEQ); + + // Compile code into the code block + let (_, gc_offsets) = asm.compile(cb, Some(ocb)).ok_or(())?; + let end_addr = cb.get_write_ptr(); + + // Flush perf symbols after asm.compile() writes addresses + if get_option!(perf_map).is_some() { + jit.flush_perf_symbols(cb); + } + + // If code for the block doesn't fit, fail + if cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes() { + return Err(()); + } + + // Block compiled successfully + Ok(jit.into_block(end_insn_idx, block_start_addr, end_addr, gc_offsets)) +} + +fn gen_nop( + _jit: &mut JITState, + _asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Do nothing + Some(KeepCompiling) +} + +fn gen_pop( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Decrement SP + asm.stack_pop(1); + Some(KeepCompiling) +} + +fn gen_dup( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let dup_val = asm.stack_opnd(0); + let mapping = asm.ctx.get_opnd_mapping(dup_val.into()); + + let loc0 = asm.stack_push_mapping(mapping); + asm.mov(loc0, dup_val); + + Some(KeepCompiling) +} + +// duplicate stack top n elements +fn gen_dupn( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_usize(); + + // In practice, seems to be only used for n==2 + if n != 2 { + return None; + } + + let opnd1: Opnd = asm.stack_opnd(1); + let opnd0: Opnd = asm.stack_opnd(0); + + let mapping1 = asm.ctx.get_opnd_mapping(opnd1.into()); + let mapping0 = asm.ctx.get_opnd_mapping(opnd0.into()); + + let dst1: Opnd = asm.stack_push_mapping(mapping1); + asm.mov(dst1, opnd1); + + let dst0: Opnd = asm.stack_push_mapping(mapping0); + asm.mov(dst0, opnd0); + + Some(KeepCompiling) +} + +// Swap top 2 stack entries +fn gen_swap( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + stack_swap(asm, 0, 1); + Some(KeepCompiling) +} + +fn stack_swap( + asm: &mut Assembler, + offset0: i32, + offset1: i32, +) { + let stack0_mem = asm.stack_opnd(offset0); + let stack1_mem = asm.stack_opnd(offset1); + + let mapping0 = asm.ctx.get_opnd_mapping(stack0_mem.into()); + let mapping1 = asm.ctx.get_opnd_mapping(stack1_mem.into()); + + let stack0_reg = asm.load(stack0_mem); + let stack1_reg = asm.load(stack1_mem); + asm.mov(stack0_mem, stack1_reg); + asm.mov(stack1_mem, stack0_reg); + + asm.ctx.set_opnd_mapping(stack0_mem.into(), mapping1); + asm.ctx.set_opnd_mapping(stack1_mem.into(), mapping0); +} + +fn 
gen_putnil( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + jit_putobject(asm, Qnil); + Some(KeepCompiling) +} + +fn jit_putobject(asm: &mut Assembler, arg: VALUE) { + let val_type: Type = Type::from(arg); + let stack_top = asm.stack_push(val_type); + asm.mov(stack_top, arg.into()); +} + +fn gen_putobject_int2fix( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let opcode = jit.opcode; + let cst_val: usize = if opcode == YARVINSN_putobject_INT2FIX_0_.as_usize() { + 0 + } else { + 1 + }; + let cst_val = VALUE::fixnum_from_usize(cst_val); + + if let Some(result) = fuse_putobject_opt_ltlt(jit, asm, cst_val, ocb) { + return Some(result); + } + + jit_putobject(asm, cst_val); + Some(KeepCompiling) +} + +fn gen_putobject( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let arg: VALUE = jit.get_arg(0); + + if let Some(result) = fuse_putobject_opt_ltlt(jit, asm, arg, ocb) { + return Some(result); + } + + jit_putobject(asm, arg); + Some(KeepCompiling) +} + +/// Combine `putobject` and `opt_ltlt` together if profitable, for example when +/// left shifting an integer by a constant amount. +fn fuse_putobject_opt_ltlt( + jit: &mut JITState, + asm: &mut Assembler, + constant_object: VALUE, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let next_opcode = unsafe { rb_vm_insn_addr2opcode(jit.pc.add(insn_len(jit.opcode).as_usize()).read().as_ptr()) }; + if next_opcode == YARVINSN_opt_ltlt as i32 && constant_object.fixnum_p() { + // Untag the fixnum shift amount + let shift_amt = constant_object.as_isize() >> 1; + if shift_amt > 63 || shift_amt < 0 { + return None; + } + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + let lhs = jit.peek_at_stack(&asm.ctx, 0); + if !lhs.fixnum_p() { + return None; + } + + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_LTLT) { + return None; + } + + asm_comment!(asm, "integer left shift with rhs={shift_amt}"); + let lhs = asm.stack_opnd(0); + + // Guard that lhs is a fixnum if necessary + let lhs_type = asm.ctx.get_opnd_type(lhs.into()); + if lhs_type != Type::Fixnum { + asm_comment!(asm, "guard arg0 fixnum"); + asm.test(lhs, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + + jit_chain_guard( + JCC_JZ, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_send_not_fixnums, + ); + } + + asm.stack_pop(1); + fixnum_left_shift_body(asm, lhs, shift_amt as u64); + return Some(SkipNextInsn); + } + return None; +} + +fn gen_putself( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + + // Write it on the stack + let stack_top = asm.stack_push_self(); + asm.mov( + stack_top, + Opnd::mem(VALUE_BITS, CFP, RUBY_OFFSET_CFP_SELF) + ); + + Some(KeepCompiling) +} + +fn gen_putspecialobject( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let object_type = jit.get_arg(0).as_usize(); + + if object_type == VM_SPECIAL_OBJECT_VMCORE.as_usize() { + let stack_top = asm.stack_push(Type::UnknownHeap); + let frozen_core = unsafe { rb_mRubyVMFrozenCore }; + asm.mov(stack_top, frozen_core.into()); + Some(KeepCompiling) + } else { + // TODO: implement for VM_SPECIAL_OBJECT_CBASE and + // VM_SPECIAL_OBJECT_CONST_BASE + None + } +} + +// set Nth stack entry to stack top +fn gen_setn( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> 
Option<CodegenStatus> { + let n = jit.get_arg(0).as_usize(); + + let top_val = asm.stack_opnd(0); + let dst_opnd = asm.stack_opnd(n.try_into().unwrap()); + asm.mov( + dst_opnd, + top_val + ); + + let mapping = asm.ctx.get_opnd_mapping(top_val.into()); + asm.ctx.set_opnd_mapping(dst_opnd.into(), mapping); + + Some(KeepCompiling) +} + +// get nth stack value, then push it +fn gen_topn( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_usize(); + + let top_n_val = asm.stack_opnd(n.try_into().unwrap()); + let mapping = asm.ctx.get_opnd_mapping(top_n_val.into()); + let loc0 = asm.stack_push_mapping(mapping); + asm.mov(loc0, top_n_val); + + Some(KeepCompiling) +} + +// Pop n values off the stack +fn gen_adjuststack( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_usize(); + asm.stack_pop(n); + Some(KeepCompiling) +} + +fn gen_opt_plus( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + }; + + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS) { + return None; + } + + // Check that both operands are fixnums + guard_two_fixnums(jit, asm, ocb); + + // Get the operands from the stack + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); + + // Add arg0 + arg1 and test for overflow + let arg0_untag = asm.sub(arg0, Opnd::Imm(1)); + let out_val = asm.add(arg0_untag, arg1); + asm.jo(Target::side_exit(Counter::opt_plus_overflow)); + + // Push the output on the stack + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, out_val); + + Some(KeepCompiling) + } else { + gen_opt_send_without_block(jit, asm, ocb) + } +} + +// new array initialized from top N values +fn gen_newarray( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_u32(); + + // Save the PC and SP because we are allocating + jit_prepare_call_with_gc(jit, asm); + + // If n is 0, then elts is never going to be read, so we can just pass null + let values_ptr = if n == 0 { + Opnd::UImm(0) + } else { + asm_comment!(asm, "load pointer to array elements"); + let values_opnd = asm.ctx.sp_opnd(-(n as i32)); + asm.lea(values_opnd) + }; + + // call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts); + let new_ary = asm.ccall( + rb_ec_ary_new_from_values as *const u8, + vec![ + EC, + Opnd::UImm(n.into()), + values_ptr + ] + ); + + asm.stack_pop(n.as_usize()); + let stack_ret = asm.stack_push(Type::CArray); + asm.mov(stack_ret, new_ary); + + Some(KeepCompiling) +} + +// dup array +fn gen_duparray( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let ary = jit.get_arg(0); + + // Save the PC and SP because we are allocating + jit_prepare_call_with_gc(jit, asm); + + // call rb_ary_resurrect(VALUE ary); + let new_ary = asm.ccall( + rb_ary_resurrect as *const u8, + vec![ary.into()], + ); + + let stack_ret = asm.stack_push(Type::CArray); + asm.mov(stack_ret, new_ary); + + Some(KeepCompiling) +} + +// dup hash +fn gen_duphash( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let hash = jit.get_arg(0); + + // 
Save the PC and SP because we are allocating + jit_prepare_call_with_gc(jit, asm); + + // call rb_hash_resurrect(VALUE hash); + let hash = asm.ccall(rb_hash_resurrect as *const u8, vec![hash.into()]); + + let stack_ret = asm.stack_push(Type::CHash); + asm.mov(stack_ret, hash); + + Some(KeepCompiling) +} + +// call to_a on the array on the stack +fn gen_splatarray( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let flag = jit.get_arg(0).as_usize(); + + // Save the PC and SP because the callee may call #to_a + // Note that this modifies REG_SP, which is why we do it first + jit_prepare_non_leaf_call(jit, asm); + + // Get the operands from the stack + let ary_opnd = asm.stack_opnd(0); + + // Call rb_vm_splat_array(flag, ary) + let ary = asm.ccall(rb_vm_splat_array as *const u8, vec![flag.into(), ary_opnd]); + asm.stack_pop(1); // Keep it on stack during ccall for GC + + let stack_ret = asm.stack_push(Type::TArray); + asm.mov(stack_ret, ary); + + Some(KeepCompiling) +} + +// call to_hash on hash to keyword splat before converting block +// e.g. foo(**object, &block) +fn gen_splatkw( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Defer compilation so we can specialize on a runtime hash operand + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + let comptime_hash = jit.peek_at_stack(&asm.ctx, 1); + if comptime_hash.hash_p() { + // If a compile-time hash operand is T_HASH, just guard that it's T_HASH. + let hash_opnd = asm.stack_opnd(1); + guard_object_is_hash(asm, hash_opnd, hash_opnd.into(), Counter::splatkw_not_hash); + } else if comptime_hash.nil_p() { + // Speculate we'll see nil if compile-time hash operand is nil + let hash_opnd = asm.stack_opnd(1); + let hash_opnd_type = asm.ctx.get_opnd_type(hash_opnd.into()); + + if hash_opnd_type != Type::Nil { + asm.cmp(hash_opnd, Qnil.into()); + asm.jne(Target::side_exit(Counter::splatkw_not_nil)); + + if Type::Nil.diff(hash_opnd_type) != TypeDiff::Incompatible { + asm.ctx.upgrade_opnd_type(hash_opnd.into(), Type::Nil); + } + } + } else { + // Otherwise, call #to_hash on the operand if it's not nil. + + // Save the PC and SP because the callee may call #to_hash + jit_prepare_non_leaf_call(jit, asm); + + // Get the operands from the stack + let block_opnd = asm.stack_opnd(0); + let block_type = asm.ctx.get_opnd_type(block_opnd.into()); + let hash_opnd = asm.stack_opnd(1); + + c_callable! 
{ + fn to_hash_if_not_nil(mut obj: VALUE) -> VALUE { + if obj != Qnil { + obj = unsafe { rb_to_hash_type(obj) }; + } + obj + } + } + + let hash = asm.ccall(to_hash_if_not_nil as _, vec![hash_opnd]); + asm.stack_pop(2); // Keep it on stack during ccall for GC + + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, hash); + asm.stack_push(block_type); + // Leave block_opnd spilled by ccall as is + asm.ctx.dealloc_temp_reg(asm.ctx.get_stack_size() - 1); + } + + Some(KeepCompiling) +} + +// concat two arrays +fn gen_concatarray( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Save the PC and SP because the callee may call #to_a + // Note that this modifies REG_SP, which is why we do it first + jit_prepare_non_leaf_call(jit, asm); + + // Get the operands from the stack + let ary2st_opnd = asm.stack_opnd(0); + let ary1_opnd = asm.stack_opnd(1); + + // Call rb_vm_concat_array(ary1, ary2st) + let ary = asm.ccall(rb_vm_concat_array as *const u8, vec![ary1_opnd, ary2st_opnd]); + asm.stack_pop(2); // Keep them on stack during ccall for GC + + let stack_ret = asm.stack_push(Type::TArray); + asm.mov(stack_ret, ary); + + Some(KeepCompiling) +} + +// concat second array to first array. +// first argument must already be an array. +// attempts to convert second object to array using to_a. +fn gen_concattoarray( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Save the PC and SP because the callee may call #to_a + jit_prepare_non_leaf_call(jit, asm); + + // Get the operands from the stack + let ary2_opnd = asm.stack_opnd(0); + let ary1_opnd = asm.stack_opnd(1); + + let ary = asm.ccall(rb_vm_concat_to_array as *const u8, vec![ary1_opnd, ary2_opnd]); + asm.stack_pop(2); // Keep them on stack during ccall for GC + + let stack_ret = asm.stack_push(Type::TArray); + asm.mov(stack_ret, ary); + + Some(KeepCompiling) +} + +// push given number of objects to array directly before. 
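+// (Illustrative) the operand stack here looks like [.., ary, obj_1, .., obj_num];
+// rb_ary_cat() appends obj_1..obj_num to ary and the array is pushed back as the
+// new stack top.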
+fn gen_pushtoarray( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let num = jit.get_arg(0).as_u64(); + + // Save the PC and SP because the callee may allocate + jit_prepare_call_with_gc(jit, asm); + + // Get the operands from the stack + let ary_opnd = asm.stack_opnd(num as i32); + let objp_opnd = asm.lea(asm.ctx.sp_opnd(-(num as i32))); + + let ary = asm.ccall(rb_ary_cat as *const u8, vec![ary_opnd, objp_opnd, num.into()]); + asm.stack_pop(num as usize + 1); // Keep it on stack during ccall for GC + + let stack_ret = asm.stack_push(Type::TArray); + asm.mov(stack_ret, ary); + + Some(KeepCompiling) +} + +// new range initialized from top 2 values +fn gen_newrange( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let flag = jit.get_arg(0).as_usize(); + + // rb_range_new() allocates and can raise + jit_prepare_non_leaf_call(jit, asm); + + // val = rb_range_new(low, high, (int)flag); + let range_opnd = asm.ccall( + rb_range_new as *const u8, + vec![ + asm.stack_opnd(1), + asm.stack_opnd(0), + flag.into() + ] + ); + + asm.stack_pop(2); + let stack_ret = asm.stack_push(Type::UnknownHeap); + asm.mov(stack_ret, range_opnd); + + Some(KeepCompiling) +} + +fn guard_object_is_heap( + asm: &mut Assembler, + object: Opnd, + object_opnd: YARVOpnd, + counter: Counter, +) { + let object_type = asm.ctx.get_opnd_type(object_opnd); + if object_type.is_heap() { + return; + } + + asm_comment!(asm, "guard object is heap"); + + // Test that the object is not an immediate + asm.test(object, (RUBY_IMMEDIATE_MASK as u64).into()); + asm.jnz(Target::side_exit(counter)); + + // Test that the object is not false + asm.cmp(object, Qfalse.into()); + asm.je(Target::side_exit(counter)); + + if Type::UnknownHeap.diff(object_type) != TypeDiff::Incompatible { + asm.ctx.upgrade_opnd_type(object_opnd, Type::UnknownHeap); + } +} + +fn guard_object_is_array( + asm: &mut Assembler, + object: Opnd, + object_opnd: YARVOpnd, + counter: Counter, +) { + let object_type = asm.ctx.get_opnd_type(object_opnd); + if object_type.is_array() { + return; + } + + let object_reg = match object { + Opnd::InsnOut { .. } => object, + _ => asm.load(object), + }; + guard_object_is_heap(asm, object_reg, object_opnd, counter); + + asm_comment!(asm, "guard object is array"); + + // Pull out the type mask + let flags_opnd = Opnd::mem(VALUE_BITS, object_reg, RUBY_OFFSET_RBASIC_FLAGS); + let flags_opnd = asm.and(flags_opnd, (RUBY_T_MASK as u64).into()); + + // Compare the result with T_ARRAY + asm.cmp(flags_opnd, (RUBY_T_ARRAY as u64).into()); + asm.jne(Target::side_exit(counter)); + + if Type::TArray.diff(object_type) != TypeDiff::Incompatible { + asm.ctx.upgrade_opnd_type(object_opnd, Type::TArray); + } +} + +fn guard_object_is_hash( + asm: &mut Assembler, + object: Opnd, + object_opnd: YARVOpnd, + counter: Counter, +) { + let object_type = asm.ctx.get_opnd_type(object_opnd); + if object_type.is_hash() { + return; + } + + let object_reg = match object { + Opnd::InsnOut { .. 
} => object,
+        _ => asm.load(object),
+    };
+    guard_object_is_heap(asm, object_reg, object_opnd, counter);
+
+    asm_comment!(asm, "guard object is hash");
+
+    // Pull out the type mask
+    let flags_opnd = Opnd::mem(VALUE_BITS, object_reg, RUBY_OFFSET_RBASIC_FLAGS);
+    let flags_opnd = asm.and(flags_opnd, (RUBY_T_MASK as u64).into());
+
+    // Compare the result with T_HASH
+    asm.cmp(flags_opnd, (RUBY_T_HASH as u64).into());
+    asm.jne(Target::side_exit(counter));
+
+    if Type::THash.diff(object_type) != TypeDiff::Incompatible {
+        asm.ctx.upgrade_opnd_type(object_opnd, Type::THash);
+    }
+}
+
+fn guard_object_is_string(
+    asm: &mut Assembler,
+    object: Opnd,
+    object_opnd: YARVOpnd,
+    counter: Counter,
+) {
+    let object_type = asm.ctx.get_opnd_type(object_opnd);
+    if object_type.is_string() {
+        return;
+    }
+
+    let object_reg = match object {
+        Opnd::InsnOut { .. } => object,
+        _ => asm.load(object),
+    };
+    guard_object_is_heap(asm, object_reg, object_opnd, counter);
+
+    asm_comment!(asm, "guard object is string");
+
+    // Pull out the type mask
+    let flags_reg = asm.load(Opnd::mem(VALUE_BITS, object_reg, RUBY_OFFSET_RBASIC_FLAGS));
+    let flags_reg = asm.and(flags_reg, Opnd::UImm(RUBY_T_MASK as u64));
+
+    // Compare the result with T_STRING
+    asm.cmp(flags_reg, Opnd::UImm(RUBY_T_STRING as u64));
+    asm.jne(Target::side_exit(counter));
+
+    if Type::TString.diff(object_type) != TypeDiff::Incompatible {
+        asm.ctx.upgrade_opnd_type(object_opnd, Type::TString);
+    }
+}
+
+/// This guards that a special flag is not set on a hash.
+/// By passing a hash with this flag set as the last argument
+/// in a splat call, you can change the way keywords are handled
+/// to behave like ruby 2. We don't currently support this.
+fn guard_object_is_not_ruby2_keyword_hash(
+    asm: &mut Assembler,
+    object_opnd: Opnd,
+    counter: Counter,
+) {
+    asm_comment!(asm, "guard object is not ruby2 keyword hash");
+
+    let not_ruby2_keyword = asm.new_label("not_ruby2_keyword");
+    asm.test(object_opnd, (RUBY_IMMEDIATE_MASK as u64).into());
+    asm.jnz(not_ruby2_keyword);
+
+    asm.cmp(object_opnd, Qfalse.into());
+    asm.je(not_ruby2_keyword);
+
+    let flags_opnd = asm.load(Opnd::mem(
+        VALUE_BITS,
+        object_opnd,
+        RUBY_OFFSET_RBASIC_FLAGS,
+    ));
+    let type_opnd = asm.and(flags_opnd, (RUBY_T_MASK as u64).into());
+
+    asm.cmp(type_opnd, (RUBY_T_HASH as u64).into());
+    asm.jne(not_ruby2_keyword);
+
+    asm.test(flags_opnd, (RHASH_PASS_AS_KEYWORDS as u64).into());
+    asm.jnz(Target::side_exit(counter));
+
+    asm.write_label(not_ruby2_keyword);
+}
+
+/// This instruction pops a single value off the stack, converts it to an
+/// array if it isn't already one using the #to_ary method, and then pushes
+/// the values from the array back onto the stack.
+fn gen_expandarray(
+    jit: &mut JITState,
+    asm: &mut Assembler,
+    ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+    // Both arguments are rb_num_t which is unsigned
+    let num = jit.get_arg(0).as_u32();
+    let flag = jit.get_arg(1).as_usize();
+
+    // If this instruction has the splat flag, then bail out.
+    if flag & 0x01 != 0 {
+        gen_counter_incr(asm, Counter::expandarray_splat);
+        return None;
+    }
+
+    // If this instruction has the postarg flag, then bail out.
+ if flag & 0x02 != 0 { + gen_counter_incr(asm, Counter::expandarray_postarg); + return None; + } + + let array_opnd = asm.stack_opnd(0); + + // Defer compilation so we can specialize on a runtime `self` + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + let comptime_recv = jit.peek_at_stack(&asm.ctx, 0); + + // If the comptime receiver is not an array + if !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_ARRAY) } { + // at compile time, ensure to_ary is not defined + let target_cme = unsafe { rb_callable_method_entry_or_negative(comptime_recv.class_of(), ID!(to_ary)) }; + let cme_def_type = unsafe { get_cme_def_type(target_cme) }; + + // if to_ary is defined, return can't compile so to_ary can be called + if cme_def_type != VM_METHOD_TYPE_UNDEF { + gen_counter_incr(asm, Counter::expandarray_to_ary); + return None; + } + + // invalidate compile block if to_ary is later defined + jit.assume_method_lookup_stable(asm, ocb, target_cme); + + jit_guard_known_klass( + jit, + asm, + ocb, + comptime_recv.class_of(), + array_opnd, + array_opnd.into(), + comptime_recv, + SEND_MAX_DEPTH, + Counter::expandarray_not_array, + ); + + let opnd = asm.stack_pop(1); // pop after using the type info + + // If we don't actually want any values, then just keep going + if num == 0 { + return Some(KeepCompiling); + } + + // load opnd to avoid a race because we are also pushing onto the stack + let opnd = asm.load(opnd); + + for _ in 1..num { + let push_opnd = asm.stack_push(Type::Nil); + asm.mov(push_opnd, Qnil.into()); + } + + let push_opnd = asm.stack_push(Type::Unknown); + asm.mov(push_opnd, opnd); + + return Some(KeepCompiling); + } + + // Get the compile-time array length + let comptime_len = unsafe { rb_yjit_array_len(comptime_recv) as u32 }; + + // Move the array from the stack and check that it's an array. + guard_object_is_array( + asm, + array_opnd, + array_opnd.into(), + Counter::expandarray_not_array, + ); + + // If we don't actually want any values, then just return. + if num == 0 { + asm.stack_pop(1); // pop the array + return Some(KeepCompiling); + } + + let array_opnd = asm.stack_opnd(0); + let array_reg = asm.load(array_opnd); + let array_len_opnd = get_array_len(asm, array_reg); + + // Guard on the comptime/expected array length + if comptime_len >= num { + asm_comment!(asm, "guard array length >= {}", num); + asm.cmp(array_len_opnd, num.into()); + jit_chain_guard( + JCC_JB, + jit, + asm, + ocb, + EXPANDARRAY_MAX_CHAIN_DEPTH, + Counter::expandarray_chain_max_depth, + ); + + } else { + asm_comment!(asm, "guard array length == {}", comptime_len); + asm.cmp(array_len_opnd, comptime_len.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + EXPANDARRAY_MAX_CHAIN_DEPTH, + Counter::expandarray_chain_max_depth, + ); + } + + let array_opnd = asm.stack_pop(1); // pop after using the type info + + // Load the pointer to the embedded or heap array + let ary_opnd = if comptime_len > 0 { + let array_reg = asm.load(array_opnd); + Some(get_array_ptr(asm, array_reg)) + } else { + None + }; + + // Loop backward through the array and push each element onto the stack. 
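+    // Illustrative example: with num=3 and comptime_len=2 this pushes Qnil (for the
+    // missing index 2), then array[1], then array[0], leaving array[0] on top.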
+ for i in (0..num).rev() { + let top = asm.stack_push(if i < comptime_len { Type::Unknown } else { Type::Nil }); + let offset = i32::try_from(i * (SIZEOF_VALUE as u32)).unwrap(); + + // Missing elements are Qnil + asm_comment!(asm, "load array[{}]", i); + let elem_opnd = if i < comptime_len { Opnd::mem(64, ary_opnd.unwrap(), offset) } else { Qnil.into() }; + asm.mov(top, elem_opnd); + } + + Some(KeepCompiling) +} + +// Compute the index of a local variable from its slot index +fn ep_offset_to_local_idx(iseq: IseqPtr, ep_offset: u32) -> u32 { + // Layout illustration + // This is an array of VALUE + // | VM_ENV_DATA_SIZE | + // v v + // low addr <+-------+-------+-------+-------+------------------+ + // |local 0|local 1| ... |local n| .... | + // +-------+-------+-------+-------+------------------+ + // ^ ^ ^ ^ + // +-------+---local_table_size----+ cfp->ep--+ + // | | + // +------------------ep_offset---------------+ + // + // See usages of local_var_name() from iseq.c for similar calculation. + + // Equivalent of iseq->body->local_table_size + let local_table_size: i32 = unsafe { get_iseq_body_local_table_size(iseq) } + .try_into() + .unwrap(); + let op = (ep_offset - VM_ENV_DATA_SIZE) as i32; + let local_idx = local_table_size - op - 1; + assert!(local_idx >= 0 && local_idx < local_table_size); + local_idx.try_into().unwrap() +} + +// Get EP at level from CFP +fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd { + // Load environment pointer EP from CFP into a register + let ep_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP); + let mut ep_opnd = asm.load(ep_opnd); + + for _ in (0..level).rev() { + // Get the previous EP from the current EP + // See GET_PREV_EP(ep) macro + // VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03)) + let offs = SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL; + ep_opnd = asm.load(Opnd::mem(64, ep_opnd, offs)); + ep_opnd = asm.and(ep_opnd, Opnd::Imm(!0x03)); + } + + ep_opnd +} + +// Gets the EP of the ISeq of the containing method, or "local level". +// Equivalent of GET_LEP() macro. 
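+// For example (illustrative): inside a block nested two scopes deep in a method,
+// get_lvar_level() below returns 2 and gen_get_ep() follows two
+// VM_ENV_DATA_INDEX_SPECVAL links to reach the method's EP.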
+fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd { + // Equivalent of get_lvar_level() in compile.c + fn get_lvar_level(iseq: IseqPtr) -> u32 { + if iseq == unsafe { rb_get_iseq_body_local_iseq(iseq) } { + 0 + } else { + 1 + get_lvar_level(unsafe { rb_get_iseq_body_parent_iseq(iseq) }) + } + } + + let level = get_lvar_level(jit.get_iseq()); + gen_get_ep(asm, level) +} + +fn gen_getlocal_generic( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + ep_offset: u32, + level: u32, +) -> Option<CodegenStatus> { + let local_opnd = if level == 0 && jit.assume_no_ep_escape(asm, ocb) { + // Load the local using SP register + asm.ctx.ep_opnd(-(ep_offset as i32)) + } else { + // Load environment pointer EP (level 0) from CFP + let ep_opnd = gen_get_ep(asm, level); + + // Load the local from the block + // val = *(vm_get_ep(GET_EP(), level) - idx); + let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32); + Opnd::mem(64, ep_opnd, offs) + }; + + // Write the local at SP + let stack_top = if level == 0 { + let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset); + asm.stack_push_local(local_idx.as_usize()) + } else { + asm.stack_push(Type::Unknown) + }; + + asm.mov(stack_top, local_opnd); + + Some(KeepCompiling) +} + +fn gen_getlocal( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + let level = jit.get_arg(1).as_u32(); + gen_getlocal_generic(jit, asm, ocb, idx, level) +} + +fn gen_getlocal_wc0( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + gen_getlocal_generic(jit, asm, ocb, idx, 0) +} + +fn gen_getlocal_wc1( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + gen_getlocal_generic(jit, asm, ocb, idx, 1) +} + +fn gen_setlocal_generic( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + ep_offset: u32, + level: u32, +) -> Option<CodegenStatus> { + let value_type = asm.ctx.get_opnd_type(StackOpnd(0)); + + // Fallback because of write barrier + if asm.ctx.get_chain_depth() > 0 { + // Load environment pointer EP at level + let ep_opnd = gen_get_ep(asm, level); + + // This function should not yield to the GC. + // void rb_vm_env_write(const VALUE *ep, int index, VALUE v) + let index = -(ep_offset as i64); + let value_opnd = asm.stack_opnd(0); + asm.ccall( + rb_vm_env_write as *const u8, + vec![ + ep_opnd, + index.into(), + value_opnd, + ] + ); + asm.stack_pop(1); + + return Some(KeepCompiling); + } + + let (flags_opnd, local_opnd) = if level == 0 && jit.assume_no_ep_escape(asm, ocb) { + // Load flags and the local using SP register + let local_opnd = asm.ctx.ep_opnd(-(ep_offset as i32)); + let flags_opnd = asm.ctx.ep_opnd(VM_ENV_DATA_INDEX_FLAGS as i32); + (flags_opnd, local_opnd) + } else { + // Load flags and the local for the level + let ep_opnd = gen_get_ep(asm, level); + let flags_opnd = Opnd::mem( + 64, + ep_opnd, + SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_FLAGS as i32, + ); + (flags_opnd, Opnd::mem(64, ep_opnd, -SIZEOF_VALUE_I32 * ep_offset as i32)) + }; + + // Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however write barriers + // only affect heap objects being written. If we know an immediate value is being written we + // can skip this check. 
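+    // (Illustrative) e.g. storing a Fixnum or a Symbol can never create a reference
+    // from an old object to a young one, so the WB_REQUIRED check below is skipped
+    // entirely for known-immediate stack values.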
+ if !value_type.is_imm() { + // flags & VM_ENV_FLAG_WB_REQUIRED + asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into()); + + // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 + assert!(asm.ctx.get_chain_depth() == 0); + jit_chain_guard( + JCC_JNZ, + jit, + asm, + ocb, + 1, + Counter::setlocal_wb_required, + ); + } + + if level == 0 { + let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset).as_usize(); + asm.ctx.set_local_type(local_idx, value_type); + } + + // Pop the value to write from the stack + let stack_top = asm.stack_pop(1); + + // Write the value at the environment pointer + asm.mov(local_opnd, stack_top); + + Some(KeepCompiling) +} + +fn gen_setlocal( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + let level = jit.get_arg(1).as_u32(); + gen_setlocal_generic(jit, asm, ocb, idx, level) +} + +fn gen_setlocal_wc0( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + gen_setlocal_generic(jit, asm, ocb, idx, 0) +} + +fn gen_setlocal_wc1( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let idx = jit.get_arg(0).as_u32(); + gen_setlocal_generic(jit, asm, ocb, idx, 1) +} + +// new hash initialized from top N values +fn gen_newhash( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let num: u64 = jit.get_arg(0).as_u64(); + + // Save the PC and SP because we are allocating + jit_prepare_call_with_gc(jit, asm); + + if num != 0 { + // val = rb_hash_new_with_size(num / 2); + let new_hash = asm.ccall( + rb_hash_new_with_size as *const u8, + vec![Opnd::UImm(num / 2)] + ); + + // Save the allocated hash as we want to push it after insertion + asm.cpush(new_hash); + asm.cpush(new_hash); // x86 alignment + + // Get a pointer to the values to insert into the hash + let stack_addr_from_top = asm.lea(asm.stack_opnd((num - 1) as i32)); + + // rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val); + asm.ccall( + rb_hash_bulk_insert as *const u8, + vec![ + Opnd::UImm(num), + stack_addr_from_top, + new_hash + ] + ); + + let new_hash = asm.cpop(); + asm.cpop_into(new_hash); // x86 alignment + + asm.stack_pop(num.try_into().unwrap()); + let stack_ret = asm.stack_push(Type::CHash); + asm.mov(stack_ret, new_hash); + } else { + // val = rb_hash_new(); + let new_hash = asm.ccall(rb_hash_new as *const u8, vec![]); + let stack_ret = asm.stack_push(Type::CHash); + asm.mov(stack_ret, new_hash); + } + + Some(KeepCompiling) +} + +fn gen_putstring( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let put_val = jit.get_arg(0); + + // Save the PC and SP because the callee will allocate + jit_prepare_call_with_gc(jit, asm); + + let str_opnd = asm.ccall( + rb_ec_str_resurrect as *const u8, + vec![EC, put_val.into(), 0.into()] + ); + + let stack_top = asm.stack_push(Type::CString); + asm.mov(stack_top, str_opnd); + + Some(KeepCompiling) +} + +fn gen_putchilledstring( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let put_val = jit.get_arg(0); + + // Save the PC and SP because the callee will allocate + jit_prepare_call_with_gc(jit, asm); + + let str_opnd = asm.ccall( + rb_ec_str_resurrect as *const u8, + vec![EC, put_val.into(), 1.into()] + ); + + let stack_top = asm.stack_push(Type::CString); + asm.mov(stack_top, str_opnd); + + 
Some(KeepCompiling) +} + +fn gen_checkmatch( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let flag = jit.get_arg(0).as_u32(); + + // rb_vm_check_match is not leaf unless flag is VM_CHECKMATCH_TYPE_WHEN. + // See also: leafness_of_checkmatch() and check_match() + if flag != VM_CHECKMATCH_TYPE_WHEN { + jit_prepare_non_leaf_call(jit, asm); + } + + let pattern = asm.stack_opnd(0); + let target = asm.stack_opnd(1); + + extern "C" { + fn rb_vm_check_match(ec: EcPtr, target: VALUE, pattern: VALUE, num: u32) -> VALUE; + } + let result = asm.ccall(rb_vm_check_match as *const u8, vec![EC, target, pattern, flag.into()]); + asm.stack_pop(2); // Keep them on stack during ccall for GC + + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, result); + + Some(KeepCompiling) +} + +// Push Qtrue or Qfalse depending on whether the given keyword was supplied by +// the caller +fn gen_checkkeyword( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // When a keyword is unspecified past index 32, a hash will be used + // instead. This can only happen in iseqs taking more than 32 keywords. + if unsafe { (*get_iseq_body_param_keyword(jit.iseq)).num >= 32 } { + return None; + } + + // The EP offset to the undefined bits local + let bits_offset = jit.get_arg(0).as_i32(); + + // The index of the keyword we want to check + let index: i64 = jit.get_arg(1).as_i64(); + + // Load environment pointer EP + let ep_opnd = gen_get_ep(asm, 0); + + // VALUE kw_bits = *(ep - bits); + let bits_opnd = Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * -bits_offset); + + // unsigned int b = (unsigned int)FIX2ULONG(kw_bits); + // if ((b & (0x01 << idx))) { + // + // We can skip the FIX2ULONG conversion by shifting the bit we test + let bit_test: i64 = 0x01 << (index + 1); + asm.test(bits_opnd, Opnd::Imm(bit_test)); + let ret_opnd = asm.csel_z(Qtrue.into(), Qfalse.into()); + + let stack_ret = asm.stack_push(Type::UnknownImm); + asm.mov(stack_ret, ret_opnd); + + Some(KeepCompiling) +} + +// Generate a jump to a stub that recompiles the current YARV instruction on failure. +// When depth_limit is exceeded, generate a jump to a side exit. +fn jit_chain_guard( + jcc: JCCKinds, + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + depth_limit: u8, + counter: Counter, +) { + let target0_gen_fn = match jcc { + JCC_JNE | JCC_JNZ => BranchGenFn::JNZToTarget0, + JCC_JZ | JCC_JE => BranchGenFn::JZToTarget0, + JCC_JBE | JCC_JNA => BranchGenFn::JBEToTarget0, + JCC_JB | JCC_JNAE => BranchGenFn::JBToTarget0, + JCC_JO_MUL => BranchGenFn::JOMulToTarget0, + }; + + if asm.ctx.get_chain_depth() < depth_limit { + // Rewind Context to use the stack_size at the beginning of this instruction. 
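+        // (Illustrative) e.g. with depth_limit == 2: chain depths 0 and 1 branch to a
+        // freshly compiled version of this same instruction with chain_depth + 1, and
+        // the version compiled at the limit turns the failing guard into a plain side
+        // exit recorded under `counter`.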
+ let mut deeper = asm.ctx.with_stack_size(jit.stack_size_for_pc); + deeper.increment_chain_depth(); + let bid = BlockId { + iseq: jit.iseq, + idx: jit.insn_idx, + }; + + gen_branch(jit, asm, ocb, bid, &deeper, None, None, target0_gen_fn); + } else { + target0_gen_fn.call(asm, Target::side_exit(counter), None); + } +} + +// up to 8 different shapes for each +pub const GET_IVAR_MAX_DEPTH: u8 = 8; + +// up to 8 different shapes for each +pub const SET_IVAR_MAX_DEPTH: u8 = 8; + +// hashes and arrays +pub const OPT_AREF_MAX_CHAIN_DEPTH: u8 = 2; + +// expandarray +pub const EXPANDARRAY_MAX_CHAIN_DEPTH: u8 = 4; + +// up to 5 different methods for send +pub const SEND_MAX_DEPTH: u8 = 5; + +// up to 20 different offsets for case-when +pub const CASE_WHEN_MAX_DEPTH: u8 = 20; + +pub const MAX_SPLAT_LENGTH: i32 = 127; + +// Codegen for getting an instance variable. +// Preconditions: +// - receiver has the same class as CLASS_OF(comptime_receiver) +// - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled +fn gen_get_ivar( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + max_chain_depth: u8, + comptime_receiver: VALUE, + ivar_name: ID, + recv: Opnd, + recv_opnd: YARVOpnd, +) -> Option<CodegenStatus> { + let comptime_val_klass = comptime_receiver.class_of(); + + // If recv isn't already a register, load it. + let recv = match recv { + Opnd::InsnOut { .. } => recv, + _ => asm.load(recv), + }; + + // Check if the comptime class uses a custom allocator + let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) }; + let uses_custom_allocator = match custom_allocator { + Some(alloc_fun) => { + let allocate_instance = rb_class_allocate_instance as *const u8; + alloc_fun as *const u8 != allocate_instance + } + None => false, + }; + + // Check if the comptime receiver is a T_OBJECT + let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) }; + // Use a general C call at the last chain to avoid exits on megamorphic shapes + let megamorphic = asm.ctx.get_chain_depth() >= max_chain_depth; + if megamorphic { + gen_counter_incr(asm, Counter::num_getivar_megamorphic); + } + + // If the class uses the default allocator, instances should all be T_OBJECT + // NOTE: This assumes nobody changes the allocator of the class after allocation. + // Eventually, we can encode whether an object is T_OBJECT or not + // inside object shapes. + // too-complex shapes can't use index access, so we use rb_ivar_get for them too. + if !receiver_t_object || uses_custom_allocator || comptime_receiver.shape_too_complex() || megamorphic { + // General case. Call rb_ivar_get(). + // VALUE rb_ivar_get(VALUE obj, ID id) + asm_comment!(asm, "call rb_ivar_get()"); + + // The function could raise RactorIsolationError. + jit_prepare_non_leaf_call(jit, asm); + + let ivar_val = asm.ccall(rb_ivar_get as *const u8, vec![recv, Opnd::UImm(ivar_name)]); + + if recv_opnd != SelfOpnd { + asm.stack_pop(1); + } + + // Push the ivar on the stack + let out_opnd = asm.stack_push(Type::Unknown); + asm.mov(out_opnd, ivar_val); + + // Jump to next instruction. This allows guard chains to share the same successor. 
+ jump_to_next_insn(jit, asm, ocb); + return Some(EndBlock); + } + + let ivar_index = unsafe { + let shape_id = comptime_receiver.shape_id_of(); + let shape = rb_shape_get_shape_by_id(shape_id); + let mut ivar_index: u32 = 0; + if rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) { + Some(ivar_index as usize) + } else { + None + } + }; + + // Guard heap object (recv_opnd must be used before stack_pop) + guard_object_is_heap(asm, recv, recv_opnd, Counter::getivar_not_heap); + + // Compile time self is embedded and the ivar index lands within the object + let embed_test_result = unsafe { FL_TEST_RAW(comptime_receiver, VALUE(ROBJECT_EMBED.as_usize())) != VALUE(0) }; + + let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) }; + let shape_id_offset = unsafe { rb_shape_id_offset() }; + let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); + + asm_comment!(asm, "guard shape"); + asm.cmp(shape_opnd, Opnd::UImm(expected_shape as u64)); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + max_chain_depth, + Counter::getivar_megamorphic, + ); + + // Pop receiver if it's on the temp stack + if recv_opnd != SelfOpnd { + asm.stack_pop(1); + } + + match ivar_index { + // If there is no IVAR index, then the ivar was undefined + // when we entered the compiler. That means we can just return + // nil for this shape + iv name + None => { + let out_opnd = asm.stack_push(Type::Nil); + asm.mov(out_opnd, Qnil.into()); + } + Some(ivar_index) => { + if embed_test_result { + // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h + + // Load the variable + let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32; + let ivar_opnd = Opnd::mem(64, recv, offs); + + // Push the ivar on the stack + let out_opnd = asm.stack_push(Type::Unknown); + asm.mov(out_opnd, ivar_opnd); + } else { + // Compile time value is *not* embedded. + + // Get a pointer to the extended table + let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR as i32)); + + // Read the ivar from the extended table + let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32); + + let out_opnd = asm.stack_push(Type::Unknown); + asm.mov(out_opnd, ivar_opnd); + } + } + } + + // Jump to next instruction. This allows guard chains to share the same successor. + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) +} + +fn gen_getinstancevariable( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Defer compilation so we can specialize on a runtime `self` + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + let ivar_name = jit.get_arg(0).as_u64(); + + let comptime_val = jit.peek_at_self(); + + // Guard that the receiver has the same class as the one from compile time. + let self_asm_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF); + + gen_get_ivar( + jit, + asm, + ocb, + GET_IVAR_MAX_DEPTH, + comptime_val, + ivar_name, + self_asm_opnd, + SelfOpnd, + ) +} + +// Generate an IV write. +// This function doesn't deal with writing the shape, or expanding an object +// to use an IV buffer if necessary. 
That is the caller's responsibility
+fn gen_write_iv(
+    asm: &mut Assembler,
+    comptime_receiver: VALUE,
+    recv: Opnd,
+    ivar_index: usize,
+    set_value: Opnd,
+    extension_needed: bool)
+{
+    // Compile time self is embedded and the ivar index lands within the object
+    let embed_test_result = comptime_receiver.embedded_p() && !extension_needed;
+
+    if embed_test_result {
+        // Find the IV offset
+        let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32;
+        let ivar_opnd = Opnd::mem(64, recv, offs);
+
+        // Write the IV
+        asm_comment!(asm, "write IV");
+        asm.mov(ivar_opnd, set_value);
+    } else {
+        // Compile time value is *not* embedded.
+
+        // Get a pointer to the extended table
+        let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR as i32));
+
+        // Write the ivar into the extended table
+        let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32);
+
+        asm_comment!(asm, "write IV");
+        asm.mov(ivar_opnd, set_value);
+    }
+}
+
+fn gen_setinstancevariable(
+    jit: &mut JITState,
+    asm: &mut Assembler,
+    ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+    // Defer compilation so we can specialize on a runtime `self`
+    if !jit.at_current_insn() {
+        defer_compilation(jit, asm, ocb);
+        return Some(EndBlock);
+    }
+
+    let ivar_name = jit.get_arg(0).as_u64();
+    let ic = jit.get_arg(1).as_ptr();
+    let comptime_receiver = jit.peek_at_self();
+    gen_set_ivar(
+        jit,
+        asm,
+        ocb,
+        comptime_receiver,
+        ivar_name,
+        SelfOpnd,
+        Some(ic),
+    )
+}
+
+/// Set an instance variable on setinstancevariable or attr_writer.
+/// It switches the behavior based on what recv_opnd is given.
+/// * SelfOpnd: setinstancevariable, which doesn't push a result onto the stack.
+/// * StackOpnd: attr_writer, which pushes a result onto the stack.
+fn gen_set_ivar(
+    jit: &mut JITState,
+    asm: &mut Assembler,
+    ocb: &mut OutlinedCb,
+    comptime_receiver: VALUE,
+    ivar_name: ID,
+    recv_opnd: YARVOpnd,
+    ic: Option<*const iseq_inline_iv_cache_entry>,
+) -> Option<CodegenStatus> {
+    let comptime_val_klass = comptime_receiver.class_of();
+
+    // If the comptime receiver is frozen, writing an IV will raise an exception
+    // and we don't want to JIT code to deal with that situation.
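+    // (Illustrative) e.g. `obj.freeze; obj.instance_variable_set(:@a, 1)` raises
+    // FrozenError at runtime, so we refuse to compile this case and let the
+    // interpreter raise the exception.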
+ if comptime_receiver.is_frozen() { + gen_counter_incr(asm, Counter::setivar_frozen); + return None; + } + + let stack_type = asm.ctx.get_opnd_type(StackOpnd(0)); + + // Check if the comptime class uses a custom allocator + let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) }; + let uses_custom_allocator = match custom_allocator { + Some(alloc_fun) => { + let allocate_instance = rb_class_allocate_instance as *const u8; + alloc_fun as *const u8 != allocate_instance + } + None => false, + }; + + // Check if the comptime receiver is a T_OBJECT + let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) }; + // Use a general C call at the last chain to avoid exits on megamorphic shapes + let megamorphic = asm.ctx.get_chain_depth() >= SET_IVAR_MAX_DEPTH; + if megamorphic { + gen_counter_incr(asm, Counter::num_setivar_megamorphic); + } + + // Get the iv index + let shape_too_complex = comptime_receiver.shape_too_complex(); + let ivar_index = if !shape_too_complex { + let shape_id = comptime_receiver.shape_id_of(); + let shape = unsafe { rb_shape_get_shape_by_id(shape_id) }; + let mut ivar_index: u32 = 0; + if unsafe { rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) } { + Some(ivar_index as usize) + } else { + None + } + } else { + None + }; + + // The current shape doesn't contain this iv, we need to transition to another shape. + let new_shape = if !shape_too_complex && receiver_t_object && ivar_index.is_none() { + let current_shape = comptime_receiver.shape_of(); + let next_shape = unsafe { rb_shape_get_next(current_shape, comptime_receiver, ivar_name) }; + let next_shape_id = unsafe { rb_shape_id(next_shape) }; + + // If the VM ran out of shapes, or this class generated too many leaf, + // it may be de-optimized into OBJ_TOO_COMPLEX_SHAPE (hash-table). + if next_shape_id == OBJ_TOO_COMPLEX_SHAPE_ID { + Some((next_shape_id, None, 0_usize)) + } else { + let current_capacity = unsafe { (*current_shape).capacity }; + + // If the new shape has a different capacity, or is TOO_COMPLEX, we'll have to + // reallocate it. + let needs_extension = unsafe { (*current_shape).capacity != (*next_shape).capacity }; + + // We can write to the object, but we need to transition the shape + let ivar_index = unsafe { (*current_shape).next_iv_index } as usize; + + let needs_extension = if needs_extension { + Some((current_capacity, unsafe { (*next_shape).capacity })) + } else { + None + }; + Some((next_shape_id, needs_extension, ivar_index)) + } + } else { + None + }; + let new_shape_too_complex = matches!(new_shape, Some((OBJ_TOO_COMPLEX_SHAPE_ID, _, _))); + + // If the receiver isn't a T_OBJECT, or uses a custom allocator, + // then just write out the IV write as a function call. + // too-complex shapes can't use index access, so we use rb_ivar_get for them too. + if !receiver_t_object || uses_custom_allocator || shape_too_complex || new_shape_too_complex || megamorphic { + // The function could raise FrozenError. 
+ // Note that this modifies REG_SP, which is why we do it first + jit_prepare_non_leaf_call(jit, asm); + + // Get the operands from the stack + let val_opnd = asm.stack_opnd(0); + + if let StackOpnd(index) = recv_opnd { // attr_writer + let recv = asm.stack_opnd(index as i32); + asm_comment!(asm, "call rb_vm_set_ivar_id()"); + asm.ccall( + rb_vm_set_ivar_id as *const u8, + vec![ + recv, + Opnd::UImm(ivar_name), + val_opnd, + ], + ); + } else { // setinstancevariable + asm_comment!(asm, "call rb_vm_setinstancevariable()"); + asm.ccall( + rb_vm_setinstancevariable as *const u8, + vec![ + Opnd::const_ptr(jit.iseq as *const u8), + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF), + ivar_name.into(), + val_opnd, + Opnd::const_ptr(ic.unwrap() as *const u8), + ], + ); + } + } else { + // Get the receiver + let mut recv = asm.load(if let StackOpnd(index) = recv_opnd { + asm.stack_opnd(index as i32) + } else { + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF) + }); + + // Upgrade type + guard_object_is_heap(asm, recv, recv_opnd, Counter::setivar_not_heap); + + let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) }; + let shape_id_offset = unsafe { rb_shape_id_offset() }; + let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); + + asm_comment!(asm, "guard shape"); + asm.cmp(shape_opnd, Opnd::UImm(expected_shape as u64)); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + SET_IVAR_MAX_DEPTH, + Counter::setivar_megamorphic, + ); + + let write_val; + + match ivar_index { + // If we don't have an instance variable index, then we need to + // transition out of the current shape. + None => { + let (new_shape_id, needs_extension, ivar_index) = new_shape.unwrap(); + if let Some((current_capacity, new_capacity)) = needs_extension { + // Generate the C call so that runtime code will increase + // the capacity and set the buffer. + asm_comment!(asm, "call rb_ensure_iv_list_size"); + + // It allocates so can trigger GC, which takes the VM lock + // so could yield to a different ractor. + jit_prepare_call_with_gc(jit, asm); + asm.ccall(rb_ensure_iv_list_size as *const u8, + vec![ + recv, + Opnd::UImm(current_capacity.into()), + Opnd::UImm(new_capacity.into()) + ] + ); + + // Load the receiver again after the function call + recv = asm.load(if let StackOpnd(index) = recv_opnd { + asm.stack_opnd(index as i32) + } else { + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF) + }); + } + + write_val = asm.stack_opnd(0); + gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, needs_extension.is_some()); + + asm_comment!(asm, "write shape"); + + let shape_id_offset = unsafe { rb_shape_id_offset() }; + let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); + + // Store the new shape + asm.store(shape_opnd, Opnd::UImm(new_shape_id as u64)); + }, + + Some(ivar_index) => { + // If the iv index already exists, then we don't need to + // transition to a new shape. The reason is because we find + // the iv index by searching up the shape tree. If we've + // made the transition already, then there's no reason to + // update the shape on the object. Just set the IV. + write_val = asm.stack_opnd(0); + gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, false); + }, + } + + // If we know the stack value is an immediate, there's no need to + // generate WB code. 
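+        // (Immediates such as fixnums, static symbols, and nil/true/false are not heap
+        // objects, so the GC never needs a write barrier to remember references to them.)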
+ if !stack_type.is_imm() { + asm.spill_temps(); // for ccall (unconditionally spill them for RegTemps consistency) + let skip_wb = asm.new_label("skip_wb"); + // If the value we're writing is an immediate, we don't need to WB + asm.test(write_val, (RUBY_IMMEDIATE_MASK as u64).into()); + asm.jnz(skip_wb); + + // If the value we're writing is nil or false, we don't need to WB + asm.cmp(write_val, Qnil.into()); + asm.jbe(skip_wb); + + asm_comment!(asm, "write barrier"); + asm.ccall( + rb_gc_writebarrier as *const u8, + vec![ + recv, + write_val, + ] + ); + + asm.write_label(skip_wb); + } + } + let write_val = asm.stack_pop(1); // Keep write_val on stack during ccall for GC + + // If it's attr_writer, i.e. recv_opnd is StackOpnd, we need to pop + // the receiver and push the written value onto the stack. + if let StackOpnd(_) = recv_opnd { + asm.stack_pop(1); // Pop receiver + + let out_opnd = asm.stack_push(Type::Unknown); // Push a return value + asm.mov(out_opnd, write_val); + } + + Some(KeepCompiling) +} + +fn gen_defined( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let op_type = jit.get_arg(0).as_u64(); + let obj = jit.get_arg(1); + let pushval = jit.get_arg(2); + + match op_type as u32 { + DEFINED_YIELD => { + asm.stack_pop(1); // v operand is not used + let out_opnd = asm.stack_push(Type::Unknown); // nil or "yield" + + gen_block_given(jit, asm, out_opnd, pushval.into(), Qnil.into()); + } + _ => { + // Save the PC and SP because the callee may allocate or call #respond_to? + // Note that this modifies REG_SP, which is why we do it first + jit_prepare_non_leaf_call(jit, asm); + + // Get the operands from the stack + let v_opnd = asm.stack_opnd(0); + + // Call vm_defined(ec, reg_cfp, op_type, obj, v) + let def_result = asm.ccall(rb_vm_defined as *const u8, vec![EC, CFP, op_type.into(), obj.into(), v_opnd]); + asm.stack_pop(1); // Keep it on stack during ccall for GC + + // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) { + // val = pushval; + // } + asm.test(def_result, Opnd::UImm(255)); + let out_value = asm.csel_nz(pushval.into(), Qnil.into()); + + // Push the return value onto the stack + let out_type = if pushval.special_const_p() { + Type::UnknownImm + } else { + Type::Unknown + }; + let stack_ret = asm.stack_push(out_type); + asm.mov(stack_ret, out_value); + } + } + + Some(KeepCompiling) +} + +fn gen_definedivar( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Defer compilation so we can specialize base on a runtime receiver + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + let ivar_name = jit.get_arg(0).as_u64(); + // Value that will be pushed on the stack if the ivar is defined. In practice this is always the + // string "instance-variable". If the ivar is not defined, nil will be pushed instead. 
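+    // e.g. `defined?(@foo)` evaluates to "instance-variable" when @foo is set, and to nil otherwise.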
+ let pushval = jit.get_arg(2); + + // Get the receiver + let recv = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); + + // Specialize base on compile time values + let comptime_receiver = jit.peek_at_self(); + + if comptime_receiver.shape_too_complex() || asm.ctx.get_chain_depth() >= GET_IVAR_MAX_DEPTH { + // Fall back to calling rb_ivar_defined + + // Save the PC and SP because the callee may allocate + // Note that this modifies REG_SP, which is why we do it first + jit_prepare_call_with_gc(jit, asm); + + // Call rb_ivar_defined(recv, ivar_name) + let def_result = asm.ccall(rb_ivar_defined as *const u8, vec![recv, ivar_name.into()]); + + // if (rb_ivar_defined(recv, ivar_name)) { + // val = pushval; + // } + asm.test(def_result, Opnd::UImm(255)); + let out_value = asm.csel_nz(pushval.into(), Qnil.into()); + + // Push the return value onto the stack + let out_type = if pushval.special_const_p() { Type::UnknownImm } else { Type::Unknown }; + let stack_ret = asm.stack_push(out_type); + asm.mov(stack_ret, out_value); + + return Some(KeepCompiling) + } + + let shape_id = comptime_receiver.shape_id_of(); + let ivar_exists = unsafe { + let shape = rb_shape_get_shape_by_id(shape_id); + let mut ivar_index: u32 = 0; + rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) + }; + + // Guard heap object (recv_opnd must be used before stack_pop) + guard_object_is_heap(asm, recv, SelfOpnd, Counter::definedivar_not_heap); + + let shape_id_offset = unsafe { rb_shape_id_offset() }; + let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset); + + asm_comment!(asm, "guard shape"); + asm.cmp(shape_opnd, Opnd::UImm(shape_id as u64)); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + GET_IVAR_MAX_DEPTH, + Counter::definedivar_megamorphic, + ); + + let result = if ivar_exists { pushval } else { Qnil }; + jit_putobject(asm, result); + + // Jump to next instruction. This allows guard chains to share the same successor. 
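+    // (Ending the block here lets every version compiled through jit_chain_guard branch
+    // into one shared continuation rather than duplicating the code that follows.)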
+ jump_to_next_insn(jit, asm, ocb); + + return Some(EndBlock); +} + +fn gen_checktype( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let type_val = jit.get_arg(0).as_u32(); + + // Only three types are emitted by compile.c at the moment + if let RUBY_T_STRING | RUBY_T_ARRAY | RUBY_T_HASH = type_val { + let val_type = asm.ctx.get_opnd_type(StackOpnd(0)); + let val = asm.stack_pop(1); + + // Check if we know from type information + match val_type.known_value_type() { + Some(value_type) => { + if value_type == type_val { + jit_putobject(asm, Qtrue); + return Some(KeepCompiling); + } else { + jit_putobject(asm, Qfalse); + return Some(KeepCompiling); + } + }, + _ => (), + } + + let ret = asm.new_label("ret"); + + let val = asm.load(val); + if !val_type.is_heap() { + // if (SPECIAL_CONST_P(val)) { + // Return Qfalse via REG1 if not on heap + asm.test(val, (RUBY_IMMEDIATE_MASK as u64).into()); + asm.jnz(ret); + asm.cmp(val, Qfalse.into()); + asm.je(ret); + } + + // Check type on object + let object_type = asm.and( + Opnd::mem(64, val, RUBY_OFFSET_RBASIC_FLAGS), + Opnd::UImm(RUBY_T_MASK.into())); + asm.cmp(object_type, Opnd::UImm(type_val.into())); + let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into()); + + asm.write_label(ret); + let stack_ret = asm.stack_push(Type::UnknownImm); + asm.mov(stack_ret, ret_opnd); + + Some(KeepCompiling) + } else { + None + } +} + +fn gen_concatstrings( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let n = jit.get_arg(0).as_usize(); + + // rb_str_concat_literals may raise Encoding::CompatibilityError + jit_prepare_non_leaf_call(jit, asm); + + let values_ptr = asm.lea(asm.ctx.sp_opnd(-(n as i32))); + + // call rb_str_concat_literals(size_t n, const VALUE *strings); + let return_value = asm.ccall( + rb_str_concat_literals as *const u8, + vec![n.into(), values_ptr] + ); + + asm.stack_pop(n); + let stack_ret = asm.stack_push(Type::TString); + asm.mov(stack_ret, return_value); + + Some(KeepCompiling) +} + +fn guard_two_fixnums( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) { + let counter = Counter::guard_send_not_fixnums; + + // Get stack operands without popping them + let arg1 = asm.stack_opnd(0); + let arg0 = asm.stack_opnd(1); + + // Get the stack operand types + let arg1_type = asm.ctx.get_opnd_type(arg1.into()); + let arg0_type = asm.ctx.get_opnd_type(arg0.into()); + + if arg0_type.is_heap() || arg1_type.is_heap() { + asm_comment!(asm, "arg is heap object"); + asm.jmp(Target::side_exit(counter)); + return; + } + + if arg0_type != Type::Fixnum && arg0_type.is_specific() { + asm_comment!(asm, "arg0 not fixnum"); + asm.jmp(Target::side_exit(counter)); + return; + } + + if arg1_type != Type::Fixnum && arg1_type.is_specific() { + asm_comment!(asm, "arg1 not fixnum"); + asm.jmp(Target::side_exit(counter)); + return; + } + + assert!(!arg0_type.is_heap()); + assert!(!arg1_type.is_heap()); + assert!(arg0_type == Type::Fixnum || arg0_type.is_unknown()); + assert!(arg1_type == Type::Fixnum || arg1_type.is_unknown()); + + // If not fixnums at run-time, fall back + if arg0_type != Type::Fixnum { + asm_comment!(asm, "guard arg0 fixnum"); + asm.test(arg0, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + + jit_chain_guard( + JCC_JZ, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + counter, + ); + } + if arg1_type != Type::Fixnum { + asm_comment!(asm, "guard arg1 fixnum"); + asm.test(arg1, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + + jit_chain_guard( + JCC_JZ, + 
jit, + asm, + ocb, + SEND_MAX_DEPTH, + counter, + ); + } + + // Set stack types in context + asm.ctx.upgrade_opnd_type(arg1.into(), Type::Fixnum); + asm.ctx.upgrade_opnd_type(arg0.into(), Type::Fixnum); +} + +// Conditional move operation used by comparison operators +type CmovFn = fn(cb: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> Opnd; + +fn gen_fixnum_cmp( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + cmov_op: CmovFn, + bop: ruby_basic_operators, +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + // Defer compilation so we can specialize based on a runtime receiver + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + }; + + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, bop) { + return None; + } + + // Check that both operands are fixnums + guard_two_fixnums(jit, asm, ocb); + + // Get the operands from the stack + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); + + // Compare the arguments + asm.cmp(arg0, arg1); + let bool_opnd = cmov_op(asm, Qtrue.into(), Qfalse.into()); + + // Push the output on the stack + let dst = asm.stack_push(Type::UnknownImm); + asm.mov(dst, bool_opnd); + + Some(KeepCompiling) + } else { + gen_opt_send_without_block(jit, asm, ocb) + } +} + +fn gen_opt_lt( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + gen_fixnum_cmp(jit, asm, ocb, Assembler::csel_l, BOP_LT) +} + +fn gen_opt_le( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + gen_fixnum_cmp(jit, asm, ocb, Assembler::csel_le, BOP_LE) +} + +fn gen_opt_ge( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + gen_fixnum_cmp(jit, asm, ocb, Assembler::csel_ge, BOP_GE) +} + +fn gen_opt_gt( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + gen_fixnum_cmp(jit, asm, ocb, Assembler::csel_g, BOP_GT) +} + +// Implements specialized equality for either two fixnum or two strings +// Returns None if enough type information isn't available, Some(true) +// if code was generated, otherwise Some(false). 
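+// Callers treat None as "defer compilation until run-time types are known" and
+// Some(false) as "fall back to the generic opt_send_without_block path".
+// For the fixnum case the tagged VALUEs can be compared directly, since the
+// tagging (n << 1) | 1 is injective.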
+fn gen_equality_specialized( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + gen_eq: bool, +) -> Option<bool> { + let a_opnd = asm.stack_opnd(1); + let b_opnd = asm.stack_opnd(0); + + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => return None, + }; + + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_EQ) { + // if overridden, emit the generic version + return Some(false); + } + + guard_two_fixnums(jit, asm, ocb); + + asm.cmp(a_opnd, b_opnd); + let val = if gen_eq { + asm.csel_e(Qtrue.into(), Qfalse.into()) + } else { + asm.csel_ne(Qtrue.into(), Qfalse.into()) + }; + + // Push the output on the stack + asm.stack_pop(2); + let dst = asm.stack_push(Type::UnknownImm); + asm.mov(dst, val); + + return Some(true); + } + + if !jit.at_current_insn() { + return None; + } + let comptime_a = jit.peek_at_stack(&asm.ctx, 1); + let comptime_b = jit.peek_at_stack(&asm.ctx, 0); + + if unsafe { comptime_a.class_of() == rb_cString && comptime_b.class_of() == rb_cString } { + if !assume_bop_not_redefined(jit, asm, ocb, STRING_REDEFINED_OP_FLAG, BOP_EQ) { + // if overridden, emit the generic version + return Some(false); + } + + // Guard that a is a String + jit_guard_known_klass( + jit, + asm, + ocb, + unsafe { rb_cString }, + a_opnd, + a_opnd.into(), + comptime_a, + SEND_MAX_DEPTH, + Counter::guard_send_not_string, + ); + + let equal = asm.new_label("equal"); + let ret = asm.new_label("ret"); + + // Spill for ccall. For safety, unconditionally spill temps before branching. + asm.spill_temps(); + + // If they are equal by identity, return true + asm.cmp(a_opnd, b_opnd); + asm.je(equal); + + // Otherwise guard that b is a T_STRING (from type info) or String (from runtime guard) + let btype = asm.ctx.get_opnd_type(b_opnd.into()); + if btype.known_value_type() != Some(RUBY_T_STRING) { + // Note: any T_STRING is valid here, but we check for a ::String for simplicity + // To pass a mutable static variable (rb_cString) requires an unsafe block + jit_guard_known_klass( + jit, + asm, + ocb, + unsafe { rb_cString }, + b_opnd, + b_opnd.into(), + comptime_b, + SEND_MAX_DEPTH, + Counter::guard_send_not_string, + ); + } + + // Call rb_str_eql_internal(a, b) + let val = asm.ccall( + if gen_eq { rb_str_eql_internal } else { rb_str_neq_internal } as *const u8, + vec![a_opnd, b_opnd], + ); + + // Push the output on the stack + asm.stack_pop(2); + let dst = asm.stack_push(Type::UnknownImm); + asm.mov(dst, val); + asm.jmp(ret); + + asm.write_label(equal); + asm.mov(dst, if gen_eq { Qtrue } else { Qfalse }.into()); + + asm.write_label(ret); + + Some(true) + } else { + Some(false) + } +} + +fn gen_opt_eq( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let specialized = match gen_equality_specialized(jit, asm, ocb, true) { + Some(specialized) => specialized, + None => { + // Defer compilation so we can specialize base on a runtime receiver + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + }; + + if specialized { + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) + } else { + gen_opt_send_without_block(jit, asm, ocb) + } +} + +fn gen_opt_neq( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // opt_neq is passed two rb_call_data as arguments: + // first for ==, second for != + let cd = jit.get_arg(1).as_ptr(); + perf_call! 
{ gen_send_general(jit, asm, ocb, cd, None) } +} + +fn gen_opt_aref( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let cd: *const rb_call_data = jit.get_arg(0).as_ptr(); + let argc = unsafe { vm_ci_argc((*cd).ci) }; + + // Only JIT one arg calls like `ary[6]` + if argc != 1 { + gen_counter_incr(asm, Counter::opt_aref_argc_not_one); + return None; + } + + // Defer compilation so we can specialize base on a runtime receiver + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + // Specialize base on compile time values + let comptime_idx = jit.peek_at_stack(&asm.ctx, 0); + let comptime_recv = jit.peek_at_stack(&asm.ctx, 1); + + if comptime_recv.class_of() == unsafe { rb_cArray } && comptime_idx.fixnum_p() { + if !assume_bop_not_redefined(jit, asm, ocb, ARRAY_REDEFINED_OP_FLAG, BOP_AREF) { + return None; + } + + // Get the stack operands + let idx_opnd = asm.stack_opnd(0); + let recv_opnd = asm.stack_opnd(1); + + // Guard that the receiver is an ::Array + // BOP_AREF check above is only good for ::Array. + jit_guard_known_klass( + jit, + asm, + ocb, + unsafe { rb_cArray }, + recv_opnd, + recv_opnd.into(), + comptime_recv, + OPT_AREF_MAX_CHAIN_DEPTH, + Counter::opt_aref_not_array, + ); + + // Bail if idx is not a FIXNUM + let idx_reg = asm.load(idx_opnd); + asm.test(idx_reg, (RUBY_FIXNUM_FLAG as u64).into()); + asm.jz(Target::side_exit(Counter::opt_aref_arg_not_fixnum)); + + // Call VALUE rb_ary_entry_internal(VALUE ary, long offset). + // It never raises or allocates, so we don't need to write to cfp->pc. + { + // Pop the argument and the receiver + asm.stack_pop(2); + + let idx_reg = asm.rshift(idx_reg, Opnd::UImm(1)); // Convert fixnum to int + let val = asm.ccall(rb_ary_entry_internal as *const u8, vec![recv_opnd, idx_reg]); + + // Push the return value onto the stack + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val); + } + + // Jump to next instruction. This allows guard chains to share the same successor. + jump_to_next_insn(jit, asm, ocb); + return Some(EndBlock); + } else if comptime_recv.class_of() == unsafe { rb_cHash } { + if !assume_bop_not_redefined(jit, asm, ocb, HASH_REDEFINED_OP_FLAG, BOP_AREF) { + return None; + } + + let recv_opnd = asm.stack_opnd(1); + + // Guard that the receiver is a hash + jit_guard_known_klass( + jit, + asm, + ocb, + unsafe { rb_cHash }, + recv_opnd, + recv_opnd.into(), + comptime_recv, + OPT_AREF_MAX_CHAIN_DEPTH, + Counter::opt_aref_not_hash, + ); + + // Prepare to call rb_hash_aref(). It might call #hash on the key. + jit_prepare_non_leaf_call(jit, asm); + + // Call rb_hash_aref + let key_opnd = asm.stack_opnd(0); + let recv_opnd = asm.stack_opnd(1); + let val = asm.ccall(rb_hash_aref as *const u8, vec![recv_opnd, key_opnd]); + + // Pop the key and the receiver + asm.stack_pop(2); + + // Push the return value onto the stack + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val); + + // Jump to next instruction. This allows guard chains to share the same successor. + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) + } else { + // General case. Call the [] method. 
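+        // (Anything other than an Array indexed by a fixnum or a Hash, e.g. a Struct or a
+        // user-defined #[], simply goes through the normal send path.)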
+ gen_opt_send_without_block(jit, asm, ocb) + } +} + +fn gen_opt_aset( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Defer compilation so we can specialize on a runtime `self` + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + let comptime_recv = jit.peek_at_stack(&asm.ctx, 2); + let comptime_key = jit.peek_at_stack(&asm.ctx, 1); + + // Get the operands from the stack + let recv = asm.stack_opnd(2); + let key = asm.stack_opnd(1); + let _val = asm.stack_opnd(0); + + if comptime_recv.class_of() == unsafe { rb_cArray } && comptime_key.fixnum_p() { + // Guard receiver is an Array + jit_guard_known_klass( + jit, + asm, + ocb, + unsafe { rb_cArray }, + recv, + recv.into(), + comptime_recv, + SEND_MAX_DEPTH, + Counter::opt_aset_not_array, + ); + + // Guard key is a fixnum + jit_guard_known_klass( + jit, + asm, + ocb, + unsafe { rb_cInteger }, + key, + key.into(), + comptime_key, + SEND_MAX_DEPTH, + Counter::opt_aset_not_fixnum, + ); + + // We might allocate or raise + jit_prepare_non_leaf_call(jit, asm); + + // Call rb_ary_store + let recv = asm.stack_opnd(2); + let key = asm.load(asm.stack_opnd(1)); + let key = asm.rshift(key, Opnd::UImm(1)); // FIX2LONG(key) + let val = asm.stack_opnd(0); + asm.ccall(rb_ary_store as *const u8, vec![recv, key, val]); + + // rb_ary_store returns void + // stored value should still be on stack + let val = asm.load(asm.stack_opnd(0)); + + // Push the return value onto the stack + asm.stack_pop(3); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val); + + jump_to_next_insn(jit, asm, ocb); + return Some(EndBlock); + } else if comptime_recv.class_of() == unsafe { rb_cHash } { + // Guard receiver is a Hash + jit_guard_known_klass( + jit, + asm, + ocb, + unsafe { rb_cHash }, + recv, + recv.into(), + comptime_recv, + SEND_MAX_DEPTH, + Counter::opt_aset_not_hash, + ); + + // We might allocate or raise + jit_prepare_non_leaf_call(jit, asm); + + // Call rb_hash_aset + let recv = asm.stack_opnd(2); + let key = asm.stack_opnd(1); + let val = asm.stack_opnd(0); + let ret = asm.ccall(rb_hash_aset as *const u8, vec![recv, key, val]); + + // Push the return value onto the stack + asm.stack_pop(3); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, ret); + + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) + } else { + gen_opt_send_without_block(jit, asm, ocb) + } +} + +fn gen_opt_aref_with( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus>{ + // We might allocate or raise + jit_prepare_non_leaf_call(jit, asm); + + let key_opnd = Opnd::Value(jit.get_arg(0)); + let recv_opnd = asm.stack_opnd(0); + + extern "C" { + fn rb_vm_opt_aref_with(recv: VALUE, key: VALUE) -> VALUE; + } + + let val_opnd = asm.ccall( + rb_vm_opt_aref_with as *const u8, + vec![ + recv_opnd, + key_opnd + ], + ); + asm.stack_pop(1); // Keep it on stack during GC + + asm.cmp(val_opnd, Qundef.into()); + asm.je(Target::side_exit(Counter::opt_aref_with_qundef)); + + let top = asm.stack_push(Type::Unknown); + asm.mov(top, val_opnd); + + return Some(KeepCompiling); +} + +fn gen_opt_and( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + // Defer compilation so we can specialize on a runtime `self` + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + }; + + 
if two_fixnums { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_AND) { + return None; + } + + // Check that both operands are fixnums + guard_two_fixnums(jit, asm, ocb); + + // Get the operands and destination from the stack + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); + + // Do the bitwise and arg0 & arg1 + let val = asm.and(arg0, arg1); + + // Push the output on the stack + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, val); + + Some(KeepCompiling) + } else { + // Delegate to send, call the method on the recv + gen_opt_send_without_block(jit, asm, ocb) + } +} + +fn gen_opt_or( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + // Defer compilation so we can specialize on a runtime `self` + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + }; + + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_OR) { + return None; + } + + // Check that both operands are fixnums + guard_two_fixnums(jit, asm, ocb); + + // Get the operands and destination from the stack + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); + + // Do the bitwise or arg0 | arg1 + let val = asm.or(arg0, arg1); + + // Push the output on the stack + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, val); + + Some(KeepCompiling) + } else { + // Delegate to send, call the method on the recv + gen_opt_send_without_block(jit, asm, ocb) + } +} + +fn gen_opt_minus( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + // Defer compilation so we can specialize on a runtime `self` + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + }; + + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS) { + return None; + } + + // Check that both operands are fixnums + guard_two_fixnums(jit, asm, ocb); + + // Get the operands and destination from the stack + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); + + // Subtract arg0 - arg1 and test for overflow + let val_untag = asm.sub(arg0, arg1); + asm.jo(Target::side_exit(Counter::opt_minus_overflow)); + let val = asm.add(val_untag, Opnd::Imm(1)); + + // Push the output on the stack + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, val); + + Some(KeepCompiling) + } else { + // Delegate to send, call the method on the recv + gen_opt_send_without_block(jit, asm, ocb) + } +} + +fn gen_opt_mult( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + }; + + // Fallback to a method call if it overflows + if two_fixnums && asm.ctx.get_chain_depth() == 0 { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MULT) { + return None; + } + + // Check that both operands are fixnums + guard_two_fixnums(jit, asm, ocb); + + // Get the operands from the stack + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); + + // Do some bitwise gymnastics to handle tag bits + // x * y is translated to (x >> 1) * (y - 1) + 1 + let arg0_untag = asm.rshift(arg0, 
Opnd::UImm(1)); + let arg1_untag = asm.sub(arg1, Opnd::UImm(1)); + let out_val = asm.mul(arg0_untag, arg1_untag); + jit_chain_guard(JCC_JO_MUL, jit, asm, ocb, 1, Counter::opt_mult_overflow); + let out_val = asm.add(out_val, Opnd::UImm(1)); + + // Push the output on the stack + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, out_val); + + Some(KeepCompiling) + } else { + gen_opt_send_without_block(jit, asm, ocb) + } +} + +fn gen_opt_div( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Delegate to send, call the method on the recv + gen_opt_send_without_block(jit, asm, ocb) +} + +fn gen_opt_mod( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) { + Some(two_fixnums) => two_fixnums, + None => { + // Defer compilation so we can specialize on a runtime `self` + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + }; + + if two_fixnums { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MOD) { + return None; + } + + // Check that both operands are fixnums + guard_two_fixnums(jit, asm, ocb); + + // Get the operands and destination from the stack + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); + + // Check for arg0 % 0 + asm.cmp(arg1, Opnd::Imm(VALUE::fixnum_from_usize(0).as_i64())); + asm.je(Target::side_exit(Counter::opt_mod_zero)); + + // Call rb_fix_mod_fix(VALUE recv, VALUE obj) + let ret = asm.ccall(rb_fix_mod_fix as *const u8, vec![arg0, arg1]); + + // Push the return value onto the stack + // When the two arguments are fixnums, the modulo output is always a fixnum + let stack_ret = asm.stack_push(Type::Fixnum); + asm.mov(stack_ret, ret); + + Some(KeepCompiling) + } else { + // Delegate to send, call the method on the recv + gen_opt_send_without_block(jit, asm, ocb) + } +} + +fn gen_opt_ltlt( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Delegate to send, call the method on the recv + gen_opt_send_without_block(jit, asm, ocb) +} + +fn gen_opt_nil_p( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Delegate to send, call the method on the recv + gen_opt_send_without_block(jit, asm, ocb) +} + +fn gen_opt_empty_p( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Delegate to send, call the method on the recv + gen_opt_send_without_block(jit, asm, ocb) +} + +fn gen_opt_succ( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Delegate to send, call the method on the recv + gen_opt_send_without_block(jit, asm, ocb) +} + +fn gen_opt_str_freeze( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + if !assume_bop_not_redefined(jit, asm, ocb, STRING_REDEFINED_OP_FLAG, BOP_FREEZE) { + return None; + } + + let str = jit.get_arg(0); + + // Push the return value onto the stack + let stack_ret = asm.stack_push(Type::CString); + asm.mov(stack_ret, str.into()); + + Some(KeepCompiling) +} + +fn gen_opt_str_uminus( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + if !assume_bop_not_redefined(jit, asm, ocb, STRING_REDEFINED_OP_FLAG, BOP_UMINUS) { + return None; + } + + let str = jit.get_arg(0); + + // Push the return value onto the stack + let stack_ret = 
asm.stack_push(Type::CString); + asm.mov(stack_ret, str.into()); + + Some(KeepCompiling) +} + +fn gen_opt_newarray_max( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let num = jit.get_arg(0).as_u32(); + + // Save the PC and SP because we may call #max + jit_prepare_non_leaf_call(jit, asm); + + extern "C" { + fn rb_vm_opt_newarray_max(ec: EcPtr, num: u32, elts: *const VALUE) -> VALUE; + } + + let values_opnd = asm.ctx.sp_opnd(-(num as i32)); + let values_ptr = asm.lea(values_opnd); + + let val_opnd = asm.ccall( + rb_vm_opt_newarray_max as *const u8, + vec![ + EC, + num.into(), + values_ptr + ], + ); + + asm.stack_pop(num.as_usize()); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val_opnd); + + Some(KeepCompiling) +} + +fn gen_opt_newarray_send( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let method = jit.get_arg(1).as_u64(); + + if method == ID!(min) { + gen_opt_newarray_min(jit, asm, _ocb) + } else if method == ID!(max) { + gen_opt_newarray_max(jit, asm, _ocb) + } else if method == ID!(hash) { + gen_opt_newarray_hash(jit, asm, _ocb) + } else if method == ID!(pack) { + gen_opt_newarray_pack(jit, asm, _ocb) + } else { + None + } +} + +fn gen_opt_newarray_pack( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // num == 4 ( for this code ) + let num = jit.get_arg(0).as_u32(); + + // Save the PC and SP because we may call #pack + jit_prepare_non_leaf_call(jit, asm); + + extern "C" { + fn rb_vm_opt_newarray_pack(ec: EcPtr, num: u32, elts: *const VALUE, fmt: VALUE) -> VALUE; + } + + let values_opnd = asm.ctx.sp_opnd(-(num as i32)); + let values_ptr = asm.lea(values_opnd); + + let fmt_string = asm.ctx.sp_opnd(-1); + + let val_opnd = asm.ccall( + rb_vm_opt_newarray_pack as *const u8, + vec![ + EC, + (num - 1).into(), + values_ptr, + fmt_string + ], + ); + + asm.stack_pop(num.as_usize()); + let stack_ret = asm.stack_push(Type::CString); + asm.mov(stack_ret, val_opnd); + + Some(KeepCompiling) +} + +fn gen_opt_newarray_hash( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + + let num = jit.get_arg(0).as_u32(); + + // Save the PC and SP because we may call #hash + jit_prepare_non_leaf_call(jit, asm); + + extern "C" { + fn rb_vm_opt_newarray_hash(ec: EcPtr, num: u32, elts: *const VALUE) -> VALUE; + } + + let values_opnd = asm.ctx.sp_opnd(-(num as i32)); + let values_ptr = asm.lea(values_opnd); + + let val_opnd = asm.ccall( + rb_vm_opt_newarray_hash as *const u8, + vec![ + EC, + num.into(), + values_ptr + ], + ); + + asm.stack_pop(num.as_usize()); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val_opnd); + + Some(KeepCompiling) +} + +fn gen_opt_newarray_min( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + + let num = jit.get_arg(0).as_u32(); + + // Save the PC and SP because we may call #min + jit_prepare_non_leaf_call(jit, asm); + + extern "C" { + fn rb_vm_opt_newarray_min(ec: EcPtr, num: u32, elts: *const VALUE) -> VALUE; + } + + let values_opnd = asm.ctx.sp_opnd(-(num as i32)); + let values_ptr = asm.lea(values_opnd); + + let val_opnd = asm.ccall( + rb_vm_opt_newarray_min as *const u8, + vec![ + EC, + num.into(), + values_ptr + ], + ); + + asm.stack_pop(num.as_usize()); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val_opnd); + + Some(KeepCompiling) +} + +fn 
gen_opt_not( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + return gen_opt_send_without_block(jit, asm, ocb); +} + +fn gen_opt_size( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + return gen_opt_send_without_block(jit, asm, ocb); +} + +fn gen_opt_length( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + return gen_opt_send_without_block(jit, asm, ocb); +} + +fn gen_opt_regexpmatch2( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + return gen_opt_send_without_block(jit, asm, ocb); +} + +fn gen_opt_case_dispatch( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Normally this instruction would lookup the key in a hash and jump to an + // offset based on that. + // Instead we can take the fallback case and continue with the next + // instruction. + // We'd hope that our jitted code will be sufficiently fast without the + // hash lookup, at least for small hashes, but it's worth revisiting this + // assumption in the future. + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + let case_hash = jit.get_arg(0); + let else_offset = jit.get_arg(1).as_u32(); + + // Try to reorder case/else branches so that ones that are actually used come first. + // Supporting only Fixnum for now so that the implementation can be an equality check. + let key_opnd = asm.stack_opnd(0); + let comptime_key = jit.peek_at_stack(&asm.ctx, 0); + + // Check that all cases are fixnums to avoid having to register BOP assumptions on + // all the types that case hashes support. This spends compile time to save memory. + fn case_hash_all_fixnum_p(hash: VALUE) -> bool { + let mut all_fixnum = true; + unsafe { + unsafe extern "C" fn per_case(key: st_data_t, _value: st_data_t, data: st_data_t) -> c_int { + (if VALUE(key as usize).fixnum_p() { + ST_CONTINUE + } else { + (data as *mut bool).write(false); + ST_STOP + }) as c_int + } + rb_hash_stlike_foreach(hash, Some(per_case), (&mut all_fixnum) as *mut _ as st_data_t); + } + + all_fixnum + } + + // If megamorphic, fallback to compiling branch instructions after opt_case_dispatch + let megamorphic = asm.ctx.get_chain_depth() >= CASE_WHEN_MAX_DEPTH; + if megamorphic { + gen_counter_incr(asm, Counter::num_opt_case_dispatch_megamorphic); + } + + if comptime_key.fixnum_p() && comptime_key.0 <= u32::MAX.as_usize() && case_hash_all_fixnum_p(case_hash) && !megamorphic { + if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_EQQ) { + return None; + } + + // Check if the key is the same value + asm.cmp(key_opnd, comptime_key.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + CASE_WHEN_MAX_DEPTH, + Counter::opt_case_dispatch_megamorphic, + ); + asm.stack_pop(1); // Pop key_opnd + + // Get the offset for the compile-time key + let mut offset = 0; + unsafe { rb_hash_stlike_lookup(case_hash, comptime_key.0 as _, &mut offset) }; + let jump_offset = if offset == 0 { + // NOTE: If we hit the else branch with various values, it could negatively impact the performance. 
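+            // offset is still 0 here when the key is not in the case hash (real hits are
+            // tagged fixnum offsets, which are never 0), so use the else offset.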
+ else_offset + } else { + (offset as u32) >> 1 // FIX2LONG + }; + + // Jump to the offset of case or else + let jump_idx = jit.next_insn_idx() as u32 + jump_offset; + let jump_block = BlockId { iseq: jit.iseq, idx: jump_idx.try_into().unwrap() }; + gen_direct_jump(jit, &asm.ctx.clone(), jump_block, asm); + Some(EndBlock) + } else { + asm.stack_pop(1); // Pop key_opnd + Some(KeepCompiling) // continue with === branches + } +} + +fn gen_branchif( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let jump_offset = jit.get_arg(0).as_i32(); + + // Check for interrupts, but only on backward branches that may create loops + if jump_offset < 0 { + gen_check_ints(asm, Counter::branchif_interrupted); + } + + // Get the branch target instruction offsets + let next_idx = jit.next_insn_idx(); + let jump_idx = (next_idx as i32) + jump_offset; + let next_block = BlockId { + iseq: jit.iseq, + idx: next_idx, + }; + let jump_block = BlockId { + iseq: jit.iseq, + idx: jump_idx.try_into().unwrap(), + }; + + // Test if any bit (outside of the Qnil bit) is on + // See RB_TEST() + let val_type = asm.ctx.get_opnd_type(StackOpnd(0)); + let val_opnd = asm.stack_pop(1); + + incr_counter!(branch_insn_count); + + if let Some(result) = val_type.known_truthy() { + let target = if result { jump_block } else { next_block }; + gen_direct_jump(jit, &asm.ctx.clone(), target, asm); + incr_counter!(branch_known_count); + } else { + asm.test(val_opnd, Opnd::Imm(!Qnil.as_i64())); + + // Generate the branch instructions + let ctx = asm.ctx; + gen_branch( + jit, + asm, + ocb, + jump_block, + &ctx, + Some(next_block), + Some(&ctx), + BranchGenFn::BranchIf(Cell::new(BranchShape::Default)), + ); + } + + Some(EndBlock) +} + +fn gen_branchunless( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let jump_offset = jit.get_arg(0).as_i32(); + + // Check for interrupts, but only on backward branches that may create loops + if jump_offset < 0 { + gen_check_ints(asm, Counter::branchunless_interrupted); + } + + // Get the branch target instruction offsets + let next_idx = jit.next_insn_idx() as i32; + let jump_idx = next_idx + jump_offset; + let next_block = BlockId { + iseq: jit.iseq, + idx: next_idx.try_into().unwrap(), + }; + let jump_block = BlockId { + iseq: jit.iseq, + idx: jump_idx.try_into().unwrap(), + }; + + let val_type = asm.ctx.get_opnd_type(StackOpnd(0)); + let val_opnd = asm.stack_pop(1); + + incr_counter!(branch_insn_count); + + if let Some(result) = val_type.known_truthy() { + let target = if result { next_block } else { jump_block }; + gen_direct_jump(jit, &asm.ctx.clone(), target, asm); + incr_counter!(branch_known_count); + } else { + // Test if any bit (outside of the Qnil bit) is on + // See RB_TEST() + let not_qnil = !Qnil.as_i64(); + asm.test(val_opnd, not_qnil.into()); + + // Generate the branch instructions + let ctx = asm.ctx; + gen_branch( + jit, + asm, + ocb, + jump_block, + &ctx, + Some(next_block), + Some(&ctx), + BranchGenFn::BranchUnless(Cell::new(BranchShape::Default)), + ); + } + + Some(EndBlock) +} + +fn gen_branchnil( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let jump_offset = jit.get_arg(0).as_i32(); + + // Check for interrupts, but only on backward branches that may create loops + if jump_offset < 0 { + gen_check_ints(asm, Counter::branchnil_interrupted); + } + + // Get the branch target instruction offsets + let next_idx = jit.next_insn_idx() 
as i32; + let jump_idx = next_idx + jump_offset; + let next_block = BlockId { + iseq: jit.iseq, + idx: next_idx.try_into().unwrap(), + }; + let jump_block = BlockId { + iseq: jit.iseq, + idx: jump_idx.try_into().unwrap(), + }; + + let val_type = asm.ctx.get_opnd_type(StackOpnd(0)); + let val_opnd = asm.stack_pop(1); + + incr_counter!(branch_insn_count); + + if let Some(result) = val_type.known_nil() { + let target = if result { jump_block } else { next_block }; + gen_direct_jump(jit, &asm.ctx.clone(), target, asm); + incr_counter!(branch_known_count); + } else { + // Test if the value is Qnil + asm.cmp(val_opnd, Opnd::UImm(Qnil.into())); + // Generate the branch instructions + let ctx = asm.ctx; + gen_branch( + jit, + asm, + ocb, + jump_block, + &ctx, + Some(next_block), + Some(&ctx), + BranchGenFn::BranchNil(Cell::new(BranchShape::Default)), + ); + } + + Some(EndBlock) +} + +fn gen_throw( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let throw_state = jit.get_arg(0).as_u64(); + let throwobj = asm.stack_pop(1); + let throwobj = asm.load(throwobj); + + // Gather some statistics about throw + gen_counter_incr(asm, Counter::num_throw); + match (throw_state & VM_THROW_STATE_MASK as u64) as u32 { + RUBY_TAG_BREAK => gen_counter_incr(asm, Counter::num_throw_break), + RUBY_TAG_RETRY => gen_counter_incr(asm, Counter::num_throw_retry), + RUBY_TAG_RETURN => gen_counter_incr(asm, Counter::num_throw_return), + _ => {}, + } + + // THROW_DATA_NEW allocates. Save SP for GC and PC for allocation tracing as + // well as handling the catch table. However, not using jit_prepare_call_with_gc + // since we don't need a patch point for this implementation. + jit_save_pc(jit, asm); + gen_save_sp(asm); + + // rb_vm_throw verifies it's a valid throw, sets ec->tag->state, and returns throw + // data, which is throwobj or a vm_throw_data wrapping it. When ec->tag->state is + // set, JIT code callers will handle the throw with vm_exec_handle_exception. + extern "C" { + fn rb_vm_throw(ec: EcPtr, reg_cfp: CfpPtr, throw_state: u32, throwobj: VALUE) -> VALUE; + } + let val = asm.ccall(rb_vm_throw as *mut u8, vec![EC, CFP, throw_state.into(), throwobj]); + + asm_comment!(asm, "exit from throw"); + asm.cpop_into(SP); + asm.cpop_into(EC); + asm.cpop_into(CFP); + + asm.frame_teardown(); + + asm.cret(val); + Some(EndBlock) +} + +fn gen_jump( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let jump_offset = jit.get_arg(0).as_i32(); + + // Check for interrupts, but only on backward branches that may create loops + if jump_offset < 0 { + gen_check_ints(asm, Counter::jump_interrupted); + } + + // Get the branch target instruction offsets + let jump_idx = jit.next_insn_idx() as i32 + jump_offset; + let jump_block = BlockId { + iseq: jit.iseq, + idx: jump_idx.try_into().unwrap(), + }; + + // Generate the jump instruction + gen_direct_jump(jit, &asm.ctx.clone(), jump_block, asm); + + Some(EndBlock) +} + +/// Guard that self or a stack operand has the same class as `known_klass`, using +/// `sample_instance` to speculate about the shape of the runtime value. +/// FIXNUM and on-heap integers are treated as if they have distinct classes, and +/// the guard generated for one will fail for the other. +/// +/// Recompile as contingency if possible, or take side exit a last resort. 
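+/// For example, guarding `known_klass == rb_cInteger` against a fixnum sample compiles
+/// down to a single tag-bit test, while a plain heap class needs a load and compare of
+/// the receiver's RBASIC klass field.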
+fn jit_guard_known_klass( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + known_klass: VALUE, + obj_opnd: Opnd, + insn_opnd: YARVOpnd, + sample_instance: VALUE, + max_chain_depth: u8, + counter: Counter, +) { + let val_type = asm.ctx.get_opnd_type(insn_opnd); + + if val_type.known_class() == Some(known_klass) { + // Unless frozen, Array, Hash, and String objects may change their RBASIC_CLASS + // when they get a singleton class. Those types need invalidations. + if unsafe { [rb_cArray, rb_cHash, rb_cString].contains(&known_klass) } { + if jit.assume_no_singleton_class(asm, ocb, known_klass) { + // Speculate that this object will not have a singleton class, + // and invalidate the block in case it does. + return; + } + } else { + // We already know from type information that this is a match + return; + } + } + + if unsafe { known_klass == rb_cNilClass } { + assert!(!val_type.is_heap()); + assert!(val_type.is_unknown()); + + asm_comment!(asm, "guard object is nil"); + asm.cmp(obj_opnd, Qnil.into()); + jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter); + + asm.ctx.upgrade_opnd_type(insn_opnd, Type::Nil); + } else if unsafe { known_klass == rb_cTrueClass } { + assert!(!val_type.is_heap()); + assert!(val_type.is_unknown()); + + asm_comment!(asm, "guard object is true"); + asm.cmp(obj_opnd, Qtrue.into()); + jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter); + + asm.ctx.upgrade_opnd_type(insn_opnd, Type::True); + } else if unsafe { known_klass == rb_cFalseClass } { + assert!(!val_type.is_heap()); + assert!(val_type.is_unknown()); + + asm_comment!(asm, "guard object is false"); + assert!(Qfalse.as_i32() == 0); + asm.test(obj_opnd, obj_opnd); + jit_chain_guard(JCC_JNZ, jit, asm, ocb, max_chain_depth, counter); + + asm.ctx.upgrade_opnd_type(insn_opnd, Type::False); + } else if unsafe { known_klass == rb_cInteger } && sample_instance.fixnum_p() { + // We will guard fixnum and bignum as though they were separate classes + // BIGNUM can be handled by the general else case below + assert!(val_type.is_unknown()); + + asm_comment!(asm, "guard object is fixnum"); + asm.test(obj_opnd, Opnd::Imm(RUBY_FIXNUM_FLAG as i64)); + jit_chain_guard(JCC_JZ, jit, asm, ocb, max_chain_depth, counter); + asm.ctx.upgrade_opnd_type(insn_opnd, Type::Fixnum); + } else if unsafe { known_klass == rb_cSymbol } && sample_instance.static_sym_p() { + assert!(!val_type.is_heap()); + // We will guard STATIC vs DYNAMIC as though they were separate classes + // DYNAMIC symbols can be handled by the general else case below + if val_type != Type::ImmSymbol || !val_type.is_imm() { + assert!(val_type.is_unknown()); + + asm_comment!(asm, "guard object is static symbol"); + assert!(RUBY_SPECIAL_SHIFT == 8); + asm.cmp(obj_opnd.with_num_bits(8).unwrap(), Opnd::UImm(RUBY_SYMBOL_FLAG as u64)); + jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter); + asm.ctx.upgrade_opnd_type(insn_opnd, Type::ImmSymbol); + } + } else if unsafe { known_klass == rb_cFloat } && sample_instance.flonum_p() { + assert!(!val_type.is_heap()); + if val_type != Type::Flonum || !val_type.is_imm() { + assert!(val_type.is_unknown()); + + // We will guard flonum vs heap float as though they were separate classes + asm_comment!(asm, "guard object is flonum"); + let flag_bits = asm.and(obj_opnd, Opnd::UImm(RUBY_FLONUM_MASK as u64)); + asm.cmp(flag_bits, Opnd::UImm(RUBY_FLONUM_FLAG as u64)); + jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter); + asm.ctx.upgrade_opnd_type(insn_opnd, 
Type::Flonum); + } + } else if unsafe { + FL_TEST(known_klass, VALUE(RUBY_FL_SINGLETON as usize)) != VALUE(0) + && sample_instance == rb_class_attached_object(known_klass) + && !rb_obj_is_kind_of(sample_instance, rb_cIO).test() + } { + // Singleton classes are attached to one specific object, so we can + // avoid one memory access (and potentially the is_heap check) by + // looking for the expected object directly. + // Note that in case the sample instance has a singleton class that + // doesn't attach to the sample instance, it means the sample instance + // has an empty singleton class that hasn't been materialized yet. In + // this case, comparing against the sample instance doesn't guarantee + // that its singleton class is empty, so we can't avoid the memory + // access. As an example, `Object.new.singleton_class` is an object in + // this situation. + // Also, guarding by identity is incorrect for IO objects because + // IO#reopen can be used to change the class and singleton class of IO objects! + asm_comment!(asm, "guard known object with singleton class"); + asm.cmp(obj_opnd, sample_instance.into()); + jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter); + } else if val_type == Type::CString && unsafe { known_klass == rb_cString } { + // guard elided because the context says we've already checked + unsafe { + assert_eq!(sample_instance.class_of(), rb_cString, "context says class is exactly ::String") + }; + } else { + assert!(!val_type.is_imm()); + + // Check that the receiver is a heap object + // Note: if we get here, the class doesn't have immediate instances. + if !val_type.is_heap() { + asm_comment!(asm, "guard not immediate"); + asm.test(obj_opnd, (RUBY_IMMEDIATE_MASK as u64).into()); + jit_chain_guard(JCC_JNZ, jit, asm, ocb, max_chain_depth, counter); + asm.cmp(obj_opnd, Qfalse.into()); + jit_chain_guard(JCC_JE, jit, asm, ocb, max_chain_depth, counter); + + asm.ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap); + } + + // If obj_opnd isn't already a register, load it. + let obj_opnd = match obj_opnd { + Opnd::InsnOut { .. } => obj_opnd, + _ => asm.load(obj_opnd), + }; + let klass_opnd = Opnd::mem(64, obj_opnd, RUBY_OFFSET_RBASIC_KLASS); + + // Bail if receiver class is different from known_klass + // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the class. + asm_comment!(asm, "guard known class"); + asm.cmp(klass_opnd, known_klass.into()); + jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter); + + if known_klass == unsafe { rb_cString } { + asm.ctx.upgrade_opnd_type(insn_opnd, Type::CString); + } else if known_klass == unsafe { rb_cArray } { + asm.ctx.upgrade_opnd_type(insn_opnd, Type::CArray); + } else if known_klass == unsafe { rb_cHash } { + asm.ctx.upgrade_opnd_type(insn_opnd, Type::CHash); + } + } +} + +// Generate ancestry guard for protected callee. +// Calls to protected callees only go through when self.is_a?(klass_that_defines_the_callee). +fn jit_protected_callee_ancestry_guard( + asm: &mut Assembler, + cme: *const rb_callable_method_entry_t, +) { + // See vm_call_method(). + let def_class = unsafe { (*cme).defined_class }; + // Note: PC isn't written to current control frame as rb_is_kind_of() shouldn't raise. 
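+    // A protected method may be called whenever `self` is a kind of the defining class
+    // (e.g. from a sibling or subclass instance), hence an is_a? check rather than an
+    // exact class comparison.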
+ // VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass); + + let val = asm.ccall( + rb_obj_is_kind_of as *mut u8, + vec![ + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF), + def_class.into(), + ], + ); + asm.test(val, val); + asm.jz(Target::side_exit(Counter::guard_send_se_protected_check_failed)) +} + +// Codegen for rb_obj_not(). +// Note, caller is responsible for generating all the right guards, including +// arity guards. +fn jit_rb_obj_not( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + let recv_opnd = asm.ctx.get_opnd_type(StackOpnd(0)); + + match recv_opnd.known_truthy() { + Some(false) => { + asm_comment!(asm, "rb_obj_not(nil_or_false)"); + asm.stack_pop(1); + let out_opnd = asm.stack_push(Type::True); + asm.mov(out_opnd, Qtrue.into()); + }, + Some(true) => { + // Note: recv_opnd != Type::Nil && recv_opnd != Type::False. + asm_comment!(asm, "rb_obj_not(truthy)"); + asm.stack_pop(1); + let out_opnd = asm.stack_push(Type::False); + asm.mov(out_opnd, Qfalse.into()); + }, + _ => { + return false; + }, + } + + true +} + +// Codegen for rb_true() +fn jit_rb_true( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "nil? == true"); + asm.stack_pop(1); + let stack_ret = asm.stack_push(Type::True); + asm.mov(stack_ret, Qtrue.into()); + true +} + +// Codegen for rb_false() +fn jit_rb_false( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "nil? == false"); + asm.stack_pop(1); + let stack_ret = asm.stack_push(Type::False); + asm.mov(stack_ret, Qfalse.into()); + true +} + +/// Codegen for Kernel#is_a? +fn jit_rb_kernel_is_a( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + known_recv_class: Option<VALUE>, +) -> bool { + if argc != 1 { + return false; + } + + // If this is a super call we might not know the class + if known_recv_class.is_none() { + return false; + } + + // Important note: The output code will simply `return true/false`. + // Correctness follows from: + // - `known_recv_class` implies there is a guard scheduled before here + // for a particular `CLASS_OF(lhs)`. + // - We guard that rhs is identical to the compile-time sample + // - In general, for any two Class instances A, B, `A < B` does not change at runtime. + // Class#superclass is stable. + + let sample_rhs = jit.peek_at_stack(&asm.ctx, 0); + let sample_lhs = jit.peek_at_stack(&asm.ctx, 1); + + // We are not allowing module here because the module hierarchy can change at runtime. 
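+    // e.g. `obj.is_a?(SomeModule)` can flip from false to true if SomeModule is later
+    // included into obj's class, whereas the superclass chain of a Class cannot change.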
+ if !unsafe { RB_TYPE_P(sample_rhs, RUBY_T_CLASS) } { + return false; + } + let sample_is_a = unsafe { rb_obj_is_kind_of(sample_lhs, sample_rhs) == Qtrue }; + + asm_comment!(asm, "Kernel#is_a?"); + asm.cmp(asm.stack_opnd(0), sample_rhs.into()); + asm.jne(Target::side_exit(Counter::guard_send_is_a_class_mismatch)); + + asm.stack_pop(2); + + if sample_is_a { + let stack_ret = asm.stack_push(Type::True); + asm.mov(stack_ret, Qtrue.into()); + } else { + let stack_ret = asm.stack_push(Type::False); + asm.mov(stack_ret, Qfalse.into()); + } + return true; +} + +/// Codegen for Kernel#instance_of? +fn jit_rb_kernel_instance_of( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + known_recv_class: Option<VALUE>, +) -> bool { + if argc != 1 { + return false; + } + + // If this is a super call we might not know the class + if known_recv_class.is_none() { + return false; + } + + // Important note: The output code will simply `return true/false`. + // Correctness follows from: + // - `known_recv_class` implies there is a guard scheduled before here + // for a particular `CLASS_OF(lhs)`. + // - We guard that rhs is identical to the compile-time sample + // - For a particular `CLASS_OF(lhs)`, `rb_obj_class(lhs)` does not change. + // (because for any singleton class `s`, `s.superclass.equal?(s.attached_object.class)`) + + let sample_rhs = jit.peek_at_stack(&asm.ctx, 0); + let sample_lhs = jit.peek_at_stack(&asm.ctx, 1); + + // Filters out cases where the C implementation raises + if unsafe { !(RB_TYPE_P(sample_rhs, RUBY_T_CLASS) || RB_TYPE_P(sample_rhs, RUBY_T_MODULE)) } { + return false; + } + + // We need to grab the class here to deal with singleton classes. + // Instance of grabs the "real class" of the object rather than the + // singleton class. + let sample_lhs_real_class = unsafe { rb_obj_class(sample_lhs) }; + + let sample_instance_of = sample_lhs_real_class == sample_rhs; + + asm_comment!(asm, "Kernel#instance_of?"); + asm.cmp(asm.stack_opnd(0), sample_rhs.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_send_instance_of_class_mismatch, + ); + + asm.stack_pop(2); + + if sample_instance_of { + let stack_ret = asm.stack_push(Type::True); + asm.mov(stack_ret, Qtrue.into()); + } else { + let stack_ret = asm.stack_push(Type::False); + asm.mov(stack_ret, Qfalse.into()); + } + return true; +} + +fn jit_rb_mod_eqq( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if argc != 1 { + return false; + } + + asm_comment!(asm, "Module#==="); + // By being here, we know that the receiver is a T_MODULE or a T_CLASS, because Module#=== can + // only live on these objects. With that, we can call rb_obj_is_kind_of() without + // jit_prepare_non_leaf_call() or a control frame push because it can't raise, allocate, or call + // Ruby methods with these inputs. + // Note the difference in approach from Kernel#is_a? because we don't get a free guard for the + // right hand side. 
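+    // Here the known class from method lookup is the receiver (the module) and the
+    // argument is arbitrary, so the answer is computed by an actual rb_obj_is_kind_of()
+    // call at run time rather than folded to a constant at compile time.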
+ let rhs = asm.stack_pop(1); + let lhs = asm.stack_pop(1); // the module + let ret = asm.ccall(rb_obj_is_kind_of as *const u8, vec![rhs, lhs]); + + // Return the result + let stack_ret = asm.stack_push(Type::UnknownImm); + asm.mov(stack_ret, ret); + + return true; +} + +// Codegen for rb_obj_equal() +// object identity comparison +fn jit_rb_obj_equal( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "equal?"); + let obj1 = asm.stack_pop(1); + let obj2 = asm.stack_pop(1); + + asm.cmp(obj1, obj2); + let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into()); + + let stack_ret = asm.stack_push(Type::UnknownImm); + asm.mov(stack_ret, ret_opnd); + true +} + +// Codegen for rb_obj_not_equal() +// object identity comparison +fn jit_rb_obj_not_equal( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + gen_equality_specialized(jit, asm, ocb, false) == Some(true) +} + +// Codegen for rb_int_equal() +fn jit_rb_int_equal( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Check that both operands are fixnums + guard_two_fixnums(jit, asm, ocb); + + // Compare the arguments + asm_comment!(asm, "rb_int_equal"); + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); + asm.cmp(arg0, arg1); + let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into()); + + let stack_ret = asm.stack_push(Type::UnknownImm); + asm.mov(stack_ret, ret_opnd); + true +} + +fn jit_rb_int_succ( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard the receiver is fixnum + let recv_type = asm.ctx.get_opnd_type(StackOpnd(0)); + let recv = asm.stack_pop(1); + if recv_type != Type::Fixnum { + asm_comment!(asm, "guard object is fixnum"); + asm.test(recv, Opnd::Imm(RUBY_FIXNUM_FLAG as i64)); + asm.jz(Target::side_exit(Counter::opt_succ_not_fixnum)); + } + + asm_comment!(asm, "Integer#succ"); + let out_val = asm.add(recv, Opnd::Imm(2)); // 2 is untagged Fixnum 1 + asm.jo(Target::side_exit(Counter::opt_succ_overflow)); + + // Push the output onto the stack + let dst = asm.stack_push(Type::Fixnum); + asm.mov(dst, out_val); + + true +} + +fn jit_rb_int_div( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if asm.ctx.two_fixnums_on_stack(jit) != Some(true) { + return false; + } + guard_two_fixnums(jit, asm, ocb); + + // rb_fix_div_fix may GC-allocate for Bignum + jit_prepare_call_with_gc(jit, asm); + + asm_comment!(asm, "Integer#/"); + let obj = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); + + // Check for arg0 % 0 + asm.cmp(obj, VALUE::fixnum_from_usize(0).as_i64().into()); + asm.je(Target::side_exit(Counter::opt_div_zero)); + + let ret = asm.ccall(rb_fix_div_fix as *const u8, vec![recv, obj]); + 
asm.stack_pop(2); // Keep them during ccall for GC + + let ret_opnd = asm.stack_push(Type::Unknown); + asm.mov(ret_opnd, ret); + true +} + +fn jit_rb_int_lshift( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if asm.ctx.two_fixnums_on_stack(jit) != Some(true) { + return false; + } + guard_two_fixnums(jit, asm, ocb); + + let comptime_shift = jit.peek_at_stack(&asm.ctx, 0); + + if !comptime_shift.fixnum_p() { + return false; + } + + // Untag the fixnum shift amount + let shift_amt = comptime_shift.as_isize() >> 1; + if shift_amt > 63 || shift_amt < 0 { + return false; + } + + // Fallback to a C call if the shift amount varies + // This check is needed because the chain guard will side-exit + // if its max depth is reached + if asm.ctx.get_chain_depth() > 0 { + return false; + } + + let rhs = asm.stack_pop(1); + let lhs = asm.stack_pop(1); + + // Guard on the shift amount we speculated on + asm.cmp(rhs, comptime_shift.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + 1, + Counter::lshift_amount_changed, + ); + + fixnum_left_shift_body(asm, lhs, shift_amt as u64); + true +} + +fn fixnum_left_shift_body(asm: &mut Assembler, lhs: Opnd, shift_amt: u64) { + let in_val = asm.sub(lhs, 1.into()); + let shift_opnd = Opnd::UImm(shift_amt); + let out_val = asm.lshift(in_val, shift_opnd); + let unshifted = asm.rshift(out_val, shift_opnd); + + // Guard that we did not overflow + asm.cmp(unshifted, in_val); + asm.jne(Target::side_exit(Counter::lshift_overflow)); + + // Re-tag the output value + let out_val = asm.add(out_val, 1.into()); + + let ret_opnd = asm.stack_push(Type::Fixnum); + asm.mov(ret_opnd, out_val); +} + +fn jit_rb_int_rshift( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if asm.ctx.two_fixnums_on_stack(jit) != Some(true) { + return false; + } + guard_two_fixnums(jit, asm, ocb); + + let comptime_shift = jit.peek_at_stack(&asm.ctx, 0); + + // Untag the fixnum shift amount + let shift_amt = comptime_shift.as_isize() >> 1; + if shift_amt > 63 || shift_amt < 0 { + return false; + } + + // Fallback to a C call if the shift amount varies + // This check is needed because the chain guard will side-exit + // if its max depth is reached + if asm.ctx.get_chain_depth() > 0 { + return false; + } + + let rhs = asm.stack_pop(1); + let lhs = asm.stack_pop(1); + + // Guard on the shift amount we speculated on + asm.cmp(rhs, comptime_shift.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + 1, + Counter::rshift_amount_changed, + ); + + let shift_opnd = Opnd::UImm(shift_amt as u64); + let out_val = asm.rshift(lhs, shift_opnd); + let out_val = asm.or(out_val, 1.into()); + + let ret_opnd = asm.stack_push(Type::Fixnum); + asm.mov(ret_opnd, out_val); + true +} + +fn jit_rb_int_xor( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if asm.ctx.two_fixnums_on_stack(jit) != Some(true) { + return false; + } + guard_two_fixnums(jit, asm, ocb); + + let rhs = asm.stack_pop(1); + let lhs = asm.stack_pop(1); + + // XOR and then re-tag the resulting 
fixnum + let out_val = asm.xor(lhs, rhs); + let out_val = asm.or(out_val, 1.into()); + + let ret_opnd = asm.stack_push(Type::Fixnum); + asm.mov(ret_opnd, out_val); + true +} + +fn jit_rb_int_aref( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if argc != 1 { + return false; + } + if asm.ctx.two_fixnums_on_stack(jit) != Some(true) { + return false; + } + guard_two_fixnums(jit, asm, ocb); + + asm_comment!(asm, "Integer#[]"); + let obj = asm.stack_pop(1); + let recv = asm.stack_pop(1); + + let ret = asm.ccall(rb_fix_aref as *const u8, vec![recv, obj]); + + let ret_opnd = asm.stack_push(Type::Fixnum); + asm.mov(ret_opnd, ret); + true +} + +fn jit_rb_float_plus( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin + let comptime_obj = jit.peek_at_stack(&asm.ctx, 0); + if comptime_obj.fixnum_p() || comptime_obj.flonum_p() { + let obj = asm.stack_opnd(0); + jit_guard_known_klass( + jit, + asm, + ocb, + comptime_obj.class_of(), + obj, + obj.into(), + comptime_obj, + SEND_MAX_DEPTH, + Counter::guard_send_not_fixnum_or_flonum, + ); + } else { + return false; + } + + // Save the PC and SP because the callee may allocate Float on heap + jit_prepare_call_with_gc(jit, asm); + + asm_comment!(asm, "Float#+"); + let obj = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); + + let ret = asm.ccall(rb_float_plus as *const u8, vec![recv, obj]); + asm.stack_pop(2); // Keep recv during ccall for GC + + let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float + asm.mov(ret_opnd, ret); + true +} + +fn jit_rb_float_minus( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin + let comptime_obj = jit.peek_at_stack(&asm.ctx, 0); + if comptime_obj.fixnum_p() || comptime_obj.flonum_p() { + let obj = asm.stack_opnd(0); + jit_guard_known_klass( + jit, + asm, + ocb, + comptime_obj.class_of(), + obj, + obj.into(), + comptime_obj, + SEND_MAX_DEPTH, + Counter::guard_send_not_fixnum_or_flonum, + ); + } else { + return false; + } + + // Save the PC and SP because the callee may allocate Float on heap + jit_prepare_call_with_gc(jit, asm); + + asm_comment!(asm, "Float#-"); + let obj = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); + + let ret = asm.ccall(rb_float_minus as *const u8, vec![recv, obj]); + asm.stack_pop(2); // Keep recv during ccall for GC + + let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float + asm.mov(ret_opnd, ret); + true +} + +fn jit_rb_float_mul( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin + let comptime_obj = jit.peek_at_stack(&asm.ctx, 0); + if comptime_obj.fixnum_p() || comptime_obj.flonum_p() { + let obj = 
asm.stack_opnd(0); + jit_guard_known_klass( + jit, + asm, + ocb, + comptime_obj.class_of(), + obj, + obj.into(), + comptime_obj, + SEND_MAX_DEPTH, + Counter::guard_send_not_fixnum_or_flonum, + ); + } else { + return false; + } + + // Save the PC and SP because the callee may allocate Float on heap + jit_prepare_call_with_gc(jit, asm); + + asm_comment!(asm, "Float#*"); + let obj = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); + + let ret = asm.ccall(rb_float_mul as *const u8, vec![recv, obj]); + asm.stack_pop(2); // Keep recv during ccall for GC + + let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float + asm.mov(ret_opnd, ret); + true +} + +fn jit_rb_float_div( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin + let comptime_obj = jit.peek_at_stack(&asm.ctx, 0); + if comptime_obj.fixnum_p() || comptime_obj.flonum_p() { + let obj = asm.stack_opnd(0); + jit_guard_known_klass( + jit, + asm, + ocb, + comptime_obj.class_of(), + obj, + obj.into(), + comptime_obj, + SEND_MAX_DEPTH, + Counter::guard_send_not_fixnum_or_flonum, + ); + } else { + return false; + } + + // Save the PC and SP because the callee may allocate Float on heap + jit_prepare_call_with_gc(jit, asm); + + asm_comment!(asm, "Float#/"); + let obj = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); + + let ret = asm.ccall(rb_float_div as *const u8, vec![recv, obj]); + asm.stack_pop(2); // Keep recv during ccall for GC + + let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float + asm.mov(ret_opnd, ret); + true +} + +/// If string is frozen, duplicate it to get a non-frozen string. Otherwise, return it. +fn jit_rb_str_uplus( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool +{ + if argc != 0 { + return false; + } + + // We allocate when we dup the string + jit_prepare_call_with_gc(jit, asm); + asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency. + + asm_comment!(asm, "Unary plus on string"); + let recv_opnd = asm.stack_pop(1); + let recv_opnd = asm.load(recv_opnd); + let flags_opnd = asm.load(Opnd::mem(64, recv_opnd, RUBY_OFFSET_RBASIC_FLAGS)); + asm.test(flags_opnd, Opnd::Imm(RUBY_FL_FREEZE as i64 | RSTRING_CHILLED as i64)); + + let ret_label = asm.new_label("stack_ret"); + + // String#+@ can only exist on T_STRING + let stack_ret = asm.stack_push(Type::TString); + + // If the string isn't frozen, we just return it. 
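+ // Control-flow sketch: the receiver is written into the result slot unconditionally, and
+ // the jz below (taken when none of the freeze/chilled bits tested above are set) skips the
+ // dup, so only frozen or chilled strings fall through to rb_str_dup().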
+ asm.mov(stack_ret, recv_opnd); + asm.jz(ret_label); + + // Str is frozen - duplicate it + asm.spill_temps(); // for ccall + let ret_opnd = asm.ccall(rb_str_dup as *const u8, vec![recv_opnd]); + asm.mov(stack_ret, ret_opnd); + + asm.write_label(ret_label); + + true +} + +fn jit_rb_str_length( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "String#length"); + extern "C" { + fn rb_str_length(str: VALUE) -> VALUE; + } + + // This function cannot allocate or raise an exceptions + let recv = asm.stack_opnd(0); + let ret_opnd = asm.ccall(rb_str_length as *const u8, vec![recv]); + asm.stack_pop(1); // Keep recv on stack during ccall for GC + + // Should be guaranteed to be a fixnum on 64-bit systems + let out_opnd = asm.stack_push(Type::Fixnum); + asm.mov(out_opnd, ret_opnd); + + true +} + +fn jit_rb_str_bytesize( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "String#bytesize"); + + let recv = asm.stack_pop(1); + + asm_comment!(asm, "get string length"); + let str_len_opnd = Opnd::mem( + std::os::raw::c_long::BITS as u8, + asm.load(recv), + RUBY_OFFSET_RSTRING_LEN as i32, + ); + + let len = asm.load(str_len_opnd); + let shifted_val = asm.lshift(len, Opnd::UImm(1)); + let out_val = asm.or(shifted_val, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + + let out_opnd = asm.stack_push(Type::Fixnum); + + asm.mov(out_opnd, out_val); + + true +} + +fn jit_rb_str_byteslice( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + if argc != 2 { + return false + } + + // rb_str_byte_substr should be leaf if indexes are fixnums + match (asm.ctx.get_opnd_type(StackOpnd(0)), asm.ctx.get_opnd_type(StackOpnd(1))) { + (Type::Fixnum, Type::Fixnum) => {}, + // Raises when non-integers are passed in, which requires the method frame + // to be pushed for the backtrace + _ => if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(2)) { + return false; + } + } + asm_comment!(asm, "String#byteslice"); + + // rb_str_byte_substr allocates a substring + jit_prepare_call_with_gc(jit, asm); + + // Get stack operands after potential SP change + let len = asm.stack_opnd(0); + let beg = asm.stack_opnd(1); + let recv = asm.stack_opnd(2); + + let ret_opnd = asm.ccall(rb_str_byte_substr as *const u8, vec![recv, beg, len]); + asm.stack_pop(3); + + let out_opnd = asm.stack_push(Type::Unknown); + asm.mov(out_opnd, ret_opnd); + + true +} + +fn jit_rb_str_getbyte( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "String#getbyte"); + + // Don't pop since we may bail + let idx = asm.stack_opnd(0); + let recv = asm.stack_opnd(1); + + let comptime_idx = jit.peek_at_stack(&asm.ctx, 0); + if comptime_idx.fixnum_p(){ + jit_guard_known_klass( + jit, + asm, + ocb, + comptime_idx.class_of(), + idx, + idx.into(), + comptime_idx, + SEND_MAX_DEPTH, + 
Counter::getbyte_idx_not_fixnum, + ); + } else { + return false; + } + + // Untag the index + let idx = asm.rshift(idx, Opnd::UImm(1)); + + // If index is negative, exit + asm.cmp(idx, Opnd::UImm(0)); + asm.jl(Target::side_exit(Counter::getbyte_idx_negative)); + + asm_comment!(asm, "get string length"); + let recv = asm.load(recv); + let str_len_opnd = Opnd::mem( + std::os::raw::c_long::BITS as u8, + asm.load(recv), + RUBY_OFFSET_RSTRING_LEN as i32, + ); + + // Exit if the indes is out of bounds + asm.cmp(idx, str_len_opnd); + asm.jge(Target::side_exit(Counter::getbyte_idx_out_of_bounds)); + + let str_ptr = get_string_ptr(asm, recv); + // FIXME: could use SIB indexing here with proper support in backend + let str_ptr = asm.add(str_ptr, idx); + let byte = asm.load(Opnd::mem(8, str_ptr, 0)); + + // Zero-extend the byte to 64 bits + let byte = byte.with_num_bits(64).unwrap(); + let byte = asm.and(byte, 0xFF.into()); + + // Tag the byte + let byte = asm.lshift(byte, Opnd::UImm(1)); + let byte = asm.or(byte, Opnd::UImm(1)); + + asm.stack_pop(2); // Keep them on stack during ccall for GC + let out_opnd = asm.stack_push(Type::Fixnum); + asm.mov(out_opnd, byte); + + true +} + +fn jit_rb_str_setbyte( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // Raises when index is out of range. Lazily push a frame in that case. + if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(2)) { + return false; + } + asm_comment!(asm, "String#setbyte"); + + let value = asm.stack_opnd(0); + let index = asm.stack_opnd(1); + let recv = asm.stack_opnd(2); + + let ret_opnd = asm.ccall(rb_str_setbyte as *const u8, vec![recv, index, value]); + asm.stack_pop(3); // Keep them on stack during ccall for GC + + let out_opnd = asm.stack_push(Type::UnknownImm); + asm.mov(out_opnd, ret_opnd); + + true +} + +// Codegen for rb_str_to_s() +// When String#to_s is called on a String instance, the method returns self and +// most of the overhead comes from setting up the method call. We observed that +// this situation happens a lot in some workloads. +fn jit_rb_str_to_s( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + known_recv_class: Option<VALUE>, +) -> bool { + if unsafe { known_recv_class == Some(rb_cString) } { + asm_comment!(asm, "to_s on plain string"); + // The method returns the receiver, which is already on the stack. + // No stack movement. + return true; + } + false +} + +// Codegen for rb_str_empty_p() +fn jit_rb_str_empty_p( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + let recv_opnd = asm.stack_pop(1); + + asm_comment!(asm, "get string length"); + let str_len_opnd = Opnd::mem( + std::os::raw::c_long::BITS as u8, + asm.load(recv_opnd), + RUBY_OFFSET_RSTRING_LEN as i32, + ); + + asm.cmp(str_len_opnd, Opnd::UImm(0)); + let string_empty = asm.csel_e(Qtrue.into(), Qfalse.into()); + let out_opnd = asm.stack_push(Type::UnknownImm); + asm.mov(out_opnd, string_empty); + + return true; +} + +// Codegen for rb_str_concat() -- *not* String#concat +// Frequently strings are concatenated using "out_str << next_str". 
+// This is common in Erb and similar templating languages. +fn jit_rb_str_concat( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + // The << operator can accept integer codepoints for characters + // as the argument. We only specially optimise string arguments. + // If the peeked-at compile time argument is something other than + // a string, assume it won't be a string later either. + let comptime_arg = jit.peek_at_stack(&asm.ctx, 0); + if ! unsafe { RB_TYPE_P(comptime_arg, RUBY_T_STRING) } { + return false; + } + + // Guard that the concat argument is a string + guard_object_is_string(asm, asm.stack_opnd(0), StackOpnd(0), Counter::guard_send_not_string); + + // Guard buffers from GC since rb_str_buf_append may allocate. + // rb_str_buf_append may raise Encoding::CompatibilityError, but we accept compromised + // backtraces on this method since the interpreter does the same thing on opt_ltlt. + jit_prepare_non_leaf_call(jit, asm); + asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency. + + let concat_arg = asm.stack_pop(1); + let recv = asm.stack_pop(1); + + // Test if string encodings differ. If different, use rb_str_append. If the same, + // use rb_yjit_str_simple_append, which calls rb_str_cat. + asm_comment!(asm, "<< on strings"); + + // Take receiver's object flags XOR arg's flags. If any + // string-encoding flags are different between the two, + // the encodings don't match. + let recv_reg = asm.load(recv); + let concat_arg_reg = asm.load(concat_arg); + let flags_xor = asm.xor( + Opnd::mem(64, recv_reg, RUBY_OFFSET_RBASIC_FLAGS), + Opnd::mem(64, concat_arg_reg, RUBY_OFFSET_RBASIC_FLAGS) + ); + asm.test(flags_xor, Opnd::UImm(RUBY_ENCODING_MASK as u64)); + + let enc_mismatch = asm.new_label("enc_mismatch"); + asm.jnz(enc_mismatch); + + // If encodings match, call the simple append function and jump to return + let ret_opnd = asm.ccall(rb_yjit_str_simple_append as *const u8, vec![recv, concat_arg]); + let ret_label = asm.new_label("func_return"); + let stack_ret = asm.stack_push(Type::TString); + asm.mov(stack_ret, ret_opnd); + asm.stack_pop(1); // forget stack_ret to re-push after ccall + asm.jmp(ret_label); + + // If encodings are different, use a slower encoding-aware concatenate + asm.write_label(enc_mismatch); + asm.spill_temps(); // Ignore the register for the other local branch + let ret_opnd = asm.ccall(rb_str_buf_append as *const u8, vec![recv, concat_arg]); + let stack_ret = asm.stack_push(Type::TString); + asm.mov(stack_ret, ret_opnd); + // Drop through to return + + asm.write_label(ret_label); + + true +} + +// Codegen for rb_ary_empty_p() +fn jit_rb_ary_empty_p( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + let array_opnd = asm.stack_pop(1); + let array_reg = asm.load(array_opnd); + let len_opnd = get_array_len(asm, array_reg); + + asm.test(len_opnd, len_opnd); + let bool_val = asm.csel_z(Qtrue.into(), Qfalse.into()); + + let out_opnd = asm.stack_push(Type::UnknownImm); + asm.store(out_opnd, bool_val); + + return true; +} + +// Codegen for rb_ary_length() +fn jit_rb_ary_length( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const 
rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + let array_opnd = asm.stack_pop(1); + let array_reg = asm.load(array_opnd); + let len_opnd = get_array_len(asm, array_reg); + + // Convert the length to a fixnum + let shifted_val = asm.lshift(len_opnd, Opnd::UImm(1)); + let out_val = asm.or(shifted_val, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + + let out_opnd = asm.stack_push(Type::Fixnum); + asm.store(out_opnd, out_val); + + return true; +} + +fn jit_rb_ary_push( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "Array#<<"); + + // rb_ary_push allocates memory for buffer extension and can raise FrozenError + // Not using a lazy frame here since the interpreter also has a truncated + // stack trace from opt_ltlt. + jit_prepare_non_leaf_call(jit, asm); + + let item_opnd = asm.stack_opnd(0); + let ary_opnd = asm.stack_opnd(1); + let ret = asm.ccall(rb_ary_push as *const u8, vec![ary_opnd, item_opnd]); + asm.stack_pop(2); // Keep them on stack during ccall for GC + + let ret_opnd = asm.stack_push(Type::TArray); + asm.mov(ret_opnd, ret); + true +} + +// Just a leaf method, but not using `Primitive.attr! :leaf` since BOP methods can't use it. +fn jit_rb_hash_empty_p( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "Hash#empty?"); + + let hash_opnd = asm.stack_pop(1); + let ret = asm.ccall(rb_hash_empty_p as *const u8, vec![hash_opnd]); + + let ret_opnd = asm.stack_push(Type::UnknownImm); + asm.mov(ret_opnd, ret); + true +} + +fn jit_obj_respond_to( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + argc: i32, + known_recv_class: Option<VALUE>, +) -> bool { + // respond_to(:sym) or respond_to(:sym, true) + if argc != 1 && argc != 2 { + return false; + } + + let recv_class = match known_recv_class { + Some(class) => class, + None => return false, + }; + + // Get the method_id from compile time. We will later add a guard against it. 
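+ // For illustration: with a literal symbol, as in `obj.respond_to?(:each)`, the Symbol is
+ // visible on the stack at compile time, so the answer can be resolved now and emitted as a
+ // constant, protected by the mid guard generated further down.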
+ let mid_sym = jit.peek_at_stack(&asm.ctx, (argc - 1) as isize); + if !mid_sym.static_sym_p() { + return false + } + let mid = unsafe { rb_sym2id(mid_sym) }; + + // Option<bool> representing the value of the "include_all" argument and whether it's known + let allow_priv = if argc == 1 { + // Default is false + Some(false) + } else { + // Get value from type information (may or may not be known) + asm.ctx.get_opnd_type(StackOpnd(0)).known_truthy() + }; + + let target_cme = unsafe { rb_callable_method_entry_or_negative(recv_class, mid) }; + + // Should never be null, as in that case we will be returned a "negative CME" + assert!(!target_cme.is_null()); + + let cme_def_type = unsafe { get_cme_def_type(target_cme) }; + + if cme_def_type == VM_METHOD_TYPE_REFINED { + return false; + } + + let visibility = if cme_def_type == VM_METHOD_TYPE_UNDEF { + METHOD_VISI_UNDEF + } else { + unsafe { METHOD_ENTRY_VISI(target_cme) } + }; + + let result = match (visibility, allow_priv) { + (METHOD_VISI_UNDEF, _) => { + // No method, we can return false given respond_to_missing? hasn't been overridden. + // In the future, we might want to jit the call to respond_to_missing? + if !assume_method_basic_definition(jit, asm, ocb, recv_class, ID!(respond_to_missing)) { + return false; + } + Qfalse + } + (METHOD_VISI_PUBLIC, _) | // Public method => fine regardless of include_all + (_, Some(true)) => { // include_all => all visibility are acceptable + // Method exists and has acceptable visibility + if cme_def_type == VM_METHOD_TYPE_NOTIMPLEMENTED { + // C method with rb_f_notimplement(). `respond_to?` returns false + // without consulting `respond_to_missing?`. See also: rb_add_method_cfunc() + Qfalse + } else { + Qtrue + } + } + (_, _) => return false // not public and include_all not known, can't compile + }; + + // Invalidate this block if method lookup changes for the method being queried. This works + // both for the case where a method does or does not exist, as for the latter we asked for a + // "negative CME" earlier. 
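+ // For example (method name hypothetical): `1.respond_to?(:no_such_method)` compiles down
+ // to pushing `false`, and stays valid only while Integer#respond_to_missing? keeps its
+ // basic definition (checked above) and the lookup assumption registered below holds.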
+ jit.assume_method_lookup_stable(asm, ocb, target_cme); + + if argc == 2 { + // pop include_all argument (we only use its type info) + asm.stack_pop(1); + } + + let sym_opnd = asm.stack_pop(1); + let _recv_opnd = asm.stack_pop(1); + + // This is necessary because we have no guarantee that sym_opnd is a constant + asm_comment!(asm, "guard known mid"); + asm.cmp(sym_opnd, mid_sym.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_send_respond_to_mid_mismatch, + ); + + jit_putobject(asm, result); + + true +} + +fn jit_rb_f_block_given_p( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm.stack_pop(1); + let out_opnd = asm.stack_push(Type::UnknownImm); + + gen_block_given(jit, asm, out_opnd, Qtrue.into(), Qfalse.into()); + + true +} + +fn gen_block_given( + jit: &mut JITState, + asm: &mut Assembler, + out_opnd: Opnd, + true_opnd: Opnd, + false_opnd: Opnd, +) { + asm_comment!(asm, "block_given?"); + + // Same as rb_vm_frame_block_handler + let ep_opnd = gen_get_lep(jit, asm); + let block_handler = asm.load( + Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) + ); + + // Return `block_handler != VM_BLOCK_HANDLER_NONE` + asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into()); + let block_given = asm.csel_ne(true_opnd, false_opnd); + asm.mov(out_opnd, block_given); +} + +// Codegen for rb_class_superclass() +fn jit_rb_class_superclass( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + cme: *const rb_callable_method_entry_t, + _block: Option<crate::codegen::BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + extern "C" { + fn rb_class_superclass(klass: VALUE) -> VALUE; + } + + if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(0)) { + return false; + } + + asm_comment!(asm, "Class#superclass"); + let recv_opnd = asm.stack_opnd(0); + let ret = asm.ccall(rb_class_superclass as *const u8, vec![recv_opnd]); + + asm.stack_pop(1); + let ret_opnd = asm.stack_push(Type::Unknown); + asm.mov(ret_opnd, ret); + + true +} + +fn jit_rb_case_equal( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + known_recv_class: Option<VALUE>, +) -> bool { + if !jit.assume_expected_cfunc( asm, ocb, known_recv_class.unwrap(), ID!(eq), rb_obj_equal as _) { + return false; + } + + asm_comment!(asm, "case_equal: {}#===", get_class_name(known_recv_class)); + + // Compare the arguments + let arg1 = asm.stack_pop(1); + let arg0 = asm.stack_pop(1); + asm.cmp(arg0, arg1); + let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into()); + + let stack_ret = asm.stack_push(Type::UnknownImm); + asm.mov(stack_ret, ret_opnd); + + true +} + +fn jit_thread_s_current( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, + _ci: *const rb_callinfo, + _cme: *const rb_callable_method_entry_t, + _block: Option<BlockHandler>, + _argc: i32, + _known_recv_class: Option<VALUE>, +) -> bool { + asm_comment!(asm, "Thread.current"); + asm.stack_pop(1); + + // ec->thread_ptr + let ec_thread_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_THREAD_PTR)); + + // thread->self + let thread_self = Opnd::mem(64, ec_thread_opnd, RUBY_OFFSET_THREAD_SELF); + + let stack_ret = 
asm.stack_push(Type::UnknownHeap); + asm.mov(stack_ret, thread_self); + true +} + +// Check if we know how to codegen for a particular cfunc method +fn lookup_cfunc_codegen(def: *const rb_method_definition_t) -> Option<MethodGenFn> { + let method_serial = unsafe { get_def_method_serial(def) }; + let table = unsafe { METHOD_CODEGEN_TABLE.as_ref().unwrap() }; + + let option_ref = table.get(&method_serial); + match option_ref { + None => None, + Some(&mgf) => Some(mgf), // Deref + } +} + +// Is anyone listening for :c_call and :c_return event currently? +fn c_method_tracing_currently_enabled(jit: &JITState) -> bool { + // Defer to C implementation in yjit.c + unsafe { + rb_c_method_tracing_currently_enabled(jit.ec) + } +} + +// Similar to args_kw_argv_to_hash. It is called at runtime from within the +// generated assembly to build a Ruby hash of the passed keyword arguments. The +// keys are the Symbol objects associated with the keywords and the values are +// the actual values. In the representation, both keys and values are VALUEs. +unsafe extern "C" fn build_kwhash(ci: *const rb_callinfo, sp: *const VALUE) -> VALUE { + let kw_arg = vm_ci_kwarg(ci); + let kw_len: usize = get_cikw_keyword_len(kw_arg).try_into().unwrap(); + let hash = rb_hash_new_with_size(kw_len as u64); + + for kwarg_idx in 0..kw_len { + let key = get_cikw_keywords_idx(kw_arg, kwarg_idx.try_into().unwrap()); + let val = sp.sub(kw_len).add(kwarg_idx).read(); + rb_hash_aset(hash, key, val); + } + hash +} + +// SpecVal is a single value in an iseq invocation's environment on the stack, +// at sp[-2]. Depending on the frame type, it can serve different purposes, +// which are covered here by enum variants. +enum SpecVal { + BlockHandler(Option<BlockHandler>), + PrevEP(*const VALUE), + PrevEPOpnd(Opnd), +} + +// Each variant represents a branch in vm_caller_setup_arg_block. +#[derive(Clone, Copy)] +pub enum BlockHandler { + // send, invokesuper: blockiseq operand + BlockISeq(IseqPtr), + // invokesuper: GET_BLOCK_HANDLER() (GET_LEP()[VM_ENV_DATA_INDEX_SPECVAL]) + LEPSpecVal, + // part of the allocate-free block forwarding scheme + BlockParamProxy, + // To avoid holding the block arg (e.g. proc and symbol) across C calls, + // we might need to set the block handler early in the call sequence + AlreadySet, +} + +struct ControlFrame { + recv: Opnd, + sp: Opnd, + iseq: Option<IseqPtr>, + pc: Option<u64>, + frame_type: u32, + specval: SpecVal, + cme: *const rb_callable_method_entry_t, +} + +// Codegen performing a similar (but not identical) function to vm_push_frame +// +// This will generate the code to: +// * initialize locals to Qnil +// * push the environment (cme, block handler, frame type) +// * push a new CFP +// * save the new CFP to ec->cfp +// +// Notes: +// * Provided sp should point to the new frame's sp, immediately following locals and the environment +// * At entry, CFP points to the caller (not callee) frame +// * At exit, ec->cfp is updated to the pushed CFP +// * SP register is updated only if frame.iseq is set +// * Stack overflow is not checked (should be done by the caller) +// * Interrupts are not checked (should be done by the caller) +fn gen_push_frame( + jit: &mut JITState, + asm: &mut Assembler, + frame: ControlFrame, +) { + let sp = frame.sp; + + asm_comment!(asm, "push cme, specval, frame type"); + + // Write method entry at sp[-3] + // sp[-3] = me; + // Use compile time cme. It's assumed to be valid because we are notified when + // any cme we depend on become outdated. See yjit_method_lookup_change(). 
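+ // Sketch of the environment written below, one VALUE per slot relative to the callee's sp:
+ //   sp[-3]: cme (callable method entry)
+ //   sp[-2]: specval (block handler or tagged previous EP)
+ //   sp[-1]: frame type flags   <- the callee's EP ends up pointing here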
+ asm.store(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -3), VALUE::from(frame.cme).into()); + + // Write special value at sp[-2]. It's either a block handler or a pointer to + // the outer environment depending on the frame type. + // sp[-2] = specval; + let specval: Opnd = match frame.specval { + SpecVal::BlockHandler(None) => VM_BLOCK_HANDLER_NONE.into(), + SpecVal::BlockHandler(Some(block_handler)) => { + match block_handler { + BlockHandler::BlockISeq(block_iseq) => { + // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). + // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases + // with cfp->block_code. + asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(block_iseq).into()); + + let cfp_self = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); + asm.or(cfp_self, Opnd::Imm(1)) + } + BlockHandler::LEPSpecVal => { + let lep_opnd = gen_get_lep(jit, asm); + asm.load(Opnd::mem(64, lep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)) + } + BlockHandler::BlockParamProxy => { + let ep_opnd = gen_get_lep(jit, asm); + let block_handler = asm.load( + Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) + ); + block_handler + } + BlockHandler::AlreadySet => 0.into(), // unused + } + } + SpecVal::PrevEP(prev_ep) => { + let tagged_prev_ep = (prev_ep as usize) | 1; + VALUE(tagged_prev_ep).into() + } + SpecVal::PrevEPOpnd(ep_opnd) => { + asm.or(ep_opnd, 1.into()) + } + }; + if let SpecVal::BlockHandler(Some(BlockHandler::AlreadySet)) = frame.specval { + asm_comment!(asm, "specval should have been set"); + } else { + asm.store(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -2), specval); + } + + // Write env flags at sp[-1] + // sp[-1] = frame_type; + asm.store(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -1), frame.frame_type.into()); + + // Allocate a new CFP (ec->cfp--) + fn cfp_opnd(offset: i32) -> Opnd { + Opnd::mem(64, CFP, offset - (RUBY_SIZEOF_CONTROL_FRAME as i32)) + } + + // Setup the new frame + // *cfp = (const struct rb_control_frame_struct) { + // .pc = <unset for iseq, 0 for cfunc>, + // .sp = sp, + // .iseq = <iseq for iseq, 0 for cfunc>, + // .self = recv, + // .ep = <sp - 1>, + // .block_code = 0, + // }; + asm_comment!(asm, "push callee control frame"); + + // For an iseq call PC may be None, in which case we will not set PC and will allow jitted code + // to set it as necessary. 
+ if let Some(pc) = frame.pc { + asm.mov(cfp_opnd(RUBY_OFFSET_CFP_PC), pc.into()); + }; + asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SP), sp); + let iseq: Opnd = if let Some(iseq) = frame.iseq { + VALUE::from(iseq).into() + } else { + 0.into() + }; + asm.mov(cfp_opnd(RUBY_OFFSET_CFP_ISEQ), iseq); + asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SELF), frame.recv); + asm.mov(cfp_opnd(RUBY_OFFSET_CFP_BLOCK_CODE), 0.into()); + + if frame.iseq.is_some() { + // Spill stack temps to let the callee use them (must be done before changing the SP register) + asm.spill_temps(); + + // Saving SP before calculating ep avoids a dependency on a register + // However this must be done after referencing frame.recv, which may be SP-relative + asm.mov(SP, sp); + } + let ep = asm.sub(sp, SIZEOF_VALUE.into()); + asm.mov(cfp_opnd(RUBY_OFFSET_CFP_EP), ep); +} + +fn gen_send_cfunc( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + ci: *const rb_callinfo, + cme: *const rb_callable_method_entry_t, + block: Option<BlockHandler>, + recv_known_class: Option<VALUE>, + flags: u32, + argc: i32, +) -> Option<CodegenStatus> { + let cfunc = unsafe { get_cme_def_body_cfunc(cme) }; + let cfunc_argc = unsafe { get_mct_argc(cfunc) }; + let mut argc = argc; + + // Splat call to a C method that takes `VALUE *` and `len` + let variable_splat = flags & VM_CALL_ARGS_SPLAT != 0 && cfunc_argc == -1; + let block_arg = flags & VM_CALL_ARGS_BLOCKARG != 0; + + // If it's a splat and the method expects a Ruby array of arguments + if cfunc_argc == -2 && flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(asm, Counter::send_cfunc_splat_neg2); + return None; + } + + exit_if_kwsplat_non_nil(asm, flags, Counter::send_cfunc_kw_splat_non_nil)?; + let kw_splat = flags & VM_CALL_KW_SPLAT != 0; + + let kw_arg = unsafe { vm_ci_kwarg(ci) }; + let kw_arg_num = if kw_arg.is_null() { + 0 + } else { + unsafe { get_cikw_keyword_len(kw_arg) } + }; + + if kw_arg_num != 0 && flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(asm, Counter::send_cfunc_splat_with_kw); + return None; + } + + if c_method_tracing_currently_enabled(jit) { + // Don't JIT if tracing c_call or c_return + gen_counter_incr(asm, Counter::send_cfunc_tracing); + return None; + } + + // Increment total cfunc send count + gen_counter_incr(asm, Counter::num_send_cfunc); + + // Delegate to codegen for C methods if we have it. + if kw_arg.is_null() && + !kw_splat && + flags & VM_CALL_OPT_SEND == 0 && + flags & VM_CALL_ARGS_SPLAT == 0 && + (cfunc_argc == -1 || argc == cfunc_argc) { + let expected_stack_after = asm.ctx.get_stack_size() as i32 - argc; + if let Some(known_cfunc_codegen) = lookup_cfunc_codegen(unsafe { (*cme).def }) { + // We don't push a frame for specialized cfunc codegen, so the generated code must be leaf. + // However, the interpreter doesn't push a frame on opt_* instruction either, so we allow + // non-sendish instructions to break this rule as an exception. + let cfunc_codegen = if jit.is_sendish() { + asm.with_leaf_ccall(|asm| + perf_call!("gen_send_cfunc: ", known_cfunc_codegen(jit, asm, ocb, ci, cme, block, argc, recv_known_class)) + ) + } else { + perf_call!("gen_send_cfunc: ", known_cfunc_codegen(jit, asm, ocb, ci, cme, block, argc, recv_known_class)) + }; + + if cfunc_codegen { + assert_eq!(expected_stack_after, asm.ctx.get_stack_size() as i32); + gen_counter_incr(asm, Counter::num_send_cfunc_inline); + // cfunc codegen generated code. Terminate the block so + // there isn't multiple calls in the same block. 
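+ // For example, the String#bytesize specialization above runs without pushing its own
+ // frame, which is only sound because it neither raises nor allocates; that is the leaf
+ // requirement noted above.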
+ jump_to_next_insn(jit, asm, ocb); + return Some(EndBlock); + } + } + } + + // Check for interrupts + gen_check_ints(asm, Counter::guard_send_interrupted); + + // Stack overflow check + // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin) + // REG_CFP <= REG_SP + 4 * SIZEOF_VALUE + sizeof(rb_control_frame_t) + asm_comment!(asm, "stack overflow check"); + const _: () = assert!(RUBY_SIZEOF_CONTROL_FRAME % SIZEOF_VALUE == 0, "sizeof(rb_control_frame_t) is a multiple of sizeof(VALUE)"); + let stack_limit = asm.lea(asm.ctx.sp_opnd((4 + 2 * (RUBY_SIZEOF_CONTROL_FRAME / SIZEOF_VALUE)) as i32)); + asm.cmp(CFP, stack_limit); + asm.jbe(Target::side_exit(Counter::guard_send_se_cf_overflow)); + + // Guard for variable length splat call before any modifications to the stack + if variable_splat { + let splat_array_idx = i32::from(kw_splat) + i32::from(block_arg); + let comptime_splat_array = jit.peek_at_stack(&asm.ctx, splat_array_idx as isize); + if unsafe { rb_yjit_ruby2_keywords_splat_p(comptime_splat_array) } != 0 { + gen_counter_incr(asm, Counter::send_cfunc_splat_varg_ruby2_keywords); + return None; + } + + let splat_array = asm.stack_opnd(splat_array_idx); + guard_object_is_array(asm, splat_array, splat_array.into(), Counter::guard_send_splat_not_array); + + asm_comment!(asm, "guard variable length splat call servicable"); + let sp = asm.ctx.sp_opnd(0); + let proceed = asm.ccall(rb_yjit_splat_varg_checks as _, vec![sp, splat_array, CFP]); + asm.cmp(proceed, Qfalse.into()); + asm.je(Target::side_exit(Counter::guard_send_cfunc_bad_splat_vargs)); + } + + // Number of args which will be passed through to the callee + // This is adjusted by the kwargs being combined into a hash. + let mut passed_argc = if kw_arg.is_null() { + argc + } else { + argc - kw_arg_num + 1 + }; + + // Exclude the kw_splat hash from arity check + if kw_splat { + passed_argc -= 1; + } + + // If the argument count doesn't match + if cfunc_argc >= 0 && cfunc_argc != passed_argc && flags & VM_CALL_ARGS_SPLAT == 0 { + gen_counter_incr(asm, Counter::send_cfunc_argc_mismatch); + return None; + } + + // Don't JIT functions that need C stack arguments for now + if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_OPNDS.len() as i32) { + gen_counter_incr(asm, Counter::send_cfunc_toomany_args); + return None; + } + + let block_arg_type = if block_arg { + Some(asm.ctx.get_opnd_type(StackOpnd(0))) + } else { + None + }; + + match block_arg_type { + Some(Type::Nil | Type::BlockParamProxy) => { + // We'll handle this later + } + None => { + // Nothing to do + } + _ => { + gen_counter_incr(asm, Counter::send_cfunc_block_arg); + return None; + } + } + + match block_arg_type { + Some(Type::Nil) => { + // We have a nil block arg, so let's pop it off the args + asm.stack_pop(1); + } + Some(Type::BlockParamProxy) => { + // We don't need the actual stack value + asm.stack_pop(1); + } + None => { + // Nothing to do + } + _ => { + assert!(false); + } + } + + // Pop the empty kw_splat hash + if kw_splat { + // Only `**nil` is supported right now. Checked in exit_if_kwsplat_non_nil() + assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0))); + asm.stack_pop(1); + argc -= 1; + } + + // Splat handling when C method takes a static number of arguments. 
+ // push_splat_args() does stack manipulation so we can no longer side exit + if flags & VM_CALL_ARGS_SPLAT != 0 && cfunc_argc >= 0 { + let required_args : u32 = (cfunc_argc as u32).saturating_sub(argc as u32 - 1); + // + 1 because we pass self + if required_args + 1 >= C_ARG_OPNDS.len() as u32 { + gen_counter_incr(asm, Counter::send_cfunc_toomany_args); + return None; + } + + // We are going to assume that the splat fills + // all the remaining arguments. So the number of args + // should just equal the number of args the cfunc takes. + // In the generated code we test if this is true + // and if not side exit. + argc = cfunc_argc; + passed_argc = argc; + push_splat_args(required_args, asm) + } + + // This is a .send call and we need to adjust the stack + if flags & VM_CALL_OPT_SEND != 0 { + handle_opt_send_shift_stack(asm, argc); + } + + // Push a dynamic number of items from the splat array to the stack when calling a vargs method + let dynamic_splat_size = if variable_splat { + asm_comment!(asm, "variable length splat"); + let stack_splat_array = asm.lea(asm.stack_opnd(0)); + Some(asm.ccall(rb_yjit_splat_varg_cfunc as _, vec![stack_splat_array])) + } else { + None + }; + + // Points to the receiver operand on the stack + let recv = asm.stack_opnd(argc); + + // Store incremented PC into current control frame in case callee raises. + jit_save_pc(jit, asm); + + // Find callee's SP with space for metadata. + // Usually sp+3. + let sp = if let Some(splat_size) = dynamic_splat_size { + // Compute the callee's SP at runtime in case we accept a variable size for the splat array + const _: () = assert!(SIZEOF_VALUE == 8, "opting for a shift since mul on A64 takes no immediates"); + let splat_size_bytes = asm.lshift(splat_size, 3usize.into()); + // 3 items for method metadata, minus one to remove the splat array + let static_stack_top = asm.lea(asm.ctx.sp_opnd(2)); + asm.add(static_stack_top, splat_size_bytes) + } else { + asm.lea(asm.ctx.sp_opnd(3)) + }; + + let specval = if block_arg_type == Some(Type::BlockParamProxy) { + SpecVal::BlockHandler(Some(BlockHandler::BlockParamProxy)) + } else { + SpecVal::BlockHandler(block) + }; + + let mut frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL; + if !kw_arg.is_null() { + frame_type |= VM_FRAME_FLAG_CFRAME_KW + } + + perf_call!("gen_send_cfunc: ", gen_push_frame(jit, asm, ControlFrame { + frame_type, + specval, + cme, + recv, + sp, + pc: if cfg!(debug_assertions) { + Some(!0) // Poison value. Helps to fail fast. + } else { + None // Leave PC uninitialized as cfuncs shouldn't read it + }, + iseq: None, + })); + + asm_comment!(asm, "set ec->cfp"); + let new_cfp = asm.lea(Opnd::mem(64, CFP, -(RUBY_SIZEOF_CONTROL_FRAME as i32))); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), new_cfp); + + if !kw_arg.is_null() { + // Build a hash from all kwargs passed + asm_comment!(asm, "build_kwhash"); + let imemo_ci = VALUE(ci as usize); + assert_ne!(0, unsafe { rb_IMEMO_TYPE_P(imemo_ci, imemo_callinfo) }, + "we assume all callinfos with kwargs are on the GC heap"); + let sp = asm.lea(asm.ctx.sp_opnd(0)); + let kwargs = asm.ccall(build_kwhash as *const u8, vec![imemo_ci.into(), sp]); + + // Replace the stack location at the start of kwargs with the new hash + let stack_opnd = asm.stack_opnd(argc - passed_argc); + asm.mov(stack_opnd, kwargs); + } + + // Write interpreter SP into CFP. + // We don't pop arguments yet to use registers for passing them, but we + // have to set cfp->sp below them for full_cfunc_return() invalidation. 
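+ // Worked example (argc chosen for illustration): with argc == 2, the offset is
+ // -(2 + 1) == -3, so the recorded cfp->sp sits below the receiver and both arguments,
+ // i.e. as if they had already been popped, which is what full_cfunc_return() expects.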
+ gen_save_sp_with_offset(asm, -(argc + 1) as i8); + + // Non-variadic method + let args = if cfunc_argc >= 0 { + // Copy the arguments from the stack to the C argument registers + // self is the 0th argument and is at index argc from the stack top + (0..=passed_argc).map(|i| + asm.stack_opnd(argc - i) + ).collect() + } + // Variadic method + else if cfunc_argc == -1 { + // The method gets a pointer to the first argument + // rb_f_puts(int argc, VALUE *argv, VALUE recv) + + let passed_argc_opnd = if let Some(splat_size) = dynamic_splat_size { + // The final argc is the size of the splat, minus one for the splat array itself + asm.add(splat_size, (passed_argc - 1).into()) + } else { + // Without a splat, passed_argc is static + Opnd::Imm(passed_argc.into()) + }; + + vec![ + passed_argc_opnd, + asm.lea(asm.ctx.sp_opnd(-argc)), + asm.stack_opnd(argc), + ] + } + // Variadic method taking a Ruby array + else if cfunc_argc == -2 { + // Slurp up all the arguments into an array + let stack_args = asm.lea(asm.ctx.sp_opnd(-argc)); + let args_array = asm.ccall( + rb_ec_ary_new_from_values as _, + vec![EC, passed_argc.into(), stack_args] + ); + + // Example signature: + // VALUE neg2_method(VALUE self, VALUE argv) + vec![asm.stack_opnd(argc), args_array] + } else { + panic!("unexpected cfunc_args: {}", cfunc_argc) + }; + + // Call the C function + // VALUE ret = (cfunc->func)(recv, argv[0], argv[1]); + // cfunc comes from compile-time cme->def, which we assume to be stable. + // Invalidation logic is in yjit_method_lookup_change() + asm_comment!(asm, "call C function"); + let ret = asm.ccall(unsafe { get_mct_func(cfunc) }.cast(), args); + asm.stack_pop((argc + 1).try_into().unwrap()); // Pop arguments after ccall to use registers for passing them. + + // Record code position for TracePoint patching. See full_cfunc_return(). + record_global_inval_patch(asm, CodegenGlobals::get_outline_full_cfunc_return_pos()); + + // Push the return value on the Ruby stack + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, ret); + + // Log the name of the method we're calling to. We intentionally don't do this for inlined cfuncs. + // We also do this after the C call to minimize the impact of spill_temps() on asm.ccall(). + if get_option!(gen_stats) { + // Assemble the method name string + let mid = unsafe { vm_ci_mid(ci) }; + let name_str = get_method_name(recv_known_class, mid); + + // Get an index for this cfunc name + let cfunc_idx = get_cfunc_idx(&name_str); + + // Increment the counter for this cfunc + asm.ccall(incr_cfunc_counter as *const u8, vec![cfunc_idx.into()]); + } + + // Pop the stack frame (ec->cfp++) + // Instead of recalculating, we can reuse the previous CFP, which is stored in a callee-saved + // register + let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP); + asm.store(ec_cfp_opnd, CFP); + + // cfunc calls may corrupt types + asm.clear_local_types(); + + // Note: the return block of gen_send_iseq() has ctx->sp_offset == 1 + // which allows for sharing the same successor. + + // Jump (fall through) to the call continuation block + // We do this to end the current block after the call + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) +} + +// Generate RARRAY_LEN. For array_opnd, use Opnd::Reg to reduce memory access, +// and use Opnd::Mem to save registers. +fn get_array_len(asm: &mut Assembler, array_opnd: Opnd) -> Opnd { + asm_comment!(asm, "get array length for embedded or heap"); + + // Pull out the embed flag to check if it's an embedded array. 
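+ // Background: small arrays are "embedded", keeping their elements in the object slot with
+ // RARRAY_EMBED_FLAG set and the length packed into the RBasic flags; larger arrays hold a
+ // separate heap buffer and a plain len field. Both lengths are computed and csel picks one.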
+ let array_reg = match array_opnd { + Opnd::InsnOut { .. } => array_opnd, + _ => asm.load(array_opnd), + }; + let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS); + + // Get the length of the array + let emb_len_opnd = asm.and(flags_opnd, (RARRAY_EMBED_LEN_MASK as u64).into()); + let emb_len_opnd = asm.rshift(emb_len_opnd, (RARRAY_EMBED_LEN_SHIFT as u64).into()); + + // Conditionally move the length of the heap array + let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, (RARRAY_EMBED_FLAG as u64).into()); + + let array_reg = match array_opnd { + Opnd::InsnOut { .. } => array_opnd, + _ => asm.load(array_opnd), + }; + let array_len_opnd = Opnd::mem( + std::os::raw::c_long::BITS as u8, + array_reg, + RUBY_OFFSET_RARRAY_AS_HEAP_LEN, + ); + + // Select the array length value + asm.csel_nz(emb_len_opnd, array_len_opnd) +} + +// Generate RARRAY_CONST_PTR (part of RARRAY_AREF) +fn get_array_ptr(asm: &mut Assembler, array_reg: Opnd) -> Opnd { + asm_comment!(asm, "get array pointer for embedded or heap"); + + let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, (RARRAY_EMBED_FLAG as u64).into()); + let heap_ptr_opnd = Opnd::mem( + usize::BITS as u8, + array_reg, + RUBY_OFFSET_RARRAY_AS_HEAP_PTR, + ); + + // Load the address of the embedded array + // (struct RArray *)(obj)->as.ary + let ary_opnd = asm.lea(Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RARRAY_AS_ARY)); + asm.csel_nz(ary_opnd, heap_ptr_opnd) +} + +// Generate RSTRING_PTR +fn get_string_ptr(asm: &mut Assembler, string_reg: Opnd) -> Opnd { + asm_comment!(asm, "get string pointer for embedded or heap"); + + let flags_opnd = Opnd::mem(VALUE_BITS, string_reg, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags_opnd, (RSTRING_NOEMBED as u64).into()); + let heap_ptr_opnd = asm.load(Opnd::mem( + usize::BITS as u8, + string_reg, + RUBY_OFFSET_RSTRING_AS_HEAP_PTR, + )); + + // Load the address of the embedded array + // (struct RString *)(obj)->as.ary + let ary_opnd = asm.lea(Opnd::mem(VALUE_BITS, string_reg, RUBY_OFFSET_RSTRING_AS_ARY)); + asm.csel_nz(heap_ptr_opnd, ary_opnd) +} + +/// Pushes arguments from an array to the stack. Differs from push splat because +/// the array can have items left over. Array is assumed to be T_ARRAY without guards. +fn copy_splat_args_for_rest_callee(array: Opnd, num_args: u32, asm: &mut Assembler) { + asm_comment!(asm, "copy_splat_args_for_rest_callee"); + + // Unused operands cause the backend to panic + if num_args == 0 { + return; + } + + asm_comment!(asm, "Push arguments from array"); + + let array_reg = asm.load(array); + let ary_opnd = get_array_ptr(asm, array_reg); + for i in 0..num_args { + let top = asm.stack_push(Type::Unknown); + asm.mov(top, Opnd::mem(64, ary_opnd, i as i32 * SIZEOF_VALUE_I32)); + } +} + +/// Pushes arguments from an array to the stack that are passed with a splat (i.e. *args) +/// It optimistically compiles to a static size that is the exact number of arguments +/// needed for the function. 
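+/// For example (illustrative), when three positional slots remain to be filled by `f(*ary)`,
+/// this guards that `ary.length == 3` and that the last element is not a ruby2_keywords
+/// hash, then replaces the splat array on the stack with its three elements.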
+fn push_splat_args(required_args: u32, asm: &mut Assembler) { + asm_comment!(asm, "push_splat_args"); + + let array_opnd = asm.stack_opnd(0); + guard_object_is_array( + asm, + array_opnd, + array_opnd.into(), + Counter::guard_send_splat_not_array, + ); + + let array_len_opnd = get_array_len(asm, array_opnd); + + asm_comment!(asm, "Guard for expected splat length"); + asm.cmp(array_len_opnd, required_args.into()); + asm.jne(Target::side_exit(Counter::guard_send_splatarray_length_not_equal)); + + asm_comment!(asm, "Check last argument is not ruby2keyword hash"); + + // Need to repeat this here to deal with register allocation + let array_reg = asm.load(asm.stack_opnd(0)); + + let ary_opnd = get_array_ptr(asm, array_reg); + + let last_array_value = asm.load(Opnd::mem(64, ary_opnd, (required_args as i32 - 1) * (SIZEOF_VALUE as i32))); + + guard_object_is_not_ruby2_keyword_hash( + asm, + last_array_value, + Counter::guard_send_splatarray_last_ruby2_keywords, + ); + + asm_comment!(asm, "Push arguments from array"); + let array_opnd = asm.stack_pop(1); + + if required_args > 0 { + let array_reg = asm.load(array_opnd); + let ary_opnd = get_array_ptr(asm, array_reg); + + for i in 0..required_args { + let top = asm.stack_push(Type::Unknown); + asm.mov(top, Opnd::mem(64, ary_opnd, i as i32 * SIZEOF_VALUE_I32)); + } + + asm_comment!(asm, "end push_each"); + } +} + +fn gen_send_bmethod( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + ci: *const rb_callinfo, + cme: *const rb_callable_method_entry_t, + block: Option<BlockHandler>, + flags: u32, + argc: i32, +) -> Option<CodegenStatus> { + let procv = unsafe { rb_get_def_bmethod_proc((*cme).def) }; + + let proc = unsafe { rb_yjit_get_proc_ptr(procv) }; + let proc_block = unsafe { &(*proc).block }; + + if proc_block.type_ != block_type_iseq { + return None; + } + + let capture = unsafe { proc_block.as_.captured.as_ref() }; + let iseq = unsafe { *capture.code.iseq.as_ref() }; + + // Optimize for single ractor mode and avoid runtime check for + // "defined with an un-shareable Proc in a different Ractor" + if !assume_single_ractor_mode(jit, asm, ocb) { + gen_counter_incr(asm, Counter::send_bmethod_ractor); + return None; + } + + // Passing a block to a block needs logic different from passing + // a block to a method and sometimes requires allocation. Bail for now. + if block.is_some() { + gen_counter_incr(asm, Counter::send_bmethod_block_arg); + return None; + } + + let frame_type = VM_FRAME_MAGIC_BLOCK | VM_FRAME_FLAG_BMETHOD | VM_FRAME_FLAG_LAMBDA; + perf_call! { gen_send_iseq(jit, asm, ocb, iseq, ci, frame_type, Some(capture.ep), cme, block, flags, argc, None) } +} + +/// The kind of a value an ISEQ returns +enum IseqReturn { + Value(VALUE), + LocalVariable(u32), + Receiver, +} + +extern { + fn rb_simple_iseq_p(iseq: IseqPtr) -> bool; +} + +/// Return the ISEQ's return value if it consists of one simple instruction and leave. 
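+/// Illustrative cases: `def answer = 42` (putobject; leave) yields IseqReturn::Value,
+/// `def me = self` (putself; leave) yields IseqReturn::Receiver, and `def first(a, b) = a`
+/// (getlocal_WC_0; leave) yields IseqReturn::LocalVariable, so the caller can produce the
+/// result without pushing a frame at all.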
+fn iseq_get_return_value(iseq: IseqPtr, captured_opnd: Option<Opnd>, ci_flags: u32) -> Option<IseqReturn> { + // Expect only two instructions and one possible operand + let iseq_size = unsafe { get_iseq_encoded_size(iseq) }; + if !(2..=3).contains(&iseq_size) { + return None; + } + + // Get the first two instructions + let first_insn = iseq_opcode_at_idx(iseq, 0); + let second_insn = iseq_opcode_at_idx(iseq, insn_len(first_insn as usize)); + + // Extract the return value if known + if second_insn != YARVINSN_leave { + return None; + } + match first_insn { + YARVINSN_getlocal_WC_0 => { + // Only accept simple positional only cases for both the caller and the callee. + // Reject block ISEQs to avoid autosplat and other block parameter complications. + if captured_opnd.is_none() && unsafe { rb_simple_iseq_p(iseq) } && ci_flags & VM_CALL_ARGS_SIMPLE != 0 { + let ep_offset = unsafe { *rb_iseq_pc_at_idx(iseq, 1) }.as_u32(); + let local_idx = ep_offset_to_local_idx(iseq, ep_offset); + Some(IseqReturn::LocalVariable(local_idx)) + } else { + None + } + } + YARVINSN_putnil => Some(IseqReturn::Value(Qnil)), + YARVINSN_putobject => Some(IseqReturn::Value(unsafe { *rb_iseq_pc_at_idx(iseq, 1) })), + YARVINSN_putobject_INT2FIX_0_ => Some(IseqReturn::Value(VALUE::fixnum_from_usize(0))), + YARVINSN_putobject_INT2FIX_1_ => Some(IseqReturn::Value(VALUE::fixnum_from_usize(1))), + // We don't support invokeblock for now. Such ISEQs are likely not used by blocks anyway. + YARVINSN_putself if captured_opnd.is_none() => Some(IseqReturn::Receiver), + _ => None, + } +} + +fn gen_send_iseq( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + iseq: *const rb_iseq_t, + ci: *const rb_callinfo, + frame_type: u32, + prev_ep: Option<*const VALUE>, + cme: *const rb_callable_method_entry_t, + block: Option<BlockHandler>, + flags: u32, + argc: i32, + captured_opnd: Option<Opnd>, +) -> Option<CodegenStatus> { + // Argument count. We will change this as we gather values from + // sources to satisfy the callee's parameters. To help make sense + // of changes, note that: + // - Parameters syntactically on the left have lower addresses. + // For example, all the lead (required) and optional parameters + // have lower addresses than the rest parameter array. + // - The larger the index one passes to Assembler::stack_opnd(), + // the *lower* the address. + let mut argc = argc; + + // Iseqs with keyword parameters have a hidden, unnamed parameter local + // that the callee could use to know which keywords are unspecified + // (see the `checkkeyword` instruction and check `ruby --dump=insn -e 'def foo(k:itself)=k'`). + // We always need to set up this local if the call goes through. 
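+    // (Concretely, that hidden local is the `unspecified_bits` Fixnum built by
+    // gen_iseq_kw_call() further down: bit i is set when the i-th optional keyword
+    // was not supplied and has a non-constant default.)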
+ let has_kwrest = unsafe { get_iseq_flags_has_kwrest(iseq) }; + let doing_kw_call = unsafe { get_iseq_flags_has_kw(iseq) } || has_kwrest; + let supplying_kws = unsafe { vm_ci_flag(ci) & VM_CALL_KWARG } != 0; + let iseq_has_rest = unsafe { get_iseq_flags_has_rest(iseq) }; + let iseq_has_block_param = unsafe { get_iseq_flags_has_block(iseq) }; + let arg_setup_block = captured_opnd.is_some(); // arg_setup_type: arg_setup_block (invokeblock) + let kw_splat = flags & VM_CALL_KW_SPLAT != 0; + let splat_call = flags & VM_CALL_ARGS_SPLAT != 0; + + // For computing offsets to callee locals + let num_params = unsafe { get_iseq_body_param_size(iseq) as i32 }; + let num_locals = unsafe { get_iseq_body_local_table_size(iseq) as i32 }; + + let mut start_pc_offset: u16 = 0; + let required_num = unsafe { get_iseq_body_param_lead_num(iseq) }; + + // This struct represents the metadata about the caller-specified + // keyword arguments. + let kw_arg = unsafe { vm_ci_kwarg(ci) }; + let kw_arg_num = if kw_arg.is_null() { + 0 + } else { + unsafe { get_cikw_keyword_len(kw_arg) } + }; + + // Arity handling and optional parameter setup for positional arguments. + // Splats are handled later. + let mut opts_filled = argc - required_num - kw_arg_num - i32::from(kw_splat) - i32::from(splat_call); + let opt_num = unsafe { get_iseq_body_param_opt_num(iseq) }; + // With a rest parameter or a yield to a block, + // callers can pass more than required + optional. + // So we cap ops_filled at opt_num. + if iseq_has_rest || arg_setup_block { + opts_filled = min(opts_filled, opt_num); + } + let mut opts_missing: i32 = opt_num - opts_filled; + + let block_arg = flags & VM_CALL_ARGS_BLOCKARG != 0; + // Stack index of the splat array + let splat_pos = i32::from(block_arg) + i32::from(kw_splat) + kw_arg_num; + + exit_if_stack_too_large(iseq)?; + exit_if_tail_call(asm, ci)?; + exit_if_has_post(asm, iseq)?; + exit_if_kwsplat_non_nil(asm, flags, Counter::send_iseq_kw_splat_non_nil)?; + exit_if_has_rest_and_captured(asm, iseq_has_rest, captured_opnd)?; + exit_if_has_kwrest_and_captured(asm, has_kwrest, captured_opnd)?; + exit_if_has_rest_and_supplying_kws(asm, iseq_has_rest, supplying_kws)?; + exit_if_supplying_kw_and_has_no_kw(asm, supplying_kws, doing_kw_call)?; + exit_if_supplying_kws_and_accept_no_kwargs(asm, supplying_kws, iseq)?; + exit_if_doing_kw_and_splat(asm, doing_kw_call, flags)?; + exit_if_wrong_number_arguments(asm, arg_setup_block, opts_filled, flags, opt_num, iseq_has_rest)?; + exit_if_doing_kw_and_opts_missing(asm, doing_kw_call, opts_missing)?; + exit_if_has_rest_and_optional_and_block(asm, iseq_has_rest, opt_num, iseq, block_arg)?; + let block_arg_type = exit_if_unsupported_block_arg_type(jit, asm, block_arg)?; + + // Bail if we can't drop extra arguments for a yield by just popping them + if supplying_kws && arg_setup_block && argc > (kw_arg_num + required_num + opt_num) { + gen_counter_incr(asm, Counter::send_iseq_complex_discard_extras); + return None; + } + + // Block parameter handling. This mirrors setup_parameters_complex(). + if iseq_has_block_param { + if unsafe { get_iseq_body_local_iseq(iseq) == iseq } { + // Do nothing + } else { + // In this case (param.flags.has_block && local_iseq != iseq), + // the block argument is setup as a local variable and requires + // materialization (allocation). Bail. 
+            gen_counter_incr(asm, Counter::send_iseq_materialized_block);
+            return None;
+        }
+    }
+
+    // Check that required keyword arguments are supplied and find any extras
+    // that should go into the keyword rest parameter (**kw_rest).
+    if doing_kw_call {
+        gen_iseq_kw_call_checks(asm, iseq, kw_arg, has_kwrest, kw_arg_num)?;
+    }
+
+    let splat_array_length = if splat_call {
+        let array = jit.peek_at_stack(&asm.ctx, splat_pos as isize);
+        let array_length = if array == Qnil {
+            0
+        } else if unsafe { !RB_TYPE_P(array, RUBY_T_ARRAY) } {
+            gen_counter_incr(asm, Counter::send_iseq_splat_not_array);
+            return None;
+        } else {
+            unsafe { rb_yjit_array_len(array) as u32 }
+        };
+
+        // Arity check accounting for the size of the splat. When the callee has rest
+        // parameters, we insert runtime guards later in copy_splat_args_for_rest_callee()
+        if !iseq_has_rest {
+            let supplying = argc - 1 - i32::from(kw_splat) + array_length as i32;
+            if !(required_num..=required_num + opt_num).contains(&supplying) {
+                gen_counter_incr(asm, Counter::send_iseq_splat_arity_error);
+                return None;
+            }
+        }
+
+        if iseq_has_rest && opt_num > 0 {
+            // If we have rest and optional parameters, we set the pc_offset for
+            // where to jump in the called method. If the number of args changes,
+            // that offset would need to change too, and we don't adjust it
+            // dynamically, so we side exit. On a normal splat without rest and
+            // optional args this is handled elsewhere depending on the case.
+            asm_comment!(asm, "Side exit if length doesn't equal compile time length");
+            let array_len_opnd = get_array_len(asm, asm.stack_opnd(splat_pos));
+            asm.cmp(array_len_opnd, array_length.into());
+            asm.jne(Target::side_exit(Counter::guard_send_splatarray_length_not_equal));
+        }
+
+        Some(array_length)
+    } else {
+        None
+    };
+
+    // Check if we need the arg0 splat handling of vm_callee_setup_block_arg(),
+    // also known as "autosplat" inside setup_parameters_complex().
+    // Autosplat checks argc == 1 after splat and kwsplat processing, so make
+    // sure to amend this if we start supporting kw_splat.
+    let block_arg0_splat = arg_setup_block
+        && (argc == 1 || (argc == 2 && splat_array_length == Some(0)))
+        && !supplying_kws && !doing_kw_call
+        && unsafe {
+            (get_iseq_flags_has_lead(iseq) || opt_num > 1)
+                && !get_iseq_flags_ambiguous_param0(iseq)
+        };
+    if block_arg0_splat {
+        // If block_arg0_splat, we still need side exits after the splat, but
+        // the splat modifies the stack which breaks side exits. So bail out.
+        if splat_call {
+            gen_counter_incr(asm, Counter::invokeblock_iseq_arg0_args_splat);
+            return None;
+        }
+        // The block_arg0_splat implementation cannot deal with optional parameters.
+        // This is a setup_parameters_complex() situation and interacts with the
+        // starting position of the callee.
+        if opt_num > 1 {
+            gen_counter_incr(asm, Counter::invokeblock_iseq_arg0_optional);
+            return None;
+        }
+    }
+
+    // Adjust `opts_filled` and `opts_missing` taking
+    // into account the size of the splat expansion.
+    if let Some(len) = splat_array_length {
+        assert_eq!(kw_arg_num, 0); // Due to exit_if_doing_kw_and_splat().
+        // Simplifies calculation below.
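+        // For example (illustrative): `def foo(a, b = 0, c = 1)` called as `foo(1, *[2, 3])`
+        // arrives here with argc == 2 (the 1 plus the splat array) and len == 2,
+        // so num_args == 3, opts_filled becomes 2, and opts_missing becomes 0.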
+ let num_args = argc - 1 - i32::from(kw_splat) + len as i32; + + opts_filled = if num_args >= required_num { + min(num_args - required_num, opt_num) + } else { + 0 + }; + opts_missing = opt_num - opts_filled; + } + + assert_eq!(opts_missing + opts_filled, opt_num); + assert!(opts_filled >= 0); + + // ISeq with optional parameters start at different + // locations depending on the number of optionals given. + if opt_num > 0 { + assert!(opts_filled >= 0); + unsafe { + let opt_table = get_iseq_body_param_opt_table(iseq); + start_pc_offset = opt_table.offset(opts_filled as isize).read().try_into().unwrap(); + } + } + + // Increment total ISEQ send count + gen_counter_incr(asm, Counter::num_send_iseq); + + // Shortcut for special `Primitive.attr! :leaf` builtins + let builtin_attrs = unsafe { rb_yjit_iseq_builtin_attrs(iseq) }; + let builtin_func_raw = unsafe { rb_yjit_builtin_function(iseq) }; + let builtin_func = if builtin_func_raw.is_null() { None } else { Some(builtin_func_raw) }; + let opt_send_call = flags & VM_CALL_OPT_SEND != 0; // .send call is not currently supported for builtins + if let (None, Some(builtin_info), true, false, None | Some(0)) = + (block, builtin_func, builtin_attrs & BUILTIN_ATTR_LEAF != 0, opt_send_call, splat_array_length) { + let builtin_argc = unsafe { (*builtin_info).argc }; + if builtin_argc + 1 < (C_ARG_OPNDS.len() as i32) { + // We pop the block arg without using it because: + // - the builtin is leaf, so it promises to not `yield`. + // - no leaf builtins have block param at the time of writing, and + // adding one requires interpreter changes to support. + if block_arg_type.is_some() { + if iseq_has_block_param { + gen_counter_incr(asm, Counter::send_iseq_leaf_builtin_block_arg_block_param); + return None; + } + asm.stack_pop(1); + } + + // Pop empty kw_splat hash which passes nothing (exit_if_kwsplat_non_nil()) + if kw_splat { + asm.stack_pop(1); + } + + // Pop empty splat array which passes nothing + if let Some(0) = splat_array_length { + asm.stack_pop(1); + } + + asm_comment!(asm, "inlined leaf builtin"); + gen_counter_incr(asm, Counter::num_send_iseq_leaf); + + // The callee may allocate, e.g. Integer#abs on a Bignum. + // Save SP for GC, save PC for allocation tracing, and prepare + // for global invalidation after GC's VM lock contention. + jit_prepare_call_with_gc(jit, asm); + + // Call the builtin func (ec, recv, arg1, arg2, ...) + let mut args = vec![EC]; + + // Copy self and arguments + for i in 0..=builtin_argc { + let stack_opnd = asm.stack_opnd(builtin_argc - i); + args.push(stack_opnd); + } + let val = asm.ccall(unsafe { (*builtin_info).func_ptr as *const u8 }, args); + asm.stack_pop((builtin_argc + 1).try_into().unwrap()); // Keep them on stack during ccall for GC + + // Push the return value + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val); + + // Note: assuming that the leaf builtin doesn't change local variables here. + // Seems like a safe assumption. 
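+            // (Roughly: being a `:leaf` builtin means it promises not to call back into
+            // Ruby code, which is why it can be invoked as a plain C call here without
+            // pushing a control frame for the callee.)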
+ + // Let guard chains share the same successor + jump_to_next_insn(jit, asm, ocb); + return Some(EndBlock); + } + } + + // Inline simple ISEQs whose return value is known at compile time + if let (Some(value), None, false) = (iseq_get_return_value(iseq, captured_opnd, flags), block_arg_type, opt_send_call) { + asm_comment!(asm, "inlined simple ISEQ"); + gen_counter_incr(asm, Counter::num_send_iseq_inline); + + match value { + IseqReturn::LocalVariable(local_idx) => { + // Put the local variable at the return slot + let stack_local = asm.stack_opnd(argc - 1 - local_idx as i32); + let stack_return = asm.stack_opnd(argc); + asm.mov(stack_return, stack_local); + + // Update the mapping for the return value + let mapping = asm.ctx.get_opnd_mapping(stack_local.into()); + asm.ctx.set_opnd_mapping(stack_return.into(), mapping); + + // Pop everything but the return value + asm.stack_pop(argc as usize); + } + IseqReturn::Value(value) => { + // Pop receiver and arguments + asm.stack_pop(argc as usize + if captured_opnd.is_some() { 0 } else { 1 }); + + // Push the return value + let stack_ret = asm.stack_push(Type::from(value)); + asm.mov(stack_ret, value.into()); + }, + IseqReturn::Receiver => { + // Just pop arguments and leave the receiver on stack + asm.stack_pop(argc as usize); + } + } + + // Let guard chains share the same successor + jump_to_next_insn(jit, asm, ocb); + return Some(EndBlock); + } + + // Stack overflow check + // Note that vm_push_frame checks it against a decremented cfp, hence the multiply by 2. + // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin) + asm_comment!(asm, "stack overflow check"); + const _: () = assert!(RUBY_SIZEOF_CONTROL_FRAME % SIZEOF_VALUE == 0, "sizeof(rb_control_frame_t) is a multiple of sizeof(VALUE)"); + let stack_max: i32 = unsafe { get_iseq_body_stack_max(iseq) }.try_into().unwrap(); + let locals_offs = (num_locals + stack_max) + 2 * (RUBY_SIZEOF_CONTROL_FRAME / SIZEOF_VALUE) as i32; + let stack_limit = asm.lea(asm.ctx.sp_opnd(locals_offs)); + asm.cmp(CFP, stack_limit); + asm.jbe(Target::side_exit(Counter::guard_send_se_cf_overflow)); + + if iseq_has_rest && splat_call { + // Insert length guard for a call to copy_splat_args_for_rest_callee() + // that will come later. We will have made changes to + // the stack by spilling or handling __send__ shifting + // by the time we get to that code, so we need the + // guard here where we can still side exit. + let non_rest_arg_count = argc - i32::from(kw_splat) - 1; + if non_rest_arg_count < required_num + opt_num { + let take_count: u32 = (required_num - non_rest_arg_count + opts_filled) + .try_into().unwrap(); + + if take_count > 0 { + asm_comment!(asm, "guard splat_array_length >= {take_count}"); + + let splat_array = asm.stack_opnd(splat_pos); + let array_len_opnd = get_array_len(asm, splat_array); + asm.cmp(array_len_opnd, take_count.into()); + asm.jl(Target::side_exit(Counter::guard_send_iseq_has_rest_and_splat_too_few)); + } + } + + // All splats need to guard for ruby2_keywords hash. Check with a function call when + // splatting into a rest param since the index for the last item in the array is dynamic. 
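+        // (Roughly: a ruby2_keywords hash is a Hash flagged by `ruby2_keywords`-style
+        // delegation; when the array is splatted again, that trailing hash should turn
+        // back into keyword arguments, which we don't handle here, so we side exit.)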
+ asm_comment!(asm, "guard no ruby2_keywords hash in splat"); + let bad_splat = asm.ccall(rb_yjit_ruby2_keywords_splat_p as _, vec![asm.stack_opnd(splat_pos)]); + asm.cmp(bad_splat, 0.into()); + asm.jnz(Target::side_exit(Counter::guard_send_splatarray_last_ruby2_keywords)); + } + + match block_arg_type { + Some(BlockArg::Nil) => { + // We have a nil block arg, so let's pop it off the args + asm.stack_pop(1); + } + Some(BlockArg::BlockParamProxy) => { + // We don't need the actual stack value + asm.stack_pop(1); + } + Some(BlockArg::TProc) => { + // Place the proc as the block handler. We do this early because + // the block arg being at the top of the stack gets in the way of + // rest param handling later. Also, since there are C calls that + // come later, we can't hold this value in a register and place it + // near the end when we push a new control frame. + asm_comment!(asm, "guard block arg is a proc"); + // Simple predicate, no need for jit_prepare_non_leaf_call(). + let is_proc = asm.ccall(rb_obj_is_proc as _, vec![asm.stack_opnd(0)]); + asm.cmp(is_proc, Qfalse.into()); + jit_chain_guard( + JCC_JE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_send_block_arg_type, + ); + + let callee_ep = -argc + num_locals + VM_ENV_DATA_SIZE as i32 - 1; + let callee_specval = callee_ep + VM_ENV_DATA_INDEX_SPECVAL; + if callee_specval < 0 { + // Can't write to sp[-n] since that's where the arguments are + gen_counter_incr(asm, Counter::send_iseq_clobbering_block_arg); + return None; + } + let proc = asm.stack_pop(1); // Pop first, as argc doesn't account for the block arg + let callee_specval = asm.ctx.sp_opnd(callee_specval); + asm.store(callee_specval, proc); + } + None => { + // Nothing to do + } + } + + if kw_splat { + // Only `**nil` is supported right now. Checked in exit_if_kwsplat_non_nil() + assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0))); + asm.stack_pop(1); + argc -= 1; + } + + // push_splat_args does stack manipulation so we can no longer side exit + if let Some(array_length) = splat_array_length { + if !iseq_has_rest { + // Speculate that future splats will be done with + // an array that has the same length. We will insert guards. + argc = argc - 1 + array_length as i32; + if argc + asm.ctx.get_stack_size() as i32 > MAX_SPLAT_LENGTH { + gen_counter_incr(asm, Counter::send_splat_too_long); + return None; + } + push_splat_args(array_length, asm); + } + } + + // This is a .send call and we need to adjust the stack + // TODO: This can be more efficient if we do it before + // extracting from the splat array above. + if flags & VM_CALL_OPT_SEND != 0 { + handle_opt_send_shift_stack(asm, argc); + } + + if iseq_has_rest { + // We are going to allocate so setting pc and sp. + jit_save_pc(jit, asm); + gen_save_sp(asm); + + let rest_param_array = if splat_call { + let non_rest_arg_count = argc - 1; + // We start by dupping the array because someone else might have + // a reference to it. This also normalizes to an ::Array instance. + let array = asm.stack_opnd(0); + let array = asm.ccall( + rb_ary_dup as *const u8, + vec![array], + ); + asm.stack_pop(1); // Pop array after ccall to use a register for passing it. + + // This is the end stack state of all `non_rest_arg_count` situations below + argc = required_num + opts_filled; + + if non_rest_arg_count > required_num + opt_num { + // If we have more arguments than required, we need to prepend + // the items from the stack onto the array. 
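+            // For example (illustrative): `def foo(a, *rest); end` called as
+            // `foo(1, 2, 3, *[4, 5])` has non_rest_arg_count == 3, so the 2 and 3 are
+            // unshifted onto the dup'ed array below, making rest == [2, 3, 4, 5].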
+ let diff: u32 = (non_rest_arg_count - (required_num + opt_num)) + .try_into().unwrap(); + + // diff is >0 so no need to worry about null pointer + asm_comment!(asm, "load pointer to array elements"); + let values_opnd = asm.ctx.sp_opnd(-(diff as i32)); + let values_ptr = asm.lea(values_opnd); + + asm_comment!(asm, "prepend stack values to rest array"); + let array = asm.ccall( + rb_ary_unshift_m as *const u8, + vec![Opnd::UImm(diff as u64), values_ptr, array], + ); + asm.stack_pop(diff as usize); + + array + } else if non_rest_arg_count < required_num + opt_num { + // If we have fewer arguments than required, we need to take some + // from the array and move them to the stack. + asm_comment!(asm, "take items from splat array"); + + let take_count: u32 = (required_num - non_rest_arg_count + opts_filled) + .try_into().unwrap(); + + // Copy required arguments to the stack without modifying the array + copy_splat_args_for_rest_callee(array, take_count, asm); + + // We will now slice the array to give us a new array of the correct size + let sliced = asm.ccall(rb_yjit_rb_ary_subseq_length as *const u8, vec![array, Opnd::UImm(take_count.into())]); + + sliced + } else { + // The arguments are equal so we can just push to the stack + asm_comment!(asm, "same length for splat array and rest param"); + assert!(non_rest_arg_count == required_num + opt_num); + + array + } + } else { + asm_comment!(asm, "rest parameter without splat"); + + assert!(argc >= required_num); + let n = (argc - required_num - opts_filled) as u32; + argc = required_num + opts_filled; + // If n is 0, then elts is never going to be read, so we can just pass null + let values_ptr = if n == 0 { + Opnd::UImm(0) + } else { + asm_comment!(asm, "load pointer to array elements"); + let values_opnd = asm.ctx.sp_opnd(-(n as i32)); + asm.lea(values_opnd) + }; + + let new_ary = asm.ccall( + rb_ec_ary_new_from_values as *const u8, + vec![ + EC, + Opnd::UImm(n.into()), + values_ptr + ] + ); + asm.stack_pop(n.as_usize()); + + new_ary + }; + + // Find where to put the rest parameter array + let rest_param = if opts_missing == 0 { + // All optionals are filled, the rest param goes at the top of the stack + argc += 1; + asm.stack_push(Type::TArray) + } else { + // The top of the stack will be a missing optional, but the rest + // parameter needs to be placed after all the missing optionals. + // Place it using a stack operand with a negative stack index. + // (Higher magnitude negative stack index have higher address.) + assert!(opts_missing > 0); + // The argument deepest in the stack will be the 0th local in the callee. + let callee_locals_base = argc - 1; + let rest_param_stack_idx = callee_locals_base - required_num - opt_num; + assert!(rest_param_stack_idx < 0); + asm.stack_opnd(rest_param_stack_idx) + }; + // Store rest param to memory to avoid register shuffle as + // we won't be reading it for the remainder of the block. + asm.ctx.dealloc_temp_reg(rest_param.stack_idx()); + asm.store(rest_param, rest_param_array); + } + + // Pop surplus positional arguments when yielding + if arg_setup_block { + let extras = argc - required_num - opt_num; + if extras > 0 { + // Checked earlier. If there are keyword args, then + // the positional arguments are not at the stack top. 
+            assert_eq!(0, kw_arg_num);
+
+            asm.stack_pop(extras as usize);
+            argc = required_num + opt_num;
+        }
+    }
+
+    // Keyword argument passing
+    if doing_kw_call {
+        argc = gen_iseq_kw_call(jit, asm, kw_arg, iseq, argc, has_kwrest);
+    }
+
+    // Same as vm_callee_setup_block_arg_arg0_check and vm_callee_setup_block_arg_arg0_splat
+    // on vm_callee_setup_block_arg for arg_setup_block. This is done after CALLER_SETUP_ARG
+    // and CALLER_REMOVE_EMPTY_KW_SPLAT, so this implementation is put here. This may need
+    // side exits, so you still need to allow side exits here if block_arg0_splat is true.
+    // Note that you can't have side exits after this arg0 splat.
+    if block_arg0_splat {
+        let arg0_opnd = asm.stack_opnd(0);
+
+        // Only handle the case where no to_ary conversion is needed
+        let not_array_counter = Counter::invokeblock_iseq_arg0_not_array;
+        guard_object_is_array(asm, arg0_opnd, arg0_opnd.into(), not_array_counter);
+
+        // Only handle the case where the array length == ISEQ's lead_num (the most common case)
+        let arg0_len_opnd = get_array_len(asm, arg0_opnd);
+        let lead_num = unsafe { rb_get_iseq_body_param_lead_num(iseq) };
+        asm.cmp(arg0_len_opnd, lead_num.into());
+        asm.jne(Target::side_exit(Counter::invokeblock_iseq_arg0_wrong_len));
+
+        let arg0_reg = asm.load(arg0_opnd);
+        let array_opnd = get_array_ptr(asm, arg0_reg);
+        asm_comment!(asm, "push splat arg0 onto the stack");
+        asm.stack_pop(argc.try_into().unwrap());
+        for i in 0..lead_num {
+            let stack_opnd = asm.stack_push(Type::Unknown);
+            asm.mov(stack_opnd, Opnd::mem(64, array_opnd, SIZEOF_VALUE_I32 * i));
+        }
+        argc = lead_num;
+    }
+
+    fn nil_fill(comment: &'static str, fill_range: std::ops::Range<i32>, asm: &mut Assembler) {
+        if fill_range.is_empty() {
+            return;
+        }
+
+        asm_comment!(asm, "{}", comment);
+        for i in fill_range {
+            let value_slot = asm.ctx.sp_opnd(i);
+            asm.store(value_slot, Qnil.into());
+        }
+    }
+
+    // Nil-initialize missing optional parameters
+    nil_fill(
+        "nil-initialize missing optionals",
+        {
+            let begin = -argc + required_num + opts_filled;
+            let end = -argc + required_num + opt_num;
+
+            begin..end
+        },
+        asm
+    );
+    // Nil-initialize the block parameter.
It's the last parameter local + if iseq_has_block_param { + let block_param = asm.ctx.sp_opnd(-argc + num_params - 1); + asm.store(block_param, Qnil.into()); + } + // Nil-initialize non-parameter locals + nil_fill( + "nil-initialize locals", + { + let begin = -argc + num_params; + let end = -argc + num_locals; + + begin..end + }, + asm + ); + + // Points to the receiver operand on the stack unless a captured environment is used + let recv = match captured_opnd { + Some(captured_opnd) => asm.load(Opnd::mem(64, captured_opnd, 0)), // captured->self + _ => asm.stack_opnd(argc), + }; + let captured_self = captured_opnd.is_some(); + let sp_offset = argc + if captured_self { 0 } else { 1 }; + + // Store the updated SP on the current frame (pop arguments and receiver) + asm_comment!(asm, "store caller sp"); + let caller_sp = asm.lea(asm.ctx.sp_opnd(-sp_offset)); + asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), caller_sp); + + // Store the next PC in the current frame + jit_save_pc(jit, asm); + + // Adjust the callee's stack pointer + let callee_sp = asm.lea(asm.ctx.sp_opnd(-argc + num_locals + VM_ENV_DATA_SIZE as i32)); + + let specval = if let Some(prev_ep) = prev_ep { + // We've already side-exited if the callee expects a block, so we + // ignore any supplied block here + SpecVal::PrevEP(prev_ep) + } else if let Some(captured_opnd) = captured_opnd { + let ep_opnd = asm.load(Opnd::mem(64, captured_opnd, SIZEOF_VALUE_I32)); // captured->ep + SpecVal::PrevEPOpnd(ep_opnd) + } else if let Some(BlockArg::TProc) = block_arg_type { + SpecVal::BlockHandler(Some(BlockHandler::AlreadySet)) + } else if let Some(BlockArg::BlockParamProxy) = block_arg_type { + SpecVal::BlockHandler(Some(BlockHandler::BlockParamProxy)) + } else { + SpecVal::BlockHandler(block) + }; + + // Setup the new frame + perf_call!("gen_send_iseq: ", gen_push_frame(jit, asm, ControlFrame { + frame_type, + specval, + cme, + recv, + sp: callee_sp, + iseq: Some(iseq), + pc: None, // We are calling into jitted code, which will set the PC as necessary + })); + + // Log the name of the method we're calling to. We intentionally don't do this for inlined ISEQs. + // We also do this after gen_push_frame() to minimize the impact of spill_temps() on asm.ccall(). + if get_option!(gen_stats) { + // Assemble the ISEQ name string + let name_str = get_iseq_name(iseq); + + // Get an index for this ISEQ name + let iseq_idx = get_iseq_idx(&name_str); + + // Increment the counter for this cfunc + asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]); + } + + // No need to set cfp->pc since the callee sets it whenever calling into routines + // that could look at it through jit_save_pc(). + // mov(cb, REG0, const_ptr_opnd(start_pc)); + // mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), REG0); + + // Stub so we can return to JITted code + let return_block = BlockId { + iseq: jit.iseq, + idx: jit.next_insn_idx(), + }; + + // Create a context for the callee + let mut callee_ctx = Context::default(); + + // If the callee has :inline_block annotation and the callsite has a block ISEQ, + // duplicate a callee block for each block ISEQ to make its `yield` monomorphic. 
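+    // (Since the block ISEQ recorded via set_inline_block() is part of the callee Context,
+    // call sites passing different blocks get separate versions of the callee.)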
+ if let (Some(BlockHandler::BlockISeq(iseq)), true) = (block, builtin_attrs & BUILTIN_ATTR_INLINE_BLOCK != 0) { + callee_ctx.set_inline_block(iseq); + } + + // Set the argument types in the callee's context + for arg_idx in 0..argc { + let stack_offs: u8 = (argc - arg_idx - 1).try_into().unwrap(); + let arg_type = asm.ctx.get_opnd_type(StackOpnd(stack_offs)); + callee_ctx.set_local_type(arg_idx.try_into().unwrap(), arg_type); + } + + let recv_type = if captured_self { + Type::Unknown // we don't track the type information of captured->self for now + } else { + asm.ctx.get_opnd_type(StackOpnd(argc.try_into().unwrap())) + }; + callee_ctx.upgrade_opnd_type(SelfOpnd, recv_type); + + // The callee might change locals through Kernel#binding and other means. + asm.clear_local_types(); + + // Pop arguments and receiver in return context and + // mark it as a continuation of gen_leave() + let mut return_asm = Assembler::new(); + return_asm.ctx = asm.ctx; + return_asm.stack_pop(sp_offset.try_into().unwrap()); + return_asm.ctx.set_sp_offset(0); // We set SP on the caller's frame above + return_asm.ctx.reset_chain_depth_and_defer(); + return_asm.ctx.set_as_return_landing(); + + // Write the JIT return address on the callee frame + gen_branch( + jit, + asm, + ocb, + return_block, + &return_asm.ctx, + None, + None, + BranchGenFn::JITReturn, + ); + + // ec->cfp is updated after cfp->jit_return for rb_profile_frames() safety + asm_comment!(asm, "switch to new CFP"); + let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); + asm.mov(CFP, new_cfp); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); + + // Directly jump to the entry point of the callee + gen_direct_jump( + jit, + &callee_ctx, + BlockId { + iseq: iseq, + idx: start_pc_offset, + }, + asm, + ); + + Some(EndBlock) +} + +// Check if we can handle a keyword call +fn gen_iseq_kw_call_checks( + asm: &mut Assembler, + iseq: *const rb_iseq_t, + kw_arg: *const rb_callinfo_kwarg, + has_kwrest: bool, + caller_kw_num: i32 +) -> Option<()> { + // This struct represents the metadata about the callee-specified + // keyword parameters. + let keyword = unsafe { get_iseq_body_param_keyword(iseq) }; + let keyword_num: usize = unsafe { (*keyword).num }.try_into().unwrap(); + let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap(); + + let mut required_kwargs_filled = 0; + + if keyword_num > 30 || caller_kw_num > 64 { + // We have so many keywords that (1 << num) encoded as a FIXNUM + // (which shifts it left one more) no longer fits inside a 32-bit + // immediate. Similarly, we use a u64 in case of keyword rest parameter. + gen_counter_incr(asm, Counter::send_iseq_too_many_kwargs); + return None; + } + + // Check that the kwargs being passed are valid + if caller_kw_num > 0 { + // This is the list of keyword arguments that the callee specified + // in its initial declaration. + // SAFETY: see compile.c for sizing of this slice. + let callee_kwargs = if keyword_num == 0 { + &[] + } else { + unsafe { slice::from_raw_parts((*keyword).table, keyword_num) } + }; + + // Here we're going to build up a list of the IDs that correspond to + // the caller-specified keyword arguments. If they're not in the + // same order as the order specified in the callee declaration, then + // we're going to need to generate some code to swap values around + // on the stack. 
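+        // For example (illustrative): `def foo(a:, b:) = a` called as `foo(b: 2, a: 1)`
+        // passes this check, and gen_iseq_kw_call() later emits a stack swap so the
+        // values end up in the callee's declared order.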
+ let kw_arg_keyword_len = caller_kw_num as usize; + let mut caller_kwargs: Vec<ID> = vec![0; kw_arg_keyword_len]; + for kwarg_idx in 0..kw_arg_keyword_len { + let sym = unsafe { get_cikw_keywords_idx(kw_arg, kwarg_idx.try_into().unwrap()) }; + caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) }; + } + + // First, we're going to be sure that the names of every + // caller-specified keyword argument correspond to a name in the + // list of callee-specified keyword parameters. + for caller_kwarg in caller_kwargs { + let search_result = callee_kwargs + .iter() + .enumerate() // inject element index + .find(|(_, &kwarg)| kwarg == caller_kwarg); + + match search_result { + None if !has_kwrest => { + // If the keyword was never found, then we know we have a + // mismatch in the names of the keyword arguments, so we need to + // bail. + gen_counter_incr(asm, Counter::send_iseq_kwargs_mismatch); + return None; + } + Some((callee_idx, _)) if callee_idx < keyword_required_num => { + // Keep a count to ensure all required kwargs are specified + required_kwargs_filled += 1; + } + _ => (), + } + } + } + assert!(required_kwargs_filled <= keyword_required_num); + if required_kwargs_filled != keyword_required_num { + gen_counter_incr(asm, Counter::send_iseq_kwargs_mismatch); + return None; + } + + Some(()) +} + +// Codegen for keyword argument handling. Essentially private to gen_send_iseq() since +// there are a lot of preconditions to check before reaching this code. +fn gen_iseq_kw_call( + jit: &mut JITState, + asm: &mut Assembler, + ci_kwarg: *const rb_callinfo_kwarg, + iseq: *const rb_iseq_t, + mut argc: i32, + has_kwrest: bool, +) -> i32 { + let caller_keyword_len_i32: i32 = if ci_kwarg.is_null() { + 0 + } else { + unsafe { get_cikw_keyword_len(ci_kwarg) } + }; + let caller_keyword_len: usize = caller_keyword_len_i32.try_into().unwrap(); + let anon_kwrest = unsafe { rb_get_iseq_flags_anon_kwrest(iseq) && !get_iseq_flags_has_kw(iseq) }; + + // This struct represents the metadata about the callee-specified + // keyword parameters. + let keyword = unsafe { get_iseq_body_param_keyword(iseq) }; + + asm_comment!(asm, "keyword args"); + + // This is the list of keyword arguments that the callee specified + // in its initial declaration. + let callee_kwargs = unsafe { (*keyword).table }; + let callee_kw_count_i32: i32 = unsafe { (*keyword).num }; + let callee_kw_count: usize = callee_kw_count_i32.try_into().unwrap(); + let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap(); + + // Here we're going to build up a list of the IDs that correspond to + // the caller-specified keyword arguments. If they're not in the + // same order as the order specified in the callee declaration, then + // we're going to need to generate some code to swap values around + // on the stack. + let mut kwargs_order: Vec<ID> = vec![0; cmp::max(caller_keyword_len, callee_kw_count)]; + for kwarg_idx in 0..caller_keyword_len { + let sym = unsafe { get_cikw_keywords_idx(ci_kwarg, kwarg_idx.try_into().unwrap()) }; + kwargs_order[kwarg_idx] = unsafe { rb_sym2id(sym) }; + } + + let mut unspecified_bits = 0; + + // The stack_opnd() index to the 0th keyword argument. + let kwargs_stack_base = caller_keyword_len_i32 - 1; + + // Build the keyword rest parameter hash before we make any changes to the order of + // the supplied keyword arguments + let kwrest_type = if has_kwrest { + c_callable! 
{ + fn build_kw_rest(rest_mask: u64, stack_kwargs: *const VALUE, keywords: *const rb_callinfo_kwarg) -> VALUE { + if keywords.is_null() { + return unsafe { rb_hash_new() }; + } + + // Use the total number of supplied keywords as a size upper bound + let keyword_len = unsafe { (*keywords).keyword_len } as usize; + let hash = unsafe { rb_hash_new_with_size(keyword_len as u64) }; + + // Put pairs into the kwrest hash as the mask describes + for kwarg_idx in 0..keyword_len { + if (rest_mask & (1 << kwarg_idx)) != 0 { + unsafe { + let keyword_symbol = (*keywords).keywords.as_ptr().add(kwarg_idx).read(); + let keyword_value = stack_kwargs.add(kwarg_idx).read(); + rb_hash_aset(hash, keyword_symbol, keyword_value); + } + } + } + return hash; + } + } + + asm_comment!(asm, "build kwrest hash"); + + // Make a bit mask describing which keywords should go into kwrest. + let mut rest_mask: u64 = 0; + // Index for one argument that will go into kwrest. + let mut rest_collected_idx = None; + for (supplied_kw_idx, &supplied_kw) in kwargs_order.iter().take(caller_keyword_len).enumerate() { + let mut found = false; + for callee_idx in 0..callee_kw_count { + let callee_kw = unsafe { callee_kwargs.add(callee_idx).read() }; + if callee_kw == supplied_kw { + found = true; + break; + } + } + if !found { + rest_mask |= 1 << supplied_kw_idx; + if rest_collected_idx.is_none() { + rest_collected_idx = Some(supplied_kw_idx as i32); + } + } + } + + let (kwrest, kwrest_type) = if rest_mask == 0 && anon_kwrest { + // In case the kwrest hash should be empty and is anonymous in the callee, + // we can pass nil instead of allocating. Anonymous kwrest can only be + // delegated, and nil is the same as an empty hash when delegating. + (Qnil.into(), Type::Nil) + } else { + // Save PC and SP before allocating + jit_save_pc(jit, asm); + gen_save_sp(asm); + + // Build the kwrest hash. `struct rb_callinfo_kwarg` is malloc'd, so no GC concerns. + let kwargs_start = asm.lea(asm.ctx.sp_opnd(-caller_keyword_len_i32)); + let hash = asm.ccall( + build_kw_rest as _, + vec![rest_mask.into(), kwargs_start, Opnd::const_ptr(ci_kwarg.cast())] + ); + (hash, Type::THash) + }; + + // The kwrest parameter sits after `unspecified_bits` if the callee specifies any + // keywords. + let stack_kwrest_idx = kwargs_stack_base - callee_kw_count_i32 - i32::from(callee_kw_count > 0); + let stack_kwrest = asm.stack_opnd(stack_kwrest_idx); + // If `stack_kwrest` already has another argument there, we need to stow it elsewhere + // first before putting kwrest there. Use `rest_collected_idx` because that value went + // into kwrest so the slot is now free. + let kwrest_idx = callee_kw_count + usize::from(callee_kw_count > 0); + if let (Some(rest_collected_idx), true) = (rest_collected_idx, kwrest_idx < caller_keyword_len) { + let rest_collected = asm.stack_opnd(kwargs_stack_base - rest_collected_idx); + let mapping = asm.ctx.get_opnd_mapping(stack_kwrest.into()); + asm.mov(rest_collected, stack_kwrest); + asm.ctx.set_opnd_mapping(rest_collected.into(), mapping); + // Update our bookkeeping to inform the reordering step later. 
+ kwargs_order[rest_collected_idx as usize] = kwargs_order[kwrest_idx]; + kwargs_order[kwrest_idx] = 0; + } + // Put kwrest straight into memory, since we might pop it later + asm.ctx.dealloc_temp_reg(stack_kwrest.stack_idx()); + asm.mov(stack_kwrest, kwrest); + if stack_kwrest_idx >= 0 { + asm.ctx.set_opnd_mapping(stack_kwrest.into(), TempMapping::map_to_stack(kwrest_type)); + } + + Some(kwrest_type) + } else { + None + }; + + // Ensure the stack is large enough for the callee + for _ in caller_keyword_len..callee_kw_count { + argc += 1; + asm.stack_push(Type::Unknown); + } + // Now this is the stack_opnd() index to the 0th keyword argument. + let kwargs_stack_base = kwargs_order.len() as i32 - 1; + + // Next, we're going to loop through every keyword that was + // specified by the caller and make sure that it's in the correct + // place. If it's not we're going to swap it around with another one. + for kwarg_idx in 0..callee_kw_count { + let callee_kwarg = unsafe { callee_kwargs.add(kwarg_idx).read() }; + + // If the argument is already in the right order, then we don't + // need to generate any code since the expected value is already + // in the right place on the stack. + if callee_kwarg == kwargs_order[kwarg_idx] { + continue; + } + + // In this case the argument is not in the right place, so we + // need to find its position where it _should_ be and swap with + // that location. + for swap_idx in 0..kwargs_order.len() { + if callee_kwarg == kwargs_order[swap_idx] { + // First we're going to generate the code that is going + // to perform the actual swapping at runtime. + let swap_idx_i32: i32 = swap_idx.try_into().unwrap(); + let kwarg_idx_i32: i32 = kwarg_idx.try_into().unwrap(); + let offset0 = kwargs_stack_base - swap_idx_i32; + let offset1 = kwargs_stack_base - kwarg_idx_i32; + stack_swap(asm, offset0, offset1); + + // Next we're going to do some bookkeeping on our end so + // that we know the order that the arguments are + // actually in now. + kwargs_order.swap(kwarg_idx, swap_idx); + + break; + } + } + } + + // Now that every caller specified kwarg is in the right place, filling + // in unspecified default paramters won't overwrite anything. + for kwarg_idx in keyword_required_num..callee_kw_count { + if kwargs_order[kwarg_idx] != unsafe { callee_kwargs.add(kwarg_idx).read() } { + let default_param_idx = kwarg_idx - keyword_required_num; + let mut default_value = unsafe { (*keyword).default_values.add(default_param_idx).read() }; + + if default_value == Qundef { + // Qundef means that this value is not constant and must be + // recalculated at runtime, so we record it in unspecified_bits + // (Qnil is then used as a placeholder instead of Qundef). + unspecified_bits |= 0x01 << default_param_idx; + default_value = Qnil; + } + + let default_param = asm.stack_opnd(kwargs_stack_base - kwarg_idx as i32); + let param_type = Type::from(default_value); + asm.mov(default_param, default_value.into()); + asm.ctx.set_opnd_mapping(default_param.into(), TempMapping::map_to_stack(param_type)); + } + } + + // Pop extra arguments that went into kwrest now that they're at stack top + if has_kwrest && caller_keyword_len > callee_kw_count { + let extra_kwarg_count = caller_keyword_len - callee_kw_count; + asm.stack_pop(extra_kwarg_count); + argc = argc - extra_kwarg_count as i32; + } + + // Keyword arguments cause a special extra local variable to be + // pushed onto the stack that represents the parameters that weren't + // explicitly given a value and have a non-constant default. 
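+    // For example (illustrative): with `def foo(a: 1, b: rand) = b` called as `foo(a: 5)`,
+    // only `b` is unspecified and has a non-constant default, so unspecified_bits == 0b10
+    // and a Fixnum with that value is pushed below.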
+    if callee_kw_count > 0 {
+        let unspec_opnd = VALUE::fixnum_from_usize(unspecified_bits).as_u64();
+        let top = asm.stack_push(Type::Fixnum);
+        asm.mov(top, unspec_opnd.into());
+        argc += 1;
+    }
+
+    // The kwrest parameter sits after `unspecified_bits`
+    if let Some(kwrest_type) = kwrest_type {
+        let kwrest = asm.stack_push(kwrest_type);
+        // We put the kwrest parameter in memory earlier
+        asm.ctx.dealloc_temp_reg(kwrest.stack_idx());
+        argc += 1;
+    }
+
+    argc
+}
+
+/// This is a helper function to allow us to exit early
+/// during code generation if a predicate is true.
+/// We return Option<()> here because we will be able to
+/// short-circuit using the ? operator if we return None.
+/// It would be great if Rust let you implement ? for your
+/// own types, but as of right now it doesn't.
+fn exit_if(asm: &mut Assembler, pred: bool, counter: Counter) -> Option<()> {
+    if pred {
+        gen_counter_incr(asm, counter);
+        return None
+    }
+    Some(())
+}
+
+#[must_use]
+fn exit_if_tail_call(asm: &mut Assembler, ci: *const rb_callinfo) -> Option<()> {
+    exit_if(asm, unsafe { vm_ci_flag(ci) } & VM_CALL_TAILCALL != 0, Counter::send_iseq_tailcall)
+}
+
+#[must_use]
+fn exit_if_has_post(asm: &mut Assembler, iseq: *const rb_iseq_t) -> Option<()> {
+    exit_if(asm, unsafe { get_iseq_flags_has_post(iseq) }, Counter::send_iseq_has_post)
+}
+
+#[must_use]
+fn exit_if_kwsplat_non_nil(asm: &mut Assembler, flags: u32, counter: Counter) -> Option<()> {
+    let kw_splat = flags & VM_CALL_KW_SPLAT != 0;
+    let kw_splat_stack = StackOpnd((flags & VM_CALL_ARGS_BLOCKARG != 0).into());
+    exit_if(asm, kw_splat && asm.ctx.get_opnd_type(kw_splat_stack) != Type::Nil, counter)
+}
+
+#[must_use]
+fn exit_if_has_rest_and_captured(asm: &mut Assembler, iseq_has_rest: bool, captured_opnd: Option<Opnd>) -> Option<()> {
+    exit_if(asm, iseq_has_rest && captured_opnd.is_some(), Counter::send_iseq_has_rest_and_captured)
+}
+
+#[must_use]
+fn exit_if_has_kwrest_and_captured(asm: &mut Assembler, iseq_has_kwrest: bool, captured_opnd: Option<Opnd>) -> Option<()> {
+    // We need to call a C function to allocate the kwrest hash, but we also need to hold the
+    // captured block across the call, which we can't do.
+    exit_if(asm, iseq_has_kwrest && captured_opnd.is_some(), Counter::send_iseq_has_kwrest_and_captured)
+}
+
+#[must_use]
+fn exit_if_has_rest_and_supplying_kws(asm: &mut Assembler, iseq_has_rest: bool, supplying_kws: bool) -> Option<()> {
+    // There can be a gap between the rest parameter array and the supplied keywords, or
+    // no space to put the rest array (e.g. with `def foo(*arr, k:) = arr; foo(k: 1)`, the 1 is
+    // sitting where the rest array should be).
+    exit_if(
+        asm,
+        iseq_has_rest && supplying_kws,
+        Counter::send_iseq_has_rest_and_kw_supplied,
+    )
+}
+
+#[must_use]
+fn exit_if_supplying_kw_and_has_no_kw(asm: &mut Assembler, supplying_kws: bool, callee_kws: bool) -> Option<()> {
+    // Passing keyword arguments to a callee means allocating a hash and treating
+    // that as a positional argument. Bail for now.
+    exit_if(
+        asm,
+        supplying_kws && !callee_kws,
+        Counter::send_iseq_has_no_kw,
+    )
+}
+
+#[must_use]
+fn exit_if_supplying_kws_and_accept_no_kwargs(asm: &mut Assembler, supplying_kws: bool, iseq: *const rb_iseq_t) -> Option<()> {
+    // If we have a method accepting no kwargs (**nil), exit if we have passed
+    // it any kwargs.
+ exit_if( + asm, + supplying_kws && unsafe { get_iseq_flags_accepts_no_kwarg(iseq) }, + Counter::send_iseq_accepts_no_kwarg + ) +} + +#[must_use] +fn exit_if_doing_kw_and_splat(asm: &mut Assembler, doing_kw_call: bool, flags: u32) -> Option<()> { + exit_if(asm, doing_kw_call && flags & VM_CALL_ARGS_SPLAT != 0, Counter::send_iseq_splat_with_kw) +} + +#[must_use] +fn exit_if_wrong_number_arguments( + asm: &mut Assembler, + args_setup_block: bool, + opts_filled: i32, + flags: u32, + opt_num: i32, + iseq_has_rest: bool, +) -> Option<()> { + // Too few arguments and no splat to make up for it + let too_few = opts_filled < 0 && flags & VM_CALL_ARGS_SPLAT == 0; + // Too many arguments and no sink that take them + let too_many = opts_filled > opt_num && !(iseq_has_rest || args_setup_block); + + exit_if(asm, too_few || too_many, Counter::send_iseq_arity_error) +} + +#[must_use] +fn exit_if_doing_kw_and_opts_missing(asm: &mut Assembler, doing_kw_call: bool, opts_missing: i32) -> Option<()> { + // If we have unfilled optional arguments and keyword arguments then we + // would need to adjust the arguments location to account for that. + // For now we aren't handling this case. + exit_if(asm, doing_kw_call && opts_missing > 0, Counter::send_iseq_missing_optional_kw) +} + +#[must_use] +fn exit_if_has_rest_and_optional_and_block(asm: &mut Assembler, iseq_has_rest: bool, opt_num: i32, iseq: *const rb_iseq_t, block_arg: bool) -> Option<()> { + exit_if( + asm, + iseq_has_rest && opt_num != 0 && (unsafe { get_iseq_flags_has_block(iseq) } || block_arg), + Counter::send_iseq_has_rest_opt_and_block + ) +} + +#[derive(Clone, Copy)] +enum BlockArg { + Nil, + /// A special sentinel value indicating the block parameter should be read from + /// the current surrounding cfp + BlockParamProxy, + /// A proc object. 
Could be an instance of a subclass of ::rb_cProc + TProc, +} + +#[must_use] +fn exit_if_unsupported_block_arg_type( + jit: &mut JITState, + asm: &mut Assembler, + supplying_block_arg: bool +) -> Option<Option<BlockArg>> { + let block_arg_type = if supplying_block_arg { + asm.ctx.get_opnd_type(StackOpnd(0)) + } else { + // Passing no block argument + return Some(None); + }; + + match block_arg_type { + // We'll handle Nil and BlockParamProxy later + Type::Nil => Some(Some(BlockArg::Nil)), + Type::BlockParamProxy => Some(Some(BlockArg::BlockParamProxy)), + _ if { + let sample_block_arg = jit.peek_at_stack(&asm.ctx, 0); + unsafe { rb_obj_is_proc(sample_block_arg) }.test() + } => { + // Speculate that we'll have a proc as the block arg + Some(Some(BlockArg::TProc)) + } + _ => { + gen_counter_incr(asm, Counter::send_iseq_block_arg_type); + None + } + } +} + +#[must_use] +fn exit_if_stack_too_large(iseq: *const rb_iseq_t) -> Option<()> { + let stack_max = unsafe { rb_get_iseq_body_stack_max(iseq) }; + // Reject ISEQs with very large temp stacks, + // this will allow us to use u8/i8 values to track stack_size and sp_offset + if stack_max >= i8::MAX as u32 { + incr_counter!(iseq_stack_too_large); + return None; + } + Some(()) +} + +fn gen_struct_aref( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + ci: *const rb_callinfo, + cme: *const rb_callable_method_entry_t, + comptime_recv: VALUE, + flags: u32, + argc: i32, +) -> Option<CodegenStatus> { + + if unsafe { vm_ci_argc(ci) } != 0 { + return None; + } + + let off: i32 = unsafe { get_cme_def_body_optimized_index(cme) } + .try_into() + .unwrap(); + + // Confidence checks + assert!(unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRUCT) }); + assert!((off as i64) < unsafe { RSTRUCT_LEN(comptime_recv) }); + + // We are going to use an encoding that takes a 4-byte immediate which + // limits the offset to INT32_MAX. + { + let native_off = (off as i64) * (SIZEOF_VALUE as i64); + if native_off > (i32::MAX as i64) { + return None; + } + } + + if c_method_tracing_currently_enabled(jit) { + // Struct accesses need fire c_call and c_return events, which we can't support + // See :attr-tracing: + gen_counter_incr(asm, Counter::send_cfunc_tracing); + return None; + } + + // This is a .send call and we need to adjust the stack + if flags & VM_CALL_OPT_SEND != 0 { + handle_opt_send_shift_stack(asm, argc); + } + + // All structs from the same Struct class should have the same + // length. So if our comptime_recv is embedded all runtime + // structs of the same class should be as well, and the same is + // true of the converse. 
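+    // (So checking the embedded flag on the compile-time receiver is enough here;
+    // no runtime guard on embeddedness is emitted below.)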
+ let embedded = unsafe { FL_TEST_RAW(comptime_recv, VALUE(RSTRUCT_EMBED_LEN_MASK)) }; + + asm_comment!(asm, "struct aref"); + + let recv = asm.stack_pop(1); + let recv = asm.load(recv); + + let val = if embedded != VALUE(0) { + Opnd::mem(64, recv, RUBY_OFFSET_RSTRUCT_AS_ARY + (SIZEOF_VALUE_I32 * off)) + } else { + let rstruct_ptr = asm.load(Opnd::mem(64, recv, RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR)); + Opnd::mem(64, rstruct_ptr, SIZEOF_VALUE_I32 * off) + }; + + let ret = asm.stack_push(Type::Unknown); + asm.mov(ret, val); + + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) +} + +fn gen_struct_aset( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + ci: *const rb_callinfo, + cme: *const rb_callable_method_entry_t, + comptime_recv: VALUE, + flags: u32, + argc: i32, +) -> Option<CodegenStatus> { + if unsafe { vm_ci_argc(ci) } != 1 { + return None; + } + + if c_method_tracing_currently_enabled(jit) { + // Struct accesses need fire c_call and c_return events, which we can't support + // See :attr-tracing: + gen_counter_incr(asm, Counter::send_cfunc_tracing); + return None; + } + + // This is a .send call and we need to adjust the stack + if flags & VM_CALL_OPT_SEND != 0 { + handle_opt_send_shift_stack(asm, argc); + } + + let off: i32 = unsafe { get_cme_def_body_optimized_index(cme) } + .try_into() + .unwrap(); + + // Confidence checks + assert!(unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRUCT) }); + assert!((off as i64) < unsafe { RSTRUCT_LEN(comptime_recv) }); + + asm_comment!(asm, "struct aset"); + + let val = asm.stack_pop(1); + let recv = asm.stack_pop(1); + + let val = asm.ccall(RSTRUCT_SET as *const u8, vec![recv, (off as i64).into(), val]); + + let ret = asm.stack_push(Type::Unknown); + asm.mov(ret, val); + + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) +} + +// Generate code that calls a method with dynamic dispatch +fn gen_send_dynamic<F: Fn(&mut Assembler) -> Opnd>( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + cd: *const rb_call_data, + sp_pops: usize, + vm_sendish: F, +) -> Option<CodegenStatus> { + // Our frame handling is not compatible with tailcall + if unsafe { vm_ci_flag((*cd).ci) } & VM_CALL_TAILCALL != 0 { + return None; + } + jit_perf_symbol_push!(jit, asm, "gen_send_dynamic", PerfMap::Codegen); + + // Rewind stack_size using ctx.with_stack_size to allow stack_size changes + // before you return None. 
+ asm.ctx = asm.ctx.with_stack_size(jit.stack_size_for_pc); + + // Save PC and SP to prepare for dynamic dispatch + jit_prepare_non_leaf_call(jit, asm); + + // Squash stack canary that might be left over from elsewhere + assert_eq!(false, asm.get_leaf_ccall()); + if cfg!(debug_assertions) { + asm.store(asm.ctx.sp_opnd(0), 0.into()); + } + + // Dispatch a method + let ret = vm_sendish(asm); + + // Pop arguments and a receiver + asm.stack_pop(sp_pops); + + // Push the return value + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, ret); + + // Fix the interpreter SP deviated by vm_sendish + asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), SP); + + gen_counter_incr(asm, Counter::num_send_dynamic); + + jit_perf_symbol_pop!(jit, asm, PerfMap::Codegen); + + // End the current block for invalidationg and sharing the same successor + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) +} + +fn gen_send_general( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + cd: *const rb_call_data, + block: Option<BlockHandler>, +) -> Option<CodegenStatus> { + // Relevant definitions: + // rb_execution_context_t : vm_core.h + // invoker, cfunc logic : method.h, vm_method.c + // rb_callinfo : vm_callinfo.h + // rb_callable_method_entry_t : method.h + // vm_call_cfunc_with_frame : vm_insnhelper.c + // + // For a general overview for how the interpreter calls methods, + // see vm_call_method(). + + let ci = unsafe { get_call_data_ci(cd) }; // info about the call site + let mut argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); + let mut mid = unsafe { vm_ci_mid(ci) }; + let mut flags = unsafe { vm_ci_flag(ci) }; + + // Defer compilation so we can specialize on class of receiver + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + let recv_idx = argc + if flags & VM_CALL_ARGS_BLOCKARG != 0 { 1 } else { 0 }; + let comptime_recv = jit.peek_at_stack(&asm.ctx, recv_idx as isize); + let comptime_recv_klass = comptime_recv.class_of(); + assert_eq!(RUBY_T_CLASS, comptime_recv_klass.builtin_type(), + "objects visible to ruby code should have a T_CLASS in their klass field"); + + // Don't compile calls through singleton classes to avoid retaining the receiver. + // Make an exception for class methods since classes tend to be retained anyways. + // Also compile calls on top_self to help tests. 
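+    // (For example, with `obj = Object.new; def obj.foo; end`, compiling `obj.foo` would
+    // guard on obj's singleton class, and keeping that singleton class alive in turn
+    // keeps `obj` alive.)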
+ if VALUE(0) != unsafe { FL_TEST(comptime_recv_klass, VALUE(RUBY_FL_SINGLETON as usize)) } + && comptime_recv != unsafe { rb_vm_top_self() } + && !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_CLASS) } + && !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_MODULE) } { + gen_counter_incr(asm, Counter::send_singleton_class); + return None; + } + + // Points to the receiver operand on the stack + let recv = asm.stack_opnd(recv_idx); + let recv_opnd: YARVOpnd = recv.into(); + + // Log the name of the method we're calling to + #[cfg(feature = "disasm")] + asm_comment!(asm, "call to {}", get_method_name(Some(comptime_recv_klass), mid)); + + // Gather some statistics about sends + gen_counter_incr(asm, Counter::num_send); + if let Some(_known_klass) = asm.ctx.get_opnd_type(recv_opnd).known_class() { + gen_counter_incr(asm, Counter::num_send_known_class); + } + if asm.ctx.get_chain_depth() > 1 { + gen_counter_incr(asm, Counter::num_send_polymorphic); + } + // If megamorphic, let the caller fallback to dynamic dispatch + if asm.ctx.get_chain_depth() >= SEND_MAX_DEPTH { + gen_counter_incr(asm, Counter::send_megamorphic); + return None; + } + + perf_call!("gen_send_general: ", jit_guard_known_klass( + jit, + asm, + ocb, + comptime_recv_klass, + recv, + recv_opnd, + comptime_recv, + SEND_MAX_DEPTH, + Counter::guard_send_klass_megamorphic, + )); + + // Do method lookup + let mut cme = unsafe { rb_callable_method_entry(comptime_recv_klass, mid) }; + if cme.is_null() { + gen_counter_incr(asm, Counter::send_cme_not_found); + return None; + } + + // Load an overloaded cme if applicable. See vm_search_cc(). + // It allows you to use a faster ISEQ if possible. + cme = unsafe { rb_check_overloaded_cme(cme, ci) }; + + let visi = unsafe { METHOD_ENTRY_VISI(cme) }; + match visi { + METHOD_VISI_PUBLIC => { + // Can always call public methods + } + METHOD_VISI_PRIVATE => { + if flags & VM_CALL_FCALL == 0 { + // Can only call private methods with FCALL callsites. + // (at the moment they are callsites without a receiver or an explicit `self` receiver) + gen_counter_incr(asm, Counter::send_private_not_fcall); + return None; + } + } + METHOD_VISI_PROTECTED => { + // If the method call is an FCALL, it is always valid + if flags & VM_CALL_FCALL == 0 { + // otherwise we need an ancestry check to ensure the receiver is valid to be called + // as protected + jit_protected_callee_ancestry_guard(asm, cme); + } + } + _ => { + panic!("cmes should always have a visibility!"); + } + } + + // Register block for invalidation + //assert!(cme->called_id == mid); + jit.assume_method_lookup_stable(asm, ocb, cme); + + // To handle the aliased method case (VM_METHOD_TYPE_ALIAS) + loop { + let def_type = unsafe { get_cme_def_type(cme) }; + + match def_type { + VM_METHOD_TYPE_ISEQ => { + let iseq = unsafe { get_def_iseq_ptr((*cme).def) }; + let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL; + return perf_call! { gen_send_iseq(jit, asm, ocb, iseq, ci, frame_type, None, cme, block, flags, argc, None) }; + } + VM_METHOD_TYPE_CFUNC => { + return perf_call! 
{ gen_send_cfunc( + jit, + asm, + ocb, + ci, + cme, + block, + Some(comptime_recv_klass), + flags, + argc, + ) }; + } + VM_METHOD_TYPE_IVAR => { + // This is a .send call not supported right now for attr_reader + if flags & VM_CALL_OPT_SEND != 0 { + gen_counter_incr(asm, Counter::send_send_attr_reader); + return None; + } + + if flags & VM_CALL_ARGS_BLOCKARG != 0 { + match asm.ctx.get_opnd_type(StackOpnd(0)) { + Type::Nil | Type::BlockParamProxy => { + // Getters ignore the block arg, and these types of block args can be + // passed without side-effect (never any `to_proc` call). + asm.stack_pop(1); + } + _ => { + gen_counter_incr(asm, Counter::send_getter_block_arg); + return None; + } + } + } + + if argc != 0 { + // Guard for simple splat of empty array + if VM_CALL_ARGS_SPLAT == flags & (VM_CALL_ARGS_SPLAT | VM_CALL_KWARG | VM_CALL_KW_SPLAT) + && argc == 1 { + // Not using chain guards since on failure these likely end up just raising + // ArgumentError + let splat = asm.stack_opnd(0); + guard_object_is_array(asm, splat, splat.into(), Counter::guard_send_getter_splat_non_empty); + let splat_len = get_array_len(asm, splat); + asm.cmp(splat_len, 0.into()); + asm.jne(Target::side_exit(Counter::guard_send_getter_splat_non_empty)); + asm.stack_pop(1); + } else { + // Argument count mismatch. Getters take no arguments. + gen_counter_incr(asm, Counter::send_getter_arity); + return None; + } + } + + if c_method_tracing_currently_enabled(jit) { + // Can't generate code for firing c_call and c_return events + // :attr-tracing: + // Handling the C method tracing events for attr_accessor + // methods is easier than regular C methods as we know the + // "method" we are calling into never enables those tracing + // events. We are never inside the code that needs to be + // invalidated when invalidation happens. + gen_counter_incr(asm, Counter::send_cfunc_tracing); + return None; + } + + let recv = asm.stack_opnd(0); // the receiver should now be the stack top + let ivar_name = unsafe { get_cme_def_body_attr_id(cme) }; + + return gen_get_ivar( + jit, + asm, + ocb, + SEND_MAX_DEPTH, + comptime_recv, + ivar_name, + recv, + recv.into(), + ); + } + VM_METHOD_TYPE_ATTRSET => { + // This is a .send call not supported right now for attr_writer + if flags & VM_CALL_OPT_SEND != 0 { + gen_counter_incr(asm, Counter::send_send_attr_writer); + return None; + } + if flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(asm, Counter::send_args_splat_attrset); + return None; + } + if flags & VM_CALL_KWARG != 0 { + gen_counter_incr(asm, Counter::send_attrset_kwargs); + return None; + } else if argc != 1 || unsafe { !RB_TYPE_P(comptime_recv, RUBY_T_OBJECT) } { + gen_counter_incr(asm, Counter::send_ivar_set_method); + return None; + } else if c_method_tracing_currently_enabled(jit) { + // Can't generate code for firing c_call and c_return events + // See :attr-tracing: + gen_counter_incr(asm, Counter::send_cfunc_tracing); + return None; + } else if flags & VM_CALL_ARGS_BLOCKARG != 0 { + gen_counter_incr(asm, Counter::send_attrset_block_arg); + return None; + } else { + let ivar_name = unsafe { get_cme_def_body_attr_id(cme) }; + return gen_set_ivar(jit, asm, ocb, comptime_recv, ivar_name, StackOpnd(1), None); + } + } + // Block method, e.g. 
define_method(:foo) { :my_block } + VM_METHOD_TYPE_BMETHOD => { + if flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(asm, Counter::send_args_splat_bmethod); + return None; + } + return gen_send_bmethod(jit, asm, ocb, ci, cme, block, flags, argc); + } + VM_METHOD_TYPE_ALIAS => { + // Retrieve the aliased method and re-enter the switch + cme = unsafe { rb_aliased_callable_method_entry(cme) }; + continue; + } + // Send family of methods, e.g. call/apply + VM_METHOD_TYPE_OPTIMIZED => { + if flags & VM_CALL_ARGS_BLOCKARG != 0 { + gen_counter_incr(asm, Counter::send_optimized_block_arg); + return None; + } + + let opt_type = unsafe { get_cme_def_body_optimized_type(cme) }; + match opt_type { + OPTIMIZED_METHOD_TYPE_SEND => { + // This is for method calls like `foo.send(:bar)` + // The `send` method does not get its own stack frame. + // instead we look up the method and call it, + // doing some stack shifting based on the VM_CALL_OPT_SEND flag + + // Reject nested cases such as `send(:send, :alias_for_send, :foo))`. + // We would need to do some stack manipulation here or keep track of how + // many levels deep we need to stack manipulate. Because of how exits + // currently work, we can't do stack manipulation until we will no longer + // side exit. + if flags & VM_CALL_OPT_SEND != 0 { + gen_counter_incr(asm, Counter::send_send_nested); + return None; + } + + if argc == 0 { + gen_counter_incr(asm, Counter::send_send_wrong_args); + return None; + } + + argc -= 1; + + let compile_time_name = jit.peek_at_stack(&asm.ctx, argc as isize); + + mid = unsafe { rb_get_symbol_id(compile_time_name) }; + if mid == 0 { + // This also rejects method names that need conversion + gen_counter_incr(asm, Counter::send_send_null_mid); + return None; + } + + cme = unsafe { rb_callable_method_entry(comptime_recv_klass, mid) }; + if cme.is_null() { + gen_counter_incr(asm, Counter::send_send_null_cme); + return None; + } + + flags |= VM_CALL_FCALL | VM_CALL_OPT_SEND; + + jit.assume_method_lookup_stable(asm, ocb, cme); + + asm_comment!( + asm, + "guard sending method name \'{}\'", + unsafe { cstr_to_rust_string(rb_id2name(mid)) }.unwrap_or_else(|| "<unknown>".to_owned()), + ); + + let name_opnd = asm.stack_opnd(argc); + let symbol_id_opnd = asm.ccall(rb_get_symbol_id as *const u8, vec![name_opnd]); + + asm.cmp(symbol_id_opnd, mid.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_send_send_name_chain, + ); + + // We have changed the argc, flags, mid, and cme, so we need to re-enter the match + // and compile whatever method we found from send. 
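+ // E.g. `obj.send(:foo, arg)` is compiled much like `obj.foo(arg)`, guarded by the
+ // method name check above (FCALL is set so visibility behaves like `send`).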
+ continue; + + } + OPTIMIZED_METHOD_TYPE_CALL => { + + if block.is_some() { + gen_counter_incr(asm, Counter::send_call_block); + return None; + } + + if flags & VM_CALL_KWARG != 0 { + gen_counter_incr(asm, Counter::send_call_kwarg); + return None; + } + + if flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(asm, Counter::send_args_splat_opt_call); + return None; + } + + // Optimize for single ractor mode and avoid runtime check for + // "defined with an un-shareable Proc in a different Ractor" + if !assume_single_ractor_mode(jit, asm, ocb) { + gen_counter_incr(asm, Counter::send_call_multi_ractor); + return None; + } + + // If this is a .send call we need to adjust the stack + if flags & VM_CALL_OPT_SEND != 0 { + handle_opt_send_shift_stack(asm, argc); + } + + // About to reset the SP, need to load this here + let recv_load = asm.load(recv); + + let sp = asm.lea(asm.ctx.sp_opnd(0)); + + // Save the PC and SP because the callee can make Ruby calls + jit_prepare_non_leaf_call(jit, asm); + + let kw_splat = flags & VM_CALL_KW_SPLAT; + let stack_argument_pointer = asm.lea(Opnd::mem(64, sp, -(argc) * SIZEOF_VALUE_I32)); + + let ret = asm.ccall(rb_optimized_call as *const u8, vec![ + recv_load, + EC, + argc.into(), + stack_argument_pointer, + kw_splat.into(), + VM_BLOCK_HANDLER_NONE.into(), + ]); + + asm.stack_pop(argc as usize + 1); + + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, ret); + return Some(KeepCompiling); + + } + OPTIMIZED_METHOD_TYPE_BLOCK_CALL => { + gen_counter_incr(asm, Counter::send_optimized_method_block_call); + return None; + } + OPTIMIZED_METHOD_TYPE_STRUCT_AREF => { + if flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(asm, Counter::send_args_splat_aref); + return None; + } + return gen_struct_aref( + jit, + asm, + ocb, + ci, + cme, + comptime_recv, + flags, + argc, + ); + } + OPTIMIZED_METHOD_TYPE_STRUCT_ASET => { + if flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(asm, Counter::send_args_splat_aset); + return None; + } + return gen_struct_aset( + jit, + asm, + ocb, + ci, + cme, + comptime_recv, + flags, + argc, + ); + } + _ => { + panic!("unknown optimized method type!") + } + } + } + VM_METHOD_TYPE_ZSUPER => { + gen_counter_incr(asm, Counter::send_zsuper_method); + return None; + } + VM_METHOD_TYPE_UNDEF => { + gen_counter_incr(asm, Counter::send_undef_method); + return None; + } + VM_METHOD_TYPE_NOTIMPLEMENTED => { + gen_counter_incr(asm, Counter::send_not_implemented_method); + return None; + } + VM_METHOD_TYPE_MISSING => { + gen_counter_incr(asm, Counter::send_missing_method); + return None; + } + VM_METHOD_TYPE_REFINED => { + gen_counter_incr(asm, Counter::send_refined_method); + return None; + } + _ => { + unreachable!(); + } + } + } +} + +/// Get class name from a class pointer. 
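+/// Falls back to "Unknown" when no class is given or the name cannot be read,
+/// e.g. `get_class_name(None)` returns "Unknown".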
+fn get_class_name(class: Option<VALUE>) -> String { + class.and_then(|class| unsafe { + cstr_to_rust_string(rb_class2name(class)) + }).unwrap_or_else(|| "Unknown".to_string()) +} + +/// Assemble "{class_name}#{method_name}" from a class pointer and a method ID +fn get_method_name(class: Option<VALUE>, mid: u64) -> String { + let class_name = get_class_name(class); + let method_name = if mid != 0 { + unsafe { cstr_to_rust_string(rb_id2name(mid)) } + } else { + None + }.unwrap_or_else(|| "Unknown".to_string()); + format!("{}#{}", class_name, method_name) +} + +/// Assemble "{label}@{iseq_path}:{lineno}" (iseq_inspect() format) from an ISEQ +fn get_iseq_name(iseq: IseqPtr) -> String { + let c_string = unsafe { rb_yjit_iseq_inspect(iseq) }; + let string = unsafe { CStr::from_ptr(c_string) }.to_str() + .unwrap_or_else(|_| "not UTF-8").to_string(); + unsafe { ruby_xfree(c_string as *mut c_void); } + string +} + +/// Shifts the stack for send in order to remove the name of the method +/// Comment below borrow from vm_call_opt_send in vm_insnhelper.c +/// E.g. when argc == 2 +/// | | | | TOPN +/// +------+ | | +/// | arg1 | ---+ | | 0 +/// +------+ | +------+ +/// | arg0 | -+ +-> | arg1 | 1 +/// +------+ | +------+ +/// | sym | +---> | arg0 | 2 +/// +------+ +------+ +/// | recv | | recv | 3 +///--+------+--------+------+------ +/// +/// We do this for our compiletime context and the actual stack +fn handle_opt_send_shift_stack(asm: &mut Assembler, argc: i32) { + asm_comment!(asm, "shift_stack"); + for j in (0..argc).rev() { + let opnd = asm.stack_opnd(j); + let opnd2 = asm.stack_opnd(j + 1); + asm.mov(opnd2, opnd); + } + asm.shift_stack(argc as usize); +} + +fn gen_opt_send_without_block( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + if let Some(status) = perf_call! { gen_send_general(jit, asm, ocb, cd, None) } { + return Some(status); + } + + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of send + gen_send_dynamic(jit, asm, ocb, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_opt_send_without_block(ec: EcPtr, cfp: CfpPtr, cd: VALUE) -> VALUE; + } + asm.ccall( + rb_vm_opt_send_without_block as *const u8, + vec![EC, CFP, (cd as usize).into()], + ) + }) +} + +fn gen_send( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + let block = jit.get_arg(1).as_optional_ptr().map(|iseq| BlockHandler::BlockISeq(iseq)); + if let Some(status) = perf_call! 
{ gen_send_general(jit, asm, ocb, cd, block) } { + return Some(status); + } + + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of send + let blockiseq = jit.get_arg(1).as_iseq(); + gen_send_dynamic(jit, asm, ocb, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_send(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; + } + asm.ccall( + rb_vm_send as *const u8, + vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()], + ) + }) +} + +fn gen_invokeblock( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + if let Some(status) = gen_invokeblock_specialized(jit, asm, ocb, cd) { + return Some(status); + } + + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of send + gen_send_dynamic(jit, asm, ocb, cd, unsafe { rb_yjit_invokeblock_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_invokeblock(ec: EcPtr, cfp: CfpPtr, cd: VALUE) -> VALUE; + } + asm.ccall( + rb_vm_invokeblock as *const u8, + vec![EC, CFP, (cd as usize).into()], + ) + }) +} + +fn gen_invokeblock_specialized( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + cd: *const rb_call_data, +) -> Option<CodegenStatus> { + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + // Fallback to dynamic dispatch if this callsite is megamorphic + if asm.ctx.get_chain_depth() >= SEND_MAX_DEPTH { + gen_counter_incr(asm, Counter::invokeblock_megamorphic); + return None; + } + + // Get call info + let ci = unsafe { get_call_data_ci(cd) }; + let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); + let flags = unsafe { vm_ci_flag(ci) }; + + // Get block_handler + let cfp = jit.get_cfp(); + let lep = unsafe { rb_vm_ep_local_ep(get_cfp_ep(cfp)) }; + let comptime_handler = unsafe { *lep.offset(VM_ENV_DATA_INDEX_SPECVAL.try_into().unwrap()) }; + + // Handle each block_handler type + if comptime_handler.0 == VM_BLOCK_HANDLER_NONE as usize { // no block given + gen_counter_incr(asm, Counter::invokeblock_none); + None + } else if comptime_handler.0 & 0x3 == 0x1 { // VM_BH_ISEQ_BLOCK_P + asm_comment!(asm, "get local EP"); + let ep_opnd = gen_get_lep(jit, asm); + let block_handler_opnd = asm.load( + Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) + ); + + asm_comment!(asm, "guard block_handler type"); + let tag_opnd = asm.and(block_handler_opnd, 0x3.into()); // block_handler is a tagged pointer + asm.cmp(tag_opnd, 0x1.into()); // VM_BH_ISEQ_BLOCK_P + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_invokeblock_tag_changed, + ); + + // If the current ISEQ is annotated to be inlined but it's not being inlined here, + // generate a dynamic dispatch to avoid making this yield megamorphic. 
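+ // (i.e. the ISEQ has BUILTIN_ATTR_INLINE_BLOCK set but this compilation is not an
+ // inlined one, so every caller's block would funnel through this single yield site.)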
+ if unsafe { rb_yjit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() { + gen_counter_incr(asm, Counter::invokeblock_iseq_not_inlined); + return None; + } + + let comptime_captured = unsafe { ((comptime_handler.0 & !0x3) as *const rb_captured_block).as_ref().unwrap() }; + let comptime_iseq = unsafe { *comptime_captured.code.iseq.as_ref() }; + + asm_comment!(asm, "guard known ISEQ"); + let captured_opnd = asm.and(block_handler_opnd, Opnd::Imm(!0x3)); + let iseq_opnd = asm.load(Opnd::mem(64, captured_opnd, SIZEOF_VALUE_I32 * 2)); + asm.cmp(iseq_opnd, VALUE::from(comptime_iseq).into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_invokeblock_iseq_block_changed, + ); + + perf_call! { gen_send_iseq(jit, asm, ocb, comptime_iseq, ci, VM_FRAME_MAGIC_BLOCK, None, 0 as _, None, flags, argc, Some(captured_opnd)) } + } else if comptime_handler.0 & 0x3 == 0x3 { // VM_BH_IFUNC_P + // We aren't handling CALLER_SETUP_ARG and CALLER_REMOVE_EMPTY_KW_SPLAT yet. + if flags & VM_CALL_ARGS_SPLAT != 0 { + gen_counter_incr(asm, Counter::invokeblock_ifunc_args_splat); + return None; + } + if flags & VM_CALL_KW_SPLAT != 0 { + gen_counter_incr(asm, Counter::invokeblock_ifunc_kw_splat); + return None; + } + + asm_comment!(asm, "get local EP"); + let ep_opnd = gen_get_lep(jit, asm); + let block_handler_opnd = asm.load( + Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) + ); + + asm_comment!(asm, "guard block_handler type"); + let tag_opnd = asm.and(block_handler_opnd, 0x3.into()); // block_handler is a tagged pointer + asm.cmp(tag_opnd, 0x3.into()); // VM_BH_IFUNC_P + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_invokeblock_tag_changed, + ); + + // The cfunc may not be leaf + jit_prepare_non_leaf_call(jit, asm); + + extern "C" { + fn rb_vm_yield_with_cfunc(ec: EcPtr, captured: *const rb_captured_block, argc: c_int, argv: *const VALUE) -> VALUE; + } + asm_comment!(asm, "call ifunc"); + let captured_opnd = asm.and(block_handler_opnd, Opnd::Imm(!0x3)); + let argv = asm.lea(asm.ctx.sp_opnd(-argc)); + let ret = asm.ccall( + rb_vm_yield_with_cfunc as *const u8, + vec![EC, captured_opnd, argc.into(), argv], + ); + + asm.stack_pop(argc.try_into().unwrap()); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, ret); + + // cfunc calls may corrupt types + asm.clear_local_types(); + + // Share the successor with other chains + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) + } else if comptime_handler.symbol_p() { + gen_counter_incr(asm, Counter::invokeblock_symbol); + None + } else { // Proc + gen_counter_incr(asm, Counter::invokeblock_proc); + None + } +} + +fn gen_invokesuper( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Generate specialized code if possible + let cd = jit.get_arg(0).as_ptr(); + if let Some(status) = gen_invokesuper_specialized(jit, asm, ocb, cd) { + return Some(status); + } + + // Otherwise, fallback to dynamic dispatch using the interpreter's implementation of send + let blockiseq = jit.get_arg(1).as_iseq(); + gen_send_dynamic(jit, asm, ocb, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| { + extern "C" { + fn rb_vm_invokesuper(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; + } + asm.ccall( + rb_vm_invokesuper as *const u8, + vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()], + ) + }) +} + +fn gen_invokesuper_specialized( + jit: &mut JITState, + 
asm: &mut Assembler, + ocb: &mut OutlinedCb, + cd: *const rb_call_data, +) -> Option<CodegenStatus> { + // Defer compilation so we can specialize on class of receiver + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + // Handle the last two branches of vm_caller_setup_arg_block + let block = if let Some(iseq) = jit.get_arg(1).as_optional_ptr() { + BlockHandler::BlockISeq(iseq) + } else { + BlockHandler::LEPSpecVal + }; + + // Fallback to dynamic dispatch if this callsite is megamorphic + if asm.ctx.get_chain_depth() >= SEND_MAX_DEPTH { + gen_counter_incr(asm, Counter::invokesuper_megamorphic); + return None; + } + + let me = unsafe { rb_vm_frame_method_entry(jit.get_cfp()) }; + if me.is_null() { + gen_counter_incr(asm, Counter::invokesuper_no_me); + return None; + } + + // FIXME: We should track and invalidate this block when this cme is invalidated + let current_defined_class = unsafe { (*me).defined_class }; + let mid = unsafe { get_def_original_id((*me).def) }; + + // vm_search_normal_superclass + let rbasic_ptr: *const RBasic = current_defined_class.as_ptr(); + if current_defined_class.builtin_type() == RUBY_T_ICLASS + && unsafe { RB_TYPE_P((*rbasic_ptr).klass, RUBY_T_MODULE) && FL_TEST_RAW((*rbasic_ptr).klass, VALUE(RMODULE_IS_REFINEMENT.as_usize())) != VALUE(0) } + { + gen_counter_incr(asm, Counter::invokesuper_refinement); + return None; + } + let comptime_superclass = + unsafe { rb_class_get_superclass(RCLASS_ORIGIN(current_defined_class)) }; + + let ci = unsafe { get_call_data_ci(cd) }; + let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); + + let ci_flags = unsafe { vm_ci_flag(ci) }; + + // Don't JIT calls that aren't simple + // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block. + + if ci_flags & VM_CALL_KWARG != 0 { + gen_counter_incr(asm, Counter::invokesuper_kwarg); + return None; + } + if ci_flags & VM_CALL_KW_SPLAT != 0 { + gen_counter_incr(asm, Counter::invokesuper_kw_splat); + return None; + } + + // Ensure we haven't rebound this method onto an incompatible class. + // In the interpreter we try to avoid making this check by performing some + // cheaper calculations first, but since we specialize on the method entry + // and so only have to do this once at compile time this is fine to always + // check and side exit. + let comptime_recv = jit.peek_at_stack(&asm.ctx, argc as isize); + if unsafe { rb_obj_is_kind_of(comptime_recv, current_defined_class) } == VALUE(0) { + gen_counter_incr(asm, Counter::invokesuper_defined_class_mismatch); + return None; + } + + // Don't compile `super` on objects with singleton class to avoid retaining the receiver. 
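+ // (A singleton class holds a reference to its attached object, so specializing on it
+ // would keep the receiver alive.)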
+ if VALUE(0) != unsafe { FL_TEST(comptime_recv.class_of(), VALUE(RUBY_FL_SINGLETON as usize)) } { + gen_counter_incr(asm, Counter::invokesuper_singleton_class); + return None; + } + + // Do method lookup + let cme = unsafe { rb_callable_method_entry(comptime_superclass, mid) }; + if cme.is_null() { + gen_counter_incr(asm, Counter::invokesuper_no_cme); + return None; + } + + // Check that we'll be able to write this method dispatch before generating checks + let cme_def_type = unsafe { get_cme_def_type(cme) }; + if cme_def_type != VM_METHOD_TYPE_ISEQ && cme_def_type != VM_METHOD_TYPE_CFUNC { + // others unimplemented + gen_counter_incr(asm, Counter::invokesuper_not_iseq_or_cfunc); + return None; + } + + asm_comment!(asm, "guard known me"); + let lep_opnd = gen_get_lep(jit, asm); + let ep_me_opnd = Opnd::mem( + 64, + lep_opnd, + SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF, + ); + + let me_as_value = VALUE(me as usize); + asm.cmp(ep_me_opnd, me_as_value.into()); + jit_chain_guard( + JCC_JNE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::guard_invokesuper_me_changed, + ); + + // We need to assume that both our current method entry and the super + // method entry we invoke remain stable + jit.assume_method_lookup_stable(asm, ocb, me); + jit.assume_method_lookup_stable(asm, ocb, cme); + + // Method calls may corrupt types + asm.clear_local_types(); + + match cme_def_type { + VM_METHOD_TYPE_ISEQ => { + let iseq = unsafe { get_def_iseq_ptr((*cme).def) }; + let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL; + perf_call! { gen_send_iseq(jit, asm, ocb, iseq, ci, frame_type, None, cme, Some(block), ci_flags, argc, None) } + } + VM_METHOD_TYPE_CFUNC => { + perf_call! { gen_send_cfunc(jit, asm, ocb, ci, cme, Some(block), None, ci_flags, argc) } + } + _ => unreachable!(), + } +} + +fn gen_leave( + _jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Only the return value should be on the stack + assert_eq!(1, asm.ctx.get_stack_size(), "leave instruction expects stack size 1, but was: {}", asm.ctx.get_stack_size()); + + // Check for interrupts + gen_check_ints(asm, Counter::leave_se_interrupt); + + // Pop the current frame (ec->cfp++) + // Note: the return PC is already in the previous CFP + asm_comment!(asm, "pop stack frame"); + let incr_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); + asm.mov(CFP, incr_cfp); + asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); + + // Load the return value + let retval_opnd = asm.stack_pop(1); + + // Move the return value into the C return register + asm.mov(C_RET_OPND, retval_opnd); + + // Jump to the JIT return address on the frame that was just popped. + // There are a few possible jump targets: + // - gen_leave_exit() and gen_leave_exception(), for C callers + // - Return context set up by gen_send_iseq() + // We don't write the return value to stack memory like the interpreter here. + // Each jump target do it as necessary. 
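+ // CFP now points to the caller's frame; the frame we just popped sits one control
+ // frame below it, hence the negative offset to reach its jit_return field.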
+ let offset_to_jit_return = + -(RUBY_SIZEOF_CONTROL_FRAME as i32) + RUBY_OFFSET_CFP_JIT_RETURN; + asm.jmp_opnd(Opnd::mem(64, CFP, offset_to_jit_return)); + + Some(EndBlock) +} + +fn gen_getglobal( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let gid = jit.get_arg(0).as_usize(); + + // Save the PC and SP because we might make a Ruby call for warning + jit_prepare_non_leaf_call(jit, asm); + + let val_opnd = asm.ccall( + rb_gvar_get as *const u8, + vec![ gid.into() ] + ); + + let top = asm.stack_push(Type::Unknown); + asm.mov(top, val_opnd); + + Some(KeepCompiling) +} + +fn gen_setglobal( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let gid = jit.get_arg(0).as_usize(); + + // Save the PC and SP because we might make a Ruby call for + // Kernel#set_trace_var + jit_prepare_non_leaf_call(jit, asm); + + let val = asm.stack_opnd(0); + asm.ccall( + rb_gvar_set as *const u8, + vec![ + gid.into(), + val, + ], + ); + asm.stack_pop(1); // Keep it during ccall for GC + + Some(KeepCompiling) +} + +fn gen_anytostring( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Save the PC and SP since we might call #to_s + jit_prepare_non_leaf_call(jit, asm); + + let str = asm.stack_opnd(0); + let val = asm.stack_opnd(1); + + let val = asm.ccall(rb_obj_as_string_result as *const u8, vec![str, val]); + asm.stack_pop(2); // Keep them during ccall for GC + + // Push the return value + let stack_ret = asm.stack_push(Type::TString); + asm.mov(stack_ret, val); + + Some(KeepCompiling) +} + +fn gen_objtostring( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + let recv = asm.stack_opnd(0); + let comptime_recv = jit.peek_at_stack(&asm.ctx, 0); + + if unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRING) } { + jit_guard_known_klass( + jit, + asm, + ocb, + comptime_recv.class_of(), + recv, + recv.into(), + comptime_recv, + SEND_MAX_DEPTH, + Counter::objtostring_not_string, + ); + + // No work needed. The string value is already on the top of the stack. + Some(KeepCompiling) + } else { + let cd = jit.get_arg(0).as_ptr(); + perf_call! { gen_send_general(jit, asm, ocb, cd, None) } + } +} + +fn gen_intern( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // Save the PC and SP because we might allocate + jit_prepare_call_with_gc(jit, asm); + + let str = asm.stack_opnd(0); + let sym = asm.ccall(rb_str_intern as *const u8, vec![str]); + asm.stack_pop(1); // Keep it during ccall for GC + + // Push the return value + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, sym); + + Some(KeepCompiling) +} + +fn gen_toregexp( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let opt = jit.get_arg(0).as_i64(); + let cnt = jit.get_arg(1).as_usize(); + + // Save the PC and SP because this allocates an object and could + // raise an exception. 
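+ // The approach below: gather the operands into a temporary array with
+ // rb_ary_tmp_new_from_values(), build the Regexp from it with rb_reg_new_ary(),
+ // then clear the temporary array once the Regexp has been created.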
+ jit_prepare_non_leaf_call(jit, asm); + + let values_ptr = asm.lea(asm.ctx.sp_opnd(-(cnt as i32))); + + let ary = asm.ccall( + rb_ary_tmp_new_from_values as *const u8, + vec![ + Opnd::Imm(0), + cnt.into(), + values_ptr, + ] + ); + asm.stack_pop(cnt); // Let ccall spill them + + // Save the array so we can clear it later + asm.cpush(ary); + asm.cpush(ary); // Alignment + + let val = asm.ccall( + rb_reg_new_ary as *const u8, + vec![ + ary, + Opnd::Imm(opt), + ] + ); + + // The actual regex is in RAX now. Pop the temp array from + // rb_ary_tmp_new_from_values into C arg regs so we can clear it + let ary = asm.cpop(); // Alignment + asm.cpop_into(ary); + + // The value we want to push on the stack is in RAX right now + let stack_ret = asm.stack_push(Type::UnknownHeap); + asm.mov(stack_ret, val); + + // Clear the temp array. + asm.ccall(rb_ary_clear as *const u8, vec![ary]); + + Some(KeepCompiling) +} + +fn gen_getspecial( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // This takes two arguments, key and type + // key is only used when type == 0 + // A non-zero type determines which type of backref to fetch + //rb_num_t key = jit.jit_get_arg(0); + let rtype = jit.get_arg(1).as_u64(); + + if rtype == 0 { + // not yet implemented + return None; + } else if rtype & 0x01 != 0 { + // Fetch a "special" backref based on a char encoded by shifting by 1 + + // Can raise if matchdata uninitialized + jit_prepare_non_leaf_call(jit, asm); + + // call rb_backref_get() + asm_comment!(asm, "rb_backref_get"); + let backref = asm.ccall(rb_backref_get as *const u8, vec![]); + + let rt_u8: u8 = (rtype >> 1).try_into().unwrap(); + let val = match rt_u8.into() { + '&' => { + asm_comment!(asm, "rb_reg_last_match"); + asm.ccall(rb_reg_last_match as *const u8, vec![backref]) + } + '`' => { + asm_comment!(asm, "rb_reg_match_pre"); + asm.ccall(rb_reg_match_pre as *const u8, vec![backref]) + } + '\'' => { + asm_comment!(asm, "rb_reg_match_post"); + asm.ccall(rb_reg_match_post as *const u8, vec![backref]) + } + '+' => { + asm_comment!(asm, "rb_reg_match_last"); + asm.ccall(rb_reg_match_last as *const u8, vec![backref]) + } + _ => panic!("invalid back-ref"), + }; + + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val); + + Some(KeepCompiling) + } else { + // Fetch the N-th match from the last backref based on type shifted by 1 + + // Can raise if matchdata uninitialized + jit_prepare_non_leaf_call(jit, asm); + + // call rb_backref_get() + asm_comment!(asm, "rb_backref_get"); + let backref = asm.ccall(rb_backref_get as *const u8, vec![]); + + // rb_reg_nth_match((int)(type >> 1), backref); + asm_comment!(asm, "rb_reg_nth_match"); + let val = asm.ccall( + rb_reg_nth_match as *const u8, + vec![ + Opnd::Imm((rtype >> 1).try_into().unwrap()), + backref, + ] + ); + + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val); + + Some(KeepCompiling) + } +} + +fn gen_getclassvariable( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // rb_vm_getclassvariable can raise exceptions. 
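+ // (Save the PC and SP: reading an uninitialized class variable raises NameError.)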
+ jit_prepare_non_leaf_call(jit, asm); + + let val_opnd = asm.ccall( + rb_vm_getclassvariable as *const u8, + vec![ + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ), + CFP, + Opnd::UImm(jit.get_arg(0).as_u64()), + Opnd::UImm(jit.get_arg(1).as_u64()), + ], + ); + + let top = asm.stack_push(Type::Unknown); + asm.mov(top, val_opnd); + + Some(KeepCompiling) +} + +fn gen_setclassvariable( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // rb_vm_setclassvariable can raise exceptions. + jit_prepare_non_leaf_call(jit, asm); + + let val = asm.stack_opnd(0); + asm.ccall( + rb_vm_setclassvariable as *const u8, + vec![ + Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ), + CFP, + Opnd::UImm(jit.get_arg(0).as_u64()), + val, + Opnd::UImm(jit.get_arg(1).as_u64()), + ], + ); + asm.stack_pop(1); // Keep it during ccall for GC + + Some(KeepCompiling) +} + +fn gen_getconstant( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + + let id = jit.get_arg(0).as_usize(); + + // vm_get_ev_const can raise exceptions. + jit_prepare_non_leaf_call(jit, asm); + + let allow_nil_opnd = asm.stack_opnd(0); + let klass_opnd = asm.stack_opnd(1); + + extern "C" { + fn rb_vm_get_ev_const(ec: EcPtr, klass: VALUE, id: ID, allow_nil: VALUE) -> VALUE; + } + + let val_opnd = asm.ccall( + rb_vm_get_ev_const as *const u8, + vec![ + EC, + klass_opnd, + id.into(), + allow_nil_opnd + ], + ); + asm.stack_pop(2); // Keep them during ccall for GC + + let top = asm.stack_push(Type::Unknown); + asm.mov(top, val_opnd); + + Some(KeepCompiling) +} + +fn gen_opt_getconstant_path( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let const_cache_as_value = jit.get_arg(0); + let ic: *const iseq_inline_constant_cache = const_cache_as_value.as_ptr(); + let idlist: *const ID = unsafe { (*ic).segments }; + + // Make sure there is an exit for this block as the interpreter might want + // to invalidate this block from yjit_constant_ic_update(). + jit_ensure_block_entry_exit(jit, asm, ocb)?; + + // See vm_ic_hit_p(). The same conditions are checked in yjit_constant_ic_update(). + // If a cache is not filled, fallback to the general C call. + let ice = unsafe { (*ic).entry }; + if ice.is_null() { + // Prepare for const_missing + jit_prepare_non_leaf_call(jit, asm); + + // If this does not trigger const_missing, vm_ic_update will invalidate this block. + extern "C" { + fn rb_vm_opt_getconstant_path(ec: EcPtr, cfp: CfpPtr, ic: *const u8) -> VALUE; + } + let val = asm.ccall( + rb_vm_opt_getconstant_path as *const u8, + vec![EC, CFP, Opnd::const_ptr(ic as *const u8)], + ); + + let stack_top = asm.stack_push(Type::Unknown); + asm.store(stack_top, val); + + jump_to_next_insn(jit, asm, ocb); + return Some(EndBlock); + } + + if !unsafe { (*ice).ic_cref }.is_null() { + // Cache is keyed on a certain lexical scope. Use the interpreter's cache. + let inline_cache = asm.load(Opnd::const_ptr(ic as *const u8)); + + // Call function to verify the cache. It doesn't allocate or call methods. + let ret_val = asm.ccall( + rb_vm_ic_hit_p as *const u8, + vec![inline_cache, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)] + ); + + // Check the result. SysV only specifies one byte for _Bool return values, + // so it's important we only check one bit to ignore the higher bits in the register. 
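+ // (The ABI does not require the callee to zero the upper bits of the return register.)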
+ asm.test(ret_val, 1.into()); + asm.jz(Target::side_exit(Counter::opt_getconstant_path_ic_miss)); + + let inline_cache = asm.load(Opnd::const_ptr(ic as *const u8)); + + let ic_entry = asm.load(Opnd::mem( + 64, + inline_cache, + RUBY_OFFSET_IC_ENTRY + )); + + let ic_entry_val = asm.load(Opnd::mem( + 64, + ic_entry, + RUBY_OFFSET_ICE_VALUE + )); + + // Push ic->entry->value + let stack_top = asm.stack_push(Type::Unknown); + asm.store(stack_top, ic_entry_val); + } else { + // Optimize for single ractor mode. + if !assume_single_ractor_mode(jit, asm, ocb) { + gen_counter_incr(asm, Counter::opt_getconstant_path_multi_ractor); + return None; + } + + // Invalidate output code on any constant writes associated with + // constants referenced within the current block. + jit.assume_stable_constant_names(asm, ocb, idlist); + + jit_putobject(asm, unsafe { (*ice).value }); + } + + jump_to_next_insn(jit, asm, ocb); + Some(EndBlock) +} + +// Push the explicit block parameter onto the temporary stack. Part of the +// interpreter's scheme for avoiding Proc allocations when delegating +// explicit block parameters. +fn gen_getblockparamproxy( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + if !jit.at_current_insn() { + defer_compilation(jit, asm, ocb); + return Some(EndBlock); + } + + // EP level + let level = jit.get_arg(1).as_u32(); + + // Peek at the block handler so we can check whether it's nil + let comptime_handler = jit.peek_at_block_handler(level); + + // Filter for the 4 cases we currently handle + if !(comptime_handler.as_u64() == 0 || // no block given + comptime_handler.as_u64() & 0x3 == 0x1 || // iseq block (no associated GC managed object) + comptime_handler.as_u64() & 0x3 == 0x3 || // ifunc block (no associated GC managed object) + unsafe { rb_obj_is_proc(comptime_handler) }.test() // block is a Proc + ) { + // Missing the symbol case, where we basically need to call Symbol#to_proc at runtime + gen_counter_incr(asm, Counter::gbpp_unsupported_type); + return None; + } + + // Load environment pointer EP from CFP + let ep_opnd = gen_get_ep(asm, level); + + // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero + let flag_check = Opnd::mem( + 64, + ep_opnd, + SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32), + ); + asm.test(flag_check, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); + asm.jnz(Target::side_exit(Counter::gbpp_block_param_modified)); + + // Load the block handler for the current frame + // note, VM_ASSERT(VM_ENV_LOCAL_P(ep)) + let block_handler = asm.load( + Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL) + ); + + // Use block handler sample to guide specialization... + // NOTE: we use jit_chain_guard() in this decision tree, and since + // there are only a few cases, it should never reach the depth limit use + // the exit counter we pass to it. + // + // No block given + if comptime_handler.as_u64() == 0 { + // Bail if there is a block handler + asm.cmp(block_handler, Opnd::UImm(0)); + + jit_chain_guard( + JCC_JNZ, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::gbpp_block_handler_not_none, + ); + + jit_putobject(asm, Qnil); + } else if comptime_handler.as_u64() & 0x1 == 0x1 { + // This handles two cases which are nearly identical + // Block handler is a tagged pointer. Look at the tag. 
+ // VM_BH_ISEQ_BLOCK_P(): block_handler & 0x03 == 0x01 + // VM_BH_IFUNC_P(): block_handler & 0x03 == 0x03 + // So to check for either of those cases we can use: val & 0x1 == 0x1 + const _: () = assert!(RUBY_SYMBOL_FLAG & 1 == 0, "guard below rejects symbol block handlers"); + // Procs are aligned heap pointers so testing the bit rejects them too. + + asm.test(block_handler, 0x1.into()); + jit_chain_guard( + JCC_JZ, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::gbpp_block_handler_not_iseq, + ); + + // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr. + assert!(!unsafe { rb_block_param_proxy }.special_const_p()); + + let top = asm.stack_push(Type::BlockParamProxy); + asm.mov(top, Opnd::const_ptr(unsafe { rb_block_param_proxy }.as_ptr())); + } else if unsafe { rb_obj_is_proc(comptime_handler) }.test() { + // The block parameter is a Proc + c_callable! { + // We can't hold values across C calls due to a backend limitation, + // so we'll use this thin wrapper around rb_obj_is_proc(). + fn is_proc(object: VALUE) -> VALUE { + if unsafe { rb_obj_is_proc(object) }.test() { + // VM_BH_TO_PROC() is the identify function. + object + } else { + Qfalse + } + } + } + + // Simple predicate, no need to jit_prepare_non_leaf_call() + let proc_or_false = asm.ccall(is_proc as _, vec![block_handler]); + + // Guard for proc + asm.cmp(proc_or_false, Qfalse.into()); + jit_chain_guard( + JCC_JE, + jit, + asm, + ocb, + SEND_MAX_DEPTH, + Counter::gbpp_block_handler_not_proc, + ); + + let top = asm.stack_push(Type::Unknown); + asm.mov(top, proc_or_false); + } else { + unreachable!("absurd given initial filtering"); + } + + jump_to_next_insn(jit, asm, ocb); + + Some(EndBlock) +} + +fn gen_getblockparam( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + // EP level + let level = jit.get_arg(1).as_u32(); + + // Save the PC and SP because we might allocate + jit_prepare_call_with_gc(jit, asm); + asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency. + + // A mirror of the interpreter code. Checking for the case + // where it's pushing rb_block_param_proxy. + + // Load environment pointer EP from CFP + let ep_opnd = gen_get_ep(asm, level); + + // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero + let flag_check = Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32)); + // FIXME: This is testing bits in the same place that the WB check is testing. + // We should combine these at some point + asm.test(flag_check, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); + + // If the frame flag has been modified, then the actual proc value is + // already in the EP and we should just use the value. + let frame_flag_modified = asm.new_label("frame_flag_modified"); + asm.jnz(frame_flag_modified); + + // This instruction writes the block handler to the EP. If we need to + // fire a write barrier for the write, then exit (we'll let the + // interpreter handle it so it can fire the write barrier). 
+ // flags & VM_ENV_FLAG_WB_REQUIRED + let flags_opnd = Opnd::mem( + 64, + ep_opnd, + SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_FLAGS as i32, + ); + asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into()); + + // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 + asm.jnz(Target::side_exit(Counter::gbp_wb_required)); + + // Convert the block handler in to a proc + // call rb_vm_bh_to_procval(const rb_execution_context_t *ec, VALUE block_handler) + let proc = asm.ccall( + rb_vm_bh_to_procval as *const u8, + vec![ + EC, + // The block handler for the current frame + // note, VM_ASSERT(VM_ENV_LOCAL_P(ep)) + Opnd::mem( + 64, + ep_opnd, + SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL, + ), + ] + ); + + // Load environment pointer EP from CFP (again) + let ep_opnd = gen_get_ep(asm, level); + + // Write the value at the environment pointer + let idx = jit.get_arg(0).as_i32(); + let offs = -(SIZEOF_VALUE_I32 * idx); + asm.mov(Opnd::mem(64, ep_opnd, offs), proc); + + // Set the frame modified flag + let flag_check = Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32)); + let modified_flag = asm.or(flag_check, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); + asm.store(flag_check, modified_flag); + + asm.write_label(frame_flag_modified); + + // Push the proc on the stack + let stack_ret = asm.stack_push(Type::Unknown); + let ep_opnd = gen_get_ep(asm, level); + asm.mov(stack_ret, Opnd::mem(64, ep_opnd, offs)); + + Some(KeepCompiling) +} + +fn gen_invokebuiltin( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let bf: *const rb_builtin_function = jit.get_arg(0).as_ptr(); + let bf_argc: usize = unsafe { (*bf).argc }.try_into().expect("non negative argc"); + + // ec, self, and arguments + if bf_argc + 2 > C_ARG_OPNDS.len() { + incr_counter!(invokebuiltin_too_many_args); + return None; + } + + // If the calls don't allocate, do they need up to date PC, SP? + jit_prepare_non_leaf_call(jit, asm); + + // Call the builtin func (ec, recv, arg1, arg2, ...) + let mut args = vec![EC, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)]; + + // Copy arguments from locals + for i in 0..bf_argc { + let stack_opnd = asm.stack_opnd((bf_argc - i - 1) as i32); + args.push(stack_opnd); + } + + let val = asm.ccall(unsafe { (*bf).func_ptr } as *const u8, args); + + // Push the return value + asm.stack_pop(bf_argc); + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val); + + Some(KeepCompiling) +} + +// opt_invokebuiltin_delegate calls a builtin function, like +// invokebuiltin does, but instead of taking arguments from the top of the +// stack uses the argument locals (and self) from the current method. +fn gen_opt_invokebuiltin_delegate( + jit: &mut JITState, + asm: &mut Assembler, + _ocb: &mut OutlinedCb, +) -> Option<CodegenStatus> { + let bf: *const rb_builtin_function = jit.get_arg(0).as_ptr(); + let bf_argc = unsafe { (*bf).argc }; + let start_index = jit.get_arg(1).as_i32(); + + // ec, self, and arguments + if bf_argc + 2 > (C_ARG_OPNDS.len() as i32) { + incr_counter!(invokebuiltin_too_many_args); + return None; + } + + // If the calls don't allocate, do they need up to date PC, SP? + jit_prepare_non_leaf_call(jit, asm); + + // Call the builtin func (ec, recv, arg1, arg2, ...) 
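+ // Each argument local is read at a negative offset from the EP, computed below from
+ // the ISEQ's local table size and VM_ENV_DATA_SIZE.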
+ let mut args = vec![EC, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)]; + + // Copy arguments from locals + if bf_argc > 0 { + // Load environment pointer EP from CFP + let ep = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)); + + for i in 0..bf_argc { + let table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) }; + let offs: i32 = -(table_size as i32) - (VM_ENV_DATA_SIZE as i32) + 1 + start_index + i; + let local_opnd = Opnd::mem(64, ep, offs * SIZEOF_VALUE_I32); + args.push(local_opnd); + } + } + let val = asm.ccall(unsafe { (*bf).func_ptr } as *const u8, args); + + // Push the return value + let stack_ret = asm.stack_push(Type::Unknown); + asm.mov(stack_ret, val); + + Some(KeepCompiling) +} + +/// Maps a YARV opcode to a code generation function (if supported) +fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> { + let VALUE(opcode) = opcode; + let opcode = opcode as ruby_vminsn_type; + assert!(opcode < VM_INSTRUCTION_SIZE); + + match opcode { + YARVINSN_nop => Some(gen_nop), + YARVINSN_pop => Some(gen_pop), + YARVINSN_dup => Some(gen_dup), + YARVINSN_dupn => Some(gen_dupn), + YARVINSN_swap => Some(gen_swap), + YARVINSN_putnil => Some(gen_putnil), + YARVINSN_putobject => Some(gen_putobject), + YARVINSN_putobject_INT2FIX_0_ => Some(gen_putobject_int2fix), + YARVINSN_putobject_INT2FIX_1_ => Some(gen_putobject_int2fix), + YARVINSN_putself => Some(gen_putself), + YARVINSN_putspecialobject => Some(gen_putspecialobject), + YARVINSN_setn => Some(gen_setn), + YARVINSN_topn => Some(gen_topn), + YARVINSN_adjuststack => Some(gen_adjuststack), + + YARVINSN_getlocal => Some(gen_getlocal), + YARVINSN_getlocal_WC_0 => Some(gen_getlocal_wc0), + YARVINSN_getlocal_WC_1 => Some(gen_getlocal_wc1), + YARVINSN_setlocal => Some(gen_setlocal), + YARVINSN_setlocal_WC_0 => Some(gen_setlocal_wc0), + YARVINSN_setlocal_WC_1 => Some(gen_setlocal_wc1), + YARVINSN_opt_plus => Some(gen_opt_plus), + YARVINSN_opt_minus => Some(gen_opt_minus), + YARVINSN_opt_and => Some(gen_opt_and), + YARVINSN_opt_or => Some(gen_opt_or), + YARVINSN_newhash => Some(gen_newhash), + YARVINSN_duphash => Some(gen_duphash), + YARVINSN_newarray => Some(gen_newarray), + YARVINSN_duparray => Some(gen_duparray), + YARVINSN_checktype => Some(gen_checktype), + YARVINSN_opt_lt => Some(gen_opt_lt), + YARVINSN_opt_le => Some(gen_opt_le), + YARVINSN_opt_gt => Some(gen_opt_gt), + YARVINSN_opt_ge => Some(gen_opt_ge), + YARVINSN_opt_mod => Some(gen_opt_mod), + YARVINSN_opt_str_freeze => Some(gen_opt_str_freeze), + YARVINSN_opt_str_uminus => Some(gen_opt_str_uminus), + YARVINSN_opt_newarray_send => Some(gen_opt_newarray_send), + YARVINSN_splatarray => Some(gen_splatarray), + YARVINSN_splatkw => Some(gen_splatkw), + YARVINSN_concatarray => Some(gen_concatarray), + YARVINSN_concattoarray => Some(gen_concattoarray), + YARVINSN_pushtoarray => Some(gen_pushtoarray), + YARVINSN_newrange => Some(gen_newrange), + YARVINSN_putstring => Some(gen_putstring), + YARVINSN_putchilledstring => Some(gen_putchilledstring), + YARVINSN_expandarray => Some(gen_expandarray), + YARVINSN_defined => Some(gen_defined), + YARVINSN_definedivar => Some(gen_definedivar), + YARVINSN_checkmatch => Some(gen_checkmatch), + YARVINSN_checkkeyword => Some(gen_checkkeyword), + YARVINSN_concatstrings => Some(gen_concatstrings), + YARVINSN_getinstancevariable => Some(gen_getinstancevariable), + YARVINSN_setinstancevariable => Some(gen_setinstancevariable), + + YARVINSN_opt_eq => Some(gen_opt_eq), + YARVINSN_opt_neq => Some(gen_opt_neq), + YARVINSN_opt_aref => Some(gen_opt_aref), + 
YARVINSN_opt_aset => Some(gen_opt_aset), + YARVINSN_opt_aref_with => Some(gen_opt_aref_with), + YARVINSN_opt_mult => Some(gen_opt_mult), + YARVINSN_opt_div => Some(gen_opt_div), + YARVINSN_opt_ltlt => Some(gen_opt_ltlt), + YARVINSN_opt_nil_p => Some(gen_opt_nil_p), + YARVINSN_opt_empty_p => Some(gen_opt_empty_p), + YARVINSN_opt_succ => Some(gen_opt_succ), + YARVINSN_opt_not => Some(gen_opt_not), + YARVINSN_opt_size => Some(gen_opt_size), + YARVINSN_opt_length => Some(gen_opt_length), + YARVINSN_opt_regexpmatch2 => Some(gen_opt_regexpmatch2), + YARVINSN_getconstant => Some(gen_getconstant), + YARVINSN_opt_getconstant_path => Some(gen_opt_getconstant_path), + YARVINSN_invokebuiltin => Some(gen_invokebuiltin), + YARVINSN_opt_invokebuiltin_delegate => Some(gen_opt_invokebuiltin_delegate), + YARVINSN_opt_invokebuiltin_delegate_leave => Some(gen_opt_invokebuiltin_delegate), + YARVINSN_opt_case_dispatch => Some(gen_opt_case_dispatch), + YARVINSN_branchif => Some(gen_branchif), + YARVINSN_branchunless => Some(gen_branchunless), + YARVINSN_branchnil => Some(gen_branchnil), + YARVINSN_throw => Some(gen_throw), + YARVINSN_jump => Some(gen_jump), + + YARVINSN_getblockparamproxy => Some(gen_getblockparamproxy), + YARVINSN_getblockparam => Some(gen_getblockparam), + YARVINSN_opt_send_without_block => Some(gen_opt_send_without_block), + YARVINSN_send => Some(gen_send), + YARVINSN_invokeblock => Some(gen_invokeblock), + YARVINSN_invokesuper => Some(gen_invokesuper), + YARVINSN_leave => Some(gen_leave), + + YARVINSN_getglobal => Some(gen_getglobal), + YARVINSN_setglobal => Some(gen_setglobal), + YARVINSN_anytostring => Some(gen_anytostring), + YARVINSN_objtostring => Some(gen_objtostring), + YARVINSN_intern => Some(gen_intern), + YARVINSN_toregexp => Some(gen_toregexp), + YARVINSN_getspecial => Some(gen_getspecial), + YARVINSN_getclassvariable => Some(gen_getclassvariable), + YARVINSN_setclassvariable => Some(gen_setclassvariable), + + // Unimplemented opcode, YJIT won't generate code for this yet + _ => None, + } +} + +// Return true when the codegen function generates code. +// known_recv_class has Some value when the caller has used jit_guard_known_klass(). +// See yjit_reg_method(). +type MethodGenFn = fn( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + ci: *const rb_callinfo, + cme: *const rb_callable_method_entry_t, + block: Option<BlockHandler>, + argc: i32, + known_recv_class: Option<VALUE>, +) -> bool; + +/// Methods for generating code for hardcoded (usually C) methods +static mut METHOD_CODEGEN_TABLE: Option<HashMap<usize, MethodGenFn>> = None; + +/// Register codegen functions for some Ruby core methods +pub fn yjit_reg_method_codegen_fns() { + unsafe { + assert!(METHOD_CODEGEN_TABLE.is_none()); + METHOD_CODEGEN_TABLE = Some(HashMap::default()); + + // Specialization for C methods. See yjit_reg_method() for details. 
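+ // Each call below associates a method entry with a MethodGenFn; gen_send_cfunc()
+ // consults this table and emits the specialized code when the entry applies.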
+        yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not);
+
+        yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true);
+        yjit_reg_method(rb_mKernel, "nil?", jit_rb_false);
+        yjit_reg_method(rb_mKernel, "is_a?", jit_rb_kernel_is_a);
+        yjit_reg_method(rb_mKernel, "kind_of?", jit_rb_kernel_is_a);
+        yjit_reg_method(rb_mKernel, "instance_of?", jit_rb_kernel_instance_of);
+
+        yjit_reg_method(rb_cBasicObject, "==", jit_rb_obj_equal);
+        yjit_reg_method(rb_cBasicObject, "equal?", jit_rb_obj_equal);
+        yjit_reg_method(rb_cBasicObject, "!=", jit_rb_obj_not_equal);
+        yjit_reg_method(rb_mKernel, "eql?", jit_rb_obj_equal);
+        yjit_reg_method(rb_cModule, "==", jit_rb_obj_equal);
+        yjit_reg_method(rb_cModule, "===", jit_rb_mod_eqq);
+        yjit_reg_method(rb_cSymbol, "==", jit_rb_obj_equal);
+        yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal);
+        yjit_reg_method(rb_cInteger, "==", jit_rb_int_equal);
+        yjit_reg_method(rb_cInteger, "===", jit_rb_int_equal);
+
+        yjit_reg_method(rb_cInteger, "succ", jit_rb_int_succ);
+        yjit_reg_method(rb_cInteger, "/", jit_rb_int_div);
+        yjit_reg_method(rb_cInteger, "<<", jit_rb_int_lshift);
+        yjit_reg_method(rb_cInteger, ">>", jit_rb_int_rshift);
+        yjit_reg_method(rb_cInteger, "^", jit_rb_int_xor);
+        yjit_reg_method(rb_cInteger, "[]", jit_rb_int_aref);
+
+        yjit_reg_method(rb_cFloat, "+", jit_rb_float_plus);
+        yjit_reg_method(rb_cFloat, "-", jit_rb_float_minus);
+        yjit_reg_method(rb_cFloat, "*", jit_rb_float_mul);
+        yjit_reg_method(rb_cFloat, "/", jit_rb_float_div);
+
+        yjit_reg_method(rb_cString, "empty?", jit_rb_str_empty_p);
+        yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s);
+        yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s);
+        yjit_reg_method(rb_cString, "length", jit_rb_str_length);
+        yjit_reg_method(rb_cString, "size", jit_rb_str_length);
+        yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize);
+        yjit_reg_method(rb_cString, "getbyte", jit_rb_str_getbyte);
+        yjit_reg_method(rb_cString, "setbyte", jit_rb_str_setbyte);
+        yjit_reg_method(rb_cString, "byteslice", jit_rb_str_byteslice);
+        yjit_reg_method(rb_cString, "<<", jit_rb_str_concat);
+        yjit_reg_method(rb_cString, "+@", jit_rb_str_uplus);
+
+        yjit_reg_method(rb_cNilClass, "===", jit_rb_case_equal);
+        yjit_reg_method(rb_cTrueClass, "===", jit_rb_case_equal);
+        yjit_reg_method(rb_cFalseClass, "===", jit_rb_case_equal);
+
+        yjit_reg_method(rb_cArray, "empty?", jit_rb_ary_empty_p);
+        yjit_reg_method(rb_cArray, "length", jit_rb_ary_length);
+        yjit_reg_method(rb_cArray, "size", jit_rb_ary_length);
+        yjit_reg_method(rb_cArray, "<<", jit_rb_ary_push);
+
+        yjit_reg_method(rb_cHash, "empty?", jit_rb_hash_empty_p);
+
+        yjit_reg_method(rb_mKernel, "respond_to?", jit_obj_respond_to);
+        yjit_reg_method(rb_mKernel, "block_given?", jit_rb_f_block_given_p);
+
+        yjit_reg_method(rb_cClass, "superclass", jit_rb_class_superclass);
+
+        yjit_reg_method(rb_singleton_class(rb_cThread), "current", jit_thread_s_current);
+    }
+}
+
+// Register a specialized codegen function for a particular method. Note that
+// if the function returns true, the code it generates runs without a
+// control frame and without interrupt checks. To avoid creating observable
+// behavior changes, the codegen function should only target simple code paths
+// that do not allocate and do not make method calls.
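+// (For example, jit_rb_obj_equal above just compares its two operands and pushes
+// true/false, so it satisfies these constraints.)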
+fn yjit_reg_method(klass: VALUE, mid_str: &str, gen_fn: MethodGenFn) { + let id_string = std::ffi::CString::new(mid_str).expect("couldn't convert to CString!"); + let mid = unsafe { rb_intern(id_string.as_ptr()) }; + let me = unsafe { rb_method_entry_at(klass, mid) }; + + if me.is_null() { + panic!("undefined optimized method!: {mid_str}"); + } + + // For now, only cfuncs are supported + //RUBY_ASSERT(me && me->def); + //RUBY_ASSERT(me->def->type == VM_METHOD_TYPE_CFUNC); + + let method_serial = unsafe { + let def = (*me).def; + get_def_method_serial(def) + }; + + unsafe { METHOD_CODEGEN_TABLE.as_mut().unwrap().insert(method_serial, gen_fn); } +} + +/// Global state needed for code generation +pub struct CodegenGlobals { + /// Inline code block (fast path) + inline_cb: CodeBlock, + + /// Outlined code block (slow path) + outlined_cb: OutlinedCb, + + /// Code for exiting back to the interpreter from the leave instruction + leave_exit_code: CodePtr, + + /// Code for exiting back to the interpreter after handling an exception + leave_exception_code: CodePtr, + + // For exiting from YJIT frame from branch_stub_hit(). + // Filled by gen_stub_exit(). + stub_exit_code: CodePtr, + + // For servicing branch stubs + branch_stub_hit_trampoline: CodePtr, + + // For servicing entry stubs + entry_stub_hit_trampoline: CodePtr, + + // Code for full logic of returning from C method and exiting to the interpreter + outline_full_cfunc_return_pos: CodePtr, + + /// For implementing global code invalidation + global_inval_patches: Vec<CodepagePatch>, + + /// Page indexes for outlined code that are not associated to any ISEQ. + ocb_pages: Vec<usize>, + + /// Map of cfunc YARV PCs to CMEs and receiver indexes, used to lazily push + /// a frame when rb_yjit_lazy_push_frame() is called with a PC in this HashMap. + pc_to_cfunc: HashMap<*mut VALUE, (*const rb_callable_method_entry_t, u8)>, +} + +/// For implementing global code invalidation. A position in the inline +/// codeblock to patch into a JMP rel32 which jumps into some code in +/// the outlined codeblock to exit to the interpreter. +pub struct CodepagePatch { + pub inline_patch_pos: CodePtr, + pub outlined_target_pos: CodePtr, +} + +/// Private singleton instance of the codegen globals +static mut CODEGEN_GLOBALS: Option<CodegenGlobals> = None; + +impl CodegenGlobals { + /// Initialize the codegen globals + pub fn init() { + // Executable memory and code page size in bytes + let mem_size = get_option!(exec_mem_size); + + #[cfg(not(test))] + let (mut cb, mut ocb) = { + let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size as u32) }; + + // Memory protection syscalls need page-aligned addresses, so check it here. Assuming + // `virt_block` is page-aligned, `second_half` should be page-aligned as long as the + // page size in bytes is a power of two 2¹⁹ or smaller. This is because the user + // requested size is half of mem_option × 2²⁰ as it's in MiB. + // + // Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB + // (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though. 
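+ // (For example, with --yjit-exec-mem-size=64 each half is 64 × 2²⁰ / 2 = 2²⁵ bytes,
+ // a multiple of every power-of-two page size up to 2¹⁹.)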
+ let page_size = unsafe { rb_yjit_get_page_size() }; + assert_eq!( + virt_block as usize % page_size.as_usize(), 0, + "Start of virtual address block should be page-aligned", + ); + + use crate::virtualmem::*; + use std::ptr::NonNull; + + let mem_block = VirtualMem::new( + SystemAllocator {}, + page_size, + NonNull::new(virt_block).unwrap(), + mem_size, + ); + let mem_block = Rc::new(RefCell::new(mem_block)); + + let freed_pages = Rc::new(None); + let cb = CodeBlock::new(mem_block.clone(), false, freed_pages.clone()); + let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, true, freed_pages)); + + (cb, ocb) + }; + + // In test mode we're not linking with the C code + // so we don't allocate executable memory + #[cfg(test)] + let mut cb = CodeBlock::new_dummy(mem_size / 2); + #[cfg(test)] + let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(mem_size / 2)); + + let ocb_start_addr = ocb.unwrap().get_write_ptr(); + let leave_exit_code = gen_leave_exit(&mut ocb).unwrap(); + let leave_exception_code = gen_leave_exception(&mut ocb).unwrap(); + + let stub_exit_code = gen_stub_exit(&mut ocb).unwrap(); + + let branch_stub_hit_trampoline = gen_branch_stub_hit_trampoline(&mut ocb).unwrap(); + let entry_stub_hit_trampoline = gen_entry_stub_hit_trampoline(&mut ocb).unwrap(); + + // Generate full exit code for C func + let cfunc_exit_code = gen_full_cfunc_return(&mut ocb).unwrap(); + + let ocb_end_addr = ocb.unwrap().get_write_ptr(); + let ocb_pages = ocb.unwrap().addrs_to_pages(ocb_start_addr, ocb_end_addr); + + // Mark all code memory as executable + cb.mark_all_executable(); + ocb.unwrap().mark_all_executable(); + + let codegen_globals = CodegenGlobals { + inline_cb: cb, + outlined_cb: ocb, + leave_exit_code, + leave_exception_code, + stub_exit_code, + outline_full_cfunc_return_pos: cfunc_exit_code, + branch_stub_hit_trampoline, + entry_stub_hit_trampoline, + global_inval_patches: Vec::new(), + ocb_pages, + pc_to_cfunc: HashMap::new(), + }; + + // Initialize the codegen globals instance + unsafe { + CODEGEN_GLOBALS = Some(codegen_globals); + } + } + + /// Get a mutable reference to the codegen globals instance + pub fn get_instance() -> &'static mut CodegenGlobals { + unsafe { CODEGEN_GLOBALS.as_mut().unwrap() } + } + + pub fn has_instance() -> bool { + unsafe { CODEGEN_GLOBALS.as_mut().is_some() } + } + + /// Get a mutable reference to the inline code block + pub fn get_inline_cb() -> &'static mut CodeBlock { + &mut CodegenGlobals::get_instance().inline_cb + } + + /// Get a mutable reference to the outlined code block + pub fn get_outlined_cb() -> &'static mut OutlinedCb { + &mut CodegenGlobals::get_instance().outlined_cb + } + + pub fn get_leave_exit_code() -> CodePtr { + CodegenGlobals::get_instance().leave_exit_code + } + + pub fn get_leave_exception_code() -> CodePtr { + CodegenGlobals::get_instance().leave_exception_code + } + + pub fn get_stub_exit_code() -> CodePtr { + CodegenGlobals::get_instance().stub_exit_code + } + + pub fn push_global_inval_patch(inline_pos: CodePtr, outlined_pos: CodePtr, cb: &CodeBlock) { + if let Some(last_patch) = CodegenGlobals::get_instance().global_inval_patches.last() { + let patch_offset = inline_pos.as_offset() - last_patch.inline_patch_pos.as_offset(); + assert!( + patch_offset < 0 || cb.jmp_ptr_bytes() as i64 <= patch_offset, + "patches should not overlap (patch_offset: {patch_offset})", + ); + } + + let patch = CodepagePatch { + inline_patch_pos: inline_pos, + outlined_target_pos: outlined_pos, + }; + CodegenGlobals::get_instance() + .global_inval_patches + 
.push(patch); + } + + // Drain the list of patches and return it + pub fn take_global_inval_patches() -> Vec<CodepagePatch> { + let globals = CodegenGlobals::get_instance(); + mem::take(&mut globals.global_inval_patches) + } + + pub fn get_outline_full_cfunc_return_pos() -> CodePtr { + CodegenGlobals::get_instance().outline_full_cfunc_return_pos + } + + pub fn get_branch_stub_hit_trampoline() -> CodePtr { + CodegenGlobals::get_instance().branch_stub_hit_trampoline + } + + pub fn get_entry_stub_hit_trampoline() -> CodePtr { + CodegenGlobals::get_instance().entry_stub_hit_trampoline + } + + pub fn get_ocb_pages() -> &'static Vec<usize> { + &CodegenGlobals::get_instance().ocb_pages + } + + pub fn get_pc_to_cfunc() -> &'static mut HashMap<*mut VALUE, (*const rb_callable_method_entry_t, u8)> { + &mut CodegenGlobals::get_instance().pc_to_cfunc + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn setup_codegen() -> (JITState, Context, Assembler, CodeBlock, OutlinedCb) { + let cb = CodeBlock::new_dummy(256 * 1024); + + return ( + JITState::new( + BlockId { iseq: std::ptr::null(), idx: 0 }, + Context::default(), + cb.get_write_ptr(), + ptr::null(), // No execution context in tests. No peeking! + ), + Context::default(), + Assembler::new(), + cb, + OutlinedCb::wrap(CodeBlock::new_dummy(256 * 1024)), + ); + } + + #[test] + fn test_gen_leave_exit() { + let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(256 * 1024)); + gen_leave_exit(&mut ocb); + assert!(ocb.unwrap().get_write_pos() > 0); + } + + #[test] + fn test_gen_exit() { + let (_, _ctx, mut asm, mut cb, _) = setup_codegen(); + gen_exit(0 as *mut VALUE, &mut asm); + asm.compile(&mut cb, None).unwrap(); + assert!(cb.get_write_pos() > 0); + } + + #[test] + fn test_get_side_exit() { + let (_jit, ctx, mut asm, _, mut ocb) = setup_codegen(); + let side_exit_context = SideExitContext::new(0 as _, ctx); + asm.get_side_exit(&side_exit_context, None, &mut ocb); + assert!(ocb.unwrap().get_write_pos() > 0); + } + + #[test] + fn test_gen_check_ints() { + let (_jit, _ctx, mut asm, _cb, _ocb) = setup_codegen(); + asm.set_side_exit_context(0 as _, 0); + gen_check_ints(&mut asm, Counter::guard_send_interrupted); + } + + #[test] + fn test_gen_nop() { + let (mut jit, context, mut asm, mut cb, mut ocb) = setup_codegen(); + let status = gen_nop(&mut jit, &mut asm, &mut ocb); + asm.compile(&mut cb, None).unwrap(); + + assert_eq!(status, Some(KeepCompiling)); + assert_eq!(context.diff(&Context::default()), TypeDiff::Compatible(0)); + assert_eq!(cb.get_write_pos(), 0); + } + + #[test] + fn test_gen_pop() { + let (mut jit, _, mut asm, _cb, mut ocb) = setup_codegen(); + let context = Context::default(); + asm.stack_push(Type::Fixnum); + let status = gen_pop(&mut jit, &mut asm, &mut ocb); + + assert_eq!(status, Some(KeepCompiling)); + let mut default = Context::default(); + default.set_reg_temps(context.get_reg_temps()); + assert_eq!(context.diff(&default), TypeDiff::Compatible(0)); + } + + #[test] + fn test_gen_dup() { + let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen(); + asm.stack_push(Type::Fixnum); + let status = gen_dup(&mut jit, &mut asm, &mut ocb); + + assert_eq!(status, Some(KeepCompiling)); + + // Did we duplicate the type information for the Fixnum type? 
+ assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(0))); + assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(1))); + + asm.compile(&mut cb, None).unwrap(); + assert!(cb.get_write_pos() > 0); // Write some movs + } + + #[test] + fn test_gen_dupn() { + let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen(); + asm.stack_push(Type::Fixnum); + asm.stack_push(Type::Flonum); + + let mut value_array: [u64; 2] = [0, 2]; // We only compile for n == 2 + let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; + jit.pc = pc; + + let status = gen_dupn(&mut jit, &mut asm, &mut ocb); + + assert_eq!(status, Some(KeepCompiling)); + + assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(3))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(2))); + assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(1))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(0))); + + // TODO: this is writing zero bytes on x86. Why? + asm.compile(&mut cb, None).unwrap(); + assert!(cb.get_write_pos() > 0); // Write some movs + } + + #[test] + fn test_gen_swap() { + let (mut jit, _context, mut asm, _cb, mut ocb) = setup_codegen(); + asm.stack_push(Type::Fixnum); + asm.stack_push(Type::Flonum); + + let status = gen_swap(&mut jit, &mut asm, &mut ocb); + + let tmp_type_top = asm.ctx.get_opnd_type(StackOpnd(0)); + let tmp_type_next = asm.ctx.get_opnd_type(StackOpnd(1)); + + assert_eq!(status, Some(KeepCompiling)); + assert_eq!(tmp_type_top, Type::Fixnum); + assert_eq!(tmp_type_next, Type::Flonum); + } + + #[test] + fn test_putnil() { + let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen(); + let status = gen_putnil(&mut jit, &mut asm, &mut ocb); + + let tmp_type_top = asm.ctx.get_opnd_type(StackOpnd(0)); + + assert_eq!(status, Some(KeepCompiling)); + assert_eq!(tmp_type_top, Type::Nil); + asm.compile(&mut cb, None).unwrap(); + assert!(cb.get_write_pos() > 0); + } + + + #[test] + fn test_putself() { + let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen(); + let status = gen_putself(&mut jit, &mut asm, &mut ocb); + + assert_eq!(status, Some(KeepCompiling)); + asm.compile(&mut cb, None).unwrap(); + assert!(cb.get_write_pos() > 0); + } + + #[test] + fn test_gen_setn() { + let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen(); + asm.stack_push(Type::Fixnum); + asm.stack_push(Type::Flonum); + asm.stack_push(Type::CString); + + let mut value_array: [u64; 2] = [0, 2]; + let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; + jit.pc = pc; + + let status = gen_setn(&mut jit, &mut asm, &mut ocb); + + assert_eq!(status, Some(KeepCompiling)); + + assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(2))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(1))); + assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(0))); + + asm.compile(&mut cb, None).unwrap(); + assert!(cb.get_write_pos() > 0); + } + + #[test] + fn test_gen_topn() { + let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen(); + asm.stack_push(Type::Flonum); + asm.stack_push(Type::CString); + + let mut value_array: [u64; 2] = [0, 1]; + let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; + jit.pc = pc; + + let status = gen_topn(&mut jit, &mut asm, &mut ocb); + + assert_eq!(status, Some(KeepCompiling)); + + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(2))); + assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(1))); + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(0))); + + asm.compile(&mut cb, 
None).unwrap(); + assert!(cb.get_write_pos() > 0); // Write some movs + } + + #[test] + fn test_gen_adjuststack() { + let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen(); + asm.stack_push(Type::Flonum); + asm.stack_push(Type::CString); + asm.stack_push(Type::Fixnum); + + let mut value_array: [u64; 3] = [0, 2, 0]; + let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE; + jit.pc = pc; + + let status = gen_adjuststack(&mut jit, &mut asm, &mut ocb); + + assert_eq!(status, Some(KeepCompiling)); + + assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(0))); + + asm.compile(&mut cb, None).unwrap(); + assert!(cb.get_write_pos() == 0); // No instructions written + } + + #[test] + fn test_gen_leave() { + let (mut jit, _context, mut asm, _cb, mut ocb) = setup_codegen(); + // Push return value + asm.stack_push(Type::Fixnum); + asm.set_side_exit_context(0 as _, 0); + gen_leave(&mut jit, &mut asm, &mut ocb); + } +} diff --git a/yjit/src/core.rs b/yjit/src/core.rs new file mode 100644 index 0000000000..cd6e649aa0 --- /dev/null +++ b/yjit/src/core.rs @@ -0,0 +1,3767 @@ +//! Code versioning, retained live control flow graph mutations, type tracking, etc. + +// So we can comment on individual uses of `unsafe` in `unsafe` functions +#![warn(unsafe_op_in_unsafe_fn)] + +use crate::asm::*; +use crate::backend::ir::*; +use crate::codegen::*; +use crate::virtualmem::CodePtr; +use crate::cruby::*; +use crate::options::*; +use crate::stats::*; +use crate::utils::*; +#[cfg(feature="disasm")] +use crate::disasm::*; +use core::ffi::c_void; +use std::cell::*; +use std::collections::HashSet; +use std::fmt; +use std::mem; +use std::mem::transmute; +use std::ops::Range; +use std::rc::Rc; +use mem::MaybeUninit; +use std::ptr; +use ptr::NonNull; +use YARVOpnd::*; +use TempMappingKind::*; +use crate::invariants::*; + +// Maximum number of temp value types we keep track of +pub const MAX_TEMP_TYPES: usize = 8; + +// Maximum number of local variable types we keep track of +const MAX_LOCAL_TYPES: usize = 8; + +/// An index into `ISEQ_BODY(iseq)->iseq_encoded`. Points +/// to a YARV instruction or an instruction operand. +pub type IseqIdx = u16; + +// Represent the type of a value (local/stack/self) in YJIT +#[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] +#[repr(u8)] +pub enum Type { + Unknown = 0, + UnknownImm, + UnknownHeap, + Nil, + True, + False, + Fixnum, + Flonum, + ImmSymbol, + + TString, // An object with the T_STRING flag set, possibly an rb_cString + CString, // An object that at one point had its class field equal rb_cString (creating a singleton class changes it) + TArray, // An object with the T_ARRAY flag set, possibly an rb_cArray + CArray, // An object that at one point had its class field equal rb_cArray (creating a singleton class changes it) + THash, // An object with the T_HASH flag set, possibly an rb_cHash + CHash, // An object that at one point had its class field equal rb_cHash (creating a singleton class changes it) + + BlockParamProxy, // A special sentinel value indicating the block parameter should be read from + // the current surrounding cfp + + // The context currently relies on types taking at most 4 bits (max value 15) + // to encode, so if we add any more, we will need to refactor the context. 
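+ // As listed, the enum already has 16 variants, so the discriminants span
+ // 0..=15 (Type::BlockParamProxy as u8 == 15) and the 4-bit budget is exactly
+ // used up. An illustrative sketch of the invariant:
+ // debug_assert!((Type::BlockParamProxy as u8) <= 0b1111);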
+} + +// Default initialization +impl Default for Type { + fn default() -> Self { + Type::Unknown + } +} + +impl Type { + /// This returns an appropriate Type based on a known value + pub fn from(val: VALUE) -> Type { + if val.special_const_p() { + if val.fixnum_p() { + Type::Fixnum + } else if val.nil_p() { + Type::Nil + } else if val == Qtrue { + Type::True + } else if val == Qfalse { + Type::False + } else if val.static_sym_p() { + Type::ImmSymbol + } else if val.flonum_p() { + Type::Flonum + } else { + unreachable!("Illegal value: {:?}", val) + } + } else { + // Core.rs can't reference rb_cString because it's linked by Rust-only tests. + // But CString vs TString is only an optimisation and shouldn't affect correctness. + #[cfg(not(test))] + match val.class_of() { + class if class == unsafe { rb_cArray } => return Type::CArray, + class if class == unsafe { rb_cHash } => return Type::CHash, + class if class == unsafe { rb_cString } => return Type::CString, + _ => {} + } + // We likewise can't reference rb_block_param_proxy, but it's again an optimisation; + // we can just treat it as a normal Object. + #[cfg(not(test))] + if val == unsafe { rb_block_param_proxy } { + return Type::BlockParamProxy; + } + match val.builtin_type() { + RUBY_T_ARRAY => Type::TArray, + RUBY_T_HASH => Type::THash, + RUBY_T_STRING => Type::TString, + _ => Type::UnknownHeap, + } + } + } + + /// Check if the type is an immediate + pub fn is_imm(&self) -> bool { + match self { + Type::UnknownImm => true, + Type::Nil => true, + Type::True => true, + Type::False => true, + Type::Fixnum => true, + Type::Flonum => true, + Type::ImmSymbol => true, + _ => false, + } + } + + /// Returns true when the type is not specific. + pub fn is_unknown(&self) -> bool { + match self { + Type::Unknown | Type::UnknownImm | Type::UnknownHeap => true, + _ => false, + } + } + + /// Returns true when we know the VALUE is a specific handle type, + /// such as a static symbol ([Type::ImmSymbol], i.e. true from RB_STATIC_SYM_P()). + /// Opposite of [Self::is_unknown]. 
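+ /// For example (illustrative): Type::Fixnum and Type::CString are specific,
+ /// while Type::Unknown, Type::UnknownImm and Type::UnknownHeap are the only
+ /// variants for which this returns false.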
+ pub fn is_specific(&self) -> bool { + !self.is_unknown() + } + + /// Check if the type is a heap object + pub fn is_heap(&self) -> bool { + match self { + Type::UnknownHeap => true, + Type::TArray => true, + Type::CArray => true, + Type::THash => true, + Type::CHash => true, + Type::TString => true, + Type::CString => true, + Type::BlockParamProxy => true, + _ => false, + } + } + + /// Check if it's a T_ARRAY object (both TArray and CArray are T_ARRAY) + pub fn is_array(&self) -> bool { + matches!(self, Type::TArray | Type::CArray) + } + + /// Check if it's a T_HASH object (both THash and CHash are T_HASH) + pub fn is_hash(&self) -> bool { + matches!(self, Type::THash | Type::CHash) + } + + /// Check if it's a T_STRING object (both TString and CString are T_STRING) + pub fn is_string(&self) -> bool { + matches!(self, Type::TString | Type::CString) + } + + /// Returns an Option with the T_ value type if it is known, otherwise None + pub fn known_value_type(&self) -> Option<ruby_value_type> { + match self { + Type::Nil => Some(RUBY_T_NIL), + Type::True => Some(RUBY_T_TRUE), + Type::False => Some(RUBY_T_FALSE), + Type::Fixnum => Some(RUBY_T_FIXNUM), + Type::Flonum => Some(RUBY_T_FLOAT), + Type::TArray | Type::CArray => Some(RUBY_T_ARRAY), + Type::THash | Type::CHash => Some(RUBY_T_HASH), + Type::ImmSymbol => Some(RUBY_T_SYMBOL), + Type::TString | Type::CString => Some(RUBY_T_STRING), + Type::Unknown | Type::UnknownImm | Type::UnknownHeap => None, + Type::BlockParamProxy => None, + } + } + + /// Returns an Option with the class if it is known, otherwise None + pub fn known_class(&self) -> Option<VALUE> { + unsafe { + match self { + Type::Nil => Some(rb_cNilClass), + Type::True => Some(rb_cTrueClass), + Type::False => Some(rb_cFalseClass), + Type::Fixnum => Some(rb_cInteger), + Type::Flonum => Some(rb_cFloat), + Type::ImmSymbol => Some(rb_cSymbol), + Type::CArray => Some(rb_cArray), + Type::CHash => Some(rb_cHash), + Type::CString => Some(rb_cString), + _ => None, + } + } + } + + /// Returns an Option with the exact value if it is known, otherwise None + #[allow(unused)] // not yet used + pub fn known_exact_value(&self) -> Option<VALUE> { + match self { + Type::Nil => Some(Qnil), + Type::True => Some(Qtrue), + Type::False => Some(Qfalse), + _ => None, + } + } + + /// Returns an Option boolean representing whether the value is truthy if known, otherwise None + pub fn known_truthy(&self) -> Option<bool> { + match self { + Type::Nil => Some(false), + Type::False => Some(false), + Type::UnknownHeap => Some(true), + Type::Unknown | Type::UnknownImm => None, + _ => Some(true) + } + } + + /// Returns an Option boolean representing whether the value is equal to nil if known, otherwise None + pub fn known_nil(&self) -> Option<bool> { + match (self, self.known_truthy()) { + (Type::Nil, _) => Some(true), + (Type::False, _) => Some(false), // Qfalse is not nil + (_, Some(true)) => Some(false), // if truthy, can't be nil + (_, _) => None // otherwise unknown + } + } + + /// Compute a difference between two value types + pub fn diff(self, dst: Self) -> TypeDiff { + // Perfect match, difference is zero + if self == dst { + return TypeDiff::Compatible(0); + } + + // Any type can flow into an unknown type + if dst == Type::Unknown { + return TypeDiff::Compatible(1); + } + + // A CArray is also a TArray. + if self == Type::CArray && dst == Type::TArray { + return TypeDiff::Compatible(1); + } + + // A CHash is also a THash. 
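+ // In other words (illustrative): Type::CHash.diff(Type::THash) yields
+ // TypeDiff::Compatible(1), while the reverse direction,
+ // Type::THash.diff(Type::CHash), is TypeDiff::Incompatible; widening is one-way.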
+ if self == Type::CHash && dst == Type::THash { + return TypeDiff::Compatible(1); + } + + // A CString is also a TString. + if self == Type::CString && dst == Type::TString { + return TypeDiff::Compatible(1); + } + + // Specific heap type into unknown heap type is imperfect but valid + if self.is_heap() && dst == Type::UnknownHeap { + return TypeDiff::Compatible(1); + } + + // Specific immediate type into unknown immediate type is imperfect but valid + if self.is_imm() && dst == Type::UnknownImm { + return TypeDiff::Compatible(1); + } + + // Incompatible types + return TypeDiff::Incompatible; + } + + /// Upgrade this type into a more specific compatible type + /// The new type must be compatible and at least as specific as the previously known type. + fn upgrade(&mut self, new_type: Self) { + // We can only upgrade to a type that is more specific + assert!(new_type.diff(*self) != TypeDiff::Incompatible); + *self = new_type; + } +} + +#[derive(Debug, Eq, PartialEq)] +pub enum TypeDiff { + // usize == 0: Same type + // usize >= 1: Different but compatible. The smaller, the more compatible. + Compatible(usize), + Incompatible, +} + +#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)] +#[repr(u8)] +pub enum TempMappingKind +{ + MapToStack = 0, + MapToSelf = 1, + MapToLocal = 2, +} + +// Potential mapping of a value on the temporary stack to +// self, a local variable or constant so that we can track its type +// +// The highest two bits represent TempMappingKind, and the rest of +// the bits are used differently across different kinds. +// * MapToStack: The lowest 5 bits are used for mapping Type. +// * MapToSelf: The remaining bits are not used; the type is stored in self_type. +// * MapToLocal: The lowest 3 bits store the index of a local variable. +#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)] +pub struct TempMapping(u8); + +impl TempMapping { + pub fn map_to_stack(t: Type) -> TempMapping + { + let kind_bits = TempMappingKind::MapToStack as u8; + let type_bits = t as u8; + assert!(type_bits <= 0b11111); + let bits = (kind_bits << 6) | (type_bits & 0b11111); + TempMapping(bits) + } + + pub fn map_to_self() -> TempMapping + { + let kind_bits = TempMappingKind::MapToSelf as u8; + let bits = kind_bits << 6; + TempMapping(bits) + } + + pub fn map_to_local(local_idx: u8) -> TempMapping + { + let kind_bits = TempMappingKind::MapToLocal as u8; + assert!(local_idx <= 0b111); + let bits = (kind_bits << 6) | (local_idx & 0b111); + TempMapping(bits) + } + + pub fn without_type(&self) -> TempMapping + { + if self.get_kind() != TempMappingKind::MapToStack { + return *self; + } + + TempMapping::map_to_stack(Type::Unknown) + } + + pub fn get_kind(&self) -> TempMappingKind + { + // Take the two highest bits + let TempMapping(bits) = self; + let kind_bits = bits >> 6; + assert!(kind_bits <= 2); + unsafe { transmute::<u8, TempMappingKind>(kind_bits) } + } + + pub fn get_type(&self) -> Type + { + assert!(self.get_kind() == TempMappingKind::MapToStack); + + // Take the 5 lowest bits + let TempMapping(bits) = self; + let type_bits = bits & 0b11111; + unsafe { transmute::<u8, Type>(type_bits) } + } + + pub fn get_local_idx(&self) -> u8 + { + assert!(self.get_kind() == TempMappingKind::MapToLocal); + + // Take the 3 lowest bits + let TempMapping(bits) = self; + bits & 0b111 + } +} + +impl Default for TempMapping { + fn default() -> Self { + TempMapping::map_to_stack(Type::Unknown) + } +} + +// Operand to a YARV bytecode instruction +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum YARVOpnd { + // The 
value is self + SelfOpnd, + + // Temporary stack operand with stack index + StackOpnd(u8), +} + +impl From<Opnd> for YARVOpnd { + fn from(value: Opnd) -> Self { + match value { + Opnd::Stack { idx, .. } => StackOpnd(idx.try_into().unwrap()), + _ => unreachable!("{:?} cannot be converted to YARVOpnd", value) + } + } +} + +/// Maximum index of stack temps that could be in a register +pub const MAX_REG_TEMPS: u8 = 8; + +/// Bitmap of which stack temps are in a register +#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)] +pub struct RegTemps(u8); + +impl RegTemps { + pub fn get(&self, index: u8) -> bool { + assert!(index < MAX_REG_TEMPS); + (self.0 >> index) & 1 == 1 + } + + pub fn set(&mut self, index: u8, value: bool) { + assert!(index < MAX_REG_TEMPS); + if value { + self.0 = self.0 | (1 << index); + } else { + self.0 = self.0 & !(1 << index); + } + } + + pub fn as_u8(&self) -> u8 { + self.0 + } + + /// Return true if there's a register that conflicts with a given stack_idx. + pub fn conflicts_with(&self, stack_idx: u8) -> bool { + let mut other_idx = stack_idx as usize % get_option!(num_temp_regs); + while other_idx < MAX_REG_TEMPS as usize { + if stack_idx as usize != other_idx && self.get(other_idx as u8) { + return true; + } + other_idx += get_option!(num_temp_regs); + } + false + } +} + +/// Bits for chain_depth_return_landing_defer +const RETURN_LANDING_BIT: u8 = 0b10000000; +const DEFER_BIT: u8 = 0b01000000; +const CHAIN_DEPTH_MASK: u8 = 0b00111111; // 63 + +/// Code generation context +/// Contains information we can use to specialize/optimize code +/// There are a lot of context objects so we try to keep the size small. +#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)] +#[repr(packed)] +pub struct Context { + // Number of values currently on the temporary stack + stack_size: u8, + + // Offset of the JIT SP relative to the interpreter SP + // This represents how far the JIT's SP is from the "real" SP + sp_offset: i8, + + /// Bitmap of which stack temps are in a register + reg_temps: RegTemps, + + /// Fields packed into u8 + /// - 1st bit from the left: Whether this code is the target of a JIT-to-JIT Ruby return ([Self::is_return_landing]) + /// - 2nd bit from the left: Whether the compilation of this code has been deferred ([Self::is_deferred]) + /// - Last 6 bits (max: 63): Depth of this block in the sidechain (eg: inline-cache chain) + chain_depth_and_flags: u8, + + // Type we track for self + self_type: Type, + + // Local variable types we keep track of + // We store 8 local types, requiring 4 bits each, for a total of 32 bits + local_types: u32, + + // Temp mapping kinds we track + // 8 temp mappings * 2 bits, total 16 bits + temp_mapping_kind: u16, + + // Stack slot type/local_idx we track + // 8 temp types * 4 bits, total 32 bits + temp_payload: u32, + + /// A pointer to a block ISEQ supplied by the caller. 0 if not inlined. + /// Not using IseqPtr to satisfy Default trait, and not using Option for #[repr(packed)] + /// TODO: This could be u16 if we have a global or per-ISEQ HashMap to convert IseqPtr + /// to serial indexes. We're thinking of overhauling Context structure in Ruby 3.4 which + /// could allow this to consume no bytes, so we're leaving this as is. + inline_block: u64, +} + +/// Tuple of (iseq, idx) used to identify basic blocks +/// There are a lot of blockid objects so we try to keep the size small. 
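+/// For example (illustrative), a block compiled from the very start of an ISEQ
+/// is identified by BlockId { iseq, idx: 0 }.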
+#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[repr(packed)] +pub struct BlockId { + /// Instruction sequence + pub iseq: IseqPtr, + + /// Index in the iseq where the block starts + pub idx: u16, +} + +/// Branch code shape enumeration +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum BranchShape { + Next0, // Target 0 is next + Next1, // Target 1 is next + Default, // Neither target is next +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum BranchGenFn { + BranchIf(Cell<BranchShape>), + BranchNil(Cell<BranchShape>), + BranchUnless(Cell<BranchShape>), + JumpToTarget0(Cell<BranchShape>), + JNZToTarget0, + JZToTarget0, + JBEToTarget0, + JBToTarget0, + JOMulToTarget0, + JITReturn, +} + +impl BranchGenFn { + pub fn call(&self, asm: &mut Assembler, target0: Target, target1: Option<Target>) { + match self { + BranchGenFn::BranchIf(shape) => { + match shape.get() { + BranchShape::Next0 => asm.jz(target1.unwrap()), + BranchShape::Next1 => asm.jnz(target0), + BranchShape::Default => { + asm.jnz(target0); + asm.jmp(target1.unwrap()); + } + } + } + BranchGenFn::BranchNil(shape) => { + match shape.get() { + BranchShape::Next0 => asm.jne(target1.unwrap()), + BranchShape::Next1 => asm.je(target0), + BranchShape::Default => { + asm.je(target0); + asm.jmp(target1.unwrap()); + } + } + } + BranchGenFn::BranchUnless(shape) => { + match shape.get() { + BranchShape::Next0 => asm.jnz(target1.unwrap()), + BranchShape::Next1 => asm.jz(target0), + BranchShape::Default => { + asm.jz(target0); + asm.jmp(target1.unwrap()); + } + } + } + BranchGenFn::JumpToTarget0(shape) => { + if shape.get() == BranchShape::Next1 { + panic!("Branch shape Next1 not allowed in JumpToTarget0!"); + } + if shape.get() == BranchShape::Default { + asm.jmp(target0); + } + } + BranchGenFn::JNZToTarget0 => { + asm.jnz(target0) + } + BranchGenFn::JZToTarget0 => { + asm.jz(target0) + } + BranchGenFn::JBEToTarget0 => { + asm.jbe(target0) + } + BranchGenFn::JBToTarget0 => { + asm.jb(target0) + } + BranchGenFn::JOMulToTarget0 => { + asm.jo_mul(target0) + } + BranchGenFn::JITReturn => { + asm_comment!(asm, "update cfp->jit_return"); + let jit_return = RUBY_OFFSET_CFP_JIT_RETURN - RUBY_SIZEOF_CONTROL_FRAME as i32; + let raw_ptr = asm.lea_jump_target(target0); + asm.mov(Opnd::mem(64, CFP, jit_return), raw_ptr); + } + } + } + + pub fn get_shape(&self) -> BranchShape { + match self { + BranchGenFn::BranchIf(shape) | + BranchGenFn::BranchNil(shape) | + BranchGenFn::BranchUnless(shape) | + BranchGenFn::JumpToTarget0(shape) => shape.get(), + BranchGenFn::JNZToTarget0 | + BranchGenFn::JZToTarget0 | + BranchGenFn::JBEToTarget0 | + BranchGenFn::JBToTarget0 | + BranchGenFn::JOMulToTarget0 | + BranchGenFn::JITReturn => BranchShape::Default, + } + } + + pub fn set_shape(&self, new_shape: BranchShape) { + match self { + BranchGenFn::BranchIf(shape) | + BranchGenFn::BranchNil(shape) | + BranchGenFn::BranchUnless(shape) => { + shape.set(new_shape); + } + BranchGenFn::JumpToTarget0(shape) => { + if new_shape == BranchShape::Next1 { + panic!("Branch shape Next1 not allowed in JumpToTarget0!"); + } + shape.set(new_shape); + } + BranchGenFn::JNZToTarget0 | + BranchGenFn::JZToTarget0 | + BranchGenFn::JBEToTarget0 | + BranchGenFn::JBToTarget0 | + BranchGenFn::JOMulToTarget0 | + BranchGenFn::JITReturn => { + assert_eq!(new_shape, BranchShape::Default); + } + } + } +} + +/// A place that a branch could jump to +#[derive(Debug, Clone)] +enum BranchTarget { + Stub(Box<BranchStub>), // Not compiled yet + Block(BlockRef), // Already compiled +} + +impl BranchTarget 
{ + fn get_address(&self) -> Option<CodePtr> { + match self { + BranchTarget::Stub(stub) => stub.address, + BranchTarget::Block(blockref) => Some(unsafe { blockref.as_ref() }.start_addr), + } + } + + fn get_blockid(&self) -> BlockId { + match self { + BranchTarget::Stub(stub) => BlockId { iseq: stub.iseq.get(), idx: stub.iseq_idx }, + BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.get_blockid(), + } + } + + fn get_ctx(&self) -> Context { + match self { + BranchTarget::Stub(stub) => stub.ctx, + BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.ctx, + } + } + + fn get_block(&self) -> Option<BlockRef> { + match self { + BranchTarget::Stub(_) => None, + BranchTarget::Block(blockref) => Some(*blockref), + } + } + + fn set_iseq(&self, iseq: IseqPtr) { + match self { + BranchTarget::Stub(stub) => stub.iseq.set(iseq), + BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.iseq.set(iseq), + } + } +} + +#[derive(Debug, Clone)] +struct BranchStub { + address: Option<CodePtr>, + iseq: Cell<IseqPtr>, + iseq_idx: IseqIdx, + ctx: Context, +} + +/// Store info about an outgoing branch in a code segment +/// Note: care must be taken to minimize the size of branch objects +pub struct Branch { + // Block this is attached to + block: BlockRef, + + // Positions where the generated code starts and ends + start_addr: CodePtr, + end_addr: Cell<CodePtr>, // exclusive + + // Branch target blocks and their contexts + targets: [Cell<Option<Box<BranchTarget>>>; 2], + + // Branch code generation function + gen_fn: BranchGenFn, +} + +/// A [Branch] for a [Block] that is under construction. +/// Fields correspond, but may be `None` during construction. +pub struct PendingBranch { + /// Allocation holder for the address of the constructed branch + /// in error paths Box deallocates it. + uninit_branch: Box<MaybeUninit<Branch>>, + + /// Branch code generation function + gen_fn: BranchGenFn, + + /// Positions where the generated code starts and ends + start_addr: Cell<Option<CodePtr>>, + end_addr: Cell<Option<CodePtr>>, // exclusive + + /// Branch target blocks and their contexts + targets: [Cell<Option<Box<BranchTarget>>>; 2], +} + +impl Branch { + // Compute the size of the branch code + fn code_size(&self) -> usize { + (self.end_addr.get().as_offset() - self.start_addr.as_offset()) as usize + } + + /// Get the address of one of the branch destination + fn get_target_address(&self, target_idx: usize) -> Option<CodePtr> { + unsafe { + self.targets[target_idx] + .ref_unchecked() + .as_ref() + .and_then(|target| target.get_address()) + } + } + + fn get_stub_count(&self) -> usize { + let mut count = 0; + for target in self.targets.iter() { + if unsafe { + // SAFETY: no mutation + matches!( + target.ref_unchecked().as_ref().map(Box::as_ref), + Some(BranchTarget::Stub(_)) + ) + } { + count += 1; + } + } + count + } + + fn assert_layout(&self) { + let shape = self.gen_fn.get_shape(); + assert!( + !(shape == BranchShape::Default && 0 == self.code_size()), + "zero-size branches are incorrect when code for neither targets are adjacent" + // One needs to issue some instruction to steer to the branch target + // when falling through isn't an option. + ); + } +} + +impl std::fmt::Debug for Branch { + // Can't derive this because `targets: !Copy` due to Cell. 
+ fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let targets = unsafe { + // SAFETY: + // While the references are live for the result of this function, + // no mutation happens because we are only calling derived fmt::Debug functions. + [self.targets[0].as_ptr().as_ref().unwrap(), self.targets[1].as_ptr().as_ref().unwrap()] + }; + + formatter + .debug_struct("Branch") + .field("block", &self.block) + .field("start", &self.start_addr) + .field("end", &self.end_addr) + .field("targets", &targets) + .field("gen_fn", &self.gen_fn) + .finish() + } +} + +impl PendingBranch { + /// Set up a branch target at `target_idx`. Find an existing block to branch to + /// or generate a stub for one. + fn set_target( + &self, + target_idx: u32, + target: BlockId, + ctx: &Context, + ocb: &mut OutlinedCb, + ) -> Option<CodePtr> { + // If the block already exists + if let Some(blockref) = find_block_version(target, ctx) { + let block = unsafe { blockref.as_ref() }; + + // Fill out the target with this block + self.targets[target_idx.as_usize()] + .set(Some(Box::new(BranchTarget::Block(blockref)))); + return Some(block.start_addr); + } + + // The branch struct is uninitialized right now but as a stable address. + // We make sure the stub runs after the branch is initialized. + let branch_struct_addr = self.uninit_branch.as_ptr() as usize; + let stub_addr = gen_branch_stub(ctx, ocb, branch_struct_addr, target_idx); + + if let Some(stub_addr) = stub_addr { + // Fill the branch target with a stub + self.targets[target_idx.as_usize()].set(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub { + address: Some(stub_addr), + iseq: Cell::new(target.iseq), + iseq_idx: target.idx, + ctx: *ctx, + }))))); + } + + stub_addr + } + + // Construct the branch and wire it up in the grpah + fn into_branch(mut self, uninit_block: BlockRef) -> BranchRef { + // Make the branch + let branch = Branch { + block: uninit_block, + start_addr: self.start_addr.get().unwrap(), + end_addr: Cell::new(self.end_addr.get().unwrap()), + targets: self.targets, + gen_fn: self.gen_fn, + }; + // Move it to the designated place on + // the heap and unwrap MaybeUninit. + self.uninit_branch.write(branch); + let raw_branch: *mut MaybeUninit<Branch> = Box::into_raw(self.uninit_branch); + let branchref = NonNull::new(raw_branch as *mut Branch).expect("no null from Box"); + + // SAFETY: just allocated it + let branch = unsafe { branchref.as_ref() }; + // For block branch targets, put the new branch in the + // appropriate incoming list. + for target in branch.targets.iter() { + // SAFETY: no mutation + let out_block: Option<BlockRef> = unsafe { + target.ref_unchecked().as_ref().and_then(|target| target.get_block()) + }; + + if let Some(out_block) = out_block { + // SAFETY: These blockrefs come from set_target() which only puts blocks from + // ISeqs, which are all initialized. Note that uninit_block isn't in any ISeq + // payload yet. + unsafe { out_block.as_ref() }.incoming.push(branchref); + } + } + + branch.assert_layout(); + + branchref + } +} + +// Store info about code used on YJIT entry +pub struct Entry { + // Positions where the generated code starts and ends + start_addr: CodePtr, + end_addr: CodePtr, // exclusive +} + +/// A [Branch] for a [Block] that is under construction. 
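+/// More precisely: an [Entry] that is under construction, mirroring how
+/// [PendingBranch] relates to [Branch].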
+pub struct PendingEntry { + pub uninit_entry: Box<MaybeUninit<Entry>>, + start_addr: Cell<Option<CodePtr>>, + end_addr: Cell<Option<CodePtr>>, // exclusive +} + +impl PendingEntry { + // Construct the entry in the heap + pub fn into_entry(mut self) -> EntryRef { + // Make the entry + let entry = Entry { + start_addr: self.start_addr.get().unwrap(), + end_addr: self.end_addr.get().unwrap(), + }; + // Move it to the designated place on the heap and unwrap MaybeUninit. + self.uninit_entry.write(entry); + let raw_entry: *mut MaybeUninit<Entry> = Box::into_raw(self.uninit_entry); + NonNull::new(raw_entry as *mut Entry).expect("no null from Box") + } +} + +// In case a block is invalidated, this helps to remove all pointers to the block. +pub type CmePtr = *const rb_callable_method_entry_t; + +/// Basic block version +/// Represents a portion of an iseq compiled with a given context +/// Note: care must be taken to minimize the size of block_t objects +#[derive(Debug)] +pub struct Block { + // The byte code instruction sequence this is a version of. + // Can change due to moving GC. + iseq: Cell<IseqPtr>, + + // Index range covered by this version in `ISEQ_BODY(iseq)->iseq_encoded`. + iseq_range: Range<IseqIdx>, + + // Context at the start of the block + // This should never be mutated + ctx: Context, + + // Positions where the generated code starts and ends + start_addr: CodePtr, + end_addr: Cell<CodePtr>, + + // List of incoming branches (from predecessors) + // These are reference counted (ownership shared between predecessor and successors) + incoming: MutableBranchList, + + // NOTE: we might actually be able to store the branches here without refcounting + // however, using a RefCell makes it easy to get a pointer to Branch objects + // + // List of outgoing branches (to successors) + outgoing: Box<[BranchRef]>, + + // FIXME: should these be code pointers instead? + // Offsets for GC managed objects in the mainline code block + gc_obj_offsets: Box<[u32]>, + + // CME dependencies of this block, to help to remove all pointers to this + // block in the system. + cme_dependencies: Box<[Cell<CmePtr>]>, + + // Code address of an exit for `ctx` and `blockid`. + // Used for block invalidation. + entry_exit: Option<CodePtr>, +} + +/// Pointer to a [Block]. +/// +/// # Safety +/// +/// _Never_ derive a `&mut Block` from this and always use +/// [std::ptr::NonNull::as_ref] to get a `&Block`. `&'a mut` +/// in Rust asserts that there are no other references live +/// over the lifetime `'a`. This uniqueness assertion does +/// not hold in many situations for us, even when you ignore +/// the fact that our control flow graph can have cycles. +/// Here are just two examples where we have overlapping references: +/// - Yielding to a different OS thread within the same +/// ractor during compilation +/// - The GC calling [rb_yjit_iseq_mark] during compilation +/// +/// Technically, for soundness, we also need to ensure that +/// the we have the VM lock while the result of `as_ref()` +/// is live, so that no deallocation happens while the +/// shared reference is live. The vast majority of our code run while +/// holding the VM lock, though. +pub type BlockRef = NonNull<Block>; + +/// Pointer to a [Branch]. See [BlockRef] for notes about +/// proper usage. 
+pub type BranchRef = NonNull<Branch>; + +/// Pointer to an entry that is already added to an ISEQ +pub type EntryRef = NonNull<Entry>; + +/// List of block versions for a given blockid +type VersionList = Vec<BlockRef>; + +/// Map from iseq indices to lists of versions for that given blockid +/// An instance of this is stored on each iseq +type VersionMap = Vec<VersionList>; + +/// [Interior mutability][1] wrapper for a list of branches. +/// O(n) insertion, but space efficient. We generally expect +/// blocks to have only a few branches. +/// +/// [1]: https://doc.rust-lang.org/std/cell/struct.UnsafeCell.html +#[repr(transparent)] +struct MutableBranchList(Cell<Box<[BranchRef]>>); + +impl MutableBranchList { + fn push(&self, branch: BranchRef) { + // Temporary move the boxed slice out of self. + // oom=abort is load bearing here... + let mut current_list = self.0.take().into_vec(); + current_list.push(branch); + self.0.set(current_list.into_boxed_slice()); + } +} + +impl fmt::Debug for MutableBranchList { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + // SAFETY: the derived Clone for boxed slices does not mutate this Cell + let branches = unsafe { self.0.ref_unchecked().clone() }; + + formatter.debug_list().entries(branches.into_iter()).finish() + } +} + +/// This is all the data YJIT stores on an iseq +/// This will be dynamically allocated by C code +/// C code should pass an &mut IseqPayload to us +/// when calling into YJIT +#[derive(Default)] +pub struct IseqPayload { + // Basic block versions + pub version_map: VersionMap, + + // Indexes of code pages used by this this ISEQ + pub pages: HashSet<usize>, + + // List of ISEQ entry codes + pub entries: Vec<EntryRef>, + + // Blocks that are invalidated but are not yet deallocated. + // The code GC will free them later. + pub dead_blocks: Vec<BlockRef>, +} + +impl IseqPayload { + /// Remove all block versions from the payload and then return them as an iterator + pub fn take_all_blocks(&mut self) -> impl Iterator<Item = BlockRef> { + // Empty the blocks + let version_map = mem::take(&mut self.version_map); + + // Turn it into an iterator that owns the blocks and return + version_map.into_iter().flatten() + } +} + +/// Get the payload for an iseq. For safety it's up to the caller to ensure the returned `&mut` +/// upholds aliasing rules and that the argument is a valid iseq. +pub fn get_iseq_payload(iseq: IseqPtr) -> Option<&'static mut IseqPayload> { + let payload = unsafe { rb_iseq_get_yjit_payload(iseq) }; + let payload: *mut IseqPayload = payload.cast(); + unsafe { payload.as_mut() } +} + +/// Get the payload object associated with an iseq. Create one if none exists. +pub fn get_or_create_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload { + type VoidPtr = *mut c_void; + + let payload_non_null = unsafe { + let payload = rb_iseq_get_yjit_payload(iseq); + if payload.is_null() { + // Increment the compiled iseq count + incr_counter!(compiled_iseq_count); + + // Allocate a new payload with Box and transfer ownership to the GC. + // We drop the payload with Box::from_raw when the GC frees the iseq and calls us. + // NOTE(alan): Sometimes we read from an iseq without ever writing to it. + // We allocate in those cases anyways. 
+ let new_payload = IseqPayload::default(); + let new_payload = Box::into_raw(Box::new(new_payload)); + rb_iseq_set_yjit_payload(iseq, new_payload as VoidPtr); + + new_payload + } else { + payload as *mut IseqPayload + } + }; + + // SAFETY: we should have the VM lock and all other Ruby threads should be asleep. So we have + // exclusive mutable access. + // Hmm, nothing seems to stop calling this on the same + // iseq twice, though, which violates aliasing rules. + unsafe { payload_non_null.as_mut() }.unwrap() +} + +/// Iterate over all existing ISEQs +pub fn for_each_iseq<F: FnMut(IseqPtr)>(mut callback: F) { + unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) { + // SAFETY: points to the local below + let callback: &mut &mut dyn FnMut(IseqPtr) -> bool = unsafe { std::mem::transmute(&mut *data) }; + callback(iseq); + } + let mut data: &mut dyn FnMut(IseqPtr) = &mut callback; + unsafe { rb_yjit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) }; +} + +/// Iterate over all ISEQ payloads +pub fn for_each_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) { + for_each_iseq(|iseq| { + if let Some(iseq_payload) = get_iseq_payload(iseq) { + callback(iseq_payload); + } + }); +} + +/// Iterate over all on-stack ISEQs +pub fn for_each_on_stack_iseq<F: FnMut(IseqPtr)>(mut callback: F) { + unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) { + // SAFETY: points to the local below + let callback: &mut &mut dyn FnMut(IseqPtr) -> bool = unsafe { std::mem::transmute(&mut *data) }; + callback(iseq); + } + let mut data: &mut dyn FnMut(IseqPtr) = &mut callback; + unsafe { rb_jit_cont_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) }; +} + +/// Iterate over all on-stack ISEQ payloads +pub fn for_each_on_stack_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) { + for_each_on_stack_iseq(|iseq| { + if let Some(iseq_payload) = get_iseq_payload(iseq) { + callback(iseq_payload); + } + }); +} + +/// Iterate over all NOT on-stack ISEQ payloads +pub fn for_each_off_stack_iseq_payload<F: FnMut(&mut IseqPayload)>(mut callback: F) { + // Get all ISEQs on the heap. Note that rb_objspace_each_objects() runs GC first, + // which could move ISEQ pointers when GC.auto_compact = true. + // So for_each_on_stack_iseq() must be called after this, which doesn't run GC. + let mut iseqs: Vec<IseqPtr> = vec![]; + for_each_iseq(|iseq| iseqs.push(iseq)); + + // Get all ISEQs that are on a CFP of existing ECs. + let mut on_stack_iseqs: HashSet<IseqPtr> = HashSet::new(); + for_each_on_stack_iseq(|iseq| { on_stack_iseqs.insert(iseq); }); + + // Invoke the callback for iseqs - on_stack_iseqs + for iseq in iseqs { + if !on_stack_iseqs.contains(&iseq) { + if let Some(iseq_payload) = get_iseq_payload(iseq) { + callback(iseq_payload); + } + } + } +} + +/// Free the per-iseq payload +#[no_mangle] +pub extern "C" fn rb_yjit_iseq_free(iseq: IseqPtr) { + // Free invariants for the ISEQ + iseq_free_invariants(iseq); + + let payload = { + let payload = unsafe { rb_iseq_get_yjit_payload(iseq) }; + if payload.is_null() { + // Nothing to free. + return; + } else { + payload as *mut IseqPayload + } + }; + + // Take ownership of the payload with Box::from_raw(). + // It drops right before this function returns. + // SAFETY: We got the pointer from Box::into_raw(). + let payload = unsafe { Box::from_raw(payload) }; + + // Free all blocks in version_map. The GC doesn't free running iseqs. 
+ for versions in &payload.version_map { + for block in versions { + // SAFETY: blocks in the version_map are always well connected + unsafe { free_block(*block, true) }; + } + } + + // Free dead blocks + for block in payload.dead_blocks { + unsafe { free_block(block, false) }; + } + + // Free all entries + for entryref in payload.entries.iter() { + let entry = unsafe { Box::from_raw(entryref.as_ptr()) }; + mem::drop(entry); + } + + // Increment the freed iseq count + incr_counter!(freed_iseq_count); +} + +/// GC callback for marking GC objects in the per-iseq payload. +#[no_mangle] +pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) { + let payload = if payload.is_null() { + // Nothing to mark. + return; + } else { + // SAFETY: The GC takes the VM lock while marking, which + // we assert, so we should be synchronized and data race free. + // + // For aliasing, having the VM lock hopefully also implies that no one + // else has an overlapping &mut IseqPayload. + unsafe { + rb_yjit_assert_holding_vm_lock(); + &*(payload as *const IseqPayload) + } + }; + + // For marking VALUEs written into the inline code block. + // We don't write VALUEs in the outlined block. + let cb: &CodeBlock = CodegenGlobals::get_inline_cb(); + + for versions in &payload.version_map { + for block in versions { + // SAFETY: all blocks inside version_map are initialized. + let block = unsafe { block.as_ref() }; + mark_block(block, cb, false); + } + } + // Mark dead blocks, since there could be stubs pointing at them + for blockref in &payload.dead_blocks { + // SAFETY: dead blocks come from version_map, which only have initialized blocks + let block = unsafe { blockref.as_ref() }; + mark_block(block, cb, true); + } + + return; + + fn mark_block(block: &Block, cb: &CodeBlock, dead: bool) { + unsafe { rb_gc_mark_movable(block.iseq.get().into()) }; + + // Mark method entry dependencies + for cme_dep in block.cme_dependencies.iter() { + unsafe { rb_gc_mark_movable(cme_dep.get().into()) }; + } + + // Mark outgoing branch entries + for branch in block.outgoing.iter() { + let branch = unsafe { branch.as_ref() }; + for target in branch.targets.iter() { + // SAFETY: no mutation inside unsafe + let target_iseq = unsafe { + target.ref_unchecked().as_ref().and_then(|target| { + // Avoid get_blockid() on blockref. Can be dangling on dead blocks, + // and the iseq housing the block already naturally handles it. + if target.get_block().is_some() { + None + } else { + Some(target.get_blockid().iseq) + } + }) + }; + + if let Some(target_iseq) = target_iseq { + unsafe { rb_gc_mark_movable(target_iseq.into()) }; + } + } + } + + // Mark references to objects in generated code. + // Skip for dead blocks since they shouldn't run. + if !dead { + for offset in block.gc_obj_offsets.iter() { + let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(cb); + // Creating an unaligned pointer is well defined unlike in C. + let value_address = value_address as *const VALUE; + + // SAFETY: these point to YJIT's code buffer + unsafe { + let object = value_address.read_unaligned(); + rb_gc_mark_movable(object); + }; + } + } + } +} + +/// GC callback for updating GC objects in the per-iseq payload. +/// This is a mirror of [rb_yjit_iseq_mark]. +#[no_mangle] +pub extern "C" fn rb_yjit_iseq_update_references(iseq: IseqPtr) { + let payload = unsafe { rb_iseq_get_yjit_payload(iseq) }; + let payload = if payload.is_null() { + // Nothing to update. 
+ return; + } else { + // SAFETY: The GC takes the VM lock while marking, which + // we assert, so we should be synchronized and data race free. + // + // For aliasing, having the VM lock hopefully also implies that no one + // else has an overlapping &mut IseqPayload. + unsafe { + rb_yjit_assert_holding_vm_lock(); + &*(payload as *const IseqPayload) + } + }; + + // Evict other threads from generated code since we are about to patch them. + // Also acts as an assert that we hold the VM lock. + unsafe { rb_vm_barrier() }; + + // For updating VALUEs written into the inline code block. + let cb = CodegenGlobals::get_inline_cb(); + + for versions in &payload.version_map { + for version in versions { + // SAFETY: all blocks inside version_map are initialized + let block = unsafe { version.as_ref() }; + block_update_references(block, cb, false); + } + } + // Update dead blocks, since there could be stubs pointing at them + for blockref in &payload.dead_blocks { + // SAFETY: dead blocks come from version_map, which only have initialized blocks + let block = unsafe { blockref.as_ref() }; + block_update_references(block, cb, true); + } + + // Note that we would have returned already if YJIT is off. + cb.mark_all_executable(); + + CodegenGlobals::get_outlined_cb() + .unwrap() + .mark_all_executable(); + + return; + + fn block_update_references(block: &Block, cb: &mut CodeBlock, dead: bool) { + block.iseq.set(unsafe { rb_gc_location(block.iseq.get().into()) }.as_iseq()); + + // Update method entry dependencies + for cme_dep in block.cme_dependencies.iter() { + let cur_cme: VALUE = cme_dep.get().into(); + let new_cme = unsafe { rb_gc_location(cur_cme) }.as_cme(); + cme_dep.set(new_cme); + } + + // Update outgoing branch entries + for branch in block.outgoing.iter() { + let branch = unsafe { branch.as_ref() }; + for target in branch.targets.iter() { + // SAFETY: no mutation inside unsafe + let current_iseq = unsafe { + target.ref_unchecked().as_ref().and_then(|target| { + // Avoid get_blockid() on blockref. Can be dangling on dead blocks, + // and the iseq housing the block already naturally handles it. + if target.get_block().is_some() { + None + } else { + Some(target.get_blockid().iseq) + } + }) + }; + + if let Some(current_iseq) = current_iseq { + let updated_iseq = unsafe { rb_gc_location(current_iseq.into()) } + .as_iseq(); + // SAFETY: the Cell::set is not on the reference given out + // by ref_unchecked. + unsafe { target.ref_unchecked().as_ref().unwrap().set_iseq(updated_iseq) }; + } + } + } + + // Update references to objects in generated code. + // Skip for dead blocks since they shouldn't run and + // so there is no potential of writing over invalidation jumps + if !dead { + for offset in block.gc_obj_offsets.iter() { + let offset_to_value = offset.as_usize(); + let value_code_ptr = cb.get_ptr(offset_to_value); + let value_ptr: *const u8 = value_code_ptr.raw_ptr(cb); + // Creating an unaligned pointer is well defined unlike in C. + let value_ptr = value_ptr as *mut VALUE; + + // SAFETY: these point to YJIT's code buffer + let object = unsafe { value_ptr.read_unaligned() }; + let new_addr = unsafe { rb_gc_location(object) }; + + // Only write when the VALUE moves, to be copy-on-write friendly. 
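+ // (Descriptive note: the loop below patches the relocated VALUE into the
+ // generated code one byte at a time, via new_addr.as_u64().to_le_bytes() and
+ // cb.write_mem, so every write goes through the CodeBlock's checked write path.)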
+ if new_addr != object { + for (byte_idx, &byte) in new_addr.as_u64().to_le_bytes().iter().enumerate() { + let byte_code_ptr = value_code_ptr.add_bytes(byte_idx); + cb.write_mem(byte_code_ptr, byte) + .expect("patching existing code should be within bounds"); + } + } + } + } + + } +} + +/// Get all blocks for a particular place in an iseq. +fn get_version_list(blockid: BlockId) -> Option<&'static mut VersionList> { + let insn_idx = blockid.idx.as_usize(); + match get_iseq_payload(blockid.iseq) { + Some(payload) if insn_idx < payload.version_map.len() => { + Some(payload.version_map.get_mut(insn_idx).unwrap()) + }, + _ => None + } +} + +/// Get or create all blocks for a particular place in an iseq. +fn get_or_create_version_list(blockid: BlockId) -> &'static mut VersionList { + let payload = get_or_create_iseq_payload(blockid.iseq); + let insn_idx = blockid.idx.as_usize(); + + // Expand the version map as necessary + if insn_idx >= payload.version_map.len() { + payload + .version_map + .resize(insn_idx + 1, VersionList::default()); + } + + return payload.version_map.get_mut(insn_idx).unwrap(); +} + +/// Take all of the blocks for a particular place in an iseq +pub fn take_version_list(blockid: BlockId) -> VersionList { + let insn_idx = blockid.idx.as_usize(); + match get_iseq_payload(blockid.iseq) { + Some(payload) if insn_idx < payload.version_map.len() => { + mem::take(&mut payload.version_map[insn_idx]) + }, + _ => VersionList::default(), + } +} + +/// Count the number of block versions matching a given blockid +/// `inlined: true` counts inlined versions, and `inlined: false` counts other versions. +fn get_num_versions(blockid: BlockId, inlined: bool) -> usize { + let insn_idx = blockid.idx.as_usize(); + match get_iseq_payload(blockid.iseq) { + Some(payload) => { + payload + .version_map + .get(insn_idx) + .map(|versions| { + versions.iter().filter(|&&version| + unsafe { version.as_ref() }.ctx.inline() == inlined + ).count() + }) + .unwrap_or(0) + } + None => 0, + } +} + +/// Get or create a list of block versions generated for an iseq +/// This is used for disassembly (see disasm.rs) +pub fn get_or_create_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> { + let payload = get_or_create_iseq_payload(iseq); + + let mut blocks = Vec::<BlockRef>::new(); + + // For each instruction index + for insn_idx in 0..payload.version_map.len() { + let version_list = &payload.version_map[insn_idx]; + + // For each version at this instruction index + for version in version_list { + // Clone the block ref and add it to the list + blocks.push(*version); + } + } + + return blocks; +} + +/// Retrieve a basic block version for an (iseq, idx) tuple +/// This will return None if no version is found +fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> { + let versions = match get_version_list(blockid) { + Some(versions) => versions, + None => return None, + }; + + // Best match found + let mut best_version: Option<BlockRef> = None; + let mut best_diff = usize::MAX; + + // For each version matching the blockid + for blockref in versions.iter() { + let block = unsafe { blockref.as_ref() }; + + // Note that we always prefer the first matching + // version found because of inline-cache chains + match ctx.diff(&block.ctx) { + TypeDiff::Compatible(diff) if diff < best_diff => { + best_version = Some(*blockref); + best_diff = diff; + } + _ => {} + } + } + + return best_version; +} + +/// Allow inlining a Block up to MAX_INLINE_VERSIONS times. 
+const MAX_INLINE_VERSIONS: usize = 1000; + +/// Produce a generic context when the block version limit is hit for a blockid +pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context { + // Guard chains implement limits separately, do nothing + if ctx.get_chain_depth() > 0 { + return *ctx; + } + + let next_versions = get_num_versions(blockid, ctx.inline()) + 1; + let max_versions = if ctx.inline() { + MAX_INLINE_VERSIONS + } else { + get_option!(max_versions) + }; + + // If this block version we're about to add will hit the version limit + if next_versions >= max_versions { + // Produce a generic context that stores no type information, + // but still respects the stack_size and sp_offset constraints. + // This new context will then match all future requests. + let generic_ctx = ctx.get_generic_ctx(); + + if cfg!(debug_assertions) { + let mut ctx = ctx.clone(); + if ctx.inline() { + // Suppress TypeDiff::Incompatible from ctx.diff(). We return TypeDiff::Incompatible + // to keep inlining blocks until we hit the limit, but it's safe to give up inlining. + ctx.inline_block = 0; + assert!(generic_ctx.inline_block == 0); + } + + assert_ne!( + TypeDiff::Incompatible, + ctx.diff(&generic_ctx), + "should substitute a compatible context", + ); + } + + return generic_ctx; + } + incr_counter_to!(max_inline_versions, next_versions); + + return *ctx; +} + +/// Install a block version into its [IseqPayload], letting the GC track its +/// lifetime, and allowing it to be considered for use for other +/// blocks we might generate. Uses `cb` for running write barriers. +/// +/// # Safety +/// +/// The block must be fully initialized. Its incoming and outgoing edges, +/// if there are any, must point to initialized blocks, too. +/// +/// Note that the block might gain edges after this function returns, +/// as can happen during [gen_block_series]. Initialized here doesn't mean +/// ready to be consumed or that the machine code tracked by the block is +/// ready to be run. +/// +/// Due to this transient state where a block is tracked by the GC by +/// being inside an [IseqPayload] but not ready to be executed, it's +/// generally unsound to call any Ruby methods during codegen. That has +/// the potential to run blocks which are not ready. +unsafe fn add_block_version(blockref: BlockRef, cb: &CodeBlock) { + // SAFETY: caller ensures initialization + let block = unsafe { blockref.as_ref() }; + + // Function entry blocks must have stack size 0 + assert!(!(block.iseq_range.start == 0 && block.ctx.stack_size > 0)); + + let version_list = get_or_create_version_list(block.get_blockid()); + + // If this the first block being compiled with this block id + if version_list.len() == 0 { + incr_counter!(compiled_blockid_count); + } + + version_list.push(blockref); + version_list.shrink_to_fit(); + + // By writing the new block to the iseq, the iseq now + // contains new references to Ruby objects. Run write barriers. + let iseq: VALUE = block.iseq.get().into(); + for dep in block.iter_cme_deps() { + obj_written!(iseq, dep.into()); + } + + // Run write barriers for all objects in generated code. + for offset in block.gc_obj_offsets.iter() { + let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(cb); + // Creating an unaligned pointer is well defined unlike in C. 
+ let value_address: *const VALUE = value_address.cast(); + + let object = unsafe { value_address.read_unaligned() }; + obj_written!(iseq, object); + } + + incr_counter!(compiled_block_count); + + // Mark code pages for code GC + let iseq_payload = get_iseq_payload(block.iseq.get()).unwrap(); + for page in cb.addrs_to_pages(block.start_addr, block.end_addr.get()) { + iseq_payload.pages.insert(page); + } +} + +/// Remove a block version from the version map of its parent ISEQ +fn remove_block_version(blockref: &BlockRef) { + let block = unsafe { blockref.as_ref() }; + let version_list = match get_version_list(block.get_blockid()) { + Some(version_list) => version_list, + None => return, + }; + + // Retain the versions that are not this one + version_list.retain(|other| blockref != other); +} + +impl JITState { + // Finish compiling and turn a jit state into a block + // note that the block is still not in shape. + pub fn into_block(self, end_insn_idx: IseqIdx, start_addr: CodePtr, end_addr: CodePtr, gc_obj_offsets: Vec<u32>) -> BlockRef { + // Allocate the block and get its pointer + let blockref: *mut MaybeUninit<Block> = Box::into_raw(Box::new(MaybeUninit::uninit())); + + incr_counter_by!(num_gc_obj_refs, gc_obj_offsets.len()); + + // Make the new block + let block = MaybeUninit::new(Block { + start_addr, + iseq: Cell::new(self.get_iseq()), + iseq_range: self.get_starting_insn_idx()..end_insn_idx, + ctx: self.get_starting_ctx(), + end_addr: Cell::new(end_addr), + incoming: MutableBranchList(Cell::default()), + gc_obj_offsets: gc_obj_offsets.into_boxed_slice(), + entry_exit: self.get_block_entry_exit(), + cme_dependencies: self.method_lookup_assumptions.into_iter().map(Cell::new).collect(), + // Pending branches => actual branches + outgoing: self.pending_outgoing.into_iter().map(|pending_out| { + let pending_out = Rc::try_unwrap(pending_out) + .ok().expect("all PendingBranchRefs should be unique when ready to construct a Block"); + pending_out.into_branch(NonNull::new(blockref as *mut Block).expect("no null from Box")) + }).collect() + }); + // Initialize it on the heap + // SAFETY: allocated with Box above + unsafe { ptr::write(blockref, block) }; + + // Block is initialized now. Note that MaybeUnint<T> has the same layout as T. 
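+ // (That is, MaybeUninit<Block> is layout-compatible with Block, so the
+ // *mut MaybeUninit<Block> from Box::into_raw above can be treated as a
+ // *mut Block now that ptr::write has initialized it.)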
+ let blockref = NonNull::new(blockref as *mut Block).expect("no null from Box"); + + // Track all the assumptions the block makes as invariants + if self.block_assumes_single_ractor { + track_single_ractor_assumption(blockref); + } + for bop in self.bop_assumptions { + track_bop_assumption(blockref, bop); + } + // SAFETY: just allocated it above + for cme in unsafe { blockref.as_ref() }.cme_dependencies.iter() { + track_method_lookup_stability_assumption(blockref, cme.get()); + } + if let Some(idlist) = self.stable_constant_names_assumption { + track_stable_constant_names_assumption(blockref, idlist); + } + for klass in self.no_singleton_class_assumptions { + track_no_singleton_class_assumption(blockref, klass); + } + if self.no_ep_escape { + track_no_ep_escape_assumption(blockref, self.iseq); + } + + blockref + } +} + +impl Block { + pub fn get_blockid(&self) -> BlockId { + BlockId { iseq: self.iseq.get(), idx: self.iseq_range.start } + } + + pub fn get_end_idx(&self) -> IseqIdx { + self.iseq_range.end + } + + pub fn get_ctx_count(&self) -> usize { + let mut count = 1; // block.ctx + for branch in self.outgoing.iter() { + // SAFETY: &self implies it's initialized + count += unsafe { branch.as_ref() }.get_stub_count(); + } + count + } + + #[allow(unused)] + pub fn get_start_addr(&self) -> CodePtr { + self.start_addr + } + + #[allow(unused)] + pub fn get_end_addr(&self) -> CodePtr { + self.end_addr.get() + } + + /// Get an immutable iterator over cme dependencies + pub fn iter_cme_deps(&self) -> impl Iterator<Item = CmePtr> + '_ { + self.cme_dependencies.iter().map(Cell::get) + } + + // Push an incoming branch ref and shrink the vector + fn push_incoming(&self, branch: BranchRef) { + self.incoming.push(branch); + } + + // Compute the size of the block code + pub fn code_size(&self) -> usize { + (self.end_addr.get().as_offset() - self.start_addr.as_offset()).try_into().unwrap() + } +} + +impl Context { + pub fn get_stack_size(&self) -> u8 { + self.stack_size + } + + pub fn set_stack_size(&mut self, stack_size: u8) { + self.stack_size = stack_size; + } + + /// Create a new Context that is compatible with self but doesn't have type information. + pub fn get_generic_ctx(&self) -> Context { + let mut generic_ctx = Context::default(); + generic_ctx.stack_size = self.stack_size; + generic_ctx.sp_offset = self.sp_offset; + generic_ctx.reg_temps = self.reg_temps; + if self.is_return_landing() { + generic_ctx.set_as_return_landing(); + } + if self.is_deferred() { + generic_ctx.mark_as_deferred(); + } + generic_ctx + } + + /// Create a new Context instance with a given stack_size and sp_offset adjusted + /// accordingly. This is useful when you want to virtually rewind a stack_size for + /// generating a side exit while considering past sp_offset changes on gen_save_sp. 
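+ /// For example (illustrative): starting from stack_size == 3 and sp_offset == 2,
+ /// with_stack_size(1) returns a Context with stack_size == 1 and sp_offset == 0.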
+ pub fn with_stack_size(&self, stack_size: u8) -> Context { + let mut ctx = *self; + ctx.sp_offset -= (ctx.get_stack_size() as isize - stack_size as isize) as i8; + ctx.stack_size = stack_size; + ctx + } + + pub fn get_sp_offset(&self) -> i8 { + self.sp_offset + } + + pub fn set_sp_offset(&mut self, offset: i8) { + self.sp_offset = offset; + } + + pub fn get_reg_temps(&self) -> RegTemps { + self.reg_temps + } + + pub fn set_reg_temps(&mut self, reg_temps: RegTemps) { + self.reg_temps = reg_temps; + } + + pub fn get_chain_depth(&self) -> u8 { + self.chain_depth_and_flags & CHAIN_DEPTH_MASK + } + + pub fn reset_chain_depth_and_defer(&mut self) { + self.chain_depth_and_flags &= !CHAIN_DEPTH_MASK; + self.chain_depth_and_flags &= !DEFER_BIT; + } + + pub fn increment_chain_depth(&mut self) { + if self.get_chain_depth() == CHAIN_DEPTH_MASK { + panic!("max block version chain depth reached!"); + } + self.chain_depth_and_flags += 1; + } + + pub fn set_as_return_landing(&mut self) { + self.chain_depth_and_flags |= RETURN_LANDING_BIT; + } + + pub fn clear_return_landing(&mut self) { + self.chain_depth_and_flags &= !RETURN_LANDING_BIT; + } + + pub fn is_return_landing(&self) -> bool { + self.chain_depth_and_flags & RETURN_LANDING_BIT != 0 + } + + pub fn mark_as_deferred(&mut self) { + self.chain_depth_and_flags |= DEFER_BIT; + } + + pub fn is_deferred(&self) -> bool { + self.chain_depth_and_flags & DEFER_BIT != 0 + } + + /// Get an operand for the adjusted stack pointer address + pub fn sp_opnd(&self, offset: i32) -> Opnd { + let offset = (self.sp_offset as i32 + offset) * SIZEOF_VALUE_I32; + return Opnd::mem(64, SP, offset); + } + + /// Get an operand for the adjusted environment pointer address using SP register. + /// This is valid only when a Binding object hasn't been created for the frame. + pub fn ep_opnd(&self, offset: i32) -> Opnd { + let ep_offset = self.get_stack_size() as i32 + 1; + self.sp_opnd(-ep_offset + offset) + } + + /// Stop using a register for a given stack temp. + /// This allows us to reuse the register for a value that we know is dead + /// and will no longer be used (e.g. popped stack temp). 
+ pub fn dealloc_temp_reg(&mut self, stack_idx: u8) { + if stack_idx < MAX_REG_TEMPS { + let mut reg_temps = self.get_reg_temps(); + reg_temps.set(stack_idx, false); + self.set_reg_temps(reg_temps); + } + } + + /// Get the type of an instruction operand + pub fn get_opnd_type(&self, opnd: YARVOpnd) -> Type { + match opnd { + SelfOpnd => self.self_type, + StackOpnd(idx) => { + assert!(idx < self.stack_size); + let stack_idx: usize = (self.stack_size - 1 - idx).into(); + + // If outside of tracked range, do nothing + if stack_idx >= MAX_TEMP_TYPES { + return Type::Unknown; + } + + let mapping = self.get_temp_mapping(stack_idx); + + match mapping.get_kind() { + MapToSelf => self.self_type, + MapToStack => mapping.get_type(), + MapToLocal => { + let idx = mapping.get_local_idx(); + assert!((idx as usize) < MAX_LOCAL_TYPES); + return self.get_local_type(idx.into()); + } + } + } + } + } + + /// Get the currently tracked type for a local variable + pub fn get_local_type(&self, local_idx: usize) -> Type { + if local_idx >= MAX_LOCAL_TYPES { + return Type::Unknown + } else { + // Each type is stored in 4 bits + let type_bits = (self.local_types >> (4 * local_idx)) & 0b1111; + unsafe { transmute::<u8, Type>(type_bits as u8) } + } + } + + /// Get the current temp mapping for a given stack slot + fn get_temp_mapping(&self, temp_idx: usize) -> TempMapping { + assert!(temp_idx < MAX_TEMP_TYPES); + + // Extract the temp mapping kind + let kind_bits = (self.temp_mapping_kind >> (2 * temp_idx)) & 0b11; + let temp_kind = unsafe { transmute::<u8, TempMappingKind>(kind_bits as u8) }; + + // Extract the payload bits (temp type or local idx) + let payload_bits = (self.temp_payload >> (4 * temp_idx)) & 0b1111; + + match temp_kind { + MapToSelf => TempMapping::map_to_self(), + + MapToStack => { + TempMapping::map_to_stack( + unsafe { transmute::<u8, Type>(payload_bits as u8) } + ) + } + + MapToLocal => { + TempMapping::map_to_local( + payload_bits as u8 + ) + } + } + } + + /// Get the current temp mapping for a given stack slot + fn set_temp_mapping(&mut self, temp_idx: usize, mapping: TempMapping) { + assert!(temp_idx < MAX_TEMP_TYPES); + + // Extract the kind bits + let mapping_kind = mapping.get_kind(); + let kind_bits = unsafe { transmute::<TempMappingKind, u8>(mapping_kind) }; + assert!(kind_bits <= 0b11); + + // Extract the payload bits + let payload_bits = match mapping_kind { + MapToSelf => 0, + + MapToStack => { + let t = mapping.get_type(); + unsafe { transmute::<Type, u8>(t) } + } + + MapToLocal => { + mapping.get_local_idx() + } + }; + assert!(payload_bits <= 0b1111); + + // Update the kind bits + { + let mask_bits = 0b11_u16 << (2 * temp_idx); + let shifted_bits = (kind_bits as u16) << (2 * temp_idx); + let all_kind_bits = self.temp_mapping_kind as u16; + self.temp_mapping_kind = (all_kind_bits & !mask_bits) | shifted_bits; + } + + // Update the payload bits + { + let mask_bits = 0b1111_u32 << (4 * temp_idx); + let shifted_bits = (payload_bits as u32) << (4 * temp_idx); + let all_payload_bits = self.temp_payload as u32; + self.temp_payload = (all_payload_bits & !mask_bits) | shifted_bits; + } + } + + /// Upgrade (or "learn") the type of an instruction operand + /// This value must be compatible and at least as specific as the previously known type. + /// If this value originated from self, or an lvar, the learned type will be + /// propagated back to its source. 
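+ /// For example, if the operand maps to a local and we learn that it is a Fixnum,
+ /// the local's tracked type is also upgraded to Fixnum via set_local_type().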
+ pub fn upgrade_opnd_type(&mut self, opnd: YARVOpnd, opnd_type: Type) {
+ // If type propagation is disabled, store no types
+ if get_option!(no_type_prop) {
+ return;
+ }
+
+ match opnd {
+ SelfOpnd => self.self_type.upgrade(opnd_type),
+ StackOpnd(idx) => {
+ assert!(idx < self.stack_size);
+ let stack_idx = (self.stack_size - 1 - idx) as usize;
+
+ // If outside of tracked range, do nothing
+ if stack_idx >= MAX_TEMP_TYPES {
+ return;
+ }
+
+ let mapping = self.get_temp_mapping(stack_idx);
+
+ match mapping.get_kind() {
+ MapToSelf => self.self_type.upgrade(opnd_type),
+ MapToStack => {
+ let mut temp_type = mapping.get_type();
+ temp_type.upgrade(opnd_type);
+ self.set_temp_mapping(stack_idx, TempMapping::map_to_stack(temp_type));
+ }
+ MapToLocal => {
+ let idx = mapping.get_local_idx() as usize;
+ assert!(idx < MAX_LOCAL_TYPES);
+ let mut new_type = self.get_local_type(idx);
+ new_type.upgrade(opnd_type);
+ self.set_local_type(idx, new_type);
+ // Re-attach MapToLocal for this StackOpnd(idx). set_local_type() detaches
+ // all MapToLocal mappings, including the one we're upgrading here.
+ self.set_opnd_mapping(opnd, mapping);
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ Get both the type and mapping (where the value originates) of an operand.
+ This can be used with stack_push_mapping or set_opnd_mapping to copy
+ a stack value's type while maintaining the mapping.
+ */
+ pub fn get_opnd_mapping(&self, opnd: YARVOpnd) -> TempMapping {
+ let opnd_type = self.get_opnd_type(opnd);
+
+ match opnd {
+ SelfOpnd => TempMapping::map_to_self(),
+ StackOpnd(idx) => {
+ assert!(idx < self.stack_size);
+ let stack_idx = (self.stack_size - 1 - idx) as usize;
+
+ if stack_idx < MAX_TEMP_TYPES {
+ self.get_temp_mapping(stack_idx)
+ } else {
+ // We can't know the source of this stack operand, so we assume it is
+ // a stack-only temporary. type will be UNKNOWN
+ assert!(opnd_type == Type::Unknown);
+ TempMapping::map_to_stack(opnd_type)
+ }
+ }
+ }
+ }
+
+ /// Overwrite both the type and mapping of a stack operand.
+ pub fn set_opnd_mapping(&mut self, opnd: YARVOpnd, mapping: TempMapping) { + match opnd { + SelfOpnd => unreachable!("self always maps to self"), + StackOpnd(idx) => { + assert!(idx < self.stack_size); + let stack_idx = (self.stack_size - 1 - idx) as usize; + + // If type propagation is disabled, store no types + if get_option!(no_type_prop) { + return; + } + + // If outside of tracked range, do nothing + if stack_idx >= MAX_TEMP_TYPES { + return; + } + + self.set_temp_mapping(stack_idx, mapping); + } + } + } + + /// Set the type of a local variable + pub fn set_local_type(&mut self, local_idx: usize, local_type: Type) { + // If type propagation is disabled, store no types + if get_option!(no_type_prop) { + return; + } + + if local_idx >= MAX_LOCAL_TYPES { + return + } + + // If any values on the stack map to this local we must detach them + for mapping_idx in 0..MAX_TEMP_TYPES { + let mapping = self.get_temp_mapping(mapping_idx); + let tm = match mapping.get_kind() { + MapToStack => mapping, + MapToSelf => mapping, + MapToLocal => { + let idx = mapping.get_local_idx(); + if idx as usize == local_idx { + let local_type = self.get_local_type(local_idx); + TempMapping::map_to_stack(local_type) + } else { + TempMapping::map_to_local(idx) + } + } + }; + self.set_temp_mapping(mapping_idx, tm); + } + + // Update the type bits + let type_bits = local_type as u32; + assert!(type_bits <= 0b1111); + let mask_bits = 0b1111_u32 << (4 * local_idx); + let shifted_bits = type_bits << (4 * local_idx); + self.local_types = (self.local_types & !mask_bits) | shifted_bits; + } + + /// Erase local variable type information + /// eg: because of a call we can't track + pub fn clear_local_types(&mut self) { + // When clearing local types we must detach any stack mappings to those + // locals. Even if local values may have changed, stack values will not. 
+ + for mapping_idx in 0..MAX_TEMP_TYPES { + let mapping = self.get_temp_mapping(mapping_idx); + if mapping.get_kind() == MapToLocal { + let local_idx = mapping.get_local_idx() as usize; + self.set_temp_mapping(mapping_idx, TempMapping::map_to_stack(self.get_local_type(local_idx))); + } + } + + // Clear the local types + self.local_types = 0; + } + + /// Return true if the code is inlined by the caller + pub fn inline(&self) -> bool { + self.inline_block != 0 + } + + /// Set a block ISEQ given to the Block of this Context + pub fn set_inline_block(&mut self, iseq: IseqPtr) { + self.inline_block = iseq as u64 + } + + /// Compute a difference score for two context objects + pub fn diff(&self, dst: &Context) -> TypeDiff { + // Self is the source context (at the end of the predecessor) + let src = self; + + // Can only lookup the first version in the chain + if dst.get_chain_depth() != 0 { + return TypeDiff::Incompatible; + } + + // Blocks with depth > 0 always produce new versions + // Sidechains cannot overlap + if src.get_chain_depth() != 0 { + return TypeDiff::Incompatible; + } + + if src.is_return_landing() != dst.is_return_landing() { + return TypeDiff::Incompatible; + } + + if src.is_deferred() != dst.is_deferred() { + return TypeDiff::Incompatible; + } + + if dst.stack_size != src.stack_size { + return TypeDiff::Incompatible; + } + + if dst.sp_offset != src.sp_offset { + return TypeDiff::Incompatible; + } + + if dst.reg_temps != src.reg_temps { + return TypeDiff::Incompatible; + } + + // Difference sum + let mut diff = 0; + + // Check the type of self + diff += match src.self_type.diff(dst.self_type) { + TypeDiff::Compatible(diff) => diff, + TypeDiff::Incompatible => return TypeDiff::Incompatible, + }; + + // Check the block to inline + if src.inline_block != dst.inline_block { + // find_block_version should not find existing blocks with different + // inline_block so that their yield will not be megamorphic. + return TypeDiff::Incompatible; + } + + // For each local type we track + for i in 0.. MAX_LOCAL_TYPES { + let t_src = src.get_local_type(i); + let t_dst = dst.get_local_type(i); + diff += match t_src.diff(t_dst) { + TypeDiff::Compatible(diff) => diff, + TypeDiff::Incompatible => return TypeDiff::Incompatible, + }; + } + + // For each value on the temp stack + for i in 0..src.stack_size { + let src_mapping = src.get_opnd_mapping(StackOpnd(i)); + let dst_mapping = dst.get_opnd_mapping(StackOpnd(i)); + + // If the two mappings aren't the same + if src_mapping != dst_mapping { + if dst_mapping.get_kind() == MapToStack { + // We can safely drop information about the source of the temp + // stack operand. 
+ diff += 1; + } else { + return TypeDiff::Incompatible; + } + } + + let src_type = src.get_opnd_type(StackOpnd(i)); + let dst_type = dst.get_opnd_type(StackOpnd(i)); + + diff += match src_type.diff(dst_type) { + TypeDiff::Compatible(diff) => diff, + TypeDiff::Incompatible => return TypeDiff::Incompatible, + }; + } + + return TypeDiff::Compatible(diff); + } + + pub fn two_fixnums_on_stack(&self, jit: &mut JITState) -> Option<bool> { + if jit.at_current_insn() { + let comptime_recv = jit.peek_at_stack(self, 1); + let comptime_arg = jit.peek_at_stack(self, 0); + return Some(comptime_recv.fixnum_p() && comptime_arg.fixnum_p()); + } + + let recv_type = self.get_opnd_type(StackOpnd(1)); + let arg_type = self.get_opnd_type(StackOpnd(0)); + match (recv_type, arg_type) { + (Type::Fixnum, Type::Fixnum) => Some(true), + (Type::Unknown | Type::UnknownImm, Type::Unknown | Type::UnknownImm) => None, + _ => Some(false), + } + } +} + +impl Assembler { + /// Push one new value on the temp stack with an explicit mapping + /// Return a pointer to the new stack top + pub fn stack_push_mapping(&mut self, mapping: TempMapping) -> Opnd { + // If type propagation is disabled, store no types + if get_option!(no_type_prop) { + return self.stack_push_mapping(mapping.without_type()); + } + + let stack_size: usize = self.ctx.stack_size.into(); + + // Keep track of the type and mapping of the value + if stack_size < MAX_TEMP_TYPES { + self.ctx.set_temp_mapping(stack_size, mapping); + + if mapping.get_kind() == MapToLocal { + let idx = mapping.get_local_idx(); + assert!((idx as usize) < MAX_LOCAL_TYPES); + } + } + + // Allocate a register to the stack operand + if self.ctx.stack_size < MAX_REG_TEMPS { + self.alloc_temp_reg(self.ctx.stack_size); + } + + self.ctx.stack_size += 1; + self.ctx.sp_offset += 1; + + return self.stack_opnd(0); + } + + /// Push one new value on the temp stack + /// Return a pointer to the new stack top + pub fn stack_push(&mut self, val_type: Type) -> Opnd { + return self.stack_push_mapping(TempMapping::map_to_stack(val_type)); + } + + /// Push the self value on the stack + pub fn stack_push_self(&mut self) -> Opnd { + return self.stack_push_mapping(TempMapping::map_to_self()); + } + + /// Push a local variable on the stack + pub fn stack_push_local(&mut self, local_idx: usize) -> Opnd { + if local_idx >= MAX_LOCAL_TYPES { + return self.stack_push(Type::Unknown); + } + + return self.stack_push_mapping(TempMapping::map_to_local(local_idx as u8)); + } + + // Pop N values off the stack + // Return a pointer to the stack top before the pop operation + pub fn stack_pop(&mut self, n: usize) -> Opnd { + assert!(n <= self.ctx.stack_size.into()); + + let top = self.stack_opnd(0); + + // Clear the types of the popped values + for i in 0..n { + let idx: usize = (self.ctx.stack_size as usize) - i - 1; + + if idx < MAX_TEMP_TYPES { + self.ctx.set_temp_mapping(idx, TempMapping::map_to_stack(Type::Unknown)); + } + } + + self.ctx.stack_size -= n as u8; + self.ctx.sp_offset -= n as i8; + + return top; + } + + /// Shift stack temps to remove a Symbol for #send. 
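+ /// For example, for send(:foo, arg) with argc == 1, the Symbol sits one slot below the
+ /// argument; the argument's mapping is shifted down over it and one stack slot is popped.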
+ pub fn shift_stack(&mut self, argc: usize) { + assert!(argc < self.ctx.stack_size.into()); + + let method_name_index = (self.ctx.stack_size as usize) - argc - 1; + + for i in method_name_index..(self.ctx.stack_size - 1) as usize { + if i < MAX_TEMP_TYPES { + let next_arg_mapping = if i + 1 < MAX_TEMP_TYPES { + self.ctx.get_temp_mapping(i + 1) + } else { + TempMapping::map_to_stack(Type::Unknown) + }; + self.ctx.set_temp_mapping(i, next_arg_mapping); + } + } + self.stack_pop(1); + } + + /// Get an operand pointing to a slot on the temp stack + pub fn stack_opnd(&self, idx: i32) -> Opnd { + Opnd::Stack { + idx, + num_bits: 64, + stack_size: self.ctx.stack_size, + sp_offset: self.ctx.sp_offset, + reg_temps: None, // push_insn will set this + } + } +} + +impl BlockId { + /// Print Ruby source location for debugging + #[cfg(debug_assertions)] + #[allow(dead_code)] + pub fn dump_src_loc(&self) { + unsafe { rb_yjit_dump_iseq_loc(self.iseq, self.idx as u32) } + } +} + +/// See [gen_block_series_body]. This simply counts compilation failures. +fn gen_block_series( + blockid: BlockId, + start_ctx: &Context, + ec: EcPtr, + cb: &mut CodeBlock, + ocb: &mut OutlinedCb, +) -> Option<BlockRef> { + let result = gen_block_series_body(blockid, start_ctx, ec, cb, ocb); + if result.is_none() { + incr_counter!(compilation_failure); + } + + result +} + +/// Immediately compile a series of block versions at a starting point and +/// return the starting block. +fn gen_block_series_body( + blockid: BlockId, + start_ctx: &Context, + ec: EcPtr, + cb: &mut CodeBlock, + ocb: &mut OutlinedCb, +) -> Option<BlockRef> { + // Keep track of all blocks compiled in this batch + const EXPECTED_BATCH_SIZE: usize = 4; + let mut batch = Vec::with_capacity(EXPECTED_BATCH_SIZE); + + // Generate code for the first block + let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb).ok()?; + batch.push(first_block); // Keep track of this block version + + // Add the block version to the VersionMap for this ISEQ + unsafe { add_block_version(first_block, cb) }; + + // Loop variable + let mut last_blockref = first_block; + loop { + // Get the last outgoing branch from the previous block. + let last_branchref = { + let last_block = unsafe { last_blockref.as_ref() }; + match last_block.outgoing.last() { + Some(branch) => *branch, + None => { + break; + } // If last block has no branches, stop. + } + }; + let last_branch = unsafe { last_branchref.as_ref() }; + + incr_counter!(block_next_count); + + // gen_direct_jump() can request a block to be placed immediately after by + // leaving a single target that has a `None` address. + // SAFETY: no mutation inside the unsafe block + let (requested_blockid, requested_ctx) = unsafe { + match (last_branch.targets[0].ref_unchecked(), last_branch.targets[1].ref_unchecked()) { + (Some(last_target), None) if last_target.get_address().is_none() => { + (last_target.get_blockid(), last_target.get_ctx()) + } + _ => { + // We're done when no fallthrough block is requested + break; + } + } + }; + + // Generate new block using context from the last branch. 
+ let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb); + + // If the block failed to compile + if result.is_err() { + // Remove previously compiled block + // versions from the version map + for blockref in batch { + remove_block_version(&blockref); + // SAFETY: block was well connected because it was in a version_map + unsafe { free_block(blockref, false) }; + } + + // Stop compiling + return None; + } + + let new_blockref = result.unwrap(); + + // Add the block version to the VersionMap for this ISEQ + unsafe { add_block_version(new_blockref, cb) }; + + // Connect the last branch and the new block + last_branch.targets[0].set(Some(Box::new(BranchTarget::Block(new_blockref)))); + unsafe { new_blockref.as_ref().incoming.push(last_branchref) }; + + // Track the block + batch.push(new_blockref); + + // Repeat with newest block + last_blockref = new_blockref; + } + + #[cfg(feature = "disasm")] + { + // If dump_iseq_disasm is active, see if this iseq's location matches the given substring. + // If so, we print the new blocks to the console. + if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() { + let iseq_location = iseq_get_location(blockid.iseq, blockid.idx); + if iseq_location.contains(substr) { + let last_block = unsafe { last_blockref.as_ref() }; + let iseq_range = &last_block.iseq_range; + println!("Compiling {} block(s) for {}, ISEQ offsets [{}, {})", batch.len(), iseq_location, iseq_range.start, iseq_range.end); + print!("{}", disasm_iseq_insn_range(blockid.iseq, iseq_range.start, iseq_range.end)); + } + } + } + + Some(first_block) +} + +/// Generate a block version that is an entry point inserted into an iseq +/// NOTE: this function assumes that the VM lock has been taken +/// If jit_exception is true, compile JIT code for handling exceptions. +/// See [jit_compile_exception] for details. +pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option<*const u8> { + // Compute the current instruction index based on the current PC + let cfp = unsafe { get_ec_cfp(ec) }; + let insn_idx: u16 = unsafe { + let ec_pc = get_cfp_pc(cfp); + iseq_pc_to_insn_idx(iseq, ec_pc)? + }; + let stack_size: u8 = unsafe { + u8::try_from(get_cfp_sp(cfp).offset_from(get_cfp_bp(cfp))).ok()? + }; + + // The entry context makes no assumptions about types + let blockid = BlockId { + iseq, + idx: insn_idx, + }; + + // Get the inline and outlined code blocks + let cb = CodegenGlobals::get_inline_cb(); + let ocb = CodegenGlobals::get_outlined_cb(); + + // Write the interpreter entry prologue. Might be NULL when out of memory. + let code_ptr = gen_entry_prologue(cb, ocb, iseq, insn_idx, jit_exception); + + // Try to generate code for the entry block + let mut ctx = Context::default(); + ctx.stack_size = stack_size; + let block = gen_block_series(blockid, &ctx, ec, cb, ocb); + + cb.mark_all_executable(); + ocb.unwrap().mark_all_executable(); + + match block { + // Compilation failed + None => { + // Trigger code GC. This entry point will be recompiled later. 
+ if get_option!(code_gc) { + cb.code_gc(ocb); + } + return None; + } + + // If the block contains no Ruby instructions + Some(block) => { + let block = unsafe { block.as_ref() }; + if block.iseq_range.is_empty() { + return None; + } + } + } + + // Count the number of entry points we compile + incr_counter!(compiled_iseq_entry); + + // Compilation successful and block not empty + code_ptr.map(|ptr| ptr.raw_ptr(cb)) +} + +// Change the entry's jump target from an entry stub to a next entry +pub fn regenerate_entry(cb: &mut CodeBlock, entryref: &EntryRef, next_entry: CodePtr) { + let mut asm = Assembler::new(); + asm_comment!(asm, "regenerate_entry"); + + // gen_entry_guard generates cmp + jne. We're rewriting only jne. + asm.jne(next_entry.into()); + + // Move write_pos to rewrite the entry + let old_write_pos = cb.get_write_pos(); + let old_dropped_bytes = cb.has_dropped_bytes(); + cb.set_write_ptr(unsafe { entryref.as_ref() }.start_addr); + cb.set_dropped_bytes(false); + asm.compile(cb, None).expect("can rewrite existing code"); + + // Rewind write_pos to the original one + assert_eq!(cb.get_write_ptr(), unsafe { entryref.as_ref() }.end_addr); + cb.set_pos(old_write_pos); + cb.set_dropped_bytes(old_dropped_bytes); +} + +pub type PendingEntryRef = Rc<PendingEntry>; + +/// Create a new entry reference for an ISEQ +pub fn new_pending_entry() -> PendingEntryRef { + let entry = PendingEntry { + uninit_entry: Box::new(MaybeUninit::uninit()), + start_addr: Cell::new(None), + end_addr: Cell::new(None), + }; + return Rc::new(entry); +} + +c_callable! { + /// Generated code calls this function with the SysV calling convention. + /// See [gen_call_entry_stub_hit]. + fn entry_stub_hit(entry_ptr: *const c_void, ec: EcPtr) -> *const u8 { + with_compile_time(|| { + with_vm_lock(src_loc!(), || { + let cb = CodegenGlobals::get_inline_cb(); + let ocb = CodegenGlobals::get_outlined_cb(); + + let addr = entry_stub_hit_body(entry_ptr, ec, cb, ocb) + .unwrap_or_else(|| { + // Trigger code GC (e.g. no space). + // This entry point will be recompiled later. + if get_option!(code_gc) { + cb.code_gc(ocb); + } + CodegenGlobals::get_stub_exit_code().raw_ptr(cb) + }); + + cb.mark_all_executable(); + ocb.unwrap().mark_all_executable(); + + addr + }) + }) + } +} + +/// Called by the generated code when an entry stub is executed +fn entry_stub_hit_body( + entry_ptr: *const c_void, + ec: EcPtr, + cb: &mut CodeBlock, + ocb: &mut OutlinedCb +) -> Option<*const u8> { + // Get ISEQ and insn_idx from the current ec->cfp + let cfp = unsafe { get_ec_cfp(ec) }; + let iseq = unsafe { get_cfp_iseq(cfp) }; + let insn_idx = iseq_pc_to_insn_idx(iseq, unsafe { get_cfp_pc(cfp) })?; + let stack_size: u8 = unsafe { + u8::try_from(get_cfp_sp(cfp).offset_from(get_cfp_bp(cfp))).ok()? + }; + + // Compile a new entry guard as a next entry + let next_entry = cb.get_write_ptr(); + let mut asm = Assembler::new(); + let pending_entry = gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?; + asm.compile(cb, Some(ocb))?; + + // Find or compile a block version + let blockid = BlockId { iseq, idx: insn_idx }; + let mut ctx = Context::default(); + ctx.stack_size = stack_size; + let blockref = match find_block_version(blockid, &ctx) { + // If an existing block is found, generate a jump to the block. + Some(blockref) => { + let mut asm = Assembler::new(); + asm.jmp(unsafe { blockref.as_ref() }.start_addr.into()); + asm.compile(cb, Some(ocb))?; + Some(blockref) + } + // If this block hasn't yet been compiled, generate blocks after the entry guard. 
+ None => gen_block_series(blockid, &ctx, ec, cb, ocb), + }; + + // Commit or retry the entry + if blockref.is_some() { + // Regenerate the previous entry + let entryref = NonNull::<Entry>::new(entry_ptr as *mut Entry).expect("Entry should not be null"); + regenerate_entry(cb, &entryref, next_entry); + + // Write an entry to the heap and push it to the ISEQ + let pending_entry = Rc::try_unwrap(pending_entry).ok().expect("PendingEntry should be unique"); + get_or_create_iseq_payload(iseq).entries.push(pending_entry.into_entry()); + } + + // Let the stub jump to the block + blockref.map(|block| unsafe { block.as_ref() }.start_addr.raw_ptr(cb)) +} + +/// Generate a stub that calls entry_stub_hit +pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option<CodePtr> { + let ocb = ocb.unwrap(); + + let mut asm = Assembler::new(); + asm_comment!(asm, "entry stub hit"); + + asm.mov(C_ARG_OPNDS[0], entry_address.into()); + + // Jump to trampoline to call entry_stub_hit() + // Not really a side exit, just don't need a padded jump here. + asm.jmp(CodegenGlobals::get_entry_stub_hit_trampoline().as_side_exit()); + + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) +} + +/// A trampoline used by gen_entry_stub. entry_stub_hit may issue Code GC, so +/// it's useful for Code GC to call entry_stub_hit from a globally shared code. +pub fn gen_entry_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> { + let ocb = ocb.unwrap(); + let mut asm = Assembler::new(); + + // See gen_entry_guard for how it's used. + asm_comment!(asm, "entry_stub_hit() trampoline"); + let jump_addr = asm.ccall(entry_stub_hit as *mut u8, vec![C_ARG_OPNDS[0], EC]); + + // Jump to the address returned by the entry_stub_hit() call + asm.jmp_opnd(jump_addr); + + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) +} + +/// Generate code for a branch, possibly rewriting and changing the size of it +fn regenerate_branch(cb: &mut CodeBlock, branch: &Branch) { + // Remove old comments + cb.remove_comments(branch.start_addr, branch.end_addr.get()); + + // SAFETY: having a &Branch implies branch.block is initialized. + let block = unsafe { branch.block.as_ref() }; + + let branch_terminates_block = branch.end_addr.get() == block.get_end_addr(); + + // Generate the branch + let mut asm = Assembler::new(); + asm_comment!(asm, "regenerate_branch"); + branch.gen_fn.call( + &mut asm, + Target::CodePtr(branch.get_target_address(0).unwrap()), + branch.get_target_address(1).map(|addr| Target::CodePtr(addr)), + ); + + // If the entire block is the branch and the block could be invalidated, + // we need to pad to ensure there is room for invalidation patching. + if branch.start_addr == block.start_addr && branch_terminates_block && block.entry_exit.is_some() { + asm.pad_inval_patch(); + } + + // Rewrite the branch + let old_write_pos = cb.get_write_pos(); + let old_dropped_bytes = cb.has_dropped_bytes(); + cb.set_write_ptr(branch.start_addr); + cb.set_dropped_bytes(false); + asm.compile(cb, None).expect("can rewrite existing code"); + let new_end_addr = cb.get_write_ptr(); + + branch.end_addr.set(new_end_addr); + + // The block may have shrunk after the branch is rewritten + if branch_terminates_block { + // Adjust block size + block.end_addr.set(new_end_addr); + } + + // cb.write_pos is both a write cursor and a marker for the end of + // everything written out so far. Leave cb->write_pos at the end of the + // block before returning. 
This function only ever bump or retain the end + // of block marker since that's what the majority of callers want. When the + // branch sits at the very end of the codeblock and it shrinks after + // regeneration, it's up to the caller to drop bytes off the end to + // not leave a gap and implement branch->shape. + if old_write_pos > cb.get_write_pos() { + // We rewound cb->write_pos to generate the branch, now restore it. + cb.set_pos(old_write_pos); + cb.set_dropped_bytes(old_dropped_bytes); + } else { + // The branch sits at the end of cb and consumed some memory. + // Keep cb.write_pos. + } + + branch.assert_layout(); +} + +pub type PendingBranchRef = Rc<PendingBranch>; + +/// Create a new outgoing branch entry for a block +fn new_pending_branch(jit: &mut JITState, gen_fn: BranchGenFn) -> PendingBranchRef { + let branch = Rc::new(PendingBranch { + uninit_branch: Box::new(MaybeUninit::uninit()), + gen_fn, + start_addr: Cell::new(None), + end_addr: Cell::new(None), + targets: [Cell::new(None), Cell::new(None)], + }); + + incr_counter!(compiled_branch_count); // TODO not true. count at finalize time + + // Add to the list of outgoing branches for the block + jit.queue_outgoing_branch(branch.clone()); + + branch +} + +c_callable! { + /// Generated code calls this function with the SysV calling convention. + /// See [gen_branch_stub]. + fn branch_stub_hit( + branch_ptr: *const c_void, + target_idx: u32, + ec: EcPtr, + ) -> *const u8 { + with_vm_lock(src_loc!(), || { + with_compile_time(|| { branch_stub_hit_body(branch_ptr, target_idx, ec) }) + }) + } +} + +/// Called by the generated code when a branch stub is executed +/// Triggers compilation of branches and code patching +fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -> *const u8 { + if get_option!(dump_insns) { + println!("branch_stub_hit"); + } + + let branch_ref = NonNull::<Branch>::new(branch_ptr as *mut Branch) + .expect("Branches should not be null"); + + // SAFETY: We have the VM lock, and the branch is initialized by the time generated + // code calls this function. + let branch = unsafe { branch_ref.as_ref() }; + let branch_size_on_entry = branch.code_size(); + let housing_block = unsafe { branch.block.as_ref() }; + + let target_idx: usize = target_idx.as_usize(); + let target_branch_shape = match target_idx { + 0 => BranchShape::Next0, + 1 => BranchShape::Next1, + _ => unreachable!("target_idx < 2 must always hold"), + }; + + let cb = CodegenGlobals::get_inline_cb(); + let ocb = CodegenGlobals::get_outlined_cb(); + + let (target_blockid, target_ctx): (BlockId, Context) = unsafe { + // SAFETY: no mutation of the target's Cell. Just reading out data. 
+ let target = branch.targets[target_idx].ref_unchecked().as_ref().unwrap(); + + // If this branch has already been patched, return the dst address + // Note: recursion can cause the same stub to be hit multiple times + if let BranchTarget::Block(_) = target.as_ref() { + return target.get_address().unwrap().raw_ptr(cb); + } + + (target.get_blockid(), target.get_ctx()) + }; + + let (cfp, original_interp_sp) = unsafe { + let cfp = get_ec_cfp(ec); + let original_interp_sp = get_cfp_sp(cfp); + + let running_iseq = get_cfp_iseq(cfp); + assert_eq!(running_iseq, target_blockid.iseq as _, "each stub expects a particular iseq"); + + let reconned_pc = rb_iseq_pc_at_idx(running_iseq, target_blockid.idx.into()); + let reconned_sp = original_interp_sp.offset(target_ctx.sp_offset.into()); + // Unlike in the interpreter, our `leave` doesn't write to the caller's + // SP -- we do it in the returned-to code. Account for this difference. + let reconned_sp = reconned_sp.add(target_ctx.is_return_landing().into()); + + // Update the PC in the current CFP, because it may be out of sync in JITted code + rb_set_cfp_pc(cfp, reconned_pc); + + // :stub-sp-flush: + // Generated code do stack operations without modifying cfp->sp, while the + // cfp->sp tells the GC what values on the stack to root. Generated code + // generally takes care of updating cfp->sp when it calls runtime routines that + // could trigger GC, but it's inconvenient to do it before calling this function. + // So we do it here instead. + rb_set_cfp_sp(cfp, reconned_sp); + + // Bail if code GC is disabled and we've already run out of spaces. + if !get_option!(code_gc) && (cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes()) { + return CodegenGlobals::get_stub_exit_code().raw_ptr(cb); + } + + // Bail if we're about to run out of native stack space. + // We've just reconstructed interpreter state. + if rb_ec_stack_check(ec as _) != 0 { + return CodegenGlobals::get_stub_exit_code().raw_ptr(cb); + } + + (cfp, original_interp_sp) + }; + + // Try to find an existing compiled version of this block + let mut block = find_block_version(target_blockid, &target_ctx); + let mut branch_modified = false; + // If this block hasn't yet been compiled + if block.is_none() { + let branch_old_shape = branch.gen_fn.get_shape(); + + // If the new block can be generated right after the branch (at cb->write_pos) + if cb.get_write_ptr() == branch.end_addr.get() { + // This branch should be terminating its block + assert!(branch.end_addr == housing_block.end_addr); + + // Change the branch shape to indicate the target block will be placed next + branch.gen_fn.set_shape(target_branch_shape); + + // Rewrite the branch with the new, potentially more compact shape + regenerate_branch(cb, branch); + branch_modified = true; + + // Ensure that the branch terminates the codeblock just like + // before entering this if block. This drops bytes off the end + // in case we shrank the branch when regenerating. + cb.set_write_ptr(branch.end_addr.get()); + } + + // Compile the new block version + block = gen_block_series(target_blockid, &target_ctx, ec, cb, ocb); + + if block.is_none() && branch_modified { + // We couldn't generate a new block for the branch, but we modified the branch. + // Restore the branch by regenerating it. 
+ branch.gen_fn.set_shape(branch_old_shape);
+ regenerate_branch(cb, branch);
+ }
+ }
+
+ // Finish building the new block
+ let dst_addr = match block {
+ Some(new_block) => {
+ let new_block = unsafe { new_block.as_ref() };
+
+ // Branch shape should reflect layout
+ assert!(!(branch.gen_fn.get_shape() == target_branch_shape && new_block.start_addr != branch.end_addr.get()));
+
+ // Add this branch to the list of incoming branches for the target
+ new_block.push_incoming(branch_ref);
+
+ // Update the branch target address
+ branch.targets[target_idx].set(Some(Box::new(BranchTarget::Block(new_block.into()))));
+
+ // Rewrite the branch with the new jump target address
+ regenerate_branch(cb, branch);
+
+ // Restore interpreter sp, since the code hitting the stub expects the original.
+ unsafe { rb_set_cfp_sp(cfp, original_interp_sp) };
+
+ new_block.start_addr
+ }
+ None => {
+ // Trigger code GC. The whole ISEQ will be recompiled later.
+ // We shouldn't trigger it in the middle of compilation in branch_stub_hit
+ // because incomplete code could be used when cb.dropped_bytes is flipped
+ // by code GC. So this place, after all compilation, is the safest place
+ // to hook code GC on branch_stub_hit.
+ if get_option!(code_gc) {
+ cb.code_gc(ocb);
+ }
+
+ // Failed to service the stub by generating a new block so now we
+ // need to exit to the interpreter at the stubbed location. We are
+ // intentionally *not* restoring original_interp_sp. At the time of
+ // writing, reconstructing interpreter state only involves setting
+ // cfp->sp and cfp->pc. We set both before trying to generate the
+ // block. All there is left to do to exit is to pop the native
+ // frame. We do that in code_for_exit_from_stub.
+ CodegenGlobals::get_stub_exit_code()
+ }
+ };
+
+ ocb.unwrap().mark_all_executable();
+ cb.mark_all_executable();
+
+ let new_branch_size = branch.code_size();
+ assert!(
+ new_branch_size <= branch_size_on_entry,
+ "branch stubs should never enlarge branches (start_addr: {:?}, old_size: {}, new_size: {})",
+ branch.start_addr.raw_ptr(cb), branch_size_on_entry, new_branch_size,
+ );
+
+ // Return a pointer to the compiled block version
+ dst_addr.raw_ptr(cb)
+}
+
+/// Generate a "stub", a piece of code that calls the compiler back when run.
+/// A piece of code that redeems for more code; a thunk for code.
+fn gen_branch_stub(
+ ctx: &Context,
+ ocb: &mut OutlinedCb,
+ branch_struct_address: usize,
+ target_idx: u32,
+) -> Option<CodePtr> {
+ let ocb = ocb.unwrap();
+
+ let mut asm = Assembler::new();
+ asm.ctx = *ctx;
+ asm.set_reg_temps(ctx.reg_temps);
+ asm_comment!(asm, "branch stub hit");
+
+ if asm.ctx.is_return_landing() {
+ asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP));
+ let top = asm.stack_push(Type::Unknown);
+ asm.mov(top, C_RET_OPND);
+ }
+
+ // Save caller-saved registers before C_ARG_OPNDS get clobbered.
+ // Spill all registers for consistency with the trampoline.
+ for &reg in caller_saved_temp_regs() {
+ asm.cpush(Opnd::Reg(reg));
+ }
+
+ // Spill temps to the VM stack as well for jit.peek_at_stack()
+ asm.spill_temps();
+
+ // Set up the arguments unique to this stub for:
+ //
+ // branch_stub_hit(branch_ptr, target_idx, ec)
+ //
+ // Bake pointer to Branch into output code.
+ // We make sure the block housing the branch is still alive when branch_stub_hit() is running.
+ asm.mov(C_ARG_OPNDS[0], branch_struct_address.into());
+ asm.mov(C_ARG_OPNDS[1], target_idx.into());
+
+ // Jump to trampoline to call branch_stub_hit()
+ // Not really a side exit, just don't need a padded jump here.
+ asm.jmp(CodegenGlobals::get_branch_stub_hit_trampoline().as_side_exit());
+
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
+}
+
+pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> {
+ let ocb = ocb.unwrap();
+ let mut asm = Assembler::new();
+
+ // For `branch_stub_hit(branch_ptr, target_idx, ec)`,
+ // `branch_ptr` and `target_idx` are different for each stub,
+ // but the call and what's after are the same. This trampoline
+ // is the unchanging part.
+ // Since this trampoline is static, it allows code GC inside
+ // branch_stub_hit() to free stubs without problems.
+ asm_comment!(asm, "branch_stub_hit() trampoline");
+ let stub_hit_ret = asm.ccall(
+ branch_stub_hit as *mut u8,
+ vec![
+ C_ARG_OPNDS[0],
+ C_ARG_OPNDS[1],
+ EC,
+ ]
+ );
+ let jump_addr = asm.load(stub_hit_ret);
+
+ // Restore caller-saved registers for stack temps
+ for &reg in caller_saved_temp_regs().rev() {
+ asm.cpop_into(Opnd::Reg(reg));
+ }
+
+ // Jump to the address returned by the branch_stub_hit() call
+ asm.jmp_opnd(jump_addr);
+
+ // HACK: popping into C_RET_REG clobbers the return value of branch_stub_hit() we need to jump
+ // to, so we need a scratch register to preserve it. This extends the live range of the C
+ // return register so we get something else for the return value.
+ let _ = asm.live_reg_opnd(stub_hit_ret);
+
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
+}
+
+/// Return registers to be pushed and popped on branch_stub_hit.
+pub fn caller_saved_temp_regs() -> impl Iterator<Item = &'static Reg> + DoubleEndedIterator {
+ let temp_regs = Assembler::get_temp_regs().iter();
+ let len = temp_regs.len();
+ // The return value gen_leave() leaves in C_RET_REG
+ // needs to survive the branch_stub_hit() call.
+ let regs = temp_regs.chain(std::iter::once(&C_RET_REG)); + + // On x86_64, maintain 16-byte stack alignment + if cfg!(target_arch = "x86_64") && len % 2 == 0 { + static ONE_MORE: [Reg; 1] = [C_RET_REG]; + regs.chain(ONE_MORE.iter()) + } else { + regs.chain(&[]) + } +} + +impl Assembler +{ + /// Mark the start position of a patchable entry point in the machine code + pub fn mark_entry_start(&mut self, entryref: &PendingEntryRef) { + // We need to create our own entry rc object + // so that we can move the closure below + let entryref = entryref.clone(); + + self.pos_marker(move |code_ptr, _| { + entryref.start_addr.set(Some(code_ptr)); + }); + } + + /// Mark the end position of a patchable entry point in the machine code + pub fn mark_entry_end(&mut self, entryref: &PendingEntryRef) { + // We need to create our own entry rc object + // so that we can move the closure below + let entryref = entryref.clone(); + + self.pos_marker(move |code_ptr, _| { + entryref.end_addr.set(Some(code_ptr)); + }); + } + + // Mark the start position of a patchable branch in the machine code + fn mark_branch_start(&mut self, branchref: &PendingBranchRef) + { + // We need to create our own branch rc object + // so that we can move the closure below + let branchref = branchref.clone(); + + self.pos_marker(move |code_ptr, _| { + branchref.start_addr.set(Some(code_ptr)); + }); + } + + // Mark the end position of a patchable branch in the machine code + fn mark_branch_end(&mut self, branchref: &PendingBranchRef) + { + // We need to create our own branch rc object + // so that we can move the closure below + let branchref = branchref.clone(); + + self.pos_marker(move |code_ptr, _| { + branchref.end_addr.set(Some(code_ptr)); + }); + } +} + +pub fn gen_branch( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + target0: BlockId, + ctx0: &Context, + target1: Option<BlockId>, + ctx1: Option<&Context>, + gen_fn: BranchGenFn, +) { + let branch = new_pending_branch(jit, gen_fn); + + // Get the branch targets or stubs + let target0_addr = branch.set_target(0, target0, ctx0, ocb); + let target1_addr = if let Some(ctx) = ctx1 { + let addr = branch.set_target(1, target1.unwrap(), ctx, ocb); + if addr.is_none() { + // target1 requested but we're out of memory. 
+ // Avoid unwrap() in gen_fn() + return; + } + + addr + } else { None }; + + // Call the branch generation function + asm.mark_branch_start(&branch); + if let Some(dst_addr) = target0_addr { + branch.gen_fn.call(asm, Target::CodePtr(dst_addr), target1_addr.map(|addr| Target::CodePtr(addr))); + } + asm.mark_branch_end(&branch); +} + +pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm: &mut Assembler) { + let branch = new_pending_branch(jit, BranchGenFn::JumpToTarget0(Cell::new(BranchShape::Default))); + let maybe_block = find_block_version(target0, ctx); + + // If the block already exists + let new_target = if let Some(blockref) = maybe_block { + let block = unsafe { blockref.as_ref() }; + let block_addr = block.start_addr; + + // Call the branch generation function + asm_comment!(asm, "gen_direct_jmp: existing block"); + asm.mark_branch_start(&branch); + branch.gen_fn.call(asm, Target::CodePtr(block_addr), None); + asm.mark_branch_end(&branch); + + BranchTarget::Block(blockref) + } else { + // The branch is effectively empty (a noop) + asm_comment!(asm, "gen_direct_jmp: fallthrough"); + asm.mark_branch_start(&branch); + asm.mark_branch_end(&branch); + branch.gen_fn.set_shape(BranchShape::Next0); + + // `None` in new_target.address signals gen_block_series() to + // compile the target block right after this one (fallthrough). + BranchTarget::Stub(Box::new(BranchStub { + address: None, + ctx: *ctx, + iseq: Cell::new(target0.iseq), + iseq_idx: target0.idx, + })) + }; + + branch.targets[0].set(Some(Box::new(new_target))); +} + +/// Create a stub to force the code up to this point to be executed +pub fn defer_compilation( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, +) { + if asm.ctx.is_deferred() { + panic!("Double defer!"); + } + + let mut next_ctx = asm.ctx; + + next_ctx.mark_as_deferred(); + + let branch = new_pending_branch(jit, BranchGenFn::JumpToTarget0(Cell::new(BranchShape::Default))); + + let blockid = BlockId { + iseq: jit.get_iseq(), + idx: jit.get_insn_idx(), + }; + + // Likely a stub due to the increased chain depth + let target0_address = branch.set_target(0, blockid, &next_ctx, ocb); + + // Pad the block if it has the potential to be invalidated. This must be + // done before gen_fn() in case the jump is overwritten by a fallthrough. + if jit.block_entry_exit.is_some() { + asm.pad_inval_patch(); + } + + // Call the branch generation function + asm_comment!(asm, "defer_compilation"); + asm.mark_branch_start(&branch); + if let Some(dst_addr) = target0_address { + branch.gen_fn.call(asm, Target::CodePtr(dst_addr), None); + } + asm.mark_branch_end(&branch); + + // If the block we're deferring from is empty + if jit.get_starting_insn_idx() == jit.get_insn_idx() { + incr_counter!(defer_empty_count); + } + + incr_counter!(defer_count); +} + +/// Remove a block from the live control flow graph. +/// Block must be initialized and incoming/outgoing edges +/// must also point to initialized blocks. 
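+/// For example, free_block() (when the graph is intact) and invalidate_block_version()
+/// use this to unlink a block from its predecessors and successors.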
+unsafe fn remove_from_graph(blockref: BlockRef) {
+ let block = unsafe { blockref.as_ref() };
+
+ // Remove this block from the predecessor's targets
+ for pred_branchref in block.incoming.0.take().iter() {
+ // Branch from the predecessor to us
+ let pred_branch = unsafe { pred_branchref.as_ref() };
+
+ // If this is us, nullify the target block
+ for target_idx in 0..pred_branch.targets.len() {
+ // SAFETY: no mutation inside unsafe
+ let target_is_us = unsafe {
+ pred_branch.targets[target_idx]
+ .ref_unchecked()
+ .as_ref()
+ .and_then(|target| target.get_block())
+ .and_then(|target_block| (target_block == blockref).then(|| ()))
+ .is_some()
+ };
+
+ if target_is_us {
+ pred_branch.targets[target_idx].set(None);
+ }
+ }
+ }
+
+ // For each outgoing branch
+ for out_branchref in block.outgoing.iter() {
+ let out_branch = unsafe { out_branchref.as_ref() };
+ // For each successor block
+ for out_target in out_branch.targets.iter() {
+ // SAFETY: copying out an Option<BlockRef>. No mutation.
+ let succ_block: Option<BlockRef> = unsafe {
+ out_target.ref_unchecked().as_ref().and_then(|target| target.get_block())
+ };
+
+ if let Some(succ_block) = succ_block {
+ // Remove outgoing branch from the successor's incoming list
+ // SAFETY: caller promises the block has valid outgoing edges.
+ let succ_block = unsafe { succ_block.as_ref() };
+ // Temporarily move out of succ_block.incoming.
+ let succ_incoming = succ_block.incoming.0.take();
+ let mut succ_incoming = succ_incoming.into_vec();
+ succ_incoming.retain(|branch| branch != out_branchref);
+ succ_block.incoming.0.set(succ_incoming.into_boxed_slice()); // allocs. Rely on oom=abort
+ }
+ }
+ }
+}
+
+/// Tear down a block and deallocate it.
+/// Caller has to ensure that the code tracked by the block is not
+/// running, as running code may hit [branch_stub_hit], which expects
+/// [Branch] to be live.
+///
+/// We currently ensure this through the `jit_cont` system in cont.c
+/// and sometimes through the GC calling [rb_yjit_iseq_free]. The GC
+/// has proven that an ISeq is not running if it calls us to free it.
+///
+/// For delayed deallocation, since dead blocks don't keep the
+/// blocks they refer to alive, by the time we get here their outgoing
+/// edges may be dangling. Pass `graph_intact=false` in such cases.
+pub unsafe fn free_block(blockref: BlockRef, graph_intact: bool) {
+ // Careful with order here.
+ // First, remove all pointers to the referent block
+ unsafe {
+ block_assumptions_free(blockref);
+
+ if graph_intact {
+ remove_from_graph(blockref);
+ }
+ }
+
+ // SAFETY: we should now have a unique pointer to the block
+ unsafe { dealloc_block(blockref) }
+}
+
+/// Deallocate a block and its outgoing branches. Blocks own their outgoing branches.
+/// Caller must ensure that we have unique ownership for the referent block +unsafe fn dealloc_block(blockref: BlockRef) { + unsafe { + for outgoing in blockref.as_ref().outgoing.iter() { + // this Box::from_raw matches the Box::into_raw from PendingBranch::into_branch + mem::drop(Box::from_raw(outgoing.as_ptr())); + } + } + + // Deallocate the referent Block + unsafe { + // this Box::from_raw matches the Box::into_raw from JITState::into_block + mem::drop(Box::from_raw(blockref.as_ptr())); + } +} + +// Some runtime checks for integrity of a program location +pub fn verify_blockid(blockid: BlockId) { + unsafe { + assert!(rb_IMEMO_TYPE_P(blockid.iseq.into(), imemo_iseq) != 0); + assert!(u32::from(blockid.idx) < get_iseq_encoded_size(blockid.iseq)); + } +} + +// Invalidate one specific block version +pub fn invalidate_block_version(blockref: &BlockRef) { + //ASSERT_vm_locking(); + + // TODO: want to assert that all other ractors are stopped here. Can't patch + // machine code that some other thread is running. + + let block = unsafe { (*blockref).as_ref() }; + let id_being_invalidated = block.get_blockid(); + let mut cb = CodegenGlobals::get_inline_cb(); + let ocb = CodegenGlobals::get_outlined_cb(); + + verify_blockid(id_being_invalidated); + + #[cfg(feature = "disasm")] + { + // If dump_iseq_disasm is specified, print to console that blocks for matching ISEQ names were invalidated. + if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() { + let iseq_range = &block.iseq_range; + let iseq_location = iseq_get_location(block.iseq.get(), iseq_range.start); + if iseq_location.contains(substr) { + println!("Invalidating block from {}, ISEQ offsets [{}, {})", iseq_location, iseq_range.start, iseq_range.end); + } + } + } + + // Remove this block from the version array + remove_block_version(blockref); + + // Get a pointer to the generated code for this block + let block_start = block.start_addr; + + // Make the start of the block do an exit. This handles OOM situations + // and some cases where we can't efficiently patch incoming branches. + // Do this first, since in case there is a fallthrough branch into this + // block, the patching loop below can overwrite the start of the block. + // In those situations, there is hopefully no jumps to the start of the block + // after patching as the start of the block would be in the middle of something + // generated by branch_t::gen_fn. + let block_entry_exit = block + .entry_exit + .expect("invalidation needs the entry_exit field"); + { + let block_end = block.get_end_addr(); + + if block_start == block_entry_exit { + // Some blocks exit on entry. Patching a jump to the entry at the + // entry makes an infinite loop. + } else { + // Patch in a jump to block.entry_exit. 
+ + let cur_pos = cb.get_write_ptr(); + let cur_dropped_bytes = cb.has_dropped_bytes(); + cb.set_write_ptr(block_start); + + let mut asm = Assembler::new(); + asm.jmp(block_entry_exit.as_side_exit()); + cb.set_dropped_bytes(false); + asm.compile(&mut cb, Some(ocb)).expect("can rewrite existing code"); + + assert!( + cb.get_write_ptr() <= block_end, + "invalidation wrote past end of block (code_size: {:?}, new_size: {}, start_addr: {:?})", + block.code_size(), + cb.get_write_ptr().as_offset() - block_start.as_offset(), + block.start_addr.raw_ptr(cb), + ); + cb.set_write_ptr(cur_pos); + cb.set_dropped_bytes(cur_dropped_bytes); + } + } + + // For each incoming branch + for branchref in block.incoming.0.take().iter() { + let branch = unsafe { branchref.as_ref() }; + let target_idx = if branch.get_target_address(0) == Some(block_start) { + 0 + } else { + 1 + }; + + // Assert that the incoming branch indeed points to the block being invalidated + // SAFETY: no mutation. + unsafe { + let incoming_target = branch.targets[target_idx].ref_unchecked().as_ref().unwrap(); + assert_eq!(Some(block_start), incoming_target.get_address()); + if let Some(incoming_block) = &incoming_target.get_block() { + assert_eq!(blockref, incoming_block); + } + } + + // Create a stub for this branch target + let stub_addr = gen_branch_stub(&block.ctx, ocb, branchref.as_ptr() as usize, target_idx as u32); + + // In case we were unable to generate a stub (e.g. OOM). Use the block's + // exit instead of a stub for the block. It's important that we + // still patch the branch in this situation so stubs are unique + // to branches. Think about what could go wrong if we run out of + // memory in the middle of this loop. + let stub_addr = stub_addr.unwrap_or(block_entry_exit); + + // Fill the branch target with a stub + branch.targets[target_idx].set(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub { + address: Some(stub_addr), + iseq: block.iseq.clone(), + iseq_idx: block.iseq_range.start, + ctx: block.ctx, + }))))); + + // Check if the invalidated block immediately follows + let target_next = block.start_addr == branch.end_addr.get(); + + if target_next { + // The new block will no longer be adjacent. + // Note that we could be enlarging the branch and writing into the + // start of the block being invalidated. + branch.gen_fn.set_shape(BranchShape::Default); + } + + // Rewrite the branch with the new jump target address + let old_branch_size = branch.code_size(); + regenerate_branch(cb, branch); + + if target_next && branch.end_addr > block.end_addr { + panic!("yjit invalidate rewrote branch past end of invalidated block: {:?} (code_size: {})", branch, block.code_size()); + } + if !target_next && branch.code_size() > old_branch_size { + panic!( + "invalidated branch grew in size (start_addr: {:?}, old_size: {}, new_size: {})", + branch.start_addr.raw_ptr(cb), old_branch_size, branch.code_size() + ); + } + } + + // Clear out the JIT func so that we can recompile later and so the + // interpreter will run the iseq. + // + // Only clear the jit_func when we're invalidating the JIT entry block. + // We only support compiling iseqs from index 0 right now. So entry + // points will always have an instruction index of 0. 
We'll need to + // change this in the future when we support optional parameters because + // they enter the function with a non-zero PC + if block.iseq_range.start == 0 { + // TODO: + // We could reset the exec counter to zero in rb_iseq_reset_jit_func() + // so that we eventually compile a new entry point when useful + unsafe { rb_iseq_reset_jit_func(block.iseq.get()) }; + } + + // FIXME: + // Call continuation addresses on the stack can also be atomically replaced by jumps going to the stub. + + // SAFETY: This block was in a version_map earlier + // in this function before we removed it, so it's well connected. + unsafe { remove_from_graph(*blockref) }; + + delayed_deallocation(*blockref); + + ocb.unwrap().mark_all_executable(); + cb.mark_all_executable(); + + incr_counter!(invalidation_count); +} + +// We cannot deallocate blocks immediately after invalidation since there +// could be stubs waiting to access branch pointers. Return stubs can do +// this since patching the code for setting up return addresses does not +// affect old return addresses that are already set up to use potentially +// invalidated branch pointers. Example: +// def foo(n) +// if n == 2 +// # 1.times.each to create a cfunc frame to preserve the JIT frame +// # which will return to a stub housed in an invalidated block +// return 1.times.each { Object.define_method(:foo) {} } +// end +// +// foo(n + 1) +// end +// p foo(1) +pub fn delayed_deallocation(blockref: BlockRef) { + block_assumptions_free(blockref); + + let payload = get_iseq_payload(unsafe { blockref.as_ref() }.iseq.get()).unwrap(); + payload.dead_blocks.push(blockref); +} + +trait RefUnchecked { + type Contained; + unsafe fn ref_unchecked(&self) -> &Self::Contained; +} + +impl<T> RefUnchecked for Cell<T> { + type Contained = T; + + /// Gives a reference to the contents of a [Cell]. + /// Dangerous; please include a SAFETY note. + /// + /// An easy way to use this without triggering Undefined Behavior is to + /// 1. ensure there is transitively no Cell/UnsafeCell mutation in the `unsafe` block + /// 2. ensure the `unsafe` block does not return any references, so our + /// analysis is lexically confined. This is trivially true if the block + /// returns a `bool`, for example. Aggregates that store references have + /// explicit lifetime parameters that look like `<'a>`. + /// + /// There are other subtler situations that don't follow these rules yet + /// are still sound. + /// See `test_miri_ref_unchecked()` for examples. You can play with it + /// with `cargo +nightly miri test miri`. + unsafe fn ref_unchecked(&self) -> &Self::Contained { + // SAFETY: pointer is dereferenceable because it's from a &Cell. + // It's up to the caller to follow aliasing rules with the output + // reference. 
+ unsafe { self.as_ptr().as_ref().unwrap() } + } +} + +#[cfg(test)] +mod tests { + use crate::core::*; + + #[test] + fn type_size() { + // Check that we can store types in 4 bits, + // and all local types in 32 bits + assert_eq!(mem::size_of::<Type>(), 1); + assert!(Type::BlockParamProxy as usize <= 0b1111); + assert!(MAX_LOCAL_TYPES * 4 <= 32); + } + + #[test] + fn tempmapping_size() { + assert_eq!(mem::size_of::<TempMapping>(), 1); + } + + #[test] + fn local_types() { + let mut ctx = Context::default(); + + for i in 0..MAX_LOCAL_TYPES { + ctx.set_local_type(i, Type::Fixnum); + assert_eq!(ctx.get_local_type(i), Type::Fixnum); + ctx.set_local_type(i, Type::BlockParamProxy); + assert_eq!(ctx.get_local_type(i), Type::BlockParamProxy); + } + + ctx.set_local_type(0, Type::Fixnum); + ctx.clear_local_types(); + assert!(ctx.get_local_type(0) == Type::Unknown); + + // Make sure we don't accidentally set bits incorrectly + let mut ctx = Context::default(); + ctx.set_local_type(0, Type::Fixnum); + assert_eq!(ctx.get_local_type(0), Type::Fixnum); + ctx.set_local_type(2, Type::Fixnum); + ctx.set_local_type(1, Type::BlockParamProxy); + assert_eq!(ctx.get_local_type(0), Type::Fixnum); + assert_eq!(ctx.get_local_type(2), Type::Fixnum); + } + + #[test] + fn tempmapping() { + let t = TempMapping::map_to_stack(Type::Unknown); + assert_eq!(t.get_kind(), MapToStack); + assert_eq!(t.get_type(), Type::Unknown); + + let t = TempMapping::map_to_stack(Type::TString); + assert_eq!(t.get_kind(), MapToStack); + assert_eq!(t.get_type(), Type::TString); + + let t = TempMapping::map_to_local(7); + assert_eq!(t.get_kind(), MapToLocal); + assert_eq!(t.get_local_idx(), 7); + } + + #[test] + fn context_size() { + assert_eq!(mem::size_of::<Context>(), 23); + } + + #[test] + fn types() { + // Valid src => dst + assert_eq!(Type::Unknown.diff(Type::Unknown), TypeDiff::Compatible(0)); + assert_eq!(Type::UnknownImm.diff(Type::UnknownImm), TypeDiff::Compatible(0)); + assert_ne!(Type::UnknownImm.diff(Type::Unknown), TypeDiff::Incompatible); + assert_ne!(Type::Fixnum.diff(Type::Unknown), TypeDiff::Incompatible); + assert_ne!(Type::Fixnum.diff(Type::UnknownImm), TypeDiff::Incompatible); + + // Invalid src => dst + assert_eq!(Type::Unknown.diff(Type::UnknownImm), TypeDiff::Incompatible); + assert_eq!(Type::Unknown.diff(Type::Fixnum), TypeDiff::Incompatible); + assert_eq!(Type::Fixnum.diff(Type::UnknownHeap), TypeDiff::Incompatible); + } + + #[test] + fn reg_temps() { + let mut reg_temps = RegTemps(0); + + // 0 means every slot is not spilled + for stack_idx in 0..MAX_REG_TEMPS { + assert_eq!(reg_temps.get(stack_idx), false); + } + + // Set 0, 2, 7 (RegTemps: 10100001) + reg_temps.set(0, true); + reg_temps.set(2, true); + reg_temps.set(3, true); + reg_temps.set(3, false); + reg_temps.set(7, true); + + // Get 0..8 + assert_eq!(reg_temps.get(0), true); + assert_eq!(reg_temps.get(1), false); + assert_eq!(reg_temps.get(2), true); + assert_eq!(reg_temps.get(3), false); + assert_eq!(reg_temps.get(4), false); + assert_eq!(reg_temps.get(5), false); + assert_eq!(reg_temps.get(6), false); + assert_eq!(reg_temps.get(7), true); + + // Test conflicts + assert_eq!(5, get_option!(num_temp_regs)); + assert_eq!(reg_temps.conflicts_with(0), false); // already set, but no conflict + assert_eq!(reg_temps.conflicts_with(1), false); + assert_eq!(reg_temps.conflicts_with(2), true); // already set, and conflicts with 7 + assert_eq!(reg_temps.conflicts_with(3), false); + assert_eq!(reg_temps.conflicts_with(4), false); + assert_eq!(reg_temps.conflicts_with(5), 
true); // not set, and will conflict with 0 + assert_eq!(reg_temps.conflicts_with(6), false); + assert_eq!(reg_temps.conflicts_with(7), true); // already set, and conflicts with 2 + } + + #[test] + fn context() { + // Valid src => dst + assert_eq!(Context::default().diff(&Context::default()), TypeDiff::Compatible(0)); + + // Try pushing an operand and getting its type + let mut asm = Assembler::new(); + asm.stack_push(Type::Fixnum); + let top_type = asm.ctx.get_opnd_type(StackOpnd(0)); + assert!(top_type == Type::Fixnum); + + // TODO: write more tests for Context type diff + } + + #[test] + fn context_upgrade_local() { + let mut asm = Assembler::new(); + asm.stack_push_local(0); + asm.ctx.upgrade_opnd_type(StackOpnd(0), Type::Nil); + assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0))); + } + + #[test] + fn context_chain_depth() { + let mut ctx = Context::default(); + assert_eq!(ctx.get_chain_depth(), 0); + assert_eq!(ctx.is_return_landing(), false); + assert_eq!(ctx.is_deferred(), false); + + for _ in 0..5 { + ctx.increment_chain_depth(); + } + assert_eq!(ctx.get_chain_depth(), 5); + + ctx.set_as_return_landing(); + assert_eq!(ctx.is_return_landing(), true); + + ctx.clear_return_landing(); + assert_eq!(ctx.is_return_landing(), false); + + ctx.mark_as_deferred(); + assert_eq!(ctx.is_deferred(), true); + + ctx.reset_chain_depth_and_defer(); + assert_eq!(ctx.get_chain_depth(), 0); + assert_eq!(ctx.is_deferred(), false); + } + + #[test] + fn shift_stack_for_send() { + let mut asm = Assembler::new(); + + // Push values to simulate send(:name, arg) with 6 items already on-stack + for _ in 0..6 { + asm.stack_push(Type::Fixnum); + } + asm.stack_push(Type::Unknown); + asm.stack_push(Type::ImmSymbol); + asm.stack_push(Type::Unknown); + + // This method takes argc of the sendee, not argc of send + asm.shift_stack(1); + + // The symbol should be gone + assert_eq!(Type::Unknown, asm.ctx.get_opnd_type(StackOpnd(0))); + assert_eq!(Type::Unknown, asm.ctx.get_opnd_type(StackOpnd(1))); + } + + #[test] + fn test_miri_ref_unchecked() { + let blockid = BlockId { + iseq: ptr::null(), + idx: 0, + }; + let cb = CodeBlock::new_dummy(1024); + let dumm_addr = cb.get_write_ptr(); + let block = JITState::new(blockid, Context::default(), dumm_addr, ptr::null()) + .into_block(0, dumm_addr, dumm_addr, vec![]); + let _dropper = BlockDropper(block); + + // Outside of brief moments during construction, + // we're always working with &Branch (a shared reference to a Branch). + let branch: &Branch = &Branch { + gen_fn: BranchGenFn::JZToTarget0, + block, + start_addr: dumm_addr, + end_addr: Cell::new(dumm_addr), + targets: [Cell::new(None), Cell::new(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub { + iseq: Cell::new(ptr::null()), + iseq_idx: 0, + address: None, + ctx: Context::default(), + })))))] + }; + // For easier soundness reasoning, make sure the reference returned does not outlive the + // `unsafe` block! It's tempting to do, but it leads to non-local issues. + // Here is an example where it goes wrong: + if false { + for target in branch.targets.iter().as_ref() { + if let Some(btarget) = unsafe { target.ref_unchecked() } { + // btarget is derived from the unsafe block! + target.set(None); // This drops the contents of the cell... + assert!(btarget.get_address().is_none()); // but `btarget` is still live! UB. + } + } + } + + // Do something like this instead. It's not pretty, but it's easier to vet for UB this way.
+ for target in branch.targets.iter().as_ref() { + // SAFETY: no mutation within unsafe + if unsafe { target.ref_unchecked().is_none() } { + continue; + } + // SAFETY: no mutation within unsafe + assert!(unsafe { target.ref_unchecked().as_ref().unwrap().get_address().is_none() }); + target.set(None); + } + + // A more subtle situation where we do Cell/UnsafeCell mutation over the + // lifetime of the reference returned by ref_unchecked(). + branch.targets[0].set(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub { + iseq: Cell::new(ptr::null()), + iseq_idx: 0, + address: None, + ctx: Context::default(), + }))))); + // Invalid ISeq; we never dereference it. + let secret_iseq = NonNull::<rb_iseq_t>::dangling().as_ptr(); + unsafe { + if let Some(branch_target) = branch.targets[0].ref_unchecked().as_ref() { + if let BranchTarget::Stub(stub) = branch_target.as_ref() { + // SAFETY: + // This is a Cell mutation, but it mutates the contents + // of a Cell<IseqPtr>, which is a different type + // from the type of Cell found in `Branch::targets`, so + // there is no chance of mutating the Cell that we called + // ref_unchecked() on above. + Cell::set(&stub.iseq, secret_iseq); + } + } + }; + // Check that we indeed changed the iseq of the stub + // Cell::take moves out of the cell. + assert_eq!( + secret_iseq as usize, + branch.targets[0].take().unwrap().get_blockid().iseq as usize + ); + + struct BlockDropper(BlockRef); + impl Drop for BlockDropper { + fn drop(&mut self) { + // SAFETY: we have ownership because the test doesn't stash + // the block away in any global structure. + // Note that the test being self-contained is also why we + // use dealloc_block() over free_block(), as free_block() touches + // the global invariants tables unavailable in tests. + unsafe { dealloc_block(self.0) }; + } + } + } +} diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs new file mode 100644 index 0000000000..53586cb4f4 --- /dev/null +++ b/yjit/src/cruby.rs @@ -0,0 +1,818 @@ +//! This module deals with making relevant C functions available to Rust YJIT. +//! Some C functions we use we maintain, some are public C extension APIs, +//! and some are internal CRuby APIs. +//! +//! ## General notes about linking +//! +//! The YJIT crate compiles to a native static library, which for our purposes +//! we can understand as a collection of object files. On ELF platforms at least, +//! object files can refer to "external symbols" which, taking some liberty, we can +//! understand as assembly labels that refer to code defined in other +//! object files and resolved when linking. When we are linking, say to produce miniruby, +//! the linker resolves and puts concrete addresses in place for each usage of a C function in +//! the Rust static library. +//! +//! By declaring external functions and using them, we are asserting the symbols +//! we use have a definition in one of the object files we pass to the linker. Declaring +//! a function here that has no definition anywhere causes a linking error. +//! +//! There is more going on during linking, and this section makes a lot of +//! simplifications, but hopefully this gives a good enough working mental model. +//! +//! ## Difference from example in the Rustonomicon +//! +//! You might be wondering why this is different from the [FFI example] +//! in the Nomicon, an official book about Unsafe Rust. +//! +//! There is no `#[link]` attribute because we are not linking against an external +//! library, but rather implicitly asserting that we'll supply a concrete definition +//!
for all C functions we call, similar to how pure C projects put functions +//! across different compilation units and link them together. +//! +//! TODO(alan): is the model different enough on Windows that this setup is unworkable? +//! Seems prudent to at least learn more about Windows binary tooling before +//! committing to a design. +//! +//! Alan recommends reading the Nomicon cover to cover as he thinks the book is +//! not very long, especially for something that can save hours of +//! debugging Undefined Behavior (UB) down the road. +//! +//! UB can cause Safe Rust to crash, at which point it's hard to tell which +//! usage of `unsafe` in the codebase invokes UB. Providing a safe Rust interface +//! that wraps `unsafe` Rust is a good technique, but it requires practice and knowledge +//! about what's well defined and what's undefined. +//! +//! For an extremely advanced example of building safe primitives using Unsafe Rust, +//! see the [GhostCell] paper. Some parts of the paper assume less background knowledge +//! than other parts, so there should be learning opportunities in it for all experience +//! levels. +//! +//! ## Binding generation +//! +//! For the moment, declarations on the Rust side are hand-written. The code is boilerplate +//! and could be generated automatically with custom tooling that depends on +//! rust-lang/rust-bindgen. The output Rust code could be checked into version control +//! and verified on CI, like `make update-deps`. +//! +//! Upsides for this design: +//! - the YJIT static lib that links with miniruby and friends will not need bindgen +//! as a dependency at all. This is an important property so Ruby end users can +//! build a YJIT-enabled Ruby with no internet connection using a release tarball +//! - Less hand-typed boilerplate +//! - Helps reduce the risk of C definitions and Rust declarations going out of sync, since +//! CI verifies that they match +//! +//! Downsides and known unknowns: +//! - Using rust-bindgen this way seems unusual. We might be depending on parts +//! that the project is not committed to maintaining +//! - This setup assumes rust-bindgen gives deterministic output, which can't be taken +//! for granted +//! - YJIT contributors will need to install libclang on their system to get rust-bindgen +//! to work if they want to run the generation tool locally +//! +//! The elephant in the room is that we'll still need to use Unsafe Rust to call C functions, +//! and the binding generation can't magically save us from learning Unsafe Rust. +//! +//! +//! [FFI example]: https://doc.rust-lang.org/nomicon/ffi.html +//! [GhostCell]: http://plv.mpi-sws.org/rustbelt/ghostcell/ + +// CRuby types use snake_case. Allow them so we use one name across languages. +#![allow(non_camel_case_types)] +// A lot of imported CRuby globals aren't all-caps +#![allow(non_upper_case_globals)] + +use std::convert::From; +use std::ffi::{CString, CStr}; +use std::os::raw::{c_char, c_int, c_uint}; +use std::panic::{catch_unwind, UnwindSafe}; + +// We check that we can do this with the configure script and a couple of +// static asserts. u64 and not usize to play nice with lowering to x86. +pub type size_t = u64; + +/// A type alias for the redefinition flags coming from CRuby. These are just +/// shifted 1s but not explicitly an enum. +pub type RedefinitionFlag = u32; + +#[allow(dead_code)] +#[allow(clippy::all)] +mod autogened { + use super::*; + // Textually include output from rust-bindgen as suggested by its user guide.
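+    //
+    // As a hedged illustration (not text from the generated file itself), the included
+    // bindings consist of ordinary Rust items along these lines: constants and
+    // extern "C" function declarations, e.g.
+    //
+    //     pub const RUBY_T_STRING: ruby_value_type = 5;
+    //     extern "C" {
+    //         pub fn rb_obj_frozen_p(obj: VALUE) -> VALUE;
+    //     }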
+ include!("cruby_bindings.inc.rs"); +} +pub use autogened::*; + +// TODO: For #defines that affect memory layout, we need to check for them +// on build and fail if they're wrong. e.g. USE_FLONUM *must* be true. + +// These are functions we expose from C files, not in any header. +// Parsing it would result in a lot of duplicate definitions. +// Use bindgen for functions that are defined in headers or in yjit.c. +#[cfg_attr(test, allow(unused))] // We don't link against C code when testing +extern "C" { + pub fn rb_check_overloaded_cme( + me: *const rb_callable_method_entry_t, + ci: *const rb_callinfo, + ) -> *const rb_callable_method_entry_t; + pub fn rb_hash_empty_p(hash: VALUE) -> VALUE; + pub fn rb_str_setbyte(str: VALUE, index: VALUE, value: VALUE) -> VALUE; + pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE; + pub fn rb_vm_concat_array(ary1: VALUE, ary2st: VALUE) -> VALUE; + pub fn rb_vm_concat_to_array(ary1: VALUE, ary2st: VALUE) -> VALUE; + pub fn rb_vm_defined( + ec: EcPtr, + reg_cfp: CfpPtr, + op_type: rb_num_t, + obj: VALUE, + v: VALUE, + ) -> bool; + pub fn rb_vm_set_ivar_id(obj: VALUE, idx: u32, val: VALUE) -> VALUE; + pub fn rb_vm_setinstancevariable(iseq: IseqPtr, obj: VALUE, id: ID, val: VALUE, ic: IVC); + pub fn rb_aliased_callable_method_entry( + me: *const rb_callable_method_entry_t, + ) -> *const rb_callable_method_entry_t; + pub fn rb_vm_getclassvariable(iseq: IseqPtr, cfp: CfpPtr, id: ID, ic: ICVARC) -> VALUE; + pub fn rb_vm_setclassvariable( + iseq: IseqPtr, + cfp: CfpPtr, + id: ID, + val: VALUE, + ic: ICVARC, + ) -> VALUE; + pub fn rb_vm_ic_hit_p(ic: IC, reg_ep: *const VALUE) -> bool; + pub fn rb_vm_stack_canary() -> VALUE; + pub fn rb_vm_push_cfunc_frame(cme: *const rb_callable_method_entry_t, recv_idx: c_int); +} + +// Renames +pub use rb_insn_name as raw_insn_name; +pub use rb_get_ec_cfp as get_ec_cfp; +pub use rb_get_cfp_iseq as get_cfp_iseq; +pub use rb_get_cfp_pc as get_cfp_pc; +pub use rb_get_cfp_sp as get_cfp_sp; +pub use rb_get_cfp_self as get_cfp_self; +pub use rb_get_cfp_ep as get_cfp_ep; +pub use rb_get_cfp_ep_level as get_cfp_ep_level; +pub use rb_vm_base_ptr as get_cfp_bp; +pub use rb_get_cme_def_type as get_cme_def_type; +pub use rb_get_cme_def_body_attr_id as get_cme_def_body_attr_id; +pub use rb_get_cme_def_body_optimized_type as get_cme_def_body_optimized_type; +pub use rb_get_cme_def_body_optimized_index as get_cme_def_body_optimized_index; +pub use rb_get_cme_def_body_cfunc as get_cme_def_body_cfunc; +pub use rb_get_def_method_serial as get_def_method_serial; +pub use rb_get_def_original_id as get_def_original_id; +pub use rb_get_mct_argc as get_mct_argc; +pub use rb_get_mct_func as get_mct_func; +pub use rb_get_def_iseq_ptr as get_def_iseq_ptr; +pub use rb_iseq_encoded_size as get_iseq_encoded_size; +pub use rb_get_iseq_body_local_iseq as get_iseq_body_local_iseq; +pub use rb_get_iseq_body_iseq_encoded as get_iseq_body_iseq_encoded; +pub use rb_get_iseq_body_stack_max as get_iseq_body_stack_max; +pub use rb_get_iseq_body_type as get_iseq_body_type; +pub use rb_get_iseq_flags_has_lead as get_iseq_flags_has_lead; +pub use rb_get_iseq_flags_has_opt as get_iseq_flags_has_opt; +pub use rb_get_iseq_flags_has_kw as get_iseq_flags_has_kw; +pub use rb_get_iseq_flags_has_rest as get_iseq_flags_has_rest; +pub use rb_get_iseq_flags_has_post as get_iseq_flags_has_post; +pub use rb_get_iseq_flags_has_kwrest as get_iseq_flags_has_kwrest; +pub use rb_get_iseq_flags_has_block as get_iseq_flags_has_block; +pub use rb_get_iseq_flags_ambiguous_param0 as 
get_iseq_flags_ambiguous_param0; +pub use rb_get_iseq_flags_accepts_no_kwarg as get_iseq_flags_accepts_no_kwarg; +pub use rb_get_iseq_body_local_table_size as get_iseq_body_local_table_size; +pub use rb_get_iseq_body_param_keyword as get_iseq_body_param_keyword; +pub use rb_get_iseq_body_param_size as get_iseq_body_param_size; +pub use rb_get_iseq_body_param_lead_num as get_iseq_body_param_lead_num; +pub use rb_get_iseq_body_param_opt_num as get_iseq_body_param_opt_num; +pub use rb_get_iseq_body_param_opt_table as get_iseq_body_param_opt_table; +pub use rb_get_cikw_keyword_len as get_cikw_keyword_len; +pub use rb_get_cikw_keywords_idx as get_cikw_keywords_idx; +pub use rb_get_call_data_ci as get_call_data_ci; +pub use rb_yarv_str_eql_internal as rb_str_eql_internal; +pub use rb_yarv_ary_entry_internal as rb_ary_entry_internal; +pub use rb_yjit_fix_div_fix as rb_fix_div_fix; +pub use rb_yjit_fix_mod_fix as rb_fix_mod_fix; +pub use rb_FL_TEST as FL_TEST; +pub use rb_FL_TEST_RAW as FL_TEST_RAW; +pub use rb_RB_TYPE_P as RB_TYPE_P; +pub use rb_BASIC_OP_UNREDEFINED_P as BASIC_OP_UNREDEFINED_P; +pub use rb_RSTRUCT_LEN as RSTRUCT_LEN; +pub use rb_RSTRUCT_SET as RSTRUCT_SET; +pub use rb_vm_ci_argc as vm_ci_argc; +pub use rb_vm_ci_mid as vm_ci_mid; +pub use rb_vm_ci_flag as vm_ci_flag; +pub use rb_vm_ci_kwarg as vm_ci_kwarg; +pub use rb_METHOD_ENTRY_VISI as METHOD_ENTRY_VISI; +pub use rb_RCLASS_ORIGIN as RCLASS_ORIGIN; + +/// Helper so we can get a Rust string for insn_name() +pub fn insn_name(opcode: usize) -> String { + unsafe { + // Look up Ruby's NULL-terminated insn name string + let op_name = raw_insn_name(VALUE(opcode)); + + // Convert the op name C string to a Rust string + let op_name = CStr::from_ptr(op_name).to_str().unwrap(); + + // Convert into an owned string + op_name.to_string() + } +} + +#[allow(unused_variables)] +pub fn insn_len(opcode: usize) -> u32 { + #[cfg(test)] + panic!("insn_len is a CRuby function, and we don't link against CRuby for Rust testing!"); + + #[cfg(not(test))] + unsafe { + rb_insn_len(VALUE(opcode)).try_into().unwrap() + } +} + +/// Opaque iseq type for opaque iseq pointers from vm_core.h +/// See: <https://doc.rust-lang.org/nomicon/ffi.html#representing-opaque-structs> +#[repr(C)] +pub struct rb_iseq_t { + _data: [u8; 0], + _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, +} + +/// An object handle similar to VALUE in the C code. Our methods assume +/// that this is a handle. Sometimes the C code briefly uses VALUE as +/// an unsigned integer type and doesn't necessarily store valid handles, but +/// thankfully those cases are rare and don't cross the FFI boundary. +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] +#[repr(transparent)] // same size and alignment as simply `usize` +pub struct VALUE(pub usize); + +/// Pointer to an ISEQ +pub type IseqPtr = *const rb_iseq_t; + +// Given an ISEQ pointer, convert PC to insn_idx +pub fn iseq_pc_to_insn_idx(iseq: IseqPtr, pc: *mut VALUE) -> Option<u16> { + let pc_zero = unsafe { rb_iseq_pc_at_idx(iseq, 0) }; + unsafe { pc.offset_from(pc_zero) }.try_into().ok() +} + +/// Given an ISEQ pointer and an instruction index, return an opcode. +pub fn iseq_opcode_at_idx(iseq: IseqPtr, insn_idx: u32) -> u32 { + let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) }; + unsafe { rb_iseq_opcode_at_pc(iseq, pc) as u32 } +} + +/// Return a poison value to be set above the stack top to verify leafness.
+#[cfg(not(test))] +pub fn vm_stack_canary() -> u64 { + unsafe { rb_vm_stack_canary() }.as_u64() +} + +/// Avoid linking the C function in `cargo test` +#[cfg(test)] +pub fn vm_stack_canary() -> u64 { + 0 +} + +/// Opaque execution-context type from vm_core.h +#[repr(C)] +pub struct rb_execution_context_struct { + _data: [u8; 0], + _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, +} +/// Alias for rb_execution_context_struct used by CRuby sometimes +pub type rb_execution_context_t = rb_execution_context_struct; + +/// Pointer to an execution context (rb_execution_context_struct) +pub type EcPtr = *const rb_execution_context_struct; + +// From method.h +#[repr(C)] +pub struct rb_method_definition_t { + _data: [u8; 0], + _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, +} +type rb_method_definition_struct = rb_method_definition_t; + +/// Opaque cfunc type from method.h +#[repr(C)] +pub struct rb_method_cfunc_t { + _data: [u8; 0], + _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, +} + +/// Opaque call-cache type from vm_callinfo.h +#[repr(C)] +pub struct rb_callcache { + _data: [u8; 0], + _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, +} + +/// Opaque control_frame (CFP) struct from vm_core.h +#[repr(C)] +pub struct rb_control_frame_struct { + _data: [u8; 0], + _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, +} + +/// Pointer to a control frame pointer (CFP) +pub type CfpPtr = *mut rb_control_frame_struct; + +/// Opaque struct from vm_core.h +#[repr(C)] +pub struct rb_cref_t { + _data: [u8; 0], + _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, +} + +impl VALUE { + /// Dump info about the value to the console similarly to rp(VALUE) + pub fn dump_info(self) { + unsafe { rb_obj_info_dump(self) } + } + + /// Return whether the value is truthy or falsy in Ruby -- only nil and false are falsy. 
+ pub fn test(self) -> bool { + let VALUE(cval) = self; + let VALUE(qnilval) = Qnil; + (cval & !qnilval) != 0 + } + + /// Return true if the number is an immediate integer, flonum or static symbol + fn immediate_p(self) -> bool { + let VALUE(cval) = self; + let mask = RUBY_IMMEDIATE_MASK as usize; + (cval & mask) != 0 + } + + /// Return true if the value is a Ruby immediate integer, flonum, static symbol, nil or false + pub fn special_const_p(self) -> bool { + self.immediate_p() || !self.test() + } + + /// Return true if the value is a heap object + pub fn heap_object_p(self) -> bool { + !self.special_const_p() + } + + /// Return true if the value is a Ruby Fixnum (immediate-size integer) + pub fn fixnum_p(self) -> bool { + let VALUE(cval) = self; + let flag = RUBY_FIXNUM_FLAG as usize; + (cval & flag) == flag + } + + /// Return true if the value is an immediate Ruby floating-point number (flonum) + pub fn flonum_p(self) -> bool { + let VALUE(cval) = self; + let mask = RUBY_FLONUM_MASK as usize; + let flag = RUBY_FLONUM_FLAG as usize; + (cval & mask) == flag + } + + /// Return true if the value is a Ruby symbol (RB_SYMBOL_P) + pub fn symbol_p(self) -> bool { + self.static_sym_p() || self.dynamic_sym_p() + } + + /// Return true for a static (non-heap) Ruby symbol (RB_STATIC_SYM_P) + pub fn static_sym_p(self) -> bool { + let VALUE(cval) = self; + let flag = RUBY_SYMBOL_FLAG as usize; + (cval & 0xff) == flag + } + + /// Return true for a dynamic Ruby symbol (RB_DYNAMIC_SYM_P) + fn dynamic_sym_p(self) -> bool { + return if self.special_const_p() { + false + } else { + self.builtin_type() == RUBY_T_SYMBOL + } + } + + /// Returns true if the value is T_HASH + pub fn hash_p(self) -> bool { + !self.special_const_p() && self.builtin_type() == RUBY_T_HASH + } + + /// Returns true or false depending on whether the value is nil + pub fn nil_p(self) -> bool { + self == Qnil + } + + pub fn string_p(self) -> bool { + self.class_of() == unsafe { rb_cString } + } + + /// Read the flags bits from the RBasic object, then return a Ruby type enum (e.g. 
RUBY_T_ARRAY) + pub fn builtin_type(self) -> ruby_value_type { + (self.builtin_flags() & (RUBY_T_MASK as usize)) as ruby_value_type + } + + pub fn builtin_flags(self) -> usize { + assert!(!self.special_const_p()); + + let VALUE(cval) = self; + let rbasic_ptr = cval as *const RBasic; + let flags_bits: usize = unsafe { (*rbasic_ptr).flags }.as_usize(); + return flags_bits; + } + + pub fn class_of(self) -> VALUE { + if !self.special_const_p() { + let builtin_type = self.builtin_type(); + assert_ne!(builtin_type, RUBY_T_NONE, "YJIT should only see live objects"); + assert_ne!(builtin_type, RUBY_T_MOVED, "YJIT should only see live objects"); + } + + unsafe { rb_yarv_class_of(self) } + } + + pub fn is_frozen(self) -> bool { + unsafe { rb_obj_frozen_p(self) != VALUE(0) } + } + + pub fn shape_too_complex(self) -> bool { + unsafe { rb_shape_obj_too_complex(self) } + } + + pub fn shape_id_of(self) -> u32 { + unsafe { rb_shape_get_shape_id(self) } + } + + pub fn shape_of(self) -> *mut rb_shape { + unsafe { + let shape = rb_shape_get_shape_by_id(self.shape_id_of()); + + if shape.is_null() { + panic!("Shape should not be null"); + } else { + shape + } + } + } + + pub fn embedded_p(self) -> bool { + unsafe { + FL_TEST_RAW(self, VALUE(ROBJECT_EMBED as usize)) != VALUE(0) + } + } + + pub fn as_isize(self) -> isize { + let VALUE(is) = self; + is as isize + } + + pub fn as_i32(self) -> i32 { + self.as_i64().try_into().unwrap() + } + + pub fn as_u32(self) -> u32 { + let VALUE(i) = self; + i.try_into().unwrap() + } + + pub fn as_i64(self) -> i64 { + let VALUE(i) = self; + i as i64 + } + + pub fn as_u64(self) -> u64 { + let VALUE(i) = self; + i.try_into().unwrap() + } + + pub fn as_usize(self) -> usize { + let VALUE(us) = self; + us + } + + pub fn as_ptr<T>(self) -> *const T { + let VALUE(us) = self; + us as *const T + } + + pub fn as_mut_ptr<T>(self) -> *mut T { + let VALUE(us) = self; + us as *mut T + } + + /// For working with opaque pointers and encoding null check. + /// Similar to [std::ptr::NonNull], but for `*const T`. `NonNull<T>` + /// is for `*mut T` while our C functions are setup to use `*const T`. + /// Casting from `NonNull<T>` to `*const T` is too noisy. 
+ pub fn as_optional_ptr<T>(self) -> Option<*const T> { + let ptr: *const T = self.as_ptr(); + + if ptr.is_null() { + None + } else { + Some(ptr) + } + } + + /// Assert that `self` is an iseq in debug builds + pub fn as_iseq(self) -> IseqPtr { + let ptr: IseqPtr = self.as_ptr(); + + #[cfg(debug_assertions)] + if !ptr.is_null() { + unsafe { rb_assert_iseq_handle(self) } + } + + ptr + } + + /// Assert that `self` is a method entry in debug builds + pub fn as_cme(self) -> *const rb_callable_method_entry_t { + let ptr: *const rb_callable_method_entry_t = self.as_ptr(); + + #[cfg(debug_assertions)] + if !ptr.is_null() { + unsafe { rb_assert_cme_handle(self) } + } + + ptr + } +} + +impl VALUE { + pub fn fixnum_from_usize(item: usize) -> Self { + assert!(item <= (RUBY_FIXNUM_MAX as usize)); // An unsigned will always be greater than RUBY_FIXNUM_MIN + let k: usize = item.wrapping_add(item.wrapping_add(1)); + VALUE(k) + } +} + +impl From<IseqPtr> for VALUE { + /// For `.into()` convenience + fn from(iseq: IseqPtr) -> Self { + VALUE(iseq as usize) + } +} + +impl From<*const rb_callable_method_entry_t> for VALUE { + /// For `.into()` convenience + fn from(cme: *const rb_callable_method_entry_t) -> Self { + VALUE(cme as usize) + } +} + +impl From<VALUE> for u64 { + fn from(value: VALUE) -> Self { + let VALUE(uimm) = value; + uimm as u64 + } +} + +impl From<VALUE> for i64 { + fn from(value: VALUE) -> Self { + let VALUE(uimm) = value; + assert!(uimm <= (i64::MAX as usize)); + uimm as i64 + } +} + +impl From<VALUE> for i32 { + fn from(value: VALUE) -> Self { + let VALUE(uimm) = value; + assert!(uimm <= (i32::MAX as usize)); + uimm.try_into().unwrap() + } +} + +impl From<VALUE> for u16 { + fn from(value: VALUE) -> Self { + let VALUE(uimm) = value; + uimm.try_into().unwrap() + } +} + +/// Produce a Ruby string from a Rust string slice +#[cfg(feature = "disasm")] +pub fn rust_str_to_ruby(str: &str) -> VALUE { + unsafe { rb_utf8_str_new(str.as_ptr() as *const _, str.len() as i64) } +} + +/// Produce a Ruby symbol from a Rust string slice +pub fn rust_str_to_sym(str: &str) -> VALUE { + let c_str = CString::new(str).unwrap(); + let c_ptr: *const c_char = c_str.as_ptr(); + unsafe { rb_id2sym(rb_intern(c_ptr)) } +} + +/// Produce an owned Rust String from a C char pointer +pub fn cstr_to_rust_string(c_char_ptr: *const c_char) -> Option<String> { + assert!(c_char_ptr != std::ptr::null()); + + let c_str: &CStr = unsafe { CStr::from_ptr(c_char_ptr) }; + + match c_str.to_str() { + Ok(rust_str) => Some(rust_str.to_string()), + Err(_) => None + } +} + +/// A location in Rust code for integrating with debugging facilities defined in C. +/// Use the [src_loc!] macro to create an instance. +pub struct SourceLocation { + pub file: &'static CStr, + pub line: c_int, +} + +/// Make a [SourceLocation] at the current spot. +macro_rules! src_loc { + () => { + { + // Nul-terminated string with static lifetime, make a CStr out of it safely. + let file: &'static str = concat!(file!(), '\0'); + $crate::cruby::SourceLocation { + file: unsafe { std::ffi::CStr::from_ptr(file.as_ptr().cast()) }, + line: line!().try_into().unwrap(), + } + } + }; +} + +pub(crate) use src_loc; + +/// Run GC write barrier. Required after making a new edge in the object reference +/// graph from `old` to `young`. +macro_rules!
obj_written { + ($old: expr, $young: expr) => { + let (old, young): (VALUE, VALUE) = ($old, $young); + let src_loc = $crate::cruby::src_loc!(); + unsafe { rb_yjit_obj_written(old, young, src_loc.file.as_ptr(), src_loc.line) }; + }; +} +pub(crate) use obj_written; + +/// Acquire the VM lock, make sure all other Ruby threads are asleep then run +/// some code while holding the lock. Returns whatever `func` returns. +/// Use with [src_loc!]. +/// +/// Required for code patching in the presence of ractors. +pub fn with_vm_lock<F, R>(loc: SourceLocation, func: F) -> R +where + F: FnOnce() -> R + UnwindSafe, +{ + let file = loc.file.as_ptr(); + let line = loc.line; + let mut recursive_lock_level: c_uint = 0; + + unsafe { rb_yjit_vm_lock_then_barrier(&mut recursive_lock_level, file, line) }; + + let ret = match catch_unwind(func) { + Ok(result) => result, + Err(_) => { + // Theoretically we can recover from some of these panics, + // but it's too late if the unwind reaches here. + + let _ = catch_unwind(|| { + // IO functions can panic too. + eprintln!( + "YJIT panicked while holding VM lock acquired at {}:{}. Aborting...", + loc.file.to_string_lossy(), + line, + ); + }); + std::process::abort(); + } + }; + + unsafe { rb_yjit_vm_unlock(&mut recursive_lock_level, file, line) }; + + ret +} + +// Non-idiomatic capitalization for consistency with CRuby code +#[allow(non_upper_case_globals)] +pub const Qfalse: VALUE = VALUE(RUBY_Qfalse as usize); +#[allow(non_upper_case_globals)] +pub const Qnil: VALUE = VALUE(RUBY_Qnil as usize); +#[allow(non_upper_case_globals)] +pub const Qtrue: VALUE = VALUE(RUBY_Qtrue as usize); +#[allow(non_upper_case_globals)] +pub const Qundef: VALUE = VALUE(RUBY_Qundef as usize); + +#[allow(unused)] +mod manual_defs { + use super::*; + + pub const SIZEOF_VALUE: usize = 8; + pub const SIZEOF_VALUE_I32: i32 = SIZEOF_VALUE as i32; + pub const VALUE_BITS: u8 = 8 * SIZEOF_VALUE as u8; + + pub const RUBY_LONG_MIN: isize = std::os::raw::c_long::MIN as isize; + pub const RUBY_LONG_MAX: isize = std::os::raw::c_long::MAX as isize; + + pub const RUBY_FIXNUM_MIN: isize = RUBY_LONG_MIN / 2; + pub const RUBY_FIXNUM_MAX: isize = RUBY_LONG_MAX / 2; + + // From vm_callinfo.h - uses calculation that seems to confuse bindgen + pub const VM_CALL_ARGS_SIMPLE: u32 = 1 << VM_CALL_ARGS_SIMPLE_bit; + pub const VM_CALL_ARGS_SPLAT: u32 = 1 << VM_CALL_ARGS_SPLAT_bit; + pub const VM_CALL_ARGS_BLOCKARG: u32 = 1 << VM_CALL_ARGS_BLOCKARG_bit; + pub const VM_CALL_FCALL: u32 = 1 << VM_CALL_FCALL_bit; + pub const VM_CALL_KWARG: u32 = 1 << VM_CALL_KWARG_bit; + pub const VM_CALL_KW_SPLAT: u32 = 1 << VM_CALL_KW_SPLAT_bit; + pub const VM_CALL_TAILCALL: u32 = 1 << VM_CALL_TAILCALL_bit; + pub const VM_CALL_ZSUPER : u32 = 1 << VM_CALL_ZSUPER_bit; + pub const VM_CALL_OPT_SEND : u32 = 1 << VM_CALL_OPT_SEND_bit; + + // From internal/struct.h - in anonymous enum, so we can't easily import it + pub const RSTRUCT_EMBED_LEN_MASK: usize = (RUBY_FL_USER7 | RUBY_FL_USER6 | RUBY_FL_USER5 | RUBY_FL_USER4 | RUBY_FL_USER3 |RUBY_FL_USER2 | RUBY_FL_USER1) as usize; + + // From iseq.h - via a different constant, which seems to confuse bindgen + pub const ISEQ_TRANSLATED: usize = RUBY_FL_USER7 as usize; + + // We'll need to encode a lot of Ruby struct/field offsets as constants unless we want to + // redeclare all the Ruby C structs and write our own offsetof macro. For now, we use constants. 
+ pub const RUBY_OFFSET_RBASIC_FLAGS: i32 = 0; // struct RBasic, field "flags" + pub const RUBY_OFFSET_RBASIC_KLASS: i32 = 8; // struct RBasic, field "klass" + pub const RUBY_OFFSET_RARRAY_AS_HEAP_LEN: i32 = 16; // struct RArray, subfield "as.heap.len" + pub const RUBY_OFFSET_RARRAY_AS_HEAP_PTR: i32 = 32; // struct RArray, subfield "as.heap.ptr" + pub const RUBY_OFFSET_RARRAY_AS_ARY: i32 = 16; // struct RArray, subfield "as.ary" + + pub const RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR: i32 = 24; // struct RStruct, subfield "as.heap.ptr" + pub const RUBY_OFFSET_RSTRUCT_AS_ARY: i32 = 16; // struct RStruct, subfield "as.ary" + + pub const RUBY_OFFSET_RSTRING_AS_HEAP_PTR: i32 = 24; // struct RString, subfield "as.heap.ptr" + pub const RUBY_OFFSET_RSTRING_AS_ARY: i32 = 24; // struct RString, subfield "as.embed.ary" + + // Constants from rb_control_frame_t in vm_core.h + pub const RUBY_OFFSET_CFP_PC: i32 = 0; + pub const RUBY_OFFSET_CFP_SP: i32 = 8; + pub const RUBY_OFFSET_CFP_ISEQ: i32 = 16; + pub const RUBY_OFFSET_CFP_SELF: i32 = 24; + pub const RUBY_OFFSET_CFP_EP: i32 = 32; + pub const RUBY_OFFSET_CFP_BLOCK_CODE: i32 = 40; + pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 48; + pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 56; + + // Constants from rb_execution_context_t in vm_core.h + pub const RUBY_OFFSET_EC_CFP: i32 = 16; + pub const RUBY_OFFSET_EC_INTERRUPT_FLAG: i32 = 32; // rb_atomic_t (u32) + pub const RUBY_OFFSET_EC_INTERRUPT_MASK: i32 = 36; // rb_atomic_t (u32) + pub const RUBY_OFFSET_EC_THREAD_PTR: i32 = 48; + + // Constants from rb_thread_t in vm_core.h + pub const RUBY_OFFSET_THREAD_SELF: i32 = 16; + + // Constants from iseq_inline_constant_cache (IC) and iseq_inline_constant_cache_entry (ICE) in vm_core.h + pub const RUBY_OFFSET_IC_ENTRY: i32 = 0; + pub const RUBY_OFFSET_ICE_VALUE: i32 = 8; +} +pub use manual_defs::*; + +/// Interned ID values for Ruby symbols and method names. +/// See [crate::cruby::ID] and usages outside of YJIT. +pub(crate) mod ids { + use std::sync::atomic::AtomicU64; + /// Globals to cache IDs on boot. Atomic to use with relaxed ordering + /// so reads can happen without `unsafe`. Initialization is done + /// single-threaded, and release-acquire on [crate::yjit::YJIT_ENABLED] + /// makes sure we read the cached values after initialization is done. + macro_rules! def_ids { + ($(name: $ident:ident content: $str:literal)*) => { + $( + #[doc = concat!("[crate::cruby::ID] for `", stringify!($str), "`")] + pub static $ident: AtomicU64 = AtomicU64::new(0); + )* + + pub(crate) fn init() { + $( + let content = &$str; + let ptr: *const u8 = content.as_ptr(); + + // Look up and cache each ID + $ident.store( + unsafe { $crate::cruby::rb_intern2(ptr.cast(), content.len() as _) }, + std::sync::atomic::Ordering::Relaxed + ); + )* + + } + } + } + + def_ids! { + name: NULL content: b"" + name: min content: b"min" + name: max content: b"max" + name: hash content: b"hash" + name: pack content: b"pack" + name: respond_to_missing content: b"respond_to_missing?" + name: to_ary content: b"to_ary" + name: eq content: b"==" + } +} + +/// Get a CRuby `ID` for an interned string, e.g. a particular method name. +macro_rules!
ID { + ($id_name:ident) => { + $crate::cruby::ids::$id_name.load(std::sync::atomic::Ordering::Relaxed) + } +} +pub(crate) use ID; diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs new file mode 100644 index 0000000000..3a1de5f674 --- /dev/null +++ b/yjit/src/cruby_bindings.inc.rs @@ -0,0 +1,1266 @@ +/* automatically generated by rust-bindgen 0.63.0 */ + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct __BindgenBitfieldUnit<Storage> { + storage: Storage, +} +impl<Storage> __BindgenBitfieldUnit<Storage> { + #[inline] + pub const fn new(storage: Storage) -> Self { + Self { storage } + } +} +impl<Storage> __BindgenBitfieldUnit<Storage> +where + Storage: AsRef<[u8]> + AsMut<[u8]>, +{ + #[inline] + pub fn get_bit(&self, index: usize) -> bool { + debug_assert!(index / 8 < self.storage.as_ref().len()); + let byte_index = index / 8; + let byte = self.storage.as_ref()[byte_index]; + let bit_index = if cfg!(target_endian = "big") { + 7 - (index % 8) + } else { + index % 8 + }; + let mask = 1 << bit_index; + byte & mask == mask + } + #[inline] + pub fn set_bit(&mut self, index: usize, val: bool) { + debug_assert!(index / 8 < self.storage.as_ref().len()); + let byte_index = index / 8; + let byte = &mut self.storage.as_mut()[byte_index]; + let bit_index = if cfg!(target_endian = "big") { + 7 - (index % 8) + } else { + index % 8 + }; + let mask = 1 << bit_index; + if val { + *byte |= mask; + } else { + *byte &= !mask; + } + } + #[inline] + pub fn get(&self, bit_offset: usize, bit_width: u8) -> u64 { + debug_assert!(bit_width <= 64); + debug_assert!(bit_offset / 8 < self.storage.as_ref().len()); + debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len()); + let mut val = 0; + for i in 0..(bit_width as usize) { + if self.get_bit(i + bit_offset) { + let index = if cfg!(target_endian = "big") { + bit_width as usize - 1 - i + } else { + i + }; + val |= 1 << index; + } + } + val + } + #[inline] + pub fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) { + debug_assert!(bit_width <= 64); + debug_assert!(bit_offset / 8 < self.storage.as_ref().len()); + debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len()); + for i in 0..(bit_width as usize) { + let mask = 1 << i; + let val_bit_is_set = val & mask == mask; + let index = if cfg!(target_endian = "big") { + bit_width as usize - 1 - i + } else { + i + }; + self.set_bit(index + bit_offset, val_bit_is_set); + } + } +} +#[repr(C)] +#[derive(Default)] +pub struct __IncompleteArrayField<T>(::std::marker::PhantomData<T>, [T; 0]); +impl<T> __IncompleteArrayField<T> { + #[inline] + pub const fn new() -> Self { + __IncompleteArrayField(::std::marker::PhantomData, []) + } + #[inline] + pub fn as_ptr(&self) -> *const T { + self as *const _ as *const T + } + #[inline] + pub fn as_mut_ptr(&mut self) -> *mut T { + self as *mut _ as *mut T + } + #[inline] + pub unsafe fn as_slice(&self, len: usize) -> &[T] { + ::std::slice::from_raw_parts(self.as_ptr(), len) + } + #[inline] + pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] { + ::std::slice::from_raw_parts_mut(self.as_mut_ptr(), len) + } +} +impl<T> ::std::fmt::Debug for __IncompleteArrayField<T> { + fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + fmt.write_str("__IncompleteArrayField") + } +} +#[repr(C)] +pub struct __BindgenUnionField<T>(::std::marker::PhantomData<T>); +impl<T> __BindgenUnionField<T> { + #[inline] + pub const fn new() -> 
Self { + __BindgenUnionField(::std::marker::PhantomData) + } + #[inline] + pub unsafe fn as_ref(&self) -> &T { + ::std::mem::transmute(self) + } + #[inline] + pub unsafe fn as_mut(&mut self) -> &mut T { + ::std::mem::transmute(self) + } +} +impl<T> ::std::default::Default for __BindgenUnionField<T> { + #[inline] + fn default() -> Self { + Self::new() + } +} +impl<T> ::std::clone::Clone for __BindgenUnionField<T> { + #[inline] + fn clone(&self) -> Self { + Self::new() + } +} +impl<T> ::std::marker::Copy for __BindgenUnionField<T> {} +impl<T> ::std::fmt::Debug for __BindgenUnionField<T> { + fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + fmt.write_str("__BindgenUnionField") + } +} +impl<T> ::std::hash::Hash for __BindgenUnionField<T> { + fn hash<H: ::std::hash::Hasher>(&self, _state: &mut H) {} +} +impl<T> ::std::cmp::PartialEq for __BindgenUnionField<T> { + fn eq(&self, _other: &__BindgenUnionField<T>) -> bool { + true + } +} +impl<T> ::std::cmp::Eq for __BindgenUnionField<T> {} +pub const INTEGER_REDEFINED_OP_FLAG: u32 = 1; +pub const FLOAT_REDEFINED_OP_FLAG: u32 = 2; +pub const STRING_REDEFINED_OP_FLAG: u32 = 4; +pub const ARRAY_REDEFINED_OP_FLAG: u32 = 8; +pub const HASH_REDEFINED_OP_FLAG: u32 = 16; +pub const SYMBOL_REDEFINED_OP_FLAG: u32 = 64; +pub const TIME_REDEFINED_OP_FLAG: u32 = 128; +pub const REGEXP_REDEFINED_OP_FLAG: u32 = 256; +pub const NIL_REDEFINED_OP_FLAG: u32 = 512; +pub const TRUE_REDEFINED_OP_FLAG: u32 = 1024; +pub const FALSE_REDEFINED_OP_FLAG: u32 = 2048; +pub const PROC_REDEFINED_OP_FLAG: u32 = 4096; +pub const VM_ENV_DATA_SIZE: u32 = 3; +pub const VM_ENV_DATA_INDEX_ME_CREF: i32 = -2; +pub const VM_ENV_DATA_INDEX_SPECVAL: i32 = -1; +pub const VM_ENV_DATA_INDEX_FLAGS: u32 = 0; +pub const VM_BLOCK_HANDLER_NONE: u32 = 0; +pub const SHAPE_ID_NUM_BITS: u32 = 32; +pub const OBJ_TOO_COMPLEX_SHAPE_ID: u32 = 2; +pub type ID = ::std::os::raw::c_ulong; +pub type rb_alloc_func_t = ::std::option::Option<unsafe extern "C" fn(klass: VALUE) -> VALUE>; +pub const RUBY_Qfalse: ruby_special_consts = 0; +pub const RUBY_Qnil: ruby_special_consts = 4; +pub const RUBY_Qtrue: ruby_special_consts = 20; +pub const RUBY_Qundef: ruby_special_consts = 36; +pub const RUBY_IMMEDIATE_MASK: ruby_special_consts = 7; +pub const RUBY_FIXNUM_FLAG: ruby_special_consts = 1; +pub const RUBY_FLONUM_MASK: ruby_special_consts = 3; +pub const RUBY_FLONUM_FLAG: ruby_special_consts = 2; +pub const RUBY_SYMBOL_FLAG: ruby_special_consts = 12; +pub const RUBY_SPECIAL_SHIFT: ruby_special_consts = 8; +pub type ruby_special_consts = u32; +#[repr(C)] +pub struct RBasic { + pub flags: VALUE, + pub klass: VALUE, +} +pub const RUBY_T_NONE: ruby_value_type = 0; +pub const RUBY_T_OBJECT: ruby_value_type = 1; +pub const RUBY_T_CLASS: ruby_value_type = 2; +pub const RUBY_T_MODULE: ruby_value_type = 3; +pub const RUBY_T_FLOAT: ruby_value_type = 4; +pub const RUBY_T_STRING: ruby_value_type = 5; +pub const RUBY_T_REGEXP: ruby_value_type = 6; +pub const RUBY_T_ARRAY: ruby_value_type = 7; +pub const RUBY_T_HASH: ruby_value_type = 8; +pub const RUBY_T_STRUCT: ruby_value_type = 9; +pub const RUBY_T_BIGNUM: ruby_value_type = 10; +pub const RUBY_T_FILE: ruby_value_type = 11; +pub const RUBY_T_DATA: ruby_value_type = 12; +pub const RUBY_T_MATCH: ruby_value_type = 13; +pub const RUBY_T_COMPLEX: ruby_value_type = 14; +pub const RUBY_T_RATIONAL: ruby_value_type = 15; +pub const RUBY_T_NIL: ruby_value_type = 17; +pub const RUBY_T_TRUE: ruby_value_type = 18; +pub const RUBY_T_FALSE: ruby_value_type = 19; +pub 
const RUBY_T_SYMBOL: ruby_value_type = 20; +pub const RUBY_T_FIXNUM: ruby_value_type = 21; +pub const RUBY_T_UNDEF: ruby_value_type = 22; +pub const RUBY_T_IMEMO: ruby_value_type = 26; +pub const RUBY_T_NODE: ruby_value_type = 27; +pub const RUBY_T_ICLASS: ruby_value_type = 28; +pub const RUBY_T_ZOMBIE: ruby_value_type = 29; +pub const RUBY_T_MOVED: ruby_value_type = 30; +pub const RUBY_T_MASK: ruby_value_type = 31; +pub type ruby_value_type = u32; +pub const RUBY_FL_USHIFT: ruby_fl_ushift = 12; +pub type ruby_fl_ushift = u32; +pub const RUBY_FL_WB_PROTECTED: ruby_fl_type = 32; +pub const RUBY_FL_PROMOTED: ruby_fl_type = 32; +pub const RUBY_FL_UNUSED6: ruby_fl_type = 64; +pub const RUBY_FL_FINALIZE: ruby_fl_type = 128; +pub const RUBY_FL_TAINT: ruby_fl_type = 0; +pub const RUBY_FL_SHAREABLE: ruby_fl_type = 256; +pub const RUBY_FL_UNTRUSTED: ruby_fl_type = 0; +pub const RUBY_FL_SEEN_OBJ_ID: ruby_fl_type = 512; +pub const RUBY_FL_EXIVAR: ruby_fl_type = 1024; +pub const RUBY_FL_FREEZE: ruby_fl_type = 2048; +pub const RUBY_FL_USER0: ruby_fl_type = 4096; +pub const RUBY_FL_USER1: ruby_fl_type = 8192; +pub const RUBY_FL_USER2: ruby_fl_type = 16384; +pub const RUBY_FL_USER3: ruby_fl_type = 32768; +pub const RUBY_FL_USER4: ruby_fl_type = 65536; +pub const RUBY_FL_USER5: ruby_fl_type = 131072; +pub const RUBY_FL_USER6: ruby_fl_type = 262144; +pub const RUBY_FL_USER7: ruby_fl_type = 524288; +pub const RUBY_FL_USER8: ruby_fl_type = 1048576; +pub const RUBY_FL_USER9: ruby_fl_type = 2097152; +pub const RUBY_FL_USER10: ruby_fl_type = 4194304; +pub const RUBY_FL_USER11: ruby_fl_type = 8388608; +pub const RUBY_FL_USER12: ruby_fl_type = 16777216; +pub const RUBY_FL_USER13: ruby_fl_type = 33554432; +pub const RUBY_FL_USER14: ruby_fl_type = 67108864; +pub const RUBY_FL_USER15: ruby_fl_type = 134217728; +pub const RUBY_FL_USER16: ruby_fl_type = 268435456; +pub const RUBY_FL_USER17: ruby_fl_type = 536870912; +pub const RUBY_FL_USER18: ruby_fl_type = 1073741824; +pub const RUBY_FL_USER19: ruby_fl_type = -2147483648; +pub const RUBY_ELTS_SHARED: ruby_fl_type = 16384; +pub const RUBY_FL_SINGLETON: ruby_fl_type = 8192; +pub type ruby_fl_type = i32; +pub const RSTRING_NOEMBED: ruby_rstring_flags = 8192; +pub const RSTRING_FSTR: ruby_rstring_flags = 536870912; +pub type ruby_rstring_flags = u32; +pub type st_data_t = ::std::os::raw::c_ulong; +pub type st_index_t = st_data_t; +pub const ST_CONTINUE: st_retval = 0; +pub const ST_STOP: st_retval = 1; +pub const ST_DELETE: st_retval = 2; +pub const ST_CHECK: st_retval = 3; +pub const ST_REPLACE: st_retval = 4; +pub type st_retval = u32; +pub type st_foreach_callback_func = ::std::option::Option< + unsafe extern "C" fn( + arg1: st_data_t, + arg2: st_data_t, + arg3: st_data_t, + ) -> ::std::os::raw::c_int, +>; +pub const RARRAY_EMBED_FLAG: ruby_rarray_flags = 8192; +pub const RARRAY_EMBED_LEN_MASK: ruby_rarray_flags = 4161536; +pub type ruby_rarray_flags = u32; +pub const RARRAY_EMBED_LEN_SHIFT: ruby_rarray_consts = 15; +pub type ruby_rarray_consts = u32; +pub const RMODULE_IS_REFINEMENT: ruby_rmodule_flags = 32768; +pub type ruby_rmodule_flags = u32; +pub const ROBJECT_EMBED: ruby_robject_flags = 8192; +pub type ruby_robject_flags = u32; +pub type rb_block_call_func = ::std::option::Option< + unsafe extern "C" fn( + yielded_arg: VALUE, + callback_arg: VALUE, + argc: ::std::os::raw::c_int, + argv: *const VALUE, + blockarg: VALUE, + ) -> VALUE, +>; +pub type rb_block_call_func_t = rb_block_call_func; +pub const RUBY_ENCODING_INLINE_MAX: ruby_encoding_consts = 127; +pub 
const RUBY_ENCODING_SHIFT: ruby_encoding_consts = 22; +pub const RUBY_ENCODING_MASK: ruby_encoding_consts = 532676608; +pub const RUBY_ENCODING_MAXNAMELEN: ruby_encoding_consts = 42; +pub type ruby_encoding_consts = u32; +pub const RUBY_ENCINDEX_ASCII_8BIT: ruby_preserved_encindex = 0; +pub const RUBY_ENCINDEX_UTF_8: ruby_preserved_encindex = 1; +pub const RUBY_ENCINDEX_US_ASCII: ruby_preserved_encindex = 2; +pub const RUBY_ENCINDEX_UTF_16BE: ruby_preserved_encindex = 3; +pub const RUBY_ENCINDEX_UTF_16LE: ruby_preserved_encindex = 4; +pub const RUBY_ENCINDEX_UTF_32BE: ruby_preserved_encindex = 5; +pub const RUBY_ENCINDEX_UTF_32LE: ruby_preserved_encindex = 6; +pub const RUBY_ENCINDEX_UTF_16: ruby_preserved_encindex = 7; +pub const RUBY_ENCINDEX_UTF_32: ruby_preserved_encindex = 8; +pub const RUBY_ENCINDEX_UTF8_MAC: ruby_preserved_encindex = 9; +pub const RUBY_ENCINDEX_EUC_JP: ruby_preserved_encindex = 10; +pub const RUBY_ENCINDEX_Windows_31J: ruby_preserved_encindex = 11; +pub const RUBY_ENCINDEX_BUILTIN_MAX: ruby_preserved_encindex = 12; +pub type ruby_preserved_encindex = u32; +pub const BOP_PLUS: ruby_basic_operators = 0; +pub const BOP_MINUS: ruby_basic_operators = 1; +pub const BOP_MULT: ruby_basic_operators = 2; +pub const BOP_DIV: ruby_basic_operators = 3; +pub const BOP_MOD: ruby_basic_operators = 4; +pub const BOP_EQ: ruby_basic_operators = 5; +pub const BOP_EQQ: ruby_basic_operators = 6; +pub const BOP_LT: ruby_basic_operators = 7; +pub const BOP_LE: ruby_basic_operators = 8; +pub const BOP_LTLT: ruby_basic_operators = 9; +pub const BOP_AREF: ruby_basic_operators = 10; +pub const BOP_ASET: ruby_basic_operators = 11; +pub const BOP_LENGTH: ruby_basic_operators = 12; +pub const BOP_SIZE: ruby_basic_operators = 13; +pub const BOP_EMPTY_P: ruby_basic_operators = 14; +pub const BOP_NIL_P: ruby_basic_operators = 15; +pub const BOP_SUCC: ruby_basic_operators = 16; +pub const BOP_GT: ruby_basic_operators = 17; +pub const BOP_GE: ruby_basic_operators = 18; +pub const BOP_NOT: ruby_basic_operators = 19; +pub const BOP_NEQ: ruby_basic_operators = 20; +pub const BOP_MATCH: ruby_basic_operators = 21; +pub const BOP_FREEZE: ruby_basic_operators = 22; +pub const BOP_UMINUS: ruby_basic_operators = 23; +pub const BOP_MAX: ruby_basic_operators = 24; +pub const BOP_MIN: ruby_basic_operators = 25; +pub const BOP_HASH: ruby_basic_operators = 26; +pub const BOP_CALL: ruby_basic_operators = 27; +pub const BOP_AND: ruby_basic_operators = 28; +pub const BOP_OR: ruby_basic_operators = 29; +pub const BOP_CMP: ruby_basic_operators = 30; +pub const BOP_DEFAULT: ruby_basic_operators = 31; +pub const BOP_PACK: ruby_basic_operators = 32; +pub const BOP_LAST_: ruby_basic_operators = 33; +pub type ruby_basic_operators = u32; +pub type rb_serial_t = ::std::os::raw::c_ulonglong; +pub const imemo_env: imemo_type = 0; +pub const imemo_cref: imemo_type = 1; +pub const imemo_svar: imemo_type = 2; +pub const imemo_throw_data: imemo_type = 3; +pub const imemo_ifunc: imemo_type = 4; +pub const imemo_memo: imemo_type = 5; +pub const imemo_ment: imemo_type = 6; +pub const imemo_iseq: imemo_type = 7; +pub const imemo_tmpbuf: imemo_type = 8; +pub const imemo_ast: imemo_type = 9; +pub const imemo_parser_strterm: imemo_type = 10; +pub const imemo_callinfo: imemo_type = 11; +pub const imemo_callcache: imemo_type = 12; +pub const imemo_constcache: imemo_type = 13; +pub type imemo_type = u32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct vm_ifunc_argc { + pub min: ::std::os::raw::c_int, + pub max: ::std::os::raw::c_int, 
+} +#[repr(C)] +pub struct vm_ifunc { + pub flags: VALUE, + pub svar_lep: *mut VALUE, + pub func: rb_block_call_func_t, + pub data: *const ::std::os::raw::c_void, + pub argc: vm_ifunc_argc, +} +pub const METHOD_VISI_UNDEF: rb_method_visibility_t = 0; +pub const METHOD_VISI_PUBLIC: rb_method_visibility_t = 1; +pub const METHOD_VISI_PRIVATE: rb_method_visibility_t = 2; +pub const METHOD_VISI_PROTECTED: rb_method_visibility_t = 3; +pub const METHOD_VISI_MASK: rb_method_visibility_t = 3; +pub type rb_method_visibility_t = u32; +#[repr(C)] +pub struct rb_method_entry_struct { + pub flags: VALUE, + pub defined_class: VALUE, + pub def: *mut rb_method_definition_struct, + pub called_id: ID, + pub owner: VALUE, +} +pub type rb_method_entry_t = rb_method_entry_struct; +#[repr(C)] +pub struct rb_callable_method_entry_struct { + pub flags: VALUE, + pub defined_class: VALUE, + pub def: *mut rb_method_definition_struct, + pub called_id: ID, + pub owner: VALUE, +} +pub type rb_callable_method_entry_t = rb_callable_method_entry_struct; +pub const VM_METHOD_TYPE_ISEQ: rb_method_type_t = 0; +pub const VM_METHOD_TYPE_CFUNC: rb_method_type_t = 1; +pub const VM_METHOD_TYPE_ATTRSET: rb_method_type_t = 2; +pub const VM_METHOD_TYPE_IVAR: rb_method_type_t = 3; +pub const VM_METHOD_TYPE_BMETHOD: rb_method_type_t = 4; +pub const VM_METHOD_TYPE_ZSUPER: rb_method_type_t = 5; +pub const VM_METHOD_TYPE_ALIAS: rb_method_type_t = 6; +pub const VM_METHOD_TYPE_UNDEF: rb_method_type_t = 7; +pub const VM_METHOD_TYPE_NOTIMPLEMENTED: rb_method_type_t = 8; +pub const VM_METHOD_TYPE_OPTIMIZED: rb_method_type_t = 9; +pub const VM_METHOD_TYPE_MISSING: rb_method_type_t = 10; +pub const VM_METHOD_TYPE_REFINED: rb_method_type_t = 11; +pub type rb_method_type_t = u32; +pub type rb_cfunc_t = ::std::option::Option<unsafe extern "C" fn() -> VALUE>; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct rb_method_cfunc_struct { + pub func: rb_cfunc_t, + pub invoker: ::std::option::Option< + unsafe extern "C" fn( + recv: VALUE, + argc: ::std::os::raw::c_int, + argv: *const VALUE, + func: ::std::option::Option<unsafe extern "C" fn() -> VALUE>, + ) -> VALUE, + >, + pub argc: ::std::os::raw::c_int, +} +pub const OPTIMIZED_METHOD_TYPE_SEND: method_optimized_type = 0; +pub const OPTIMIZED_METHOD_TYPE_CALL: method_optimized_type = 1; +pub const OPTIMIZED_METHOD_TYPE_BLOCK_CALL: method_optimized_type = 2; +pub const OPTIMIZED_METHOD_TYPE_STRUCT_AREF: method_optimized_type = 3; +pub const OPTIMIZED_METHOD_TYPE_STRUCT_ASET: method_optimized_type = 4; +pub const OPTIMIZED_METHOD_TYPE__MAX: method_optimized_type = 5; +pub type method_optimized_type = u32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct rb_id_table { + _unused: [u8; 0], +} +pub type rb_num_t = ::std::os::raw::c_ulong; +pub const RUBY_TAG_NONE: ruby_tag_type = 0; +pub const RUBY_TAG_RETURN: ruby_tag_type = 1; +pub const RUBY_TAG_BREAK: ruby_tag_type = 2; +pub const RUBY_TAG_NEXT: ruby_tag_type = 3; +pub const RUBY_TAG_RETRY: ruby_tag_type = 4; +pub const RUBY_TAG_REDO: ruby_tag_type = 5; +pub const RUBY_TAG_RAISE: ruby_tag_type = 6; +pub const RUBY_TAG_THROW: ruby_tag_type = 7; +pub const RUBY_TAG_FATAL: ruby_tag_type = 8; +pub const RUBY_TAG_MASK: ruby_tag_type = 15; +pub type ruby_tag_type = u32; +pub const VM_THROW_NO_ESCAPE_FLAG: ruby_vm_throw_flags = 32768; +pub const VM_THROW_STATE_MASK: ruby_vm_throw_flags = 255; +pub type ruby_vm_throw_flags = u32; +#[repr(C)] +pub struct iseq_inline_constant_cache_entry { + pub flags: VALUE, + pub value: VALUE, + pub _unused1: VALUE, + 
pub _unused2: VALUE, + pub ic_cref: *const rb_cref_t, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct iseq_inline_constant_cache { + pub entry: *mut iseq_inline_constant_cache_entry, + pub segments: *const ID, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct iseq_inline_iv_cache_entry { + pub value: usize, + pub iv_set_name: ID, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct iseq_inline_cvar_cache_entry { + pub entry: *mut rb_cvar_class_tbl_entry, +} +pub const ISEQ_TYPE_TOP: rb_iseq_type = 0; +pub const ISEQ_TYPE_METHOD: rb_iseq_type = 1; +pub const ISEQ_TYPE_BLOCK: rb_iseq_type = 2; +pub const ISEQ_TYPE_CLASS: rb_iseq_type = 3; +pub const ISEQ_TYPE_RESCUE: rb_iseq_type = 4; +pub const ISEQ_TYPE_ENSURE: rb_iseq_type = 5; +pub const ISEQ_TYPE_EVAL: rb_iseq_type = 6; +pub const ISEQ_TYPE_MAIN: rb_iseq_type = 7; +pub const ISEQ_TYPE_PLAIN: rb_iseq_type = 8; +pub type rb_iseq_type = u32; +pub const BUILTIN_ATTR_LEAF: rb_builtin_attr = 1; +pub const BUILTIN_ATTR_SINGLE_NOARG_LEAF: rb_builtin_attr = 2; +pub const BUILTIN_ATTR_INLINE_BLOCK: rb_builtin_attr = 4; +pub type rb_builtin_attr = u32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword { + pub num: ::std::os::raw::c_int, + pub required_num: ::std::os::raw::c_int, + pub bits_start: ::std::os::raw::c_int, + pub rest_start: ::std::os::raw::c_int, + pub table: *const ID, + pub default_values: *mut VALUE, +} +#[repr(C)] +pub struct rb_captured_block { + pub self_: VALUE, + pub ep: *const VALUE, + pub code: rb_captured_block__bindgen_ty_1, +} +#[repr(C)] +pub struct rb_captured_block__bindgen_ty_1 { + pub iseq: __BindgenUnionField<*const rb_iseq_t>, + pub ifunc: __BindgenUnionField<*const vm_ifunc>, + pub val: __BindgenUnionField<VALUE>, + pub bindgen_union_field: u64, +} +pub const block_type_iseq: rb_block_type = 0; +pub const block_type_ifunc: rb_block_type = 1; +pub const block_type_symbol: rb_block_type = 2; +pub const block_type_proc: rb_block_type = 3; +pub type rb_block_type = u32; +#[repr(C)] +pub struct rb_block { + pub as_: rb_block__bindgen_ty_1, + pub type_: rb_block_type, +} +#[repr(C)] +pub struct rb_block__bindgen_ty_1 { + pub captured: __BindgenUnionField<rb_captured_block>, + pub symbol: __BindgenUnionField<VALUE>, + pub proc_: __BindgenUnionField<VALUE>, + pub bindgen_union_field: [u64; 3usize], +} +pub type rb_control_frame_t = rb_control_frame_struct; +#[repr(C)] +pub struct rb_proc_t { + pub block: rb_block, + pub _bitfield_align_1: [u8; 0], + pub _bitfield_1: __BindgenBitfieldUnit<[u8; 1usize]>, + pub __bindgen_padding_0: [u8; 7usize], +} +impl rb_proc_t { + #[inline] + pub fn is_from_method(&self) -> ::std::os::raw::c_uint { + unsafe { ::std::mem::transmute(self._bitfield_1.get(0usize, 1u8) as u32) } + } + #[inline] + pub fn set_is_from_method(&mut self, val: ::std::os::raw::c_uint) { + unsafe { + let val: u32 = ::std::mem::transmute(val); + self._bitfield_1.set(0usize, 1u8, val as u64) + } + } + #[inline] + pub fn is_lambda(&self) -> ::std::os::raw::c_uint { + unsafe { ::std::mem::transmute(self._bitfield_1.get(1usize, 1u8) as u32) } + } + #[inline] + pub fn set_is_lambda(&mut self, val: ::std::os::raw::c_uint) { + unsafe { + let val: u32 = ::std::mem::transmute(val); + self._bitfield_1.set(1usize, 1u8, val as u64) + } + } + #[inline] + pub fn is_isolated(&self) -> ::std::os::raw::c_uint { + unsafe { ::std::mem::transmute(self._bitfield_1.get(2usize, 1u8) as u32) } + } + #[inline] + pub fn set_is_isolated(&mut self, val: 
::std::os::raw::c_uint) { + unsafe { + let val: u32 = ::std::mem::transmute(val); + self._bitfield_1.set(2usize, 1u8, val as u64) + } + } + #[inline] + pub fn new_bitfield_1( + is_from_method: ::std::os::raw::c_uint, + is_lambda: ::std::os::raw::c_uint, + is_isolated: ::std::os::raw::c_uint, + ) -> __BindgenBitfieldUnit<[u8; 1usize]> { + let mut __bindgen_bitfield_unit: __BindgenBitfieldUnit<[u8; 1usize]> = Default::default(); + __bindgen_bitfield_unit.set(0usize, 1u8, { + let is_from_method: u32 = unsafe { ::std::mem::transmute(is_from_method) }; + is_from_method as u64 + }); + __bindgen_bitfield_unit.set(1usize, 1u8, { + let is_lambda: u32 = unsafe { ::std::mem::transmute(is_lambda) }; + is_lambda as u64 + }); + __bindgen_bitfield_unit.set(2usize, 1u8, { + let is_isolated: u32 = unsafe { ::std::mem::transmute(is_isolated) }; + is_isolated as u64 + }); + __bindgen_bitfield_unit + } +} +pub const VM_CHECKMATCH_TYPE_WHEN: vm_check_match_type = 1; +pub const VM_CHECKMATCH_TYPE_CASE: vm_check_match_type = 2; +pub const VM_CHECKMATCH_TYPE_RESCUE: vm_check_match_type = 3; +pub type vm_check_match_type = u32; +pub const VM_SPECIAL_OBJECT_VMCORE: vm_special_object_type = 1; +pub const VM_SPECIAL_OBJECT_CBASE: vm_special_object_type = 2; +pub const VM_SPECIAL_OBJECT_CONST_BASE: vm_special_object_type = 3; +pub type vm_special_object_type = u32; +pub type IC = *mut iseq_inline_constant_cache; +pub type IVC = *mut iseq_inline_iv_cache_entry; +pub type ICVARC = *mut iseq_inline_cvar_cache_entry; +pub const VM_FRAME_MAGIC_METHOD: vm_frame_env_flags = 286326785; +pub const VM_FRAME_MAGIC_BLOCK: vm_frame_env_flags = 572653569; +pub const VM_FRAME_MAGIC_CLASS: vm_frame_env_flags = 858980353; +pub const VM_FRAME_MAGIC_TOP: vm_frame_env_flags = 1145307137; +pub const VM_FRAME_MAGIC_CFUNC: vm_frame_env_flags = 1431633921; +pub const VM_FRAME_MAGIC_IFUNC: vm_frame_env_flags = 1717960705; +pub const VM_FRAME_MAGIC_EVAL: vm_frame_env_flags = 2004287489; +pub const VM_FRAME_MAGIC_RESCUE: vm_frame_env_flags = 2022178817; +pub const VM_FRAME_MAGIC_DUMMY: vm_frame_env_flags = 2040070145; +pub const VM_FRAME_MAGIC_MASK: vm_frame_env_flags = 2147418113; +pub const VM_FRAME_FLAG_FINISH: vm_frame_env_flags = 32; +pub const VM_FRAME_FLAG_BMETHOD: vm_frame_env_flags = 64; +pub const VM_FRAME_FLAG_CFRAME: vm_frame_env_flags = 128; +pub const VM_FRAME_FLAG_LAMBDA: vm_frame_env_flags = 256; +pub const VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM: vm_frame_env_flags = 512; +pub const VM_FRAME_FLAG_CFRAME_KW: vm_frame_env_flags = 1024; +pub const VM_FRAME_FLAG_PASSED: vm_frame_env_flags = 2048; +pub const VM_ENV_FLAG_LOCAL: vm_frame_env_flags = 2; +pub const VM_ENV_FLAG_ESCAPED: vm_frame_env_flags = 4; +pub const VM_ENV_FLAG_WB_REQUIRED: vm_frame_env_flags = 8; +pub const VM_ENV_FLAG_ISOLATED: vm_frame_env_flags = 16; +pub type vm_frame_env_flags = u32; +pub type attr_index_t = u32; +pub type shape_id_t = u32; +pub type redblack_id_t = u32; +pub type redblack_node_t = redblack_node; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct rb_shape { + pub edges: *mut rb_id_table, + pub edge_name: ID, + pub next_iv_index: attr_index_t, + pub capacity: u32, + pub type_: u8, + pub size_pool_index: u8, + pub parent_id: shape_id_t, + pub ancestor_index: *mut redblack_node_t, +} +pub type rb_shape_t = rb_shape; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct redblack_node { + pub key: ID, + pub value: *mut rb_shape_t, + pub l: redblack_id_t, + pub r: redblack_id_t, +} +#[repr(C)] +pub struct rb_cvar_class_tbl_entry { + pub index: u32, 
+ pub global_cvar_state: rb_serial_t, + pub cref: *const rb_cref_t, + pub class_value: VALUE, +} +pub const VM_CALL_ARGS_SPLAT_bit: vm_call_flag_bits = 0; +pub const VM_CALL_ARGS_BLOCKARG_bit: vm_call_flag_bits = 1; +pub const VM_CALL_FCALL_bit: vm_call_flag_bits = 2; +pub const VM_CALL_VCALL_bit: vm_call_flag_bits = 3; +pub const VM_CALL_ARGS_SIMPLE_bit: vm_call_flag_bits = 4; +pub const VM_CALL_KWARG_bit: vm_call_flag_bits = 5; +pub const VM_CALL_KW_SPLAT_bit: vm_call_flag_bits = 6; +pub const VM_CALL_TAILCALL_bit: vm_call_flag_bits = 7; +pub const VM_CALL_SUPER_bit: vm_call_flag_bits = 8; +pub const VM_CALL_ZSUPER_bit: vm_call_flag_bits = 9; +pub const VM_CALL_OPT_SEND_bit: vm_call_flag_bits = 10; +pub const VM_CALL_KW_SPLAT_MUT_bit: vm_call_flag_bits = 11; +pub const VM_CALL_ARGS_SPLAT_MUT_bit: vm_call_flag_bits = 12; +pub const VM_CALL__END: vm_call_flag_bits = 13; +pub type vm_call_flag_bits = u32; +#[repr(C)] +pub struct rb_callinfo_kwarg { + pub keyword_len: ::std::os::raw::c_int, + pub references: ::std::os::raw::c_int, + pub keywords: __IncompleteArrayField<VALUE>, +} +#[repr(C)] +pub struct rb_callinfo { + pub flags: VALUE, + pub kwarg: *const rb_callinfo_kwarg, + pub mid: VALUE, + pub flag: VALUE, + pub argc: VALUE, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct rb_call_data { + pub ci: *const rb_callinfo, + pub cc: *const rb_callcache, +} +pub const RSTRING_CHILLED: ruby_rstring_private_flags = 32768; +pub type ruby_rstring_private_flags = u32; +pub const RHASH_PASS_AS_KEYWORDS: ruby_rhash_flags = 8192; +pub const RHASH_PROC_DEFAULT: ruby_rhash_flags = 16384; +pub const RHASH_ST_TABLE_FLAG: ruby_rhash_flags = 32768; +pub const RHASH_AR_TABLE_SIZE_MASK: ruby_rhash_flags = 983040; +pub const RHASH_AR_TABLE_SIZE_SHIFT: ruby_rhash_flags = 16; +pub const RHASH_AR_TABLE_BOUND_MASK: ruby_rhash_flags = 15728640; +pub const RHASH_AR_TABLE_BOUND_SHIFT: ruby_rhash_flags = 20; +pub const RHASH_LEV_SHIFT: ruby_rhash_flags = 25; +pub const RHASH_LEV_MAX: ruby_rhash_flags = 127; +pub type ruby_rhash_flags = u32; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct rb_builtin_function { + pub func_ptr: *const ::std::os::raw::c_void, + pub argc: ::std::os::raw::c_int, + pub index: ::std::os::raw::c_int, + pub name: *const ::std::os::raw::c_char, +} +pub const YARVINSN_nop: ruby_vminsn_type = 0; +pub const YARVINSN_getlocal: ruby_vminsn_type = 1; +pub const YARVINSN_setlocal: ruby_vminsn_type = 2; +pub const YARVINSN_getblockparam: ruby_vminsn_type = 3; +pub const YARVINSN_setblockparam: ruby_vminsn_type = 4; +pub const YARVINSN_getblockparamproxy: ruby_vminsn_type = 5; +pub const YARVINSN_getspecial: ruby_vminsn_type = 6; +pub const YARVINSN_setspecial: ruby_vminsn_type = 7; +pub const YARVINSN_getinstancevariable: ruby_vminsn_type = 8; +pub const YARVINSN_setinstancevariable: ruby_vminsn_type = 9; +pub const YARVINSN_getclassvariable: ruby_vminsn_type = 10; +pub const YARVINSN_setclassvariable: ruby_vminsn_type = 11; +pub const YARVINSN_opt_getconstant_path: ruby_vminsn_type = 12; +pub const YARVINSN_getconstant: ruby_vminsn_type = 13; +pub const YARVINSN_setconstant: ruby_vminsn_type = 14; +pub const YARVINSN_getglobal: ruby_vminsn_type = 15; +pub const YARVINSN_setglobal: ruby_vminsn_type = 16; +pub const YARVINSN_putnil: ruby_vminsn_type = 17; +pub const YARVINSN_putself: ruby_vminsn_type = 18; +pub const YARVINSN_putobject: ruby_vminsn_type = 19; +pub const YARVINSN_putspecialobject: ruby_vminsn_type = 20; +pub const YARVINSN_putstring: ruby_vminsn_type = 21; +pub const 
YARVINSN_putchilledstring: ruby_vminsn_type = 22; +pub const YARVINSN_concatstrings: ruby_vminsn_type = 23; +pub const YARVINSN_anytostring: ruby_vminsn_type = 24; +pub const YARVINSN_toregexp: ruby_vminsn_type = 25; +pub const YARVINSN_intern: ruby_vminsn_type = 26; +pub const YARVINSN_newarray: ruby_vminsn_type = 27; +pub const YARVINSN_newarraykwsplat: ruby_vminsn_type = 28; +pub const YARVINSN_pushtoarraykwsplat: ruby_vminsn_type = 29; +pub const YARVINSN_duparray: ruby_vminsn_type = 30; +pub const YARVINSN_duphash: ruby_vminsn_type = 31; +pub const YARVINSN_expandarray: ruby_vminsn_type = 32; +pub const YARVINSN_concatarray: ruby_vminsn_type = 33; +pub const YARVINSN_concattoarray: ruby_vminsn_type = 34; +pub const YARVINSN_pushtoarray: ruby_vminsn_type = 35; +pub const YARVINSN_splatarray: ruby_vminsn_type = 36; +pub const YARVINSN_splatkw: ruby_vminsn_type = 37; +pub const YARVINSN_newhash: ruby_vminsn_type = 38; +pub const YARVINSN_newrange: ruby_vminsn_type = 39; +pub const YARVINSN_pop: ruby_vminsn_type = 40; +pub const YARVINSN_dup: ruby_vminsn_type = 41; +pub const YARVINSN_dupn: ruby_vminsn_type = 42; +pub const YARVINSN_swap: ruby_vminsn_type = 43; +pub const YARVINSN_opt_reverse: ruby_vminsn_type = 44; +pub const YARVINSN_topn: ruby_vminsn_type = 45; +pub const YARVINSN_setn: ruby_vminsn_type = 46; +pub const YARVINSN_adjuststack: ruby_vminsn_type = 47; +pub const YARVINSN_defined: ruby_vminsn_type = 48; +pub const YARVINSN_definedivar: ruby_vminsn_type = 49; +pub const YARVINSN_checkmatch: ruby_vminsn_type = 50; +pub const YARVINSN_checkkeyword: ruby_vminsn_type = 51; +pub const YARVINSN_checktype: ruby_vminsn_type = 52; +pub const YARVINSN_defineclass: ruby_vminsn_type = 53; +pub const YARVINSN_definemethod: ruby_vminsn_type = 54; +pub const YARVINSN_definesmethod: ruby_vminsn_type = 55; +pub const YARVINSN_send: ruby_vminsn_type = 56; +pub const YARVINSN_opt_send_without_block: ruby_vminsn_type = 57; +pub const YARVINSN_objtostring: ruby_vminsn_type = 58; +pub const YARVINSN_opt_str_freeze: ruby_vminsn_type = 59; +pub const YARVINSN_opt_nil_p: ruby_vminsn_type = 60; +pub const YARVINSN_opt_str_uminus: ruby_vminsn_type = 61; +pub const YARVINSN_opt_newarray_send: ruby_vminsn_type = 62; +pub const YARVINSN_invokesuper: ruby_vminsn_type = 63; +pub const YARVINSN_invokeblock: ruby_vminsn_type = 64; +pub const YARVINSN_leave: ruby_vminsn_type = 65; +pub const YARVINSN_throw: ruby_vminsn_type = 66; +pub const YARVINSN_jump: ruby_vminsn_type = 67; +pub const YARVINSN_branchif: ruby_vminsn_type = 68; +pub const YARVINSN_branchunless: ruby_vminsn_type = 69; +pub const YARVINSN_branchnil: ruby_vminsn_type = 70; +pub const YARVINSN_once: ruby_vminsn_type = 71; +pub const YARVINSN_opt_case_dispatch: ruby_vminsn_type = 72; +pub const YARVINSN_opt_plus: ruby_vminsn_type = 73; +pub const YARVINSN_opt_minus: ruby_vminsn_type = 74; +pub const YARVINSN_opt_mult: ruby_vminsn_type = 75; +pub const YARVINSN_opt_div: ruby_vminsn_type = 76; +pub const YARVINSN_opt_mod: ruby_vminsn_type = 77; +pub const YARVINSN_opt_eq: ruby_vminsn_type = 78; +pub const YARVINSN_opt_neq: ruby_vminsn_type = 79; +pub const YARVINSN_opt_lt: ruby_vminsn_type = 80; +pub const YARVINSN_opt_le: ruby_vminsn_type = 81; +pub const YARVINSN_opt_gt: ruby_vminsn_type = 82; +pub const YARVINSN_opt_ge: ruby_vminsn_type = 83; +pub const YARVINSN_opt_ltlt: ruby_vminsn_type = 84; +pub const YARVINSN_opt_and: ruby_vminsn_type = 85; +pub const YARVINSN_opt_or: ruby_vminsn_type = 86; +pub const YARVINSN_opt_aref: ruby_vminsn_type = 
87; +pub const YARVINSN_opt_aset: ruby_vminsn_type = 88; +pub const YARVINSN_opt_aset_with: ruby_vminsn_type = 89; +pub const YARVINSN_opt_aref_with: ruby_vminsn_type = 90; +pub const YARVINSN_opt_length: ruby_vminsn_type = 91; +pub const YARVINSN_opt_size: ruby_vminsn_type = 92; +pub const YARVINSN_opt_empty_p: ruby_vminsn_type = 93; +pub const YARVINSN_opt_succ: ruby_vminsn_type = 94; +pub const YARVINSN_opt_not: ruby_vminsn_type = 95; +pub const YARVINSN_opt_regexpmatch2: ruby_vminsn_type = 96; +pub const YARVINSN_invokebuiltin: ruby_vminsn_type = 97; +pub const YARVINSN_opt_invokebuiltin_delegate: ruby_vminsn_type = 98; +pub const YARVINSN_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 99; +pub const YARVINSN_getlocal_WC_0: ruby_vminsn_type = 100; +pub const YARVINSN_getlocal_WC_1: ruby_vminsn_type = 101; +pub const YARVINSN_setlocal_WC_0: ruby_vminsn_type = 102; +pub const YARVINSN_setlocal_WC_1: ruby_vminsn_type = 103; +pub const YARVINSN_putobject_INT2FIX_0_: ruby_vminsn_type = 104; +pub const YARVINSN_putobject_INT2FIX_1_: ruby_vminsn_type = 105; +pub const YARVINSN_trace_nop: ruby_vminsn_type = 106; +pub const YARVINSN_trace_getlocal: ruby_vminsn_type = 107; +pub const YARVINSN_trace_setlocal: ruby_vminsn_type = 108; +pub const YARVINSN_trace_getblockparam: ruby_vminsn_type = 109; +pub const YARVINSN_trace_setblockparam: ruby_vminsn_type = 110; +pub const YARVINSN_trace_getblockparamproxy: ruby_vminsn_type = 111; +pub const YARVINSN_trace_getspecial: ruby_vminsn_type = 112; +pub const YARVINSN_trace_setspecial: ruby_vminsn_type = 113; +pub const YARVINSN_trace_getinstancevariable: ruby_vminsn_type = 114; +pub const YARVINSN_trace_setinstancevariable: ruby_vminsn_type = 115; +pub const YARVINSN_trace_getclassvariable: ruby_vminsn_type = 116; +pub const YARVINSN_trace_setclassvariable: ruby_vminsn_type = 117; +pub const YARVINSN_trace_opt_getconstant_path: ruby_vminsn_type = 118; +pub const YARVINSN_trace_getconstant: ruby_vminsn_type = 119; +pub const YARVINSN_trace_setconstant: ruby_vminsn_type = 120; +pub const YARVINSN_trace_getglobal: ruby_vminsn_type = 121; +pub const YARVINSN_trace_setglobal: ruby_vminsn_type = 122; +pub const YARVINSN_trace_putnil: ruby_vminsn_type = 123; +pub const YARVINSN_trace_putself: ruby_vminsn_type = 124; +pub const YARVINSN_trace_putobject: ruby_vminsn_type = 125; +pub const YARVINSN_trace_putspecialobject: ruby_vminsn_type = 126; +pub const YARVINSN_trace_putstring: ruby_vminsn_type = 127; +pub const YARVINSN_trace_putchilledstring: ruby_vminsn_type = 128; +pub const YARVINSN_trace_concatstrings: ruby_vminsn_type = 129; +pub const YARVINSN_trace_anytostring: ruby_vminsn_type = 130; +pub const YARVINSN_trace_toregexp: ruby_vminsn_type = 131; +pub const YARVINSN_trace_intern: ruby_vminsn_type = 132; +pub const YARVINSN_trace_newarray: ruby_vminsn_type = 133; +pub const YARVINSN_trace_newarraykwsplat: ruby_vminsn_type = 134; +pub const YARVINSN_trace_pushtoarraykwsplat: ruby_vminsn_type = 135; +pub const YARVINSN_trace_duparray: ruby_vminsn_type = 136; +pub const YARVINSN_trace_duphash: ruby_vminsn_type = 137; +pub const YARVINSN_trace_expandarray: ruby_vminsn_type = 138; +pub const YARVINSN_trace_concatarray: ruby_vminsn_type = 139; +pub const YARVINSN_trace_concattoarray: ruby_vminsn_type = 140; +pub const YARVINSN_trace_pushtoarray: ruby_vminsn_type = 141; +pub const YARVINSN_trace_splatarray: ruby_vminsn_type = 142; +pub const YARVINSN_trace_splatkw: ruby_vminsn_type = 143; +pub const YARVINSN_trace_newhash: ruby_vminsn_type = 144; +pub 
const YARVINSN_trace_newrange: ruby_vminsn_type = 145; +pub const YARVINSN_trace_pop: ruby_vminsn_type = 146; +pub const YARVINSN_trace_dup: ruby_vminsn_type = 147; +pub const YARVINSN_trace_dupn: ruby_vminsn_type = 148; +pub const YARVINSN_trace_swap: ruby_vminsn_type = 149; +pub const YARVINSN_trace_opt_reverse: ruby_vminsn_type = 150; +pub const YARVINSN_trace_topn: ruby_vminsn_type = 151; +pub const YARVINSN_trace_setn: ruby_vminsn_type = 152; +pub const YARVINSN_trace_adjuststack: ruby_vminsn_type = 153; +pub const YARVINSN_trace_defined: ruby_vminsn_type = 154; +pub const YARVINSN_trace_definedivar: ruby_vminsn_type = 155; +pub const YARVINSN_trace_checkmatch: ruby_vminsn_type = 156; +pub const YARVINSN_trace_checkkeyword: ruby_vminsn_type = 157; +pub const YARVINSN_trace_checktype: ruby_vminsn_type = 158; +pub const YARVINSN_trace_defineclass: ruby_vminsn_type = 159; +pub const YARVINSN_trace_definemethod: ruby_vminsn_type = 160; +pub const YARVINSN_trace_definesmethod: ruby_vminsn_type = 161; +pub const YARVINSN_trace_send: ruby_vminsn_type = 162; +pub const YARVINSN_trace_opt_send_without_block: ruby_vminsn_type = 163; +pub const YARVINSN_trace_objtostring: ruby_vminsn_type = 164; +pub const YARVINSN_trace_opt_str_freeze: ruby_vminsn_type = 165; +pub const YARVINSN_trace_opt_nil_p: ruby_vminsn_type = 166; +pub const YARVINSN_trace_opt_str_uminus: ruby_vminsn_type = 167; +pub const YARVINSN_trace_opt_newarray_send: ruby_vminsn_type = 168; +pub const YARVINSN_trace_invokesuper: ruby_vminsn_type = 169; +pub const YARVINSN_trace_invokeblock: ruby_vminsn_type = 170; +pub const YARVINSN_trace_leave: ruby_vminsn_type = 171; +pub const YARVINSN_trace_throw: ruby_vminsn_type = 172; +pub const YARVINSN_trace_jump: ruby_vminsn_type = 173; +pub const YARVINSN_trace_branchif: ruby_vminsn_type = 174; +pub const YARVINSN_trace_branchunless: ruby_vminsn_type = 175; +pub const YARVINSN_trace_branchnil: ruby_vminsn_type = 176; +pub const YARVINSN_trace_once: ruby_vminsn_type = 177; +pub const YARVINSN_trace_opt_case_dispatch: ruby_vminsn_type = 178; +pub const YARVINSN_trace_opt_plus: ruby_vminsn_type = 179; +pub const YARVINSN_trace_opt_minus: ruby_vminsn_type = 180; +pub const YARVINSN_trace_opt_mult: ruby_vminsn_type = 181; +pub const YARVINSN_trace_opt_div: ruby_vminsn_type = 182; +pub const YARVINSN_trace_opt_mod: ruby_vminsn_type = 183; +pub const YARVINSN_trace_opt_eq: ruby_vminsn_type = 184; +pub const YARVINSN_trace_opt_neq: ruby_vminsn_type = 185; +pub const YARVINSN_trace_opt_lt: ruby_vminsn_type = 186; +pub const YARVINSN_trace_opt_le: ruby_vminsn_type = 187; +pub const YARVINSN_trace_opt_gt: ruby_vminsn_type = 188; +pub const YARVINSN_trace_opt_ge: ruby_vminsn_type = 189; +pub const YARVINSN_trace_opt_ltlt: ruby_vminsn_type = 190; +pub const YARVINSN_trace_opt_and: ruby_vminsn_type = 191; +pub const YARVINSN_trace_opt_or: ruby_vminsn_type = 192; +pub const YARVINSN_trace_opt_aref: ruby_vminsn_type = 193; +pub const YARVINSN_trace_opt_aset: ruby_vminsn_type = 194; +pub const YARVINSN_trace_opt_aset_with: ruby_vminsn_type = 195; +pub const YARVINSN_trace_opt_aref_with: ruby_vminsn_type = 196; +pub const YARVINSN_trace_opt_length: ruby_vminsn_type = 197; +pub const YARVINSN_trace_opt_size: ruby_vminsn_type = 198; +pub const YARVINSN_trace_opt_empty_p: ruby_vminsn_type = 199; +pub const YARVINSN_trace_opt_succ: ruby_vminsn_type = 200; +pub const YARVINSN_trace_opt_not: ruby_vminsn_type = 201; +pub const YARVINSN_trace_opt_regexpmatch2: ruby_vminsn_type = 202; +pub const 
YARVINSN_trace_invokebuiltin: ruby_vminsn_type = 203; +pub const YARVINSN_trace_opt_invokebuiltin_delegate: ruby_vminsn_type = 204; +pub const YARVINSN_trace_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 205; +pub const YARVINSN_trace_getlocal_WC_0: ruby_vminsn_type = 206; +pub const YARVINSN_trace_getlocal_WC_1: ruby_vminsn_type = 207; +pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 208; +pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 209; +pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 210; +pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 211; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 212; +pub type ruby_vminsn_type = u32; +pub type rb_iseq_callback = ::std::option::Option< + unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), +>; +pub const DEFINED_NOT_DEFINED: defined_type = 0; +pub const DEFINED_NIL: defined_type = 1; +pub const DEFINED_IVAR: defined_type = 2; +pub const DEFINED_LVAR: defined_type = 3; +pub const DEFINED_GVAR: defined_type = 4; +pub const DEFINED_CVAR: defined_type = 5; +pub const DEFINED_CONST: defined_type = 6; +pub const DEFINED_METHOD: defined_type = 7; +pub const DEFINED_YIELD: defined_type = 8; +pub const DEFINED_ZSUPER: defined_type = 9; +pub const DEFINED_SELF: defined_type = 10; +pub const DEFINED_TRUE: defined_type = 11; +pub const DEFINED_FALSE: defined_type = 12; +pub const DEFINED_ASGN: defined_type = 13; +pub const DEFINED_EXPR: defined_type = 14; +pub const DEFINED_REF: defined_type = 15; +pub const DEFINED_FUNC: defined_type = 16; +pub const DEFINED_CONST_FROM: defined_type = 17; +pub type defined_type = u32; +pub const ROBJECT_OFFSET_AS_HEAP_IVPTR: robject_offsets = 16; +pub const ROBJECT_OFFSET_AS_HEAP_IV_INDEX_TBL: robject_offsets = 24; +pub const ROBJECT_OFFSET_AS_ARY: robject_offsets = 16; +pub type robject_offsets = u32; +pub const RUBY_OFFSET_RSTRING_LEN: rstring_offsets = 16; +pub type rstring_offsets = u32; +pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword; +extern "C" { + pub fn ruby_xfree(ptr: *mut ::std::os::raw::c_void); + pub fn rb_class_attached_object(klass: VALUE) -> VALUE; + pub fn rb_singleton_class(obj: VALUE) -> VALUE; + pub fn rb_get_alloc_func(klass: VALUE) -> rb_alloc_func_t; + pub fn rb_method_basic_definition_p(klass: VALUE, mid: ID) -> ::std::os::raw::c_int; + pub fn rb_bug(fmt: *const ::std::os::raw::c_char, ...) 
-> !; + pub fn rb_gc_mark(obj: VALUE); + pub fn rb_gc_mark_movable(obj: VALUE); + pub fn rb_gc_location(obj: VALUE) -> VALUE; + pub fn rb_gc_writebarrier(old: VALUE, young: VALUE); + pub fn rb_class_get_superclass(klass: VALUE) -> VALUE; + pub static mut rb_mKernel: VALUE; + pub static mut rb_cBasicObject: VALUE; + pub static mut rb_cArray: VALUE; + pub static mut rb_cClass: VALUE; + pub static mut rb_cFalseClass: VALUE; + pub static mut rb_cFloat: VALUE; + pub static mut rb_cHash: VALUE; + pub static mut rb_cIO: VALUE; + pub static mut rb_cInteger: VALUE; + pub static mut rb_cModule: VALUE; + pub static mut rb_cNilClass: VALUE; + pub static mut rb_cString: VALUE; + pub static mut rb_cSymbol: VALUE; + pub static mut rb_cThread: VALUE; + pub static mut rb_cTrueClass: VALUE; + pub fn rb_obj_class(obj: VALUE) -> VALUE; + pub fn rb_ary_new_capa(capa: ::std::os::raw::c_long) -> VALUE; + pub fn rb_ary_store(ary: VALUE, key: ::std::os::raw::c_long, val: VALUE); + pub fn rb_ary_dup(ary: VALUE) -> VALUE; + pub fn rb_ary_resurrect(ary: VALUE) -> VALUE; + pub fn rb_ary_cat(ary: VALUE, train: *const VALUE, len: ::std::os::raw::c_long) -> VALUE; + pub fn rb_ary_push(ary: VALUE, elem: VALUE) -> VALUE; + pub fn rb_ary_clear(ary: VALUE) -> VALUE; + pub fn rb_hash_new() -> VALUE; + pub fn rb_hash_aref(hash: VALUE, key: VALUE) -> VALUE; + pub fn rb_hash_aset(hash: VALUE, key: VALUE, val: VALUE) -> VALUE; + pub fn rb_hash_bulk_insert(argc: ::std::os::raw::c_long, argv: *const VALUE, hash: VALUE); + pub fn rb_obj_is_proc(recv: VALUE) -> VALUE; + pub fn rb_sym2id(obj: VALUE) -> ID; + pub fn rb_id2sym(id: ID) -> VALUE; + pub fn rb_intern(name: *const ::std::os::raw::c_char) -> ID; + pub fn rb_intern2(name: *const ::std::os::raw::c_char, len: ::std::os::raw::c_long) -> ID; + pub fn rb_id2name(id: ID) -> *const ::std::os::raw::c_char; + pub fn rb_class2name(klass: VALUE) -> *const ::std::os::raw::c_char; + pub fn rb_obj_is_kind_of(obj: VALUE, klass: VALUE) -> VALUE; + pub fn rb_obj_frozen_p(obj: VALUE) -> VALUE; + pub fn rb_backref_get() -> VALUE; + pub fn rb_range_new(beg: VALUE, end: VALUE, excl: ::std::os::raw::c_int) -> VALUE; + pub fn rb_reg_nth_match(n: ::std::os::raw::c_int, md: VALUE) -> VALUE; + pub fn rb_reg_last_match(md: VALUE) -> VALUE; + pub fn rb_reg_match_pre(md: VALUE) -> VALUE; + pub fn rb_reg_match_post(md: VALUE) -> VALUE; + pub fn rb_reg_match_last(md: VALUE) -> VALUE; + pub fn rb_utf8_str_new( + ptr: *const ::std::os::raw::c_char, + len: ::std::os::raw::c_long, + ) -> VALUE; + pub fn rb_str_buf_append(dst: VALUE, src: VALUE) -> VALUE; + pub fn rb_str_dup(str_: VALUE) -> VALUE; + pub fn rb_str_intern(str_: VALUE) -> VALUE; + pub fn rb_ivar_get(obj: VALUE, name: ID) -> VALUE; + pub fn rb_ivar_defined(obj: VALUE, name: ID) -> VALUE; + pub fn rb_attr_get(obj: VALUE, name: ID) -> VALUE; + pub fn rb_obj_info_dump(obj: VALUE); + pub fn rb_class_allocate_instance(klass: VALUE) -> VALUE; + pub fn rb_obj_equal(obj1: VALUE, obj2: VALUE) -> VALUE; + pub fn rb_reg_new_ary(ary: VALUE, options: ::std::os::raw::c_int) -> VALUE; + pub fn rb_ary_tmp_new_from_values( + arg1: VALUE, + arg2: ::std::os::raw::c_long, + arg3: *const VALUE, + ) -> VALUE; + pub fn rb_ec_ary_new_from_values( + ec: *mut rb_execution_context_struct, + n: ::std::os::raw::c_long, + elts: *const VALUE, + ) -> VALUE; + pub fn rb_vm_top_self() -> VALUE; + pub static mut rb_vm_insns_count: u64; + pub fn rb_method_entry_at(obj: VALUE, id: ID) -> *const rb_method_entry_t; + pub fn rb_callable_method_entry(klass: VALUE, id: ID) -> *const 
rb_callable_method_entry_t; + pub fn rb_callable_method_entry_or_negative( + klass: VALUE, + id: ID, + ) -> *const rb_callable_method_entry_t; + pub static mut rb_mRubyVMFrozenCore: VALUE; + pub static mut rb_block_param_proxy: VALUE; + pub fn rb_vm_ep_local_ep(ep: *const VALUE) -> *const VALUE; + pub fn rb_iseq_path(iseq: *const rb_iseq_t) -> VALUE; + pub fn rb_vm_env_write(ep: *const VALUE, index: ::std::os::raw::c_int, v: VALUE); + pub fn rb_vm_bh_to_procval(ec: *const rb_execution_context_t, block_handler: VALUE) -> VALUE; + pub fn rb_vm_frame_method_entry( + cfp: *const rb_control_frame_t, + ) -> *const rb_callable_method_entry_t; + pub fn rb_obj_info(obj: VALUE) -> *const ::std::os::raw::c_char; + pub fn rb_ec_stack_check(ec: *mut rb_execution_context_struct) -> ::std::os::raw::c_int; + pub fn rb_shape_id_offset() -> i32; + pub fn rb_shape_get_shape_by_id(shape_id: shape_id_t) -> *mut rb_shape_t; + pub fn rb_shape_get_shape_id(obj: VALUE) -> shape_id_t; + pub fn rb_shape_get_iv_index(shape: *mut rb_shape_t, id: ID, value: *mut attr_index_t) -> bool; + pub fn rb_shape_obj_too_complex(obj: VALUE) -> bool; + pub fn rb_shape_get_next(shape: *mut rb_shape_t, obj: VALUE, id: ID) -> *mut rb_shape_t; + pub fn rb_shape_id(shape: *mut rb_shape_t) -> shape_id_t; + pub fn rb_gvar_get(arg1: ID) -> VALUE; + pub fn rb_gvar_set(arg1: ID, arg2: VALUE) -> VALUE; + pub fn rb_ensure_iv_list_size(obj: VALUE, len: u32, newsize: u32); + pub fn rb_vm_barrier(); + pub fn rb_str_byte_substr(str_: VALUE, beg: VALUE, len: VALUE) -> VALUE; + pub fn rb_obj_as_string_result(str_: VALUE, obj: VALUE) -> VALUE; + pub fn rb_str_concat_literals(num: usize, strary: *const VALUE) -> VALUE; + pub fn rb_ec_str_resurrect( + ec: *mut rb_execution_context_struct, + str_: VALUE, + chilled: bool, + ) -> VALUE; + pub fn rb_to_hash_type(obj: VALUE) -> VALUE; + pub fn rb_hash_stlike_foreach( + hash: VALUE, + func: st_foreach_callback_func, + arg: st_data_t, + ) -> ::std::os::raw::c_int; + pub fn rb_hash_new_with_size(size: st_index_t) -> VALUE; + pub fn rb_hash_resurrect(hash: VALUE) -> VALUE; + pub fn rb_hash_stlike_lookup( + hash: VALUE, + key: st_data_t, + pval: *mut st_data_t, + ) -> ::std::os::raw::c_int; + pub fn rb_insn_len(insn: VALUE) -> ::std::os::raw::c_int; + pub fn rb_vm_insn_decode(encoded: VALUE) -> ::std::os::raw::c_int; + pub fn rb_float_plus(x: VALUE, y: VALUE) -> VALUE; + pub fn rb_float_minus(x: VALUE, y: VALUE) -> VALUE; + pub fn rb_float_mul(x: VALUE, y: VALUE) -> VALUE; + pub fn rb_float_div(x: VALUE, y: VALUE) -> VALUE; + pub fn rb_fix_aref(fix: VALUE, idx: VALUE) -> VALUE; + pub fn rb_vm_insn_addr2opcode(addr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int; + pub fn rb_iseq_line_no(iseq: *const rb_iseq_t, pos: usize) -> ::std::os::raw::c_uint; + pub fn rb_iseqw_to_iseq(iseqw: VALUE) -> *const rb_iseq_t; + pub fn rb_iseq_label(iseq: *const rb_iseq_t) -> VALUE; + pub fn rb_profile_frames( + start: ::std::os::raw::c_int, + limit: ::std::os::raw::c_int, + buff: *mut VALUE, + lines: *mut ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int; + pub fn rb_jit_cont_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void); + pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool; + pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32); + pub fn rb_yjit_mark_unused(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool; + pub fn rb_yjit_array_len(a: VALUE) -> ::std::os::raw::c_long; + pub fn rb_yjit_icache_invalidate( 
+ start: *mut ::std::os::raw::c_void, + end: *mut ::std::os::raw::c_void, + ); + pub fn rb_yjit_exit_locations_dict( + yjit_raw_samples: *mut VALUE, + yjit_line_samples: *mut ::std::os::raw::c_int, + samples_len: ::std::os::raw::c_int, + ) -> VALUE; + pub fn rb_yjit_get_page_size() -> u32; + pub fn rb_yjit_reserve_addr_space(mem_size: u32) -> *mut u8; + pub fn rb_c_method_tracing_currently_enabled(ec: *const rb_execution_context_t) -> bool; + pub fn rb_full_cfunc_return(ec: *mut rb_execution_context_t, return_value: VALUE); + pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; + pub fn rb_iseq_get_yjit_payload(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_void; + pub fn rb_iseq_set_yjit_payload(iseq: *const rb_iseq_t, payload: *mut ::std::os::raw::c_void); + pub fn rb_iseq_reset_jit_func(iseq: *const rb_iseq_t); + pub fn rb_iseq_pc_at_idx(iseq: *const rb_iseq_t, insn_idx: u32) -> *mut VALUE; + pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int; + pub fn rb_RSTRING_LEN(str_: VALUE) -> ::std::os::raw::c_ulong; + pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char; + pub fn rb_yjit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t; + pub fn rb_insn_name(insn: VALUE) -> *const ::std::os::raw::c_char; + pub fn rb_vm_ci_argc(ci: *const rb_callinfo) -> ::std::os::raw::c_uint; + pub fn rb_vm_ci_mid(ci: *const rb_callinfo) -> ID; + pub fn rb_vm_ci_flag(ci: *const rb_callinfo) -> ::std::os::raw::c_uint; + pub fn rb_vm_ci_kwarg(ci: *const rb_callinfo) -> *const rb_callinfo_kwarg; + pub fn rb_get_cikw_keyword_len(cikw: *const rb_callinfo_kwarg) -> ::std::os::raw::c_int; + pub fn rb_get_cikw_keywords_idx( + cikw: *const rb_callinfo_kwarg, + idx: ::std::os::raw::c_int, + ) -> VALUE; + pub fn rb_METHOD_ENTRY_VISI(me: *const rb_callable_method_entry_t) -> rb_method_visibility_t; + pub fn rb_get_cme_def_type(cme: *const rb_callable_method_entry_t) -> rb_method_type_t; + pub fn rb_get_cme_def_body_attr_id(cme: *const rb_callable_method_entry_t) -> ID; + pub fn rb_get_symbol_id(namep: VALUE) -> ID; + pub fn rb_get_cme_def_body_optimized_type( + cme: *const rb_callable_method_entry_t, + ) -> method_optimized_type; + pub fn rb_get_cme_def_body_optimized_index( + cme: *const rb_callable_method_entry_t, + ) -> ::std::os::raw::c_uint; + pub fn rb_get_cme_def_body_cfunc( + cme: *const rb_callable_method_entry_t, + ) -> *mut rb_method_cfunc_t; + pub fn rb_get_def_method_serial(def: *const rb_method_definition_t) -> usize; + pub fn rb_get_def_original_id(def: *const rb_method_definition_t) -> ID; + pub fn rb_get_mct_argc(mct: *const rb_method_cfunc_t) -> ::std::os::raw::c_int; + pub fn rb_get_mct_func(mct: *const rb_method_cfunc_t) -> *mut ::std::os::raw::c_void; + pub fn rb_get_def_iseq_ptr(def: *mut rb_method_definition_t) -> *const rb_iseq_t; + pub fn rb_get_def_bmethod_proc(def: *mut rb_method_definition_t) -> VALUE; + pub fn rb_get_iseq_body_local_iseq(iseq: *const rb_iseq_t) -> *const rb_iseq_t; + pub fn rb_get_iseq_body_parent_iseq(iseq: *const rb_iseq_t) -> *const rb_iseq_t; + pub fn rb_get_iseq_body_local_table_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; + pub fn rb_get_iseq_body_iseq_encoded(iseq: *const rb_iseq_t) -> *mut VALUE; + pub fn rb_get_iseq_body_stack_max(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; + pub fn rb_get_iseq_body_type(iseq: *const rb_iseq_t) -> rb_iseq_type; + pub fn rb_get_iseq_flags_has_lead(iseq: *const rb_iseq_t) -> bool; + pub fn rb_get_iseq_flags_has_opt(iseq: *const rb_iseq_t) 
-> bool; + pub fn rb_get_iseq_flags_has_kw(iseq: *const rb_iseq_t) -> bool; + pub fn rb_get_iseq_flags_has_post(iseq: *const rb_iseq_t) -> bool; + pub fn rb_get_iseq_flags_has_kwrest(iseq: *const rb_iseq_t) -> bool; + pub fn rb_get_iseq_flags_anon_kwrest(iseq: *const rb_iseq_t) -> bool; + pub fn rb_get_iseq_flags_has_rest(iseq: *const rb_iseq_t) -> bool; + pub fn rb_get_iseq_flags_ruby2_keywords(iseq: *const rb_iseq_t) -> bool; + pub fn rb_get_iseq_flags_has_block(iseq: *const rb_iseq_t) -> bool; + pub fn rb_get_iseq_flags_ambiguous_param0(iseq: *const rb_iseq_t) -> bool; + pub fn rb_get_iseq_flags_accepts_no_kwarg(iseq: *const rb_iseq_t) -> bool; + pub fn rb_get_iseq_body_param_keyword( + iseq: *const rb_iseq_t, + ) -> *const rb_seq_param_keyword_struct; + pub fn rb_get_iseq_body_param_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; + pub fn rb_get_iseq_body_param_lead_num(iseq: *const rb_iseq_t) -> ::std::os::raw::c_int; + pub fn rb_get_iseq_body_param_opt_num(iseq: *const rb_iseq_t) -> ::std::os::raw::c_int; + pub fn rb_get_iseq_body_param_opt_table(iseq: *const rb_iseq_t) -> *const VALUE; + pub fn rb_optimized_call( + recv: *mut VALUE, + ec: *mut rb_execution_context_t, + argc: ::std::os::raw::c_int, + argv: *mut VALUE, + kw_splat: ::std::os::raw::c_int, + block_handler: VALUE, + ) -> VALUE; + pub fn rb_yjit_iseq_builtin_attrs(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; + pub fn rb_yjit_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function; + pub fn rb_yjit_str_simple_append(str1: VALUE, str2: VALUE) -> VALUE; + pub fn rb_get_ec_cfp(ec: *const rb_execution_context_t) -> *mut rb_control_frame_struct; + pub fn rb_get_cfp_iseq(cfp: *mut rb_control_frame_struct) -> *const rb_iseq_t; + pub fn rb_get_cfp_pc(cfp: *mut rb_control_frame_struct) -> *mut VALUE; + pub fn rb_get_cfp_sp(cfp: *mut rb_control_frame_struct) -> *mut VALUE; + pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE); + pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE); + pub fn rb_get_cfp_self(cfp: *mut rb_control_frame_struct) -> VALUE; + pub fn rb_get_cfp_ep(cfp: *mut rb_control_frame_struct) -> *mut VALUE; + pub fn rb_get_cfp_ep_level(cfp: *mut rb_control_frame_struct, lv: u32) -> *const VALUE; + pub fn rb_vm_base_ptr(cfp: *mut rb_control_frame_struct) -> *mut VALUE; + pub fn rb_yarv_class_of(obj: VALUE) -> VALUE; + pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; + pub fn rb_str_neq_internal(str1: VALUE, str2: VALUE) -> VALUE; + pub fn rb_yarv_ary_entry_internal(ary: VALUE, offset: ::std::os::raw::c_long) -> VALUE; + pub fn rb_ary_unshift_m(argc: ::std::os::raw::c_int, argv: *mut VALUE, ary: VALUE) -> VALUE; + pub fn rb_yjit_rb_ary_subseq_length(ary: VALUE, beg: ::std::os::raw::c_long) -> VALUE; + pub fn rb_yjit_fix_div_fix(recv: VALUE, obj: VALUE) -> VALUE; + pub fn rb_yjit_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE; + pub fn rb_yjit_ruby2_keywords_splat_p(obj: VALUE) -> usize; + pub fn rb_yjit_splat_varg_checks( + sp: *mut VALUE, + splat_array: VALUE, + cfp: *mut rb_control_frame_t, + ) -> VALUE; + pub fn rb_yjit_splat_varg_cfunc(stack_splat_array: *mut VALUE) -> ::std::os::raw::c_int; + pub fn rb_yjit_dump_iseq_loc(iseq: *const rb_iseq_t, insn_idx: u32); + pub fn rb_yjit_iseq_inspect(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_char; + pub fn rb_FL_TEST(obj: VALUE, flags: VALUE) -> VALUE; + pub fn rb_FL_TEST_RAW(obj: VALUE, flags: VALUE) -> VALUE; + pub fn rb_RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool; + pub fn 
rb_RSTRUCT_LEN(st: VALUE) -> ::std::os::raw::c_long; + pub fn rb_RSTRUCT_SET(st: VALUE, k: ::std::os::raw::c_int, v: VALUE); + pub fn rb_get_call_data_ci(cd: *const rb_call_data) -> *const rb_callinfo; + pub fn rb_BASIC_OP_UNREDEFINED_P(bop: ruby_basic_operators, klass: u32) -> bool; + pub fn rb_RCLASS_ORIGIN(c: VALUE) -> VALUE; + pub fn rb_ENCODING_GET(obj: VALUE) -> ::std::os::raw::c_int; + pub fn rb_yjit_multi_ractor_p() -> bool; + pub fn rb_assert_iseq_handle(handle: VALUE); + pub fn rb_IMEMO_TYPE_P(imemo: VALUE, imemo_type: imemo_type) -> ::std::os::raw::c_int; + pub fn rb_assert_cme_handle(handle: VALUE); + pub fn rb_yjit_for_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void); + pub fn rb_yjit_obj_written( + old: VALUE, + young: VALUE, + file: *const ::std::os::raw::c_char, + line: ::std::os::raw::c_int, + ); + pub fn rb_yjit_vm_lock_then_barrier( + recursive_lock_level: *mut ::std::os::raw::c_uint, + file: *const ::std::os::raw::c_char, + line: ::std::os::raw::c_int, + ); + pub fn rb_yjit_vm_unlock( + recursive_lock_level: *mut ::std::os::raw::c_uint, + file: *const ::std::os::raw::c_char, + line: ::std::os::raw::c_int, + ); + pub fn rb_yjit_assert_holding_vm_lock(); + pub fn rb_yjit_sendish_sp_pops(ci: *const rb_callinfo) -> usize; + pub fn rb_yjit_invokeblock_sp_pops(ci: *const rb_callinfo) -> usize; + pub fn rb_yjit_set_exception_return( + cfp: *mut rb_control_frame_t, + leave_exit: *mut ::std::os::raw::c_void, + leave_exception: *mut ::std::os::raw::c_void, + ); +} diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs new file mode 100644 index 0000000000..7875276815 --- /dev/null +++ b/yjit/src/disasm.rs @@ -0,0 +1,341 @@ +use crate::core::*; +use crate::cruby::*; +use crate::yjit::yjit_enabled_p; +#[cfg(feature = "disasm")] +use crate::asm::CodeBlock; +#[cfg(feature = "disasm")] +use crate::codegen::CodePtr; +#[cfg(feature = "disasm")] +use crate::options::DumpDisasm; + +#[cfg(feature = "disasm")] +use std::fmt::Write; + +/// Primitive called in yjit.rb +/// Produce a string representing the disassembly for an ISEQ +#[no_mangle] +pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALUE) -> VALUE { + #[cfg(not(feature = "disasm"))] + { + let _ = iseqw; + return Qnil; + } + + #[cfg(feature = "disasm")] + { + // TODO: + //if unsafe { CLASS_OF(iseqw) != rb_cISeq } { + // return Qnil; + //} + + if !yjit_enabled_p() { + return Qnil; + } + + // Get the iseq pointer from the wrapper + let iseq = unsafe { rb_iseqw_to_iseq(iseqw) }; + + // This will truncate disassembly of methods with 10k+ bytecodes. + // That's a good thing - this prints to console. + let out_string = with_vm_lock(src_loc!(), || disasm_iseq_insn_range(iseq, 0, 9999)); + + return rust_str_to_ruby(&out_string); + } +} + +/// Only call while holding the VM lock. +#[cfg(feature = "disasm")] +pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u16, end_idx: u16) -> String { + let mut out = String::from(""); + + // Get a list of block versions generated for this iseq + let block_list = get_or_create_iseq_block_list(iseq); + let mut block_list: Vec<&Block> = block_list.into_iter().map(|blockref| { + // SAFETY: We have the VM lock here and all the blocks on iseqs are valid. 
+ unsafe { blockref.as_ref() } + }).collect(); + + // Get a list of codeblocks relevant to this iseq + let global_cb = crate::codegen::CodegenGlobals::get_inline_cb(); + + // Sort the blocks by increasing start addresses + block_list.sort_by_key(|block| block.get_start_addr().as_offset()); + + // Compute total code size in bytes for all blocks in the function + let mut total_code_size = 0; + for blockref in &block_list { + total_code_size += blockref.code_size(); + } + + writeln!(out, "NUM BLOCK VERSIONS: {}", block_list.len()).unwrap(); + writeln!(out, "TOTAL INLINE CODE SIZE: {} bytes", total_code_size).unwrap(); + + // For each block, sorted by increasing start address + for (block_idx, block) in block_list.iter().enumerate() { + let blockid = block.get_blockid(); + if blockid.idx >= start_idx && blockid.idx < end_idx { + let end_idx = block.get_end_idx(); + let start_addr = block.get_start_addr(); + let end_addr = block.get_end_addr(); + let code_size = block.code_size(); + + // Write some info about the current block + let blockid_idx = blockid.idx; + let block_ident = format!( + "BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ", + block_idx + 1, + block_list.len(), + blockid_idx, + end_idx, + code_size + ); + writeln!(out, "== {:=<60}", block_ident).unwrap(); + + // Disassemble the instructions + for (start_addr, end_addr) in global_cb.writable_addrs(start_addr, end_addr) { + out.push_str(&disasm_addr_range(global_cb, start_addr, end_addr)); + writeln!(out).unwrap(); + } + + // If this is not the last block + if block_idx < block_list.len() - 1 { + // Compute the size of the gap between this block and the next + let next_block = block_list[block_idx + 1]; + let next_start_addr = next_block.get_start_addr(); + let gap_size = next_start_addr.as_offset() - end_addr.as_offset(); + + // Log the size of the gap between the blocks if nonzero + if gap_size > 0 { + writeln!(out, "... 
{} byte gap ...", gap_size).unwrap(); + } + } + } + } + + return out; +} + +#[cfg(feature = "disasm")] +pub fn dump_disasm_addr_range(cb: &CodeBlock, start_addr: CodePtr, end_addr: CodePtr, dump_disasm: &DumpDisasm) { + use std::fs::File; + use std::io::Write; + + for (start_addr, end_addr) in cb.writable_addrs(start_addr, end_addr) { + let disasm = disasm_addr_range(cb, start_addr, end_addr); + if disasm.len() > 0 { + match dump_disasm { + DumpDisasm::Stdout => println!("{disasm}"), + DumpDisasm::File(path) => { + let mut f = File::options().create(true).append(true).open(path).unwrap(); + f.write_all(disasm.as_bytes()).unwrap(); + } + }; + } + } +} + +#[cfg(feature = "disasm")] +pub fn disasm_addr_range(cb: &CodeBlock, start_addr: usize, end_addr: usize) -> String { + let mut out = String::from(""); + + // Initialize capstone + use capstone::prelude::*; + + #[cfg(target_arch = "x86_64")] + let mut cs = Capstone::new() + .x86() + .mode(arch::x86::ArchMode::Mode64) + .syntax(arch::x86::ArchSyntax::Intel) + .build() + .unwrap(); + + #[cfg(target_arch = "aarch64")] + let mut cs = Capstone::new() + .arm64() + .mode(arch::arm64::ArchMode::Arm) + .detail(true) + .build() + .unwrap(); + cs.set_skipdata(true).unwrap(); + + // Disassemble the instructions + let code_size = end_addr - start_addr; + let code_slice = unsafe { std::slice::from_raw_parts(start_addr as _, code_size) }; + // Stabilize output for cargo test + #[cfg(test)] + let start_addr = 0; + let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap(); + + // For each instruction in this block + for insn in insns.as_ref() { + // Comments for this block + if let Some(comment_list) = cb.comments_at(insn.address() as usize) { + for comment in comment_list { + if cb.outlined { + write!(&mut out, "\x1b[34m").unwrap(); // Make outlined code blue + } + writeln!(&mut out, " \x1b[1m# {comment}\x1b[22m").unwrap(); // Make comments bold + } + } + if cb.outlined { + write!(&mut out, "\x1b[34m").unwrap(); // Make outlined code blue + } + writeln!(&mut out, " {insn}").unwrap(); + if cb.outlined { + write!(&mut out, "\x1b[0m").unwrap(); // Disable blue + } + } + + return out; +} + +/// Assert that CodeBlock has the code specified with hex. In addition, if tested with +/// `cargo test --all-features`, it also checks it generates the specified disasm. +#[cfg(test)] +macro_rules! assert_disasm { + ($cb:expr, $hex:expr, $disasm:expr) => { + #[cfg(feature = "disasm")] + { + let disasm = disasm_addr_range( + &$cb, + $cb.get_ptr(0).raw_addr(&$cb), + $cb.get_write_ptr().raw_addr(&$cb), + ); + assert_eq!(unindent(&disasm, false), unindent(&$disasm, true)); + } + assert_eq!(format!("{:x}", $cb), $hex); + }; +} +#[cfg(test)] +pub(crate) use assert_disasm; + +/// Remove the minimum indent from every line, skipping the first line if `skip_first`. 
+#[cfg(all(feature = "disasm", test))] +pub fn unindent(string: &str, trim_lines: bool) -> String { + fn split_lines(string: &str) -> Vec<String> { + let mut result: Vec<String> = vec![]; + let mut buf: Vec<u8> = vec![]; + for byte in string.as_bytes().iter() { + buf.push(*byte); + if *byte == b'\n' { + result.push(String::from_utf8(buf).unwrap()); + buf = vec![]; + } + } + if !buf.is_empty() { + result.push(String::from_utf8(buf).unwrap()); + } + result + } + + // Break up a string into multiple lines + let mut lines = split_lines(string); + if trim_lines { // raw string literals come with extra lines + lines.remove(0); + lines.remove(lines.len() - 1); + } + + // Count the minimum number of spaces + let spaces = lines.iter().filter_map(|line| { + for (i, ch) in line.as_bytes().iter().enumerate() { + if *ch != b' ' { + return Some(i); + } + } + None + }).min().unwrap_or(0); + + // Join lines, removing spaces + let mut unindented: Vec<u8> = vec![]; + for line in lines.iter() { + if line.len() > spaces { + unindented.extend_from_slice(&line.as_bytes()[spaces..]); + } else { + unindented.extend_from_slice(&line.as_bytes()); + } + } + String::from_utf8(unindented).unwrap() +} + +/// Primitive called in yjit.rb +/// Produce a list of instructions compiled for an iseq +#[no_mangle] +pub extern "C" fn rb_yjit_insns_compiled(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALUE) -> VALUE { + { + // TODO: + //if unsafe { CLASS_OF(iseqw) != rb_cISeq } { + // return Qnil; + //} + + if !yjit_enabled_p() { + return Qnil; + } + + // Get the iseq pointer from the wrapper + let iseq = unsafe { rb_iseqw_to_iseq(iseqw) }; + + // Get the list of instructions compiled + let insn_vec = insns_compiled(iseq); + + unsafe { + let insn_ary = rb_ary_new_capa((insn_vec.len() * 2) as i64); + + // For each instruction compiled + for idx in 0..insn_vec.len() { + let op_name = &insn_vec[idx].0; + let insn_idx = insn_vec[idx].1; + + let op_sym = rust_str_to_sym(&op_name); + + // Store the instruction index and opcode symbol + rb_ary_store( + insn_ary, + (2 * idx + 0) as i64, + VALUE::fixnum_from_usize(insn_idx as usize), + ); + rb_ary_store(insn_ary, (2 * idx + 1) as i64, op_sym); + } + + insn_ary + } + } +} + +fn insns_compiled(iseq: IseqPtr) -> Vec<(String, u16)> { + let mut insn_vec = Vec::new(); + + // Get a list of block versions generated for this iseq + let block_list = get_or_create_iseq_block_list(iseq); + + // For each block associated with this iseq + for blockref in &block_list { + // SAFETY: Called as part of a Ruby method, which ensures the graph is + // well connected for the given iseq. + let block = unsafe { blockref.as_ref() }; + let start_idx = block.get_blockid().idx; + let end_idx = block.get_end_idx(); + assert!(u32::from(end_idx) <= unsafe { get_iseq_encoded_size(iseq) }); + + // For each YARV instruction in the block + let mut insn_idx = start_idx; + while insn_idx < end_idx { + // Get the current pc and opcode + let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) }; + // try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes.
+ let opcode: usize = unsafe { rb_iseq_opcode_at_pc(iseq, pc) } + .try_into() + .unwrap(); + + // Get the mnemonic for this opcode + let op_name = insn_name(opcode); + + // Add the instruction to the list + insn_vec.push((op_name, insn_idx)); + + // Move to the next instruction + insn_idx += insn_len(opcode) as u16; + } + } + + return insn_vec; +} diff --git a/yjit/src/invariants.rs b/yjit/src/invariants.rs new file mode 100644 index 0000000000..6639fd677b --- /dev/null +++ b/yjit/src/invariants.rs @@ -0,0 +1,705 @@ +//! Code to track assumptions made during code generation and invalidate +//! generated code if and when these assumptions are invalidated. + +use crate::asm::OutlinedCb; +use crate::backend::ir::Assembler; +use crate::codegen::*; +use crate::core::*; +use crate::cruby::*; +use crate::stats::*; +use crate::utils::IntoUsize; +use crate::yjit::yjit_enabled_p; + +use std::collections::{HashMap, HashSet}; +use std::os::raw::c_void; +use std::mem; + +// Invariants to track: +// assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS) +// assume_method_lookup_stable(comptime_recv_klass, cme, jit); +// assume_single_ractor_mode() +// track_stable_constant_names_assumption() + +/// Used to track all of the various block references that contain assumptions +/// about the state of the virtual machine. +pub struct Invariants { + /// Tracks block assumptions about callable method entry validity. + cme_validity: HashMap<*const rb_callable_method_entry_t, HashSet<BlockRef>>, + + /// A map from a class and its associated basic operator to a set of blocks + /// that are assuming that that operator is not redefined. This is used for + /// quick access to all of the blocks that are making this assumption when + /// the operator is redefined. + basic_operator_blocks: HashMap<(RedefinitionFlag, ruby_basic_operators), HashSet<BlockRef>>, + + /// A map from a block to a set of classes and their associated basic + /// operators that the block is assuming are not redefined. This is used for + /// quick access to all of the assumptions that a block is making when it + /// needs to be invalidated. + block_basic_operators: HashMap<BlockRef, HashSet<(RedefinitionFlag, ruby_basic_operators)>>, + + /// Tracks the set of blocks that are assuming the interpreter is running + /// with only one ractor. This is important for things like accessing + /// constants which can have different semantics when multiple ractors are + /// running. + single_ractor: HashSet<BlockRef>, + + /// A map from an ID to the set of blocks that are assuming a constant with + /// that ID as part of its name has not been redefined. For example, if + /// a constant `A::B` is redefined, then all blocks that are assuming that + /// `A` and `B` have not been redefined must be invalidated. + constant_state_blocks: HashMap<ID, HashSet<BlockRef>>, + + /// A map from a block to a set of IDs that it is assuming have not been + /// redefined. + block_constant_states: HashMap<BlockRef, HashSet<ID>>, + + /// A map from a class to a set of blocks that assume objects of the class + /// will have no singleton class. When the set is empty, it means that + /// there has been a singleton class for the class after boot, so you cannot + /// assume no singleton class going forward. + no_singleton_classes: HashMap<VALUE, HashSet<BlockRef>>, + + /// A map from an ISEQ to a set of blocks that assume base pointer is equal + /// to environment pointer. When the set is empty, it means that EP has been + /// escaped in the ISEQ.
+ no_ep_escape_iseqs: HashMap<IseqPtr, HashSet<BlockRef>>, +} + +/// Private singleton instance of the invariants global struct. +static mut INVARIANTS: Option<Invariants> = None; + +impl Invariants { + pub fn init() { + // Wrapping this in unsafe to assign directly to a global. + unsafe { + INVARIANTS = Some(Invariants { + cme_validity: HashMap::new(), + basic_operator_blocks: HashMap::new(), + block_basic_operators: HashMap::new(), + single_ractor: HashSet::new(), + constant_state_blocks: HashMap::new(), + block_constant_states: HashMap::new(), + no_singleton_classes: HashMap::new(), + no_ep_escape_iseqs: HashMap::new(), + }); + } + } + + /// Get a mutable reference to the codegen globals instance + pub fn get_instance() -> &'static mut Invariants { + unsafe { INVARIANTS.as_mut().unwrap() } + } +} + +/// Mark the pending block as assuming that certain basic operators (e.g. Integer#==) +/// have not been redefined. +#[must_use] +pub fn assume_bop_not_redefined( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + klass: RedefinitionFlag, + bop: ruby_basic_operators, +) -> bool { + if unsafe { BASIC_OP_UNREDEFINED_P(bop, klass) } { + if jit_ensure_block_entry_exit(jit, asm, ocb).is_none() { + return false; + } + jit.bop_assumptions.push((klass, bop)); + + return true; + } else { + return false; + } +} + +/// Track that a block is only valid when a certain basic operator has not been redefined +/// since the block's inception. +pub fn track_bop_assumption(uninit_block: BlockRef, bop: (RedefinitionFlag, ruby_basic_operators)) { + let invariants = Invariants::get_instance(); + invariants + .basic_operator_blocks + .entry(bop) + .or_default() + .insert(uninit_block); + invariants + .block_basic_operators + .entry(uninit_block) + .or_default() + .insert(bop); +} + +/// Track that a block will assume that `cme` is valid (false == METHOD_ENTRY_INVALIDATED(cme)). +/// [rb_yjit_cme_invalidate] invalidates the block when `cme` is invalidated. +pub fn track_method_lookup_stability_assumption( + uninit_block: BlockRef, + callee_cme: *const rb_callable_method_entry_t, +) { + Invariants::get_instance() + .cme_validity + .entry(callee_cme) + .or_default() + .insert(uninit_block); +} + +/// Track that a block will assume that `klass` objects will have no singleton class. +pub fn track_no_singleton_class_assumption(uninit_block: BlockRef, klass: VALUE) { + Invariants::get_instance() + .no_singleton_classes + .entry(klass) + .or_default() + .insert(uninit_block); +} + +/// Returns true if we've seen a singleton class of a given class since boot. +pub fn has_singleton_class_of(klass: VALUE) -> bool { + Invariants::get_instance() + .no_singleton_classes + .get(&klass) + .map_or(false, |blocks| blocks.is_empty()) +} + +/// Track that a block will assume that base pointer is equal to environment pointer. +pub fn track_no_ep_escape_assumption(uninit_block: BlockRef, iseq: IseqPtr) { + Invariants::get_instance() + .no_ep_escape_iseqs + .entry(iseq) + .or_default() + .insert(uninit_block); +} + +/// Returns true if a given ISEQ has previously escaped an environment. 
+pub fn iseq_escapes_ep(iseq: IseqPtr) -> bool { + Invariants::get_instance() + .no_ep_escape_iseqs + .get(&iseq) + .map_or(false, |blocks| blocks.is_empty()) +} + +/// Forget an ISEQ remembered in invariants +pub fn iseq_free_invariants(iseq: IseqPtr) { + if unsafe { INVARIANTS.is_none() } { + return; + } + Invariants::get_instance().no_ep_escape_iseqs.remove(&iseq); +} + +// Checks rb_method_basic_definition_p and registers the current block for invalidation if method +// lookup changes. +// A "basic method" is one defined during VM boot, so we can use this to check assumptions based on +// default behavior. +pub fn assume_method_basic_definition( + jit: &mut JITState, + asm: &mut Assembler, + ocb: &mut OutlinedCb, + klass: VALUE, + mid: ID +) -> bool { + if unsafe { rb_method_basic_definition_p(klass, mid) } != 0 { + let cme = unsafe { rb_callable_method_entry(klass, mid) }; + jit.assume_method_lookup_stable(asm, ocb, cme); + true + } else { + false + } +} + +/// Tracks that a block is assuming it is operating in single-ractor mode. +#[must_use] +pub fn assume_single_ractor_mode(jit: &mut JITState, asm: &mut Assembler, ocb: &mut OutlinedCb) -> bool { + if unsafe { rb_yjit_multi_ractor_p() } { + false + } else { + if jit_ensure_block_entry_exit(jit, asm, ocb).is_none() { + return false; + } + jit.block_assumes_single_ractor = true; + + true + } +} + +/// Track that the block will assume single ractor mode. +pub fn track_single_ractor_assumption(uninit_block: BlockRef) { + Invariants::get_instance() + .single_ractor + .insert(uninit_block); +} + +/// Track that a block will assume that the name components of a constant path expression +/// has not changed since the block's full initialization. +pub fn track_stable_constant_names_assumption(uninit_block: BlockRef, idlist: *const ID) { + fn assume_stable_constant_name( + uninit_block: BlockRef, + id: ID, + ) { + if id == ID!(NULL) { + // Used for :: prefix + return; + } + + let invariants = Invariants::get_instance(); + invariants + .constant_state_blocks + .entry(id) + .or_default() + .insert(uninit_block); + invariants + .block_constant_states + .entry(uninit_block) + .or_default() + .insert(id); + } + + + for i in 0.. { + match unsafe { *idlist.offset(i) } { + 0 => break, // End of NULL terminated list + id => assume_stable_constant_name(uninit_block, id), + } + } +} + +/// Called when a basic operator is redefined. Note that all the blocks assuming +/// the stability of different operators are invalidated together and we don't +/// do fine-grained tracking. +#[no_mangle] +pub extern "C" fn rb_yjit_bop_redefined(klass: RedefinitionFlag, bop: ruby_basic_operators) { + // If YJIT isn't enabled, do nothing + if !yjit_enabled_p() { + return; + } + + with_vm_lock(src_loc!(), || { + // Loop through the blocks that are associated with this class and basic + // operator and invalidate them. + if let Some(blocks) = Invariants::get_instance() + .basic_operator_blocks + .remove(&(klass, bop)) + { + for block in blocks.iter() { + invalidate_block_version(block); + incr_counter!(invalidate_bop_redefined); + } + } + }); +} + +/// Callback for when a cme becomes invalid. Invalidate all blocks that depend +/// on the given cme being valid. 
+#[no_mangle] +pub extern "C" fn rb_yjit_cme_invalidate(callee_cme: *const rb_callable_method_entry_t) { + // If YJIT isn't enabled, do nothing + if !yjit_enabled_p() { + return; + } + + with_vm_lock(src_loc!(), || { + if let Some(blocks) = Invariants::get_instance().cme_validity.remove(&callee_cme) { + for block in blocks.iter() { + invalidate_block_version(block); + incr_counter!(invalidate_method_lookup); + } + } + }); +} + +/// Callback for when Ruby is about to spawn a ractor. In that case we need to +/// invalidate every block that is assuming single ractor mode. +#[no_mangle] +pub extern "C" fn rb_yjit_before_ractor_spawn() { + // If YJIT isn't enabled, do nothing + if !yjit_enabled_p() { + return; + } + + with_vm_lock(src_loc!(), || { + // Clear the set of blocks inside Invariants + let blocks = mem::take(&mut Invariants::get_instance().single_ractor); + + // Invalidate the blocks + for block in &blocks { + invalidate_block_version(block); + incr_counter!(invalidate_ractor_spawn); + } + }); +} + +/// Callback for when the global constant state changes. +#[no_mangle] +pub extern "C" fn rb_yjit_constant_state_changed(id: ID) { + // If YJIT isn't enabled, do nothing + if !yjit_enabled_p() { + return; + } + + with_vm_lock(src_loc!(), || { + // Invalidate the blocks that are associated with the given ID. + if let Some(blocks) = Invariants::get_instance().constant_state_blocks.remove(&id) { + for block in &blocks { + invalidate_block_version(block); + incr_counter!(invalidate_constant_state_bump); + } + } + }); +} + +/// Callback for marking GC objects inside [Invariants]. +/// See `struct yjit_root_struct` in C. +#[no_mangle] +pub extern "C" fn rb_yjit_root_mark(_: *mut c_void) { + // Call rb_gc_mark on exit location's raw_samples to + // wrap frames in a GC allocated object. This needs to be called + // at the same time as root mark. + YjitExitLocations::gc_mark_raw_samples(); + + // Comment from C YJIT: + // + // Why not let the GC move the cme keys in this table? + // Because this is basically a compare_by_identity Hash. + // If a key moves, we would need to reinsert it into the table so it is rehashed. + // That is tricky to do, especially as it could trigger allocation which could + // trigger GC. Not sure if it is okay to trigger GC while the GC is updating + // references. + // + // NOTE(alan): since we are using Rust data structures that don't interact + // with the Ruby GC now, it might be feasible to allow movement. + + let invariants = Invariants::get_instance(); + + // Mark CME imemos + for cme in invariants.cme_validity.keys() { + let cme: VALUE = (*cme).into(); + + unsafe { rb_gc_mark(cme) }; + } +} + +#[no_mangle] +pub extern "C" fn rb_yjit_root_update_references(_: *mut c_void) { + if unsafe { INVARIANTS.is_none() } { + return; + } + let no_ep_escape_iseqs = &mut Invariants::get_instance().no_ep_escape_iseqs; + + // Make a copy of the table with updated ISEQ keys + let mut updated_copy = HashMap::with_capacity(no_ep_escape_iseqs.len()); + for (iseq, blocks) in mem::take(no_ep_escape_iseqs) { + let new_iseq = unsafe { rb_gc_location(iseq.into()) }.as_iseq(); + updated_copy.insert(new_iseq, blocks); + } + + *no_ep_escape_iseqs = updated_copy; +} + +/// Remove all invariant assumptions made by the block by removing the block +/// as a key in all of the relevant tables. +/// For safety, the block has to be initialized and the vm lock must be held. +/// However, outgoing/incoming references to the block do _not_ need to be valid.
+pub fn block_assumptions_free(blockref: BlockRef) { + let invariants = Invariants::get_instance(); + + { + // SAFETY: caller ensures that this reference is valid + let block = unsafe { blockref.as_ref() }; + + // For each method lookup dependency + for dep in block.iter_cme_deps() { + // Remove tracking for cme validity + if let Some(blockset) = invariants.cme_validity.get_mut(&dep) { + blockset.remove(&blockref); + if blockset.is_empty() { + invariants.cme_validity.remove(&dep); + } + } + } + if invariants.cme_validity.is_empty() { + invariants.cme_validity.shrink_to_fit(); + } + } + + // Remove tracking for basic operators that the given block assumes have + // not been redefined. + if let Some(bops) = invariants.block_basic_operators.remove(&blockref) { + // Remove tracking for the given block from the list of blocks associated + // with the given basic operator. + for key in &bops { + if let Some(blocks) = invariants.basic_operator_blocks.get_mut(key) { + blocks.remove(&blockref); + if blocks.is_empty() { + invariants.basic_operator_blocks.remove(key); + } + } + } + } + if invariants.block_basic_operators.is_empty() { + invariants.block_basic_operators.shrink_to_fit(); + } + if invariants.basic_operator_blocks.is_empty() { + invariants.basic_operator_blocks.shrink_to_fit(); + } + + // Remove tracking for blocks assuming single ractor mode + invariants.single_ractor.remove(&blockref); + if invariants.single_ractor.is_empty() { + invariants.single_ractor.shrink_to_fit(); + } + + // Remove tracking for constant state for a given ID. + if let Some(ids) = invariants.block_constant_states.remove(&blockref) { + for id in ids { + if let Some(blocks) = invariants.constant_state_blocks.get_mut(&id) { + blocks.remove(&blockref); + if blocks.is_empty() { + invariants.constant_state_blocks.remove(&id); + } + } + } + } + if invariants.block_constant_states.is_empty() { + invariants.block_constant_states.shrink_to_fit(); + } + if invariants.constant_state_blocks.is_empty() { + invariants.constant_state_blocks.shrink_to_fit(); + } + + // Remove tracking for blocks assuming no singleton class + for (_, blocks) in invariants.no_singleton_classes.iter_mut() { + blocks.remove(&blockref); + } + // Remove tracking for blocks assuming EP doesn't escape + for (_, blocks) in invariants.no_ep_escape_iseqs.iter_mut() { + blocks.remove(&blockref); + } +} + +/// Callback from the opt_setinlinecache instruction in the interpreter. +/// Invalidate the block for the matching opt_getinlinecache so it could regenerate code +/// using the new value in the constant cache. +#[no_mangle] +pub extern "C" fn rb_yjit_constant_ic_update(iseq: *const rb_iseq_t, ic: IC, insn_idx: std::os::raw::c_uint) { + // If YJIT isn't enabled, do nothing + if !yjit_enabled_p() { + return; + } + + // Try to downcast the iseq index + let insn_idx: IseqIdx = if let Ok(idx) = insn_idx.try_into() { + idx + } else { + // The index is too large, YJIT can't possibly have code for it, + // so there is nothing to invalidate. + return; + }; + + if !unsafe { (*(*ic).entry).ic_cref }.is_null() || unsafe { rb_yjit_multi_ractor_p() } { + // We can't generate code in these situations, so no need to invalidate. + // See gen_opt_getinlinecache. 
+ return; + } + + with_vm_lock(src_loc!(), || { + let code = unsafe { get_iseq_body_iseq_encoded(iseq) }; + + // This should come from a running iseq, so direct threading translation + // should have been done + assert!(unsafe { FL_TEST(iseq.into(), VALUE(ISEQ_TRANSLATED)) } != VALUE(0)); + assert!(u32::from(insn_idx) < unsafe { get_iseq_encoded_size(iseq) }); + + // Ensure that the instruction the insn_idx is pointing to is in + // fact a opt_getconstant_path instruction. + assert_eq!( + unsafe { + let opcode_pc = code.add(insn_idx.as_usize()); + let translated_opcode: VALUE = opcode_pc.read(); + rb_vm_insn_decode(translated_opcode) + }, + YARVINSN_opt_getconstant_path.try_into().unwrap() + ); + + // Find the matching opt_getinlinecache and invalidate all the blocks there + // RUBY_ASSERT(insn_op_type(BIN(opt_getinlinecache), 1) == TS_IC); + + let ic_pc = unsafe { code.add(insn_idx.as_usize() + 1) }; + let ic_operand: IC = unsafe { ic_pc.read() }.as_mut_ptr(); + + if ic == ic_operand { + for block in take_version_list(BlockId { + iseq, + idx: insn_idx, + }) { + invalidate_block_version(&block); + incr_counter!(invalidate_constant_ic_fill); + } + } else { + panic!("ic->get_insn_index not set properly"); + } + }); +} + +/// Invalidate blocks that assume objects of a given class will have no singleton class. +#[no_mangle] +pub extern "C" fn rb_yjit_invalidate_no_singleton_class(klass: VALUE) { + // Skip tracking singleton classes during boot. Such objects already have a singleton class + // before entering JIT code, so they get rejected when they're checked for the first time. + if unsafe { INVARIANTS.is_none() } { + return; + } + + // We apply this optimization only to Array, Hash, and String for now. + if unsafe { [rb_cArray, rb_cHash, rb_cString].contains(&klass) } { + with_vm_lock(src_loc!(), || { + let no_singleton_classes = &mut Invariants::get_instance().no_singleton_classes; + match no_singleton_classes.get_mut(&klass) { + Some(blocks) => { + // Invalidate existing blocks and let has_singleton_class_of() + // return true when they are compiled again + for block in mem::take(blocks) { + invalidate_block_version(&block); + incr_counter!(invalidate_no_singleton_class); + } + } + None => { + // Let has_singleton_class_of() return true for this class + no_singleton_classes.insert(klass, HashSet::new()); + } + } + }); + } +} + +/// Invalidate blocks for a given ISEQ that assumes environment pointer is +/// equal to base pointer. +#[no_mangle] +pub extern "C" fn rb_yjit_invalidate_ep_is_bp(iseq: IseqPtr) { + // Skip tracking EP escapes on boot. We don't need to invalidate anything during boot. + if unsafe { INVARIANTS.is_none() } { + return; + } + + with_vm_lock(src_loc!(), || { + // If an EP escape for this ISEQ is detected for the first time, invalidate all blocks + // associated to the ISEQ. + let no_ep_escape_iseqs = &mut Invariants::get_instance().no_ep_escape_iseqs; + match no_ep_escape_iseqs.get_mut(&iseq) { + Some(blocks) => { + // Invalidate existing blocks and make jit.ep_is_bp() return false + for block in mem::take(blocks) { + invalidate_block_version(&block); + incr_counter!(invalidate_ep_escape); + } + } + None => { + // Let jit.ep_is_bp() return false for this ISEQ + no_ep_escape_iseqs.insert(iseq, HashSet::new()); + } + } + }); +} + +// Invalidate all generated code and patch C method return code to contain +// logic for firing the c_return TracePoint event. 
Once rb_vm_barrier() +// returns, all other ractors are pausing inside RB_VM_LOCK_ENTER(), which +// means they are inside a C routine. If there are any generated code on-stack, +// they are waiting for a return from a C routine. For every routine call, we +// patch in an exit after the body of the containing VM instruction. This makes +// it so all the invalidated code exit as soon as execution logically reaches +// the next VM instruction. The interpreter takes care of firing the tracing +// event if it so happens that the next VM instruction has one attached. +// +// The c_return event needs special handling as our codegen never outputs code +// that contains tracing logic. If we let the normal output code run until the +// start of the next VM instruction by relying on the patching scheme above, we +// would fail to fire the c_return event. The interpreter doesn't fire the +// event at an instruction boundary, so simply exiting to the interpreter isn't +// enough. To handle it, we patch in the full logic at the return address. See +// full_cfunc_return(). +// +// In addition to patching, we prevent future entries into invalidated code by +// removing all live blocks from their iseq. +#[no_mangle] +pub extern "C" fn rb_yjit_tracing_invalidate_all() { + if !yjit_enabled_p() { + return; + } + + // Stop other ractors since we are going to patch machine code. + with_vm_lock(src_loc!(), || { + // Make it so all live block versions are no longer valid branch targets + let mut on_stack_iseqs = HashSet::new(); + for_each_on_stack_iseq(|iseq| { + on_stack_iseqs.insert(iseq); + }); + for_each_iseq(|iseq| { + if let Some(payload) = get_iseq_payload(iseq) { + let blocks = payload.take_all_blocks(); + + if on_stack_iseqs.contains(&iseq) { + // This ISEQ is running, so we can't free blocks immediately + for block in blocks { + delayed_deallocation(block); + } + payload.dead_blocks.shrink_to_fit(); + } else { + // Safe to free dead blocks since the ISEQ isn't running + // Since we're freeing _all_ blocks, we don't need to keep the graph well formed + for block in blocks { + unsafe { free_block(block, false) }; + } + mem::take(&mut payload.dead_blocks) + .into_iter() + .for_each(|block| unsafe { free_block(block, false) }); + } + } + + // Reset output code entry point + unsafe { rb_iseq_reset_jit_func(iseq) }; + }); + + let cb = CodegenGlobals::get_inline_cb(); + + // Prevent on-stack frames from jumping to the caller on jit_exec_exception + extern "C" { + fn rb_yjit_cancel_jit_return(leave_exit: *mut c_void, leave_exception: *mut c_void) -> VALUE; + } + unsafe { + rb_yjit_cancel_jit_return( + CodegenGlobals::get_leave_exit_code().raw_ptr(cb) as _, + CodegenGlobals::get_leave_exception_code().raw_ptr(cb) as _, + ); + } + + // Apply patches + let old_pos = cb.get_write_pos(); + let old_dropped_bytes = cb.has_dropped_bytes(); + let mut patches = CodegenGlobals::take_global_inval_patches(); + patches.sort_by_cached_key(|patch| patch.inline_patch_pos.raw_ptr(cb)); + let mut last_patch_end = std::ptr::null(); + for patch in &patches { + let patch_pos = patch.inline_patch_pos.raw_ptr(cb); + assert!( + last_patch_end <= patch_pos, + "patches should not overlap (last_patch_end: {last_patch_end:?}, patch_pos: {patch_pos:?})", + ); + + cb.set_write_ptr(patch.inline_patch_pos); + cb.set_dropped_bytes(false); + cb.without_page_end_reserve(|cb| { + let mut asm = crate::backend::ir::Assembler::new(); + asm.jmp(patch.outlined_target_pos.as_side_exit()); + if asm.compile(cb, None).is_none() { + panic!("Failed to apply 
patch at {:?}", patch.inline_patch_pos); + } + }); + last_patch_end = cb.get_write_ptr().raw_ptr(cb); + } + cb.set_pos(old_pos); + cb.set_dropped_bytes(old_dropped_bytes); + + CodegenGlobals::get_outlined_cb() + .unwrap() + .mark_all_executable(); + cb.mark_all_executable(); + }); +} diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs new file mode 100644 index 0000000000..3f3d24be4b --- /dev/null +++ b/yjit/src/lib.rs @@ -0,0 +1,17 @@ +// Clippy disagreements +#![allow(clippy::style)] // We are laid back about style +#![allow(clippy::too_many_arguments)] // :shrug: +#![allow(clippy::identity_op)] // Sometimes we do it for style + +pub mod asm; +mod backend; +mod codegen; +mod core; +mod cruby; +mod disasm; +mod invariants; +mod options; +mod stats; +mod utils; +mod yjit; +mod virtualmem; diff --git a/yjit/src/options.rs b/yjit/src/options.rs new file mode 100644 index 0000000000..59ec864bf5 --- /dev/null +++ b/yjit/src/options.rs @@ -0,0 +1,347 @@ +use std::{ffi::{CStr, CString}, ptr::null, fs::File}; +use crate::{backend::current::TEMP_REGS, stats::Counter}; +use std::os::raw::{c_char, c_int, c_uint}; + +// Call threshold for small deployments and command-line apps +pub static SMALL_CALL_THRESHOLD: u64 = 30; + +// Call threshold for larger deployments and production-sized applications +pub static LARGE_CALL_THRESHOLD: u64 = 120; + +// Number of live ISEQs after which we consider an app to be large +pub static LARGE_ISEQ_COUNT: u64 = 40_000; + +// This option is exposed to the C side in a global variable for performance, see vm.c +// Number of method calls after which to start generating code +// Threshold==1 means compile on first execution +#[no_mangle] +pub static mut rb_yjit_call_threshold: u64 = SMALL_CALL_THRESHOLD; + +// This option is exposed to the C side in a global variable for performance, see vm.c +// Number of execution requests after which a method is no longer +// considered hot. Raising this results in more generated code. +#[no_mangle] +pub static mut rb_yjit_cold_threshold: u64 = 200_000; + +// Command-line options +#[derive(Clone, PartialEq, Eq, Debug)] +#[repr(C)] +pub struct Options { + // Size of the executable memory block to allocate in bytes + // Note that the command line argument is expressed in MiB and not bytes + pub exec_mem_size: usize, + + // Disable the propagation of type information + pub no_type_prop: bool, + + // Maximum number of versions per block + // 1 means always create generic versions + pub max_versions: usize, + + // The number of registers allocated for stack temps + pub num_temp_regs: usize, + + // Capture stats + pub gen_stats: bool, + + // Print stats on exit (when gen_stats is also true) + pub print_stats: bool, + + // Trace locations of exits + pub trace_exits: Option<TraceExits>, + + // how often to sample exit trace data + pub trace_exits_sample_rate: usize, + + // Whether to enable YJIT at boot. This option prevents other + // YJIT tuning options from enabling YJIT at boot. + pub disable: bool, + + /// Dump compiled and executed instructions for debugging + pub dump_insns: bool, + + /// Dump all compiled instructions of target cbs. + pub dump_disasm: Option<DumpDisasm>, + + /// Print when specific ISEQ items are compiled or invalidated + pub dump_iseq_disasm: Option<String>, + + /// Verify context objects (debug mode only) + pub verify_ctx: bool, + + /// Enable generating frame pointers (for x86. arm64 always does this) + pub frame_pointer: bool, + + /// Run code GC when exec_mem_size is reached. 
+ pub code_gc: bool, + + /// Enable writing /tmp/perf-{pid}.map for Linux perf + pub perf_map: Option<PerfMap>, +} + +// Initialize the options to default values +pub static mut OPTIONS: Options = Options { + exec_mem_size: 48 * 1024 * 1024, + no_type_prop: false, + max_versions: 4, + num_temp_regs: 5, + gen_stats: false, + trace_exits: None, + print_stats: true, + trace_exits_sample_rate: 0, + disable: false, + dump_insns: false, + dump_disasm: None, + verify_ctx: false, + dump_iseq_disasm: None, + frame_pointer: false, + code_gc: false, + perf_map: None, +}; + +/// YJIT option descriptions for `ruby --help`. +static YJIT_OPTIONS: [(&str, &str); 9] = [ + ("--yjit-exec-mem-size=num", "Size of executable memory block in MiB (default: 48)."), + ("--yjit-call-threshold=num", "Number of calls to trigger JIT."), + ("--yjit-cold-threshold=num", "Global calls after which ISEQs not compiled (default: 200K)."), + ("--yjit-stats", "Enable collecting YJIT statistics."), + ("--yjit-disable", "Disable YJIT for lazily enabling it with RubyVM::YJIT.enable."), + ("--yjit-code-gc", "Run code GC when the code size reaches the limit."), + ("--yjit-perf", "Enable frame pointers and perf profiling."), + ("--yjit-trace-exits", "Record Ruby source location when exiting from generated code."), + ("--yjit-trace-exits-sample-rate=num", "Trace exit locations only every Nth occurrence."), +]; + +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum TraceExits { + // Trace all exits + All, + // Trace a specific counted exit + CountedExit(Counter), +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum DumpDisasm { + // Dump to stdout + Stdout, + // Dump to "yjit_{pid}.log" file under the specified directory + File(String), +} + +/// Type of symbols to dump into /tmp/perf-{pid}.map +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum PerfMap { + // Dump ISEQ symbols + ISEQ, + // Dump YJIT codegen symbols + Codegen, +} + +/// Macro to get an option value by name +macro_rules! get_option { + // Unsafe is ok here because options are initialized + // once before any Ruby code executes + ($option_name:ident) => { + { + // Make this a statement since attributes on expressions are experimental + #[allow(unused_unsafe)] + let ret = unsafe { OPTIONS.$option_name }; + ret + } + }; +} +pub(crate) use get_option; + +/// Macro to reference an option value by name; we assume it's a cloneable type like String or an Option of same. +macro_rules! get_option_ref { + // Unsafe is ok here because options are initialized + // once before any Ruby code executes + ($option_name:ident) => { + unsafe { &($crate::options::OPTIONS.$option_name) } + }; +} +pub(crate) use get_option_ref; + +/// Expected to receive what comes after the third dash in "--yjit-*". +/// Empty string means user passed only "--yjit". C code rejects when +/// they pass exact "--yjit-". 
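/// For example (illustrative, based on the match arms below): "exec-mem-size=128"
/// stores 128 MiB converted to bytes, "stats=quiet" enables stats collection without
/// printing at exit, and an unrecognized name returns None so the caller can reject
/// the flag.
///
/// ```ignore
/// // Hypothetical caller, sketched for illustration only.
/// use std::ffi::CString;
/// let arg = CString::new("exec-mem-size=128").unwrap();
/// assert!(parse_option(arg.as_ptr()).is_some()); // 128 MiB, stored internally as bytes
/// assert!(parse_option(CString::new("bogus-option").unwrap().as_ptr()).is_none());
/// ```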
+pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { + let c_str: &CStr = unsafe { CStr::from_ptr(str_ptr) }; + let opt_str: &str = c_str.to_str().ok()?; + //println!("{}", opt_str); + + // Split the option name and value strings + // Note that some options do not contain an assignment + let parts = opt_str.split_once('='); + let (opt_name, opt_val) = match parts { + Some((before_eq, after_eq)) => (before_eq, after_eq), + None => (opt_str, ""), + }; + + // Match on the option name and value strings + match (opt_name, opt_val) { + ("", "") => (), // Simply --yjit + + ("exec-mem-size", _) => match opt_val.parse::<usize>() { + Ok(n) => { + if n == 0 || n > 2 * 1024 * 1024 { + return None + } + + // Convert from MiB to bytes internally for convenience + unsafe { OPTIONS.exec_mem_size = n * 1024 * 1024 } + } + Err(_) => { + return None; + } + }, + + ("call-threshold", _) => match opt_val.parse() { + Ok(n) => unsafe { rb_yjit_call_threshold = n }, + Err(_) => { + return None; + } + }, + + ("cold-threshold", _) => match opt_val.parse() { + Ok(n) => unsafe { rb_yjit_cold_threshold = n }, + Err(_) => { + return None; + } + }, + + ("max-versions", _) => match opt_val.parse() { + Ok(n) => unsafe { OPTIONS.max_versions = n }, + Err(_) => { + return None; + } + }, + + ("disable", "") => unsafe { + OPTIONS.disable = true; + }, + + ("temp-regs", _) => match opt_val.parse() { + Ok(n) => { + assert!(n <= TEMP_REGS.len(), "--yjit-temp-regs must be <= {}", TEMP_REGS.len()); + unsafe { OPTIONS.num_temp_regs = n } + } + Err(_) => { + return None; + } + }, + + ("code-gc", _) => unsafe { + OPTIONS.code_gc = true; + }, + + ("perf", _) => match opt_val { + "" => unsafe { + OPTIONS.frame_pointer = true; + OPTIONS.perf_map = Some(PerfMap::ISEQ); + }, + "fp" => unsafe { OPTIONS.frame_pointer = true }, + "iseq" => unsafe { OPTIONS.perf_map = Some(PerfMap::ISEQ) }, + // Accept --yjit-perf=map for backward compatibility + "codegen" | "map" => unsafe { OPTIONS.perf_map = Some(PerfMap::Codegen) }, + _ => return None, + }, + + ("dump-disasm", _) => { + if !cfg!(feature = "disasm") { + eprintln!("WARNING: the {} option is only available when YJIT is built in dev mode, i.e. ./configure --enable-yjit=dev", opt_name); + } + + match opt_val { + "" => unsafe { OPTIONS.dump_disasm = Some(DumpDisasm::Stdout) }, + directory => { + let path = format!("{directory}/yjit_{}.log", std::process::id()); + match File::options().create(true).append(true).open(&path) { + Ok(_) => { + eprintln!("YJIT disasm dump: {path}"); + unsafe { OPTIONS.dump_disasm = Some(DumpDisasm::File(path)) } + } + Err(err) => eprintln!("Failed to create {path}: {err}"), + } + } + } + }, + + ("dump-iseq-disasm", _) => unsafe { + if !cfg!(feature = "disasm") { + eprintln!("WARNING: the {} option is only available when YJIT is built in dev mode, i.e. 
./configure --enable-yjit=dev", opt_name); + } + + OPTIONS.dump_iseq_disasm = Some(opt_val.to_string()); + }, + + ("no-type-prop", "") => unsafe { OPTIONS.no_type_prop = true }, + ("stats", _) => match opt_val { + "" => unsafe { OPTIONS.gen_stats = true }, + "quiet" => unsafe { + OPTIONS.gen_stats = true; + OPTIONS.print_stats = false; + }, + _ => { + return None; + } + }, + ("trace-exits", _) => unsafe { + OPTIONS.gen_stats = true; + OPTIONS.trace_exits = match opt_val { + "" => Some(TraceExits::All), + name => match Counter::get(name) { + Some(counter) => Some(TraceExits::CountedExit(counter)), + None => return None, + }, + }; + }, + ("trace-exits-sample-rate", sample_rate) => unsafe { + OPTIONS.gen_stats = true; + if OPTIONS.trace_exits.is_none() { + OPTIONS.trace_exits = Some(TraceExits::All); + } + OPTIONS.trace_exits_sample_rate = sample_rate.parse().unwrap(); + }, + ("dump-insns", "") => unsafe { OPTIONS.dump_insns = true }, + ("verify-ctx", "") => unsafe { OPTIONS.verify_ctx = true }, + + // Option name not recognized + _ => { + return None; + } + } + + // before we continue, check that sample_rate is either 0 or a prime number + let trace_sample_rate = unsafe { OPTIONS.trace_exits_sample_rate }; + if trace_sample_rate > 1 { + let mut i = 2; + while i*i <= trace_sample_rate { + if trace_sample_rate % i == 0 { + println!("Warning: using a non-prime number as your sampling rate can result in less accurate sampling data"); + return Some(()); + } + i += 1; + } + } + + // dbg!(unsafe {OPTIONS}); + + // Option successfully parsed + return Some(()); +} + +/// Print YJIT options for `ruby --help`. `width` is width of option parts, and +/// `columns` is indent width of descriptions. +#[no_mangle] +pub extern "C" fn rb_yjit_show_usage(help: c_int, highlight: c_int, width: c_uint, columns: c_int) { + for &(name, description) in YJIT_OPTIONS.iter() { + extern "C" { + fn ruby_show_usage_line(name: *const c_char, secondary: *const c_char, description: *const c_char, + help: c_int, highlight: c_int, width: c_uint, columns: c_int); + } + let name = CString::new(name).unwrap(); + let description = CString::new(description).unwrap(); + unsafe { ruby_show_usage_line(name.as_ptr(), null(), description.as_ptr(), help, highlight, width, columns) } + } +} diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs new file mode 100644 index 0000000000..1f94c19398 --- /dev/null +++ b/yjit/src/stats.rs @@ -0,0 +1,1033 @@ +//! Everything related to the collection of runtime stats in YJIT +//! See the stats feature and the --yjit-stats command-line option + +#![allow(dead_code)] // Counters are only used with the stats features + +use std::alloc::{GlobalAlloc, Layout, System}; +use std::ptr::addr_of_mut; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::time::Instant; +use std::collections::HashMap; + +use crate::codegen::CodegenGlobals; +use crate::core::Context; +use crate::core::for_each_iseq_payload; +use crate::cruby::*; +use crate::options::*; +use crate::yjit::yjit_enabled_p; + +/// Running total of how many ISeqs are in the system. +#[no_mangle] +pub static mut rb_yjit_live_iseq_count: u64 = 0; + +/// Monotonically increasing total of how many ISEQs were allocated +#[no_mangle] +pub static mut rb_yjit_iseq_alloc_count: u64 = 0; + +/// A middleware to count Rust-allocated bytes as yjit_alloc_size. 
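/// (Illustrative note: because this type is registered as the `#[global_allocator]` below,
/// every Rust-side alloc/dealloc/realloc adjusts `alloc_size`, so
/// `GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst)` at any point gives the net number
/// of bytes YJIT's Rust code currently has allocated; the stats hash later exports this
/// value under the `yjit_alloc_size` key.)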
+#[global_allocator] +static GLOBAL_ALLOCATOR: StatsAlloc = StatsAlloc { alloc_size: AtomicUsize::new(0) }; + +pub struct StatsAlloc { + alloc_size: AtomicUsize, +} + +unsafe impl GlobalAlloc for StatsAlloc { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + self.alloc_size.fetch_add(layout.size(), Ordering::SeqCst); + System.alloc(layout) + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + self.alloc_size.fetch_sub(layout.size(), Ordering::SeqCst); + System.dealloc(ptr, layout) + } + + unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { + self.alloc_size.fetch_add(layout.size(), Ordering::SeqCst); + System.alloc_zeroed(layout) + } + + unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { + if new_size > layout.size() { + self.alloc_size.fetch_add(new_size - layout.size(), Ordering::SeqCst); + } else if new_size < layout.size() { + self.alloc_size.fetch_sub(layout.size() - new_size, Ordering::SeqCst); + } + System.realloc(ptr, layout, new_size) + } +} + +/// Mapping of C function / ISEQ name to integer indices +/// This is accessed at compilation time only (protected by a lock) +static mut CFUNC_NAME_TO_IDX: Option<HashMap<String, usize>> = None; +static mut ISEQ_NAME_TO_IDX: Option<HashMap<String, usize>> = None; + +/// Vector of call counts for each C function / ISEQ index +/// This is modified (but not resized) by JITted code +static mut CFUNC_CALL_COUNT: Option<Vec<u64>> = None; +static mut ISEQ_CALL_COUNT: Option<Vec<u64>> = None; + +/// Assign an index to a given cfunc name string +pub fn get_cfunc_idx(name: &str) -> usize { + // SAFETY: We acquire a VM lock and don't create multiple &mut references to these static mut variables. + unsafe { get_method_idx(name, &mut *addr_of_mut!(CFUNC_NAME_TO_IDX), &mut *addr_of_mut!(CFUNC_CALL_COUNT)) } +} + +/// Assign an index to a given ISEQ name string +pub fn get_iseq_idx(name: &str) -> usize { + // SAFETY: We acquire a VM lock and don't create multiple &mut references to these static mut variables. 
+ unsafe { get_method_idx(name, &mut *addr_of_mut!(ISEQ_NAME_TO_IDX), &mut *addr_of_mut!(ISEQ_CALL_COUNT)) } +} + +fn get_method_idx( + name: &str, + method_name_to_idx: &mut Option<HashMap<String, usize>>, + method_call_count: &mut Option<Vec<u64>>, +) -> usize { + //println!("{}", name); + + let name_to_idx = method_name_to_idx.get_or_insert_with(HashMap::default); + let call_count = method_call_count.get_or_insert_with(Vec::default); + + match name_to_idx.get(name) { + Some(idx) => *idx, + None => { + let idx = name_to_idx.len(); + name_to_idx.insert(name.to_string(), idx); + + // Resize the call count vector + if idx >= call_count.len() { + call_count.resize(idx + 1, 0); + } + + idx + } + } +} + +// Increment the counter for a C function +pub extern "C" fn incr_cfunc_counter(idx: usize) { + let cfunc_call_count = unsafe { CFUNC_CALL_COUNT.as_mut().unwrap() }; + assert!(idx < cfunc_call_count.len()); + cfunc_call_count[idx] += 1; +} + +// Increment the counter for an ISEQ +pub extern "C" fn incr_iseq_counter(idx: usize) { + let iseq_call_count = unsafe { ISEQ_CALL_COUNT.as_mut().unwrap() }; + assert!(idx < iseq_call_count.len()); + iseq_call_count[idx] += 1; +} + +// YJIT exit counts for each instruction type +const VM_INSTRUCTION_SIZE_USIZE: usize = VM_INSTRUCTION_SIZE as usize; +static mut EXIT_OP_COUNT: [u64; VM_INSTRUCTION_SIZE_USIZE] = [0; VM_INSTRUCTION_SIZE_USIZE]; + +/// Global state needed for collecting backtraces of exits +pub struct YjitExitLocations { + /// Vec to hold raw_samples which represent the control frames + /// of method entries. + raw_samples: Vec<VALUE>, + /// Vec to hold line_samples which represent line numbers of + /// the iseq caller. + line_samples: Vec<i32>, + /// Number of samples skipped when sampling + skipped_samples: usize +} + +/// Private singleton instance of yjit exit locations +static mut YJIT_EXIT_LOCATIONS: Option<YjitExitLocations> = None; + +impl YjitExitLocations { + /// Initialize the yjit exit locations + pub fn init() { + // Return if --yjit-trace-exits isn't enabled + if get_option!(trace_exits).is_none() { + return; + } + + let yjit_exit_locations = YjitExitLocations { + raw_samples: Vec::new(), + line_samples: Vec::new(), + skipped_samples: 0 + }; + + // Initialize the yjit exit locations instance + unsafe { + YJIT_EXIT_LOCATIONS = Some(yjit_exit_locations); + } + } + + /// Get a mutable reference to the yjit exit locations globals instance + pub fn get_instance() -> &'static mut YjitExitLocations { + unsafe { YJIT_EXIT_LOCATIONS.as_mut().unwrap() } + } + + /// Get a mutable reference to the yjit raw samples Vec + pub fn get_raw_samples() -> &'static mut Vec<VALUE> { + &mut YjitExitLocations::get_instance().raw_samples + } + + /// Get a mutable reference to yjit the line samples Vec. + pub fn get_line_samples() -> &'static mut Vec<i32> { + &mut YjitExitLocations::get_instance().line_samples + } + + /// Get the number of samples skipped + pub fn get_skipped_samples() -> &'static mut usize { + &mut YjitExitLocations::get_instance().skipped_samples + } + + /// Mark the data stored in YjitExitLocations::get_raw_samples that needs to be used by + /// rb_yjit_add_frame. YjitExitLocations::get_raw_samples are an array of + /// VALUE pointers, exit instruction, and number of times we've seen this stack row + /// as collected by rb_yjit_record_exit_stack. + /// + /// These need to have rb_gc_mark called so they can be used by rb_yjit_add_frame. 
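/// (Row layout, as recorded by rb_yjit_record_exit_stack further down: each sample is
/// pushed as `[stack_length, frame VALUEs..., exit opcode, times seen]`, so the loop
/// below marks `stack_length` frame VALUEs and then skips the two trailing bookkeeping
/// slots, which hold raw integers rather than GC references.)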
+ pub fn gc_mark_raw_samples() { + // Return if YJIT is not enabled + if !yjit_enabled_p() { + return; + } + + // Return if --yjit-trace-exits isn't enabled + if get_option!(trace_exits).is_none() { + return; + } + + let mut idx: size_t = 0; + let yjit_raw_samples = YjitExitLocations::get_raw_samples(); + + while idx < yjit_raw_samples.len() as size_t { + let num = yjit_raw_samples[idx as usize]; + let mut i = 0; + idx += 1; + + // Mark the yjit_raw_samples at the given index. These represent + // the data that needs to be GC'd which are the current frames. + while i < i32::from(num) { + unsafe { rb_gc_mark(yjit_raw_samples[idx as usize]); } + i += 1; + idx += 1; + } + + // Increase index for exit instruction. + idx += 1; + // Increase index for bookkeeping value (number of times we've seen this + // row in a stack). + idx += 1; + } + } +} + +// Macro to declare the stat counters +macro_rules! make_counters { + ($($counter_name:ident,)+) => { + /// Struct containing the counter values + #[derive(Default, Debug)] + pub struct Counters { $(pub $counter_name: u64),+ } + + /// Enum to represent a counter + #[allow(non_camel_case_types)] + #[derive(Clone, Copy, PartialEq, Eq, Debug)] + pub enum Counter { $($counter_name),+ } + + impl Counter { + /// Map a counter name string to a counter enum + pub fn get(name: &str) -> Option<Counter> { + match name { + $( stringify!($counter_name) => { Some(Counter::$counter_name) } ),+ + _ => None, + } + } + + /// Get a counter name string + pub fn get_name(&self) -> String { + match self { + $( Counter::$counter_name => stringify!($counter_name).to_string() ),+ + } + } + } + + /// Global counters instance, initialized to zero + pub static mut COUNTERS: Counters = Counters { $($counter_name: 0),+ }; + + /// Counter names constant + const COUNTER_NAMES: &'static [&'static str] = &[ $(stringify!($counter_name)),+ ]; + + /// Map a counter name string to a counter pointer + pub fn get_counter_ptr(name: &str) -> *mut u64 { + match name { + $( stringify!($counter_name) => { ptr_to_counter!($counter_name) } ),+ + _ => panic!() + } + } + } +} + +/// The list of counters that are available without --yjit-stats. +/// They are incremented only by `incr_counter!` and don't use `gen_counter_incr`. +pub const DEFAULT_COUNTERS: [Counter; 17] = [ + Counter::code_gc_count, + Counter::compiled_iseq_entry, + Counter::cold_iseq_entry, + Counter::compiled_iseq_count, + Counter::compiled_blockid_count, + Counter::compiled_block_count, + Counter::compiled_branch_count, + Counter::compile_time_ns, + Counter::max_inline_versions, + + Counter::invalidation_count, + Counter::invalidate_method_lookup, + Counter::invalidate_bop_redefined, + Counter::invalidate_ractor_spawn, + Counter::invalidate_constant_state_bump, + Counter::invalidate_constant_ic_fill, + Counter::invalidate_no_singleton_class, + Counter::invalidate_ep_escape, +]; + +/// Macro to increase a counter by name and count +macro_rules! incr_counter_by { + // Unsafe is ok here because options are initialized + // once before any Ruby code executes + ($counter_name:ident, $count:expr) => { + #[allow(unused_unsafe)] + { + unsafe { $crate::stats::COUNTERS.$counter_name += $count as u64 } + } + }; +} +pub(crate) use incr_counter_by; + +/// Macro to increase a counter if the given value is larger +macro_rules! 
incr_counter_to { + // Unsafe is ok here because options are initialized + // once before any Ruby code executes + ($counter_name:ident, $count:expr) => { + #[allow(unused_unsafe)] + { + unsafe { + $crate::stats::COUNTERS.$counter_name = u64::max( + $crate::stats::COUNTERS.$counter_name, + $count as u64, + ) + } + } + }; +} +pub(crate) use incr_counter_to; + +/// Macro to increment a counter by name +macro_rules! incr_counter { + // Unsafe is ok here because options are initialized + // once before any Ruby code executes + ($counter_name:ident) => { + #[allow(unused_unsafe)] + { + unsafe { $crate::stats::COUNTERS.$counter_name += 1 } + } + }; +} +pub(crate) use incr_counter; + +/// Macro to get a raw pointer to a given counter +macro_rules! ptr_to_counter { + ($counter_name:ident) => { + unsafe { + let ctr_ptr = std::ptr::addr_of_mut!(COUNTERS.$counter_name); + ctr_ptr + } + }; +} +pub(crate) use ptr_to_counter; + +// Declare all the counters we track +make_counters! { + yjit_insns_count, + + // Method calls that fallback to dynamic dispatch + send_singleton_class, + send_ivar_set_method, + send_zsuper_method, + send_undef_method, + send_optimized_method_block_call, + send_call_block, + send_call_kwarg, + send_call_multi_ractor, + send_cme_not_found, + send_megamorphic, + send_missing_method, + send_refined_method, + send_private_not_fcall, + send_cfunc_kw_splat_non_nil, + send_cfunc_splat_neg2, + send_cfunc_argc_mismatch, + send_cfunc_block_arg, + send_cfunc_toomany_args, + send_cfunc_tracing, + send_cfunc_splat_with_kw, + send_cfunc_splat_varg_ruby2_keywords, + send_attrset_kwargs, + send_attrset_block_arg, + send_iseq_tailcall, + send_iseq_arity_error, + send_iseq_block_arg_type, + send_iseq_clobbering_block_arg, + send_iseq_complex_discard_extras, + send_iseq_leaf_builtin_block_arg_block_param, + send_iseq_kw_splat_non_nil, + send_iseq_kwargs_mismatch, + send_iseq_has_post, + send_iseq_has_no_kw, + send_iseq_accepts_no_kwarg, + send_iseq_materialized_block, + send_iseq_splat_not_array, + send_iseq_splat_with_kw, + send_iseq_missing_optional_kw, + send_iseq_too_many_kwargs, + send_not_implemented_method, + send_getter_arity, + send_getter_block_arg, + send_args_splat_attrset, + send_args_splat_bmethod, + send_args_splat_aref, + send_args_splat_aset, + send_args_splat_opt_call, + send_iseq_splat_arity_error, + send_splat_too_long, + send_send_wrong_args, + send_send_null_mid, + send_send_null_cme, + send_send_nested, + send_send_attr_reader, + send_send_attr_writer, + send_iseq_has_rest_and_captured, + send_iseq_has_kwrest_and_captured, + send_iseq_has_rest_and_kw_supplied, + send_iseq_has_rest_opt_and_block, + send_bmethod_ractor, + send_bmethod_block_arg, + send_optimized_block_arg, + + invokesuper_defined_class_mismatch, + invokesuper_kw_splat, + invokesuper_kwarg, + invokesuper_megamorphic, + invokesuper_no_cme, + invokesuper_no_me, + invokesuper_not_iseq_or_cfunc, + invokesuper_refinement, + invokesuper_singleton_class, + + invokeblock_megamorphic, + invokeblock_none, + invokeblock_iseq_arg0_optional, + invokeblock_iseq_arg0_args_splat, + invokeblock_iseq_arg0_not_array, + invokeblock_iseq_arg0_wrong_len, + invokeblock_iseq_not_inlined, + invokeblock_ifunc_args_splat, + invokeblock_ifunc_kw_splat, + invokeblock_proc, + invokeblock_symbol, + + // Method calls that exit to the interpreter + guard_send_block_arg_type, + guard_send_getter_splat_non_empty, + guard_send_klass_megamorphic, + guard_send_se_cf_overflow, + guard_send_se_protected_check_failed, + 
guard_send_splatarray_length_not_equal, + guard_send_splatarray_last_ruby2_keywords, + guard_send_splat_not_array, + guard_send_send_name_chain, + guard_send_iseq_has_rest_and_splat_too_few, + guard_send_is_a_class_mismatch, + guard_send_instance_of_class_mismatch, + guard_send_interrupted, + guard_send_not_fixnums, + guard_send_not_fixnum_or_flonum, + guard_send_not_string, + guard_send_respond_to_mid_mismatch, + + guard_send_cfunc_bad_splat_vargs, + + guard_invokesuper_me_changed, + + guard_invokeblock_tag_changed, + guard_invokeblock_iseq_block_changed, + + traced_cfunc_return, + + leave_se_interrupt, + leave_interp_return, + + getivar_megamorphic, + getivar_not_heap, + + setivar_not_heap, + setivar_frozen, + setivar_megamorphic, + + definedivar_not_heap, + definedivar_megamorphic, + + setlocal_wb_required, + + invokebuiltin_too_many_args, + + opt_plus_overflow, + opt_minus_overflow, + opt_mult_overflow, + + opt_succ_not_fixnum, + opt_succ_overflow, + + opt_mod_zero, + opt_div_zero, + + lshift_amount_changed, + lshift_overflow, + + rshift_amount_changed, + + opt_aref_argc_not_one, + opt_aref_arg_not_fixnum, + opt_aref_not_array, + opt_aref_not_hash, + + opt_aset_not_array, + opt_aset_not_fixnum, + opt_aset_not_hash, + + opt_aref_with_qundef, + + opt_case_dispatch_megamorphic, + + opt_getconstant_path_ic_miss, + opt_getconstant_path_multi_ractor, + + expandarray_splat, + expandarray_postarg, + expandarray_not_array, + expandarray_to_ary, + expandarray_chain_max_depth, + + // getblockparam + gbp_wb_required, + + // getblockparamproxy + gbpp_unsupported_type, + gbpp_block_param_modified, + gbpp_block_handler_not_none, + gbpp_block_handler_not_iseq, + gbpp_block_handler_not_proc, + + branchif_interrupted, + branchunless_interrupted, + branchnil_interrupted, + jump_interrupted, + + objtostring_not_string, + + getbyte_idx_not_fixnum, + getbyte_idx_negative, + getbyte_idx_out_of_bounds, + + splatkw_not_hash, + splatkw_not_nil, + + binding_allocations, + binding_set, + + compiled_iseq_entry, + cold_iseq_entry, + compiled_iseq_count, + compiled_blockid_count, + compiled_block_count, + compiled_branch_count, + compile_time_ns, + compilation_failure, + block_next_count, + defer_count, + defer_empty_count, + branch_insn_count, + branch_known_count, + max_inline_versions, + + freed_iseq_count, + + exit_from_branch_stub, + + invalidation_count, + invalidate_method_lookup, + invalidate_bop_redefined, + invalidate_ractor_spawn, + invalidate_constant_state_bump, + invalidate_constant_ic_fill, + invalidate_no_singleton_class, + invalidate_ep_escape, + + // Currently, it's out of the ordinary (might be impossible) for YJIT to leave gaps in + // executable memory, so this should be 0. 
+ exec_mem_non_bump_alloc, + + code_gc_count, + + num_gc_obj_refs, + + num_send, + num_send_known_class, + num_send_polymorphic, + num_send_x86_rel32, + num_send_x86_reg, + num_send_dynamic, + num_send_cfunc, + num_send_cfunc_inline, + num_send_iseq, + num_send_iseq_leaf, + num_send_iseq_inline, + + num_getivar_megamorphic, + num_setivar_megamorphic, + num_opt_case_dispatch_megamorphic, + + num_throw, + num_throw_break, + num_throw_retry, + num_throw_return, + + num_lazy_frame_check, + num_lazy_frame_push, + lazy_frame_count, + lazy_frame_failure, + + iseq_stack_too_large, + iseq_too_long, + + temp_reg_opnd, + temp_mem_opnd, + temp_spill, +} + +//=========================================================================== + +/// Primitive called in yjit.rb +/// Check if stats generation is enabled +#[no_mangle] +pub extern "C" fn rb_yjit_stats_enabled_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { + + if get_option!(gen_stats) { + return Qtrue; + } else { + return Qfalse; + } +} + +/// Primitive called in yjit.rb +/// Check if stats generation should print at exit +#[no_mangle] +pub extern "C" fn rb_yjit_print_stats_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { + if yjit_enabled_p() && get_option!(print_stats) { + return Qtrue; + } else { + return Qfalse; + } +} + +/// Primitive called in yjit.rb. +/// Export all YJIT statistics as a Ruby hash. +#[no_mangle] +pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE, context: VALUE) -> VALUE { + with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict(context == Qtrue)) +} + +/// Primitive called in yjit.rb +/// +/// Check if trace_exits generation is enabled. Requires the stats feature +/// to be enabled. +#[no_mangle] +pub extern "C" fn rb_yjit_trace_exit_locations_enabled_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { + if get_option!(trace_exits).is_some() { + return Qtrue; + } + + return Qfalse; +} + +/// Call the C function to parse the raw_samples and line_samples +/// into raw, lines, and frames hash for RubyVM::YJIT.exit_locations. +#[no_mangle] +pub extern "C" fn rb_yjit_get_exit_locations(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { + // Return if YJIT is not enabled + if !yjit_enabled_p() { + return Qnil; + } + + // Return if --yjit-trace-exits isn't enabled + if get_option!(trace_exits).is_none() { + return Qnil; + } + + // If the stats feature is enabled, pass yjit_raw_samples and yjit_line_samples + // to the C function called rb_yjit_exit_locations_dict for parsing. + let yjit_raw_samples = YjitExitLocations::get_raw_samples(); + let yjit_line_samples = YjitExitLocations::get_line_samples(); + + // Assert that the two Vec's are the same length. If they aren't + // equal something went wrong. + assert_eq!(yjit_raw_samples.len(), yjit_line_samples.len()); + + // yjit_raw_samples and yjit_line_samples are the same length so + // pass only one of the lengths in the C function. + let samples_len = yjit_raw_samples.len() as i32; + + unsafe { + rb_yjit_exit_locations_dict(yjit_raw_samples.as_mut_ptr(), yjit_line_samples.as_mut_ptr(), samples_len) + } +} + +/// Increment a counter by name from the CRuby side +/// Warning: this is not fast because it requires a hash lookup, so don't use in tight loops +#[no_mangle] +pub extern "C" fn rb_yjit_incr_counter(counter_name: *const std::os::raw::c_char) { + use std::ffi::CStr; + let counter_name = unsafe { CStr::from_ptr(counter_name).to_str().unwrap() }; + let counter_ptr = get_counter_ptr(counter_name); + unsafe { *counter_ptr += 1 }; +} + +/// Export all YJIT statistics as a Ruby hash. 
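/// (Usage note, restating the comment inside the function: RubyVM::YJIT.runtime_stats
/// reaches this with `context == false` by default, while RubyVM::YJIT._print_stats passes
/// true so the hash also includes live Context counts, which are more expensive to compute.)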
+fn rb_yjit_gen_stats_dict(context: bool) -> VALUE { + // If YJIT is not enabled, return Qnil + if !yjit_enabled_p() { + return Qnil; + } + + macro_rules! hash_aset_usize { + ($hash:ident, $counter_name:expr, $value:expr) => { + let key = rust_str_to_sym($counter_name); + let value = VALUE::fixnum_from_usize($value); + rb_hash_aset($hash, key, value); + } + } + + let hash = unsafe { rb_hash_new() }; + + unsafe { + // Get the inline and outlined code blocks + let cb = CodegenGlobals::get_inline_cb(); + let ocb = CodegenGlobals::get_outlined_cb(); + + // Inline code size + hash_aset_usize!(hash, "inline_code_size", cb.code_size()); + + // Outlined code size + hash_aset_usize!(hash, "outlined_code_size", ocb.unwrap().code_size()); + + // GCed pages + let freed_page_count = cb.num_freed_pages(); + hash_aset_usize!(hash, "freed_page_count", freed_page_count); + + // GCed code size + hash_aset_usize!(hash, "freed_code_size", freed_page_count * cb.page_size()); + + // Live pages + hash_aset_usize!(hash, "live_page_count", cb.num_mapped_pages() - freed_page_count); + + // Size of memory region allocated for JIT code + hash_aset_usize!(hash, "code_region_size", cb.mapped_region_size()); + + // Rust global allocations in bytes + hash_aset_usize!(hash, "yjit_alloc_size", GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst)); + + // `context` is true at RubyVM::YJIT._print_stats for --yjit-stats. It's false by default + // for RubyVM::YJIT.runtime_stats because counting all Contexts could be expensive. + if context { + let live_context_count = get_live_context_count(); + let context_size = std::mem::size_of::<Context>(); + hash_aset_usize!(hash, "live_context_count", live_context_count); + hash_aset_usize!(hash, "live_context_size", live_context_count * context_size); + } + + // VM instructions count + hash_aset_usize!(hash, "vm_insns_count", rb_vm_insns_count as usize); + + hash_aset_usize!(hash, "live_iseq_count", rb_yjit_live_iseq_count as usize); + hash_aset_usize!(hash, "iseq_alloc_count", rb_yjit_iseq_alloc_count as usize); + } + + // If we're not generating stats, put only default counters + if !get_option!(gen_stats) { + for counter in DEFAULT_COUNTERS { + // Get the counter value + let counter_ptr = get_counter_ptr(&counter.get_name()); + let counter_val = unsafe { *counter_ptr }; + + // Put counter into hash + let key = rust_str_to_sym(&counter.get_name()); + let value = VALUE::fixnum_from_usize(counter_val as usize); + unsafe { rb_hash_aset(hash, key, value); } + } + + return hash; + } + + unsafe { + // Indicate that the complete set of stats is available + rb_hash_aset(hash, rust_str_to_sym("all_stats"), Qtrue); + + // For each counter we track + for counter_name in COUNTER_NAMES { + // Get the counter value + let counter_ptr = get_counter_ptr(counter_name); + let counter_val = *counter_ptr; + + // Put counter into hash + let key = rust_str_to_sym(counter_name); + let value = VALUE::fixnum_from_usize(counter_val as usize); + rb_hash_aset(hash, key, value); + } + + // For each entry in exit_op_count, add a stats entry with key "exit_INSTRUCTION_NAME" + // and the value is the count of side exits for that instruction. 
+ for op_idx in 0..VM_INSTRUCTION_SIZE_USIZE { + let op_name = insn_name(op_idx); + let key_string = "exit_".to_owned() + &op_name; + let key = rust_str_to_sym(&key_string); + let value = VALUE::fixnum_from_usize(EXIT_OP_COUNT[op_idx] as usize); + rb_hash_aset(hash, key, value); + } + + // Set method call counts in a Ruby dict + fn set_call_counts( + calls_hash: VALUE, + method_name_to_idx: &mut Option<HashMap<String, usize>>, + method_call_count: &mut Option<Vec<u64>>, + ) { + if let (Some(name_to_idx), Some(call_counts)) = (method_name_to_idx, method_call_count) { + // Create a list of (name, call_count) pairs + let mut pairs = Vec::new(); + for (name, idx) in name_to_idx { + let count = call_counts[*idx]; + pairs.push((name, count)); + } + + // Sort the vectors by decreasing call counts + pairs.sort_by_key(|e| -(e.1 as i64)); + + // Cap the number of counts reported to avoid + // bloating log files, etc. + pairs.truncate(20); + + // Add the pairs to the dict + for (name, call_count) in pairs { + let key = rust_str_to_sym(name); + let value = VALUE::fixnum_from_usize(call_count as usize); + unsafe { rb_hash_aset(calls_hash, key, value); } + } + } + } + + // Create a hash for the cfunc call counts + let cfunc_calls = rb_hash_new(); + rb_hash_aset(hash, rust_str_to_sym("cfunc_calls"), cfunc_calls); + set_call_counts(cfunc_calls, &mut *addr_of_mut!(CFUNC_NAME_TO_IDX), &mut *addr_of_mut!(CFUNC_CALL_COUNT)); + + // Create a hash for the ISEQ call counts + let iseq_calls = rb_hash_new(); + rb_hash_aset(hash, rust_str_to_sym("iseq_calls"), iseq_calls); + set_call_counts(iseq_calls, &mut *addr_of_mut!(ISEQ_NAME_TO_IDX), &mut *addr_of_mut!(ISEQ_CALL_COUNT)); + } + + hash +} + +fn get_live_context_count() -> usize { + let mut count = 0; + for_each_iseq_payload(|iseq_payload| { + for blocks in iseq_payload.version_map.iter() { + for block in blocks.iter() { + count += unsafe { block.as_ref() }.get_ctx_count(); + } + } + for block in iseq_payload.dead_blocks.iter() { + count += unsafe { block.as_ref() }.get_ctx_count(); + } + }); + count +} + +/// Record the backtrace when a YJIT exit occurs. This functionality requires +/// that the stats feature is enabled as well as the --yjit-trace-exits option. +/// +/// This function will fill two Vec's in YjitExitLocations to record the raw samples +/// and line samples. Their length should be the same, however the data stored in +/// them is different. +#[no_mangle] +pub extern "C" fn rb_yjit_record_exit_stack(_exit_pc: *const VALUE) +{ + // Return if YJIT is not enabled + if !yjit_enabled_p() { + return; + } + + // Return if --yjit-trace-exits isn't enabled + if get_option!(trace_exits).is_none() { + return; + } + + if get_option!(trace_exits_sample_rate) > 0 { + if get_option!(trace_exits_sample_rate) <= *YjitExitLocations::get_skipped_samples() { + YjitExitLocations::get_instance().skipped_samples = 0; + } else { + YjitExitLocations::get_instance().skipped_samples += 1; + return; + } + } + + // rb_vm_insn_addr2opcode won't work in cargo test --all-features + // because it's a C function. Without insn call, this function is useless + // so wrap the whole thing in a not test check. + #[cfg(not(test))] + { + // Get the opcode from the encoded insn handler at this PC + let insn = unsafe { rb_vm_insn_addr2opcode((*_exit_pc).as_ptr()) }; + + // Use the same buffer size as Stackprof. + const BUFF_LEN: usize = 2048; + + // Create 2 array buffers to be used to collect frames and lines. 
+ let mut frames_buffer = [VALUE(0_usize); BUFF_LEN]; + let mut lines_buffer = [0; BUFF_LEN]; + + // Records call frame and line information for each method entry into two + // temporary buffers. Returns the number of times we added to the buffer (ie + // the length of the stack). + // + // Call frame info is stored in the frames_buffer, line number information + // in the lines_buffer. The first argument is the start point and the second + // argument is the buffer limit, set at 2048. + let stack_length = unsafe { rb_profile_frames(0, BUFF_LEN as i32, frames_buffer.as_mut_ptr(), lines_buffer.as_mut_ptr()) }; + let samples_length = (stack_length as usize) + 3; + + let yjit_raw_samples = YjitExitLocations::get_raw_samples(); + let yjit_line_samples = YjitExitLocations::get_line_samples(); + + // If yjit_raw_samples is less than or equal to the current length of the samples + // we might have seen this stack trace previously. + if yjit_raw_samples.len() >= samples_length { + let prev_stack_len_index = yjit_raw_samples.len() - samples_length; + let prev_stack_len = i64::from(yjit_raw_samples[prev_stack_len_index]); + let mut idx = stack_length - 1; + let mut prev_frame_idx = 0; + let mut seen_already = true; + + // If the previous stack length and current stack length are equal, + // loop and compare the current frame to the previous frame. If they are + // not equal, set seen_already to false and break out of the loop. + if prev_stack_len == stack_length as i64 { + while idx >= 0 { + let current_frame = frames_buffer[idx as usize]; + let prev_frame = yjit_raw_samples[prev_stack_len_index + prev_frame_idx + 1]; + + // If the current frame and previous frame are not equal, set + // seen_already to false and break out of the loop. + if current_frame != prev_frame { + seen_already = false; + break; + } + + idx -= 1; + prev_frame_idx += 1; + } + + // If we know we've seen this stack before, increment the counter by 1. + if seen_already { + let prev_idx = yjit_raw_samples.len() - 1; + let prev_count = i64::from(yjit_raw_samples[prev_idx]); + let new_count = prev_count + 1; + + yjit_raw_samples[prev_idx] = VALUE(new_count as usize); + yjit_line_samples[prev_idx] = new_count as i32; + + return; + } + } + } + + yjit_raw_samples.push(VALUE(stack_length as usize)); + yjit_line_samples.push(stack_length); + + let mut idx = stack_length - 1; + + while idx >= 0 { + let frame = frames_buffer[idx as usize]; + let line = lines_buffer[idx as usize]; + + yjit_raw_samples.push(frame); + yjit_line_samples.push(line); + + idx -= 1; + } + + // Push the insn value into the yjit_raw_samples Vec. + yjit_raw_samples.push(VALUE(insn as usize)); + + // We don't know the line + yjit_line_samples.push(0); + + // Push number of times seen onto the stack, which is 1 + // because it's the first time we've seen it. + yjit_raw_samples.push(VALUE(1_usize)); + yjit_line_samples.push(1); + } +} + +/// Primitive called in yjit.rb. Zero out all the counters. 
+#[no_mangle] +pub extern "C" fn rb_yjit_reset_stats_bang(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { + unsafe { + EXIT_OP_COUNT = [0; VM_INSTRUCTION_SIZE_USIZE]; + COUNTERS = Counters::default(); + } + + return Qnil; +} + +#[no_mangle] +pub extern "C" fn rb_yjit_collect_binding_alloc() { + incr_counter!(binding_allocations); +} + +#[no_mangle] +pub extern "C" fn rb_yjit_collect_binding_set() { + incr_counter!(binding_set); +} + +#[no_mangle] +pub extern "C" fn rb_yjit_count_side_exit_op(exit_pc: *const VALUE) -> *const VALUE { + #[cfg(not(test))] + unsafe { + // Get the opcode from the encoded insn handler at this PC + let opcode = rb_vm_insn_addr2opcode((*exit_pc).as_ptr()); + + // Increment the exit op count for this opcode + EXIT_OP_COUNT[opcode as usize] += 1; + }; + + // This function must return exit_pc! + return exit_pc; +} + +/// Measure the time taken by func() and add that to yjit_compile_time. +pub fn with_compile_time<F, R>(func: F) -> R where F: FnOnce() -> R { + let start = Instant::now(); + let ret = func(); + let nanos = Instant::now().duration_since(start).as_nanos(); + incr_counter_by!(compile_time_ns, nanos); + ret +} diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs new file mode 100644 index 0000000000..6bc66ee33e --- /dev/null +++ b/yjit/src/utils.rs @@ -0,0 +1,291 @@ +#![allow(dead_code)] // Some functions for print debugging in here + +use crate::backend::ir::*; +use crate::cruby::*; +use std::slice; + +/// Trait for casting to [usize] that allows you to say `.as_usize()`. +/// Implementation conditional on the cast preserving the numeric value on +/// all inputs and being inexpensive. +/// +/// [usize] is only guaranteed to be more than 16-bit wide, so we can't use +/// `.into()` to cast an `u32` or an `u64` to a `usize` even though in all +/// the platforms YJIT supports these two casts are pretty much no-ops. +/// We could say `as usize` or `.try_convert().unwrap()` everywhere +/// for those casts but they both have undesirable consequences if and when +/// we decide to support 32-bit platforms. Unfortunately we can't implement +/// [::core::convert::From] for [usize] since both the trait and the type are +/// external. Naming the method `into()` also runs into naming conflicts. +pub(crate) trait IntoUsize { + /// Convert to usize. Implementation conditional on width of [usize]. + fn as_usize(self) -> usize; +} + +#[cfg(target_pointer_width = "64")] +impl IntoUsize for u64 { + fn as_usize(self) -> usize { + self as usize + } +} + +#[cfg(target_pointer_width = "64")] +impl IntoUsize for u32 { + fn as_usize(self) -> usize { + self as usize + } +} + +impl IntoUsize for u16 { + /// Alias for `.into()`. For convenience so you could use the trait for + /// all unsgined types. + fn as_usize(self) -> usize { + self.into() + } +} + +impl IntoUsize for u8 { + /// Alias for `.into()`. For convenience so you could use the trait for + /// all unsgined types. + fn as_usize(self) -> usize { + self.into() + } +} + +/// The [Into<u64>] Rust does not provide. +/// Convert to u64 with assurance that the value is preserved. +/// Currently, `usize::BITS == 64` holds for all platforms we support. +pub(crate) trait IntoU64 { + fn as_u64(self) -> u64; +} + +#[cfg(target_pointer_width = "64")] +impl IntoU64 for usize { + fn as_u64(self) -> u64 { + self as u64 + } +} + +/// Compute an offset in bytes of a given struct field +#[allow(unused)] +macro_rules! 
offset_of { + ($struct_type:ty, $field_name:tt) => {{ + // This is basically the exact example for + // "creating a pointer to uninitialized data" from `std::ptr::addr_of_mut`. + // We make a dummy local that hopefully is optimized away because we never + // read or write its contents. Doing this dance to avoid UB. + let mut instance = std::mem::MaybeUninit::<$struct_type>::uninit(); + + let base_ptr = instance.as_mut_ptr(); + let field_ptr = unsafe { std::ptr::addr_of_mut!((*base_ptr).$field_name) }; + + (field_ptr as usize) - (base_ptr as usize) + }}; +} +#[allow(unused)] +pub(crate) use offset_of; + +// Convert a CRuby UTF-8-encoded RSTRING into a Rust string. +// This should work fine on ASCII strings and anything else +// that is considered legal UTF-8, including embedded nulls. +pub fn ruby_str_to_rust(v: VALUE) -> String { + let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8; + let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap(); + let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) }; + match String::from_utf8(str_slice.to_vec()) { + Ok(utf8) => utf8, + Err(_) => String::new(), + } +} + +// Location is the file defining the method, colon, method name. +// Filenames are sometimes internal strings supplied to eval, +// so be careful with them. +pub fn iseq_get_location(iseq: IseqPtr, pos: u16) -> String { + let iseq_label = unsafe { rb_iseq_label(iseq) }; + let iseq_path = unsafe { rb_iseq_path(iseq) }; + let iseq_lineno = unsafe { rb_iseq_line_no(iseq, pos as usize) }; + + let mut s = if iseq_label == Qnil { + "None".to_string() + } else { + ruby_str_to_rust(iseq_label) + }; + s.push_str("@"); + if iseq_path == Qnil { + s.push_str("None"); + } else { + s.push_str(&ruby_str_to_rust(iseq_path)); + } + s.push_str(":"); + s.push_str(&iseq_lineno.to_string()); + s +} + +// TODO: we may want to move this function into yjit.c, maybe add a convenient Rust-side wrapper +/* +// For debugging. Print the bytecode for an iseq. +RBIMPL_ATTR_MAYBE_UNUSED() +static void +yjit_print_iseq(const rb_iseq_t *iseq) +{ + char *ptr; + long len; + VALUE disassembly = rb_iseq_disasm(iseq); + RSTRING_GETMEM(disassembly, ptr, len); + fprintf(stderr, "%.*s\n", (int)len, ptr); +} +*/ + +#[cfg(target_arch = "aarch64")] +macro_rules! c_callable { + ($(#[$outer:meta])* + fn $f:ident $args:tt $(-> $ret:ty)? $body:block) => { + $(#[$outer])* + extern "C" fn $f $args $(-> $ret)? $body + }; +} + +#[cfg(target_arch = "x86_64")] +macro_rules! c_callable { + ($(#[$outer:meta])* + fn $f:ident $args:tt $(-> $ret:ty)? $body:block) => { + $(#[$outer])* + extern "sysv64" fn $f $args $(-> $ret)? $body + }; +} +pub(crate) use c_callable; + +pub fn print_int(asm: &mut Assembler, opnd: Opnd) { + c_callable!{ + fn print_int_fn(val: i64) { + println!("{}", val); + } + } + + asm.cpush_all(); + + let argument = match opnd { + Opnd::Mem(_) | Opnd::Reg(_) | Opnd::InsnOut { .. 
} => { + // Sign-extend the value if necessary + if opnd.rm_num_bits() < 64 { + asm.load_sext(opnd) + } else { + opnd + } + }, + Opnd::Imm(_) | Opnd::UImm(_) => opnd, + _ => unreachable!(), + }; + + asm.ccall(print_int_fn as *const u8, vec![argument]); + asm.cpop_all(); +} + +/// Generate code to print a pointer +pub fn print_ptr(asm: &mut Assembler, opnd: Opnd) { + c_callable!{ + fn print_ptr_fn(ptr: *const u8) { + println!("{:p}", ptr); + } + } + + assert!(opnd.rm_num_bits() == 64); + + asm.cpush_all(); + asm.ccall(print_ptr_fn as *const u8, vec![opnd]); + asm.cpop_all(); +} + +/// Generate code to print a value +pub fn print_value(asm: &mut Assembler, opnd: Opnd) { + c_callable!{ + fn print_value_fn(val: VALUE) { + unsafe { rb_obj_info_dump(val) } + } + } + + assert!(matches!(opnd, Opnd::Value(_))); + + asm.cpush_all(); + asm.ccall(print_value_fn as *const u8, vec![opnd]); + asm.cpop_all(); +} + +/// Generate code to print constant string to stdout +pub fn print_str(asm: &mut Assembler, str: &str) { + c_callable!{ + fn print_str_cfun(ptr: *const u8, num_bytes: usize) { + unsafe { + let slice = slice::from_raw_parts(ptr, num_bytes); + let str = std::str::from_utf8(slice).unwrap(); + println!("{}", str); + } + } + } + + asm.cpush_all(); + + let string_data = asm.new_label("string_data"); + let after_string = asm.new_label("after_string"); + + asm.jmp(after_string); + asm.write_label(string_data); + asm.bake_string(str); + asm.write_label(after_string); + + let opnd = asm.lea_jump_target(string_data); + asm.ccall(print_str_cfun as *const u8, vec![opnd, Opnd::UImm(str.len() as u64)]); + + asm.cpop_all(); +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::asm::CodeBlock; + + #[test] + fn min_max_preserved_after_cast_to_usize() { + use crate::utils::IntoUsize; + + let min: usize = u64::MIN.as_usize(); + assert_eq!(min, u64::MIN.try_into().unwrap()); + let max: usize = u64::MAX.as_usize(); + assert_eq!(max, u64::MAX.try_into().unwrap()); + + let min: usize = u32::MIN.as_usize(); + assert_eq!(min, u32::MIN.try_into().unwrap()); + let max: usize = u32::MAX.as_usize(); + assert_eq!(max, u32::MAX.try_into().unwrap()); + } + + #[test] + fn test_offset_of() { + #[repr(C)] + struct Foo { + a: u8, + b: u64, + } + + assert_eq!(0, offset_of!(Foo, a), "C99 6.7.2.1p13 says no padding at the front"); + assert_eq!(8, offset_of!(Foo, b), "ABI dependent, but should hold"); + } + + #[test] + fn test_print_int() { + let mut asm = Assembler::new(); + let mut cb = CodeBlock::new_dummy(1024); + + print_int(&mut asm, Opnd::Imm(42)); + asm.compile(&mut cb, None).unwrap(); + } + + #[test] + fn test_print_str() { + let mut asm = Assembler::new(); + let mut cb = CodeBlock::new_dummy(1024); + + print_str(&mut asm, "Hello, world!"); + asm.compile(&mut cb, None).unwrap(); + } +} diff --git a/yjit/src/virtualmem.rs b/yjit/src/virtualmem.rs new file mode 100644 index 0000000000..f3c0ceefff --- /dev/null +++ b/yjit/src/virtualmem.rs @@ -0,0 +1,441 @@ +//! Memory management stuff for YJIT's code storage. Deals with virtual memory. +// I'm aware that there is an experiment in Rust Nightly right now for to see if banning +// usize->pointer casts is viable. It seems like a lot of work for us to participate for not much +// benefit. 
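// (Reading guide, based on the types below: a VirtualMemory hands out CodePtr values that
// are offsets from the region base, lazily makes a page writable through its Allocator the
// first time that page is written, and a later mark_all_executable call flips the written
// region back to executable to preserve W^X.)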
+
+use std::ptr::NonNull;
+
+use crate::{utils::IntoUsize, backend::ir::Target};
+
+#[cfg(not(test))]
+pub type VirtualMem = VirtualMemory<sys::SystemAllocator>;
+
+#[cfg(test)]
+pub type VirtualMem = VirtualMemory<tests::TestingAllocator>;
+
+/// Memory for generated executable machine code. When not testing, we reserve address space for
+/// the entire region upfront and map physical memory into the reserved address space as needed. On
+/// Linux, this is basically done using an `mmap` with `PROT_NONE` upfront and gradually using
+/// `mprotect` with `PROT_READ|PROT_WRITE` as needed. The WIN32 equivalent seems to be
+/// `VirtualAlloc` with `MEM_RESERVE` then later with `MEM_COMMIT`.
+///
+/// This handles ["W^X"](https://en.wikipedia.org/wiki/W%5EX) semi-automatically. Writes
+/// are always accepted, and once writes are done a call to [Self::mark_all_executable] makes
+/// the code in the region executable.
+pub struct VirtualMemory<A: Allocator> {
+    /// Location of the virtual memory region.
+    region_start: NonNull<u8>,
+
+    /// Size of the region in bytes.
+    region_size_bytes: usize,
+
+    /// Number of bytes per "page"; memory protection permission can only be controlled at this
+    /// granularity.
+    page_size_bytes: usize,
+
+    /// Number of bytes that we have allocated physical memory for, starting at
+    /// [Self::region_start].
+    mapped_region_bytes: usize,
+
+    /// Keep track of the address of the last written-to page.
+    /// Used for changing protection to implement W^X.
+    current_write_page: Option<usize>,
+
+    /// Zero size member for making syscalls to get physical memory during normal operation.
+    /// When testing this owns some memory.
+    allocator: A,
+}
+
+/// Groups together the two syscalls to get new physical memory and to change
+/// memory protection. See [VirtualMemory] for details.
+pub trait Allocator {
+    #[must_use]
+    fn mark_writable(&mut self, ptr: *const u8, size: u32) -> bool;
+
+    fn mark_executable(&mut self, ptr: *const u8, size: u32);
+
+    fn mark_unused(&mut self, ptr: *const u8, size: u32) -> bool;
+}
+
+/// Pointer into a [VirtualMemory] represented as an offset from the base.
+/// Note: there is no NULL constant for [CodePtr]. You should use `Option<CodePtr>` instead.
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)]
+#[repr(C, packed)]
+pub struct CodePtr(u32);
+
+impl CodePtr {
+    /// Advance the CodePtr. Can return a dangling pointer.
+    pub fn add_bytes(self, bytes: usize) -> Self {
+        let CodePtr(raw) = self;
+        let bytes: u32 = bytes.try_into().unwrap();
+        CodePtr(raw + bytes)
+    }
+
+    /// Note that the raw pointer might be dangling if there haven't
+    /// been any writes to it through the [VirtualMemory] yet.
+    pub fn raw_ptr(self, base: &impl CodePtrBase) -> *const u8 {
+        let CodePtr(offset) = self;
+        return base.base_ptr().as_ptr().wrapping_add(offset.as_usize())
+    }
+
+    /// Get the address of the code pointer.
+    pub fn raw_addr(self, base: &impl CodePtrBase) -> usize {
+        self.raw_ptr(base) as usize
+    }
+
+    /// Get the offset component for the code pointer. Useful for finding the distance between two
+    /// code pointers that share the same [VirtualMem].
+ pub fn as_offset(self) -> i64 { + let CodePtr(offset) = self; + offset.into() + } + + pub fn as_side_exit(self) -> Target { + Target::SideExitPtr(self) + } +} + +/// Errors that can happen when writing to [VirtualMemory] +#[derive(Debug, PartialEq)] +pub enum WriteError { + OutOfBounds, + FailedPageMapping, +} + +use WriteError::*; + +impl<A: Allocator> VirtualMemory<A> { + /// Bring a part of the address space under management. + pub fn new(allocator: A, page_size: u32, virt_region_start: NonNull<u8>, size_bytes: usize) -> Self { + assert_ne!(0, page_size); + let page_size_bytes = page_size.as_usize(); + + Self { + region_start: virt_region_start, + region_size_bytes: size_bytes, + page_size_bytes, + mapped_region_bytes: 0, + current_write_page: None, + allocator, + } + } + + /// Return the start of the region as a raw pointer. Note that it could be a dangling + /// pointer so be careful dereferencing it. + pub fn start_ptr(&self) -> CodePtr { + CodePtr(0) + } + + pub fn mapped_end_ptr(&self) -> CodePtr { + self.start_ptr().add_bytes(self.mapped_region_bytes) + } + + pub fn virtual_end_ptr(&self) -> CodePtr { + self.start_ptr().add_bytes(self.region_size_bytes) + } + + /// Size of the region in bytes that we have allocated physical memory for. + pub fn mapped_region_size(&self) -> usize { + self.mapped_region_bytes + } + + /// Size of the region in bytes where writes could be attempted. + pub fn virtual_region_size(&self) -> usize { + self.region_size_bytes + } + + /// The granularity at which we can control memory permission. + /// On Linux, this is the page size that mmap(2) talks about. + pub fn system_page_size(&self) -> usize { + self.page_size_bytes + } + + /// Write a single byte. The first write to a page makes it readable. + pub fn write_byte(&mut self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> { + let page_size = self.page_size_bytes; + let raw: *mut u8 = write_ptr.raw_ptr(self) as *mut u8; + let page_addr = (raw as usize / page_size) * page_size; + + if self.current_write_page == Some(page_addr) { + // Writing within the last written to page, nothing to do + } else { + // Switching to a different and potentially new page + let start = self.region_start.as_ptr(); + let mapped_region_end = start.wrapping_add(self.mapped_region_bytes); + let whole_region_end = start.wrapping_add(self.region_size_bytes); + let alloc = &mut self.allocator; + + assert!((start..=whole_region_end).contains(&mapped_region_end)); + + if (start..mapped_region_end).contains(&raw) { + // Writing to a previously written to page. + // Need to make page writable, but no need to fill. + let page_size: u32 = page_size.try_into().unwrap(); + if !alloc.mark_writable(page_addr as *const _, page_size) { + return Err(FailedPageMapping); + } + + self.current_write_page = Some(page_addr); + } else if (start..whole_region_end).contains(&raw) { + // Writing to a brand new page + let mapped_region_end_addr = mapped_region_end as usize; + let alloc_size = page_addr - mapped_region_end_addr + page_size; + + assert_eq!(0, alloc_size % page_size, "allocation size should be page aligned"); + assert_eq!(0, mapped_region_end_addr % page_size, "pointer should be page aligned"); + + if alloc_size > page_size { + // This is unusual for the current setup, so keep track of it. 
+                    crate::stats::incr_counter!(exec_mem_non_bump_alloc);
+                }
+
+                // Allocate new chunk
+                let alloc_size_u32: u32 = alloc_size.try_into().unwrap();
+                unsafe {
+                    if !alloc.mark_writable(mapped_region_end.cast(), alloc_size_u32) {
+                        return Err(FailedPageMapping);
+                    }
+                    if cfg!(target_arch = "x86_64") {
+                        // Fill new memory with PUSH DS (0x1E) so that executing uninitialized memory
+                        // will fault with #UD in 64-bit mode. On Linux it becomes SIGILL and uses the
+                        // usual Ruby crash reporter.
+                        std::slice::from_raw_parts_mut(mapped_region_end, alloc_size).fill(0x1E);
+                    } else if cfg!(target_arch = "aarch64") {
+                        // On aarch64, all zeros encode UDF, so it's already what we want.
+                    } else {
+                        unreachable!("unknown arch");
+                    }
+                }
+                self.mapped_region_bytes = self.mapped_region_bytes + alloc_size;
+
+                self.current_write_page = Some(page_addr);
+            } else {
+                return Err(OutOfBounds);
+            }
+        }
+
+        // We have permission to write if we get here
+        unsafe { raw.write(byte) };
+
+        Ok(())
+    }
+
+    /// Make all the code in the region executable. Call this at the end of a write session.
+    /// See [Self] for the usual usage flow.
+    pub fn mark_all_executable(&mut self) {
+        self.current_write_page = None;
+
+        let region_start = self.region_start;
+        let mapped_region_bytes: u32 = self.mapped_region_bytes.try_into().unwrap();
+
+        // Make mapped region executable
+        self.allocator.mark_executable(region_start.as_ptr(), mapped_region_bytes);
+    }
+
+    /// Free a range of bytes. start_ptr must be memory page-aligned.
+    pub fn free_bytes(&mut self, start_ptr: CodePtr, size: u32) {
+        assert_eq!(start_ptr.raw_ptr(self) as usize % self.page_size_bytes, 0);
+
+        // Bounds check the request. We should only free memory we manage.
+        let mapped_region = self.start_ptr().raw_ptr(self)..self.mapped_end_ptr().raw_ptr(self);
+        let virtual_region = self.start_ptr().raw_ptr(self)..self.virtual_end_ptr().raw_ptr(self);
+        let last_byte_to_free = start_ptr.add_bytes(size.saturating_sub(1).as_usize()).raw_ptr(self);
+        assert!(mapped_region.contains(&start_ptr.raw_ptr(self)));
+        // On platforms where code page size != memory page size (e.g. Linux), we often need
+        // to free code pages that contain unmapped memory pages. When it happens on the last
+        // code page, it's more appropriate to check the last byte against the virtual region.
+        assert!(virtual_region.contains(&last_byte_to_free));
+
+        self.allocator.mark_unused(start_ptr.raw_ptr(self), size);
+    }
+}
+
+/// Something that could provide a base pointer to compute a raw pointer from a [CodePtr].
+pub trait CodePtrBase {
+    fn base_ptr(&self) -> NonNull<u8>;
+}
+
+impl<A: Allocator> CodePtrBase for VirtualMemory<A> {
+    fn base_ptr(&self) -> NonNull<u8> {
+        self.region_start
+    }
+}
+
+/// Requires linking with CRuby to work
+#[cfg(not(test))]
+mod sys {
+    use crate::cruby::*;
+
+    /// Zero size! This just groups together syscalls that require linking with CRuby.
+ pub struct SystemAllocator; + + type VoidPtr = *mut std::os::raw::c_void; + + impl super::Allocator for SystemAllocator { + fn mark_writable(&mut self, ptr: *const u8, size: u32) -> bool { + unsafe { rb_yjit_mark_writable(ptr as VoidPtr, size) } + } + + fn mark_executable(&mut self, ptr: *const u8, size: u32) { + unsafe { rb_yjit_mark_executable(ptr as VoidPtr, size) } + } + + fn mark_unused(&mut self, ptr: *const u8, size: u32) -> bool { + unsafe { rb_yjit_mark_unused(ptr as VoidPtr, size) } + } + } +} + +#[cfg(not(test))] +pub(crate) use sys::*; + + +#[cfg(test)] +pub mod tests { + use crate::utils::IntoUsize; + use super::*; + + // Track allocation requests and owns some fixed size backing memory for requests. + // While testing we don't execute generated code. + pub struct TestingAllocator { + requests: Vec<AllocRequest>, + memory: Vec<u8>, + } + + #[derive(Debug)] + enum AllocRequest { + MarkWritable{ start_idx: usize, length: usize }, + MarkExecutable{ start_idx: usize, length: usize }, + MarkUnused, + } + use AllocRequest::*; + + impl TestingAllocator { + pub fn new(mem_size: usize) -> Self { + Self { requests: Vec::default(), memory: vec![0; mem_size] } + } + + pub fn mem_start(&self) -> *const u8 { + self.memory.as_ptr() + } + + // Verify that write_byte() bounds checks. Return `ptr` as an index. + fn bounds_check_request(&self, ptr: *const u8, size: u32) -> usize { + let mem_start = self.memory.as_ptr() as usize; + let index = ptr as usize - mem_start; + + assert!(index < self.memory.len()); + assert!(index + size.as_usize() <= self.memory.len()); + + index + } + } + + // Bounds check and then record the request + impl super::Allocator for TestingAllocator { + fn mark_writable(&mut self, ptr: *const u8, length: u32) -> bool { + let index = self.bounds_check_request(ptr, length); + self.requests.push(MarkWritable { start_idx: index, length: length.as_usize() }); + + true + } + + fn mark_executable(&mut self, ptr: *const u8, length: u32) { + let index = self.bounds_check_request(ptr, length); + self.requests.push(MarkExecutable { start_idx: index, length: length.as_usize() }); + + // We don't try to execute generated code in cfg(test) + // so no need to actually request executable memory. 
+ } + + fn mark_unused(&mut self, ptr: *const u8, length: u32) -> bool { + self.bounds_check_request(ptr, length); + self.requests.push(MarkUnused); + + true + } + } + + // Fictional architecture where each page is 4 bytes long + const PAGE_SIZE: usize = 4; + fn new_dummy_virt_mem() -> VirtualMemory<TestingAllocator> { + let mem_size = PAGE_SIZE * 10; + let alloc = TestingAllocator::new(mem_size); + let mem_start: *const u8 = alloc.mem_start(); + + VirtualMemory::new( + alloc, + PAGE_SIZE.try_into().unwrap(), + NonNull::new(mem_start as *mut u8).unwrap(), + mem_size, + ) + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn new_memory_is_initialized() { + let mut virt = new_dummy_virt_mem(); + + virt.write_byte(virt.start_ptr(), 1).unwrap(); + assert!( + virt.allocator.memory[..PAGE_SIZE].iter().all(|&byte| byte != 0), + "Entire page should be initialized", + ); + + // Skip a few page + let three_pages = 3 * PAGE_SIZE; + virt.write_byte(virt.start_ptr().add_bytes(three_pages), 1).unwrap(); + assert!( + virt.allocator.memory[..three_pages].iter().all(|&byte| byte != 0), + "Gaps between write requests should be filled", + ); + } + + #[test] + fn no_redundant_syscalls_when_writing_to_the_same_page() { + let mut virt = new_dummy_virt_mem(); + + virt.write_byte(virt.start_ptr(), 1).unwrap(); + virt.write_byte(virt.start_ptr(), 0).unwrap(); + + assert!( + matches!( + virt.allocator.requests[..], + [MarkWritable { start_idx: 0, length: PAGE_SIZE }], + ) + ); + } + + #[test] + fn bounds_checking() { + use super::WriteError::*; + let mut virt = new_dummy_virt_mem(); + + let one_past_end = virt.start_ptr().add_bytes(virt.virtual_region_size()); + assert_eq!(Err(OutOfBounds), virt.write_byte(one_past_end, 0)); + + let end_of_addr_space = CodePtr(u32::MAX); + assert_eq!(Err(OutOfBounds), virt.write_byte(end_of_addr_space, 0)); + } + + #[test] + fn only_written_to_regions_become_executable() { + // ... so we catch attempts to read/write/execute never-written-to regions + const THREE_PAGES: usize = PAGE_SIZE * 3; + let mut virt = new_dummy_virt_mem(); + let page_two_start = virt.start_ptr().add_bytes(PAGE_SIZE * 2); + virt.write_byte(page_two_start, 1).unwrap(); + virt.mark_all_executable(); + + assert!(virt.virtual_region_size() > THREE_PAGES); + assert!( + matches!( + virt.allocator.requests[..], + [ + MarkWritable { start_idx: 0, length: THREE_PAGES }, + MarkExecutable { start_idx: 0, length: THREE_PAGES }, + ] + ), + ); + } +} diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs new file mode 100644 index 0000000000..cc2c8fe066 --- /dev/null +++ b/yjit/src/yjit.rs @@ -0,0 +1,225 @@ +use crate::codegen::*; +use crate::core::*; +use crate::cruby::*; +use crate::invariants::*; +use crate::options::*; +use crate::stats::YjitExitLocations; +use crate::stats::incr_counter; +use crate::stats::with_compile_time; + +use std::os::raw; + +/// Is YJIT on? The interpreter uses this variable to decide whether to trigger +/// compilation. See jit_exec() and jit_compile(). +#[allow(non_upper_case_globals)] +#[no_mangle] +pub static mut rb_yjit_enabled_p: bool = false; + +/// Parse one command-line option. +/// This is called from ruby.c +#[no_mangle] +pub extern "C" fn rb_yjit_parse_option(str_ptr: *const raw::c_char) -> bool { + return parse_option(str_ptr).is_some(); +} + +/// Like rb_yjit_enabled_p, but for Rust code. 
+pub fn yjit_enabled_p() -> bool {
+    unsafe { rb_yjit_enabled_p }
+}
+
+/// This function is called from C code
+#[no_mangle]
+pub extern "C" fn rb_yjit_init(yjit_enabled: bool) {
+    // Register the method codegen functions. This must be done at boot.
+    yjit_reg_method_codegen_fns();
+
+    // If --yjit-disable, yjit_init() will not be called until RubyVM::YJIT.enable.
+    if yjit_enabled && !get_option!(disable) {
+        yjit_init();
+    }
+}
+
+/// Initialize and enable YJIT. You should call this at boot or with the GVL.
+fn yjit_init() {
+    // TODO: need to make sure that command-line options have been
+    // initialized by CRuby
+
+    // Catch panics to avoid UB for unwinding into C frames.
+    // See https://doc.rust-lang.org/nomicon/exception-safety.html
+    let result = std::panic::catch_unwind(|| {
+        Invariants::init();
+        CodegenGlobals::init();
+        YjitExitLocations::init();
+        ids::init();
+
+        rb_bug_panic_hook();
+
+        // YJIT enabled and initialized successfully
+        assert!(unsafe{ !rb_yjit_enabled_p });
+        unsafe { rb_yjit_enabled_p = true; }
+    });
+
+    if let Err(_) = result {
+        println!("YJIT: yjit_init() panicked. Aborting.");
+        std::process::abort();
+    }
+
+    // Make sure --yjit-perf doesn't append symbols to an old file
+    if get_option!(perf_map).is_some() {
+        let perf_map = format!("/tmp/perf-{}.map", std::process::id());
+        let _ = std::fs::remove_file(&perf_map);
+        println!("YJIT perf map: {perf_map}");
+    }
+
+    // Initialize the GC hooks. Do this last, as some code depends on Rust initialization.
+    extern "C" {
+        fn rb_yjit_init_gc_hooks();
+    }
+    unsafe { rb_yjit_init_gc_hooks() }
+}
+
+/// At the moment, we abort in all cases where we panic.
+/// To aid with getting diagnostics in the wild without requiring
+/// people to set RUST_BACKTRACE=1, register a panic hook that crashes using rb_bug().
+/// rb_bug() might not be as good at printing a call trace as Rust's stdlib, but
+/// it dumps some other info that might be relevant.
+///
+/// In case we want to start doing fancier exception handling with panic=unwind,
+/// we can revisit this later. For now, this helps us get good bug reports.
+fn rb_bug_panic_hook() {
+    use std::env;
+    use std::panic;
+    use std::io::{stderr, Write};
+
+    // Probably the default hook. We do this very early during process boot.
+    let previous_hook = panic::take_hook();
+
+    panic::set_hook(Box::new(move |panic_info| {
+        // Not using `eprintln` to avoid double panic.
+        let _ = stderr().write_all(b"ruby: YJIT has panicked. More info to follow...\n");
+
+        // Always show a Rust backtrace.
+        env::set_var("RUST_BACKTRACE", "1");
+        previous_hook(panic_info);
+
+        unsafe { rb_bug(b"YJIT panicked\0".as_ref().as_ptr() as *const raw::c_char); }
+    }));
+}
+
+/// Called from C code to begin compiling a function
+/// NOTE: this should be wrapped in RB_VM_LOCK_ENTER(), rb_vm_barrier() on the C side
+/// If jit_exception is true, compile JIT code for handling exceptions.
+/// See [jit_compile_exception] for details.
+#[no_mangle] +pub extern "C" fn rb_yjit_iseq_gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> *const u8 { + // Don't compile when there is insufficient native stack space + if unsafe { rb_ec_stack_check(ec as _) } != 0 { + return std::ptr::null(); + } + + // Reject ISEQs with very large temp stacks, + // this will allow us to use u8/i8 values to track stack_size and sp_offset + let stack_max = unsafe { rb_get_iseq_body_stack_max(iseq) }; + if stack_max >= i8::MAX as u32 { + incr_counter!(iseq_stack_too_large); + return std::ptr::null(); + } + + // Reject ISEQs that are too long, + // this will allow us to use u16 for instruction indices if we want to, + // very long ISEQs are also much more likely to be initialization code + let iseq_size = unsafe { get_iseq_encoded_size(iseq) }; + if iseq_size >= u16::MAX as u32 { + incr_counter!(iseq_too_long); + return std::ptr::null(); + } + + // If a custom call threshold was not specified on the command-line and + // this is a large application (has very many ISEQs), switch to + // using the call threshold for large applications after this entry point + use crate::stats::rb_yjit_live_iseq_count; + if unsafe { rb_yjit_call_threshold } == SMALL_CALL_THRESHOLD && unsafe { rb_yjit_live_iseq_count } > LARGE_ISEQ_COUNT { + unsafe { rb_yjit_call_threshold = LARGE_CALL_THRESHOLD; }; + } + + let maybe_code_ptr = with_compile_time(|| { gen_entry_point(iseq, ec, jit_exception) }); + + match maybe_code_ptr { + Some(ptr) => ptr, + None => std::ptr::null(), + } +} + +/// Free and recompile all existing JIT code +#[no_mangle] +pub extern "C" fn rb_yjit_code_gc(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { + if !yjit_enabled_p() { + return Qnil; + } + + with_vm_lock(src_loc!(), || { + let cb = CodegenGlobals::get_inline_cb(); + let ocb = CodegenGlobals::get_outlined_cb(); + cb.code_gc(ocb); + }); + + Qnil +} + +/// Enable YJIT compilation, returning true if YJIT was previously disabled +#[no_mangle] +pub extern "C" fn rb_yjit_enable(_ec: EcPtr, _ruby_self: VALUE, gen_stats: VALUE, print_stats: VALUE) -> VALUE { + with_vm_lock(src_loc!(), || { + // Initialize and enable YJIT + if gen_stats.test() { + unsafe { + OPTIONS.gen_stats = gen_stats.test(); + OPTIONS.print_stats = print_stats.test(); + } + } + yjit_init(); + + // Add "+YJIT" to RUBY_DESCRIPTION + extern "C" { + fn ruby_set_yjit_description(); + } + unsafe { ruby_set_yjit_description(); } + + Qtrue + }) +} + +/// Simulate a situation where we are out of executable memory +#[no_mangle] +pub extern "C" fn rb_yjit_simulate_oom_bang(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { + // If YJIT is not enabled, do nothing + if !yjit_enabled_p() { + return Qnil; + } + + // Enabled in debug mode only for security + if cfg!(debug_assertions) { + let cb = CodegenGlobals::get_inline_cb(); + let ocb = CodegenGlobals::get_outlined_cb().unwrap(); + cb.set_pos(cb.get_mem_size()); + ocb.set_pos(ocb.get_mem_size()); + } + + return Qnil; +} + +/// Push a C method frame if the given PC is supposed to lazily push one. +/// This is called from rb_raise() (at rb_exc_new_str()) and other functions +/// that may make a method call (e.g. rb_to_int()). +#[no_mangle] +pub extern "C" fn rb_yjit_lazy_push_frame(pc: *mut VALUE) { + if !yjit_enabled_p() { + return; + } + + incr_counter!(num_lazy_frame_check); + if let Some(&(cme, recv_idx)) = CodegenGlobals::get_pc_to_cfunc().get(&pc) { + incr_counter!(num_lazy_frame_push); + unsafe { rb_vm_push_cfunc_frame(cme, recv_idx as i32) } + } +} |
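One closing note on the boot path above: because unwinding from Rust into C frames is undefined behavior, yjit_init() combines std::panic::catch_unwind with a chained panic hook. A minimal, std-only sketch of that pattern follows; it is an illustration rather than the actual implementation, the function name is hypothetical, and it omits the rb_bug() call since that requires linking with CRuby.

    // Illustrative only: the catch_unwind plus chained-hook pattern used around initialization.
    fn panic_strategy_sketch() {
        // Chain a custom hook in front of whatever hook was installed before.
        let previous_hook = std::panic::take_hook();
        std::panic::set_hook(Box::new(move |panic_info| {
            // Extra diagnostics would be printed here (the real hook writes to stderr directly
            // rather than via eprintln! so a failed write cannot trigger a second panic).
            previous_hook(panic_info);
            // The real hook then calls rb_bug(), which does not return.
        }));

        // Never let a panic unwind into the C caller; abort the process instead.
        let result = std::panic::catch_unwind(|| {
            // one-time initialization work goes here
        });

        if result.is_err() {
            std::process::abort();
        }
    }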