Diffstat (limited to 'yjit/src')
-rw-r--r--  yjit/src/asm/arm64/README.md  16
-rw-r--r--  yjit/src/asm/arm64/arg/bitmask_imm.rs  255
-rw-r--r--  yjit/src/asm/arm64/arg/condition.rs  52
-rw-r--r--  yjit/src/asm/arm64/arg/inst_offset.rs  47
-rw-r--r--  yjit/src/asm/arm64/arg/mod.rs  18
-rw-r--r--  yjit/src/asm/arm64/arg/sf.rs  19
-rw-r--r--  yjit/src/asm/arm64/arg/shifted_imm.rs  81
-rw-r--r--  yjit/src/asm/arm64/arg/sys_reg.rs  6
-rw-r--r--  yjit/src/asm/arm64/arg/truncate.rs  66
-rw-r--r--  yjit/src/asm/arm64/inst/atomic.rs  86
-rw-r--r--  yjit/src/asm/arm64/inst/branch.rs  100
-rw-r--r--  yjit/src/asm/arm64/inst/branch_cond.rs  78
-rw-r--r--  yjit/src/asm/arm64/inst/breakpoint.rs  55
-rw-r--r--  yjit/src/asm/arm64/inst/call.rs  104
-rw-r--r--  yjit/src/asm/arm64/inst/conditional.rs  73
-rw-r--r--  yjit/src/asm/arm64/inst/data_imm.rs  143
-rw-r--r--  yjit/src/asm/arm64/inst/data_reg.rs  192
-rw-r--r--  yjit/src/asm/arm64/inst/halfword_imm.rs  179
-rw-r--r--  yjit/src/asm/arm64/inst/load_literal.rs  89
-rw-r--r--  yjit/src/asm/arm64/inst/load_register.rs  108
-rw-r--r--  yjit/src/asm/arm64/inst/load_store.rs  249
-rw-r--r--  yjit/src/asm/arm64/inst/load_store_exclusive.rs  109
-rw-r--r--  yjit/src/asm/arm64/inst/logical_imm.rs  154
-rw-r--r--  yjit/src/asm/arm64/inst/logical_reg.rs  207
-rw-r--r--  yjit/src/asm/arm64/inst/madd.rs  73
-rw-r--r--  yjit/src/asm/arm64/inst/mod.rs  54
-rw-r--r--  yjit/src/asm/arm64/inst/mov.rs  155
-rw-r--r--  yjit/src/asm/arm64/inst/nop.rs  44
-rw-r--r--  yjit/src/asm/arm64/inst/pc_rel.rs  107
-rw-r--r--  yjit/src/asm/arm64/inst/reg_pair.rs  212
-rw-r--r--  yjit/src/asm/arm64/inst/sbfm.rs  103
-rw-r--r--  yjit/src/asm/arm64/inst/shift_imm.rs  147
-rw-r--r--  yjit/src/asm/arm64/inst/smulh.rs  60
-rw-r--r--  yjit/src/asm/arm64/inst/sys_reg.rs  86
-rw-r--r--  yjit/src/asm/arm64/inst/test_bit.rs  133
-rw-r--r--  yjit/src/asm/arm64/mod.rs  1680
-rw-r--r--  yjit/src/asm/arm64/opnd.rs  195
-rw-r--r--  yjit/src/asm/mod.rs  686
-rw-r--r--  yjit/src/asm/x86_64/mod.rs  431
-rw-r--r--  yjit/src/asm/x86_64/tests.rs  47
-rw-r--r--  yjit/src/backend/arm64/mod.rs  1835
-rw-r--r--  yjit/src/backend/ir.rs  2095
-rw-r--r--  yjit/src/backend/mod.rs  14
-rw-r--r--  yjit/src/backend/tests.rs  330
-rw-r--r--  yjit/src/backend/x86_64/mod.rs  1322
-rw-r--r--  yjit/src/codegen.rs  11413
-rw-r--r--  yjit/src/core.rs  3452
-rw-r--r--  yjit/src/cruby.rs  487
-rw-r--r--  yjit/src/cruby_bindings.inc.rs  1704
-rw-r--r--  yjit/src/disasm.rs  273
-rw-r--r--  yjit/src/invariants.rs  560
-rw-r--r--  yjit/src/lib.rs  3
-rw-r--r--  yjit/src/options.rs  281
-rw-r--r--  yjit/src/stats.rs  729
-rw-r--r--  yjit/src/utils.rs  281
-rw-r--r--  yjit/src/virtualmem.rs  161
-rw-r--r--  yjit/src/yjit.rs  190
57 files changed, 25452 insertions, 6377 deletions
diff --git a/yjit/src/asm/arm64/README.md b/yjit/src/asm/arm64/README.md
new file mode 100644
index 0000000000..edae5773e8
--- /dev/null
+++ b/yjit/src/asm/arm64/README.md
@@ -0,0 +1,16 @@
+# Arm64
+
+This module is responsible for encoding YJIT operands into an appropriate Arm64 encoding.
+
+## Architecture
+
+Every instruction in the Arm64 instruction set is 32 bits wide and is stored in little-endian order. Because they're all the same size, we represent each instruction by a struct that implements `From<T> for u32` and contains the mechanism for encoding that instruction. The encoding for each instruction is shown in the documentation for the struct that ends up being created.
+
+In general, each field inside the struct is either a direct value (usually a `u8`/`u16`) or some kind of `enum` that can be converted directly into a `u32`. For more complicated pieces of encoding (e.g., bitmask immediates) a corresponding module under the `arg` namespace is available.
+
+## Helpful links
+
+* [Arm A64 Instruction Set Architecture](https://developer.arm.com/documentation/ddi0596/2021-12?lang=en) Official documentation
+* [armconverter.com](https://armconverter.com/) A website that encodes Arm assembly syntax
+* [hatstone](https://github.com/tenderlove/hatstone) A wrapper around the Capstone disassembler written in Ruby
+* [onlinedisassembler.com](https://onlinedisassembler.com/odaweb/) A web-based disassembler
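As a rough sketch of the pattern the README describes (a hypothetical instruction, not one of the structs added below), each encoder looks roughly like this:

    struct Example {
        /// The destination register number, encoded directly into the low bits.
        rd: u8,

        /// 1 for 64-bit register operands, 0 for 32-bit (the usual sf bit).
        sf: u32
    }

    impl From<Example> for u32 {
        /// Combine the fields with the fixed opcode bits into a 32-bit word.
        fn from(inst: Example) -> Self {
            0
            | (inst.sf << 31)      // register width selector
            | (0b1010101 << 24)    // made-up fixed opcode field, for illustration only
            | (inst.rd as u32)     // destination register in the low bits
        }
    }

    impl From<Example> for [u8; 4] {
        /// Emit the instruction in little-endian byte order.
        fn from(inst: Example) -> [u8; 4] {
            let result: u32 = inst.into();
            result.to_le_bytes()
        }
    }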
diff --git a/yjit/src/asm/arm64/arg/bitmask_imm.rs b/yjit/src/asm/arm64/arg/bitmask_imm.rs
new file mode 100644
index 0000000000..6b71a73d2c
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/bitmask_imm.rs
@@ -0,0 +1,255 @@
+/// Immediates used by the logical immediate instructions are not stored as
+/// the raw immediate value, but are instead encoded into a 13-bit value made
+/// up of 3 fields. This allows representing a useful set of large 64-bit
+/// values that a plain 13-bit immediate never could, at the expense of not
+/// being able to represent every possible value.
+///
+/// In order for a number to be encodable in this form, its binary
+/// representation must consist of a single run of contiguous 1s. That pattern
+/// must then be replicable across all of the bits either 1, 2, 4, 8, 16, or
+/// 32 times (rotated or not).
+///
+/// For example, 1 (0b1), 2 (0b10), 3 (0b11), and 4 (0b100) are all valid.
+/// However, 5 (0b101) is invalid, because it contains 2 sets of 1s and cannot
+/// be replicated across 64 bits.
+///
+/// Some more examples to illustrate the idea of replication:
+/// * 0x5555555555555555 is a valid value (0b0101...) because it consists of a
+/// single set of 1s which can be replicated across all of the bits 32 times.
+/// * 0xf0f0f0f0f0f0f0f0 is a valid value (0b1111000011110000...) because it
+/// consists of a single set of 1s which can be replicated across all of the
+/// bits 8 times (rotated by 4 bits).
+/// * 0x0ff00ff00ff00ff0 is a valid value (0b0000111111110000...) because it
+/// consists of a single set of 1s which can be replicated across all of the
+/// bits 4 times (rotated by 12 bits).
+///
+/// To encode the values, there are 3 elements:
+/// * n = 1 if the pattern is 64 bits wide, 0 otherwise
+/// * imms = the size of the pattern, a 0, and then one less than the number of
+/// sequential 1s
+/// * immr = the number of right rotations to apply to the pattern to get the
+/// target value
+///
+pub struct BitmaskImmediate {
+ n: u8,
+ imms: u8,
+ immr: u8
+}
+
+impl TryFrom<u64> for BitmaskImmediate {
+ type Error = ();
+
+ /// Attempt to convert a u64 into a BitmaskImmediate.
+ ///
+ /// The implementation here is largely based on this blog post:
+ /// https://dougallj.wordpress.com/2021/10/30/bit-twiddling-optimising-aarch64-logical-immediate-encoding-and-decoding/
+ fn try_from(value: u64) -> Result<Self, Self::Error> {
+ if value == 0 || value == u64::MAX {
+ return Err(());
+ }
+
+ fn rotate_right(value: u64, rotations: u32) -> u64 {
+ (value >> (rotations & 0x3F)) |
+ (value << (rotations.wrapping_neg() & 0x3F))
+ }
+
+ let rotations = (value & (value + 1)).trailing_zeros();
+ let normalized = rotate_right(value, rotations & 0x3F);
+
+ let zeroes = normalized.leading_zeros();
+ let ones = (!normalized).trailing_zeros();
+ let size = zeroes + ones;
+
+ if rotate_right(value, size & 0x3F) != value {
+ return Err(());
+ }
+
+ Ok(BitmaskImmediate {
+ n: ((size >> 6) & 1) as u8,
+ imms: (((size << 1).wrapping_neg() | (ones - 1)) & 0x3F) as u8,
+ immr: ((rotations.wrapping_neg() & (size - 1)) & 0x3F) as u8
+ })
+ }
+}
+
+impl BitmaskImmediate {
+    /// Attempt to make a BitmaskImmediate for a 32-bit register.
+ /// The result has N==0, which is required for some 32-bit instructions.
+ /// Note that the exact same BitmaskImmediate produces different values
+ /// depending on the size of the target register.
+ pub fn new_32b_reg(value: u32) -> Result<Self, ()> {
+ // The same bit pattern replicated to u64
+ let value = value as u64;
+ let replicated: u64 = (value << 32) | value;
+ let converted = Self::try_from(replicated);
+ if let Ok(ref imm) = converted {
+ assert_eq!(0, imm.n);
+ }
+
+ converted
+ }
+}
+
+impl BitmaskImmediate {
+ /// Encode a bitmask immediate into a 32-bit value.
+ pub fn encode(self) -> u32 {
+ 0
+ | ((self.n as u32) << 12)
+ | ((self.immr as u32) << 6)
+ | (self.imms as u32)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_failures() {
+ [5, 9, 10, 11, 13, 17, 18, 19].iter().for_each(|&imm| {
+ assert!(BitmaskImmediate::try_from(imm).is_err());
+ });
+ }
+
+ #[test]
+ fn test_negative() {
+ let bitmask: BitmaskImmediate = (-9_i64 as u64).try_into().unwrap();
+ let encoded: u32 = bitmask.encode();
+ assert_eq!(7998, encoded);
+ }
+
+ #[test]
+ fn test_size_2_minimum() {
+ let bitmask = BitmaskImmediate::try_from(0x5555555555555555);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b111100 })));
+ }
+
+ #[test]
+ fn test_size_2_maximum() {
+ let bitmask = BitmaskImmediate::try_from(0xaaaaaaaaaaaaaaaa);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000001, imms: 0b111100 })));
+ }
+
+ #[test]
+ fn test_size_4_minimum() {
+ let bitmask = BitmaskImmediate::try_from(0x1111111111111111);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b111000 })));
+ }
+
+ #[test]
+ fn test_size_4_rotated() {
+ let bitmask = BitmaskImmediate::try_from(0x6666666666666666);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000011, imms: 0b111001 })));
+ }
+
+ #[test]
+ fn test_size_4_maximum() {
+ let bitmask = BitmaskImmediate::try_from(0xeeeeeeeeeeeeeeee);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000011, imms: 0b111010 })));
+ }
+
+ #[test]
+ fn test_size_8_minimum() {
+ let bitmask = BitmaskImmediate::try_from(0x0101010101010101);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b110000 })));
+ }
+
+ #[test]
+ fn test_size_8_rotated() {
+ let bitmask = BitmaskImmediate::try_from(0x1818181818181818);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000101, imms: 0b110001 })));
+ }
+
+ #[test]
+ fn test_size_8_maximum() {
+ let bitmask = BitmaskImmediate::try_from(0xfefefefefefefefe);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000111, imms: 0b110110 })));
+ }
+
+ #[test]
+ fn test_size_16_minimum() {
+ let bitmask = BitmaskImmediate::try_from(0x0001000100010001);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b100000 })));
+ }
+
+ #[test]
+ fn test_size_16_rotated() {
+ let bitmask = BitmaskImmediate::try_from(0xff8fff8fff8fff8f);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b001001, imms: 0b101100 })));
+ }
+
+ #[test]
+ fn test_size_16_maximum() {
+ let bitmask = BitmaskImmediate::try_from(0xfffefffefffefffe);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b001111, imms: 0b101110 })));
+ }
+
+ #[test]
+ fn test_size_32_minimum() {
+ let bitmask = BitmaskImmediate::try_from(0x0000000100000001);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b000000, imms: 0b000000 })));
+ }
+
+ #[test]
+ fn test_size_32_rotated() {
+ let bitmask = BitmaskImmediate::try_from(0x3fffff003fffff00);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b011000, imms: 0b010101 })));
+ }
+
+ #[test]
+ fn test_size_32_maximum() {
+ let bitmask = BitmaskImmediate::try_from(0xfffffffefffffffe);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 0, immr: 0b011111, imms: 0b011110 })));
+ }
+
+ #[test]
+ fn test_size_64_minimum() {
+ let bitmask = BitmaskImmediate::try_from(0x0000000000000001);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b000000, imms: 0b000000 })));
+ }
+
+ #[test]
+ fn test_size_64_rotated() {
+ let bitmask = BitmaskImmediate::try_from(0x0000001fffff0000);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b110000, imms: 0b010100 })));
+ }
+
+ #[test]
+ fn test_size_64_maximum() {
+ let bitmask = BitmaskImmediate::try_from(0xfffffffffffffffe);
+ assert!(matches!(bitmask, Ok(BitmaskImmediate { n: 1, immr: 0b111111, imms: 0b111110 })));
+ }
+
+ #[test]
+ fn test_size_64_invalid() {
+ let bitmask = BitmaskImmediate::try_from(u64::MAX);
+ assert!(matches!(bitmask, Err(())));
+ }
+
+ #[test]
+ fn test_all_valid_32b_pattern() {
+ let mut patterns = vec![];
+ for pattern_size in [2, 4, 8, 16, 32_u64] {
+ for ones_count in 1..pattern_size {
+ for rotation in 0..pattern_size {
+ let ones = (1_u64 << ones_count) - 1;
+ let rotated = (ones >> rotation) |
+ ((ones & ((1 << rotation) - 1)) << (pattern_size - rotation));
+ let mut replicated = rotated;
+ let mut shift = pattern_size;
+ while shift < 32 {
+ replicated |= replicated << shift;
+ shift *= 2;
+ }
+ let replicated: u32 = replicated.try_into().unwrap();
+ assert!(BitmaskImmediate::new_32b_reg(replicated).is_ok());
+ patterns.push(replicated);
+ }
+ }
+ }
+ patterns.sort();
+ patterns.dedup();
+ // Up to {size}-1 ones, and a total of {size} possible rotations.
+ assert_eq!(1*2 + 3*4 + 7*8 + 15*16 + 31*32, patterns.len());
+ }
+}
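For reference, a sketch of the inverse mapping (not part of this commit; it follows the decode pseudocode in the Arm ARM) can make the encoder above easier to follow, since it shows how a valid (n, immr, imms) triple expands back into the 64-bit value it represents:

    /// Hypothetical helper, for illustration only: expand an encoded
    /// (n, immr, imms) triple (assumed to be a valid encoding) back into
    /// the 64-bit value it represents.
    fn decode_bitmask(n: u8, immr: u8, imms: u8) -> u64 {
        // The element size comes from the highest set bit of n:NOT(imms).
        let combined = ((n as u32) << 6) | ((!imms as u32) & 0x3f);
        let size = 1u32 << (31 - combined.leading_zeros()); // 2, 4, 8, 16, 32 or 64

        // Build the unrotated element: a run of (imms mod size) + 1 ones.
        let ones = ((imms as u32) & (size - 1)) + 1;
        let element = if ones == 64 { u64::MAX } else { (1u64 << ones) - 1 };

        // Rotate right within the element, then replicate it across 64 bits.
        let mask = if size == 64 { u64::MAX } else { (1u64 << size) - 1 };
        let rotation = (immr as u32) & (size - 1);
        let rotated = if rotation == 0 {
            element
        } else {
            ((element >> rotation) | (element << (size - rotation))) & mask
        };

        let (mut result, mut shift) = (0u64, 0u32);
        while shift < 64 {
            result |= rotated << shift;
            shift += size;
        }
        result
    }

For example, decode_bitmask(0, 0b000001, 0b111100) rebuilds 0xaaaaaaaaaaaaaaaa, matching the test_size_2_maximum case above.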
diff --git a/yjit/src/asm/arm64/arg/condition.rs b/yjit/src/asm/arm64/arg/condition.rs
new file mode 100644
index 0000000000..f711b8b0d8
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/condition.rs
@@ -0,0 +1,52 @@
+/// Various instructions in A64 can have condition codes attached. This enum
+/// includes all of the various kinds of conditions along with their respective
+/// encodings.
+pub struct Condition;
+
+impl Condition {
+ pub const EQ: u8 = 0b0000; // equal to
+ pub const NE: u8 = 0b0001; // not equal to
+ pub const CS: u8 = 0b0010; // carry set (alias for HS)
+ pub const CC: u8 = 0b0011; // carry clear (alias for LO)
+ pub const MI: u8 = 0b0100; // minus, negative
+ pub const PL: u8 = 0b0101; // positive or zero
+ pub const VS: u8 = 0b0110; // signed overflow
+ pub const VC: u8 = 0b0111; // no signed overflow
+ pub const HI: u8 = 0b1000; // greater than (unsigned)
+ pub const LS: u8 = 0b1001; // less than or equal to (unsigned)
+ pub const GE: u8 = 0b1010; // greater than or equal to (signed)
+ pub const LT: u8 = 0b1011; // less than (signed)
+ pub const GT: u8 = 0b1100; // greater than (signed)
+ pub const LE: u8 = 0b1101; // less than or equal to (signed)
+ pub const AL: u8 = 0b1110; // always
+
+ pub const fn inverse(condition: u8) -> u8 {
+ match condition {
+ Condition::EQ => Condition::NE,
+ Condition::NE => Condition::EQ,
+
+ Condition::CS => Condition::CC,
+ Condition::CC => Condition::CS,
+
+ Condition::MI => Condition::PL,
+ Condition::PL => Condition::MI,
+
+ Condition::VS => Condition::VC,
+ Condition::VC => Condition::VS,
+
+ Condition::HI => Condition::LS,
+ Condition::LS => Condition::HI,
+
+ Condition::LT => Condition::GE,
+ Condition::GE => Condition::LT,
+
+ Condition::GT => Condition::LE,
+ Condition::LE => Condition::GT,
+
+ Condition::AL => Condition::AL,
+
+ _ => panic!("Unknown condition")
+
+ }
+ }
+}
diff --git a/yjit/src/asm/arm64/arg/inst_offset.rs b/yjit/src/asm/arm64/arg/inst_offset.rs
new file mode 100644
index 0000000000..f4a6bc73a0
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/inst_offset.rs
@@ -0,0 +1,47 @@
+/// There are a lot of instructions in the AArch64 architecture that take an
+/// offset in terms of number of instructions. Usually they are jump
+/// instructions or instructions that load a value relative to the current PC.
+///
+/// This struct is used to mark those locations instead of a generic operand in
+/// order to give better clarity to the developer when reading the AArch64
+/// backend code. It also helps to clarify that everything is in terms of a
+/// number of instructions and not a number of bytes (i.e., the offset is the
+/// number of bytes divided by 4).
+#[derive(Copy, Clone)]
+pub struct InstructionOffset(i32);
+
+impl InstructionOffset {
+ /// Create a new instruction offset.
+ pub fn from_insns(insns: i32) -> Self {
+ InstructionOffset(insns)
+ }
+
+ /// Create a new instruction offset from a number of bytes.
+ pub fn from_bytes(bytes: i32) -> Self {
+ assert_eq!(bytes % 4, 0, "Byte offset must be a multiple of 4");
+ InstructionOffset(bytes / 4)
+ }
+}
+
+impl From<i32> for InstructionOffset {
+    /// Convert an i32 into an instruction offset.
+ fn from(value: i32) -> Self {
+ InstructionOffset(value)
+ }
+}
+
+impl From<InstructionOffset> for i32 {
+ /// Convert an instruction offset into a number of instructions as an i32.
+ fn from(offset: InstructionOffset) -> Self {
+ offset.0
+ }
+}
+
+impl From<InstructionOffset> for i64 {
+ /// Convert an instruction offset into a number of instructions as an i64.
+    /// This is useful when we're checking how many bits this offset fits
+ /// into.
+ fn from(offset: InstructionOffset) -> Self {
+ offset.0.into()
+ }
+}
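A small usage sketch in the style of the tests elsewhere in this commit (the values are arbitrary): a 16-byte distance and a 4-instruction distance construct the same offset.

    #[test]
    fn test_instruction_offset_from_bytes() {
        // On AArch64 every instruction is 4 bytes, so 16 bytes is 4 instructions.
        let from_bytes = InstructionOffset::from_bytes(16);
        let from_insns = InstructionOffset::from_insns(4);
        assert_eq!(i32::from(from_bytes), i32::from(from_insns));
    }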
diff --git a/yjit/src/asm/arm64/arg/mod.rs b/yjit/src/asm/arm64/arg/mod.rs
new file mode 100644
index 0000000000..7eb37834f9
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/mod.rs
@@ -0,0 +1,18 @@
+// This module contains various A64 instruction arguments and the logic
+// necessary to encode them.
+
+mod bitmask_imm;
+mod condition;
+mod inst_offset;
+mod sf;
+mod shifted_imm;
+mod sys_reg;
+mod truncate;
+
+pub use bitmask_imm::BitmaskImmediate;
+pub use condition::Condition;
+pub use inst_offset::InstructionOffset;
+pub use sf::Sf;
+pub use shifted_imm::ShiftedImmediate;
+pub use sys_reg::SystemRegister;
+pub use truncate::{truncate_imm, truncate_uimm};
diff --git a/yjit/src/asm/arm64/arg/sf.rs b/yjit/src/asm/arm64/arg/sf.rs
new file mode 100644
index 0000000000..c2fd33302c
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/sf.rs
@@ -0,0 +1,19 @@
+/// This is commonly the top-most bit in the encoding of the instruction, and
+/// represents whether register operands should be treated as 64-bit registers
+/// or 32-bit registers.
+pub enum Sf {
+ Sf32 = 0b0,
+ Sf64 = 0b1
+}
+
+/// A convenience conversion so that we can turn the number of bits of a
+/// register operand directly into an Sf enum variant.
+impl From<u8> for Sf {
+ fn from(num_bits: u8) -> Self {
+ match num_bits {
+ 64 => Sf::Sf64,
+ 32 => Sf::Sf32,
+ _ => panic!("Invalid number of bits: {}", num_bits)
+ }
+ }
+}
diff --git a/yjit/src/asm/arm64/arg/shifted_imm.rs b/yjit/src/asm/arm64/arg/shifted_imm.rs
new file mode 100644
index 0000000000..4602ac64ab
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/shifted_imm.rs
@@ -0,0 +1,81 @@
+/// How much to shift the immediate by.
+pub enum Shift {
+ LSL0 = 0b0, // no shift
+ LSL12 = 0b1 // logical shift left by 12 bits
+}
+
+/// Some instructions accept a 12-bit immediate that has an optional shift
+/// attached to it. This allows encoding larger values than would otherwise
+/// fit into 12 bits. We attempt to encode those values here. If a value is
+/// too large to represent this way, we have to bail out.
+pub struct ShiftedImmediate {
+ shift: Shift,
+ value: u16
+}
+
+impl TryFrom<u64> for ShiftedImmediate {
+ type Error = ();
+
+    /// Attempt to convert a u64 into a ShiftedImmediate.
+ fn try_from(value: u64) -> Result<Self, Self::Error> {
+ let current = value;
+ if current < 2_u64.pow(12) {
+ return Ok(ShiftedImmediate { shift: Shift::LSL0, value: current as u16 });
+ }
+
+ if (current & (2_u64.pow(12) - 1) == 0) && ((current >> 12) < 2_u64.pow(12)) {
+ return Ok(ShiftedImmediate { shift: Shift::LSL12, value: (current >> 12) as u16 });
+ }
+
+ Err(())
+ }
+}
+
+impl From<ShiftedImmediate> for u32 {
+    /// Encode a shifted immediate into a 32-bit value.
+ fn from(imm: ShiftedImmediate) -> Self {
+ 0
+ | (((imm.shift as u32) & 1) << 12)
+ | (imm.value as u32)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_no_shift() {
+ let expected_value = 256;
+ let result = ShiftedImmediate::try_from(expected_value);
+
+ match result {
+ Ok(ShiftedImmediate { shift: Shift::LSL0, value }) => assert_eq!(value as u64, expected_value),
+ _ => panic!("Unexpected shift value")
+ }
+ }
+
+ #[test]
+ fn test_maximum_no_shift() {
+ let expected_value = (1 << 12) - 1;
+ let result = ShiftedImmediate::try_from(expected_value);
+
+ match result {
+ Ok(ShiftedImmediate { shift: Shift::LSL0, value }) => assert_eq!(value as u64, expected_value),
+ _ => panic!("Unexpected shift value")
+ }
+ }
+
+ #[test]
+ fn test_with_shift() {
+ let result = ShiftedImmediate::try_from(256 << 12);
+
+ assert!(matches!(result, Ok(ShiftedImmediate { shift: Shift::LSL12, value: 256 })));
+ }
+
+ #[test]
+ fn test_unencodable() {
+ let result = ShiftedImmediate::try_from((256 << 12) + 1);
+ assert!(matches!(result, Err(())));
+ }
+}
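A sketch in the style of the module's own tests, spelling out how a value is split between the 12-bit field and the optional shift (the constants are arbitrary):

    #[test]
    fn test_shifted_immediate_split() {
        // 0x123 fits in the low 12 bits, so no shift is needed.
        assert!(matches!(
            ShiftedImmediate::try_from(0x123_u64),
            Ok(ShiftedImmediate { shift: Shift::LSL0, value: 0x123 })
        ));

        // 0x123000 is 0x123 << 12, so it encodes as the value 0x123 with LSL #12.
        assert!(matches!(
            ShiftedImmediate::try_from(0x123000_u64),
            Ok(ShiftedImmediate { shift: Shift::LSL12, value: 0x123 })
        ));

        // A value with bits set in both halves cannot be encoded at all.
        assert!(ShiftedImmediate::try_from(0x123001_u64).is_err());
    }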
diff --git a/yjit/src/asm/arm64/arg/sys_reg.rs b/yjit/src/asm/arm64/arg/sys_reg.rs
new file mode 100644
index 0000000000..41d71920cb
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/sys_reg.rs
@@ -0,0 +1,6 @@
+/// The encoded representation of an A64 system register.
+/// https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/
+pub enum SystemRegister {
+ /// https://developer.arm.com/documentation/ddi0601/2022-06/AArch64-Registers/NZCV--Condition-Flags?lang=en
+ NZCV = 0b1_011_0100_0010_000
+}
diff --git a/yjit/src/asm/arm64/arg/truncate.rs b/yjit/src/asm/arm64/arg/truncate.rs
new file mode 100644
index 0000000000..85d56ff202
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/truncate.rs
@@ -0,0 +1,66 @@
+// There are many instances in AArch64 instruction encoding where you represent
+// an integer value with a particular bit width that isn't a power of 2. These
+// functions represent truncating those integer values down to the appropriate
+// number of bits.
+
+/// Truncate a signed immediate to fit into a compile-time known width. It is
+/// assumed before calling this function that the value fits into the correct
+/// size. If it doesn't, then this function will panic.
+///
+/// When the value is positive, this should effectively be a no-op since we're
+/// just dropping leading zeroes. When the value is negative we should only be
+/// dropping leading ones.
+pub fn truncate_imm<T: Into<i32>, const WIDTH: usize>(imm: T) -> u32 {
+ let value: i32 = imm.into();
+ let masked = (value as u32) & ((1 << WIDTH) - 1);
+
+ // Assert that we didn't drop any bits by truncating.
+ if value >= 0 {
+ assert_eq!(value as u32, masked);
+ } else {
+ assert_eq!(value as u32, masked | (u32::MAX << WIDTH));
+ }
+
+ masked
+}
+
+/// Truncate an unsigned immediate to fit into a compile-time known width. It is
+/// assumed before calling this function that the value fits into the correct
+/// size. If it doesn't, then this function will panic.
+///
+/// This should effectively be a no-op since we're just dropping leading zeroes.
+pub fn truncate_uimm<T: Into<u32>, const WIDTH: usize>(uimm: T) -> u32 {
+ let value: u32 = uimm.into();
+ let masked = value & ((1 << WIDTH) - 1);
+
+ // Assert that we didn't drop any bits by truncating.
+ assert_eq!(value, masked);
+
+ masked
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_truncate_imm_positive() {
+ let inst = truncate_imm::<i32, 4>(5);
+ let result: u32 = inst;
+ assert_eq!(0b0101, result);
+ }
+
+ #[test]
+ fn test_truncate_imm_negative() {
+ let inst = truncate_imm::<i32, 4>(-5);
+ let result: u32 = inst;
+ assert_eq!(0b1011, result);
+ }
+
+ #[test]
+ fn test_truncate_uimm() {
+ let inst = truncate_uimm::<u32, 4>(5);
+ let result: u32 = inst;
+ assert_eq!(0b0101, result);
+ }
+}
diff --git a/yjit/src/asm/arm64/inst/atomic.rs b/yjit/src/asm/arm64/inst/atomic.rs
new file mode 100644
index 0000000000..5ce497209c
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/atomic.rs
@@ -0,0 +1,86 @@
+/// The size of the register operands to this instruction.
+enum Size {
+ /// Using 32-bit registers.
+ Size32 = 0b10,
+
+ /// Using 64-bit registers.
+ Size64 = 0b11
+}
+
+/// A convenience conversion so that we can turn the number of bits of a
+/// register operand directly into a Size enum variant.
+impl From<u8> for Size {
+ fn from(num_bits: u8) -> Self {
+ match num_bits {
+ 64 => Size::Size64,
+ 32 => Size::Size32,
+ _ => panic!("Invalid number of bits: {}", num_bits)
+ }
+ }
+}
+
+/// The struct that represents an A64 atomic instruction that can be encoded.
+///
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 1 1 1 0 0 0 1 1 1 0 0 0 0 0 0 |
+/// | size rs.............. rn.............. rt.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct Atomic {
+ /// The register holding the value to be loaded.
+ rt: u8,
+
+ /// The base register.
+ rn: u8,
+
+ /// The register holding the data value to be operated on.
+ rs: u8,
+
+ /// The size of the registers used in this instruction.
+ size: Size
+}
+
+impl Atomic {
+ /// LDADDAL
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDADD--LDADDA--LDADDAL--LDADDL--Atomic-add-on-word-or-doubleword-in-memory-?lang=en
+ pub fn ldaddal(rs: u8, rt: u8, rn: u8, num_bits: u8) -> Self {
+ Self { rt, rn, rs, size: num_bits.into() }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+const FAMILY: u32 = 0b0100;
+
+impl From<Atomic> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: Atomic) -> Self {
+ 0
+ | ((inst.size as u32) << 30)
+ | (0b11 << 28)
+ | (FAMILY << 25)
+ | (0b111 << 21)
+ | ((inst.rs as u32) << 16)
+ | ((inst.rn as u32) << 5)
+ | (inst.rt as u32)
+ }
+}
+
+impl From<Atomic> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: Atomic) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_ldaddal() {
+ let result: u32 = Atomic::ldaddal(20, 21, 22, 64).into();
+ assert_eq!(0xf8f402d5, result);
+ }
+}
diff --git a/yjit/src/asm/arm64/inst/branch.rs b/yjit/src/asm/arm64/inst/branch.rs
new file mode 100644
index 0000000000..f15ef2a9b0
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/branch.rs
@@ -0,0 +1,100 @@
+/// Which operation to perform.
+enum Op {
+ /// Perform a BR instruction.
+ BR = 0b00,
+
+ /// Perform a BLR instruction.
+ BLR = 0b01,
+
+ /// Perform a RET instruction.
+ RET = 0b10
+}
+
+/// The struct that represents an A64 branch instruction that can be encoded.
+///
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 1 1 0 1 0 1 1 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 |
+/// | op... rn.............. rm.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct Branch {
+ /// The register holding the address to be branched to.
+ rn: u8,
+
+ /// The operation to perform.
+ op: Op
+}
+
+impl Branch {
+ /// BR
+ /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BR--Branch-to-Register-?lang=en
+ pub fn br(rn: u8) -> Self {
+ Self { rn, op: Op::BR }
+ }
+
+ /// BLR
+ /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/BLR--Branch-with-Link-to-Register-?lang=en
+ pub fn blr(rn: u8) -> Self {
+ Self { rn, op: Op::BLR }
+ }
+
+ /// RET
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RET--Return-from-subroutine-?lang=en
+ pub fn ret(rn: u8) -> Self {
+ Self { rn, op: Op::RET }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en
+const FAMILY: u32 = 0b101;
+
+impl From<Branch> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: Branch) -> Self {
+ 0
+ | (0b11 << 30)
+ | (FAMILY << 26)
+ | (1 << 25)
+ | ((inst.op as u32) << 21)
+ | (0b11111 << 16)
+ | ((inst.rn as u32) << 5)
+ }
+}
+
+impl From<Branch> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: Branch) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_br() {
+ let result: u32 = Branch::br(0).into();
+ assert_eq!(0xd61f0000, result);
+ }
+
+ #[test]
+ fn test_blr() {
+ let result: u32 = Branch::blr(0).into();
+ assert_eq!(0xd63f0000, result);
+ }
+
+ #[test]
+ fn test_ret() {
+ let result: u32 = Branch::ret(30).into();
+ assert_eq!(0xd65f03C0, result);
+ }
+
+ #[test]
+ fn test_ret_rn() {
+ let result: u32 = Branch::ret(20).into();
+ assert_eq!(0xd65f0280, result);
+ }
+}
diff --git a/yjit/src/asm/arm64/inst/branch_cond.rs b/yjit/src/asm/arm64/inst/branch_cond.rs
new file mode 100644
index 0000000000..fcc07f69aa
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/branch_cond.rs
@@ -0,0 +1,78 @@
+use super::super::arg::{InstructionOffset, truncate_imm};
+
+/// The struct that represents an A64 conditional branch instruction that can be
+/// encoded.
+///
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 0 1 0 1 0 1 0 0 0 |
+/// | imm19........................................................... cond....... |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct BranchCond {
+ /// The kind of condition to check before branching.
+ cond: u8,
+
+ /// The instruction offset from this instruction to branch to.
+ offset: InstructionOffset
+}
+
+impl BranchCond {
+ /// B.cond
+ /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally-
+ pub fn bcond(cond: u8, offset: InstructionOffset) -> Self {
+ Self { cond, offset }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en
+const FAMILY: u32 = 0b101;
+
+impl From<BranchCond> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: BranchCond) -> Self {
+ 0
+ | (1 << 30)
+ | (FAMILY << 26)
+ | (truncate_imm::<_, 19>(inst.offset) << 5)
+ | (inst.cond as u32)
+ }
+}
+
+impl From<BranchCond> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: BranchCond) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use super::super::super::arg::Condition;
+
+ #[test]
+ fn test_b_eq() {
+ let result: u32 = BranchCond::bcond(Condition::EQ, 32.into()).into();
+ assert_eq!(0x54000400, result);
+ }
+
+ #[test]
+ fn test_b_vs() {
+ let result: u32 = BranchCond::bcond(Condition::VS, 32.into()).into();
+ assert_eq!(0x54000406, result);
+ }
+
+ #[test]
+ fn test_b_eq_max() {
+ let result: u32 = BranchCond::bcond(Condition::EQ, ((1 << 18) - 1).into()).into();
+ assert_eq!(0x547fffe0, result);
+ }
+
+ #[test]
+ fn test_b_eq_min() {
+ let result: u32 = BranchCond::bcond(Condition::EQ, (-(1 << 18)).into()).into();
+ assert_eq!(0x54800000, result);
+ }
+}
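One more case in the style of the module's own tests, showing a backward branch built from a byte distance (the encoded value follows from the layout above):

    #[test]
    fn test_b_ne_backward() {
        // -8 bytes is -2 instructions; imm19 holds the two's complement of -2.
        let offset = InstructionOffset::from_bytes(-8);
        let result: u32 = BranchCond::bcond(Condition::NE, offset).into();
        assert_eq!(0x54ffffc1, result);
    }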
diff --git a/yjit/src/asm/arm64/inst/breakpoint.rs b/yjit/src/asm/arm64/inst/breakpoint.rs
new file mode 100644
index 0000000000..be4920ac76
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/breakpoint.rs
@@ -0,0 +1,55 @@
+/// The struct that represents an A64 breakpoint instruction that can be encoded.
+///
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 1 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 |
+/// | imm16.................................................. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct Breakpoint {
+ /// The value to be captured by ESR_ELx.ISS
+ imm16: u16
+}
+
+impl Breakpoint {
+ /// BRK
+ /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/BRK--Breakpoint-instruction-
+ pub fn brk(imm16: u16) -> Self {
+ Self { imm16 }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#control
+const FAMILY: u32 = 0b101;
+
+impl From<Breakpoint> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: Breakpoint) -> Self {
+ let imm16 = inst.imm16 as u32;
+
+ 0
+ | (0b11 << 30)
+ | (FAMILY << 26)
+ | (1 << 21)
+ | (imm16 << 5)
+ }
+}
+
+impl From<Breakpoint> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: Breakpoint) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_brk() {
+ let result: u32 = Breakpoint::brk(7).into();
+ assert_eq!(0xd42000e0, result);
+ }
+}
diff --git a/yjit/src/asm/arm64/inst/call.rs b/yjit/src/asm/arm64/inst/call.rs
new file mode 100644
index 0000000000..74debac7f7
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/call.rs
@@ -0,0 +1,104 @@
+use super::super::arg::{InstructionOffset, truncate_imm};
+
+/// The operation to perform for this instruction.
+enum Op {
+ /// Branch directly, with a hint that this is not a subroutine call or
+ /// return.
+ Branch = 0,
+
+ /// Branch directly, with a hint that this is a subroutine call or return.
+ BranchWithLink = 1
+}
+
+/// The struct that represents an A64 branch with or without link instruction
+/// that can be encoded.
+///
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 0 0 1 0 1 |
+/// | op imm26.................................................................................... |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct Call {
+ /// The PC-relative offset to jump to in terms of number of instructions.
+ offset: InstructionOffset,
+
+ /// The operation to perform for this instruction.
+ op: Op
+}
+
+impl Call {
+ /// B
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch-
+ pub fn b(offset: InstructionOffset) -> Self {
+ Self { offset, op: Op::Branch }
+ }
+
+ /// BL
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en
+ pub fn bl(offset: InstructionOffset) -> Self {
+ Self { offset, op: Op::BranchWithLink }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en
+const FAMILY: u32 = 0b101;
+
+impl From<Call> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: Call) -> Self {
+ 0
+ | ((inst.op as u32) << 31)
+ | (FAMILY << 26)
+ | truncate_imm::<_, 26>(inst.offset)
+ }
+}
+
+impl From<Call> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: Call) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_bl() {
+ let result: u32 = Call::bl(0.into()).into();
+ assert_eq!(0x94000000, result);
+ }
+
+ #[test]
+ fn test_bl_positive() {
+ let result: u32 = Call::bl(256.into()).into();
+ assert_eq!(0x94000100, result);
+ }
+
+ #[test]
+ fn test_bl_negative() {
+ let result: u32 = Call::bl((-256).into()).into();
+ assert_eq!(0x97ffff00, result);
+ }
+
+ #[test]
+ fn test_b() {
+ let result: u32 = Call::b(0.into()).into();
+ assert_eq!(0x14000000, result);
+ }
+
+ #[test]
+ fn test_b_positive() {
+ let result: u32 = Call::b(((1 << 25) - 1).into()).into();
+ assert_eq!(0x15ffffff, result);
+ }
+
+ #[test]
+ fn test_b_negative() {
+ let result: u32 = Call::b((-(1 << 25)).into()).into();
+ assert_eq!(0x16000000, result);
+ }
+}
diff --git a/yjit/src/asm/arm64/inst/conditional.rs b/yjit/src/asm/arm64/inst/conditional.rs
new file mode 100644
index 0000000000..e1950e95b4
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/conditional.rs
@@ -0,0 +1,73 @@
+use super::super::arg::Sf;
+
+/// The struct that represents an A64 conditional instruction that can be
+/// encoded.
+///
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 0 0 1 1 0 1 0 1 0 0 0 0 |
+/// | sf rm.............. cond....... rn.............. rd.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct Conditional {
+ /// The number of the general-purpose destination register.
+ rd: u8,
+
+ /// The number of the first general-purpose source register.
+ rn: u8,
+
+ /// The condition to use for the conditional instruction.
+ cond: u8,
+
+ /// The number of the second general-purpose source register.
+ rm: u8,
+
+ /// The size of the registers of this instruction.
+ sf: Sf
+}
+
+impl Conditional {
+ /// CSEL
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CSEL--Conditional-Select-?lang=en
+ pub fn csel(rd: u8, rn: u8, rm: u8, cond: u8, num_bits: u8) -> Self {
+ Self { rd, rn, cond, rm, sf: num_bits.into() }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en#condsel
+const FAMILY: u32 = 0b101;
+
+impl From<Conditional> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: Conditional) -> Self {
+ 0
+ | ((inst.sf as u32) << 31)
+ | (1 << 28)
+ | (FAMILY << 25)
+ | (1 << 23)
+ | ((inst.rm as u32) << 16)
+ | ((inst.cond as u32) << 12)
+ | ((inst.rn as u32) << 5)
+ | (inst.rd as u32)
+ }
+}
+
+impl From<Conditional> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: Conditional) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use super::super::super::arg::Condition;
+
+ #[test]
+ fn test_csel() {
+ let result: u32 = Conditional::csel(0, 1, 2, Condition::NE, 64).into();
+ assert_eq!(0x9a821020, result);
+ }
+}
diff --git a/yjit/src/asm/arm64/inst/data_imm.rs b/yjit/src/asm/arm64/inst/data_imm.rs
new file mode 100644
index 0000000000..b474b00a52
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/data_imm.rs
@@ -0,0 +1,143 @@
+use super::super::arg::{Sf, ShiftedImmediate};
+
+/// The operation being performed by this instruction.
+enum Op {
+ Add = 0b0,
+ Sub = 0b1
+}
+
+// Whether or not to update the flags when this instruction is performed.
+enum S {
+ LeaveFlags = 0b0,
+ UpdateFlags = 0b1
+}
+
+/// The struct that represents an A64 data processing -- immediate instruction
+/// that can be encoded.
+///
+/// Add/subtract (immediate)
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 1 0 0 0 1 0 |
+/// | sf op S sh imm12.................................... rn.............. rd.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct DataImm {
+ /// The register number of the destination register.
+ rd: u8,
+
+ /// The register number of the first operand register.
+ rn: u8,
+
+    /// The shifted immediate value for this instruction.
+ imm: ShiftedImmediate,
+
+ /// Whether or not to update the flags when this instruction is performed.
+ s: S,
+
+ /// The opcode for this instruction.
+ op: Op,
+
+ /// Whether or not this instruction is operating on 64-bit operands.
+ sf: Sf
+}
+
+impl DataImm {
+ /// ADD (immediate)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--immediate---Add--immediate--?lang=en
+ pub fn add(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self {
+ Self { rd, rn, imm, s: S::LeaveFlags, op: Op::Add, sf: num_bits.into() }
+ }
+
+ /// ADDS (immediate, set flags)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--immediate---Add--immediate---setting-flags-?lang=en
+ pub fn adds(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self {
+ Self { rd, rn, imm, s: S::UpdateFlags, op: Op::Add, sf: num_bits.into() }
+ }
+
+ /// CMP (immediate)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--immediate---Compare--immediate---an-alias-of-SUBS--immediate--?lang=en
+ pub fn cmp(rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self {
+ Self::subs(31, rn, imm, num_bits)
+ }
+
+ /// SUB (immediate)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--immediate---Subtract--immediate--?lang=en
+ pub fn sub(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self {
+ Self { rd, rn, imm, s: S::LeaveFlags, op: Op::Sub, sf: num_bits.into() }
+ }
+
+ /// SUBS (immediate, set flags)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--immediate---Subtract--immediate---setting-flags-?lang=en
+ pub fn subs(rd: u8, rn: u8, imm: ShiftedImmediate, num_bits: u8) -> Self {
+ Self { rd, rn, imm, s: S::UpdateFlags, op: Op::Sub, sf: num_bits.into() }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en
+const FAMILY: u32 = 0b1000;
+
+impl From<DataImm> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: DataImm) -> Self {
+ let imm: u32 = inst.imm.into();
+
+ 0
+ | ((inst.sf as u32) << 31)
+ | ((inst.op as u32) << 30)
+ | ((inst.s as u32) << 29)
+ | (FAMILY << 25)
+ | (1 << 24)
+ | (imm << 10)
+ | ((inst.rn as u32) << 5)
+ | inst.rd as u32
+ }
+}
+
+impl From<DataImm> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: DataImm) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_add() {
+ let inst = DataImm::add(0, 1, 7.try_into().unwrap(), 64);
+ let result: u32 = inst.into();
+ assert_eq!(0x91001c20, result);
+ }
+
+ #[test]
+ fn test_adds() {
+ let inst = DataImm::adds(0, 1, 7.try_into().unwrap(), 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xb1001c20, result);
+ }
+
+ #[test]
+ fn test_cmp() {
+ let inst = DataImm::cmp(0, 7.try_into().unwrap(), 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf1001c1f, result);
+ }
+
+ #[test]
+ fn test_sub() {
+ let inst = DataImm::sub(0, 1, 7.try_into().unwrap(), 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xd1001c20, result);
+ }
+
+ #[test]
+ fn test_subs() {
+ let inst = DataImm::subs(0, 1, 7.try_into().unwrap(), 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf1001c20, result);
+ }
+}
diff --git a/yjit/src/asm/arm64/inst/data_reg.rs b/yjit/src/asm/arm64/inst/data_reg.rs
new file mode 100644
index 0000000000..a742121f1f
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/data_reg.rs
@@ -0,0 +1,192 @@
+use super::super::arg::{Sf, truncate_uimm};
+
+/// The operation being performed by this instruction.
+enum Op {
+ Add = 0b0,
+ Sub = 0b1
+}
+
+// Whether or not to update the flags when this instruction is performed.
+enum S {
+ LeaveFlags = 0b0,
+ UpdateFlags = 0b1
+}
+
+/// The type of shift to perform on the second operand register.
+enum Shift {
+ LSL = 0b00, // logical shift left (unsigned)
+ LSR = 0b01, // logical shift right (unsigned)
+ ASR = 0b10 // arithmetic shift right (signed)
+}
+
+/// The struct that represents an A64 data processing -- register instruction
+/// that can be encoded.
+///
+/// Add/subtract (shifted register)
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 0 1 0 1 1 0 |
+/// | sf op S shift rm.............. imm6............... rn.............. rd.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct DataReg {
+ /// The register number of the destination register.
+ rd: u8,
+
+ /// The register number of the first operand register.
+ rn: u8,
+
+ /// The amount to shift the second operand register by.
+ imm6: u8,
+
+ /// The register number of the second operand register.
+ rm: u8,
+
+ /// The type of shift to perform on the second operand register.
+ shift: Shift,
+
+ /// Whether or not to update the flags when this instruction is performed.
+ s: S,
+
+ /// The opcode for this instruction.
+ op: Op,
+
+ /// Whether or not this instruction is operating on 64-bit operands.
+ sf: Sf
+}
+
+impl DataReg {
+ /// ADD (shifted register)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--shifted-register---Add--shifted-register--?lang=en
+ pub fn add(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
+ Self {
+ rd,
+ rn,
+ imm6: 0,
+ rm,
+ shift: Shift::LSL,
+ s: S::LeaveFlags,
+ op: Op::Add,
+ sf: num_bits.into()
+ }
+ }
+
+ /// ADDS (shifted register, set flags)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADDS--shifted-register---Add--shifted-register---setting-flags-?lang=en
+ pub fn adds(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
+ Self {
+ rd,
+ rn,
+ imm6: 0,
+ rm,
+ shift: Shift::LSL,
+ s: S::UpdateFlags,
+ op: Op::Add,
+ sf: num_bits.into()
+ }
+ }
+
+ /// CMP (shifted register)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--shifted-register---Compare--shifted-register---an-alias-of-SUBS--shifted-register--?lang=en
+ pub fn cmp(rn: u8, rm: u8, num_bits: u8) -> Self {
+ Self::subs(31, rn, rm, num_bits)
+ }
+
+ /// SUB (shifted register)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUB--shifted-register---Subtract--shifted-register--?lang=en
+ pub fn sub(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
+ Self {
+ rd,
+ rn,
+ imm6: 0,
+ rm,
+ shift: Shift::LSL,
+ s: S::LeaveFlags,
+ op: Op::Sub,
+ sf: num_bits.into()
+ }
+ }
+
+ /// SUBS (shifted register, set flags)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SUBS--shifted-register---Subtract--shifted-register---setting-flags-?lang=en
+ pub fn subs(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
+ Self {
+ rd,
+ rn,
+ imm6: 0,
+ rm,
+ shift: Shift::LSL,
+ s: S::UpdateFlags,
+ op: Op::Sub,
+ sf: num_bits.into()
+ }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en
+const FAMILY: u32 = 0b0101;
+
+impl From<DataReg> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: DataReg) -> Self {
+ 0
+ | ((inst.sf as u32) << 31)
+ | ((inst.op as u32) << 30)
+ | ((inst.s as u32) << 29)
+ | (FAMILY << 25)
+ | (1 << 24)
+ | ((inst.shift as u32) << 22)
+ | ((inst.rm as u32) << 16)
+ | (truncate_uimm::<_, 6>(inst.imm6) << 10)
+ | ((inst.rn as u32) << 5)
+ | inst.rd as u32
+ }
+}
+
+impl From<DataReg> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: DataReg) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_add() {
+ let inst = DataReg::add(0, 1, 2, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0x8b020020, result);
+ }
+
+ #[test]
+ fn test_adds() {
+ let inst = DataReg::adds(0, 1, 2, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xab020020, result);
+ }
+
+ #[test]
+ fn test_cmp() {
+ let inst = DataReg::cmp(0, 1, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xeb01001f, result);
+ }
+
+ #[test]
+ fn test_sub() {
+ let inst = DataReg::sub(0, 1, 2, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xcb020020, result);
+ }
+
+ #[test]
+ fn test_subs() {
+ let inst = DataReg::subs(0, 1, 2, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xeb020020, result);
+ }
+}
diff --git a/yjit/src/asm/arm64/inst/halfword_imm.rs b/yjit/src/asm/arm64/inst/halfword_imm.rs
new file mode 100644
index 0000000000..0ddae8e8de
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/halfword_imm.rs
@@ -0,0 +1,179 @@
+use super::super::arg::truncate_imm;
+
+/// Whether this is a load or a store.
+enum Op {
+ Load = 1,
+ Store = 0
+}
+
+/// The type of indexing to perform for this instruction.
+enum Index {
+ /// No indexing.
+ None = 0b00,
+
+ /// Mutate the register after the read.
+ PostIndex = 0b01,
+
+ /// Mutate the register before the read.
+ PreIndex = 0b11
+}
+
+/// The struct that represents an A64 halfword instruction that can be encoded.
+///
+/// LDRH/STRH
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 0 1 1 1 1 0 0 1 0 |
+/// | op imm12.................................... rn.............. rt.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+/// LDRH (pre-index/post-index)
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 0 1 1 1 1 0 0 0 0 0 |
+/// | op imm9.......................... index rn.............. rt.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct HalfwordImm {
+    /// The number of the 32-bit register to be loaded or stored.
+ rt: u8,
+
+ /// The number of the 64-bit base register to calculate the memory address.
+ rn: u8,
+
+ /// The type of indexing to perform for this instruction.
+ index: Index,
+
+ /// The immediate offset from the base register.
+ imm: i16,
+
+ /// The operation to perform.
+ op: Op
+}
+
+impl HalfwordImm {
+ /// LDRH
+ /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
+ pub fn ldrh(rt: u8, rn: u8, imm12: i16) -> Self {
+ Self { rt, rn, index: Index::None, imm: imm12, op: Op::Load }
+ }
+
+ /// LDRH (pre-index)
+ /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
+ pub fn ldrh_pre(rt: u8, rn: u8, imm9: i16) -> Self {
+ Self { rt, rn, index: Index::PreIndex, imm: imm9, op: Op::Load }
+ }
+
+ /// LDRH (post-index)
+ /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDRH--immediate---Load-Register-Halfword--immediate--
+ pub fn ldrh_post(rt: u8, rn: u8, imm9: i16) -> Self {
+ Self { rt, rn, index: Index::PostIndex, imm: imm9, op: Op::Load }
+ }
+
+ /// STRH
+ /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate--
+ pub fn strh(rt: u8, rn: u8, imm12: i16) -> Self {
+ Self { rt, rn, index: Index::None, imm: imm12, op: Op::Store }
+ }
+
+ /// STRH (pre-index)
+ /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate--
+ pub fn strh_pre(rt: u8, rn: u8, imm9: i16) -> Self {
+ Self { rt, rn, index: Index::PreIndex, imm: imm9, op: Op::Store }
+ }
+
+ /// STRH (post-index)
+ /// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STRH--immediate---Store-Register-Halfword--immediate--
+ pub fn strh_post(rt: u8, rn: u8, imm9: i16) -> Self {
+ Self { rt, rn, index: Index::PostIndex, imm: imm9, op: Op::Store }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+const FAMILY: u32 = 0b111100;
+
+impl From<HalfwordImm> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: HalfwordImm) -> Self {
+ let (opc, imm) = match inst.index {
+ Index::None => {
+ assert_eq!(inst.imm & 1, 0, "immediate offset must be even");
+ let imm12 = truncate_imm::<_, 12>(inst.imm / 2);
+ (0b100, imm12)
+ },
+ Index::PreIndex | Index::PostIndex => {
+ let imm9 = truncate_imm::<_, 9>(inst.imm);
+ (0b000, (imm9 << 2) | (inst.index as u32))
+ }
+ };
+
+ 0
+ | (FAMILY << 25)
+ | ((opc | (inst.op as u32)) << 22)
+ | (imm << 10)
+ | ((inst.rn as u32) << 5)
+ | (inst.rt as u32)
+ }
+}
+
+impl From<HalfwordImm> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: HalfwordImm) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_ldrh() {
+ let inst = HalfwordImm::ldrh(0, 1, 8);
+ let result: u32 = inst.into();
+ assert_eq!(0x79401020, result);
+ }
+
+ #[test]
+ fn test_ldrh_pre() {
+ let inst = HalfwordImm::ldrh_pre(0, 1, 16);
+ let result: u32 = inst.into();
+ assert_eq!(0x78410c20, result);
+ }
+
+ #[test]
+ fn test_ldrh_post() {
+ let inst = HalfwordImm::ldrh_post(0, 1, 24);
+ let result: u32 = inst.into();
+ assert_eq!(0x78418420, result);
+ }
+
+ #[test]
+ fn test_ldrh_post_negative() {
+ let inst = HalfwordImm::ldrh_post(0, 1, -24);
+ let result: u32 = inst.into();
+ assert_eq!(0x785e8420, result);
+ }
+
+ #[test]
+ fn test_strh() {
+ let inst = HalfwordImm::strh(0, 1, 0);
+ let result: u32 = inst.into();
+ assert_eq!(0x79000020, result);
+ }
+
+ #[test]
+ fn test_strh_pre() {
+ let inst = HalfwordImm::strh_pre(0, 1, 0);
+ let result: u32 = inst.into();
+ assert_eq!(0x78000c20, result);
+ }
+
+ #[test]
+ fn test_strh_post() {
+ let inst = HalfwordImm::strh_post(0, 1, 0);
+ let result: u32 = inst.into();
+ assert_eq!(0x78000420, result);
+ }
+}
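One subtlety worth spelling out with a sketch in the style of the module's own tests: the plain LDRH/STRH form scales its 12-bit immediate by the 2-byte access size, while the pre-/post-index forms keep an unscaled 9-bit byte offset.

    #[test]
    fn test_ldrh_offset_scaling() {
        // The unsigned-offset form stores the byte offset divided by 2, so a
        // byte offset of 8 becomes 4 in the imm12 field (bits 10-21)...
        let unsigned: u32 = HalfwordImm::ldrh(0, 1, 8).into();
        assert_eq!(4, (unsigned >> 10) & 0xfff);

        // ...while the pre-index form keeps the raw byte offset in imm9 (bits 12-20).
        let pre: u32 = HalfwordImm::ldrh_pre(0, 1, 8).into();
        assert_eq!(8, (pre >> 12) & 0x1ff);
    }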
diff --git a/yjit/src/asm/arm64/inst/load_literal.rs b/yjit/src/asm/arm64/inst/load_literal.rs
new file mode 100644
index 0000000000..3eade205c8
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/load_literal.rs
@@ -0,0 +1,89 @@
+use super::super::arg::{InstructionOffset, truncate_imm};
+
+/// The size of the operands being operated on.
+enum Opc {
+ Size32 = 0b00,
+ Size64 = 0b01,
+}
+
+/// A convenience conversion so that we can turn the number of bits of a
+/// register operand directly into an Opc enum variant.
+impl From<u8> for Opc {
+ fn from(num_bits: u8) -> Self {
+ match num_bits {
+ 64 => Opc::Size64,
+ 32 => Opc::Size32,
+ _ => panic!("Invalid number of bits: {}", num_bits)
+ }
+ }
+}
+
+/// The struct that represents an A64 load literal instruction that can be encoded.
+///
+/// LDR
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 0 1 1 0 0 0 |
+/// | opc.. imm19........................................................... rt.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct LoadLiteral {
+ /// The number of the register to load the value into.
+ rt: u8,
+
+    /// The PC-relative offset, in instructions, from which to load the value.
+ offset: InstructionOffset,
+
+ /// The size of the operands being operated on.
+ opc: Opc
+}
+
+impl LoadLiteral {
+ /// LDR (load literal)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--literal---Load-Register--literal--?lang=en
+ pub fn ldr_literal(rt: u8, offset: InstructionOffset, num_bits: u8) -> Self {
+ Self { rt, offset, opc: num_bits.into() }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+const FAMILY: u32 = 0b0100;
+
+impl From<LoadLiteral> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: LoadLiteral) -> Self {
+ 0
+ | ((inst.opc as u32) << 30)
+ | (1 << 28)
+ | (FAMILY << 25)
+ | (truncate_imm::<_, 19>(inst.offset) << 5)
+ | (inst.rt as u32)
+ }
+}
+
+impl From<LoadLiteral> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: LoadLiteral) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_ldr_positive() {
+ let inst = LoadLiteral::ldr_literal(0, 5.into(), 64);
+ let result: u32 = inst.into();
+ assert_eq!(0x580000a0, result);
+ }
+
+ #[test]
+ fn test_ldr_negative() {
+ let inst = LoadLiteral::ldr_literal(0, (-5).into(), 64);
+ let result: u32 = inst.into();
+ assert_eq!(0x58ffff60, result);
+ }
+}
diff --git a/yjit/src/asm/arm64/inst/load_register.rs b/yjit/src/asm/arm64/inst/load_register.rs
new file mode 100644
index 0000000000..3426b9ba5f
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/load_register.rs
@@ -0,0 +1,108 @@
+/// Whether or not to shift the register.
+enum S {
+ Shift = 1,
+ NoShift = 0
+}
+
+/// The option for this instruction.
+enum Option {
+ UXTW = 0b010,
+ LSL = 0b011,
+ SXTW = 0b110,
+ SXTX = 0b111
+}
+
+/// The size of the operands of this instruction.
+enum Size {
+ Size32 = 0b10,
+ Size64 = 0b11
+}
+
+/// A convenience function so that we can convert the number of bits of a
+/// register operand directly into a Size enum variant.
+impl From<u8> for Size {
+ fn from(num_bits: u8) -> Self {
+ match num_bits {
+ 64 => Size::Size64,
+ 32 => Size::Size32,
+ _ => panic!("Invalid number of bits: {}", num_bits)
+ }
+ }
+}
+
+/// The struct that represents an A64 load instruction that can be encoded.
+///
+/// LDR
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 1 1 1 0 0 0 0 1 1 1 0 |
+/// | size. rm.............. option.. S rn.............. rt.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct LoadRegister {
+ /// The number of the register to load the value into.
+ rt: u8,
+
+ /// The base register with which to form the address.
+ rn: u8,
+
+ /// Whether or not to shift the value of the register.
+ s: S,
+
+ /// The option associated with this instruction that controls the shift.
+ option: Option,
+
+ /// The number of the offset register.
+ rm: u8,
+
+ /// The size of the operands.
+ size: Size
+}
+
+impl LoadRegister {
+ /// LDR
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--register---Load-Register--register--?lang=en
+ pub fn ldr(rt: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
+ Self { rt, rn, s: S::NoShift, option: Option::LSL, rm, size: num_bits.into() }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+const FAMILY: u32 = 0b0100;
+
+impl From<LoadRegister> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: LoadRegister) -> Self {
+ 0
+ | ((inst.size as u32) << 30)
+ | (0b11 << 28)
+ | (FAMILY << 25)
+ | (0b11 << 21)
+ | ((inst.rm as u32) << 16)
+ | ((inst.option as u32) << 13)
+ | ((inst.s as u32) << 12)
+ | (0b10 << 10)
+ | ((inst.rn as u32) << 5)
+ | (inst.rt as u32)
+ }
+}
+
+impl From<LoadRegister> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: LoadRegister) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_ldr() {
+ let inst = LoadRegister::ldr(0, 1, 2, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf8626820, result);
+ }
+}
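A small standalone sketch, packing the fields by hand, makes the fixed choices above concrete: the register-offset LDR constructor always uses option = LSL (0b011) with S = 0, so the offset register is neither extended nor scaled. The `encode_ldr_register` helper is illustrative only and not part of the patch.

```rust
// Illustrative only, not part of this patch. 64-bit (size = 0b11) LDR with a
// register offset, option = LSL, S = 0, matching the ldr() constructor above.
fn encode_ldr_register(rt: u8, rn: u8, rm: u8) -> u32 {
    (0b11 << 30) | (0b11 << 28) | (0b0100 << 25) | (0b11 << 21)
        | ((rm as u32) << 16) | (0b011 << 13) | (0b10 << 10)
        | ((rn as u32) << 5) | (rt as u32)
}

fn main() {
    // Same value as test_ldr above: LDR X0, [X1, X2].
    assert_eq!(0xf8626820, encode_ldr_register(0, 1, 2));
}
```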
diff --git a/yjit/src/asm/arm64/inst/load_store.rs b/yjit/src/asm/arm64/inst/load_store.rs
new file mode 100644
index 0000000000..b5c8a3c294
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/load_store.rs
@@ -0,0 +1,249 @@
+use super::super::arg::truncate_imm;
+
+/// The size of the operands being operated on.
+enum Size {
+ Size8 = 0b00,
+ Size16 = 0b01,
+ Size32 = 0b10,
+ Size64 = 0b11,
+}
+
+/// A convenience function so that we can convert the number of bits of a
+/// register operand directly into a Size enum variant.
+impl From<u8> for Size {
+ fn from(num_bits: u8) -> Self {
+ match num_bits {
+ 64 => Size::Size64,
+ 32 => Size::Size32,
+ _ => panic!("Invalid number of bits: {}", num_bits)
+ }
+ }
+}
+
+/// The operation to perform for this instruction.
+enum Opc {
+ STR = 0b00,
+ LDR = 0b01,
+ LDURSW = 0b10
+}
+
+/// What kind of indexing to perform for this instruction.
+enum Index {
+ None = 0b00,
+ PostIndex = 0b01,
+ PreIndex = 0b11
+}
+
+/// The struct that represents an A64 load or store instruction that can be
+/// encoded.
+///
+/// LDR/LDUR/LDURSW/STR/STUR
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 1 1 1 0 0 0 0 |
+/// | size. opc.. imm9.......................... idx.. rn.............. rt.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct LoadStore {
+ /// The number of the register to load the value into.
+ rt: u8,
+
+ /// The base register with which to form the address.
+ rn: u8,
+
+ /// What kind of indexing to perform for this instruction.
+ idx: Index,
+
+ /// The optional signed immediate byte offset from the base register.
+ imm9: i16,
+
+ /// The operation to perform for this instruction.
+ opc: Opc,
+
+ /// The size of the operands being operated on.
+ size: Size
+}
+
+impl LoadStore {
+ /// LDR (immediate, post-index)
+ /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
+ pub fn ldr_post(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self {
+ Self { rt, rn, idx: Index::PostIndex, imm9, opc: Opc::LDR, size: num_bits.into() }
+ }
+
+ /// LDR (immediate, pre-index)
+ /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
+ pub fn ldr_pre(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self {
+ Self { rt, rn, idx: Index::PreIndex, imm9, opc: Opc::LDR, size: num_bits.into() }
+ }
+
+ /// LDUR (load register, unscaled)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--?lang=en
+ pub fn ldur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self {
+ Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDR, size: num_bits.into() }
+ }
+
+ /// LDURH (load register, halfword, unscaled)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURH--Load-Register-Halfword--unscaled--?lang=en
+ pub fn ldurh(rt: u8, rn: u8, imm9: i16) -> Self {
+ Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDR, size: Size::Size16 }
+ }
+
+ /// LDURB (load register, byte, unscaled)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURB--Load-Register-Byte--unscaled--?lang=en
+ pub fn ldurb(rt: u8, rn: u8, imm9: i16) -> Self {
+ Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDR, size: Size::Size8 }
+ }
+
+ /// LDURSW (load register, unscaled, signed)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDURSW--Load-Register-Signed-Word--unscaled--?lang=en
+ pub fn ldursw(rt: u8, rn: u8, imm9: i16) -> Self {
+ Self { rt, rn, idx: Index::None, imm9, opc: Opc::LDURSW, size: Size::Size32 }
+ }
+
+ /// STR (immediate, post-index)
+ /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STR--immediate---Store-Register--immediate--
+ pub fn str_post(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self {
+ Self { rt, rn, idx: Index::PostIndex, imm9, opc: Opc::STR, size: num_bits.into() }
+ }
+
+ /// STR (immediate, pre-index)
+ /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STR--immediate---Store-Register--immediate--
+ pub fn str_pre(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self {
+ Self { rt, rn, idx: Index::PreIndex, imm9, opc: Opc::STR, size: num_bits.into() }
+ }
+
+ /// STUR (store register, unscaled)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STUR--Store-Register--unscaled--?lang=en
+ pub fn stur(rt: u8, rn: u8, imm9: i16, num_bits: u8) -> Self {
+ Self { rt, rn, idx: Index::None, imm9, opc: Opc::STR, size: num_bits.into() }
+ }
+
+ /// STURH (store register, halfword, unscaled)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STURH--Store-Register-Halfword--unscaled--?lang=en
+ pub fn sturh(rt: u8, rn: u8, imm9: i16) -> Self {
+ Self { rt, rn, idx: Index::None, imm9, opc: Opc::STR, size: Size::Size16 }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+const FAMILY: u32 = 0b0100;
+
+impl From<LoadStore> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: LoadStore) -> Self {
+ 0
+ | ((inst.size as u32) << 30)
+ | (0b11 << 28)
+ | (FAMILY << 25)
+ | ((inst.opc as u32) << 22)
+ | (truncate_imm::<_, 9>(inst.imm9) << 12)
+ | ((inst.idx as u32) << 10)
+ | ((inst.rn as u32) << 5)
+ | (inst.rt as u32)
+ }
+}
+
+impl From<LoadStore> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: LoadStore) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_ldr_post() {
+ let inst = LoadStore::ldr_post(0, 1, 16, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf8410420, result);
+ }
+
+ #[test]
+ fn test_ldr_pre() {
+ let inst = LoadStore::ldr_pre(0, 1, 16, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf8410c20, result);
+ }
+
+ #[test]
+ fn test_ldur() {
+ let inst = LoadStore::ldur(0, 1, 0, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf8400020, result);
+ }
+
+ #[test]
+ fn test_ldurb() {
+ let inst = LoadStore::ldurb(0, 1, 0);
+ let result: u32 = inst.into();
+ assert_eq!(0x38400020, result);
+ }
+
+ #[test]
+ fn test_ldurh() {
+ let inst = LoadStore::ldurh(0, 1, 0);
+ let result: u32 = inst.into();
+ assert_eq!(0x78400020, result);
+ }
+
+ #[test]
+ fn test_ldur_with_imm() {
+ let inst = LoadStore::ldur(0, 1, 123, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf847b020, result);
+ }
+
+ #[test]
+ fn test_ldursw() {
+ let inst = LoadStore::ldursw(0, 1, 0);
+ let result: u32 = inst.into();
+ assert_eq!(0xb8800020, result);
+ }
+
+ #[test]
+ fn test_ldursw_with_imm() {
+ let inst = LoadStore::ldursw(0, 1, 123);
+ let result: u32 = inst.into();
+ assert_eq!(0xb887b020, result);
+ }
+
+ #[test]
+ fn test_str_post() {
+ let inst = LoadStore::str_post(0, 1, -16, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf81f0420, result);
+ }
+
+ #[test]
+ fn test_str_pre() {
+ let inst = LoadStore::str_pre(0, 1, -16, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf81f0c20, result);
+ }
+
+ #[test]
+ fn test_stur() {
+ let inst = LoadStore::stur(0, 1, 0, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf8000020, result);
+ }
+
+ #[test]
+ fn test_stur_negative_offset() {
+ let inst = LoadStore::stur(0, 1, -1, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf81ff020, result);
+ }
+
+ #[test]
+ fn test_stur_positive_offset() {
+ let inst = LoadStore::stur(0, 1, 255, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf80ff020, result);
+ }
+}
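The unscaled, pre-index, and post-index forms above all share one layout; only the opc and idx fields change, and the 9-bit offset is two's complement. A standalone sketch, checked against two of the test vectors above, shows the packing; the `encode_load_store` helper is illustrative only and not part of the patch.

```rust
// Illustrative only, not part of this patch. One packing function covers the
// LDR/STR/LDUR/STUR forms above; imm9 is a signed 9-bit byte offset.
fn encode_load_store(rt: u8, rn: u8, imm9: i16, opc: u32, idx: u32, size: u32) -> u32 {
    let imm9 = (imm9 as u32) & ((1 << 9) - 1);
    (size << 30) | (0b11 << 28) | (0b0100 << 25) | (opc << 22)
        | (imm9 << 12) | (idx << 10) | ((rn as u32) << 5) | (rt as u32)
}

fn main() {
    // STR X0, [X1], #-16 (post-index): same value as test_str_post above.
    assert_eq!(0xf81f0420, encode_load_store(0, 1, -16, 0b00, 0b01, 0b11));
    // LDUR X0, [X1, #123]: same value as test_ldur_with_imm above.
    assert_eq!(0xf847b020, encode_load_store(0, 1, 123, 0b01, 0b00, 0b11));
}
```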
diff --git a/yjit/src/asm/arm64/inst/load_store_exclusive.rs b/yjit/src/asm/arm64/inst/load_store_exclusive.rs
new file mode 100644
index 0000000000..8216c2200a
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/load_store_exclusive.rs
@@ -0,0 +1,109 @@
+/// The operation being performed for this instruction.
+enum Op {
+ Store = 0,
+ Load = 1
+}
+
+/// The size of the registers being operated on.
+enum Size {
+ Size32 = 0b10,
+ Size64 = 0b11
+}
+
+/// A convenience function so that we can convert the number of bits of a
+/// register operand directly into a Size enum variant.
+impl From<u8> for Size {
+ fn from(num_bits: u8) -> Self {
+ match num_bits {
+ 64 => Size::Size64,
+ 32 => Size::Size32,
+ _ => panic!("Invalid number of bits: {}", num_bits)
+ }
+ }
+}
+
+/// The struct that represents an A64 load or store exclusive instruction that
+/// can be encoded.
+///
+/// LDAXR/STLXR
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 1 0 0 1 0 0 0 0 0 1 1 1 1 1 1 |
+/// | size. op rs.............. rn.............. rt.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct LoadStoreExclusive {
+ /// The number of the register to be loaded.
+ rt: u8,
+
+ /// The base register with which to form the address.
+ rn: u8,
+
+ /// The register to be used for the status result if it applies to this
+ /// operation. Otherwise it's the zero register.
+ rs: u8,
+
+ /// The operation being performed for this instruction.
+ op: Op,
+
+ /// The size of the registers being operated on.
+ size: Size
+}
+
+impl LoadStoreExclusive {
+ /// LDAXR
+ /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LDAXR--Load-Acquire-Exclusive-Register-
+ pub fn ldaxr(rt: u8, rn: u8, num_bits: u8) -> Self {
+ Self { rt, rn, rs: 31, op: Op::Load, size: num_bits.into() }
+ }
+
+ /// STLXR
+ /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/STLXR--Store-Release-Exclusive-Register-
+ pub fn stlxr(rs: u8, rt: u8, rn: u8, num_bits: u8) -> Self {
+ Self { rt, rn, rs, op: Op::Store, size: num_bits.into() }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+const FAMILY: u32 = 0b0100;
+
+impl From<LoadStoreExclusive> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: LoadStoreExclusive) -> Self {
+ 0
+ | ((inst.size as u32) << 30)
+ | (FAMILY << 25)
+ | ((inst.op as u32) << 22)
+ | ((inst.rs as u32) << 16)
+ | (0b111111 << 10)
+ | ((inst.rn as u32) << 5)
+ | (inst.rt as u32)
+ }
+}
+
+impl From<LoadStoreExclusive> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: LoadStoreExclusive) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_ldaxr() {
+ let inst = LoadStoreExclusive::ldaxr(16, 0, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xc85ffc10, result);
+ }
+
+ #[test]
+ fn test_stlxr() {
+ let inst = LoadStoreExclusive::stlxr(17, 16, 0, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xc811fc10, result);
+ }
+}
diff --git a/yjit/src/asm/arm64/inst/logical_imm.rs b/yjit/src/asm/arm64/inst/logical_imm.rs
new file mode 100644
index 0000000000..b24916f8a5
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/logical_imm.rs
@@ -0,0 +1,154 @@
+use super::super::arg::{BitmaskImmediate, Sf};
+
+/// Which operation to perform.
+enum Opc {
+ /// The AND operation.
+ And = 0b00,
+
+ /// The ORR operation.
+ Orr = 0b01,
+
+ /// The EOR operation.
+ Eor = 0b10,
+
+ /// The ANDS operation.
+ Ands = 0b11
+}
+
+/// The struct that represents an A64 bitwise immediate instruction that can be
+/// encoded.
+///
+/// AND/ORR/ANDS (immediate)
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 1 0 0 1 0 0 |
+/// | sf opc.. N immr............... imms............... rn.............. rd.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct LogicalImm {
+ /// The register number of the destination register.
+ rd: u8,
+
+ /// The register number of the first operand register.
+ rn: u8,
+
+ /// The bitmask immediate operand for the operation.
+ imm: BitmaskImmediate,
+
+ /// The opcode for this instruction.
+ opc: Opc,
+
+ /// Whether or not this instruction is operating on 64-bit operands.
+ sf: Sf
+}
+
+impl LogicalImm {
+ /// AND (bitmask immediate)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--immediate---Bitwise-AND--immediate--?lang=en
+ pub fn and(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self {
+ Self { rd, rn, imm, opc: Opc::And, sf: num_bits.into() }
+ }
+
+ /// ANDS (bitmask immediate)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--immediate---Bitwise-AND--immediate---setting-flags-?lang=en
+ pub fn ands(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self {
+ Self { rd, rn, imm, opc: Opc::Ands, sf: num_bits.into() }
+ }
+
+ /// EOR (bitmask immediate)
+ /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/EOR--immediate---Bitwise-Exclusive-OR--immediate--
+ pub fn eor(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self {
+ Self { rd, rn, imm, opc: Opc::Eor, sf: num_bits.into() }
+ }
+
+ /// MOV (bitmask immediate)
+ /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--bitmask-immediate---Move--bitmask-immediate---an-alias-of-ORR--immediate--?lang=en
+ pub fn mov(rd: u8, imm: BitmaskImmediate, num_bits: u8) -> Self {
+ Self { rd, rn: 0b11111, imm, opc: Opc::Orr, sf: num_bits.into() }
+ }
+
+ /// ORR (bitmask immediate)
+ /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--immediate---Bitwise-OR--immediate--
+ pub fn orr(rd: u8, rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self {
+ Self { rd, rn, imm, opc: Opc::Orr, sf: num_bits.into() }
+ }
+
+ /// TST (bitmask immediate)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--immediate---Test-bits--immediate---an-alias-of-ANDS--immediate--?lang=en
+ pub fn tst(rn: u8, imm: BitmaskImmediate, num_bits: u8) -> Self {
+ Self::ands(31, rn, imm, num_bits)
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#log_imm
+const FAMILY: u32 = 0b1001;
+
+impl From<LogicalImm> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: LogicalImm) -> Self {
+ let imm: u32 = inst.imm.encode();
+
+ 0
+ | ((inst.sf as u32) << 31)
+ | ((inst.opc as u32) << 29)
+ | (FAMILY << 25)
+ | (imm << 10)
+ | ((inst.rn as u32) << 5)
+ | inst.rd as u32
+ }
+}
+
+impl From<LogicalImm> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: LogicalImm) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_and() {
+ let inst = LogicalImm::and(0, 1, 7.try_into().unwrap(), 64);
+ let result: u32 = inst.into();
+ assert_eq!(0x92400820, result);
+ }
+
+ #[test]
+ fn test_ands() {
+ let inst = LogicalImm::ands(0, 1, 7.try_into().unwrap(), 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf2400820, result);
+ }
+
+ #[test]
+ fn test_eor() {
+ let inst = LogicalImm::eor(0, 1, 7.try_into().unwrap(), 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xd2400820, result);
+ }
+
+ #[test]
+ fn test_mov() {
+ let inst = LogicalImm::mov(0, 0x5555555555555555.try_into().unwrap(), 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xb200f3e0, result);
+ }
+
+ #[test]
+ fn test_orr() {
+ let inst = LogicalImm::orr(0, 1, 7.try_into().unwrap(), 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xb2400820, result);
+ }
+
+ #[test]
+ fn test_tst() {
+ let inst = LogicalImm::tst(1, 7.try_into().unwrap(), 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf240083f, result);
+ }
+}
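The 13-bit immediate field here is N:immr:imms as produced by BitmaskImmediate (see arg/bitmask_imm.rs). For example, 7 is a run of three ones in a 64-bit element, which encodes as N = 1, immr = 0, imms = 2; the sketch below packs those fields directly and matches the AND test vector above. The `encode_and_imm` helper is illustrative only and not part of the patch.

```rust
// Illustrative only, not part of this patch. Packs sf, opc = AND, and the
// N:immr:imms bitmask fields for a 64-bit AND (immediate).
fn encode_and_imm(rd: u8, rn: u8, n: u32, immr: u32, imms: u32) -> u32 {
    let imm13 = (n << 12) | (immr << 6) | imms;
    (1 << 31) | (0b1001 << 25) | (imm13 << 10) | ((rn as u32) << 5) | (rd as u32)
}

fn main() {
    // AND X0, X1, #7: same value as test_and above.
    assert_eq!(0x92400820, encode_and_imm(0, 1, 1, 0, 2));
}
```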
diff --git a/yjit/src/asm/arm64/inst/logical_reg.rs b/yjit/src/asm/arm64/inst/logical_reg.rs
new file mode 100644
index 0000000000..a96805c9f9
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/logical_reg.rs
@@ -0,0 +1,207 @@
+use super::super::arg::{Sf, truncate_uimm};
+
+/// Whether or not this is a NOT instruction.
+enum N {
+ /// This is not a NOT instruction.
+ No = 0,
+
+ /// This is a NOT instruction.
+ Yes = 1
+}
+
+/// The type of shift to perform on the second operand register.
+enum Shift {
+ LSL = 0b00, // logical shift left (unsigned)
+ LSR = 0b01, // logical shift right (unsigned)
+ ASR = 0b10, // arithmetic shift right (signed)
+ ROR = 0b11 // rotate right (unsigned)
+}
+
+/// Which operation to perform.
+enum Opc {
+ /// The AND operation.
+ And = 0b00,
+
+ /// The ORR operation.
+ Orr = 0b01,
+
+ /// The EOR operation.
+ Eor = 0b10,
+
+ /// The ANDS operation.
+ Ands = 0b11
+}
+
+/// The struct that represents an A64 logical register instruction that can be
+/// encoded.
+///
+/// AND/ORR/ANDS (shifted register)
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 0 1 0 1 0 |
+/// | sf opc.. shift N rm.............. imm6............... rn.............. rd.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct LogicalReg {
+ /// The register number of the destination register.
+ rd: u8,
+
+ /// The register number of the first operand register.
+ rn: u8,
+
+ /// The amount to shift the second operand register.
+ imm6: u8,
+
+ /// The register number of the second operand register.
+ rm: u8,
+
+ /// Whether or not this is a NOT instruction.
+ n: N,
+
+ /// The type of shift to perform on the second operand register.
+ shift: Shift,
+
+ /// The opcode for this instruction.
+ opc: Opc,
+
+ /// Whether or not this instruction is operating on 64-bit operands.
+ sf: Sf
+}
+
+impl LogicalReg {
+ /// AND (shifted register)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/AND--shifted-register---Bitwise-AND--shifted-register--?lang=en
+ pub fn and(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
+ Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::And, sf: num_bits.into() }
+ }
+
+ /// ANDS (shifted register)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ANDS--shifted-register---Bitwise-AND--shifted-register---setting-flags-?lang=en
+ pub fn ands(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
+ Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() }
+ }
+
+ /// EOR (shifted register)
+ /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/EOR--shifted-register---Bitwise-Exclusive-OR--shifted-register--
+ pub fn eor(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
+ Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Eor, sf: num_bits.into() }
+ }
+
+ /// MOV (register)
+ /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--?lang=en
+ pub fn mov(rd: u8, rm: u8, num_bits: u8) -> Self {
+ Self { rd, rn: 0b11111, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() }
+ }
+
+ /// MVN (shifted register)
+ /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/MVN--Bitwise-NOT--an-alias-of-ORN--shifted-register--?lang=en
+ pub fn mvn(rd: u8, rm: u8, num_bits: u8) -> Self {
+ Self { rd, rn: 0b11111, imm6: 0, rm, n: N::Yes, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() }
+ }
+
+ /// ORN (shifted register)
+ /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORN--shifted-register---Bitwise-OR-NOT--shifted-register--
+ pub fn orn(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
+ Self { rd, rn, imm6: 0, rm, n: N::Yes, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() }
+ }
+
+ /// ORR (shifted register)
+ /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ORR--shifted-register---Bitwise-OR--shifted-register--
+ pub fn orr(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
+ Self { rd, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Orr, sf: num_bits.into() }
+ }
+
+ /// TST (shifted register)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TST--shifted-register---Test--shifted-register---an-alias-of-ANDS--shifted-register--?lang=en
+ pub fn tst(rn: u8, rm: u8, num_bits: u8) -> Self {
+ Self { rd: 31, rn, imm6: 0, rm, n: N::No, shift: Shift::LSL, opc: Opc::Ands, sf: num_bits.into() }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Register?lang=en
+const FAMILY: u32 = 0b0101;
+
+impl From<LogicalReg> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: LogicalReg) -> Self {
+ 0
+ | ((inst.sf as u32) << 31)
+ | ((inst.opc as u32) << 29)
+ | (FAMILY << 25)
+ | ((inst.shift as u32) << 22)
+ | ((inst.n as u32) << 21)
+ | ((inst.rm as u32) << 16)
+ | (truncate_uimm::<_, 6>(inst.imm6) << 10)
+ | ((inst.rn as u32) << 5)
+ | inst.rd as u32
+ }
+}
+
+impl From<LogicalReg> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: LogicalReg) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_and() {
+ let inst = LogicalReg::and(0, 1, 2, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0x8a020020, result);
+ }
+
+ #[test]
+ fn test_ands() {
+ let inst = LogicalReg::ands(0, 1, 2, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xea020020, result);
+ }
+
+ #[test]
+ fn test_eor() {
+ let inst = LogicalReg::eor(0, 1, 2, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xca020020, result);
+ }
+
+ #[test]
+ fn test_mov() {
+ let inst = LogicalReg::mov(0, 1, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xaa0103e0, result);
+ }
+
+ #[test]
+ fn test_mvn() {
+ let inst = LogicalReg::mvn(0, 1, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xaa2103e0, result);
+ }
+
+ #[test]
+ fn test_orn() {
+ let inst = LogicalReg::orn(0, 1, 2, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xaa220020, result);
+ }
+
+ #[test]
+ fn test_orr() {
+ let inst = LogicalReg::orr(0, 1, 2, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xaa020020, result);
+ }
+
+ #[test]
+ fn test_tst() {
+ let inst = LogicalReg::tst(0, 1, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xea01001f, result);
+ }
+}
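The MOV and MVN constructors above are just ORR/ORN (shifted register) with the zero register (31) as the first operand, which the following standalone sketch makes explicit; the `encode_orr_reg` helper is illustrative only and not part of the patch.

```rust
// Illustrative only, not part of this patch. 64-bit ORR/ORN (shifted register)
// with no shift; passing rn = 31 gives the MOV/MVN aliases used above.
fn encode_orr_reg(rd: u8, rn: u8, rm: u8, not: u32) -> u32 {
    (1 << 31) | (0b01 << 29) | (0b0101 << 25) | (not << 21)
        | ((rm as u32) << 16) | ((rn as u32) << 5) | (rd as u32)
}

fn main() {
    // MOV X0, X1 and MVN X0, X1: same values as test_mov and test_mvn above.
    assert_eq!(0xaa0103e0, encode_orr_reg(0, 31, 1, 0));
    assert_eq!(0xaa2103e0, encode_orr_reg(0, 31, 1, 1));
}
```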
diff --git a/yjit/src/asm/arm64/inst/madd.rs b/yjit/src/asm/arm64/inst/madd.rs
new file mode 100644
index 0000000000..683e643189
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/madd.rs
@@ -0,0 +1,73 @@
+use super::super::arg::Sf;
+
+/// The struct that represents an A64 multiply-add instruction that can be
+/// encoded.
+///
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 0 0 1 1 0 1 1 0 0 0 0 |
+/// | sf rm.............. ra.............. rn.............. rd.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct MAdd {
+ /// The number of the general-purpose destination register.
+ rd: u8,
+
+ /// The number of the first general-purpose source register.
+ rn: u8,
+
+ /// The number of the third general-purpose source register.
+ ra: u8,
+
+ /// The number of the second general-purpose source register.
+ rm: u8,
+
+ /// The size of the registers of this instruction.
+ sf: Sf
+}
+
+impl MAdd {
+ /// MUL
+ /// https://developer.arm.com/documentation/ddi0602/2023-06/Base-Instructions/MUL--Multiply--an-alias-of-MADD-
+ pub fn mul(rd: u8, rn: u8, rm: u8, num_bits: u8) -> Self {
+ Self { rd, rn, ra: 0b11111, rm, sf: num_bits.into() }
+ }
+}
+
+impl From<MAdd> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: MAdd) -> Self {
+ 0
+ | ((inst.sf as u32) << 31)
+ | (0b11011 << 24)
+ | ((inst.rm as u32) << 16)
+ | ((inst.ra as u32) << 10)
+ | ((inst.rn as u32) << 5)
+ | (inst.rd as u32)
+ }
+}
+
+impl From<MAdd> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: MAdd) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_mul_32() {
+ let result: u32 = MAdd::mul(0, 1, 2, 32).into();
+ assert_eq!(0x1B027C20, result);
+ }
+
+ #[test]
+ fn test_mul_64() {
+ let result: u32 = MAdd::mul(0, 1, 2, 64).into();
+ assert_eq!(0x9B027C20, result);
+ }
+}
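MUL above is MADD with the zero register (31) as the addend, i.e. rd = rn * rm + xzr. A standalone sketch checked against the 64-bit test vector; the `encode_mul` helper is illustrative only and not part of the patch.

```rust
// Illustrative only, not part of this patch. MADD layout with ra fixed to the
// zero register, which is the MUL alias used above.
fn encode_mul(rd: u8, rn: u8, rm: u8, sf: u32) -> u32 {
    (sf << 31) | (0b11011 << 24) | ((rm as u32) << 16)
        | (31 << 10) | ((rn as u32) << 5) | (rd as u32)
}

fn main() {
    // Same value as test_mul_64 above.
    assert_eq!(0x9b027c20, encode_mul(0, 1, 2, 1));
}
```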
diff --git a/yjit/src/asm/arm64/inst/mod.rs b/yjit/src/asm/arm64/inst/mod.rs
new file mode 100644
index 0000000000..bfffd914ef
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/mod.rs
@@ -0,0 +1,54 @@
+// This module contains various A64 instructions and the logic necessary to
+// encode them into u32s.
+
+mod atomic;
+mod branch;
+mod branch_cond;
+mod breakpoint;
+mod call;
+mod conditional;
+mod data_imm;
+mod data_reg;
+mod halfword_imm;
+mod load_literal;
+mod load_register;
+mod load_store;
+mod load_store_exclusive;
+mod logical_imm;
+mod logical_reg;
+mod madd;
+mod smulh;
+mod mov;
+mod nop;
+mod pc_rel;
+mod reg_pair;
+mod sbfm;
+mod shift_imm;
+mod sys_reg;
+mod test_bit;
+
+pub use atomic::Atomic;
+pub use branch::Branch;
+pub use branch_cond::BranchCond;
+pub use breakpoint::Breakpoint;
+pub use call::Call;
+pub use conditional::Conditional;
+pub use data_imm::DataImm;
+pub use data_reg::DataReg;
+pub use halfword_imm::HalfwordImm;
+pub use load_literal::LoadLiteral;
+pub use load_register::LoadRegister;
+pub use load_store::LoadStore;
+pub use load_store_exclusive::LoadStoreExclusive;
+pub use logical_imm::LogicalImm;
+pub use logical_reg::LogicalReg;
+pub use madd::MAdd;
+pub use smulh::SMulH;
+pub use mov::Mov;
+pub use nop::Nop;
+pub use pc_rel::PCRelative;
+pub use reg_pair::RegisterPair;
+pub use sbfm::SBFM;
+pub use shift_imm::ShiftImm;
+pub use sys_reg::SysReg;
+pub use test_bit::TestBit;
diff --git a/yjit/src/asm/arm64/inst/mov.rs b/yjit/src/asm/arm64/inst/mov.rs
new file mode 100644
index 0000000000..e7cb9215b0
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/mov.rs
@@ -0,0 +1,155 @@
+use super::super::arg::Sf;
+
+/// Which operation is being performed.
+enum Op {
+ /// A movz operation which zeroes out the other bits.
+ MOVZ = 0b10,
+
+ /// A movk operation which keeps the other bits in place.
+ MOVK = 0b11
+}
+
+/// How much to shift the immediate by.
+enum Hw {
+ LSL0 = 0b00,
+ LSL16 = 0b01,
+ LSL32 = 0b10,
+ LSL48 = 0b11
+}
+
+impl From<u8> for Hw {
+ fn from(shift: u8) -> Self {
+ match shift {
+ 0 => Hw::LSL0,
+ 16 => Hw::LSL16,
+ 32 => Hw::LSL32,
+ 48 => Hw::LSL48,
+ _ => panic!("Invalid value for shift: {}", shift)
+ }
+ }
+}
+
+/// The struct that represents a MOVK or MOVZ instruction.
+///
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 1 0 0 1 0 1 |
+/// | sf op... hw... imm16.................................................. rd.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct Mov {
+ /// The register number of the destination register.
+ rd: u8,
+
+ /// The value to move into the register.
+ imm16: u16,
+
+ /// The shift of the value to move.
+ hw: Hw,
+
+ /// Which operation is being performed.
+ op: Op,
+
+ /// Whether or not this instruction is operating on 64-bit operands.
+ sf: Sf
+}
+
+impl Mov {
+ /// MOVK
+ /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVK--Move-wide-with-keep-?lang=en
+ pub fn movk(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self {
+ Self { rd, imm16, hw: hw.into(), op: Op::MOVK, sf: num_bits.into() }
+ }
+
+ /// MOVZ
+ /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVZ--Move-wide-with-zero-?lang=en
+ pub fn movz(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self {
+ Self { rd, imm16, hw: hw.into(), op: Op::MOVZ, sf: num_bits.into() }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en
+const FAMILY: u32 = 0b1000;
+
+impl From<Mov> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: Mov) -> Self {
+ 0
+ | ((inst.sf as u32) << 31)
+ | ((inst.op as u32) << 29)
+ | (FAMILY << 25)
+ | (0b101 << 23)
+ | ((inst.hw as u32) << 21)
+ | ((inst.imm16 as u32) << 5)
+ | inst.rd as u32
+ }
+}
+
+impl From<Mov> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: Mov) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_movk_unshifted() {
+ let inst = Mov::movk(0, 123, 0, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf2800f60, result);
+ }
+
+ #[test]
+ fn test_movk_shifted_16() {
+ let inst = Mov::movk(0, 123, 16, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf2A00f60, result);
+ }
+
+ #[test]
+ fn test_movk_shifted_32() {
+ let inst = Mov::movk(0, 123, 32, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf2C00f60, result);
+ }
+
+ #[test]
+ fn test_movk_shifted_48() {
+ let inst = Mov::movk(0, 123, 48, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xf2e00f60, result);
+ }
+
+ #[test]
+ fn test_movz_unshifted() {
+ let inst = Mov::movz(0, 123, 0, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xd2800f60, result);
+ }
+
+ #[test]
+ fn test_movz_shifted_16() {
+ let inst = Mov::movz(0, 123, 16, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xd2a00f60, result);
+ }
+
+ #[test]
+ fn test_movz_shifted_32() {
+ let inst = Mov::movz(0, 123, 32, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xd2c00f60, result);
+ }
+
+ #[test]
+ fn test_movz_shifted_48() {
+ let inst = Mov::movz(0, 123, 48, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xd2e00f60, result);
+ }
+}
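MOVZ/MOVK place a 16-bit chunk at one of four shifts (the hw field), so a full 64-bit constant is one MOVZ followed by up to three MOVKs. A standalone sketch of the packing, checked against two test vectors above; the `encode_movzk` helper is illustrative only and not part of the patch.

```rust
// Illustrative only, not part of this patch. Shared MOVZ/MOVK layout; op is
// 0b10 for MOVZ and 0b11 for MOVK, hw selects the 16-bit lane.
fn encode_movzk(op: u32, rd: u8, imm16: u16, hw: u32) -> u32 {
    (1 << 31) | (op << 29) | (0b1000 << 25) | (0b101 << 23)
        | (hw << 21) | ((imm16 as u32) << 5) | (rd as u32)
}

fn main() {
    // MOVZ X0, #123 and MOVK X0, #123, LSL #16: same values as the tests above.
    assert_eq!(0xd2800f60, encode_movzk(0b10, 0, 123, 0b00));
    assert_eq!(0xf2a00f60, encode_movzk(0b11, 0, 123, 0b01));
}
```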
diff --git a/yjit/src/asm/arm64/inst/nop.rs b/yjit/src/asm/arm64/inst/nop.rs
new file mode 100644
index 0000000000..d58b3574a9
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/nop.rs
@@ -0,0 +1,44 @@
+/// The struct that represents an A64 nop instruction that can be encoded.
+///
+/// NOP
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 1 1 0 1 0 1 0 1 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct Nop;
+
+impl Nop {
+ /// NOP
+ /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/NOP--No-Operation-
+ pub fn nop() -> Self {
+ Self {}
+ }
+}
+
+impl From<Nop> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(_inst: Nop) -> Self {
+ 0b11010101000000110010000000011111
+ }
+}
+
+impl From<Nop> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: Nop) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_nop() {
+ let inst = Nop::nop();
+ let result: u32 = inst.into();
+ assert_eq!(0xd503201f, result);
+ }
+}
diff --git a/yjit/src/asm/arm64/inst/pc_rel.rs b/yjit/src/asm/arm64/inst/pc_rel.rs
new file mode 100644
index 0000000000..bd1a2b9367
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/pc_rel.rs
@@ -0,0 +1,107 @@
+/// Which operation to perform for the PC-relative instruction.
+enum Op {
+ /// Form a PC-relative address.
+ ADR = 0,
+
+ /// Form a PC-relative address to a 4KB page.
+ ADRP = 1
+}
+
+/// The struct that represents an A64 PC-relative address instruction that can
+/// be encoded.
+///
+/// ADR
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 1 0 0 0 0 |
+/// | op immlo immhi........................................................... rd.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct PCRelative {
+ /// The number for the general-purpose register to load the address into.
+ rd: u8,
+
+ /// The number of bytes to add to the PC to form the address.
+ imm: i32,
+
+ /// Which operation to perform for this instruction.
+ op: Op
+}
+
+impl PCRelative {
+ /// ADR
+ /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/ADR--Form-PC-relative-address-
+ pub fn adr(rd: u8, imm: i32) -> Self {
+ Self { rd, imm, op: Op::ADR }
+ }
+
+ /// ADRP
+ /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/ADRP--Form-PC-relative-address-to-4KB-page-
+ pub fn adrp(rd: u8, imm: i32) -> Self {
+ Self { rd, imm: imm >> 12, op: Op::ADRP }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en
+const FAMILY: u32 = 0b1000;
+
+impl From<PCRelative> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: PCRelative) -> Self {
+ let immlo = (inst.imm & 0b11) as u32;
+ let mut immhi = ((inst.imm >> 2) & ((1 << 18) - 1)) as u32;
+
+ // Toggle the sign bit if necessary.
+ if inst.imm < 0 {
+ immhi |= 1 << 18;
+ }
+
+ 0
+ | ((inst.op as u32) << 31)
+ | (immlo << 29)
+ | (FAMILY << 25)
+ | (immhi << 5)
+ | inst.rd as u32
+ }
+}
+
+impl From<PCRelative> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: PCRelative) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_adr_positive() {
+ let inst = PCRelative::adr(0, 5);
+ let result: u32 = inst.into();
+ assert_eq!(0x30000020, result);
+ }
+
+ #[test]
+ fn test_adr_negative() {
+ let inst = PCRelative::adr(0, -5);
+ let result: u32 = inst.into();
+ assert_eq!(0x70ffffc0, result);
+ }
+
+ #[test]
+ fn test_adrp_positive() {
+ let inst = PCRelative::adrp(0, 0x4000);
+ let result: u32 = inst.into();
+ assert_eq!(0x90000020, result);
+ }
+
+ #[test]
+ fn test_adrp_negative() {
+ let inst = PCRelative::adrp(0, -0x4000);
+ let result: u32 = inst.into();
+ assert_eq!(0x90ffffe0, result);
+ }
+}
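ADR carries a 21-bit signed byte offset split into immlo (the low 2 bits) and immhi (the upper 19 bits), which is exactly what the From<PCRelative> impl above does by hand. A standalone sketch of that split, checked against the two ADR test vectors; the `encode_adr` helper is illustrative only and not part of the patch.

```rust
// Illustrative only, not part of this patch. ADR (op = 0): truncate the byte
// offset to 21 bits, then split it into immlo and immhi.
fn encode_adr(rd: u8, imm: i32) -> u32 {
    let imm21 = (imm as u32) & ((1 << 21) - 1);
    let immlo = imm21 & 0b11;
    let immhi = imm21 >> 2;
    (immlo << 29) | (0b1000 << 25) | (immhi << 5) | (rd as u32)
}

fn main() {
    // Same values as test_adr_positive and test_adr_negative above.
    assert_eq!(0x30000020, encode_adr(0, 5));
    assert_eq!(0x70ffffc0, encode_adr(0, -5));
}
```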
diff --git a/yjit/src/asm/arm64/inst/reg_pair.rs b/yjit/src/asm/arm64/inst/reg_pair.rs
new file mode 100644
index 0000000000..87690e3b4a
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/reg_pair.rs
@@ -0,0 +1,212 @@
+use super::super::arg::truncate_imm;
+
+/// The operation to perform for this instruction.
+enum Opc {
+ /// When the registers are 32 bits wide.
+ Opc32 = 0b00,
+
+ /// When the registers are 64 bits wide.
+ Opc64 = 0b10
+}
+
+/// The kind of indexing to perform for this instruction.
+enum Index {
+ StorePostIndex = 0b010,
+ LoadPostIndex = 0b011,
+ StoreSignedOffset = 0b100,
+ LoadSignedOffset = 0b101,
+ StorePreIndex = 0b110,
+ LoadPreIndex = 0b111
+}
+
+/// A convenience function so that we can convert the number of bits of a
+/// register operand directly into an Opc variant.
+impl From<u8> for Opc {
+ fn from(num_bits: u8) -> Self {
+ match num_bits {
+ 64 => Opc::Opc64,
+ 32 => Opc::Opc32,
+ _ => panic!("Invalid number of bits: {}", num_bits)
+ }
+ }
+}
+
+/// The struct that represents an A64 register pair instruction that can be
+/// encoded.
+///
+/// STP/LDP
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 0 1 0 1 0 0 |
+/// | opc index..... imm7.................... rt2............. rn.............. rt1............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct RegisterPair {
+ /// The number of the first register to be transferred.
+ rt1: u8,
+
+ /// The number of the base register.
+ rn: u8,
+
+ /// The number of the second register to be transferred.
+ rt2: u8,
+
+ /// The signed immediate byte offset, a multiple of 8.
+ imm7: i16,
+
+ /// The kind of indexing to use for this instruction.
+ index: Index,
+
+ /// The operation to be performed (in terms of size).
+ opc: Opc
+}
+
+impl RegisterPair {
+ /// Create a register pair instruction with a given indexing mode.
+ fn new(rt1: u8, rt2: u8, rn: u8, disp: i16, index: Index, num_bits: u8) -> Self {
+ Self { rt1, rn, rt2, imm7: disp / 8, index, opc: num_bits.into() }
+ }
+
+ /// LDP (signed offset)
+ /// LDP <Xt1>, <Xt2>, [<Xn|SP>{, #<imm>}]
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en
+ pub fn ldp(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self {
+ Self::new(rt1, rt2, rn, disp, Index::LoadSignedOffset, num_bits)
+ }
+
+ /// LDP (pre-index)
+ /// LDP <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]!
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en
+ pub fn ldp_pre(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self {
+ Self::new(rt1, rt2, rn, disp, Index::LoadPreIndex, num_bits)
+ }
+
+ /// LDP (post-index)
+ /// LDP <Xt1>, <Xt2>, [<Xn|SP>], #<imm>
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-?lang=en
+ pub fn ldp_post(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self {
+ Self::new(rt1, rt2, rn, disp, Index::LoadPostIndex, num_bits)
+ }
+
+ /// STP (signed offset)
+ /// STP <Xt1>, <Xt2>, [<Xn|SP>{, #<imm>}]
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en
+ pub fn stp(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self {
+ Self::new(rt1, rt2, rn, disp, Index::StoreSignedOffset, num_bits)
+ }
+
+ /// STP (pre-index)
+ /// STP <Xt1>, <Xt2>, [<Xn|SP>, #<imm>]!
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en
+ pub fn stp_pre(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self {
+ Self::new(rt1, rt2, rn, disp, Index::StorePreIndex, num_bits)
+ }
+
+ /// STP (post-index)
+ /// STP <Xt1>, <Xt2>, [<Xn|SP>], #<imm>
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-?lang=en
+ pub fn stp_post(rt1: u8, rt2: u8, rn: u8, disp: i16, num_bits: u8) -> Self {
+ Self::new(rt1, rt2, rn, disp, Index::StorePostIndex, num_bits)
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Loads-and-Stores?lang=en
+const FAMILY: u32 = 0b0100;
+
+impl From<RegisterPair> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: RegisterPair) -> Self {
+ 0
+ | ((inst.opc as u32) << 30)
+ | (1 << 29)
+ | (FAMILY << 25)
+ | ((inst.index as u32) << 22)
+ | (truncate_imm::<_, 7>(inst.imm7) << 15)
+ | ((inst.rt2 as u32) << 10)
+ | ((inst.rn as u32) << 5)
+ | (inst.rt1 as u32)
+ }
+}
+
+impl From<RegisterPair> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: RegisterPair) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_ldp() {
+ let inst = RegisterPair::ldp(0, 1, 2, 0, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xa9400440, result);
+ }
+
+ #[test]
+ fn test_ldp_maximum_displacement() {
+ let inst = RegisterPair::ldp(0, 1, 2, 504, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xa95f8440, result);
+ }
+
+ #[test]
+ fn test_ldp_minimum_displacement() {
+ let inst = RegisterPair::ldp(0, 1, 2, -512, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xa9600440, result);
+ }
+
+ #[test]
+ fn test_ldp_pre() {
+ let inst = RegisterPair::ldp_pre(0, 1, 2, 256, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xa9d00440, result);
+ }
+
+ #[test]
+ fn test_ldp_post() {
+ let inst = RegisterPair::ldp_post(0, 1, 2, 256, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xa8d00440, result);
+ }
+
+ #[test]
+ fn test_stp() {
+ let inst = RegisterPair::stp(0, 1, 2, 0, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xa9000440, result);
+ }
+
+ #[test]
+ fn test_stp_maximum_displacement() {
+ let inst = RegisterPair::stp(0, 1, 2, 504, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xa91f8440, result);
+ }
+
+ #[test]
+ fn test_stp_minimum_displacement() {
+ let inst = RegisterPair::stp(0, 1, 2, -512, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xa9200440, result);
+ }
+
+ #[test]
+ fn test_stp_pre() {
+ let inst = RegisterPair::stp_pre(0, 1, 2, 256, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xa9900440, result);
+ }
+
+ #[test]
+ fn test_stp_post() {
+ let inst = RegisterPair::stp_post(0, 1, 2, 256, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xa8900440, result);
+ }
+}
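For the 64-bit forms, the 7-bit immediate is the byte displacement divided by 8, so the reachable range is -512..=504 in steps of 8, which the minimum/maximum displacement tests above exercise. A standalone sketch of the signed-offset LDP packing; the `encode_ldp_signed_offset` helper is illustrative only and not part of the patch.

```rust
// Illustrative only, not part of this patch. 64-bit LDP (signed offset): the
// displacement is scaled down by 8 before being truncated to 7 bits.
fn encode_ldp_signed_offset(rt1: u8, rt2: u8, rn: u8, disp: i16) -> u32 {
    let imm7 = ((disp / 8) as u32) & ((1 << 7) - 1);
    (0b10 << 30) | (1 << 29) | (0b0100 << 25) | (0b101 << 22)
        | (imm7 << 15) | ((rt2 as u32) << 10) | ((rn as u32) << 5) | (rt1 as u32)
}

fn main() {
    // Same values as the maximum/minimum displacement LDP tests above.
    assert_eq!(0xa95f8440, encode_ldp_signed_offset(0, 1, 2, 504));
    assert_eq!(0xa9600440, encode_ldp_signed_offset(0, 1, 2, -512));
}
```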
diff --git a/yjit/src/asm/arm64/inst/sbfm.rs b/yjit/src/asm/arm64/inst/sbfm.rs
new file mode 100644
index 0000000000..8602998980
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/sbfm.rs
@@ -0,0 +1,103 @@
+use super::super::arg::{Sf, truncate_uimm};
+
+/// The struct that represents an A64 signed bitfield move instruction that can
+/// be encoded.
+///
+/// SBFM
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 0 0 1 0 0 1 1 0 |
+/// | sf N immr............... imms............... rn.............. rd.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct SBFM {
+ /// The number for the general-purpose register to load the value into.
+ rd: u8,
+
+ /// The number for the general-purpose register to copy from.
+ rn: u8,
+
+ /// The leftmost bit number to be moved from the source.
+ imms: u8,
+
+ /// The right rotate amount.
+ immr: u8,
+
+ /// Whether or not this is a 64-bit operation.
+ n: bool,
+
+ /// The size of this operation.
+ sf: Sf
+}
+
+impl SBFM {
+ /// ASR
+ /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/ASR--immediate---Arithmetic-Shift-Right--immediate---an-alias-of-SBFM-?lang=en
+ pub fn asr(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self {
+ let (imms, n) = if num_bits == 64 {
+ (0b111111, true)
+ } else {
+ (0b011111, false)
+ };
+
+ Self { rd, rn, immr: shift, imms, n, sf: num_bits.into() }
+ }
+
+ /// SXTW
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SXTW--Sign-Extend-Word--an-alias-of-SBFM-?lang=en
+ pub fn sxtw(rd: u8, rn: u8) -> Self {
+ Self { rd, rn, immr: 0, imms: 31, n: true, sf: Sf::Sf64 }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#bitfield
+const FAMILY: u32 = 0b1001;
+
+impl From<SBFM> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: SBFM) -> Self {
+ 0
+ | ((inst.sf as u32) << 31)
+ | (FAMILY << 25)
+ | (1 << 24)
+ | ((inst.n as u32) << 22)
+ | (truncate_uimm::<_, 6>(inst.immr) << 16)
+ | (truncate_uimm::<_, 6>(inst.imms) << 10)
+ | ((inst.rn as u32) << 5)
+ | inst.rd as u32
+ }
+}
+
+impl From<SBFM> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: SBFM) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_asr_32_bits() {
+ let inst = SBFM::asr(0, 1, 2, 32);
+ let result: u32 = inst.into();
+ assert_eq!(0x13027c20, result);
+ }
+
+ #[test]
+ fn test_asr_64_bits() {
+ let inst = SBFM::asr(10, 11, 5, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0x9345fd6a, result);
+ }
+
+ #[test]
+ fn test_sxtw() {
+ let inst = SBFM::sxtw(0, 1);
+ let result: u32 = inst.into();
+ assert_eq!(0x93407c20, result);
+ }
+}
diff --git a/yjit/src/asm/arm64/inst/shift_imm.rs b/yjit/src/asm/arm64/inst/shift_imm.rs
new file mode 100644
index 0000000000..3d2685a997
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/shift_imm.rs
@@ -0,0 +1,147 @@
+use super::super::arg::Sf;
+
+/// The operation to perform for this instruction.
+enum Opc {
+ /// Logical shift left
+ LSL,
+
+ /// Logical shift right
+ LSR
+}
+
+/// The struct that represents an A64 unsigned bitfield move instruction that
+/// can be encoded.
+///
+/// LSL (immediate)
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 1 0 1 0 0 1 1 0 |
+/// | sf N immr............... imms............... rn.............. rd.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct ShiftImm {
+ /// The register number of the destination register.
+ rd: u8,
+
+ /// The register number of the first operand register.
+ rn: u8,
+
+ /// The immediate value to shift by.
+ shift: u8,
+
+ /// The opcode for this instruction.
+ opc: Opc,
+
+ /// Whether or not this instruction is operating on 64-bit operands.
+ sf: Sf
+}
+
+impl ShiftImm {
+ /// LSL (immediate)
+ /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LSL--immediate---Logical-Shift-Left--immediate---an-alias-of-UBFM-?lang=en
+ pub fn lsl(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self {
+ ShiftImm { rd, rn, shift, opc: Opc::LSL, sf: num_bits.into() }
+ }
+
+ /// LSR (immediate)
+ /// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/LSR--immediate---Logical-Shift-Right--immediate---an-alias-of-UBFM-?lang=en
+ pub fn lsr(rd: u8, rn: u8, shift: u8, num_bits: u8) -> Self {
+ ShiftImm { rd, rn, shift, opc: Opc::LSR, sf: num_bits.into() }
+ }
+
+ /// Returns a triplet of (n, immr, imms) encoded in u32s for this
+ /// instruction. This mirrors how they will be encoded in the actual bits.
+ fn bitmask(&self) -> (u32, u32, u32) {
+ match self.opc {
+ // The key insight is a little buried in the docs, but effectively:
+ // LSL <Wd>, <Wn>, #<shift> == UBFM <Wd>, <Wn>, #(-<shift> MOD 32), #(31-<shift>)
+ // LSL <Xd>, <Xn>, #<shift> == UBFM <Xd>, <Xn>, #(-<shift> MOD 64), #(63-<shift>)
+ Opc::LSL => {
+ let shift = -(self.shift as i16);
+
+ match self.sf {
+ Sf::Sf32 => (
+ 0,
+ (shift.rem_euclid(32) & 0x3f) as u32,
+ ((31 - self.shift) & 0x3f) as u32
+ ),
+ Sf::Sf64 => (
+ 1,
+ (shift.rem_euclid(64) & 0x3f) as u32,
+ ((63 - self.shift) & 0x3f) as u32
+ )
+ }
+ },
+ // Similar to LSL:
+ // LSR <Wd>, <Wn>, #<shift> == UBFM <Wd>, <Wn>, #<shift>, #31
+ // LSR <Xd>, <Xn>, #<shift> == UBFM <Xd>, <Xn>, #<shift>, #63
+ Opc::LSR => {
+ match self.sf {
+ Sf::Sf32 => (0, (self.shift & 0x3f) as u32, 31),
+ Sf::Sf64 => (1, (self.shift & 0x3f) as u32, 63)
+ }
+ }
+ }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Data-Processing----Immediate?lang=en#bitfield
+const FAMILY: u32 = 0b10011;
+
+impl From<ShiftImm> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: ShiftImm) -> Self {
+ let (n, immr, imms) = inst.bitmask();
+
+ 0
+ | ((inst.sf as u32) << 31)
+ | (1 << 30)
+ | (FAMILY << 24)
+ | (n << 22)
+ | (immr << 16)
+ | (imms << 10)
+ | ((inst.rn as u32) << 5)
+ | inst.rd as u32
+ }
+}
+
+impl From<ShiftImm> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: ShiftImm) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_lsl_32() {
+ let inst = ShiftImm::lsl(0, 1, 7, 32);
+ let result: u32 = inst.into();
+ assert_eq!(0x53196020, result);
+ }
+
+ #[test]
+ fn test_lsl_64() {
+ let inst = ShiftImm::lsl(0, 1, 7, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xd379e020, result);
+ }
+
+ #[test]
+ fn test_lsr_32() {
+ let inst = ShiftImm::lsr(0, 1, 7, 32);
+ let result: u32 = inst.into();
+ assert_eq!(0x53077c20, result);
+ }
+
+ #[test]
+ fn test_lsr_64() {
+ let inst = ShiftImm::lsr(0, 1, 7, 64);
+ let result: u32 = inst.into();
+ assert_eq!(0xd347fc20, result);
+ }
+}
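The UBFM alias described in bitmask() is easiest to see with a worked example: a 64-bit LSL #7 is UBFM Xd, Xn, #((-7) mod 64), #(63 - 7), i.e. immr = 57 and imms = 56. The standalone sketch below packs those fields and reproduces the 64-bit LSL test vector; the `encode_lsl_64` helper is illustrative only and not part of the patch.

```rust
// Illustrative only, not part of this patch. 64-bit LSL (immediate) expressed
// as its UBFM alias, mirroring the bitmask() computation above.
fn encode_lsl_64(rd: u8, rn: u8, shift: u32) -> u32 {
    let immr = (64 - shift) % 64;
    let imms = 63 - shift;
    (1 << 31) | (1 << 30) | (0b10011 << 24) | (1 << 22)
        | (immr << 16) | (imms << 10) | ((rn as u32) << 5) | (rd as u32)
}

fn main() {
    // Same value as test_lsl_64 above.
    assert_eq!(0xd379e020, encode_lsl_64(0, 1, 7));
}
```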
diff --git a/yjit/src/asm/arm64/inst/smulh.rs b/yjit/src/asm/arm64/inst/smulh.rs
new file mode 100644
index 0000000000..5e9b231fde
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/smulh.rs
@@ -0,0 +1,60 @@
+/// The struct that represents an A64 signed multiply high instruction
+///
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 1 0 0 1 1 0 1 1 0 1 0 0 |
+/// | rm.............. ra.............. rn.............. rd.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct SMulH {
+ /// The number of the general-purpose destination register.
+ rd: u8,
+
+ /// The number of the first general-purpose source register.
+ rn: u8,
+
+ /// The number of the third general-purpose source register.
+ ra: u8,
+
+ /// The number of the second general-purpose source register.
+ rm: u8,
+}
+
+impl SMulH {
+ /// SMULH
+ /// https://developer.arm.com/documentation/ddi0602/2023-06/Base-Instructions/SMULH--Signed-Multiply-High-
+ pub fn smulh(rd: u8, rn: u8, rm: u8) -> Self {
+ Self { rd, rn, ra: 0b11111, rm }
+ }
+}
+
+impl From<SMulH> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: SMulH) -> Self {
+ 0
+ | (0b10011011010 << 21)
+ | ((inst.rm as u32) << 16)
+ | ((inst.ra as u32) << 10)
+ | ((inst.rn as u32) << 5)
+ | (inst.rd as u32)
+ }
+}
+
+impl From<SMulH> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: SMulH) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_smulh() {
+ let result: u32 = SMulH::smulh(0, 1, 2).into();
+ assert_eq!(0x9b427c20, result);
+ }
+}
diff --git a/yjit/src/asm/arm64/inst/sys_reg.rs b/yjit/src/asm/arm64/inst/sys_reg.rs
new file mode 100644
index 0000000000..108737a870
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/sys_reg.rs
@@ -0,0 +1,86 @@
+use super::super::arg::SystemRegister;
+
+/// Which operation to perform (loading or storing the system register value).
+enum L {
+ /// Store the value of a general-purpose register in a system register.
+ MSR = 0,
+
+ /// Store the value of a system register in a general-purpose register.
+ MRS = 1
+}
+
+/// The struct that represents an A64 system register instruction that can be
+/// encoded.
+///
+/// MSR/MRS (register)
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 1 1 0 1 0 1 0 1 0 0 1 |
+/// | L o0 op1..... CRn........ CRm........ op2..... rt.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct SysReg {
+ /// The register to load the system register value into.
+ rt: u8,
+
+ /// Which system register to load or store.
+ systemreg: SystemRegister,
+
+ /// Which operation to perform (loading or storing the system register value).
+ l: L
+}
+
+impl SysReg {
+ /// MRS (register)
+ /// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MRS--Move-System-Register-?lang=en
+ pub fn mrs(rt: u8, systemreg: SystemRegister) -> Self {
+ SysReg { rt, systemreg, l: L::MRS }
+ }
+
+ /// MSR (register)
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MSR--register---Move-general-purpose-register-to-System-Register-?lang=en
+ pub fn msr(systemreg: SystemRegister, rt: u8) -> Self {
+ SysReg { rt, systemreg, l: L::MSR }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en#systemmove
+const FAMILY: u32 = 0b110101010001;
+
+impl From<SysReg> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: SysReg) -> Self {
+ 0
+ | (FAMILY << 20)
+ | ((inst.l as u32) << 21)
+ | ((inst.systemreg as u32) << 5)
+ | inst.rt as u32
+ }
+}
+
+impl From<SysReg> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: SysReg) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_mrs() {
+ let inst = SysReg::mrs(0, SystemRegister::NZCV);
+ let result: u32 = inst.into();
+ assert_eq!(0xd53b4200, result);
+ }
+
+ #[test]
+ fn test_msr() {
+ let inst = SysReg::msr(SystemRegister::NZCV, 0);
+ let result: u32 = inst.into();
+ assert_eq!(0xd51b4200, result);
+ }
+}
diff --git a/yjit/src/asm/arm64/inst/test_bit.rs b/yjit/src/asm/arm64/inst/test_bit.rs
new file mode 100644
index 0000000000..c57a05ad2b
--- /dev/null
+++ b/yjit/src/asm/arm64/inst/test_bit.rs
@@ -0,0 +1,133 @@
+use super::super::arg::truncate_imm;
+
+/// The upper bit of the bit number to test.
+#[derive(Debug)]
+enum B5 {
+ /// When the bit number is below 32.
+ B532 = 0,
+
+ /// When the bit number is equal to or above 32.
+ B564 = 1
+}
+
+/// A convenience function so that we can convert the bit number directly into a
+/// B5 variant.
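+/// For example, bit number 40 is encoded as `B5::B564` (the upper bit set)
+/// together with `b40 == 40 & 0b11111 == 8` as the lower five bits.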
+impl From<u8> for B5 {
+ fn from(bit_num: u8) -> Self {
+ match bit_num {
+ 0..=31 => B5::B532,
+ 32..=63 => B5::B564,
+ _ => panic!("Invalid bit number: {}", bit_num)
+ }
+ }
+}
+
+/// The operation to perform for this instruction.
+enum Op {
+ /// The test bit zero operation.
+ TBZ = 0,
+
+ /// The test bit not zero operation.
+ TBNZ = 1
+}
+
+/// The struct that represents an A64 test bit instruction that can be encoded.
+///
+/// TBNZ/TBZ
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | 0 1 1 0 1 1 |
+/// | b5 op b40............. imm14.......................................... rt.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+///
+pub struct TestBit {
+ /// The number of the register to test.
+ rt: u8,
+
+ /// The PC-relative offset to the target instruction, in terms of the
+ /// number of instructions.
+ imm14: i16,
+
+ /// The lower 5 bits of the bit number to be tested.
+ b40: u8,
+
+ /// The operation to perform for this instruction.
+ op: Op,
+
+ /// The upper bit of the bit number to test.
+ b5: B5
+}
+
+impl TestBit {
+ /// TBNZ
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TBNZ--Test-bit-and-Branch-if-Nonzero-?lang=en
+ pub fn tbnz(rt: u8, bit_num: u8, offset: i16) -> Self {
+ Self { rt, imm14: offset, b40: bit_num & 0b11111, op: Op::TBNZ, b5: bit_num.into() }
+ }
+
+ /// TBZ
+ /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/TBZ--Test-bit-and-Branch-if-Zero-?lang=en
+ pub fn tbz(rt: u8, bit_num: u8, offset: i16) -> Self {
+ Self { rt, imm14: offset, b40: bit_num & 0b11111, op: Op::TBZ, b5: bit_num.into() }
+ }
+}
+
+/// https://developer.arm.com/documentation/ddi0602/2022-03/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en
+const FAMILY: u32 = 0b11011;
+
+impl From<TestBit> for u32 {
+ /// Convert an instruction into a 32-bit value.
+ fn from(inst: TestBit) -> Self {
+ let b40 = (inst.b40 & 0b11111) as u32;
+ let imm14 = truncate_imm::<_, 14>(inst.imm14);
+
+ 0
+ | ((inst.b5 as u32) << 31)
+ | (FAMILY << 25)
+ | ((inst.op as u32) << 24)
+ | (b40 << 19)
+ | (imm14 << 5)
+ | inst.rt as u32
+ }
+}
+
+impl From<TestBit> for [u8; 4] {
+ /// Convert an instruction into a 4 byte array.
+ fn from(inst: TestBit) -> [u8; 4] {
+ let result: u32 = inst.into();
+ result.to_le_bytes()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_tbnz() {
+ let inst = TestBit::tbnz(0, 0, 0);
+ let result: u32 = inst.into();
+ assert_eq!(0x37000000, result);
+ }
+
+ #[test]
+ fn test_tbnz_negative() {
+ let inst = TestBit::tbnz(0, 0, -1);
+ let result: u32 = inst.into();
+ assert_eq!(0x3707ffe0, result);
+ }
+
+ #[test]
+ fn test_tbz() {
+ let inst = TestBit::tbz(0, 0, 0);
+ let result: u32 = inst.into();
+ assert_eq!(0x36000000, result);
+ }
+
+ #[test]
+ fn test_tbz_negative() {
+ let inst = TestBit::tbz(0, 0, -1);
+ let result: u32 = inst.into();
+ assert_eq!(0x3607ffe0, result);
+ }
+}
diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs
new file mode 100644
index 0000000000..a94d435b7c
--- /dev/null
+++ b/yjit/src/asm/arm64/mod.rs
@@ -0,0 +1,1680 @@
+#![allow(dead_code)] // For instructions and operands we're not currently using.
+
+use crate::asm::CodeBlock;
+
+mod arg;
+mod inst;
+mod opnd;
+
+use inst::*;
+
+// We're going to make these public to make using these things easier in the
+// backend (so they don't have to have knowledge about the submodule).
+pub use arg::*;
+pub use opnd::*;
+
+/// Checks that a signed value fits within the specified number of bits.
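+/// For example, `imm_fits_bits(127, 8)` and `imm_fits_bits(-128, 8)` are true,
+/// while `imm_fits_bits(128, 8)` is false.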
+pub const fn imm_fits_bits(imm: i64, num_bits: u8) -> bool {
+ let minimum = if num_bits == 64 { i64::MIN } else { -(2_i64.pow((num_bits as u32) - 1)) };
+ let maximum = if num_bits == 64 { i64::MAX } else { 2_i64.pow((num_bits as u32) - 1) - 1 };
+
+ imm >= minimum && imm <= maximum
+}
+
+/// Checks that an unsigned value fits within the specified number of bits.
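+/// For example, `uimm_fits_bits(255, 8)` is true, while `uimm_fits_bits(256, 8)` is false.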
+pub const fn uimm_fits_bits(uimm: u64, num_bits: u8) -> bool {
+ let maximum = if num_bits == 64 { u64::MAX } else { 2_u64.pow(num_bits as u32) - 1 };
+
+ uimm <= maximum
+}
+
+/// ADD - add rn and rm, put the result in rd, don't update flags
+pub fn add(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rn, rm) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => {
+ assert!(
+ rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits,
+ "All operands must be of the same size."
+ );
+
+ DataReg::add(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+
+ DataImm::add(rd.reg_no, rn.reg_no, uimm12.try_into().unwrap(), rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+
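+ // The ADD (immediate) encoding only takes an unsigned 12-bit immediate,
+ // so a negative immediate is emitted as a SUB of its absolute value instead.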
+ if imm12 < 0 {
+ DataImm::sub(rd.reg_no, rn.reg_no, (-imm12 as u64).try_into().unwrap(), rd.num_bits).into()
+ } else {
+ DataImm::add(rd.reg_no, rn.reg_no, (imm12 as u64).try_into().unwrap(), rd.num_bits).into()
+ }
+ },
+ _ => panic!("Invalid operand combination to add instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// ADDS - add rn and rm, put the result in rd, update flags
+pub fn adds(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rn, rm) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => {
+ assert!(
+ rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits,
+ "All operands must be of the same size."
+ );
+
+ DataReg::adds(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+
+ DataImm::adds(rd.reg_no, rn.reg_no, imm12.try_into().unwrap(), rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+
+ if imm12 < 0 {
+ DataImm::subs(rd.reg_no, rn.reg_no, (-imm12 as u64).try_into().unwrap(), rd.num_bits).into()
+ } else {
+ DataImm::adds(rd.reg_no, rn.reg_no, (imm12 as u64).try_into().unwrap(), rd.num_bits).into()
+ }
+ },
+ _ => panic!("Invalid operand combination to adds instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// ADR - form a PC-relative address and load it into a register
+pub fn adr(cb: &mut CodeBlock, rd: A64Opnd, imm: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, imm) {
+ (A64Opnd::Reg(rd), A64Opnd::Imm(imm)) => {
+ assert!(rd.num_bits == 64, "The destination register must be 64 bits.");
+ assert!(imm_fits_bits(imm, 21), "The immediate operand must be 21 bits or less.");
+
+ PCRelative::adr(rd.reg_no, imm as i32).into()
+ },
+ _ => panic!("Invalid operand combination to adr instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// ADRP - form a PC-relative address to a 4KB page and load it into a register.
+/// This is effectively the same as ADR except that the immediate must be a
+/// multiple of 4KB.
+pub fn adrp(cb: &mut CodeBlock, rd: A64Opnd, imm: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, imm) {
+ (A64Opnd::Reg(rd), A64Opnd::Imm(imm)) => {
+ assert!(rd.num_bits == 64, "The destination register must be 64 bits.");
+ assert!(imm_fits_bits(imm, 32), "The immediate operand must be 32 bits or less.");
+
+ PCRelative::adrp(rd.reg_no, imm as i32).into()
+ },
+ _ => panic!("Invalid operand combination to adr instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// AND - and rn and rm, put the result in rd, don't update flags
+pub fn and(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rn, rm) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => {
+ assert!(
+ rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits,
+ "All operands must be of the same size."
+ );
+
+ LogicalReg::and(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+ let bitmask_imm = if rd.num_bits == 32 {
+ BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+ } else {
+ imm.try_into()
+ }.unwrap();
+
+ LogicalImm::and(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to and instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// ANDS - and rn and rm, put the result in rd, update flags
+pub fn ands(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rn, rm) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => {
+ assert!(
+ rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits,
+ "All operands must be of the same size."
+ );
+
+ LogicalReg::ands(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+ let bitmask_imm = if rd.num_bits == 32 {
+ BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+ } else {
+ imm.try_into()
+ }.unwrap();
+
+ LogicalImm::ands(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to ands instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// ASR - arithmetic shift right rn by shift, put the result in rd, don't update
+/// flags
+pub fn asr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rn, shift) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(shift)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+ assert!(uimm_fits_bits(shift, 6), "The shift operand must be 6 bits or less.");
+
+ SBFM::asr(rd.reg_no, rn.reg_no, shift.try_into().unwrap(), rd.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to asr instruction: asr {:?}, {:?}, {:?}", rd, rn, shift),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// Whether or not the offset between two instructions fits into the B or BL
+/// instruction's immediate. If it doesn't, then we have to load the value into
+/// a register first.
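+/// A 26-bit signed offset covers +/-2^25 instructions, i.e. roughly +/-128 MiB
+/// of code, since every A64 instruction is 4 bytes wide.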
+pub const fn b_offset_fits_bits(offset: i64) -> bool {
+ imm_fits_bits(offset, 26)
+}
+
+/// B - branch without link (offset is number of instructions to jump)
+pub fn b(cb: &mut CodeBlock, offset: InstructionOffset) {
+ assert!(b_offset_fits_bits(offset.into()), "The immediate operand must be 26 bits or less.");
+ let bytes: [u8; 4] = Call::b(offset).into();
+
+ cb.write_bytes(&bytes);
+}
+
+/// Whether or not the offset in number of instructions between two instructions
+/// fits into the b.cond instruction. If it doesn't, then we have to load the
+/// value into a register first, then use the b.cond instruction to skip past a
+/// direct jump.
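+/// A 19-bit signed offset covers +/-2^18 instructions, i.e. roughly +/-1 MiB
+/// of code.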
+pub const fn bcond_offset_fits_bits(offset: i64) -> bool {
+ imm_fits_bits(offset, 19)
+}
+
+/// CBZ and CBNZ also have a limit of 19 bits for the branch offset.
+pub use bcond_offset_fits_bits as cmp_branch_offset_fits_bits;
+
+/// B.cond - branch to target if condition is true
+pub fn bcond(cb: &mut CodeBlock, cond: u8, offset: InstructionOffset) {
+ assert!(bcond_offset_fits_bits(offset.into()), "The offset must be 19 bits or less.");
+ let bytes: [u8; 4] = BranchCond::bcond(cond, offset).into();
+
+ cb.write_bytes(&bytes);
+}
+
+/// BL - branch with link (offset is number of instructions to jump)
+pub fn bl(cb: &mut CodeBlock, offset: InstructionOffset) {
+ assert!(b_offset_fits_bits(offset.into()), "The offset must be 26 bits or less.");
+ let bytes: [u8; 4] = Call::bl(offset).into();
+
+ cb.write_bytes(&bytes);
+}
+
+/// BLR - branch with link to a register
+pub fn blr(cb: &mut CodeBlock, rn: A64Opnd) {
+ let bytes: [u8; 4] = match rn {
+ A64Opnd::Reg(rn) => Branch::blr(rn.reg_no).into(),
+ _ => panic!("Invalid operand to blr instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// BR - branch to a register
+pub fn br(cb: &mut CodeBlock, rn: A64Opnd) {
+ let bytes: [u8; 4] = match rn {
+ A64Opnd::Reg(rn) => Branch::br(rn.reg_no).into(),
+ _ => panic!("Invalid operand to br instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// BRK - create a breakpoint
+pub fn brk(cb: &mut CodeBlock, imm16: A64Opnd) {
+ let bytes: [u8; 4] = match imm16 {
+ A64Opnd::None => Breakpoint::brk(0).into(),
+ A64Opnd::UImm(imm16) => {
+ assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less.");
+ Breakpoint::brk(imm16 as u16).into()
+ },
+ _ => panic!("Invalid operand combination to brk instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// CMP - compare rn and rm, update flags
+pub fn cmp(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) {
+ let bytes: [u8; 4] = match (rn, rm) {
+ (A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => {
+ assert!(
+ rn.num_bits == rm.num_bits,
+ "All operands must be of the same size."
+ );
+
+ DataReg::cmp(rn.reg_no, rm.reg_no, rn.num_bits).into()
+ },
+ (A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => {
+ DataImm::cmp(rn.reg_no, (imm12 as u64).try_into().unwrap(), rn.num_bits).into()
+ },
+ (A64Opnd::Reg(rn), A64Opnd::UImm(imm12)) => {
+ DataImm::cmp(rn.reg_no, imm12.try_into().unwrap(), rn.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to cmp instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// CSEL - conditionally select between two registers
+pub fn csel(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd, cond: u8) {
+ let bytes: [u8; 4] = match (rd, rn, rm) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => {
+ assert!(
+ rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits,
+ "All operands must be of the same size."
+ );
+
+ Conditional::csel(rd.reg_no, rn.reg_no, rm.reg_no, cond, rd.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to csel instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// EOR - perform a bitwise XOR of rn and rm, put the result in rd, don't update flags
+pub fn eor(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rn, rm) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => {
+ assert!(
+ rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits,
+ "All operands must be of the same size."
+ );
+
+ LogicalReg::eor(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+ let bitmask_imm = if rd.num_bits == 32 {
+ BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+ } else {
+ imm.try_into()
+ }.unwrap();
+
+ LogicalImm::eor(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to eor instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LDADDAL - atomic add with acquire and release semantics
+pub fn ldaddal(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rs, rt, rn) {
+ (A64Opnd::Reg(rs), A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => {
+ assert!(
+ rs.num_bits == rt.num_bits && rt.num_bits == rn.num_bits,
+ "All operands must be of the same size."
+ );
+
+ Atomic::ldaddal(rs.reg_no, rt.reg_no, rn.reg_no, rs.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to ldaddal instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LDAXR - atomic load with acquire semantics
+pub fn ldaxr(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => {
+ assert_eq!(rn.num_bits, 64, "rn must be a 64-bit register.");
+
+ LoadStoreExclusive::ldaxr(rt.reg_no, rn.reg_no, rt.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to ldaxr instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LDP (signed offset) - load a pair of registers from memory
+pub fn ldp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt1, rt2, rn) {
+ (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => {
+ assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size");
+ assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less.");
+ assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register");
+
+ RegisterPair::ldp(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to ldp instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LDP (pre-index) - load a pair of registers from memory, update the base pointer before loading it
+pub fn ldp_pre(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt1, rt2, rn) {
+ (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => {
+ assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size");
+ assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less.");
+ assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register");
+
+ RegisterPair::ldp_pre(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to ldp instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LDP (post-index) - load a pair of registers from memory, update the base pointer after loading it
+pub fn ldp_post(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt1, rt2, rn) {
+ (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => {
+ assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size");
+ assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less.");
+ assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register");
+
+ RegisterPair::ldp_post(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to ldp instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LDR - load a value from memory into a register, with a register offset
+pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn, rm) {
+ (A64Opnd::Reg(rt), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => {
+ assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size");
+ assert!(rn.num_bits == rm.num_bits, "Expected registers to be the same size");
+
+ LoadRegister::ldr(rt.reg_no, rn.reg_no, rm.reg_no, rt.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to ldr instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LDR (literal) - load a value from a PC-relative memory address into a register
+pub fn ldr_literal(cb: &mut CodeBlock, rt: A64Opnd, rn: InstructionOffset) {
+ let bytes: [u8; 4] = match rt {
+ A64Opnd::Reg(rt) => {
+ LoadLiteral::ldr_literal(rt.reg_no, rn, rt.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to ldr instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LDRH - load a halfword from memory
+pub fn ldrh(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
+ assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword");
+ assert!(imm_fits_bits(rn.disp.into(), 12), "The displacement must be 12 bits or less.");
+
+ HalfwordImm::ldrh(rt.reg_no, rn.base_reg_no, rn.disp as i16).into()
+ },
+ _ => panic!("Invalid operand combination to ldrh instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LDRH (pre-index) - load a halfword from memory, update the base pointer before loading it
+pub fn ldrh_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
+ assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword");
+ assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less.");
+
+ HalfwordImm::ldrh_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16).into()
+ },
+ _ => panic!("Invalid operand combination to ldrh instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LDRH (post-index) - load a halfword from memory, update the base pointer after loading it
+pub fn ldrh_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
+ assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword");
+ assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less.");
+
+ HalfwordImm::ldrh_post(rt.reg_no, rn.base_reg_no, rn.disp as i16).into()
+ },
+ _ => panic!("Invalid operand combination to ldrh instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// Whether or not a memory address displacement fits into the maximum number of
+/// bits such that it can be used without loading it into a register first.
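+/// A 9-bit signed displacement covers the unscaled range of -256 to +255 bytes.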
+pub fn mem_disp_fits_bits(disp: i32) -> bool {
+ imm_fits_bits(disp.into(), 9)
+}
+
+/// LDR (post-index) - load a register from memory, update the base pointer after loading it
+pub fn ldr_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
+ assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size.");
+ assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less.");
+
+ LoadStore::ldr_post(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to ldr instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LDR (pre-index) - load a register from memory, update the base pointer before loading it
+pub fn ldr_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
+ assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size.");
+ assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less.");
+
+ LoadStore::ldr_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to ldr instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LDUR - load a value from a memory address into a register
+pub fn ldur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => {
+ assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size.");
+
+ LoadStore::ldur(rt.reg_no, rn.reg_no, 0, rt.num_bits).into()
+ },
+ (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
+ assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size");
+ assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less");
+
+ LoadStore::ldur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into()
+ },
+ _ => panic!("Invalid operands for LDUR")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LDURH - load a halfword from memory, zero-extend it, and write it to a register
+pub fn ldurh(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
+ assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less");
+
+ LoadStore::ldurh(rt.reg_no, rn.base_reg_no, rn.disp as i16).into()
+ },
+ _ => panic!("Invalid operands for LDURH")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LDURB - load a byte from memory, zero-extend it, and write it to a register
+pub fn ldurb(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
+ assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size");
+ assert!(rt.num_bits == 8, "Expected registers to have size 8");
+ assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less");
+
+ LoadStore::ldurb(rt.reg_no, rn.base_reg_no, rn.disp as i16).into()
+ },
+ _ => panic!("Invalid operands for LDURB")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LDURSW - load a 32-bit value from memory into a register and sign-extend it
+pub fn ldursw(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
+ assert!(rt.num_bits == rn.num_bits, "Expected registers to be the same size");
+ assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less");
+
+ LoadStore::ldursw(rt.reg_no, rn.base_reg_no, rn.disp as i16).into()
+ },
+ _ => panic!("Invalid operand combination to ldursw instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LSL - logical shift left a register by an immediate
+pub fn lsl(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rn, shift) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm)) => {
+ assert!(rd.num_bits == rn.num_bits, "Expected registers to be the same size");
+ assert!(uimm_fits_bits(uimm, 6), "Expected shift to be 6 bits or less");
+
+ ShiftImm::lsl(rd.reg_no, rn.reg_no, uimm as u8, rd.num_bits).into()
+ },
+ _ => panic!("Invalid operands combination to lsl instruction")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// LSR - logical shift right a register by an immediate
+pub fn lsr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, shift: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rn, shift) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm)) => {
+ assert!(rd.num_bits == rn.num_bits, "Expected registers to be the same size");
+ assert!(uimm_fits_bits(uimm, 6), "Expected shift to be 6 bits or less");
+
+ ShiftImm::lsr(rd.reg_no, rn.reg_no, uimm as u8, rd.num_bits).into()
+ },
+ _ => panic!("Invalid operands combination to lsr instruction")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// MOV - move a value in a register to another register
+pub fn mov(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rm) {
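+ // Register number 31 refers to SP in the ADD (immediate) encoding but to
+ // XZR in the ORR (shifted register) encoding, so moves to or from SP are
+ // emitted as ADD rd, rn, #0 rather than the usual ORR alias.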
+ (A64Opnd::Reg(A64Reg { reg_no: 31, num_bits: 64 }), A64Opnd::Reg(rm)) => {
+ assert!(rm.num_bits == 64, "Expected rm to be 64 bits");
+
+ DataImm::add(31, rm.reg_no, 0.try_into().unwrap(), 64).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(A64Reg { reg_no: 31, num_bits: 64 })) => {
+ assert!(rd.num_bits == 64, "Expected rd to be 64 bits");
+
+ DataImm::add(rd.reg_no, 31, 0.try_into().unwrap(), 64).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rm)) => {
+ assert!(rd.num_bits == rm.num_bits, "Expected registers to be the same size");
+
+ LogicalReg::mov(rd.reg_no, rm.reg_no, rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::UImm(0)) => {
+ LogicalReg::mov(rd.reg_no, XZR_REG.reg_no, rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::UImm(imm)) => {
+ let bitmask_imm = if rd.num_bits == 32 {
+ BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+ } else {
+ imm.try_into()
+ }.unwrap();
+
+ LogicalImm::mov(rd.reg_no, bitmask_imm, rd.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to mov instruction")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// MOVK - move a 16 bit immediate into a register, keep the other bits in place
+pub fn movk(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) {
+ let bytes: [u8; 4] = match (rd, imm16) {
+ (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => {
+ assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less.");
+
+ Mov::movk(rd.reg_no, imm16 as u16, shift, rd.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to movk instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// MOVZ - move a 16 bit immediate into a register, zero the other bits
+pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) {
+ let bytes: [u8; 4] = match (rd, imm16) {
+ (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => {
+ assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less.");
+
+ Mov::movz(rd.reg_no, imm16 as u16, shift, rd.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to movz instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// MRS - move a system register into a general-purpose register
+pub fn mrs(cb: &mut CodeBlock, rt: A64Opnd, systemregister: SystemRegister) {
+ let bytes: [u8; 4] = match rt {
+ A64Opnd::Reg(rt) => {
+ SysReg::mrs(rt.reg_no, systemregister).into()
+ },
+ _ => panic!("Invalid operand combination to mrs instruction")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// MSR - move a general-purpose register into a system register
+pub fn msr(cb: &mut CodeBlock, systemregister: SystemRegister, rt: A64Opnd) {
+ let bytes: [u8; 4] = match rt {
+ A64Opnd::Reg(rt) => {
+ SysReg::msr(systemregister, rt.reg_no).into()
+ },
+ _ => panic!("Invalid operand combination to msr instruction")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// MUL - multiply two registers, put the result in a third register
+pub fn mul(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rn, rm) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => {
+ assert!(rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, "Expected registers to be the same size");
+
+ MAdd::mul(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to mul instruction")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// SMULH - multiply two 64-bit registers to produce a 128-bit result, put the high 64 bits of the result in rd
+pub fn smulh(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rn, rm) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => {
+ assert!(rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, "Expected registers to be the same size");
+ assert!(rd.num_bits == 64, "smulh only applicable to 64-bit registers");
+
+ SMulH::smulh(rd.reg_no, rn.reg_no, rm.reg_no).into()
+ },
+ _ => panic!("Invalid operand combination to mul instruction")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// MVN - move a value in a register to another register, bitwise-negating it
+pub fn mvn(cb: &mut CodeBlock, rd: A64Opnd, rm: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rm) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rm)) => {
+ assert!(rd.num_bits == rm.num_bits, "Expected registers to be the same size");
+
+ LogicalReg::mvn(rd.reg_no, rm.reg_no, rd.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to mvn instruction")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// NOP - no-operation, used for alignment purposes
+pub fn nop(cb: &mut CodeBlock) {
+ let bytes: [u8; 4] = Nop::nop().into();
+
+ cb.write_bytes(&bytes);
+}
+
+/// ORN - perform a bitwise OR of rn and NOT rm, put the result in rd, don't update flags
+pub fn orn(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rn, rm) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => {
+ assert!(rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits, "Expected registers to be the same size");
+
+ LogicalReg::orn(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to orn instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// ORR - perform a bitwise OR of rn and rm, put the result in rd, don't update flags
+pub fn orr(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rn, rm) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => {
+ assert!(
+ rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits,
+ "All operands must be of the same size."
+ );
+
+ LogicalReg::orr(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+ let bitmask_imm = if rd.num_bits == 32 {
+ BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+ } else {
+ imm.try_into()
+ }.unwrap();
+
+ LogicalImm::orr(rd.reg_no, rn.reg_no, bitmask_imm, rd.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to orr instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// STLXR - store a value to memory, release exclusive access
+pub fn stlxr(cb: &mut CodeBlock, rs: A64Opnd, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rs, rt, rn) {
+ (A64Opnd::Reg(rs), A64Opnd::Reg(rt), A64Opnd::Reg(rn)) => {
+ assert_eq!(rs.num_bits, 32, "rs must be a 32-bit register.");
+ assert_eq!(rn.num_bits, 64, "rn must be a 64-bit register.");
+
+ LoadStoreExclusive::stlxr(rs.reg_no, rt.reg_no, rn.reg_no, rn.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to stlxr instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// STP (signed offset) - store a pair of registers to memory
+pub fn stp(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt1, rt2, rn) {
+ (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => {
+ assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size");
+ assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less.");
+ assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register");
+
+ RegisterPair::stp(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to stp instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// STP (pre-index) - store a pair of registers to memory, update the base pointer before storing
+pub fn stp_pre(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt1, rt2, rn) {
+ (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => {
+ assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size");
+ assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less.");
+ assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register");
+
+ RegisterPair::stp_pre(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to stp instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// STP (post-index) - store a pair of registers to memory, update the base pointer after storing
+pub fn stp_post(cb: &mut CodeBlock, rt1: A64Opnd, rt2: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt1, rt2, rn) {
+ (A64Opnd::Reg(rt1), A64Opnd::Reg(rt2), A64Opnd::Mem(rn)) => {
+ assert!(rt1.num_bits == rt2.num_bits, "Expected source registers to be the same size");
+ assert!(imm_fits_bits(rn.disp.into(), 10), "The displacement must be 10 bits or less.");
+ assert_ne!(rt1.reg_no, rt2.reg_no, "Behavior is unpredictable with pairs of the same register");
+
+ RegisterPair::stp_post(rt1.reg_no, rt2.reg_no, rn.base_reg_no, rn.disp as i16, rt1.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to stp instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// STR (post-index) - store a register to memory, update the base pointer after storing
+pub fn str_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
+ assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size.");
+ assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less.");
+
+ LoadStore::str_post(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to str instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// STR (pre-index) - store a register to memory, update the base pointer before storing
+pub fn str_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
+ assert!(rt.num_bits == rn.num_bits, "All operands must be of the same size.");
+ assert!(mem_disp_fits_bits(rn.disp), "The displacement must be 9 bits or less.");
+
+ LoadStore::str_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16, rt.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to str instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// STRH - store a halfword into memory
+pub fn strh(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
+ assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword");
+ assert!(imm_fits_bits(rn.disp.into(), 12), "The displacement must be 12 bits or less.");
+
+ HalfwordImm::strh(rt.reg_no, rn.base_reg_no, rn.disp as i16).into()
+ },
+ _ => panic!("Invalid operand combination to strh instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// STRH (pre-index) - store a halfword into memory, update the base pointer before storing
+pub fn strh_pre(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
+ assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword");
+ assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less.");
+
+ HalfwordImm::strh_pre(rt.reg_no, rn.base_reg_no, rn.disp as i16).into()
+ },
+ _ => panic!("Invalid operand combination to strh instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// STRH (post-index) - store a halfword into memory, update the base pointer after storing
+pub fn strh_post(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
+ assert_eq!(rt.num_bits, 32, "Expected to be loading a halfword");
+ assert!(imm_fits_bits(rn.disp.into(), 9), "The displacement must be 9 bits or less.");
+
+ HalfwordImm::strh_post(rt.reg_no, rn.base_reg_no, rn.disp as i16).into()
+ },
+ _ => panic!("Invalid operand combination to strh instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// STUR - store a value in a register at a memory address
+pub fn stur(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
+ assert!(rn.num_bits == 32 || rn.num_bits == 64);
+ assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less");
+
+ LoadStore::stur(rt.reg_no, rn.base_reg_no, rn.disp as i16, rn.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to stur instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// STURH - store a halfword from a register at a memory address
+pub fn sturh(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, rn) {
+ (A64Opnd::Reg(rt), A64Opnd::Mem(rn)) => {
+ assert!(rn.num_bits == 16);
+ assert!(mem_disp_fits_bits(rn.disp), "Expected displacement to be 9 bits or less");
+
+ LoadStore::sturh(rt.reg_no, rn.base_reg_no, rn.disp as i16).into()
+ },
+ _ => panic!("Invalid operand combination to stur instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// SUB - subtract rm from rn, put the result in rd, don't update flags
+pub fn sub(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rn, rm) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => {
+ assert!(
+ rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits,
+ "All operands must be of the same size."
+ );
+
+ DataReg::sub(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+
+ DataImm::sub(rd.reg_no, rn.reg_no, uimm12.try_into().unwrap(), rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+
+ if imm12 < 0 {
+ DataImm::add(rd.reg_no, rn.reg_no, (-imm12 as u64).try_into().unwrap(), rd.num_bits).into()
+ } else {
+ DataImm::sub(rd.reg_no, rn.reg_no, (imm12 as u64).try_into().unwrap(), rd.num_bits).into()
+ }
+ },
+ _ => panic!("Invalid operand combination to sub instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// SUBS - subtract rm from rn, put the result in rd, update flags
+pub fn subs(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rn, rm) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => {
+ assert!(
+ rd.num_bits == rn.num_bits && rn.num_bits == rm.num_bits,
+ "All operands must be of the same size."
+ );
+
+ DataReg::subs(rd.reg_no, rn.reg_no, rm.reg_no, rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::UImm(uimm12)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+
+ DataImm::subs(rd.reg_no, rn.reg_no, uimm12.try_into().unwrap(), rd.num_bits).into()
+ },
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn), A64Opnd::Imm(imm12)) => {
+ assert!(rd.num_bits == rn.num_bits, "rd and rn must be of the same size.");
+
+ if imm12 < 0 {
+ DataImm::adds(rd.reg_no, rn.reg_no, (-imm12 as u64).try_into().unwrap(), rd.num_bits).into()
+ } else {
+ DataImm::subs(rd.reg_no, rn.reg_no, (imm12 as u64).try_into().unwrap(), rd.num_bits).into()
+ }
+ },
+ _ => panic!("Invalid operand combination to subs instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// SXTW - sign extend a 32-bit register into a 64-bit register
+pub fn sxtw(cb: &mut CodeBlock, rd: A64Opnd, rn: A64Opnd) {
+ let bytes: [u8; 4] = match (rd, rn) {
+ (A64Opnd::Reg(rd), A64Opnd::Reg(rn)) => {
+ assert_eq!(rd.num_bits, 64, "rd must be 64-bits wide.");
+ assert_eq!(rn.num_bits, 32, "rn must be 32-bits wide.");
+
+ SBFM::sxtw(rd.reg_no, rn.reg_no).into()
+ },
+ _ => panic!("Invalid operand combination to sxtw instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// RET - unconditionally return to a location in a register, defaults to X30
+pub fn ret(cb: &mut CodeBlock, rn: A64Opnd) {
+ let bytes: [u8; 4] = match rn {
+ A64Opnd::None => Branch::ret(30).into(),
+ A64Opnd::Reg(reg) => Branch::ret(reg.reg_no).into(),
+ _ => panic!("Invalid operand to ret instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// TBNZ - test bit and branch if not zero
+pub fn tbnz(cb: &mut CodeBlock, rt: A64Opnd, bit_num: A64Opnd, offset: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, bit_num, offset) {
+ (A64Opnd::Reg(rt), A64Opnd::UImm(bit_num), A64Opnd::Imm(offset)) => {
+ TestBit::tbnz(rt.reg_no, bit_num.try_into().unwrap(), offset.try_into().unwrap()).into()
+ },
+ _ => panic!("Invalid operand combination to tbnz instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// TBZ - test bit and branch if zero
+pub fn tbz(cb: &mut CodeBlock, rt: A64Opnd, bit_num: A64Opnd, offset: A64Opnd) {
+ let bytes: [u8; 4] = match (rt, bit_num, offset) {
+ (A64Opnd::Reg(rt), A64Opnd::UImm(bit_num), A64Opnd::Imm(offset)) => {
+ TestBit::tbz(rt.reg_no, bit_num.try_into().unwrap(), offset.try_into().unwrap()).into()
+ },
+ _ => panic!("Invalid operand combination to tbz instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// TST - test the bits of a register against a mask, then update flags
+pub fn tst(cb: &mut CodeBlock, rn: A64Opnd, rm: A64Opnd) {
+ let bytes: [u8; 4] = match (rn, rm) {
+ (A64Opnd::Reg(rn), A64Opnd::Reg(rm)) => {
+ assert!(rn.num_bits == rm.num_bits, "All operands must be of the same size.");
+
+ LogicalReg::tst(rn.reg_no, rm.reg_no, rn.num_bits).into()
+ },
+ (A64Opnd::Reg(rn), A64Opnd::UImm(imm)) => {
+ let bitmask_imm = if rn.num_bits == 32 {
+ BitmaskImmediate::new_32b_reg(imm.try_into().unwrap())
+ } else {
+ imm.try_into()
+ }.unwrap();
+
+ LogicalImm::tst(rn.reg_no, bitmask_imm, rn.num_bits).into()
+ },
+ _ => panic!("Invalid operand combination to tst instruction."),
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// CBZ - branch if a register is zero
+pub fn cbz(cb: &mut CodeBlock, rt: A64Opnd, offset: InstructionOffset) {
+ assert!(imm_fits_bits(offset.into(), 19), "jump offset for cbz must fit in 19 bits");
+ let bytes: [u8; 4] = if let A64Opnd::Reg(rt) = rt {
+ cbz_cbnz(rt.num_bits, false, offset, rt.reg_no)
+ } else {
+ panic!("Invalid operand combination to cbz instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// CBNZ - branch if a register is non-zero
+pub fn cbnz(cb: &mut CodeBlock, rt: A64Opnd, offset: InstructionOffset) {
+ assert!(imm_fits_bits(offset.into(), 19), "jump offset for cbz must fit in 19 bits");
+ let bytes: [u8; 4] = if let A64Opnd::Reg(rt) = rt {
+ cbz_cbnz(rt.num_bits, true, offset, rt.reg_no)
+ } else {
+ panic!("Invalid operand combination to cbnz instruction.")
+ };
+
+ cb.write_bytes(&bytes);
+}
+
+/// Encode Compare and Branch on Zero (CBZ) with `op=0` or Compare and Branch on Nonzero (CBNZ)
+/// with `op=1`.
+///
+/// <https://developer.arm.com/documentation/ddi0602/2024-03/Base-Instructions/CBZ--Compare-and-Branch-on-Zero->
+///
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
+/// | 31 30 29 28 | 27 26 25 24 | 23 22 21 20 | 19 18 17 16 | 15 14 13 12 | 11 10 09 08 | 07 06 05 04 | 03 02 01 00 |
+/// | sf 0 1 1 0 1 0 op |
+/// | imm19........................................................... Rt.............. |
+/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
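+///
+/// For example, a 64-bit CBZ of register 0 with an offset of -1 instruction
+/// encodes to `0xb4ffffe0`: `sf == 1`, `op == 0`, `imm19 == 0x7ffff`, `Rt == 0`.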
+fn cbz_cbnz(num_bits: u8, op: bool, offset: InstructionOffset, rt: u8) -> [u8; 4] {
+ ((Sf::from(num_bits) as u32) << 31 |
+ 0b11010 << 25 |
+ u32::from(op) << 24 |
+ truncate_imm::<_, 19>(offset) << 5 |
+ rt as u32).to_le_bytes()
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ /// Check that the bytes for an instruction sequence match a hex string
+ fn check_bytes<R>(bytes: &str, run: R) where R: FnOnce(&mut super::CodeBlock) {
+ let mut cb = super::CodeBlock::new_dummy(128);
+ run(&mut cb);
+ assert_eq!(format!("{:x}", cb), bytes);
+ }
+
+ #[test]
+ fn test_imm_fits_bits() {
+ assert!(imm_fits_bits(i8::MAX.into(), 8));
+ assert!(imm_fits_bits(i8::MIN.into(), 8));
+
+ assert!(imm_fits_bits(i16::MAX.into(), 16));
+ assert!(imm_fits_bits(i16::MIN.into(), 16));
+
+ assert!(imm_fits_bits(i32::MAX.into(), 32));
+ assert!(imm_fits_bits(i32::MIN.into(), 32));
+
+ assert!(imm_fits_bits(i64::MAX, 64));
+ assert!(imm_fits_bits(i64::MIN, 64));
+ }
+
+ #[test]
+ fn test_uimm_fits_bits() {
+ assert!(uimm_fits_bits(u8::MAX.into(), 8));
+ assert!(uimm_fits_bits(u16::MAX.into(), 16));
+ assert!(uimm_fits_bits(u32::MAX.into(), 32));
+ assert!(uimm_fits_bits(u64::MAX, 64));
+ }
+
+ #[test]
+ fn test_add_reg() {
+ check_bytes("2000028b", |cb| add(cb, X0, X1, X2));
+ }
+
+ #[test]
+ fn test_add_uimm() {
+ check_bytes("201c0091", |cb| add(cb, X0, X1, A64Opnd::new_uimm(7)));
+ }
+
+ #[test]
+ fn test_add_imm_positive() {
+ check_bytes("201c0091", |cb| add(cb, X0, X1, A64Opnd::new_imm(7)));
+ }
+
+ #[test]
+ fn test_add_imm_negative() {
+ check_bytes("201c00d1", |cb| add(cb, X0, X1, A64Opnd::new_imm(-7)));
+ }
+
+ #[test]
+ fn test_adds_reg() {
+ check_bytes("200002ab", |cb| adds(cb, X0, X1, X2));
+ }
+
+ #[test]
+ fn test_adds_uimm() {
+ check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_uimm(7)));
+ }
+
+ #[test]
+ fn test_adds_imm_positive() {
+ check_bytes("201c00b1", |cb| adds(cb, X0, X1, A64Opnd::new_imm(7)));
+ }
+
+ #[test]
+ fn test_adds_imm_negative() {
+ check_bytes("201c00f1", |cb| adds(cb, X0, X1, A64Opnd::new_imm(-7)));
+ }
+
+ #[test]
+ fn test_adr() {
+ check_bytes("aa000010", |cb| adr(cb, X10, A64Opnd::new_imm(20)));
+ }
+
+ #[test]
+ fn test_adrp() {
+ check_bytes("4a000090", |cb| adrp(cb, X10, A64Opnd::new_imm(0x8000)));
+ }
+
+ #[test]
+ fn test_and_register() {
+ check_bytes("2000028a", |cb| and(cb, X0, X1, X2));
+ }
+
+ #[test]
+ fn test_and_immediate() {
+ check_bytes("20084092", |cb| and(cb, X0, X1, A64Opnd::new_uimm(7)));
+ }
+
+ #[test]
+ fn test_and_32b_immediate() {
+ check_bytes("404c0012", |cb| and(cb, W0, W2, A64Opnd::new_uimm(0xfffff)));
+ }
+
+ #[test]
+ fn test_ands_register() {
+ check_bytes("200002ea", |cb| ands(cb, X0, X1, X2));
+ }
+
+ #[test]
+ fn test_ands_immediate() {
+ check_bytes("200840f2", |cb| ands(cb, X0, X1, A64Opnd::new_uimm(7)));
+ }
+
+ #[test]
+ fn test_asr() {
+ check_bytes("b4fe4a93", |cb| asr(cb, X20, X21, A64Opnd::new_uimm(10)));
+ }
+
+ #[test]
+ fn test_bcond() {
+ let offset = InstructionOffset::from_insns(0x100);
+ check_bytes("01200054", |cb| bcond(cb, Condition::NE, offset));
+ }
+
+ #[test]
+ fn test_b() {
+ let offset = InstructionOffset::from_insns((1 << 25) - 1);
+ check_bytes("ffffff15", |cb| b(cb, offset));
+ }
+
+ #[test]
+ #[should_panic]
+ fn test_b_too_big() {
+ // There are 26 bits available
+ let offset = InstructionOffset::from_insns(1 << 25);
+ check_bytes("", |cb| b(cb, offset));
+ }
+
+ #[test]
+ #[should_panic]
+ fn test_b_too_small() {
+ // There are 26 bits available
+ let offset = InstructionOffset::from_insns(-(1 << 25) - 1);
+ check_bytes("", |cb| b(cb, offset));
+ }
+
+ #[test]
+ fn test_bl() {
+ let offset = InstructionOffset::from_insns(-(1 << 25));
+ check_bytes("00000096", |cb| bl(cb, offset));
+ }
+
+ #[test]
+ #[should_panic]
+ fn test_bl_too_big() {
+ // There are 26 bits available
+ let offset = InstructionOffset::from_insns(1 << 25);
+ check_bytes("", |cb| bl(cb, offset));
+ }
+
+ #[test]
+ #[should_panic]
+ fn test_bl_too_small() {
+ // There are 26 bits available
+ let offset = InstructionOffset::from_insns(-(1 << 25) - 1);
+ check_bytes("", |cb| bl(cb, offset));
+ }
+
+ #[test]
+ fn test_blr() {
+ check_bytes("80023fd6", |cb| blr(cb, X20));
+ }
+
+ #[test]
+ fn test_br() {
+ check_bytes("80021fd6", |cb| br(cb, X20));
+ }
+
+ #[test]
+ fn test_cbz() {
+ let offset = InstructionOffset::from_insns(-1);
+ check_bytes("e0ffffb4e0ffff34", |cb| {
+ cbz(cb, X0, offset);
+ cbz(cb, W0, offset);
+ });
+ }
+
+ #[test]
+ fn test_cbnz() {
+ let offset = InstructionOffset::from_insns(2);
+ check_bytes("540000b554000035", |cb| {
+ cbnz(cb, X20, offset);
+ cbnz(cb, W20, offset);
+ });
+ }
+
+ #[test]
+ fn test_brk_none() {
+ check_bytes("000020d4", |cb| brk(cb, A64Opnd::None));
+ }
+
+ #[test]
+ fn test_brk_uimm() {
+ check_bytes("c00120d4", |cb| brk(cb, A64Opnd::new_uimm(14)));
+ }
+
+ #[test]
+ fn test_cmp_register() {
+ check_bytes("5f010beb", |cb| cmp(cb, X10, X11));
+ }
+
+ #[test]
+ fn test_cmp_immediate() {
+ check_bytes("5f3900f1", |cb| cmp(cb, X10, A64Opnd::new_uimm(14)));
+ }
+
+ #[test]
+ fn test_csel() {
+ check_bytes("6a018c9a", |cb| csel(cb, X10, X11, X12, Condition::EQ));
+ }
+
+ #[test]
+ fn test_eor_register() {
+ check_bytes("6a010cca", |cb| eor(cb, X10, X11, X12));
+ }
+
+ #[test]
+ fn test_eor_immediate() {
+ check_bytes("6a0940d2", |cb| eor(cb, X10, X11, A64Opnd::new_uimm(7)));
+ }
+
+ #[test]
+ fn test_eor_32b_immediate() {
+ check_bytes("29040152", |cb| eor(cb, W9, W1, A64Opnd::new_uimm(0x80000001)));
+ }
+
+ #[test]
+ fn test_ldaddal() {
+ check_bytes("8b01eaf8", |cb| ldaddal(cb, X10, X11, X12));
+ }
+
+ #[test]
+ fn test_ldaxr() {
+ check_bytes("6afd5fc8", |cb| ldaxr(cb, X10, X11));
+ }
+
+ #[test]
+ fn test_ldp() {
+ check_bytes("8a2d4da9", |cb| ldp(cb, X10, X11, A64Opnd::new_mem(64, X12, 208)));
+ }
+
+ #[test]
+ fn test_ldp_pre() {
+ check_bytes("8a2dcda9", |cb| ldp_pre(cb, X10, X11, A64Opnd::new_mem(64, X12, 208)));
+ }
+
+ #[test]
+ fn test_ldp_post() {
+ check_bytes("8a2dcda8", |cb| ldp_post(cb, X10, X11, A64Opnd::new_mem(64, X12, 208)));
+ }
+
+ #[test]
+ fn test_ldr() {
+ check_bytes("6a696cf8", |cb| ldr(cb, X10, X11, X12));
+ }
+
+ #[test]
+ fn test_ldr_literal() {
+ check_bytes("40010058", |cb| ldr_literal(cb, X0, 10.into()));
+ }
+
+ #[test]
+ fn test_ldr_post() {
+ check_bytes("6a0541f8", |cb| ldr_post(cb, X10, A64Opnd::new_mem(64, X11, 16)));
+ }
+
+ #[test]
+ fn test_ldr_pre() {
+ check_bytes("6a0d41f8", |cb| ldr_pre(cb, X10, A64Opnd::new_mem(64, X11, 16)));
+ }
+
+ #[test]
+ fn test_ldrh() {
+ check_bytes("6a194079", |cb| ldrh(cb, W10, A64Opnd::new_mem(64, X11, 12)));
+ }
+
+ #[test]
+ fn test_ldrh_pre() {
+ check_bytes("6acd4078", |cb| ldrh_pre(cb, W10, A64Opnd::new_mem(64, X11, 12)));
+ }
+
+ #[test]
+ fn test_ldrh_post() {
+ check_bytes("6ac54078", |cb| ldrh_post(cb, W10, A64Opnd::new_mem(64, X11, 12)));
+ }
+
+ #[test]
+ fn test_ldurh_memory() {
+ check_bytes("2a004078", |cb| ldurh(cb, W10, A64Opnd::new_mem(64, X1, 0)));
+ check_bytes("2ab04778", |cb| ldurh(cb, W10, A64Opnd::new_mem(64, X1, 123)));
+ }
+
+ #[test]
+ fn test_ldur_memory() {
+ check_bytes("20b047f8", |cb| ldur(cb, X0, A64Opnd::new_mem(64, X1, 123)));
+ }
+
+ #[test]
+ fn test_ldur_register() {
+ check_bytes("200040f8", |cb| ldur(cb, X0, X1));
+ }
+
+ #[test]
+ fn test_ldursw() {
+ check_bytes("6ab187b8", |cb| ldursw(cb, X10, A64Opnd::new_mem(64, X11, 123)));
+ }
+
+ #[test]
+ fn test_lsl() {
+ check_bytes("6ac572d3", |cb| lsl(cb, X10, X11, A64Opnd::new_uimm(14)));
+ }
+
+ #[test]
+ fn test_lsr() {
+ check_bytes("6afd4ed3", |cb| lsr(cb, X10, X11, A64Opnd::new_uimm(14)));
+ }
+
+ #[test]
+ fn test_mov_registers() {
+ check_bytes("ea030baa", |cb| mov(cb, X10, X11));
+ }
+
+ #[test]
+ fn test_mov_immediate() {
+ check_bytes("eaf300b2", |cb| mov(cb, X10, A64Opnd::new_uimm(0x5555555555555555)));
+ }
+
+ #[test]
+ fn test_mov_32b_immediate() {
+ check_bytes("ea070132", |cb| mov(cb, W10, A64Opnd::new_uimm(0x80000001)));
+ }
+
+ #[test]
+ fn test_mov_into_sp() {
+ check_bytes("1f000091", |cb| mov(cb, X31, X0));
+ }
+
+ #[test]
+ fn test_mov_from_sp() {
+ check_bytes("e0030091", |cb| mov(cb, X0, X31));
+ }
+
+ #[test]
+ fn test_movk() {
+ check_bytes("600fa0f2", |cb| movk(cb, X0, A64Opnd::new_uimm(123), 16));
+ }
+
+ #[test]
+ fn test_movz() {
+ check_bytes("600fa0d2", |cb| movz(cb, X0, A64Opnd::new_uimm(123), 16));
+ }
+
+ #[test]
+ fn test_mrs() {
+ check_bytes("0a423bd5", |cb| mrs(cb, X10, SystemRegister::NZCV));
+ }
+
+ #[test]
+ fn test_msr() {
+ check_bytes("0a421bd5", |cb| msr(cb, SystemRegister::NZCV, X10));
+ }
+
+ #[test]
+ fn test_mul() {
+ check_bytes("6a7d0c9b", |cb| mul(cb, X10, X11, X12));
+ }
+
+ #[test]
+ fn test_mvn() {
+ check_bytes("ea032baa", |cb| mvn(cb, X10, X11));
+ }
+
+ #[test]
+ fn test_nop() {
+ check_bytes("1f2003d5", |cb| nop(cb));
+ }
+
+ #[test]
+ fn test_orn() {
+ check_bytes("6a012caa", |cb| orn(cb, X10, X11, X12));
+ }
+
+ #[test]
+ fn test_orr_register() {
+ check_bytes("6a010caa", |cb| orr(cb, X10, X11, X12));
+ }
+
+ #[test]
+ fn test_orr_immediate() {
+ check_bytes("6a0940b2", |cb| orr(cb, X10, X11, A64Opnd::new_uimm(7)));
+ }
+
+ #[test]
+ fn test_orr_32b_immediate() {
+ check_bytes("6a010032", |cb| orr(cb, W10, W11, A64Opnd::new_uimm(1)));
+ }
+
+ #[test]
+ fn test_ret_none() {
+ check_bytes("c0035fd6", |cb| ret(cb, A64Opnd::None));
+ }
+
+ #[test]
+ fn test_ret_register() {
+ check_bytes("80025fd6", |cb| ret(cb, X20));
+ }
+
+ #[test]
+ fn test_stlxr() {
+ check_bytes("8bfd0ac8", |cb| stlxr(cb, W10, X11, X12));
+ }
+
+ #[test]
+ fn test_stp() {
+ check_bytes("8a2d0da9", |cb| stp(cb, X10, X11, A64Opnd::new_mem(64, X12, 208)));
+ }
+
+ #[test]
+ fn test_stp_pre() {
+ check_bytes("8a2d8da9", |cb| stp_pre(cb, X10, X11, A64Opnd::new_mem(64, X12, 208)));
+ }
+
+ #[test]
+ fn test_stp_post() {
+ check_bytes("8a2d8da8", |cb| stp_post(cb, X10, X11, A64Opnd::new_mem(64, X12, 208)));
+ }
+
+ #[test]
+ fn test_str_post() {
+ check_bytes("6a051ff8", |cb| str_post(cb, X10, A64Opnd::new_mem(64, X11, -16)));
+ }
+
+ #[test]
+ fn test_str_pre() {
+ check_bytes("6a0d1ff8", |cb| str_pre(cb, X10, A64Opnd::new_mem(64, X11, -16)));
+ }
+
+ #[test]
+ fn test_strh() {
+ check_bytes("6a190079", |cb| strh(cb, W10, A64Opnd::new_mem(64, X11, 12)));
+ }
+
+ #[test]
+ fn test_strh_pre() {
+ check_bytes("6acd0078", |cb| strh_pre(cb, W10, A64Opnd::new_mem(64, X11, 12)));
+ }
+
+ #[test]
+ fn test_strh_post() {
+ check_bytes("6ac50078", |cb| strh_post(cb, W10, A64Opnd::new_mem(64, X11, 12)));
+ }
+
+ #[test]
+ fn test_stur_64_bits() {
+ check_bytes("6a0108f8", |cb| stur(cb, X10, A64Opnd::new_mem(64, X11, 128)));
+ }
+
+ #[test]
+ fn test_stur_32_bits() {
+ check_bytes("6a0108b8", |cb| stur(cb, X10, A64Opnd::new_mem(32, X11, 128)));
+ }
+
+ #[test]
+ fn test_sub_reg() {
+ check_bytes("200002cb", |cb| sub(cb, X0, X1, X2));
+ }
+
+ #[test]
+ fn test_sub_uimm() {
+ check_bytes("201c00d1", |cb| sub(cb, X0, X1, A64Opnd::new_uimm(7)));
+ }
+
+ #[test]
+ fn test_sub_imm_positive() {
+ check_bytes("201c00d1", |cb| sub(cb, X0, X1, A64Opnd::new_imm(7)));
+ }
+
+ #[test]
+ fn test_sub_imm_negative() {
+ check_bytes("201c0091", |cb| sub(cb, X0, X1, A64Opnd::new_imm(-7)));
+ }
+
+ #[test]
+ fn test_subs_reg() {
+ check_bytes("200002eb", |cb| subs(cb, X0, X1, X2));
+ }
+
+ #[test]
+ fn test_subs_imm_positive() {
+ check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_imm(7)));
+ }
+
+ #[test]
+ fn test_subs_imm_negative() {
+ check_bytes("201c00b1", |cb| subs(cb, X0, X1, A64Opnd::new_imm(-7)));
+ }
+
+ #[test]
+ fn test_subs_uimm() {
+ check_bytes("201c00f1", |cb| subs(cb, X0, X1, A64Opnd::new_uimm(7)));
+ }
+
+ #[test]
+ fn test_sxtw() {
+ check_bytes("6a7d4093", |cb| sxtw(cb, X10, W11));
+ }
+
+ #[test]
+ fn test_tbnz() {
+ check_bytes("4a005037", |cb| tbnz(cb, X10, A64Opnd::UImm(10), A64Opnd::Imm(2)));
+ }
+
+ #[test]
+ fn test_tbz() {
+ check_bytes("4a005036", |cb| tbz(cb, X10, A64Opnd::UImm(10), A64Opnd::Imm(2)));
+ }
+
+ #[test]
+ fn test_tst_register() {
+ check_bytes("1f0001ea", |cb| tst(cb, X0, X1));
+ }
+
+ #[test]
+ fn test_tst_immediate() {
+ check_bytes("3f0840f2", |cb| tst(cb, X1, A64Opnd::new_uimm(7)));
+ }
+
+ #[test]
+ fn test_tst_32b_immediate() {
+ check_bytes("1f3c0072", |cb| tst(cb, W0, A64Opnd::new_uimm(0xffff)));
+ }
+}
diff --git a/yjit/src/asm/arm64/opnd.rs b/yjit/src/asm/arm64/opnd.rs
new file mode 100644
index 0000000000..108824e08d
--- /dev/null
+++ b/yjit/src/asm/arm64/opnd.rs
@@ -0,0 +1,195 @@
+
+
+/// This operand represents a register.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub struct A64Reg
+{
+ // Size in bits
+ pub num_bits: u8,
+
+ // Register index number
+ pub reg_no: u8,
+}
+
+impl A64Reg {
+ pub fn with_num_bits(&self, num_bits: u8) -> Self {
+ assert!(num_bits == 8 || num_bits == 16 || num_bits == 32 || num_bits == 64);
+ Self { num_bits, reg_no: self.reg_no }
+ }
+}
+
+#[derive(Clone, Copy, Debug)]
+pub struct A64Mem
+{
+ // Size in bits
+ pub num_bits: u8,
+
+ /// Base register number
+ pub base_reg_no: u8,
+
+ /// Constant displacement from the base, not scaled
+ pub disp: i32,
+}
+
+impl A64Mem {
+ pub fn new(num_bits: u8, reg: A64Opnd, disp: i32) -> Self {
+ match reg {
+ A64Opnd::Reg(reg) => {
+ Self { num_bits, base_reg_no: reg.reg_no, disp }
+ },
+ _ => panic!("Expected register operand")
+ }
+ }
+}
+
+#[derive(Clone, Copy, Debug)]
+pub enum A64Opnd
+{
+ // Dummy operand
+ None,
+
+ // Immediate value
+ Imm(i64),
+
+ // Unsigned immediate
+ UImm(u64),
+
+ // Register
+ Reg(A64Reg),
+
+ // Memory
+ Mem(A64Mem)
+}
+
+impl A64Opnd {
+ /// Create a new immediate value operand.
+ pub fn new_imm(value: i64) -> Self {
+ A64Opnd::Imm(value)
+ }
+
+ /// Create a new unsigned immediate value operand.
+ pub fn new_uimm(value: u64) -> Self {
+ A64Opnd::UImm(value)
+ }
+
+ /// Creates a new memory operand.
+ pub fn new_mem(num_bits: u8, reg: A64Opnd, disp: i32) -> Self {
+ A64Opnd::Mem(A64Mem::new(num_bits, reg, disp))
+ }
+
+ /// Convenience function to check if this operand is a register.
+ pub fn is_reg(&self) -> bool {
+ match self {
+ A64Opnd::Reg(_) => true,
+ _ => false
+ }
+ }
+
+ /// Unwrap a register from an operand.
+ pub fn unwrap_reg(&self) -> A64Reg {
+ match self {
+ A64Opnd::Reg(reg) => *reg,
+ _ => panic!("Expected register operand")
+ }
+ }
+}
+
+// argument registers
+pub const X0_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 0 };
+pub const X1_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 1 };
+pub const X2_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 2 };
+pub const X3_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 3 };
+pub const X4_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 4 };
+pub const X5_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 5 };
+
+// caller-save registers
+pub const X9_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 9 };
+pub const X10_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 10 };
+pub const X11_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 11 };
+pub const X12_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 12 };
+pub const X13_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 13 };
+pub const X14_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 14 };
+pub const X15_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 15 };
+pub const X16_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 16 };
+pub const X17_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 17 };
+
+// callee-save registers
+pub const X19_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 19 };
+pub const X20_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 20 };
+pub const X21_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 21 };
+pub const X22_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 22 };
+
+// zero register
+pub const XZR_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 31 };
+
+// 64-bit registers
+pub const X0: A64Opnd = A64Opnd::Reg(X0_REG);
+pub const X1: A64Opnd = A64Opnd::Reg(X1_REG);
+pub const X2: A64Opnd = A64Opnd::Reg(X2_REG);
+pub const X3: A64Opnd = A64Opnd::Reg(X3_REG);
+pub const X4: A64Opnd = A64Opnd::Reg(X4_REG);
+pub const X5: A64Opnd = A64Opnd::Reg(X5_REG);
+pub const X6: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 6 });
+pub const X7: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 7 });
+pub const X8: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 8 });
+pub const X9: A64Opnd = A64Opnd::Reg(X9_REG);
+pub const X10: A64Opnd = A64Opnd::Reg(X10_REG);
+pub const X11: A64Opnd = A64Opnd::Reg(X11_REG);
+pub const X12: A64Opnd = A64Opnd::Reg(X12_REG);
+pub const X13: A64Opnd = A64Opnd::Reg(X13_REG);
+pub const X14: A64Opnd = A64Opnd::Reg(X14_REG);
+pub const X15: A64Opnd = A64Opnd::Reg(X15_REG);
+pub const X16: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 16 });
+pub const X17: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 17 });
+pub const X18: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 18 });
+pub const X19: A64Opnd = A64Opnd::Reg(X19_REG);
+pub const X20: A64Opnd = A64Opnd::Reg(X20_REG);
+pub const X21: A64Opnd = A64Opnd::Reg(X21_REG);
+pub const X22: A64Opnd = A64Opnd::Reg(X22_REG);
+pub const X23: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 23 });
+pub const X24: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 24 });
+pub const X25: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 25 });
+pub const X26: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 26 });
+pub const X27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 27 });
+pub const X28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 28 });
+pub const X29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 29 });
+pub const X30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: 30 });
+pub const X31: A64Opnd = A64Opnd::Reg(XZR_REG);
+
+// 32-bit registers
+pub const W0: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 0 });
+pub const W1: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 1 });
+pub const W2: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 2 });
+pub const W3: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 3 });
+pub const W4: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 4 });
+pub const W5: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 5 });
+pub const W6: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 6 });
+pub const W7: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 7 });
+pub const W8: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 8 });
+pub const W9: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 9 });
+pub const W10: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 10 });
+pub const W11: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 11 });
+pub const W12: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 12 });
+pub const W13: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 13 });
+pub const W14: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 14 });
+pub const W15: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 15 });
+pub const W16: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 16 });
+pub const W17: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 17 });
+pub const W18: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 18 });
+pub const W19: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 19 });
+pub const W20: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 20 });
+pub const W21: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 21 });
+pub const W22: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 22 });
+pub const W23: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 23 });
+pub const W24: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 24 });
+pub const W25: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 25 });
+pub const W26: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 26 });
+pub const W27: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 27 });
+pub const W28: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 28 });
+pub const W29: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 29 });
+pub const W30: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 30 });
+pub const W31: A64Opnd = A64Opnd::Reg(A64Reg { num_bits: 32, reg_no: 31 });
+
+// C argument registers
+pub const C_ARG_REGS: [A64Opnd; 4] = [X0, X1, X2, X3];
+pub const C_ARG_REGREGS: [A64Reg; 4] = [X0_REG, X1_REG, X2_REG, X3_REG];
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
index e16e856925..524d6341f5 100644
--- a/yjit/src/asm/mod.rs
+++ b/yjit/src/asm/mod.rs
@@ -1,31 +1,57 @@
+use std::cell::RefCell;
+use std::fmt;
use std::mem;
-
-#[cfg(feature = "asm_comments")]
+use std::rc::Rc;
+use crate::core::IseqPayload;
+use crate::core::for_each_off_stack_iseq_payload;
+use crate::core::for_each_on_stack_iseq_payload;
+use crate::invariants::rb_yjit_tracing_invalidate_all;
+use crate::stats::incr_counter;
+use crate::virtualmem::WriteError;
+
+#[cfg(feature = "disasm")]
use std::collections::BTreeMap;
+use crate::codegen::CodegenGlobals;
use crate::virtualmem::{VirtualMem, CodePtr};
// Lots of manual vertical alignment in there that rustfmt doesn't handle well.
#[rustfmt::skip]
pub mod x86_64;
+pub mod arm64;
+
//
// TODO: need a field_size_of macro, to compute the size of a struct field in bytes
//
/// Reference to an ASM label
-struct LabelRef {
+#[derive(Clone)]
+pub struct LabelRef {
// Position in the code block where the label reference exists
pos: usize,
// Label which this refers to
label_idx: usize,
+
+ /// The number of bytes that this label reference takes up in memory.
+ /// It's necessary to know this ahead of time so that when we come back
+ /// to patch it, it takes up the same amount of space.
+ num_bytes: usize,
+
+ /// The function that knows how to encode the referencing instruction,
+ /// called with (cb, src_addr, dst_addr) when the label is linked.
+ encode: fn(&mut CodeBlock, i64, i64)
}
/// Block of memory into which instructions can be assembled
pub struct CodeBlock {
// Memory for storing the encoded instructions
- mem_block: VirtualMem,
+ mem_block: Rc<RefCell<VirtualMem>>,
+
+ // Size of a code page in bytes. Each code page is split into an inlined and an outlined portion.
+ // Code GC collects code memory at this granularity.
+ // Must be a multiple of the OS page size.
+ page_size: usize,
// Memory block size
mem_size: usize,
@@ -33,6 +59,15 @@ pub struct CodeBlock {
// Current writing position
write_pos: usize,
+ // The index of the last page with written bytes
+ last_page_idx: usize,
+
+ // Total number of bytes written to past pages
+ past_page_bytes: usize,
+
+ // Size reserved for writing a jump to the next page
+ page_end_reserve: usize,
+
// Table of registered label addresses
label_addrs: Vec<usize>,
@@ -43,41 +78,298 @@ pub struct CodeBlock {
label_refs: Vec<LabelRef>,
// Comments for assembly instructions, if that feature is enabled
- #[cfg(feature = "asm_comments")]
+ #[cfg(feature = "disasm")]
asm_comments: BTreeMap<usize, Vec<String>>,
+ // True for OutlinedCb
+ pub outlined: bool,
+
// Set if the CodeBlock is unable to output some instructions,
// for example, when there is not enough space or when a jump
// target is too far away.
dropped_bytes: bool,
+
+ // Keeps track of what pages we can write to after code gc.
+ // `None` means all pages are free.
+ freed_pages: Rc<Option<Vec<usize>>>,
+}
+
+/// Set of CodeBlock label states. Used for recovering the previous state.
+pub struct LabelState {
+ label_addrs: Vec<usize>,
+ label_names: Vec<String>,
+ label_refs: Vec<LabelRef>,
}
impl CodeBlock {
+ /// Works for common AArch64 systems that have 16 KiB pages and
+ /// common x86_64 systems that use 4 KiB pages.
+ const PREFERRED_CODE_PAGE_SIZE: usize = 16 * 1024;
+
/// Make a new CodeBlock
- pub fn new(mem_block: VirtualMem) -> Self {
- Self {
- mem_size: mem_block.virtual_region_size(),
+ pub fn new(mem_block: Rc<RefCell<VirtualMem>>, outlined: bool, freed_pages: Rc<Option<Vec<usize>>>) -> Self {
+ // Pick the code page size
+ let system_page_size = mem_block.borrow().system_page_size();
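+ // Use the preferred size when it's a multiple of the OS page size;
+ // otherwise fall back to the OS page size itself.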
+ let page_size = if 0 == Self::PREFERRED_CODE_PAGE_SIZE % system_page_size {
+ Self::PREFERRED_CODE_PAGE_SIZE
+ } else {
+ system_page_size
+ };
+
+ let mem_size = mem_block.borrow().virtual_region_size();
+ let mut cb = Self {
mem_block,
+ mem_size,
+ page_size,
write_pos: 0,
+ last_page_idx: 0,
+ past_page_bytes: 0,
+ page_end_reserve: 0,
label_addrs: Vec::new(),
label_names: Vec::new(),
label_refs: Vec::new(),
- #[cfg(feature = "asm_comments")]
+ #[cfg(feature = "disasm")]
asm_comments: BTreeMap::new(),
+ outlined,
dropped_bytes: false,
+ freed_pages,
+ };
+ cb.page_end_reserve = cb.jmp_ptr_bytes();
+ cb.write_pos = cb.page_start();
+
+ #[cfg(not(test))]
+ assert_eq!(0, mem_size % page_size, "partially in-bounds code pages should be impossible");
+
+ cb
+ }
+
+ /// Move the CodeBlock to the next page. If it's on the furthest page,
+ /// move the other CodeBlock to the next page as well.
+ pub fn next_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, base_ptr: CodePtr, jmp_ptr: F) -> bool {
+ let old_write_ptr = self.get_write_ptr();
+ self.set_write_ptr(base_ptr);
+
+ // Use the freed_pages list if code GC has been used. Otherwise use the next page.
+ let next_page_idx = if let Some(freed_pages) = self.freed_pages.as_ref() {
+ let current_page = self.write_pos / self.page_size;
+ freed_pages.iter().find(|&&page| current_page < page).map(|&page| page)
+ } else {
+ Some(self.write_pos / self.page_size + 1)
+ };
+
+ // Move self to the next page
+ if next_page_idx.is_none() || !self.set_page(next_page_idx.unwrap(), &jmp_ptr) {
+ self.set_write_ptr(old_write_ptr); // rollback if there are no more pages
+ return false;
+ }
+
+ // Move the other CodeBlock to the same page if it's on the furthest page
+ if cfg!(not(test)) {
+ self.other_cb().unwrap().set_page(next_page_idx.unwrap(), &jmp_ptr);
+ }
+
+ return !self.dropped_bytes;
+ }
+
+ /// Move the CodeBlock to page_idx only if it's not going backwards.
+ fn set_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, page_idx: usize, jmp_ptr: &F) -> bool {
+ // Do not move the CodeBlock if page_idx points to an old position so that this
+ // CodeBlock will not overwrite existing code.
+ //
+ // Let's say this is the current situation:
+ // cb: [page1, page2, page3 (write_pos)], ocb: [page1, page2, page3 (write_pos)]
+ //
+ // When cb needs to patch page1, this will be temporarily changed to:
+ // cb: [page1 (write_pos), page2, page3], ocb: [page1, page2, page3 (write_pos)]
+ //
+ // While patching page1, cb may need to jump to page2. What set_page currently does is:
+ // cb: [page1, page2 (write_pos), page3], ocb: [page1, page2, page3 (write_pos)]
+ // instead of:
+ // cb: [page1, page2 (write_pos), page3], ocb: [page1, page2 (write_pos), page3]
+ // because moving ocb's write_pos from page3 to the beginning of page2 will let ocb's
+ // write_pos point to existing code in page2, which might let ocb overwrite it later.
+ //
+ // We could remember the last write_pos in page2 and let set_page use that position,
+ // but that would waste some space to keep a write_pos for every single page.
+ // It doesn't seem necessary for performance either, so we're currently not doing it.
+ let dst_pos = self.get_page_pos(page_idx);
+ if self.write_pos < dst_pos {
+ // Fail if next page is out of bounds
+ if dst_pos >= self.mem_size {
+ return false;
+ }
+
+ // Reset dropped_bytes
+ self.dropped_bytes = false;
+
+ // Generate jmp_ptr from src_pos to dst_pos
+ let dst_ptr = self.get_ptr(dst_pos);
+ self.without_page_end_reserve(|cb| {
+ assert!(cb.has_capacity(cb.jmp_ptr_bytes()));
+ cb.add_comment("jump to next page");
+ jmp_ptr(cb, dst_ptr);
+ });
+
+ // Update past_page_bytes for code_size() if this is a new page
+ if self.last_page_idx < page_idx {
+ self.past_page_bytes += self.current_page_bytes();
+ }
+
+ // Start the next code from dst_pos
+ self.write_pos = dst_pos;
+ // Update the last_page_idx if page_idx points to the furthest page
+ self.last_page_idx = usize::max(self.last_page_idx, page_idx);
+ }
+ !self.dropped_bytes
+ }
+
+ /// Free the memory pages of the given code page indexes
+ fn free_pages(&mut self, page_idxs: &Vec<usize>) {
+ let mut page_idxs = page_idxs.clone();
+ page_idxs.reverse(); // to loop with pop()
+
+ // Group adjacent page indexes and free them in batches to reduce the # of syscalls.
+ while let Some(page_idx) = page_idxs.pop() {
+ // Group first adjacent page indexes
+ let mut batch_idxs = vec![page_idx];
+ while page_idxs.last() == Some(&(batch_idxs.last().unwrap() + 1)) {
+ batch_idxs.push(page_idxs.pop().unwrap());
+ }
+
+ // Free the grouped pages at once
+ let start_ptr = self.mem_block.borrow().start_ptr().add_bytes(page_idx * self.page_size);
+ let batch_size = self.page_size * batch_idxs.len();
+ self.mem_block.borrow_mut().free_bytes(start_ptr, batch_size as u32);
+ }
+ }
+
+ pub fn page_size(&self) -> usize {
+ self.page_size
+ }
+
+ pub fn mapped_region_size(&self) -> usize {
+ self.mem_block.borrow().mapped_region_size()
+ }
+
+ /// Size of the region in bytes where writes could be attempted.
+ #[cfg(target_arch = "aarch64")]
+ pub fn virtual_region_size(&self) -> usize {
+ self.mem_block.borrow().virtual_region_size()
+ }
+
+ /// Return the number of code pages that have been mapped by the VirtualMemory.
+ pub fn num_mapped_pages(&self) -> usize {
+ // CodeBlock's page size != VirtualMem's page size on Linux,
+ // so mapped_region_size % self.page_size may not be 0
+ ((self.mapped_region_size() - 1) / self.page_size) + 1
+ }
+
+ /// Return the number of code pages that have been reserved by the VirtualMemory.
+ pub fn num_virtual_pages(&self) -> usize {
+ let virtual_region_size = self.mem_block.borrow().virtual_region_size();
+ // CodeBlock's page size != VirtualMem's page size on Linux,
+ // so virtual_region_size % self.page_size may not be 0
+ ((virtual_region_size - 1) / self.page_size) + 1
+ }
+
+ /// Return the number of code pages that have been freed and not used yet.
+ pub fn num_freed_pages(&self) -> usize {
+ (0..self.num_mapped_pages()).filter(|&page_idx| self.has_freed_page(page_idx)).count()
+ }
+
+ pub fn has_freed_page(&self, page_idx: usize) -> bool {
+ self.freed_pages.as_ref().as_ref().map_or(false, |pages| pages.contains(&page_idx)) && // code GCed
+ self.write_pos < page_idx * self.page_size // and not written yet
+ }
+
+ /// Convert a page index to the write_pos for the page start.
+ fn get_page_pos(&self, page_idx: usize) -> usize {
+ self.page_size * page_idx + self.page_start()
+ }
+
+ /// write_pos of the current page start
+ pub fn page_start_pos(&self) -> usize {
+ self.get_write_pos() / self.page_size * self.page_size + self.page_start()
+ }
+
+ /// Offset of each page where CodeBlock should start writing
+ pub fn page_start(&self) -> usize {
+ let mut start = if self.inline() {
+ 0
+ } else {
+ self.page_size / 2
+ };
+ if cfg!(debug_assertions) && !cfg!(test) {
+ // Leave illegal instructions at the beginning of each page to assert
+ // we're not accidentally crossing page boundaries.
+ start += self.jmp_ptr_bytes();
+ }
+ start
+ }
+
+ /// Offset of each page where CodeBlock should stop writing (exclusive)
+ pub fn page_end(&self) -> usize {
+ let page_end = if self.inline() {
+ self.page_size / 2
+ } else {
+ self.page_size
+ };
+ page_end - self.page_end_reserve // reserve space to jump to the next page
+ }
+
+ /// Call a given function with page_end_reserve = 0
+ pub fn without_page_end_reserve<F: Fn(&mut Self)>(&mut self, block: F) {
+ let old_page_end_reserve = self.page_end_reserve;
+ self.page_end_reserve = 0;
+ block(self);
+ self.page_end_reserve = old_page_end_reserve;
+ }
+
+ /// Return the sub-ranges of a given address range that this CodeBlock can write to.
+ #[allow(dead_code)]
+ pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
+ let region_start = self.get_ptr(0).raw_addr(self);
+ let region_end = self.get_ptr(self.get_mem_size()).raw_addr(self);
+ let mut start = start_ptr.raw_addr(self);
+ let end = std::cmp::min(end_ptr.raw_addr(self), region_end);
+
+ let freed_pages = self.freed_pages.as_ref().as_ref();
+ let mut addrs = vec![];
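+ // Walk the range one code page at a time, clamping each slice to the
+ // writable portion of its page.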
+ while start < end {
+ let page_idx = start.saturating_sub(region_start) / self.page_size;
+ let current_page = region_start + (page_idx * self.page_size);
+ let page_end = std::cmp::min(end, current_page + self.page_end());
+ // If code GC has been used, skip pages that are used by past on-stack code
+ if freed_pages.map_or(true, |pages| pages.contains(&page_idx)) {
+ addrs.push((start, page_end));
+ }
+ start = current_page + self.page_size + self.page_start();
}
+ addrs
+ }
+
+ /// Return the number of bytes written by this CodeBlock.
+ pub fn code_size(&self) -> usize {
+ self.current_page_bytes() + self.past_page_bytes
+ }
+
+ /// Return the number of bytes written to the current page.
+ fn current_page_bytes(&self) -> usize {
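+ // saturating_sub avoids underflow when write_pos sits before page_start()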
+ (self.write_pos % self.page_size).saturating_sub(self.page_start())
}
/// Check if this code block has sufficient remaining capacity
pub fn has_capacity(&self, num_bytes: usize) -> bool {
- self.write_pos + num_bytes < self.mem_size
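+ // Capacity is measured against the end of the current code page (minus the
+ // space reserved for a jump to the next page), not against the whole region.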
+ let page_offset = self.write_pos % self.page_size;
+ let capacity = self.page_end().saturating_sub(page_offset);
+ num_bytes <= capacity
}
/// Add an assembly comment if the feature is on.
/// If not, this becomes an inline no-op.
- #[cfg(feature = "asm_comments")]
+ #[cfg(feature = "disasm")]
pub fn add_comment(&mut self, comment: &str) {
- let cur_ptr = self.get_write_ptr().into_usize();
+ let cur_ptr = self.get_write_ptr().raw_addr(self);
// If there's no current list of comments for this line number, add one.
let this_line_comments = self.asm_comments.entry(cur_ptr).or_default();
@@ -87,15 +379,31 @@ impl CodeBlock {
this_line_comments.push(comment.to_string());
}
}
- #[cfg(not(feature = "asm_comments"))]
+ #[cfg(not(feature = "disasm"))]
#[inline]
pub fn add_comment(&mut self, _: &str) {}
- #[cfg(feature = "asm_comments")]
+ #[cfg(feature = "disasm")]
pub fn comments_at(&self, pos: usize) -> Option<&Vec<String>> {
self.asm_comments.get(&pos)
}
+ #[allow(unused_variables)]
+ #[cfg(feature = "disasm")]
+ pub fn remove_comments(&mut self, start_addr: CodePtr, end_addr: CodePtr) {
+ for addr in start_addr.raw_addr(self)..end_addr.raw_addr(self) {
+ self.asm_comments.remove(&addr);
+ }
+ }
+ #[cfg(not(feature = "disasm"))]
+ #[inline]
+ pub fn remove_comments(&mut self, _: CodePtr, _: CodePtr) {}
+
+ pub fn clear_comments(&mut self) {
+ #[cfg(feature = "disasm")]
+ self.asm_comments.clear();
+ }
+
pub fn get_mem_size(&self) -> usize {
self.mem_size
}
@@ -104,73 +412,72 @@ impl CodeBlock {
self.write_pos
}
- pub fn get_mem(&mut self) -> &mut VirtualMem {
- &mut self.mem_block
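+ /// Write one byte of the memory block at the given pointer.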
+ pub fn write_mem(&self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> {
+ self.mem_block.borrow_mut().write_byte(write_ptr, byte)
}
// Set the current write position
pub fn set_pos(&mut self, pos: usize) {
- // Assert here since while CodeBlock functions do bounds checking, there is
- // nothing stopping users from taking out an out-of-bounds pointer and
- // doing bad accesses with it.
- assert!(pos < self.mem_size);
+ // No bounds check here since we can be out of bounds
+ // when the code block fills up. We want to be able to
+ // restore to the filled up state after patching something
+ // in the middle.
self.write_pos = pos;
}
- // Align the current write pointer to a multiple of bytes
- pub fn align_pos(&mut self, multiple: u32) {
- // Compute the alignment boundary that is lower or equal
- // Do everything with usize
- let multiple: usize = multiple.try_into().unwrap();
- let pos = self.get_write_ptr().raw_ptr() as usize;
- let remainder = pos % multiple;
- let prev_aligned = pos - remainder;
-
- if prev_aligned == pos {
- // Already aligned so do nothing
- } else {
- // Align by advancing
- let pad = multiple - remainder;
- self.set_pos(self.get_write_pos() + pad);
- }
- }
-
// Set the current write position from a pointer
pub fn set_write_ptr(&mut self, code_ptr: CodePtr) {
- let pos = code_ptr.into_usize() - self.mem_block.start_ptr().into_usize();
- self.set_pos(pos);
+ let pos = code_ptr.as_offset() - self.mem_block.borrow().start_ptr().as_offset();
+ self.set_pos(pos.try_into().unwrap());
}
- // Get a direct pointer into the executable memory block
+ /// Get a (possibly dangling) direct pointer into the executable memory block
pub fn get_ptr(&self, offset: usize) -> CodePtr {
- self.mem_block.start_ptr().add_bytes(offset)
+ self.mem_block.borrow().start_ptr().add_bytes(offset)
}
- // Get a direct pointer to the current write position
- pub fn get_write_ptr(&mut self) -> CodePtr {
+ /// Convert an address range to memory page indexes against a num_pages()-sized array.
+ pub fn addrs_to_pages(&self, start_addr: CodePtr, end_addr: CodePtr) -> Vec<usize> {
+ let mem_start = self.mem_block.borrow().start_ptr().raw_addr(self);
+ let mem_end = self.mem_block.borrow().mapped_end_ptr().raw_addr(self);
+ assert!(mem_start <= start_addr.raw_addr(self));
+ assert!(start_addr.raw_addr(self) <= end_addr.raw_addr(self));
+ assert!(end_addr.raw_addr(self) <= mem_end);
+
+ // Ignore empty code ranges
+ if start_addr == end_addr {
+ return vec![];
+ }
+
+ let start_page = (start_addr.raw_addr(self) - mem_start) / self.page_size;
+ let end_page = (end_addr.raw_addr(self) - mem_start - 1) / self.page_size;
+ (start_page..=end_page).collect() // TODO: consider returning an iterator
+ }
+
+ /// Get a (possibly dangling) direct pointer to the current write position
+ pub fn get_write_ptr(&self) -> CodePtr {
self.get_ptr(self.write_pos)
}
- // Write a single byte at the current position
+ /// Write a single byte at the current position.
pub fn write_byte(&mut self, byte: u8) {
let write_ptr = self.get_write_ptr();
-
- if self.mem_block.write_byte(write_ptr, byte).is_ok() {
+ if self.has_capacity(1) && self.mem_block.borrow_mut().write_byte(write_ptr, byte).is_ok() {
self.write_pos += 1;
} else {
self.dropped_bytes = true;
}
}
- // Write multiple bytes starting from the current position
+ /// Write multiple bytes starting from the current position.
pub fn write_bytes(&mut self, bytes: &[u8]) {
for byte in bytes {
self.write_byte(*byte);
}
}
- // Write a signed integer over a given number of bits at the current position
- pub fn write_int(&mut self, val: u64, num_bits: u32) {
+ /// Write an integer over the given number of bits at the current position.
+ fn write_int(&mut self, val: u64, num_bits: u32) {
assert!(num_bits > 0);
assert!(num_bits % 8 == 0);
@@ -201,8 +508,16 @@ impl CodeBlock {
self.dropped_bytes
}
+ /// To patch code that straddles pages correctly, we need to start with
+ /// the dropped bytes flag unset so we can detect when to switch to a new page.
+ pub fn set_dropped_bytes(&mut self, dropped_bytes: bool) {
+ self.dropped_bytes = dropped_bytes;
+ }
+
/// Allocate a new label with a given name
pub fn new_label(&mut self, name: String) -> usize {
+ assert!(!name.contains(' '), "use underscores in label names, not spaces");
+
// This label doesn't have an address yet
self.label_addrs.push(0);
self.label_names.push(name);
@@ -212,22 +527,22 @@ impl CodeBlock {
/// Write a label at the current address
pub fn write_label(&mut self, label_idx: usize) {
- // TODO: make sure that label_idx is valid
- // TODO: add an asseer here
-
self.label_addrs[label_idx] = self.write_pos;
}
// Add a label reference at the current write position
- pub fn label_ref(&mut self, label_idx: usize) {
- // TODO: make sure that label_idx is valid
- // TODO: add an asseer here
+ pub fn label_ref(&mut self, label_idx: usize, num_bytes: usize, encode: fn(&mut CodeBlock, i64, i64)) {
+ assert!(label_idx < self.label_addrs.len());
// Keep track of the reference
- self.label_refs.push(LabelRef {
- pos: self.write_pos,
- label_idx,
- });
+ self.label_refs.push(LabelRef { pos: self.write_pos, label_idx, num_bytes, encode });
+
+ // Move past however many bytes the instruction takes up
+ if self.has_capacity(num_bytes) {
+ self.write_pos += num_bytes;
+ } else {
+ self.dropped_bytes = true; // retry emitting the Insn after next_page
+ }
}
// Link internal label references
@@ -243,11 +558,12 @@ impl CodeBlock {
let label_addr = self.label_addrs[label_idx];
assert!(label_addr < self.mem_size);
- // Compute the offset from the reference's end to the label
- let offset = (label_addr as i64) - ((ref_pos + 4) as i64);
-
self.set_pos(ref_pos);
- self.write_int(offset as u64, 32);
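+ // Re-encode the referencing instruction now that the label address is known;
+ // the source address passed in is the end of the reference (ref_pos + num_bytes).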
+ (label_ref.encode)(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64);
+
+ // Assert that we've written the same number of bytes that we
+ // expected to have written.
+ assert!(self.write_pos == ref_pos + label_ref.num_bytes);
}
self.write_pos = orig_pos;
@@ -258,8 +574,110 @@ impl CodeBlock {
assert!(self.label_refs.is_empty());
}
+ pub fn clear_labels(&mut self) {
+ self.label_addrs.clear();
+ self.label_names.clear();
+ self.label_refs.clear();
+ }
+
+ pub fn get_label_state(&self) -> LabelState {
+ LabelState {
+ label_addrs: self.label_addrs.clone(),
+ label_names: self.label_names.clone(),
+ label_refs: self.label_refs.clone(),
+ }
+ }
+
+ pub fn set_label_state(&mut self, state: LabelState) {
+ self.label_addrs = state.label_addrs;
+ self.label_names = state.label_names;
+ self.label_refs = state.label_refs;
+ }
+
pub fn mark_all_executable(&mut self) {
- self.mem_block.mark_all_executable();
+ self.mem_block.borrow_mut().mark_all_executable();
+ }
+
+ /// Code GC. Free code pages that are not on stack and reuse them.
+ pub fn code_gc(&mut self, ocb: &mut OutlinedCb) {
+ assert!(self.inline(), "must use on inline code block");
+
+ // The previous code GC failed to free any pages. Give up.
+ if self.freed_pages.as_ref() == &Some(vec![]) {
+ return;
+ }
+
+ // Check which pages are still in use
+ let mut pages_in_use = vec![false; self.num_mapped_pages()];
+ // For each ISEQ, we currently assume that outlined code only uses code pages
+ // that are also used by its inline code, so we mark only the pages used by inline code.
+ for_each_on_stack_iseq_payload(|iseq_payload| {
+ for page in &iseq_payload.pages {
+ pages_in_use[*page] = true;
+ }
+ });
+ // Avoid accumulating freed pages for future code GC
+ for_each_off_stack_iseq_payload(|iseq_payload: &mut IseqPayload| {
+ iseq_payload.pages = std::collections::HashSet::default();
+ });
+ // Outlined code generated by CodegenGlobals::init() should also be kept.
+ for page in CodegenGlobals::get_ocb_pages() {
+ pages_in_use[*page] = true;
+ }
+
+ // Invalidate everything to have more compact code after code GC.
+ // This currently patches every ISEQ, which works, but in the future,
+ // we could limit that to patch only on-stack ISEQs for optimizing code GC.
+ rb_yjit_tracing_invalidate_all();
+
+ // Assert that all code pages are freeable
+ assert_eq!(
+ 0,
+ self.mem_size % self.page_size,
+ "end of the last code page should be the end of the entire region"
+ );
+
+ // Let VirtualMem free the pages
+ let mut freed_pages: Vec<usize> = pages_in_use.iter().enumerate()
+ .filter(|&(_, &in_use)| !in_use).map(|(page, _)| page).collect();
+ // ObjectSpace API may trigger Ruby's GC, which marks gc_offsets in JIT code.
+ // So this should be called after for_each_*_iseq_payload and rb_yjit_tracing_invalidate_all.
+ self.free_pages(&freed_pages);
+
+ // Append virtual pages in case RubyVM::YJIT.code_gc is manually triggered.
+ let mut virtual_pages: Vec<usize> = (self.num_mapped_pages()..self.num_virtual_pages()).collect();
+ freed_pages.append(&mut virtual_pages);
+
+ if let Some(&first_page) = freed_pages.first() {
+ for cb in [&mut *self, ocb.unwrap()] {
+ cb.write_pos = cb.get_page_pos(first_page);
+ cb.past_page_bytes = 0;
+ cb.dropped_bytes = false;
+ cb.clear_comments();
+ }
+ }
+
+ // Track which pages are free.
+ let new_freed_pages = Rc::new(Some(freed_pages));
+ let old_freed_pages = mem::replace(&mut self.freed_pages, Rc::clone(&new_freed_pages));
+ ocb.unwrap().freed_pages = new_freed_pages;
+ assert_eq!(1, Rc::strong_count(&old_freed_pages)); // will deallocate
+
+ incr_counter!(code_gc_count);
+ }
+
+ pub fn inline(&self) -> bool {
+ !self.outlined
+ }
+
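+ /// Return the other CodeBlock (outlined for inline and vice versa), or `None`
+ /// when CodegenGlobals has not been initialized (e.g. in tests).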
+ pub fn other_cb(&self) -> Option<&'static mut Self> {
+ if !CodegenGlobals::has_instance() {
+ None
+ } else if self.inline() {
+ Some(CodegenGlobals::get_outlined_cb().unwrap())
+ } else {
+ Some(CodegenGlobals::get_inline_cb())
+ }
}
}
@@ -267,14 +685,51 @@ impl CodeBlock {
impl CodeBlock {
/// Stubbed CodeBlock for testing. Can't execute generated code.
pub fn new_dummy(mem_size: usize) -> Self {
+ use std::ptr::NonNull;
use crate::virtualmem::*;
use crate::virtualmem::tests::TestingAllocator;
let alloc = TestingAllocator::new(mem_size);
let mem_start: *const u8 = alloc.mem_start();
- let virt_mem = VirtualMem::new(alloc, 1, mem_start as *mut u8, mem_size);
+ let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size);
- Self::new(virt_mem)
+ Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(None))
+ }
+
+ /// Stubbed CodeBlock for testing conditions that can arise due to code GC. Can't execute generated code.
+ #[cfg(target_arch = "aarch64")]
+ pub fn new_dummy_with_freed_pages(mut freed_pages: Vec<usize>) -> Self {
+ use std::ptr::NonNull;
+ use crate::virtualmem::*;
+ use crate::virtualmem::tests::TestingAllocator;
+
+ freed_pages.sort_unstable();
+ let mem_size = Self::PREFERRED_CODE_PAGE_SIZE *
+ (1 + freed_pages.last().expect("freed_pages vec should not be empty"));
+
+ let alloc = TestingAllocator::new(mem_size);
+ let mem_start: *const u8 = alloc.mem_start();
+ let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size);
+
+ Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(Some(freed_pages)))
+ }
+}
+
+/// Produce hex string output from the bytes in a code block
+impl fmt::LowerHex for CodeBlock {
+ fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
+ for pos in 0..self.write_pos {
+ let mem_block = &*self.mem_block.borrow();
+ let byte = unsafe { mem_block.start_ptr().raw_ptr(mem_block).add(pos).read() };
+ fmtr.write_fmt(format_args!("{:02x}", byte))?;
+ }
+ Ok(())
+ }
+}
+
+impl crate::virtualmem::CodePtrBase for CodeBlock {
+ fn base_ptr(&self) -> std::ptr::NonNull<u8> {
+ self.mem_block.borrow().base_ptr()
}
}
@@ -294,3 +749,100 @@ impl OutlinedCb {
&mut self.cb
}
}
+
+/// Compute the number of bits needed to encode a signed value
+pub fn imm_num_bits(imm: i64) -> u8
+{
+ // Compute the smallest size this immediate fits in
+ if imm >= i8::MIN.into() && imm <= i8::MAX.into() {
+ return 8;
+ }
+ if imm >= i16::MIN.into() && imm <= i16::MAX.into() {
+ return 16;
+ }
+ if imm >= i32::MIN.into() && imm <= i32::MAX.into() {
+ return 32;
+ }
+
+ return 64;
+}
+
+/// Compute the number of bits needed to encode an unsigned value
+pub fn uimm_num_bits(uimm: u64) -> u8
+{
+ // Compute the smallest size this immediate fits in
+ if uimm <= u8::MAX.into() {
+ return 8;
+ }
+ else if uimm <= u16::MAX.into() {
+ return 16;
+ }
+ else if uimm <= u32::MAX.into() {
+ return 32;
+ }
+
+ return 64;
+}
+
+#[cfg(test)]
+mod tests
+{
+ use super::*;
+
+ #[test]
+ fn test_imm_num_bits()
+ {
+ assert_eq!(imm_num_bits(i8::MIN.into()), 8);
+ assert_eq!(imm_num_bits(i8::MAX.into()), 8);
+
+ assert_eq!(imm_num_bits(i16::MIN.into()), 16);
+ assert_eq!(imm_num_bits(i16::MAX.into()), 16);
+
+ assert_eq!(imm_num_bits(i32::MIN.into()), 32);
+ assert_eq!(imm_num_bits(i32::MAX.into()), 32);
+
+ assert_eq!(imm_num_bits(i64::MIN), 64);
+ assert_eq!(imm_num_bits(i64::MAX), 64);
+ }
+
+ #[test]
+ fn test_uimm_num_bits() {
+ assert_eq!(uimm_num_bits(u8::MIN.into()), 8);
+ assert_eq!(uimm_num_bits(u8::MAX.into()), 8);
+
+ assert_eq!(uimm_num_bits(((u8::MAX as u16) + 1).into()), 16);
+ assert_eq!(uimm_num_bits(u16::MAX.into()), 16);
+
+ assert_eq!(uimm_num_bits(((u16::MAX as u32) + 1).into()), 32);
+ assert_eq!(uimm_num_bits(u32::MAX.into()), 32);
+
+ assert_eq!(uimm_num_bits((u32::MAX as u64) + 1), 64);
+ assert_eq!(uimm_num_bits(u64::MAX), 64);
+ }
+
+ #[test]
+ fn test_code_size() {
+ // Write 4 bytes in the first page
+ let mut cb = CodeBlock::new_dummy(CodeBlock::PREFERRED_CODE_PAGE_SIZE * 2);
+ cb.write_bytes(&[0, 0, 0, 0]);
+ assert_eq!(cb.code_size(), 4);
+
+ // Moving to the next page should not increase code_size
+ cb.next_page(cb.get_write_ptr(), |_, _| {});
+ assert_eq!(cb.code_size(), 4);
+
+ // Write 4 bytes in the second page
+ cb.write_bytes(&[0, 0, 0, 0]);
+ assert_eq!(cb.code_size(), 8);
+
+ // Rewrite 4 bytes in the first page
+ let old_write_pos = cb.get_write_pos();
+ cb.set_pos(0);
+ cb.write_bytes(&[1, 1, 1, 1]);
+
+ // Moving from an old page to the next page should not increase code_size
+ cb.next_page(cb.get_write_ptr(), |_, _| {});
+ cb.set_pos(old_write_pos);
+ assert_eq!(cb.code_size(), 8);
+ }
+}
diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs
index 6eb7efaa0a..fbbfa714d8 100644
--- a/yjit/src/asm/x86_64/mod.rs
+++ b/yjit/src/asm/x86_64/mod.rs
@@ -5,24 +5,24 @@ use crate::asm::*;
// Import the assembler tests module
mod tests;
-#[derive(Clone, Copy, Debug)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct X86Imm
{
// Size in bits
- num_bits: u8,
+ pub num_bits: u8,
// The value of the immediate
- value: i64
+ pub value: i64
}
-#[derive(Clone, Copy, Debug)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct X86UImm
{
// Size in bits
- num_bits: u8,
+ pub num_bits: u8,
// The value of the immediate
- value: u64
+ pub value: u64
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
@@ -34,36 +34,36 @@ pub enum RegType
IP,
}
-#[derive(Clone, Copy, Debug)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct X86Reg
{
// Size in bits
- num_bits: u8,
+ pub num_bits: u8,
// Register type
- reg_type: RegType,
+ pub reg_type: RegType,
// Register index number
- reg_no: u8,
+ pub reg_no: u8,
}
#[derive(Clone, Copy, Debug)]
pub struct X86Mem
{
// Size in bits
- num_bits: u8,
+ pub num_bits: u8,
/// Base register number
- base_reg_no: u8,
+ pub base_reg_no: u8,
/// Index register number
- idx_reg_no: Option<u8>,
+ pub idx_reg_no: Option<u8>,
/// SIB scale exponent value (power of two, two bits)
- scale_exp: u8,
+ pub scale_exp: u8,
/// Constant displacement from the base, not scaled
- disp: i32,
+ pub disp: i32,
}
#[derive(Clone, Copy, Debug)]
@@ -88,6 +88,22 @@ pub enum X86Opnd
IPRel(i32)
}
+impl X86Reg {
+ pub fn with_num_bits(&self, num_bits: u8) -> Self {
+ assert!(
+ num_bits == 8 ||
+ num_bits == 16 ||
+ num_bits == 32 ||
+ num_bits == 64
+ );
+ Self {
+ num_bits,
+ reg_type: self.reg_type,
+ reg_no: self.reg_no
+ }
+ }
+}
+
impl X86Opnd {
fn rex_needed(&self) -> bool {
match self {
@@ -95,7 +111,7 @@ impl X86Opnd {
X86Opnd::Imm(_) => false,
X86Opnd::UImm(_) => false,
X86Opnd::Reg(reg) => reg.reg_no > 7 || reg.num_bits == 8 && reg.reg_no >= 4,
- X86Opnd::Mem(mem) => (mem.base_reg_no > 7 || (mem.idx_reg_no.unwrap_or(0) > 7)),
+ X86Opnd::Mem(mem) => mem.base_reg_no > 7 || (mem.idx_reg_no.unwrap_or(0) > 7),
X86Opnd::IPRel(_) => false
}
}
@@ -118,7 +134,7 @@ impl X86Opnd {
X86Opnd::Mem(mem) => {
if mem.disp != 0 {
// Compute the required displacement size
- let num_bits = sig_imm_size(mem.disp.into());
+ let num_bits = imm_num_bits(mem.disp.into());
if num_bits > 32 {
panic!("displacement does not fit in 32 bits");
}
@@ -145,6 +161,14 @@ impl X86Opnd {
_ => unreachable!()
}
}
+
+ pub fn is_some(&self) -> bool {
+ match self {
+ X86Opnd::None => false,
+ _ => true
+ }
+ }
+
}
// Instruction pointer
@@ -157,22 +181,39 @@ const RBP_REG_NO: u8 = 5;
const R12_REG_NO: u8 = 12;
const R13_REG_NO: u8 = 13;
-pub const RAX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RAX_REG_NO });
-pub const RCX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 1 });
-pub const RDX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 2 });
-pub const RBX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 3 });
-pub const RSP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RSP_REG_NO });
-pub const RBP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RBP_REG_NO });
-pub const RSI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 6 });
-pub const RDI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 7 });
-pub const R8: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 8 });
-pub const R9: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 9 });
-pub const R10: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 10 });
-pub const R11: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 11 });
-pub const R12: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R12_REG_NO });
-pub const R13: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R13_REG_NO });
-pub const R14: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 14 });
-pub const R15: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 15 });
+pub const RAX_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RAX_REG_NO };
+pub const RCX_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 1 };
+pub const RDX_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 2 };
+pub const RBX_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 3 };
+pub const RSP_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RSP_REG_NO };
+pub const RBP_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RBP_REG_NO };
+pub const RSI_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 6 };
+pub const RDI_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 7 };
+pub const R8_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 8 };
+pub const R9_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 9 };
+pub const R10_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 10 };
+pub const R11_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 11 };
+pub const R12_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R12_REG_NO };
+pub const R13_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R13_REG_NO };
+pub const R14_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 14 };
+pub const R15_REG: X86Reg = X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 15 };
+
+pub const RAX: X86Opnd = X86Opnd::Reg(RAX_REG);
+pub const RCX: X86Opnd = X86Opnd::Reg(RCX_REG);
+pub const RDX: X86Opnd = X86Opnd::Reg(RDX_REG);
+pub const RBX: X86Opnd = X86Opnd::Reg(RBX_REG);
+pub const RSP: X86Opnd = X86Opnd::Reg(RSP_REG);
+pub const RBP: X86Opnd = X86Opnd::Reg(RBP_REG);
+pub const RSI: X86Opnd = X86Opnd::Reg(RSI_REG);
+pub const RDI: X86Opnd = X86Opnd::Reg(RDI_REG);
+pub const R8: X86Opnd = X86Opnd::Reg(R8_REG);
+pub const R9: X86Opnd = X86Opnd::Reg(R9_REG);
+pub const R10: X86Opnd = X86Opnd::Reg(R10_REG);
+pub const R11: X86Opnd = X86Opnd::Reg(R11_REG);
+pub const R12: X86Opnd = X86Opnd::Reg(R12_REG);
+pub const R13: X86Opnd = X86Opnd::Reg(R13_REG);
+pub const R14: X86Opnd = X86Opnd::Reg(R14_REG);
+pub const R15: X86Opnd = X86Opnd::Reg(R15_REG);
// 32-bit GP registers
pub const EAX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 0 });
@@ -197,7 +238,7 @@ pub const AX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType:
pub const CX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 1 });
pub const DX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 2 });
pub const BX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 3 });
-pub const SP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 4 });
+//pub const SP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 4 });
pub const BP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 5 });
pub const SI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 6 });
pub const DI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 7 });
@@ -228,45 +269,8 @@ pub const R13B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::
pub const R14B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 14 });
pub const R15B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 15 });
-// C argument registers
-pub const C_ARG_REGS: [X86Opnd; 6] = [RDI, RSI, RDX, RCX, R8, R9];
-
//===========================================================================
-/// Compute the number of bits needed to encode a signed value
-pub fn sig_imm_size(imm: i64) -> u8
-{
- // Compute the smallest size this immediate fits in
- if imm >= i8::MIN.into() && imm <= i8::MAX.into() {
- return 8;
- }
- if imm >= i16::MIN.into() && imm <= i16::MAX.into() {
- return 16;
- }
- if imm >= i32::MIN.into() && imm <= i32::MAX.into() {
- return 32;
- }
-
- return 64;
-}
-
-/// Compute the number of bits needed to encode an unsigned value
-pub fn unsig_imm_size(imm: u64) -> u8
-{
- // Compute the smallest size this immediate fits in
- if imm <= u8::MAX.into() {
- return 8;
- }
- else if imm <= u16::MAX.into() {
- return 16;
- }
- else if imm <= u32::MAX.into() {
- return 32;
- }
-
- return 64;
-}
-
/// Shorthand for memory operand with base register and displacement
pub fn mem_opnd(num_bits: u8, base_reg: X86Opnd, disp: i32) -> X86Opnd
{
@@ -345,12 +349,12 @@ static x86opnd_t resize_opnd(x86opnd_t opnd, uint32_t num_bits)
pub fn imm_opnd(value: i64) -> X86Opnd
{
- X86Opnd::Imm(X86Imm { num_bits: sig_imm_size(value), value })
+ X86Opnd::Imm(X86Imm { num_bits: imm_num_bits(value), value })
}
pub fn uimm_opnd(value: u64) -> X86Opnd
{
- X86Opnd::UImm(X86UImm { num_bits: unsig_imm_size(value), value })
+ X86Opnd::UImm(X86UImm { num_bits: uimm_num_bits(value), value })
}
pub fn const_ptr_opnd(ptr: *const u8) -> X86Opnd
@@ -358,11 +362,6 @@ pub fn const_ptr_opnd(ptr: *const u8) -> X86Opnd
uimm_opnd(ptr as u64)
}
-pub fn code_ptr_opnd(code_ptr: CodePtr) -> X86Opnd
-{
- uimm_opnd(code_ptr.raw_ptr() as u64)
-}
-
/// Write the REX byte
fn write_rex(cb: &mut CodeBlock, w_flag: bool, reg_no: u8, idx_reg_no: u8, rm_reg_no: u8) {
// 0 1 0 0 w r x b
@@ -386,7 +385,7 @@ fn write_opcode(cb: &mut CodeBlock, opcode: u8, reg: X86Reg) {
}
/// Encode an RM instruction
-fn write_rm(cb: &mut CodeBlock, sz_pref: bool, rex_w: bool, r_opnd: X86Opnd, rm_opnd: X86Opnd, op_ext: u8, bytes: &[u8]) {
+fn write_rm(cb: &mut CodeBlock, sz_pref: bool, rex_w: bool, r_opnd: X86Opnd, rm_opnd: X86Opnd, op_ext: Option<u8>, bytes: &[u8]) {
let op_len = bytes.len();
assert!(op_len > 0 && op_len <= 3);
assert!(matches!(r_opnd, X86Opnd::Reg(_) | X86Opnd::None), "Can only encode an RM instruction with a register or a none");
@@ -443,7 +442,7 @@ fn write_rm(cb: &mut CodeBlock, sz_pref: bool, rex_w: bool, r_opnd: X86Opnd, rm_
// MODRM.rm (3 bits)
assert!(
- !(op_ext != 0xff && !matches!(r_opnd, X86Opnd::None)),
+ !(op_ext.is_some() && r_opnd.is_some()),
"opcode extension and register operand present"
);
@@ -464,8 +463,8 @@ fn write_rm(cb: &mut CodeBlock, sz_pref: bool, rex_w: bool, r_opnd: X86Opnd, rm_
// Encode the reg field
let reg: u8;
- if op_ext != 0xff {
- reg = op_ext;
+ if let Some(val) = op_ext {
+ reg = val;
} else {
reg = match r_opnd {
X86Opnd::Reg(reg) => reg.reg_no & 7,
@@ -526,7 +525,7 @@ fn write_rm(cb: &mut CodeBlock, sz_pref: bool, rex_w: bool, r_opnd: X86Opnd, rm_
}
// Encode a mul-like single-operand RM instruction
-fn write_rm_unary(cb: &mut CodeBlock, op_mem_reg_8: u8, op_mem_reg_pref: u8, op_ext: u8, opnd: X86Opnd) {
+fn write_rm_unary(cb: &mut CodeBlock, op_mem_reg_8: u8, op_mem_reg_pref: u8, op_ext: Option<u8>, opnd: X86Opnd) {
assert!(matches!(opnd, X86Opnd::Reg(_) | X86Opnd::Mem(_)));
let opnd_size = opnd.num_bits();
@@ -542,7 +541,7 @@ fn write_rm_unary(cb: &mut CodeBlock, op_mem_reg_8: u8, op_mem_reg_pref: u8, op_
}
// Encode an add-like RM instruction with multiple possible encodings
-fn write_rm_multi(cb: &mut CodeBlock, op_mem_reg8: u8, op_mem_reg_pref: u8, op_reg_mem8: u8, op_reg_mem_pref: u8, op_mem_imm8: u8, op_mem_imm_sml: u8, op_mem_imm_lrg: u8, op_ext_imm: u8, opnd0: X86Opnd, opnd1: X86Opnd) {
+fn write_rm_multi(cb: &mut CodeBlock, op_mem_reg8: u8, op_mem_reg_pref: u8, op_reg_mem8: u8, op_reg_mem_pref: u8, op_mem_imm8: u8, op_mem_imm_sml: u8, op_mem_imm_lrg: u8, op_ext_imm: Option<u8>, opnd0: X86Opnd, opnd1: X86Opnd) {
assert!(matches!(opnd0, X86Opnd::Reg(_) | X86Opnd::Mem(_)));
// Check the size of opnd0
@@ -551,8 +550,8 @@ fn write_rm_multi(cb: &mut CodeBlock, op_mem_reg8: u8, op_mem_reg_pref: u8, op_r
// Check the size of opnd1
match opnd1 {
- X86Opnd::Reg(reg) => assert!(reg.num_bits == opnd_size),
- X86Opnd::Mem(mem) => assert!(mem.num_bits == opnd_size),
+ X86Opnd::Reg(reg) => assert_eq!(reg.num_bits, opnd_size),
+ X86Opnd::Mem(mem) => assert_eq!(mem.num_bits, opnd_size),
X86Opnd::Imm(imm) => assert!(imm.num_bits <= opnd_size),
X86Opnd::UImm(uimm) => assert!(uimm.num_bits <= opnd_size),
_ => ()
@@ -565,17 +564,17 @@ fn write_rm_multi(cb: &mut CodeBlock, op_mem_reg8: u8, op_mem_reg_pref: u8, op_r
// R/M + Reg
(X86Opnd::Mem(_), X86Opnd::Reg(_)) | (X86Opnd::Reg(_), X86Opnd::Reg(_)) => {
if opnd_size == 8 {
- write_rm(cb, false, false, opnd1, opnd0, 0xff, &[op_mem_reg8]);
+ write_rm(cb, false, false, opnd1, opnd0, None, &[op_mem_reg8]);
} else {
- write_rm(cb, sz_pref, rex_w, opnd1, opnd0, 0xff, &[op_mem_reg_pref]);
+ write_rm(cb, sz_pref, rex_w, opnd1, opnd0, None, &[op_mem_reg_pref]);
}
},
// Reg + R/M/IPRel
(X86Opnd::Reg(_), X86Opnd::Mem(_) | X86Opnd::IPRel(_)) => {
if opnd_size == 8 {
- write_rm(cb, false, false, opnd0, opnd1, 0xff, &[op_reg_mem8]);
+ write_rm(cb, false, false, opnd0, opnd1, None, &[op_reg_mem8]);
} else {
- write_rm(cb, sz_pref, rex_w, opnd0, opnd1, 0xff, &[op_reg_mem_pref]);
+ write_rm(cb, sz_pref, rex_w, opnd0, opnd1, None, &[op_reg_mem_pref]);
}
},
// R/M + Imm
@@ -602,7 +601,14 @@ fn write_rm_multi(cb: &mut CodeBlock, op_mem_reg8: u8, op_mem_reg_pref: u8, op_r
},
// R/M + UImm
(_, X86Opnd::UImm(uimm)) => {
- let num_bits = sig_imm_size(uimm.value.try_into().unwrap());
+ // If the size of the left-hand operand equals the number of bits
+ // required to represent the right-hand immediate, then we
+ // don't need to worry about sign extension when sizing the immediate
+ let num_bits = if opnd0.num_bits() == uimm_num_bits(uimm.value) {
+ uimm_num_bits(uimm.value)
+ } else {
+ imm_num_bits(uimm.value.try_into().unwrap())
+ };
if num_bits <= 8 {
// 8-bit immediate
@@ -621,10 +627,10 @@ fn write_rm_multi(cb: &mut CodeBlock, op_mem_reg8: u8, op_mem_reg_pref: u8, op_r
write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm_lrg]);
cb.write_int(uimm.value, if opnd_size > 32 { 32 } else { opnd_size.into() });
} else {
- panic!("immediate value too large");
+ panic!("immediate value too large (num_bits={}, num={uimm:?})", num_bits);
}
},
- _ => unreachable!()
+ _ => panic!("unknown encoding combo: {opnd0:?} {opnd1:?}")
};
}
@@ -644,7 +650,7 @@ pub fn add(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
0x80, // opMemImm8
0x83, // opMemImmSml
0x81, // opMemImmLrg
- 0x00, // opExtImm
+ Some(0x00), // opExtImm
opnd0,
opnd1
);
@@ -661,7 +667,7 @@ pub fn and(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
0x80, // opMemImm8
0x83, // opMemImmSml
0x81, // opMemImmLrg
- 0x04, // opExtImm
+ Some(0x04), // opExtImm
opnd0,
opnd1
);
@@ -679,19 +685,23 @@ pub fn call_rel32(cb: &mut CodeBlock, rel32: i32) {
/// call - Call a pointer, encode with a 32-bit offset if possible
pub fn call_ptr(cb: &mut CodeBlock, scratch_opnd: X86Opnd, dst_ptr: *const u8) {
if let X86Opnd::Reg(_scratch_reg) = scratch_opnd {
+ use crate::stats::{incr_counter};
+
// Pointer to the end of this call instruction
let end_ptr = cb.get_ptr(cb.write_pos + 5);
// Compute the jump offset
- let rel64: i64 = dst_ptr as i64 - end_ptr.into_i64();
+ let rel64: i64 = dst_ptr as i64 - end_ptr.raw_ptr(cb) as i64;
// If the offset fits in 32-bit
if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() {
+ incr_counter!(num_send_x86_rel32);
call_rel32(cb, rel64.try_into().unwrap());
return;
}
// Move the pointer into the scratch register and call
+ incr_counter!(num_send_x86_reg);
mov(cb, scratch_opnd, const_ptr_opnd(dst_ptr));
call(cb, scratch_opnd);
} else {
@@ -701,19 +711,15 @@ pub fn call_ptr(cb: &mut CodeBlock, scratch_opnd: X86Opnd, dst_ptr: *const u8) {
/// call - Call to label with 32-bit offset
pub fn call_label(cb: &mut CodeBlock, label_idx: usize) {
- // Write the opcode
- cb.write_byte(0xE8);
-
- // Add a reference to the label
- cb.label_ref(label_idx);
-
- // Relative 32-bit offset to be patched
- cb.write_int(0, 32);
+ cb.label_ref(label_idx, 5, |cb, src_addr, dst_addr| {
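+ // CALL rel32: opcode E8 followed by a 32-bit offset relative to src_addr
+ // (the end of this 5-byte instruction).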
+ cb.write_byte(0xE8);
+ cb.write_int((dst_addr - src_addr) as u64, 32);
+ });
}
/// call - Indirect call with an R/M operand
pub fn call(cb: &mut CodeBlock, opnd: X86Opnd) {
- write_rm(cb, false, false, X86Opnd::None, opnd, 2, &[0xff]);
+ write_rm(cb, false, false, X86Opnd::None, opnd, Some(2), &[0xff]);
}
/// Encode a conditional move instruction
@@ -729,7 +735,7 @@ fn write_cmov(cb: &mut CodeBlock, opcode1: u8, dst: X86Opnd, src: X86Opnd) {
let sz_pref = reg.num_bits == 16;
let rex_w = reg.num_bits == 64;
- write_rm(cb, sz_pref, rex_w, dst, src, 0xff, &[0x0f, opcode1]);
+ write_rm(cb, sz_pref, rex_w, dst, src, None, &[0x0f, opcode1]);
} else {
unreachable!()
}
@@ -778,7 +784,7 @@ pub fn cmp(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
0x80, // opMemImm8
0x83, // opMemImmSml
0x81, // opMemImmLrg
- 0x07, // opExtImm
+ Some(0x07), // opExtImm
opnd0,
opnd1
);
@@ -794,60 +800,84 @@ pub fn cqo(cb: &mut CodeBlock) {
cb.write_bytes(&[0x48, 0x99]);
}
+/// imul - signed integer multiply
+pub fn imul(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ assert!(opnd0.num_bits() == 64);
+ assert!(opnd1.num_bits() == 64);
+ assert!(matches!(opnd0, X86Opnd::Reg(_) | X86Opnd::Mem(_)));
+ assert!(matches!(opnd1, X86Opnd::Reg(_) | X86Opnd::Mem(_)));
+
+ match (opnd0, opnd1) {
+ (X86Opnd::Reg(_), X86Opnd::Reg(_) | X86Opnd::Mem(_)) => {
+ // REX.W + 0F AF /r: IMUL r64, r/m64
+ // Quadword register := Quadword register * r/m64.
+ write_rm(cb, false, true, opnd0, opnd1, None, &[0x0F, 0xAF]);
+ }
+
+ // Flip the operands to handle this case. This instruction has weird encoding restrictions.
+ (X86Opnd::Mem(_), X86Opnd::Reg(_)) => {
+ // REX.W + 0F AF /r: IMUL r64, r/m64
+ // Quadword register := Quadword register * r/m64.
+ write_rm(cb, false, true, opnd1, opnd0, None, &[0x0F, 0xAF]);
+ }
+
+ _ => unreachable!()
+ }
+}
+
/// Interrupt 3 - trap to debugger
pub fn int3(cb: &mut CodeBlock) {
cb.write_byte(0xcc);
}
-// Encode a relative jump to a label (direct or conditional)
+// Encode a conditional relative jump to a label
// Note: this always encodes a 32-bit offset
-fn write_jcc(cb: &mut CodeBlock, op0: u8, op1: u8, label_idx: usize) {
- // Write the opcode
- if op0 != 0xff {
- cb.write_byte(op0);
- }
-
- cb.write_byte(op1);
-
- // Add a reference to the label
- cb.label_ref(label_idx);
-
- // Relative 32-bit offset to be patched
- cb.write_int( 0, 32);
+fn write_jcc<const OP: u8>(cb: &mut CodeBlock, label_idx: usize) {
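+ // Jcc rel32: two-byte opcode 0F <OP> followed by a 32-bit offset, 6 bytes in total.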
+ cb.label_ref(label_idx, 6, |cb, src_addr, dst_addr| {
+ cb.write_byte(0x0F);
+ cb.write_byte(OP);
+ cb.write_int((dst_addr - src_addr) as u64, 32);
+ });
}
/// jcc - relative jumps to a label
-pub fn ja_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x87, label_idx); }
-pub fn jae_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); }
-pub fn jb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); }
-pub fn jbe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x86, label_idx); }
-pub fn jc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); }
-pub fn je_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x84, label_idx); }
-pub fn jg_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8F, label_idx); }
-pub fn jge_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8D, label_idx); }
-pub fn jl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8C, label_idx); }
-pub fn jle_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8E, label_idx); }
-pub fn jna_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x86, label_idx); }
-pub fn jnae_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); }
-pub fn jnb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); }
-pub fn jnbe_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x87, label_idx); }
-pub fn jnc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); }
-pub fn jne_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x85, label_idx); }
-pub fn jng_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8E, label_idx); }
-pub fn jnge_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8C, label_idx); }
-pub fn jnl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8D, label_idx); }
-pub fn jnle_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8F, label_idx); }
-pub fn jno_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x81, label_idx); }
-pub fn jnp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8b, label_idx); }
-pub fn jns_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x89, label_idx); }
-pub fn jnz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x85, label_idx); }
-pub fn jo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x80, label_idx); }
-pub fn jp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8A, label_idx); }
-pub fn jpe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8A, label_idx); }
-pub fn jpo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8B, label_idx); }
-pub fn js_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x88, label_idx); }
-pub fn jz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x84, label_idx); }
-pub fn jmp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0xFF, 0xE9, label_idx); }
+pub fn ja_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x87>(cb, label_idx); }
+pub fn jae_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x83>(cb, label_idx); }
+pub fn jb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x82>(cb, label_idx); }
+pub fn jbe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x86>(cb, label_idx); }
+pub fn jc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x82>(cb, label_idx); }
+pub fn je_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x84>(cb, label_idx); }
+pub fn jg_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8F>(cb, label_idx); }
+pub fn jge_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8D>(cb, label_idx); }
+pub fn jl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8C>(cb, label_idx); }
+pub fn jle_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8E>(cb, label_idx); }
+pub fn jna_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x86>(cb, label_idx); }
+pub fn jnae_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x82>(cb, label_idx); }
+pub fn jnb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x83>(cb, label_idx); }
+pub fn jnbe_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x87>(cb, label_idx); }
+pub fn jnc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x83>(cb, label_idx); }
+pub fn jne_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x85>(cb, label_idx); }
+pub fn jng_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8E>(cb, label_idx); }
+pub fn jnge_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8C>(cb, label_idx); }
+pub fn jnl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8D>(cb, label_idx); }
+pub fn jnle_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8F>(cb, label_idx); }
+pub fn jno_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x81>(cb, label_idx); }
+pub fn jnp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8b>(cb, label_idx); }
+pub fn jns_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x89>(cb, label_idx); }
+pub fn jnz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x85>(cb, label_idx); }
+pub fn jo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x80>(cb, label_idx); }
+pub fn jp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8A>(cb, label_idx); }
+pub fn jpe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8A>(cb, label_idx); }
+pub fn jpo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x8B>(cb, label_idx); }
+pub fn js_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x88>(cb, label_idx); }
+pub fn jz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc::<0x84>(cb, label_idx); }
+
+pub fn jmp_label(cb: &mut CodeBlock, label_idx: usize) {
+ cb.label_ref(label_idx, 5, |cb, src_addr, dst_addr| {
+ cb.write_byte(0xE9);
+ cb.write_int((dst_addr - src_addr) as u64, 32);
+ });
+}
/// Encode a relative jump to a pointer at a 32-bit offset (direct or conditional)
fn write_jcc_ptr(cb: &mut CodeBlock, op0: u8, op1: u8, dst_ptr: CodePtr) {
@@ -862,7 +892,7 @@ fn write_jcc_ptr(cb: &mut CodeBlock, op0: u8, op1: u8, dst_ptr: CodePtr) {
let end_ptr = cb.get_ptr(cb.write_pos + 4);
// Compute the jump offset
- let rel64 = dst_ptr.into_i64() - end_ptr.into_i64();
+ let rel64 = dst_ptr.as_offset() - end_ptr.as_offset();
if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() {
// Write the relative 32-bit jump offset
@@ -909,7 +939,7 @@ pub fn jmp_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0xFF, 0xE9
/// jmp - Indirect jump near to an R/M operand.
pub fn jmp_rm(cb: &mut CodeBlock, opnd: X86Opnd) {
- write_rm(cb, false, false, X86Opnd::None, opnd, 4, &[0xff]);
+ write_rm(cb, false, false, X86Opnd::None, opnd, Some(4), &[0xff]);
}
// jmp - Jump with relative 32-bit offset
@@ -922,7 +952,8 @@ pub fn jmp32(cb: &mut CodeBlock, offset: i32) {
pub fn lea(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
if let X86Opnd::Reg(reg) = dst {
assert!(reg.num_bits == 64);
- write_rm(cb, false, true, dst, src, 0xff, &[0x8d]);
+ assert!(matches!(src, X86Opnd::Mem(_) | X86Opnd::IPRel(_)));
+ write_rm(cb, false, true, dst, src, None, &[0x8d]);
} else {
unreachable!();
}
@@ -990,13 +1021,13 @@ pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
assert!(imm.num_bits <= mem.num_bits);
if mem.num_bits == 8 {
- write_rm(cb, false, false, X86Opnd::None, dst, 0xff, &[0xc6]);
+ write_rm(cb, false, false, X86Opnd::None, dst, None, &[0xc6]);
} else {
- write_rm(cb, mem.num_bits == 16, mem.num_bits == 64, X86Opnd::None, dst, 0, &[0xc7]);
+ write_rm(cb, mem.num_bits == 16, mem.num_bits == 64, X86Opnd::None, dst, Some(0), &[0xc7]);
}
let output_num_bits:u32 = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() };
- assert!(sig_imm_size(imm.value) <= (output_num_bits as u8));
+ assert!(imm_num_bits(imm.value) <= (output_num_bits as u8));
cb.write_int(imm.value as u64, output_num_bits);
},
// M + UImm
@@ -1004,14 +1035,14 @@ pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
assert!(uimm.num_bits <= mem.num_bits);
if mem.num_bits == 8 {
- write_rm(cb, false, false, X86Opnd::None, dst, 0xff, &[0xc6]);
+ write_rm(cb, false, false, X86Opnd::None, dst, None, &[0xc6]);
}
else {
- write_rm(cb, mem.num_bits == 16, mem.num_bits == 64, X86Opnd::None, dst, 0, &[0xc7]);
+ write_rm(cb, mem.num_bits == 16, mem.num_bits == 64, X86Opnd::None, dst, Some(0), &[0xc7]);
}
let output_num_bits = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() };
- assert!(sig_imm_size(uimm.value as i64) <= (output_num_bits as u8));
+ assert!(imm_num_bits(uimm.value as i64) <= (output_num_bits as u8));
cb.write_int(uimm.value, output_num_bits);
},
// * + Imm/UImm
@@ -1027,7 +1058,7 @@ pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
0xC6, // opMemImm8
0xFF, // opMemImmSml (not available)
0xFF, // opMemImmLrg
- 0xFF, // opExtImm
+ None, // opExtImm
dst,
src
);
@@ -1035,6 +1066,20 @@ pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
};
}
+/// A variant of mov used for always writing the value in 64 bits for GC offsets.
+pub fn movabs(cb: &mut CodeBlock, dst: X86Opnd, value: u64) {
+ match dst {
+ X86Opnd::Reg(reg) => {
+ assert_eq!(reg.num_bits, 64);
+ write_rex(cb, true, 0, 0, reg.reg_no);
+
+ write_opcode(cb, 0xb8, reg);
+ cb.write_int(value, 64);
+ },
+ _ => unreachable!()
+ }
+}
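+// For illustration (see test_movabs below): movabs(cb, R8, 0x34) emits a REX
+// prefix (49), the 0xB8+rd opcode (b8), and the full 8-byte immediate:
+//   49 b8 34 00 00 00 00 00 00 00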
+
/// movsx - Move with sign extension (signed integers)
pub fn movsx(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
if let X86Opnd::Reg(_dst_reg) = dst {
@@ -1045,9 +1090,9 @@ pub fn movsx(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
assert!(src_num_bits < dst_num_bits);
match src_num_bits {
- 8 => write_rm(cb, dst_num_bits == 16, dst_num_bits == 64, dst, src, 0xff, &[0x0f, 0xbe]),
- 16 => write_rm(cb, dst_num_bits == 16, dst_num_bits == 64, dst, src, 0xff, &[0x0f, 0xbf]),
- 32 => write_rm(cb, false, true, dst, src, 0xff, &[0x63]),
+ 8 => write_rm(cb, dst_num_bits == 16, dst_num_bits == 64, dst, src, None, &[0x0f, 0xbe]),
+ 16 => write_rm(cb, dst_num_bits == 16, dst_num_bits == 64, dst, src, None, &[0x0f, 0xbf]),
+ 32 => write_rm(cb, false, true, dst, src, None, &[0x63]),
_ => unreachable!()
};
} else {
@@ -1125,7 +1170,7 @@ pub fn not(cb: &mut CodeBlock, opnd: X86Opnd) {
cb,
0xf6, // opMemReg8
0xf7, // opMemRegPref
- 0x02, // opExt
+ Some(0x02), // opExt
opnd
);
}
@@ -1141,7 +1186,7 @@ pub fn or(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
0x80, // opMemImm8
0x83, // opMemImmSml
0x81, // opMemImmLrg
- 0x01, // opExtImm
+ Some(0x01), // opExtImm
opnd0,
opnd1
);
@@ -1161,7 +1206,7 @@ pub fn pop(cb: &mut CodeBlock, opnd: X86Opnd) {
X86Opnd::Mem(mem) => {
assert!(mem.num_bits == 64);
- write_rm(cb, false, false, X86Opnd::None, opnd, 0, &[0x8f]);
+ write_rm(cb, false, false, X86Opnd::None, opnd, Some(0), &[0x8f]);
},
_ => unreachable!()
};
@@ -1183,7 +1228,7 @@ pub fn push(cb: &mut CodeBlock, opnd: X86Opnd) {
write_opcode(cb, 0x50, reg);
},
X86Opnd::Mem(_mem) => {
- write_rm(cb, false, false, X86Opnd::None, opnd, 6, &[0xff]);
+ write_rm(cb, false, false, X86Opnd::None, opnd, Some(6), &[0xff]);
},
_ => unreachable!()
}
@@ -1199,8 +1244,8 @@ pub fn ret(cb: &mut CodeBlock) {
cb.write_byte(0xC3);
}
-// Encode a single-operand shift instruction
-fn write_shift(cb: &mut CodeBlock, op_mem_one_pref: u8, _op_mem_cl_pref: u8, op_mem_imm_pref: u8, op_ext: u8, opnd0: X86Opnd, opnd1: X86Opnd) {
+// Encode a bitwise shift instruction
+fn write_shift(cb: &mut CodeBlock, op_mem_one_pref: u8, op_mem_cl_pref: u8, op_mem_imm_pref: u8, op_ext: u8, opnd0: X86Opnd, opnd1: X86Opnd) {
assert!(matches!(opnd0, X86Opnd::Reg(_) | X86Opnd::Mem(_)));
// Check the size of opnd0
@@ -1210,16 +1255,26 @@ fn write_shift(cb: &mut CodeBlock, op_mem_one_pref: u8, _op_mem_cl_pref: u8, op_
let sz_pref = opnd_size == 16;
let rex_w = opnd_size == 64;
- if let X86Opnd::UImm(imm) = opnd1 {
- if imm.value == 1 {
- write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext, &[op_mem_one_pref]);
- } else {
- assert!(imm.num_bits <= 8);
- write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext, &[op_mem_imm_pref]);
- cb.write_byte(imm.value as u8);
+ match opnd1 {
+ X86Opnd::UImm(imm) => {
+ if imm.value == 1 {
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, Some(op_ext), &[op_mem_one_pref]);
+ } else {
+ assert!(imm.num_bits <= 8);
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, Some(op_ext), &[op_mem_imm_pref]);
+ cb.write_byte(imm.value as u8);
+ }
+ }
+
+ X86Opnd::Reg(reg) => {
+ // We can only use CL/RCX as the shift amount
+ assert!(reg.reg_no == RCX_REG.reg_no);
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, Some(op_ext), &[op_mem_cl_pref]);
+ }
+
+ _ => {
+ unreachable!("unsupported operands: {:?}, {:?}", opnd0, opnd1);
}
- } else {
- unreachable!();
}
}
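+// For illustration (see test_sal below): a register shift amount must be CL, so
+// sal(cb, RCX, CL) takes the op_mem_cl_pref path and encodes as 48 d3 e1.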
@@ -1286,7 +1341,7 @@ pub fn sub(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
0x80, // opMemImm8
0x83, // opMemImmSml
0x81, // opMemImmLrg
- 0x05, // opExtImm
+ Some(0x05), // opExtImm
opnd0,
opnd1
);
@@ -1323,10 +1378,10 @@ pub fn test(cb: &mut CodeBlock, rm_opnd: X86Opnd, test_opnd: X86Opnd) {
let rm_resized = resize_opnd(rm_opnd, uimm.num_bits);
if uimm.num_bits == 8 {
- write_rm(cb, false, false, X86Opnd::None, rm_resized, 0x00, &[0xf6]);
+ write_rm(cb, false, false, X86Opnd::None, rm_resized, Some(0x00), &[0xf6]);
cb.write_int(uimm.value, uimm.num_bits.into());
} else {
- write_rm(cb, uimm.num_bits == 16, false, X86Opnd::None, rm_resized, 0x00, &[0xf7]);
+ write_rm(cb, uimm.num_bits == 16, false, X86Opnd::None, rm_resized, Some(0x00), &[0xf7]);
cb.write_int(uimm.value, uimm.num_bits.into());
}
},
@@ -1335,16 +1390,16 @@ pub fn test(cb: &mut CodeBlock, rm_opnd: X86Opnd, test_opnd: X86Opnd) {
assert!(imm.num_bits <= 32);
assert!(rm_num_bits == 64);
- write_rm(cb, false, true, X86Opnd::None, rm_opnd, 0x00, &[0xf7]);
+ write_rm(cb, false, true, X86Opnd::None, rm_opnd, Some(0x00), &[0xf7]);
cb.write_int(imm.value as u64, 32);
},
X86Opnd::Reg(reg) => {
assert!(reg.num_bits == rm_num_bits);
if rm_num_bits == 8 {
- write_rm(cb, false, false, test_opnd, rm_opnd, 0xff, &[0x84]);
+ write_rm(cb, false, false, test_opnd, rm_opnd, None, &[0x84]);
} else {
- write_rm(cb, rm_num_bits == 16, rm_num_bits == 64, test_opnd, rm_opnd, 0xff, &[0x85]);
+ write_rm(cb, rm_num_bits == 16, rm_num_bits == 64, test_opnd, rm_opnd, None, &[0x85]);
}
},
_ => unreachable!()
@@ -1370,7 +1425,7 @@ pub fn xchg(cb: &mut CodeBlock, rm_opnd: X86Opnd, r_opnd: X86Opnd) {
// Write the opcode and register number
cb.write_byte(0x90 + (r_reg.reg_no & 7));
} else {
- write_rm(cb, false, true, r_opnd, rm_opnd, 0xff, &[0x87]);
+ write_rm(cb, false, true, r_opnd, rm_opnd, None, &[0x87]);
}
} else {
unreachable!();
@@ -1388,7 +1443,7 @@ pub fn xor(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
0x80, // opMemImm8
0x83, // opMemImmSml
0x81, // opMemImmLrg
- 0x06, // opExtImm
+ Some(0x06), // opExtImm
opnd0,
opnd1
);
diff --git a/yjit/src/asm/x86_64/tests.rs b/yjit/src/asm/x86_64/tests.rs
index ffcc063420..5ae983270f 100644
--- a/yjit/src/asm/x86_64/tests.rs
+++ b/yjit/src/asm/x86_64/tests.rs
@@ -1,18 +1,6 @@
#![cfg(test)]
use crate::asm::x86_64::*;
-use std::fmt;
-
-/// Produce hex string output from the bytes in a code block
-impl<'a> fmt::LowerHex for super::CodeBlock {
- fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
- for pos in 0..self.write_pos {
- let byte = unsafe { self.mem_block.start_ptr().raw_ptr().add(pos).read() };
- fmtr.write_fmt(format_args!("{:02x}", byte))?;
- }
- Ok(())
- }
-}
/// Check that the bytes for an instruction sequence match a hex string
fn check_bytes<R>(bytes: &str, run: R) where R: FnOnce(&mut super::CodeBlock) {
@@ -80,7 +68,7 @@ fn test_call_ptr() {
// calling a lower address
check_bytes("e8fbffffff", |cb| {
let ptr = cb.get_write_ptr();
- call_ptr(cb, RAX, ptr.raw_ptr());
+ call_ptr(cb, RAX, ptr.raw_ptr(cb));
});
}
@@ -109,6 +97,7 @@ fn test_cmp() {
check_bytes("39f9", |cb| cmp(cb, ECX, EDI));
check_bytes("493b1424", |cb| cmp(cb, RDX, mem_opnd(64, R12, 0)));
check_bytes("4883f802", |cb| cmp(cb, RAX, imm_opnd(2)));
+ check_bytes("81f900000080", |cb| cmp(cb, ECX, uimm_opnd(0x8000_0000)));
}
#[test]
@@ -117,6 +106,15 @@ fn test_cqo() {
}
#[test]
+fn test_imul() {
+ check_bytes("480fafc3", |cb| imul(cb, RAX, RBX));
+ check_bytes("480faf10", |cb| imul(cb, RDX, mem_opnd(64, RAX, 0)));
+
+ // Operands flipped for encoding since multiplication is commutative
+ check_bytes("480faf10", |cb| imul(cb, mem_opnd(64, RAX, 0), RDX));
+}
+
+#[test]
fn test_jge_label() {
check_bytes("0f8dfaffffff", |cb| {
let label_idx = cb.new_label("loop".to_owned());
@@ -201,6 +199,12 @@ fn test_mov() {
}
#[test]
+fn test_movabs() {
+ check_bytes("49b83400000000000000", |cb| movabs(cb, R8, 0x34));
+ check_bytes("49b80000008000000000", |cb| movabs(cb, R8, 0x80000000));
+}
+
+#[test]
fn test_mov_unsigned() {
// MOV AL, imm8
check_bytes("b001", |cb| mov(cb, AL, uimm_opnd(1)));
@@ -345,6 +349,7 @@ fn test_sal() {
check_bytes("d1e1", |cb| sal(cb, ECX, uimm_opnd(1)));
check_bytes("c1e505", |cb| sal(cb, EBP, uimm_opnd(5)));
check_bytes("d1642444", |cb| sal(cb, mem_opnd(32, RSP, 68), uimm_opnd(1)));
+ check_bytes("48d3e1", |cb| sal(cb, RCX, CL));
}
#[test]
@@ -364,6 +369,14 @@ fn test_sub() {
}
#[test]
+#[should_panic]
+fn test_sub_uimm_too_large() {
+ // This immediate becomes a different value after
+ // sign extension, so it's not safe to encode.
+ check_bytes("ff", |cb| sub(cb, RCX, uimm_opnd(0x8000_0000)));
+}
+
+#[test]
fn test_test() {
check_bytes("84c0", |cb| test(cb, AL, AL));
check_bytes("6685c0", |cb| test(cb, AX, AX));
@@ -425,19 +438,19 @@ fn basic_capstone_usage() -> std::result::Result<(), capstone::Error> {
}
#[test]
-#[cfg(feature = "asm_comments")]
+#[cfg(feature = "disasm")]
fn block_comments() {
let mut cb = super::CodeBlock::new_dummy(4096);
- let first_write_ptr = cb.get_write_ptr().into_usize();
+ let first_write_ptr = cb.get_write_ptr().raw_addr(&cb);
cb.add_comment("Beginning");
xor(&mut cb, EAX, EAX); // 2 bytes long
- let second_write_ptr = cb.get_write_ptr().into_usize();
+ let second_write_ptr = cb.get_write_ptr().raw_addr(&cb);
cb.add_comment("Two bytes in");
cb.add_comment("Still two bytes in");
cb.add_comment("Still two bytes in"); // Duplicate, should be ignored
test(&mut cb, mem_opnd(64, RSI, 64), imm_opnd(!0x08)); // 8 bytes long
- let third_write_ptr = cb.get_write_ptr().into_usize();
+ let third_write_ptr = cb.get_write_ptr().raw_addr(&cb);
cb.add_comment("Ten bytes in");
assert_eq!(&vec!( "Beginning".to_string() ), cb.comments_at(first_write_ptr).unwrap());
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
new file mode 100644
index 0000000000..3bf949ba7d
--- /dev/null
+++ b/yjit/src/backend/arm64/mod.rs
@@ -0,0 +1,1835 @@
+use std::mem::take;
+
+use crate::asm::{CodeBlock, OutlinedCb};
+use crate::asm::arm64::*;
+use crate::cruby::*;
+use crate::backend::ir::*;
+use crate::virtualmem::CodePtr;
+use crate::utils::*;
+
+// Use the arm64 register type for this platform
+pub type Reg = A64Reg;
+
+// Callee-saved registers
+pub const _CFP: Opnd = Opnd::Reg(X19_REG);
+pub const _EC: Opnd = Opnd::Reg(X20_REG);
+pub const _SP: Opnd = Opnd::Reg(X21_REG);
+
+// C argument registers on this platform
+pub const _C_ARG_OPNDS: [Opnd; 6] = [
+ Opnd::Reg(X0_REG),
+ Opnd::Reg(X1_REG),
+ Opnd::Reg(X2_REG),
+ Opnd::Reg(X3_REG),
+ Opnd::Reg(X4_REG),
+ Opnd::Reg(X5_REG)
+];
+
+// C return value register on this platform
+pub const C_RET_REG: Reg = X0_REG;
+pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG);
+
+// These constants define the way we work with Arm64's stack pointer. The stack
+// pointer always needs to be aligned to a 16-byte boundary.
+pub const C_SP_REG: A64Opnd = X31;
+pub const C_SP_STEP: i32 = 16;
+
+impl CodeBlock {
+ // The maximum number of bytes that can be generated by emit_jmp_ptr.
+ pub fn jmp_ptr_bytes(&self) -> usize {
+ // b instruction's offset is encoded as imm26 times 4. It can jump to
+ // +/-128MiB, so this can be used when --yjit-exec-mem-size <= 128.
+ let num_insns = if b_offset_fits_bits(self.virtual_region_size() as i64 / 4) {
+ 1 // b instruction
+ } else {
+ 5 // 4 instructions to load a 64-bit absolute address + br instruction
+ };
+ num_insns * 4
+ }
+
+ // The maximum number of instructions that can be generated by emit_conditional_jump.
+ fn conditional_jump_insns(&self) -> i32 {
+ // The worst case is the instruction count of a jump (jmp_ptr_bytes) plus one bcond.
+ self.jmp_ptr_bytes() as i32 / 4 + 1
+ }
+}
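+// Illustrative consequence of the above: when the virtual region fits in the b
+// instruction's +/-128MiB range, jmp_ptr_bytes() is 4 (one b); otherwise it is
+// 20 (four instructions to load a 64-bit address plus a br).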
+
+/// Map Opnd to A64Opnd
+impl From<Opnd> for A64Opnd {
+ fn from(opnd: Opnd) -> Self {
+ match opnd {
+ Opnd::UImm(value) => A64Opnd::new_uimm(value),
+ Opnd::Imm(value) => A64Opnd::new_imm(value),
+ Opnd::Reg(reg) => A64Opnd::Reg(reg),
+ Opnd::Mem(Mem { base: MemBase::Reg(reg_no), num_bits, disp }) => {
+ A64Opnd::new_mem(num_bits, A64Opnd::Reg(A64Reg { num_bits, reg_no }), disp)
+ },
+ Opnd::Mem(Mem { base: MemBase::InsnOut(_), .. }) => {
+ panic!("attempted to lower an Opnd::Mem with a MemBase::InsnOut base")
+ },
+ Opnd::CArg(_) => panic!("attempted to lower an Opnd::CArg"),
+ Opnd::InsnOut { .. } => panic!("attempted to lower an Opnd::InsnOut"),
+ Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"),
+ Opnd::Stack { .. } => panic!("attempted to lower an Opnd::Stack"),
+ Opnd::None => panic!(
+ "Attempted to lower an Opnd::None. This often happens when an out operand was not allocated for an instruction because the output of the instruction was not used. Please ensure you are using the output."
+ ),
+
+ }
+ }
+}
+
+/// Also implement going from a reference to an operand for convenience.
+impl From<&Opnd> for A64Opnd {
+ fn from(opnd: &Opnd) -> Self {
+ A64Opnd::from(*opnd)
+ }
+}
+
+/// Call emit_jmp_ptr and immediately invalidate the written range.
+/// This is needed when next_page also moves other_cb that is not invalidated
+/// by compile_with_regs. Doing it here allows you to avoid invalidating a lot
+/// more than necessary when other_cb jumps from a position early in the page.
+/// This invalidates a small range of cb twice, but we accept the small cost.
+fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) {
+ #[cfg(not(test))]
+ let start = cb.get_write_ptr();
+ emit_jmp_ptr(cb, dst_ptr, true);
+ #[cfg(not(test))]
+ {
+ let end = cb.get_write_ptr();
+ unsafe { rb_yjit_icache_invalidate(start.raw_ptr(cb) as _, end.raw_ptr(cb) as _) };
+ }
+}
+
+fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
+ let src_addr = cb.get_write_ptr().as_offset();
+ let dst_addr = dst_ptr.as_offset();
+
+ // If the offset is short enough, then we'll use the
+ // branch instruction. Otherwise, we'll move the
+ // destination into a register and use the branch
+ // register instruction.
+ let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
+ b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
+ 1
+ } else {
+ let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64);
+ br(cb, Assembler::SCRATCH0);
+ num_insns + 1
+ };
+
+ if padding {
+ // Make sure it's always a consistent number of
+ // instructions in case it gets patched and has to
+ // use the other branch.
+ assert!(num_insns * 4 <= cb.jmp_ptr_bytes());
+ for _ in num_insns..(cb.jmp_ptr_bytes() / 4) {
+ nop(cb);
+ }
+ }
+}
+
+/// Emit the required instructions to load the given value into the
+/// given register. Our goal here is to use as few instructions as
+/// possible to get this value into the register.
+fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
+ let mut current = value;
+
+ if current <= 0xffff {
+ // If the value fits into a single movz
+ // instruction, then we'll use that.
+ movz(cb, rd, A64Opnd::new_uimm(current), 0);
+ return 1;
+ } else if BitmaskImmediate::try_from(current).is_ok() {
+ // Otherwise, if the immediate can be encoded
+ // with the special bitmask immediate encoding,
+ // we'll use that.
+ mov(cb, rd, A64Opnd::new_uimm(current));
+ return 1;
+ } else {
+ // Finally we'll fall back to encoding the value
+ // using movz for the first 16 bits and movk for
+ // each subsequent set of 16 bits, as long as
+ // they are necessary.
+ movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0);
+ let mut num_insns = 1;
+
+ // (We're sure this is necessary since we already
+ // checked above that it doesn't fit into a single movz.)
+ current >>= 16;
+ movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16);
+ num_insns += 1;
+
+ if current > 0xffff {
+ current >>= 16;
+ movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32);
+ num_insns += 1;
+ }
+
+ if current > 0xffff {
+ current >>= 16;
+ movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48);
+ num_insns += 1;
+ }
+ return num_insns;
+ }
+}
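+// Worked example (hypothetical value): 0x5_0000_0001 is neither <= 0xffff nor a
+// valid bitmask immediate, so it takes three instructions:
+//   movz rd, #0x0001, lsl #0
+//   movk rd, #0x0000, lsl #16
+//   movk rd, #0x0005, lsl #32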
+
+/// List of registers that can be used for stack temps.
+/// These are caller-saved registers.
+pub static TEMP_REGS: [Reg; 5] = [X1_REG, X9_REG, X10_REG, X14_REG, X15_REG];
+
+#[derive(Debug, PartialEq)]
+enum EmitError {
+ RetryOnNextPage,
+ OutOfMemory,
+}
+
+impl Assembler
+{
+ // Special scratch registers for intermediate processing.
+ // These registers are caller-saved (so we don't have to save them before using them)
+ pub const SCRATCH_REG: Reg = X16_REG;
+ const SCRATCH0: A64Opnd = A64Opnd::Reg(Assembler::SCRATCH_REG);
+ const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG);
+
+ /// Get the list of registers from which we will allocate on this platform
+ /// These are caller-saved registers
+ /// Note: we intentionally exclude C_RET_REG (X0) from this list
+ /// because of the way it's used in gen_leave() and gen_leave_exit()
+ pub fn get_alloc_regs() -> Vec<Reg> {
+ vec![X11_REG, X12_REG, X13_REG]
+ }
+
+ /// Get a list of all of the caller-saved registers
+ pub fn get_caller_save_regs() -> Vec<Reg> {
+ vec![X1_REG, X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG]
+ }
+
+ /// Split platform-specific instructions
+ /// The transformations done here are meant to make our lives simpler in later
+ /// stages of the compilation pipeline.
+ /// Here we may want to make sure that all instructions (except load and store)
+ /// have no memory operands.
+ fn arm64_split(mut self) -> Assembler
+ {
+ /// When we're attempting to load a memory address into a register, the
+ /// displacement must fit into the maximum number of bits for an Op::Add
+ /// immediate. If it doesn't, we have to load the displacement into a
+ /// register first.
+ fn split_lea_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd {
+ match opnd {
+ Opnd::Mem(Mem { base, disp, num_bits }) => {
+ if disp >= 0 && ShiftedImmediate::try_from(disp as u64).is_ok() {
+ asm.lea(opnd)
+ } else {
+ let disp = asm.load(Opnd::Imm(disp.into()));
+ let reg = match base {
+ MemBase::Reg(reg_no) => Opnd::Reg(Reg { reg_no, num_bits }),
+ MemBase::InsnOut(idx) => Opnd::InsnOut { idx, num_bits }
+ };
+
+ asm.add(reg, disp)
+ }
+ },
+ _ => unreachable!("Op::Lea only accepts Opnd::Mem operands.")
+ }
+ }
+
+ /// When you're storing a register into a memory location or loading a
+ /// memory location into a register, the displacement from the base
+ /// register of the memory location must fit into 9 bits. If it doesn't,
+ /// then we need to load that memory address into a register first.
+ fn split_memory_address(asm: &mut Assembler, opnd: Opnd) -> Opnd {
+ match opnd {
+ Opnd::Mem(mem) => {
+ if mem_disp_fits_bits(mem.disp) {
+ opnd
+ } else {
+ let base = split_lea_operand(asm, opnd);
+ Opnd::mem(64, base, 0)
+ }
+ },
+ _ => unreachable!("Can only split memory addresses.")
+ }
+ }
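+ // For illustration (hypothetical displacement): [x0, #0x8000] is outside the
+ // signed 9-bit range, so the address is first computed into a register via
+ // split_lea_operand and the resulting access uses displacement 0.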
+
+ /// Any memory operands you're sending into an Op::Load instruction need
+ /// to be split in case their displacement doesn't fit into 9 bits.
+ fn split_load_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd {
+ match opnd {
+ Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd,
+ Opnd::Mem(_) => {
+ let split_opnd = split_memory_address(asm, opnd);
+ let out_opnd = asm.load(split_opnd);
+ // Many Arm insns support only 32-bit or 64-bit operands. asm.load with fewer
+ // bits zero-extends the value, so it's safe to recognize it as a 32-bit value.
+ if out_opnd.rm_num_bits() < 32 {
+ out_opnd.with_num_bits(32).unwrap()
+ } else {
+ out_opnd
+ }
+ },
+ _ => asm.load(opnd)
+ }
+ }
+
+ /// Operands that take the place of bitmask immediates must follow a
+ /// certain encoding. In this function we ensure that those operands
+ /// do follow that encoding, and if they don't then we load them first.
+ fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd, dest_num_bits: u8) -> Opnd {
+ match opnd {
+ Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } | Opnd::Stack { .. } => opnd,
+ Opnd::Mem(_) => split_load_operand(asm, opnd),
+ Opnd::Imm(imm) => {
+ if imm == 0 {
+ Opnd::Reg(XZR_REG)
+ } else if (dest_num_bits == 64 &&
+ BitmaskImmediate::try_from(imm as u64).is_ok()) ||
+ (dest_num_bits == 32 &&
+ u32::try_from(imm).is_ok() &&
+ BitmaskImmediate::new_32b_reg(imm as u32).is_ok()) {
+ Opnd::UImm(imm as u64)
+ } else {
+ asm.load(opnd).with_num_bits(dest_num_bits).unwrap()
+ }
+ },
+ Opnd::UImm(uimm) => {
+ if (dest_num_bits == 64 && BitmaskImmediate::try_from(uimm).is_ok()) ||
+ (dest_num_bits == 32 &&
+ u32::try_from(uimm).is_ok() &&
+ BitmaskImmediate::new_32b_reg(uimm as u32).is_ok()) {
+ opnd
+ } else {
+ asm.load(opnd).with_num_bits(dest_num_bits).unwrap()
+ }
+ },
+ Opnd::None | Opnd::Value(_) => unreachable!()
+ }
+ }
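+ // For illustration (hypothetical values): 0x0000_ffff_ffff_0000 is a valid
+ // bitmask immediate (one contiguous run of ones), so it stays an immediate
+ // operand, while 0x1234_5678 is not and gets loaded into a register first.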
+
+ /// Operands that take the place of a shifted immediate must fit within
+ /// a certain size. If they don't then we need to load them first.
+ fn split_shifted_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd {
+ match opnd {
+ Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } => opnd,
+ Opnd::Mem(_) => split_load_operand(asm, opnd),
+ Opnd::Imm(imm) => if ShiftedImmediate::try_from(imm as u64).is_ok() {
+ opnd
+ } else {
+ asm.load(opnd)
+ }
+ Opnd::UImm(uimm) => {
+ if ShiftedImmediate::try_from(uimm).is_ok() {
+ opnd
+ } else {
+ asm.load(opnd)
+ }
+ },
+ Opnd::None | Opnd::Value(_) | Opnd::Stack { .. } => unreachable!()
+ }
+ }
+
+ /// Returns the operands that should be used for a boolean logic
+ /// instruction.
+ fn split_boolean_operands(asm: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> (Opnd, Opnd) {
+ match (opnd0, opnd1) {
+ (Opnd::Reg(_), Opnd::Reg(_)) => {
+ (opnd0, opnd1)
+ },
+ (reg_opnd @ Opnd::Reg(_), other_opnd) |
+ (other_opnd, reg_opnd @ Opnd::Reg(_)) => {
+ let opnd1 = split_bitmask_immediate(asm, other_opnd, reg_opnd.rm_num_bits());
+ (reg_opnd, opnd1)
+ },
+ _ => {
+ let opnd0 = split_load_operand(asm, opnd0);
+ let opnd1 = split_bitmask_immediate(asm, opnd1, opnd0.rm_num_bits());
+ (opnd0, opnd1)
+ }
+ }
+ }
+
+ /// Returns the operands that should be used for a csel instruction.
+ fn split_csel_operands(asm: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> (Opnd, Opnd) {
+ let opnd0 = match opnd0 {
+ Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd0,
+ _ => split_load_operand(asm, opnd0)
+ };
+
+ let opnd1 = match opnd1 {
+ Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd1,
+ _ => split_load_operand(asm, opnd1)
+ };
+
+ (opnd0, opnd1)
+ }
+
+ fn split_less_than_32_cmp(asm: &mut Assembler, opnd0: Opnd) -> Opnd {
+ match opnd0 {
+ Opnd::Reg(_) | Opnd::InsnOut { .. } => {
+ match opnd0.rm_num_bits() {
+ 8 => asm.and(opnd0.with_num_bits(64).unwrap(), Opnd::UImm(0xff)),
+ 16 => asm.and(opnd0.with_num_bits(64).unwrap(), Opnd::UImm(0xffff)),
+ 32 | 64 => opnd0,
+ bits => unreachable!("Invalid number of bits. {}", bits)
+ }
+ }
+ _ => opnd0
+ }
+ }
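+ // For illustration: an 8-bit register operand is masked with `and x, 0xff`
+ // first so that stale upper bits cannot affect the 32/64-bit cmp we emit.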
+
+ let live_ranges: Vec<usize> = take(&mut self.live_ranges);
+ let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits));
+ let asm = &mut asm_local;
+ let mut iterator = self.into_draining_iter();
+
+ while let Some((index, mut insn)) = iterator.next_mapped() {
+ // Here we're going to map the operands of the instruction to load
+ // any Opnd::Value operands into registers if they are heap objects
+ // such that only the Op::Load instruction needs to handle that
+ // case. If the values aren't heap objects then we'll treat them as
+ // if they were just unsigned integers.
+ let is_load = matches!(insn, Insn::Load { .. } | Insn::LoadInto { .. });
+ let mut opnd_iter = insn.opnd_iter_mut();
+
+ while let Some(opnd) = opnd_iter.next() {
+ match opnd {
+ Opnd::Value(value) => {
+ if value.special_const_p() {
+ *opnd = Opnd::UImm(value.as_u64());
+ } else if !is_load {
+ *opnd = asm.load(*opnd);
+ }
+ },
+ Opnd::Stack { .. } => {
+ *opnd = asm.lower_stack_opnd(opnd);
+ }
+ _ => {}
+ };
+ }
+
+ // We are replacing instructions here so we know they are already
+ // being used. It is okay not to use their output here.
+ #[allow(unused_must_use)]
+ match &mut insn {
+ Insn::Add { left, right, .. } => {
+ match (*left, *right) {
+ (Opnd::Reg(_) | Opnd::InsnOut { .. }, Opnd::Reg(_) | Opnd::InsnOut { .. }) => {
+ asm.add(*left, *right);
+ },
+ (reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. }), other_opnd) |
+ (other_opnd, reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. })) => {
+ let opnd1 = split_shifted_immediate(asm, other_opnd);
+ asm.add(reg_opnd, opnd1);
+ },
+ _ => {
+ let opnd0 = split_load_operand(asm, *left);
+ let opnd1 = split_shifted_immediate(asm, *right);
+ asm.add(opnd0, opnd1);
+ }
+ }
+ },
+ Insn::And { left, right, out } |
+ Insn::Or { left, right, out } |
+ Insn::Xor { left, right, out } => {
+ let (opnd0, opnd1) = split_boolean_operands(asm, *left, *right);
+ *left = opnd0;
+ *right = opnd1;
+
+ // Since these instructions are lowered to an instruction that has 2 input
+ // registers and an output register, look to merge with an `Insn::Mov` that
+ // follows which puts the output in another register. For example:
+ // `Add a, b => out` followed by `Mov c, out` becomes `Add a, b => c`.
+ if let (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src })) = (left, right, iterator.peek()) {
+ if live_ranges[index] == index + 1 {
+ // Check after potentially lowering a stack operand to a register operand
+ let lowered_dest = if let Opnd::Stack { .. } = dest {
+ asm.lower_stack_opnd(dest)
+ } else {
+ *dest
+ };
+ if out == src && matches!(lowered_dest, Opnd::Reg(_)) {
+ *out = lowered_dest;
+ iterator.map_insn_index(asm);
+ iterator.next_unmapped(); // Pop merged Insn::Mov
+ }
+ }
+ }
+
+ asm.push_insn(insn);
+ }
+ // Lower to Joz and Jonz so that we can generate CBZ/CBNZ for compare-with-zero-and-branch.
+ ref insn @ Insn::Cmp { ref left, right: ref right @ (Opnd::UImm(0) | Opnd::Imm(0)) } |
+ ref insn @ Insn::Test { ref left, right: ref right @ (Opnd::InsnOut { .. } | Opnd::Reg(_)) } if {
+ let same_opnd_if_test = if let Insn::Test { .. } = insn {
+ left == right
+ } else {
+ true
+ };
+
+ same_opnd_if_test && if let Some(
+ Insn::Jz(target) | Insn::Je(target) | Insn::Jnz(target) | Insn::Jne(target)
+ ) = iterator.peek() {
+ matches!(target, Target::SideExit { .. })
+ } else {
+ false
+ }
+ } => {
+ let reg = split_load_operand(asm, *left);
+ match iterator.peek() {
+ Some(Insn::Jz(target) | Insn::Je(target)) => asm.push_insn(Insn::Joz(reg, *target)),
+ Some(Insn::Jnz(target) | Insn::Jne(target)) => asm.push_insn(Insn::Jonz(reg, *target)),
+ _ => ()
+ }
+
+ iterator.map_insn_index(asm);
+ iterator.next_unmapped(); // Pop merged jump instruction
+ }
+ Insn::CCall { opnds, fptr, .. } => {
+ assert!(opnds.len() <= C_ARG_OPNDS.len());
+
+ // Load each operand into the corresponding argument
+ // register.
+ // Note: the iteration order is reversed to avoid corrupting x0,
+ // which is both the return value and first argument register
+ for (idx, opnd) in opnds.into_iter().enumerate().rev() {
+ // If the value that we're sending is 0, then we can use
+ // the zero register, so in this case we'll just send
+ // a UImm of 0 along as the argument to the move.
+ let value = match opnd {
+ Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0),
+ Opnd::Mem(_) => split_memory_address(asm, *opnd),
+ _ => *opnd
+ };
+
+ asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), value);
+ }
+
+ // Now we push the CCall without any arguments so that it
+ // just performs the call.
+ asm.ccall(*fptr, vec![]);
+ },
+ Insn::Cmp { left, right } => {
+ let opnd0 = split_load_operand(asm, *left);
+ let opnd0 = split_less_than_32_cmp(asm, opnd0);
+ let split_right = split_shifted_immediate(asm, *right);
+ let opnd1 = match split_right {
+ Opnd::InsnOut { .. } if opnd0.num_bits() != split_right.num_bits() => {
+ split_right.with_num_bits(opnd0.num_bits().unwrap()).unwrap()
+ },
+ _ => split_right
+ };
+
+ asm.cmp(opnd0, opnd1);
+ },
+ Insn::CRet(opnd) => {
+ match opnd {
+ // If the value is already in the return register, then
+ // we don't need to do anything.
+ Opnd::Reg(C_RET_REG) => {},
+
+ // If the value is a memory address, we need to first
+ // make sure the displacement isn't too large and then
+ // load it into the return register.
+ Opnd::Mem(_) => {
+ let split = split_memory_address(asm, *opnd);
+ asm.load_into(C_RET_OPND, split);
+ },
+
+ // Otherwise we just need to load the value into the
+ // return register.
+ _ => {
+ asm.load_into(C_RET_OPND, *opnd);
+ }
+ }
+ asm.cret(C_RET_OPND);
+ },
+ Insn::CSelZ { truthy, falsy, out } |
+ Insn::CSelNZ { truthy, falsy, out } |
+ Insn::CSelE { truthy, falsy, out } |
+ Insn::CSelNE { truthy, falsy, out } |
+ Insn::CSelL { truthy, falsy, out } |
+ Insn::CSelLE { truthy, falsy, out } |
+ Insn::CSelG { truthy, falsy, out } |
+ Insn::CSelGE { truthy, falsy, out } => {
+ let (opnd0, opnd1) = split_csel_operands(asm, *truthy, *falsy);
+ *truthy = opnd0;
+ *falsy = opnd1;
+ // Merge `csel` and `mov` into a single `csel` when possible
+ match iterator.peek() {
+ Some(Insn::Mov { dest: Opnd::Reg(reg), src })
+ if matches!(out, Opnd::InsnOut { .. }) && *out == *src && live_ranges[index] == index + 1 => {
+ *out = Opnd::Reg(*reg);
+ asm.push_insn(insn);
+ iterator.map_insn_index(asm);
+ iterator.next_unmapped(); // Pop merged Insn::Mov
+ }
+ _ => {
+ asm.push_insn(insn);
+ }
+ }
+ },
+ Insn::IncrCounter { mem, value } => {
+ let counter_addr = match mem {
+ Opnd::Mem(_) => split_lea_operand(asm, *mem),
+ _ => *mem
+ };
+
+ asm.incr_counter(counter_addr, *value);
+ },
+ Insn::JmpOpnd(opnd) => {
+ if let Opnd::Mem(_) = opnd {
+ let opnd0 = split_load_operand(asm, *opnd);
+ asm.jmp_opnd(opnd0);
+ } else {
+ asm.jmp_opnd(*opnd);
+ }
+ },
+ Insn::Load { opnd, .. } |
+ Insn::LoadInto { opnd, .. } => {
+ *opnd = match opnd {
+ Opnd::Mem(_) => split_memory_address(asm, *opnd),
+ _ => *opnd
+ };
+ asm.push_insn(insn);
+ },
+ Insn::LoadSExt { opnd, .. } => {
+ match opnd {
+ // We only want to sign extend if the operand is a
+ // register, instruction output, or memory address that
+ // is 32 bits. Otherwise we'll just load the value
+ // directly since there's no need to sign extend.
+ Opnd::Reg(Reg { num_bits: 32, .. }) |
+ Opnd::InsnOut { num_bits: 32, .. } |
+ Opnd::Mem(Mem { num_bits: 32, .. }) => {
+ asm.load_sext(*opnd);
+ },
+ _ => {
+ asm.load(*opnd);
+ }
+ };
+ },
+ Insn::Mov { dest, src } => {
+ match (&dest, &src) {
+ // If we're attempting to load into a memory operand, then
+ // we'll switch over to the store instruction.
+ (Opnd::Mem(_), _) => {
+ let opnd0 = split_memory_address(asm, *dest);
+ let value = match *src {
+ // If the first operand is zero, then we can just use
+ // the zero register.
+ Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG),
+ // If the first operand is a memory operand, we're going
+ // to transform this into a store instruction, so we'll
+ // need to load this anyway.
+ Opnd::UImm(_) => asm.load(*src),
+ // The value that is being moved must be either a
+ // register or an immediate that can be encoded as a
+ // bitmask immediate. Otherwise, we'll need to split the
+ // move into multiple instructions.
+ _ => split_bitmask_immediate(asm, *src, dest.rm_num_bits())
+ };
+
+ asm.store(opnd0, value);
+ },
+ // If we're loading a memory operand into a register, then
+ // we'll switch over to the load instruction.
+ (Opnd::Reg(_), Opnd::Mem(_)) => {
+ let value = split_memory_address(asm, *src);
+ asm.load_into(*dest, value);
+ },
+ // Otherwise we'll use the normal mov instruction.
+ (Opnd::Reg(_), _) => {
+ let value = match *src {
+ // Unlike other instructions, we can avoid splitting this case by using movz.
+ Opnd::UImm(uimm) if uimm <= 0xffff => *src,
+ _ => split_bitmask_immediate(asm, *src, dest.rm_num_bits()),
+ };
+ asm.mov(*dest, value);
+ },
+ _ => unreachable!()
+ };
+ },
+ Insn::Not { opnd, .. } => {
+ // The value that is being inverted (bitwise NOT) must be in a register, so
+ // if we get anything else we need to load it first.
+ let opnd0 = match opnd {
+ Opnd::Mem(_) => split_load_operand(asm, *opnd),
+ _ => *opnd
+ };
+
+ asm.not(opnd0);
+ },
+ Insn::LShift { opnd, .. } |
+ Insn::RShift { opnd, .. } |
+ Insn::URShift { opnd, .. } => {
+ // The operand must be in a register, so
+ // if we get anything else we need to load it first.
+ let opnd0 = match opnd {
+ Opnd::Mem(_) => split_load_operand(asm, *opnd),
+ _ => *opnd
+ };
+
+ *opnd = opnd0;
+ asm.push_insn(insn);
+ },
+ Insn::Store { dest, src } => {
+ // The value being stored must be in a register, so if it's
+ // not already one we'll load it first.
+ let opnd1 = match src {
+ // If the first operand is zero, then we can just use
+ // the zero register.
+ Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG),
+ // Otherwise we'll check if we need to load it first.
+ _ => split_load_operand(asm, *src)
+ };
+
+ match dest {
+ Opnd::Reg(_) => {
+ // Store does not support a register as a dest operand.
+ asm.mov(*dest, opnd1);
+ }
+ _ => {
+ // The displacement for the STUR instruction can't be more
+ // than 9 bits long. If it's longer, we need to load the
+ // memory address into a register first.
+ let opnd0 = split_memory_address(asm, *dest);
+ asm.store(opnd0, opnd1);
+ }
+ }
+ },
+ Insn::Sub { left, right, .. } => {
+ let opnd0 = split_load_operand(asm, *left);
+ let opnd1 = split_shifted_immediate(asm, *right);
+ asm.sub(opnd0, opnd1);
+ },
+ Insn::Mul { left, right, .. } => {
+ let opnd0 = split_load_operand(asm, *left);
+ let opnd1 = split_load_operand(asm, *right);
+ asm.mul(opnd0, opnd1);
+ },
+ Insn::Test { left, right } => {
+ // The value being tested must be in a register, so if it's
+ // not already one we'll load it first.
+ let opnd0 = split_load_operand(asm, *left);
+
+ // The second value must be either a register or an
+ // unsigned immediate that can be encoded as a bitmask
+ // immediate. If it's not one of those, we'll need to load
+ // it first.
+ let opnd1 = split_bitmask_immediate(asm, *right, opnd0.rm_num_bits());
+ asm.test(opnd0, opnd1);
+ },
+ _ => {
+ // If we have an output operand, then we need to replace it
+ // with a new output operand from the new assembler.
+ if insn.out_opnd().is_some() {
+ let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter());
+ let out = insn.out_opnd_mut().unwrap();
+ *out = asm.next_opnd_out(out_num_bits);
+ }
+
+ asm.push_insn(insn);
+ }
+ };
+
+ iterator.map_insn_index(asm);
+ }
+
+ asm_local
+ }
+
+ /// Emit platform-specific machine code
+ /// Returns a list of GC offsets. Can return failure to signal caller to retry.
+ fn arm64_emit(&mut self, cb: &mut CodeBlock, ocb: &mut Option<&mut OutlinedCb>) -> Result<Vec<u32>, EmitError> {
+ /// Determine how many instructions it will take to represent moving
+ /// this value into a register. Note that the return value of this
+ /// function must correspond to how many instructions are used to
+ /// represent this load in the emit_load_value function.
+ fn emit_load_size(value: u64) -> u8 {
+ if BitmaskImmediate::try_from(value).is_ok() {
+ return 1;
+ }
+
+ if value < (1 << 16) {
+ 1
+ } else if value < (1 << 32) {
+ 2
+ } else if value < (1 << 48) {
+ 3
+ } else {
+ 4
+ }
+ }
+
+ /// Emit a conditional jump instruction to a specific target. This is
+ /// called when lowering any of the conditional jump instructions.
+ fn emit_conditional_jump<const CONDITION: u8>(cb: &mut CodeBlock, target: Target) {
+ match target {
+ Target::CodePtr(dst_ptr) | Target::SideExitPtr(dst_ptr) => {
+ let dst_addr = dst_ptr.as_offset();
+ let src_addr = cb.get_write_ptr().as_offset();
+
+ let num_insns = if bcond_offset_fits_bits((dst_addr - src_addr) / 4) {
+ // If the jump offset fits into the conditional jump as
+ // an immediate value and it's properly aligned, then we
+ // can use the b.cond instruction directly. We're safe
+ // to use as i32 here since we already checked that it
+ // fits.
+ let bytes = (dst_addr - src_addr) as i32;
+ bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes));
+
+ // Here we're going to return 1 because we've only
+ // written out 1 instruction.
+ 1
+ } else if b_offset_fits_bits((dst_addr - (src_addr + 4)) / 4) { // + 4 for bcond
+ // If the jump offset fits into the unconditional jump as
+ // an immediate value, we can use inverse b.cond + b.
+ //
+ // We're going to write out the inverse condition so
+ // that if it doesn't match it will skip over the
+ // instruction used for branching.
+ bcond(cb, Condition::inverse(CONDITION), 2.into());
+ b(cb, InstructionOffset::from_bytes((dst_addr - (src_addr + 4)) as i32)); // + 4 for bcond
+
+ // We've only written out 2 instructions.
+ 2
+ } else {
+ // Otherwise, we need to load the address into a
+ // register and use the branch register instruction.
+ let dst_addr = (dst_ptr.raw_ptr(cb) as usize).as_u64();
+ let load_insns: i32 = emit_load_size(dst_addr).into();
+
+ // We're going to write out the inverse condition so
+ // that if it doesn't match it will skip over the
+ // instructions used for branching.
+ bcond(cb, Condition::inverse(CONDITION), (load_insns + 2).into());
+ emit_load_value(cb, Assembler::SCRATCH0, dst_addr);
+ br(cb, Assembler::SCRATCH0);
+
+ // Here we'll return the number of instructions that it
+ // took to write out the destination address + 1 for the
+ // b.cond and 1 for the br.
+ load_insns + 2
+ };
+
+ if let Target::CodePtr(_) = target {
+ // We need to make sure we have at least 6 instructions for
+ // every kind of jump for invalidation purposes, so we're
+ // going to write out padding nop instructions here.
+ assert!(num_insns <= cb.conditional_jump_insns());
+ for _ in num_insns..cb.conditional_jump_insns() { nop(cb); }
+ }
+ },
+ Target::Label(label_idx) => {
+ // Here we're going to save enough space for ourselves and
+ // then come back and write the instruction once we know the
+ // offset. We're going to assume we can fit into a single
+ // b.cond instruction. It will panic otherwise.
+ cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| {
+ let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap();
+ bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes));
+ });
+ },
+ Target::SideExit { .. } => {
+ unreachable!("Target::SideExit should have been compiled by compile_side_exit")
+ },
+ };
+ }
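+ // Rough picture of the three cases above (hypothetical distances): a target a
+ // few KiB away becomes a single b.cond; one a few MiB away becomes an inverted
+ // b.cond that skips over a b; one outside the +/-128MiB b range loads the
+ // address into SCRATCH0 and uses br.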
+
+ /// Emit a CBZ or CBNZ which branches when a register is zero or non-zero
+ fn emit_cmp_zero_jump(cb: &mut CodeBlock, reg: A64Opnd, branch_if_zero: bool, target: Target) {
+ if let Target::SideExitPtr(dst_ptr) = target {
+ let dst_addr = dst_ptr.as_offset();
+ let src_addr = cb.get_write_ptr().as_offset();
+
+ if cmp_branch_offset_fits_bits((dst_addr - src_addr) / 4) {
+ // If the offset fits in one instruction, generate cbz or cbnz
+ let bytes = (dst_addr - src_addr) as i32;
+ if branch_if_zero {
+ cbz(cb, reg, InstructionOffset::from_bytes(bytes));
+ } else {
+ cbnz(cb, reg, InstructionOffset::from_bytes(bytes));
+ }
+ } else {
+ // Otherwise, we load the address into a register and
+ // use the branch register instruction. Note that because
+ // side exits should always be close, this form should be
+ // rare or impossible to see.
+ let dst_addr = dst_ptr.raw_addr(cb) as u64;
+ let load_insns: i32 = emit_load_size(dst_addr).into();
+
+ // Write out the inverse condition so that if
+ // it doesn't match it will skip over the
+ // instructions used for branching.
+ if branch_if_zero {
+ cbnz(cb, reg, InstructionOffset::from_insns(load_insns + 2));
+ } else {
+ cbz(cb, reg, InstructionOffset::from_insns(load_insns + 2));
+ }
+ emit_load_value(cb, Assembler::SCRATCH0, dst_addr);
+ br(cb, Assembler::SCRATCH0);
+
+ }
+ } else {
+ unreachable!("We should only generate Joz/Jonz with side-exit targets");
+ }
+ }
+
+ /// Emit a push instruction for the given operand by adding to the stack
+ /// pointer and then storing the given value.
+ fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) {
+ str_pre(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, -C_SP_STEP));
+ }
+
+ /// Emit a pop instruction into the given operand by loading the value
+ /// and then subtracting from the stack pointer.
+ fn emit_pop(cb: &mut CodeBlock, opnd: A64Opnd) {
+ ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP));
+ }
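+ // In other words: emit_push is `str xN, [sp, #-16]!` and emit_pop is
+ // `ldr xN, [sp], #16`, so the stack pointer stays 16-byte aligned even
+ // though only one 8-byte value is stored per slot.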
+
+ /// Compile a side exit if Target::SideExit is given.
+ fn compile_side_exit(
+ target: Target,
+ asm: &mut Assembler,
+ ocb: &mut Option<&mut OutlinedCb>,
+ ) -> Result<Target, EmitError> {
+ if let Target::SideExit { counter, context } = target {
+ let side_exit = asm.get_side_exit(&context.unwrap(), Some(counter), ocb.as_mut().unwrap())
+ .ok_or(EmitError::OutOfMemory)?;
+ Ok(Target::SideExitPtr(side_exit))
+ } else {
+ Ok(target)
+ }
+ }
+
+ // dbg!(&self.insns);
+
+ // List of GC offsets
+ let mut gc_offsets: Vec<u32> = Vec::new();
+
+ // Buffered list of PosMarker callbacks to fire if codegen is successful
+ let mut pos_markers: Vec<(usize, CodePtr)> = vec![];
+
+ // For each instruction
+ let start_write_pos = cb.get_write_pos();
+ let mut insn_idx: usize = 0;
+ while let Some(insn) = self.insns.get(insn_idx) {
+ let src_ptr = cb.get_write_ptr();
+ let had_dropped_bytes = cb.has_dropped_bytes();
+ let old_label_state = cb.get_label_state();
+ let mut insn_gc_offsets: Vec<u32> = Vec::new();
+
+ match insn {
+ Insn::Comment(text) => {
+ if cfg!(feature = "disasm") {
+ cb.add_comment(text);
+ }
+ },
+ Insn::Label(target) => {
+ cb.write_label(target.unwrap_label_idx());
+ },
+ // Report back the current position in the generated code
+ Insn::PosMarker(..) => {
+ pos_markers.push((insn_idx, cb.get_write_ptr()))
+ }
+ Insn::BakeString(text) => {
+ for byte in text.as_bytes() {
+ cb.write_byte(*byte);
+ }
+
+ // Add a null-terminator byte for safety (in case we pass
+ // this to C code)
+ cb.write_byte(0);
+
+ // Pad out the string to the next 4-byte boundary so that
+ // it's easy to jump past.
+ for _ in 0..(4 - ((text.len() + 1) % 4)) {
+ cb.write_byte(0);
+ }
+ },
+ Insn::FrameSetup => {
+ stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16));
+
+ // X29 (frame_pointer) = SP
+ mov(cb, X29, C_SP_REG);
+ },
+ Insn::FrameTeardown => {
+ // SP = X29 (frame pointer)
+ mov(cb, C_SP_REG, X29);
+
+ ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16));
+ },
+ Insn::Add { left, right, out } => {
+ adds(cb, out.into(), left.into(), right.into());
+ },
+ Insn::Sub { left, right, out } => {
+ subs(cb, out.into(), left.into(), right.into());
+ },
+ Insn::Mul { left, right, out } => {
+ // If the next instruction is jo (jump on overflow)
+ match (self.insns.get(insn_idx + 1), self.insns.get(insn_idx + 2)) {
+ (Some(Insn::JoMul(_)), _) |
+ (Some(Insn::PosMarker(_)), Some(Insn::JoMul(_))) => {
+ // Compute the high 64 bits
+ smulh(cb, Self::SCRATCH0, left.into(), right.into());
+
+ // Compute the low 64 bits
+ // This may clobber one of the input registers,
+ // so we do it after smulh
+ mul(cb, out.into(), left.into(), right.into());
+
+ // Produce a register that is all zeros or all ones
+ // Based on the sign bit of the 64-bit mul result
+ asr(cb, Self::SCRATCH1, out.into(), A64Opnd::UImm(63));
+
+ // If the high 64-bits are not all zeros or all ones,
+ // matching the sign bit, then we have an overflow
+ cmp(cb, Self::SCRATCH0, Self::SCRATCH1);
+ // Insn::JoMul will emit_conditional_jump::<{Condition::NE}>
+ }
+ _ => {
+ mul(cb, out.into(), left.into(), right.into());
+ }
+ }
+ },
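+ // Illustrative overflow case for the Mul arm above (hypothetical inputs): for
+ // i64::MAX * 2 the low 64 bits are -2 (asr of the sign bit gives all ones)
+ // while smulh yields 0, so the cmp sets NE and the following JoMul branches
+ // to the overflow path.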
+ Insn::And { left, right, out } => {
+ and(cb, out.into(), left.into(), right.into());
+ },
+ Insn::Or { left, right, out } => {
+ orr(cb, out.into(), left.into(), right.into());
+ },
+ Insn::Xor { left, right, out } => {
+ eor(cb, out.into(), left.into(), right.into());
+ },
+ Insn::Not { opnd, out } => {
+ mvn(cb, out.into(), opnd.into());
+ },
+ Insn::RShift { opnd, shift, out } => {
+ asr(cb, out.into(), opnd.into(), shift.into());
+ },
+ Insn::URShift { opnd, shift, out } => {
+ lsr(cb, out.into(), opnd.into(), shift.into());
+ },
+ Insn::LShift { opnd, shift, out } => {
+ lsl(cb, out.into(), opnd.into(), shift.into());
+ },
+ Insn::Store { dest, src } => {
+ // This order may be surprising but it is correct. The way
+ // the Arm64 assembler works, the register that is going to
+ // be stored is first and the address is second. However in
+ // our IR we have the address first and the register second.
+ match dest.rm_num_bits() {
+ 64 | 32 => stur(cb, src.into(), dest.into()),
+ 16 => sturh(cb, src.into(), dest.into()),
+ num_bits => panic!("unexpected dest num_bits: {} (src: {:#?}, dest: {:#?})", num_bits, src, dest),
+ }
+ },
+ Insn::Load { opnd, out } |
+ Insn::LoadInto { opnd, dest: out } => {
+ match *opnd {
+ Opnd::Reg(_) | Opnd::InsnOut { .. } => {
+ mov(cb, out.into(), opnd.into());
+ },
+ Opnd::UImm(uimm) => {
+ emit_load_value(cb, out.into(), uimm);
+ },
+ Opnd::Imm(imm) => {
+ emit_load_value(cb, out.into(), imm as u64);
+ },
+ Opnd::Mem(_) => {
+ match opnd.rm_num_bits() {
+ 64 | 32 => ldur(cb, out.into(), opnd.into()),
+ 16 => ldurh(cb, out.into(), opnd.into()),
+ 8 => ldurb(cb, out.into(), opnd.into()),
+ num_bits => panic!("unexpected num_bits: {}", num_bits)
+ };
+ },
+ Opnd::Value(value) => {
+ // We don't need to check if it's a special const
+ // here because we only allow these operands to hit
+ // this point if they're not a special const.
+ assert!(!value.special_const_p());
+
+ // This assumes only load instructions can contain
+ // references to GC'd Value operands. If the value
+ // being loaded is a heap object, we'll report that
+ // back out to the gc_offsets list.
+ ldr_literal(cb, out.into(), 2.into());
+ b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32)));
+ cb.write_bytes(&value.as_u64().to_le_bytes());
+
+ let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
+ insn_gc_offsets.push(ptr_offset);
+ },
+ Opnd::CArg { .. } => {
+ unreachable!("C argument operand was not lowered before arm64_emit");
+ }
+ Opnd::Stack { .. } => {
+ unreachable!("Stack operand was not lowered before arm64_emit");
+ }
+ Opnd::None => {
+ unreachable!("Attempted to load from None operand");
+ }
+ };
+ },
+ Insn::LoadSExt { opnd, out } => {
+ match *opnd {
+ Opnd::Reg(Reg { num_bits: 32, .. }) |
+ Opnd::InsnOut { num_bits: 32, .. } => {
+ sxtw(cb, out.into(), opnd.into());
+ },
+ Opnd::Mem(Mem { num_bits: 32, .. }) => {
+ ldursw(cb, out.into(), opnd.into());
+ },
+ _ => unreachable!()
+ };
+ },
+ Insn::Mov { dest, src } => {
+ // This supports the following two kinds of immediates:
+ // * The value fits into a single movz instruction
+ // * It can be encoded with the special bitmask immediate encoding
+ // arm64_split() should have split other immediates that require multiple instructions.
+ match src {
+ Opnd::UImm(uimm) if *uimm <= 0xffff => {
+ movz(cb, dest.into(), A64Opnd::new_uimm(*uimm), 0);
+ },
+ _ => {
+ mov(cb, dest.into(), src.into());
+ }
+ }
+ },
+ Insn::Lea { opnd, out } => {
+ let opnd: A64Opnd = opnd.into();
+
+ match opnd {
+ A64Opnd::Mem(mem) => {
+ add(
+ cb,
+ out.into(),
+ A64Opnd::Reg(A64Reg { reg_no: mem.base_reg_no, num_bits: 64 }),
+ A64Opnd::new_imm(mem.disp.into())
+ );
+ },
+ _ => {
+ panic!("Op::Lea only accepts Opnd::Mem operands.");
+ }
+ };
+ },
+ Insn::LeaJumpTarget { out, target, .. } => {
+ if let Target::Label(label_idx) = target {
+ // Set output to the raw address of the label
+ cb.label_ref(*label_idx, 4, |cb, end_addr, dst_addr| {
+ adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4)));
+ });
+
+ mov(cb, out.into(), Self::SCRATCH0);
+ } else {
+ // Set output to the jump target's raw address
+ let target_code = target.unwrap_code_ptr();
+ let target_addr = target_code.raw_addr(cb).as_u64();
+ emit_load_value(cb, out.into(), target_addr);
+ }
+ },
+ Insn::CPush(opnd) => {
+ emit_push(cb, opnd.into());
+ },
+ Insn::CPop { out } => {
+ emit_pop(cb, out.into());
+ },
+ Insn::CPopInto(opnd) => {
+ emit_pop(cb, opnd.into());
+ },
+ Insn::CPushAll => {
+ let regs = Assembler::get_caller_save_regs();
+
+ for reg in regs {
+ emit_push(cb, A64Opnd::Reg(reg));
+ }
+
+ // Push the flags/state register
+ mrs(cb, Self::SCRATCH0, SystemRegister::NZCV);
+ emit_push(cb, Self::SCRATCH0);
+ },
+ Insn::CPopAll => {
+ let regs = Assembler::get_caller_save_regs();
+
+ // Pop the state/flags register
+ msr(cb, SystemRegister::NZCV, Self::SCRATCH0);
+ emit_pop(cb, Self::SCRATCH0);
+
+ for reg in regs.into_iter().rev() {
+ emit_pop(cb, A64Opnd::Reg(reg));
+ }
+ },
+ Insn::CCall { fptr, .. } => {
+ // The offset to the call target in bytes
+ let src_addr = cb.get_write_ptr().raw_ptr(cb) as i64;
+ let dst_addr = *fptr as i64;
+
+ // Use BL if the offset is short enough to encode as an immediate.
+ // Otherwise, use BLR with a register.
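+ // BL encodes a signed 26-bit instruction offset, giving a reach of
+ // +/-128 MiB from the call site.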
+ if b_offset_fits_bits((dst_addr - src_addr) / 4) {
+ bl(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
+ } else {
+ emit_load_value(cb, Self::SCRATCH0, dst_addr as u64);
+ blr(cb, Self::SCRATCH0);
+ }
+ },
+ Insn::CRet { .. } => {
+ ret(cb, A64Opnd::None);
+ },
+ Insn::Cmp { left, right } => {
+ cmp(cb, left.into(), right.into());
+ },
+ Insn::Test { left, right } => {
+ tst(cb, left.into(), right.into());
+ },
+ Insn::JmpOpnd(opnd) => {
+ br(cb, opnd.into());
+ },
+ Insn::Jmp(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(dst_ptr) => {
+ emit_jmp_ptr(cb, dst_ptr, true);
+ },
+ Target::SideExitPtr(dst_ptr) => {
+ emit_jmp_ptr(cb, dst_ptr, false);
+ },
+ Target::Label(label_idx) => {
+ // Here we're going to save enough space for
+ // ourselves and then come back and write the
+ // instruction once we know the offset. We're going
+ // to assume we can fit into a single b instruction.
+ // It will panic otherwise.
+ cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| {
+ let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap();
+ b(cb, InstructionOffset::from_bytes(bytes));
+ });
+ },
+ Target::SideExit { .. } => {
+ unreachable!("Target::SideExit should have been compiled by compile_side_exit")
+ },
+ };
+ },
+ Insn::Je(target) | Insn::Jz(target) => {
+ emit_conditional_jump::<{Condition::EQ}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jne(target) | Insn::Jnz(target) | Insn::JoMul(target) => {
+ emit_conditional_jump::<{Condition::NE}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jl(target) => {
+ emit_conditional_jump::<{Condition::LT}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jg(target) => {
+ emit_conditional_jump::<{Condition::GT}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jge(target) => {
+ emit_conditional_jump::<{Condition::GE}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jbe(target) => {
+ emit_conditional_jump::<{Condition::LS}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jb(target) => {
+ emit_conditional_jump::<{Condition::CC}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jo(target) => {
+ emit_conditional_jump::<{Condition::VS}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Joz(opnd, target) => {
+ emit_cmp_zero_jump(cb, opnd.into(), true, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jonz(opnd, target) => {
+ emit_cmp_zero_jump(cb, opnd.into(), false, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::IncrCounter { mem, value } => {
+ let label = cb.new_label("incr_counter_loop".to_string());
+ cb.write_label(label);
+
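+ // Atomic increment via a load-exclusive/store-exclusive retry loop:
+ // LDAXR reads the counter, ADD bumps it, and STLXR attempts the
+ // store, writing 0 to the status register on success. A non-zero
+ // status means the exclusive store failed, so we loop and retry.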
+ ldaxr(cb, Self::SCRATCH0, mem.into());
+ add(cb, Self::SCRATCH0, Self::SCRATCH0, value.into());
+
+ // The status register that gets used to track whether or
+ // not the store was successful must be 32 bits. Since we
+ // store the SCRATCH registers as their 64-bit versions, we
+ // need to rewrap it here.
+ let status = A64Opnd::Reg(Self::SCRATCH1.unwrap_reg().with_num_bits(32));
+ stlxr(cb, status, Self::SCRATCH0, mem.into());
+
+ cmp(cb, Self::SCRATCH1, A64Opnd::new_uimm(0));
+ emit_conditional_jump::<{Condition::NE}>(cb, Target::Label(label));
+ },
+ Insn::Breakpoint => {
+ brk(cb, A64Opnd::None);
+ },
+ Insn::CSelZ { truthy, falsy, out } |
+ Insn::CSelE { truthy, falsy, out } => {
+ csel(cb, out.into(), truthy.into(), falsy.into(), Condition::EQ);
+ },
+ Insn::CSelNZ { truthy, falsy, out } |
+ Insn::CSelNE { truthy, falsy, out } => {
+ csel(cb, out.into(), truthy.into(), falsy.into(), Condition::NE);
+ },
+ Insn::CSelL { truthy, falsy, out } => {
+ csel(cb, out.into(), truthy.into(), falsy.into(), Condition::LT);
+ },
+ Insn::CSelLE { truthy, falsy, out } => {
+ csel(cb, out.into(), truthy.into(), falsy.into(), Condition::LE);
+ },
+ Insn::CSelG { truthy, falsy, out } => {
+ csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GT);
+ },
+ Insn::CSelGE { truthy, falsy, out } => {
+ csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GE);
+ }
+ Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
+ Insn::PadInvalPatch => {
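+ // Pad with NOPs until at least jmp_ptr_bytes() bytes separate
+ // max(start_write_pos, page start) from the current write position,
+ // so that invalidation can overwrite this code with a jump.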
+ while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < cb.jmp_ptr_bytes() && !cb.has_dropped_bytes() {
+ nop(cb);
+ }
+ }
+ };
+
+ // On failure, jump to the next page and retry the current insn
+ if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr_with_invalidation) {
+ // Reset cb states before retrying the current Insn
+ cb.set_label_state(old_label_state);
+
+ // We don't want label references to cross page boundaries. Signal caller for
+ // retry.
+ if !self.label_names.is_empty() {
+ return Err(EmitError::RetryOnNextPage);
+ }
+ } else {
+ insn_idx += 1;
+ gc_offsets.append(&mut insn_gc_offsets);
+ }
+ }
+
+ // Error if we couldn't write out everything
+ if cb.has_dropped_bytes() {
+ return Err(EmitError::OutOfMemory)
+ } else {
+ // No bytes dropped, so the pos markers point to valid code
+ for (insn_idx, pos) in pos_markers {
+ if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() {
+ callback(pos, &cb);
+ } else {
+ panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}");
+ }
+ }
+
+ return Ok(gc_offsets)
+ }
+ }
+
+ /// Optimize and compile the stored instructions
+ pub fn compile_with_regs(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>, regs: Vec<Reg>) -> Option<(CodePtr, Vec<u32>)> {
+ let asm = self.arm64_split();
+ let mut asm = asm.alloc_regs(regs);
+
+ // Create label instances in the code block
+ for (idx, name) in asm.label_names.iter().enumerate() {
+ let label_idx = cb.new_label(name.to_string());
+ assert!(label_idx == idx);
+ }
+
+ let start_ptr = cb.get_write_ptr();
+ let starting_label_state = cb.get_label_state();
+ let mut ocb = ocb; // for &mut
+ let emit_result = match asm.arm64_emit(cb, &mut ocb) {
+ Err(EmitError::RetryOnNextPage) => {
+ // We want to lower jumps to labels to b.cond instructions, which have a 1 MiB
+ // range limit. We can easily exceed the limit in case the jump straddles two pages.
+ // In this case, we retry with a fresh page.
+ cb.set_label_state(starting_label_state);
+ cb.next_page(start_ptr, emit_jmp_ptr_with_invalidation);
+ let result = asm.arm64_emit(cb, &mut ocb);
+ assert_ne!(
+ Err(EmitError::RetryOnNextPage),
+ result,
+ "should not fail when writing to a fresh code page"
+ );
+ result
+ }
+ result => result
+ };
+
+ if let (Ok(gc_offsets), false) = (emit_result, cb.has_dropped_bytes()) {
+ cb.link_labels();
+
+ // Invalidate icache for newly written out region so we don't run stale code.
+ // It should invalidate only the code ranges of the current cb because the code
+ // ranges of the other cb might have a memory region that is still PROT_NONE.
+ #[cfg(not(test))]
+ cb.without_page_end_reserve(|cb| {
+ for (start, end) in cb.writable_addrs(start_ptr, cb.get_write_ptr()) {
+ unsafe { rb_yjit_icache_invalidate(start as _, end as _) };
+ }
+ });
+
+ Some((start_ptr, gc_offsets))
+ } else {
+ cb.clear_labels();
+
+ None
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::disasm::*;
+
+ fn setup_asm() -> (Assembler, CodeBlock) {
+ (Assembler::new(), CodeBlock::new_dummy(1024))
+ }
+
+ #[test]
+ fn test_emit_add() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.add(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG));
+ asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
+ asm.compile_with_regs(&mut cb, None, vec![X3_REG]);
+
+ // Assert that only 2 instructions were written.
+ assert_eq!(8, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_bake_string() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.bake_string("Hello, world!");
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ // Testing that we pad the string to the nearest 4-byte boundary to make
+ // it easier to jump over.
+ assert_eq!(16, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_cpush_all() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.cpush_all();
+ asm.compile_with_num_regs(&mut cb, 0);
+ }
+
+ #[test]
+ fn test_emit_cpop_all() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.cpop_all();
+ asm.compile_with_num_regs(&mut cb, 0);
+ }
+
+ #[test]
+ fn test_emit_frame() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.frame_setup();
+ asm.frame_teardown();
+ asm.compile_with_num_regs(&mut cb, 0);
+ }
+
+ #[test]
+ fn test_emit_je_fits_into_bcond() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let target: CodePtr = cb.get_write_ptr().add_bytes(80);
+
+ asm.je(Target::CodePtr(target));
+ asm.compile_with_num_regs(&mut cb, 0);
+ }
+
+ #[test]
+ fn test_emit_je_does_not_fit_into_bcond() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let offset = 1 << 21;
+ let target: CodePtr = cb.get_write_ptr().add_bytes(offset);
+
+ asm.je(Target::CodePtr(target));
+ asm.compile_with_num_regs(&mut cb, 0);
+ }
+
+ #[test]
+ fn test_emit_lea_label() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let label = asm.new_label("label");
+ let opnd = asm.lea_jump_target(label);
+
+ asm.write_label(label);
+ asm.bake_string("Hello, world!");
+ asm.store(Opnd::mem(64, SP, 0), opnd);
+
+ asm.compile_with_num_regs(&mut cb, 1);
+ }
+
+ #[test]
+ fn test_emit_load_mem_disp_fits_into_load() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.load(Opnd::mem(64, SP, 0));
+ asm.store(Opnd::mem(64, SP, 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that two instructions were written: LDUR and STUR.
+ assert_eq!(8, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_load_mem_disp_fits_into_add() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.load(Opnd::mem(64, SP, 1 << 10));
+ asm.store(Opnd::mem(64, SP, 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that three instructions were written: ADD, LDUR, and STUR.
+ assert_eq!(12, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_load_mem_disp_does_not_fit_into_add() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.load(Opnd::mem(64, SP, 1 << 12 | 1));
+ asm.store(Opnd::mem(64, SP, 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that four instructions were written: MOVZ, ADD, LDUR, and STUR.
+ assert_eq!(16, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_load_value_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.load(Opnd::Value(Qnil));
+ asm.store(Opnd::mem(64, SP, 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that only two instructions were written since the value is an
+ // immediate.
+ assert_eq!(8, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_load_value_non_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.load(Opnd::Value(VALUE(0xCAFECAFECAFE0000)));
+ asm.store(Opnd::mem(64, SP, 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that five instructions were written since the value is not an
+ // immediate and needs to be loaded into a register.
+ assert_eq!(20, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_test_32b_reg_not_bitmask_imm() {
+ let (mut asm, mut cb) = setup_asm();
+ let w0 = Opnd::Reg(X0_REG).with_num_bits(32).unwrap();
+ asm.test(w0, Opnd::UImm(u32::MAX.into()));
+ // All ones is not encodable with a bitmask immediate,
+ // so this needs one register
+ asm.compile_with_num_regs(&mut cb, 1);
+ }
+
+ #[test]
+ fn test_emit_test_32b_reg_bitmask_imm() {
+ let (mut asm, mut cb) = setup_asm();
+ let w0 = Opnd::Reg(X0_REG).with_num_bits(32).unwrap();
+ asm.test(w0, Opnd::UImm(0x80000001));
+ asm.compile_with_num_regs(&mut cb, 0);
+ }
+
+ #[test]
+ fn test_emit_or() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.or(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG));
+ asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+ }
+
+ #[test]
+ fn test_emit_lshift() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.lshift(Opnd::Reg(X0_REG), Opnd::UImm(5));
+ asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+ }
+
+ #[test]
+ fn test_emit_rshift() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.rshift(Opnd::Reg(X0_REG), Opnd::UImm(5));
+ asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+ }
+
+ #[test]
+ fn test_emit_urshift() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.urshift(Opnd::Reg(X0_REG), Opnd::UImm(5));
+ asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+ }
+
+ #[test]
+ fn test_emit_test() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.test(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ // Assert that only one instruction was written.
+ assert_eq!(4, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_test_with_encodable_unsigned_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.test(Opnd::Reg(X0_REG), Opnd::UImm(7));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ // Assert that only one instruction was written.
+ assert_eq!(4, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_test_with_unencodable_unsigned_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.test(Opnd::Reg(X0_REG), Opnd::UImm(5));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that a load and a test instruction were written.
+ assert_eq!(8, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_test_with_encodable_signed_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.test(Opnd::Reg(X0_REG), Opnd::Imm(7));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ // Assert that only one instruction was written.
+ assert_eq!(4, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_test_with_unencodable_signed_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.test(Opnd::Reg(X0_REG), Opnd::Imm(5));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that a load and a test instruction were written.
+ assert_eq!(8, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_test_with_negative_signed_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.test(Opnd::Reg(X0_REG), Opnd::Imm(-7));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that a test instruction is written.
+ assert_eq!(4, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_32_bit_register_with_some_number() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let shape_opnd = Opnd::mem(32, Opnd::Reg(X0_REG), 6);
+ asm.cmp(shape_opnd, Opnd::UImm(4097));
+ asm.compile_with_num_regs(&mut cb, 2);
+ }
+
+ #[test]
+ fn test_16_bit_register_store_some_number() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let shape_opnd = Opnd::mem(16, Opnd::Reg(X0_REG), 0);
+ asm.store(shape_opnd, Opnd::UImm(4097));
+ asm.compile_with_num_regs(&mut cb, 2);
+ }
+
+ #[test]
+ fn test_32_bit_register_store_some_number() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let shape_opnd = Opnd::mem(32, Opnd::Reg(X0_REG), 6);
+ asm.store(shape_opnd, Opnd::UImm(4097));
+ asm.compile_with_num_regs(&mut cb, 2);
+ }
+
+ #[test]
+ fn test_bcond_straddling_code_pages() {
+ const LANDING_PAGE: usize = 65;
+ let mut asm = Assembler::new();
+ let mut cb = CodeBlock::new_dummy_with_freed_pages(vec![0, LANDING_PAGE]);
+
+ // Skip to near the end of the page. Room for two instructions.
+ cb.set_pos(cb.page_start_pos() + cb.page_end() - 8);
+
+ let end = asm.new_label("end");
+ // Start with a conditional jump...
+ asm.jz(end);
+
+ // A few instructions, enough to cause a page switch.
+ let sum = asm.add(399.into(), 111.into());
+ let xorred = asm.xor(sum, 859.into());
+ asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), xorred);
+ asm.store(Opnd::mem(64, Opnd::Reg(X0_REG), 0), xorred);
+
+ // The branch target. It should be in the landing page.
+ asm.write_label(end);
+ asm.cret(xorred);
+
+ // [Bug #19385]
+ // This used to panic with "The offset must be 19 bits or less."
+ // due to attempting to lower the `asm.jz` above to a `b.eq` with an offset that's > 1 MiB.
+ let starting_pos = cb.get_write_pos();
+ asm.compile_with_num_regs(&mut cb, 2);
+ let gap = cb.get_write_pos() - starting_pos;
+ assert!(gap > 0b1111111111111111111);
+
+ let instruction_at_starting_pos: [u8; 4] = unsafe {
+ std::slice::from_raw_parts(cb.get_ptr(starting_pos).raw_ptr(&cb), 4)
+ }.try_into().unwrap();
+ assert_eq!(
+ 0b000101 << 26_u32,
+ u32::from_le_bytes(instruction_at_starting_pos) & (0b111111 << 26_u32),
+ "starting instruction should be an unconditional branch to the new page (B)"
+ );
+ }
+
+ #[test]
+ fn test_emit_xor() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.xor(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG));
+ asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
+
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_disasm!(cb, "0b0001ca4b0000f8", "
+ 0x0: eor x11, x0, x1
+ 0x4: stur x11, [x2]
+ ");
+ }
+
+ #[test]
+ #[cfg(feature = "disasm")]
+ fn test_simple_disasm() -> std::result::Result<(), capstone::Error> {
+ // Test drive Capstone with simple input
+ use capstone::prelude::*;
+
+ let cs = Capstone::new()
+ .arm64()
+ .mode(arch::arm64::ArchMode::Arm)
+ .build()?;
+
+ let insns = cs.disasm_all(&[0x60, 0x0f, 0x80, 0xF2], 0x1000)?;
+
+ match insns.as_ref() {
+ [insn] => {
+ assert_eq!(Some("movk"), insn.mnemonic());
+ Ok(())
+ }
+ _ => Err(capstone::Error::CustomError(
+ "expected to disassemble to movk",
+ )),
+ }
+ }
+
+ #[test]
+ fn test_replace_mov_with_ldur() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::mem(64, CFP, 8));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_disasm!(cb, "618240f8", {"
+ 0x0: ldur x1, [x19, #8]
+ "});
+ }
+
+ #[test]
+ fn test_not_split_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::UImm(0xffff));
+ asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::UImm(0x10000));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_disasm!(cb, "e1ff9fd2e10370b2", {"
+ 0x0: mov x1, #0xffff
+ 0x4: orr x1, xzr, #0x10000
+ "});
+ }
+
+ #[test]
+ fn test_merge_csel_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let out = asm.csel_l(Qtrue.into(), Qfalse.into());
+ asm.mov(Opnd::Reg(TEMP_REGS[0]), out);
+ asm.compile_with_num_regs(&mut cb, 2);
+
+ assert_disasm!(cb, "8b0280d20c0080d261b18c9a", {"
+ 0x0: mov x11, #0x14
+ 0x4: mov x12, #0
+ 0x8: csel x1, x11, x12, lt
+ "});
+ }
+
+ #[test]
+ fn test_add_with_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let out = asm.add(Opnd::Reg(TEMP_REGS[1]), 1.into());
+ let out = asm.add(out, 1_usize.into());
+ asm.mov(Opnd::Reg(TEMP_REGS[0]), out);
+ asm.compile_with_num_regs(&mut cb, 2);
+
+ assert_disasm!(cb, "2b0500b16b0500b1e1030baa", {"
+ 0x0: adds x11, x9, #1
+ 0x4: adds x11, x11, #1
+ 0x8: mov x1, x11
+ "});
+ }
+
+ #[test]
+ fn test_mul_with_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let out = asm.mul(Opnd::Reg(TEMP_REGS[1]), 3.into());
+ asm.mov(Opnd::Reg(TEMP_REGS[0]), out);
+ asm.compile_with_num_regs(&mut cb, 2);
+
+ assert_disasm!(cb, "6b0080d22b7d0b9be1030baa", {"
+ 0x0: mov x11, #3
+ 0x4: mul x11, x9, x11
+ 0x8: mov x1, x11
+ "});
+ }
+}
diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs
new file mode 100644
index 0000000000..edc0eaf390
--- /dev/null
+++ b/yjit/src/backend/ir.rs
@@ -0,0 +1,2095 @@
+use std::collections::HashMap;
+use std::fmt;
+use std::convert::From;
+use std::mem::take;
+use crate::codegen::{gen_outlined_exit, gen_counted_exit};
+use crate::cruby::{vm_stack_canary, SIZEOF_VALUE_I32, VALUE};
+use crate::virtualmem::CodePtr;
+use crate::asm::{CodeBlock, OutlinedCb};
+use crate::core::{Context, RegTemps, MAX_REG_TEMPS};
+use crate::options::*;
+use crate::stats::*;
+
+use crate::backend::current::*;
+
+pub const EC: Opnd = _EC;
+pub const CFP: Opnd = _CFP;
+pub const SP: Opnd = _SP;
+
+pub const C_ARG_OPNDS: [Opnd; 6] = _C_ARG_OPNDS;
+pub const C_RET_OPND: Opnd = _C_RET_OPND;
+pub use crate::backend::current::{Reg, C_RET_REG};
+
+// Memory operand base
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum MemBase
+{
+ Reg(u8),
+ InsnOut(usize),
+}
+
+// Memory location
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub struct Mem
+{
+ // Base register number or instruction index
+ pub(super) base: MemBase,
+
+ // Offset relative to the base pointer
+ pub(super) disp: i32,
+
+ // Size in bits
+ pub(super) num_bits: u8,
+}
+
+impl fmt::Debug for Mem {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(fmt, "Mem{}[{:?}", self.num_bits, self.base)?;
+ if self.disp != 0 {
+ let sign = if self.disp > 0 { '+' } else { '-' };
+ write!(fmt, " {sign} {}", self.disp)?;
+ }
+
+ write!(fmt, "]")
+ }
+}
+
+/// Operand to an IR instruction
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum Opnd
+{
+ None, // For insns with no output
+
+ // Immediate Ruby value, may be GC'd, movable
+ Value(VALUE),
+
+ /// C argument register. alloc_regs() resolves its register dependencies.
+ CArg(Reg),
+
+ // Output of a preceding instruction in this block
+ InsnOut{ idx: usize, num_bits: u8 },
+
+ /// Pointer to a slot on the VM stack
+ Stack {
+ /// Index from stack top. Used for conversion to StackOpnd.
+ idx: i32,
+ /// Number of bits for Opnd::Reg and Opnd::Mem.
+ num_bits: u8,
+ /// ctx.stack_size when this operand is made. Used with idx for Opnd::Reg.
+ stack_size: u8,
+ /// ctx.sp_offset when this operand is made. Used with idx for Opnd::Mem.
+ sp_offset: i8,
+ /// ctx.reg_temps when this operand is read. Used for register allocation.
+ reg_temps: Option<RegTemps>
+ },
+
+ // Low-level operands, for lowering
+ Imm(i64), // Raw signed immediate
+ UImm(u64), // Raw unsigned immediate
+ Mem(Mem), // Memory location
+ Reg(Reg), // Machine register
+}
+
+impl fmt::Debug for Opnd {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ use Opnd::*;
+ match self {
+ Self::None => write!(fmt, "None"),
+ Value(val) => write!(fmt, "Value({val:?})"),
+ CArg(reg) => write!(fmt, "CArg({reg:?})"),
+ Stack { idx, sp_offset, .. } => write!(fmt, "SP[{}]", *sp_offset as i32 - idx - 1),
+ InsnOut { idx, num_bits } => write!(fmt, "Out{num_bits}({idx})"),
+ Imm(signed) => write!(fmt, "{signed:x}_i64"),
+ UImm(unsigned) => write!(fmt, "{unsigned:x}_u64"),
+ // Delegate to the inner Debug impls so "Mem"/"Reg" are printed only once
+ Mem(mem) => write!(fmt, "{mem:?}"),
+ Reg(reg) => write!(fmt, "{reg:?}"),
+ }
+ }
+}
+
+impl Opnd
+{
+ /// Convenience constructor for memory operands
+ pub fn mem(num_bits: u8, base: Opnd, disp: i32) -> Self {
+ match base {
+ Opnd::Reg(base_reg) => {
+ assert!(base_reg.num_bits == 64);
+ Opnd::Mem(Mem {
+ base: MemBase::Reg(base_reg.reg_no),
+ disp: disp,
+ num_bits: num_bits,
+ })
+ },
+
+ Opnd::InsnOut{idx, num_bits: out_num_bits } => {
+ assert!(num_bits <= out_num_bits);
+ Opnd::Mem(Mem {
+ base: MemBase::InsnOut(idx),
+ disp: disp,
+ num_bits: num_bits,
+ })
+ },
+
+ _ => unreachable!("memory operand with non-register base")
+ }
+ }
+
+ /// Constructor for constant pointer operand
+ pub fn const_ptr(ptr: *const u8) -> Self {
+ Opnd::UImm(ptr as u64)
+ }
+
+ /// Constructor for a C argument operand
+ pub fn c_arg(reg_opnd: Opnd) -> Self {
+ match reg_opnd {
+ Opnd::Reg(reg) => Opnd::CArg(reg),
+ _ => unreachable!(),
+ }
+ }
+
+ /// Unwrap a register operand
+ pub fn unwrap_reg(&self) -> Reg {
+ match self {
+ Opnd::Reg(reg) => *reg,
+ _ => unreachable!("trying to unwrap {:?} into reg", self)
+ }
+ }
+
+ /// Get the size in bits for this operand if there is one.
+ pub fn num_bits(&self) -> Option<u8> {
+ match *self {
+ Opnd::Reg(Reg { num_bits, .. }) => Some(num_bits),
+ Opnd::Mem(Mem { num_bits, .. }) => Some(num_bits),
+ Opnd::InsnOut { num_bits, .. } => Some(num_bits),
+ _ => None
+ }
+ }
+
+ pub fn with_num_bits(&self, num_bits: u8) -> Option<Opnd> {
+ assert!(num_bits == 8 || num_bits == 16 || num_bits == 32 || num_bits == 64);
+ match *self {
+ Opnd::Reg(reg) => Some(Opnd::Reg(reg.with_num_bits(num_bits))),
+ Opnd::Mem(Mem { base, disp, .. }) => Some(Opnd::Mem(Mem { base, disp, num_bits })),
+ Opnd::InsnOut { idx, .. } => Some(Opnd::InsnOut { idx, num_bits }),
+ Opnd::Stack { idx, stack_size, sp_offset, reg_temps, .. } => Some(Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps }),
+ _ => None,
+ }
+ }
+
+ /// Get the size in bits for register/memory operands.
+ pub fn rm_num_bits(&self) -> u8 {
+ self.num_bits().unwrap()
+ }
+
+ /// Maps the indices from a previous list of instructions to a new list of
+ /// instructions.
+ pub fn map_index(self, indices: &Vec<usize>) -> Opnd {
+ match self {
+ Opnd::InsnOut { idx, num_bits } => {
+ Opnd::InsnOut { idx: indices[idx], num_bits }
+ }
+ Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp, num_bits }) => {
+ Opnd::Mem(Mem { base: MemBase::InsnOut(indices[idx]), disp, num_bits })
+ },
+ _ => self
+ }
+ }
+
+ /// When there aren't any operands to check against, this is the number of
+ /// bits that should be used for any given output variable.
+ const DEFAULT_NUM_BITS: u8 = 64;
+
+ /// Determine the size in bits from the iterator of operands. If any of them
+ /// are different sizes this will panic.
+ pub fn match_num_bits_iter<'a>(opnds: impl Iterator<Item = &'a Opnd>) -> u8 {
+ let mut value: Option<u8> = None;
+
+ for opnd in opnds {
+ if let Some(num_bits) = opnd.num_bits() {
+ match value {
+ None => {
+ value = Some(num_bits);
+ },
+ Some(value) => {
+ assert_eq!(value, num_bits, "operands of incompatible sizes");
+ }
+ };
+ }
+ }
+
+ value.unwrap_or(Self::DEFAULT_NUM_BITS)
+ }
+
+ /// Determine the size in bits of the slice of the given operands. If any of
+ /// them are different sizes this will panic.
+ pub fn match_num_bits(opnds: &[Opnd]) -> u8 {
+ Self::match_num_bits_iter(opnds.iter())
+ }
+
+ /// Calculate Opnd::Stack's index from the stack bottom.
+ pub fn stack_idx(&self) -> u8 {
+ self.get_stack_idx().unwrap()
+ }
+
+ /// Calculate Opnd::Stack's index from the stack bottom if it's Opnd::Stack.
+ pub fn get_stack_idx(&self) -> Option<u8> {
+ match self {
+ Opnd::Stack { idx, stack_size, .. } => {
+ Some((*stack_size as isize - *idx as isize - 1) as u8)
+ },
+ _ => None
+ }
+ }
+
+ /// Get the index for stack temp registers.
+ pub fn reg_idx(&self) -> usize {
+ match self {
+ Opnd::Stack { .. } => {
+ self.stack_idx() as usize % get_option!(num_temp_regs)
+ },
+ _ => unreachable!(),
+ }
+ }
+}
+
+impl From<usize> for Opnd {
+ fn from(value: usize) -> Self {
+ Opnd::UImm(value.try_into().unwrap())
+ }
+}
+
+impl From<u64> for Opnd {
+ fn from(value: u64) -> Self {
+ Opnd::UImm(value)
+ }
+}
+
+impl From<i64> for Opnd {
+ fn from(value: i64) -> Self {
+ Opnd::Imm(value)
+ }
+}
+
+impl From<i32> for Opnd {
+ fn from(value: i32) -> Self {
+ Opnd::Imm(value.try_into().unwrap())
+ }
+}
+
+impl From<u32> for Opnd {
+ fn from(value: u32) -> Self {
+ Opnd::UImm(value as u64)
+ }
+}
+
+impl From<VALUE> for Opnd {
+ fn from(value: VALUE) -> Self {
+ Opnd::Value(value)
+ }
+}
+
+/// Branch target (something that we can jump to)
+/// for branch instructions
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum Target
+{
+ /// Pointer to a piece of YJIT-generated code
+ CodePtr(CodePtr),
+ /// Side exit with a counter
+ SideExit { counter: Counter, context: Option<SideExitContext> },
+ /// Pointer to a side exit code
+ SideExitPtr(CodePtr),
+ /// A label within the generated code
+ Label(usize),
+}
+
+impl Target
+{
+ pub fn side_exit(counter: Counter) -> Target {
+ Target::SideExit { counter, context: None }
+ }
+
+ pub fn unwrap_label_idx(&self) -> usize {
+ match self {
+ Target::Label(idx) => *idx,
+ _ => unreachable!("trying to unwrap {:?} into label", self)
+ }
+ }
+
+ pub fn unwrap_code_ptr(&self) -> CodePtr {
+ match self {
+ Target::CodePtr(ptr) => *ptr,
+ Target::SideExitPtr(ptr) => *ptr,
+ _ => unreachable!("trying to unwrap {:?} into code ptr", self)
+ }
+ }
+}
+
+impl From<CodePtr> for Target {
+ fn from(code_ptr: CodePtr) -> Self {
+ Target::CodePtr(code_ptr)
+ }
+}
+
+type PosMarkerFn = Box<dyn Fn(CodePtr, &CodeBlock)>;
+
+/// YJIT IR instruction
+pub enum Insn {
+ /// Add two operands together, and return the result as a new operand.
+ Add { left: Opnd, right: Opnd, out: Opnd },
+
+ /// This is the same as the Add instruction, except that it performs the
+ /// binary AND operation.
+ And { left: Opnd, right: Opnd, out: Opnd },
+
+ /// Bake a string directly into the instruction stream.
+ BakeString(String),
+
+ // Trigger a debugger breakpoint
+ #[allow(dead_code)]
+ Breakpoint,
+
+ /// Add a comment into the IR at the point that this instruction is added.
+ /// It won't have any impact on the actual compiled code.
+ Comment(String),
+
+ /// Compare two operands
+ Cmp { left: Opnd, right: Opnd },
+
+ /// Pop a register from the C stack
+ CPop { out: Opnd },
+
+ /// Pop all of the caller-save registers and the flags from the C stack
+ CPopAll,
+
+ /// Pop a register from the C stack and store it into another register
+ CPopInto(Opnd),
+
+ /// Push a register onto the C stack
+ CPush(Opnd),
+
+ /// Push all of the caller-save registers and the flags to the C stack
+ CPushAll,
+
+ // C function call with N arguments (variadic)
+ CCall { opnds: Vec<Opnd>, fptr: *const u8, out: Opnd },
+
+ // C function return
+ CRet(Opnd),
+
+ /// Conditionally select if equal
+ CSelE { truthy: Opnd, falsy: Opnd, out: Opnd },
+
+ /// Conditionally select if greater
+ CSelG { truthy: Opnd, falsy: Opnd, out: Opnd },
+
+ /// Conditionally select if greater or equal
+ CSelGE { truthy: Opnd, falsy: Opnd, out: Opnd },
+
+ /// Conditionally select if less
+ CSelL { truthy: Opnd, falsy: Opnd, out: Opnd },
+
+ /// Conditionally select if less or equal
+ CSelLE { truthy: Opnd, falsy: Opnd, out: Opnd },
+
+ /// Conditionally select if not equal
+ CSelNE { truthy: Opnd, falsy: Opnd, out: Opnd },
+
+ /// Conditionally select if not zero
+ CSelNZ { truthy: Opnd, falsy: Opnd, out: Opnd },
+
+ /// Conditionally select if zero
+ CSelZ { truthy: Opnd, falsy: Opnd, out: Opnd },
+
+ /// Set up the frame stack as necessary per the architecture.
+ FrameSetup,
+
+ /// Tear down the frame stack as necessary per the architecture.
+ FrameTeardown,
+
+ // Atomically increment a counter
+ // Input: memory operand, increment value
+ // Produces no output
+ IncrCounter { mem: Opnd, value: Opnd },
+
+ /// Jump if below or equal (unsigned)
+ Jbe(Target),
+
+ /// Jump if below (unsigned)
+ Jb(Target),
+
+ /// Jump if equal
+ Je(Target),
+
+ /// Jump if less (signed)
+ Jl(Target),
+
+ /// Jump if greater
+ Jg(Target),
+
+ /// Jump if greater or equal
+ Jge(Target),
+
+ // Unconditional jump to a branch target
+ Jmp(Target),
+
+ // Unconditional jump which takes a reg/mem address operand
+ JmpOpnd(Opnd),
+
+ /// Jump if not equal
+ Jne(Target),
+
+ /// Jump if not zero
+ Jnz(Target),
+
+ /// Jump if overflow
+ Jo(Target),
+
+ /// Jump if overflow in multiplication
+ JoMul(Target),
+
+ /// Jump if zero
+ Jz(Target),
+
+ /// Jump if operand is zero (only used during lowering at the moment)
+ Joz(Opnd, Target),
+
+ /// Jump if operand is non-zero (only used during lowering at the moment)
+ Jonz(Opnd, Target),
+
+ // Add a label into the IR at the point that this instruction is added.
+ Label(Target),
+
+ /// Get the code address of a jump target
+ LeaJumpTarget { target: Target, out: Opnd },
+
+ // Load effective address
+ Lea { opnd: Opnd, out: Opnd },
+
+ /// Take a specific register. Signal the register allocator to not use it.
+ LiveReg { opnd: Opnd, out: Opnd },
+
+ // A low-level instruction that loads a value into a register.
+ Load { opnd: Opnd, out: Opnd },
+
+ // A low-level instruction that loads a value into a specified register.
+ LoadInto { dest: Opnd, opnd: Opnd },
+
+ // A low-level instruction that loads a value into a register and
+ // sign-extends it to a 64-bit value.
+ LoadSExt { opnd: Opnd, out: Opnd },
+
+ /// Shift a value left by a certain amount.
+ LShift { opnd: Opnd, shift: Opnd, out: Opnd },
+
+ // A low-level mov instruction. It accepts two operands.
+ Mov { dest: Opnd, src: Opnd },
+
+ // Perform the NOT operation on an individual operand, and return the result
+ // as a new operand. This operand can then be used as the operand on another
+ // instruction.
+ Not { opnd: Opnd, out: Opnd },
+
+ // This is the same as the Add instruction, except that it performs the
+ // binary OR operation.
+ Or { left: Opnd, right: Opnd, out: Opnd },
+
+ /// Pad with nop instructions to leave room for a jump (Insn::Jmp) in case the
+ /// block or the insn is invalidated.
+ PadInvalPatch,
+
+ // Mark a position in the generated code
+ PosMarker(PosMarkerFn),
+
+ /// Shift a value right by a certain amount (signed).
+ RShift { opnd: Opnd, shift: Opnd, out: Opnd },
+
+ // Low-level instruction to store a value to memory.
+ Store { dest: Opnd, src: Opnd },
+
+ // This is the same as the add instruction, except for subtraction.
+ Sub { left: Opnd, right: Opnd, out: Opnd },
+
+ // Integer multiplication
+ Mul { left: Opnd, right: Opnd, out: Opnd },
+
+ // Bitwise AND test instruction
+ Test { left: Opnd, right: Opnd },
+
+ /// Shift a value right by a certain amount (unsigned).
+ URShift { opnd: Opnd, shift: Opnd, out: Opnd },
+
+ // This is the same as the Add instruction, except that it performs the
+ // binary XOR operation.
+ Xor { left: Opnd, right: Opnd, out: Opnd }
+}
+
+impl Insn {
+ /// Create an iterator that will yield a non-mutable reference to each
+ /// operand in turn for this instruction.
+ pub(super) fn opnd_iter(&self) -> InsnOpndIterator {
+ InsnOpndIterator::new(self)
+ }
+
+ /// Create an iterator that will yield a mutable reference to each operand
+ /// in turn for this instruction.
+ pub(super) fn opnd_iter_mut(&mut self) -> InsnOpndMutIterator {
+ InsnOpndMutIterator::new(self)
+ }
+
+ /// Get a mutable reference to a Target if it exists.
+ pub(super) fn target_mut(&mut self) -> Option<&mut Target> {
+ match self {
+ Insn::Jbe(target) |
+ Insn::Jb(target) |
+ Insn::Je(target) |
+ Insn::Jl(target) |
+ Insn::Jg(target) |
+ Insn::Jge(target) |
+ Insn::Jmp(target) |
+ Insn::Jne(target) |
+ Insn::Jnz(target) |
+ Insn::Jo(target) |
+ Insn::Jz(target) |
+ Insn::Label(target) |
+ Insn::JoMul(target) |
+ Insn::Joz(_, target) |
+ Insn::Jonz(_, target) |
+ Insn::LeaJumpTarget { target, .. } => {
+ Some(target)
+ }
+ _ => None,
+ }
+ }
+
+ /// Returns a string that describes which operation this instruction is
+ /// performing. This is used for debugging.
+ fn op(&self) -> &'static str {
+ match self {
+ Insn::Add { .. } => "Add",
+ Insn::And { .. } => "And",
+ Insn::BakeString(_) => "BakeString",
+ Insn::Breakpoint => "Breakpoint",
+ Insn::Comment(_) => "Comment",
+ Insn::Cmp { .. } => "Cmp",
+ Insn::CPop { .. } => "CPop",
+ Insn::CPopAll => "CPopAll",
+ Insn::CPopInto(_) => "CPopInto",
+ Insn::CPush(_) => "CPush",
+ Insn::CPushAll => "CPushAll",
+ Insn::CCall { .. } => "CCall",
+ Insn::CRet(_) => "CRet",
+ Insn::CSelE { .. } => "CSelE",
+ Insn::CSelG { .. } => "CSelG",
+ Insn::CSelGE { .. } => "CSelGE",
+ Insn::CSelL { .. } => "CSelL",
+ Insn::CSelLE { .. } => "CSelLE",
+ Insn::CSelNE { .. } => "CSelNE",
+ Insn::CSelNZ { .. } => "CSelNZ",
+ Insn::CSelZ { .. } => "CSelZ",
+ Insn::FrameSetup => "FrameSetup",
+ Insn::FrameTeardown => "FrameTeardown",
+ Insn::IncrCounter { .. } => "IncrCounter",
+ Insn::Jbe(_) => "Jbe",
+ Insn::Jb(_) => "Jb",
+ Insn::Je(_) => "Je",
+ Insn::Jl(_) => "Jl",
+ Insn::Jg(_) => "Jg",
+ Insn::Jge(_) => "Jge",
+ Insn::Jmp(_) => "Jmp",
+ Insn::JmpOpnd(_) => "JmpOpnd",
+ Insn::Jne(_) => "Jne",
+ Insn::Jnz(_) => "Jnz",
+ Insn::Jo(_) => "Jo",
+ Insn::JoMul(_) => "JoMul",
+ Insn::Jz(_) => "Jz",
+ Insn::Joz(..) => "Joz",
+ Insn::Jonz(..) => "Jonz",
+ Insn::Label(_) => "Label",
+ Insn::LeaJumpTarget { .. } => "LeaJumpTarget",
+ Insn::Lea { .. } => "Lea",
+ Insn::LiveReg { .. } => "LiveReg",
+ Insn::Load { .. } => "Load",
+ Insn::LoadInto { .. } => "LoadInto",
+ Insn::LoadSExt { .. } => "LoadSExt",
+ Insn::LShift { .. } => "LShift",
+ Insn::Mov { .. } => "Mov",
+ Insn::Not { .. } => "Not",
+ Insn::Or { .. } => "Or",
+ Insn::PadInvalPatch => "PadInvalPatch",
+ Insn::PosMarker(_) => "PosMarker",
+ Insn::RShift { .. } => "RShift",
+ Insn::Store { .. } => "Store",
+ Insn::Sub { .. } => "Sub",
+ Insn::Mul { .. } => "Mul",
+ Insn::Test { .. } => "Test",
+ Insn::URShift { .. } => "URShift",
+ Insn::Xor { .. } => "Xor"
+ }
+ }
+
+ /// Return a non-mutable reference to the out operand for this instruction
+ /// if it has one.
+ pub fn out_opnd(&self) -> Option<&Opnd> {
+ match self {
+ Insn::Add { out, .. } |
+ Insn::And { out, .. } |
+ Insn::CCall { out, .. } |
+ Insn::CPop { out, .. } |
+ Insn::CSelE { out, .. } |
+ Insn::CSelG { out, .. } |
+ Insn::CSelGE { out, .. } |
+ Insn::CSelL { out, .. } |
+ Insn::CSelLE { out, .. } |
+ Insn::CSelNE { out, .. } |
+ Insn::CSelNZ { out, .. } |
+ Insn::CSelZ { out, .. } |
+ Insn::Lea { out, .. } |
+ Insn::LeaJumpTarget { out, .. } |
+ Insn::LiveReg { out, .. } |
+ Insn::Load { out, .. } |
+ Insn::LoadSExt { out, .. } |
+ Insn::LShift { out, .. } |
+ Insn::Not { out, .. } |
+ Insn::Or { out, .. } |
+ Insn::RShift { out, .. } |
+ Insn::Sub { out, .. } |
+ Insn::Mul { out, .. } |
+ Insn::URShift { out, .. } |
+ Insn::Xor { out, .. } => Some(out),
+ _ => None
+ }
+ }
+
+ /// Return a mutable reference to the out operand for this instruction if it
+ /// has one.
+ pub fn out_opnd_mut(&mut self) -> Option<&mut Opnd> {
+ match self {
+ Insn::Add { out, .. } |
+ Insn::And { out, .. } |
+ Insn::CCall { out, .. } |
+ Insn::CPop { out, .. } |
+ Insn::CSelE { out, .. } |
+ Insn::CSelG { out, .. } |
+ Insn::CSelGE { out, .. } |
+ Insn::CSelL { out, .. } |
+ Insn::CSelLE { out, .. } |
+ Insn::CSelNE { out, .. } |
+ Insn::CSelNZ { out, .. } |
+ Insn::CSelZ { out, .. } |
+ Insn::Lea { out, .. } |
+ Insn::LeaJumpTarget { out, .. } |
+ Insn::LiveReg { out, .. } |
+ Insn::Load { out, .. } |
+ Insn::LoadSExt { out, .. } |
+ Insn::LShift { out, .. } |
+ Insn::Not { out, .. } |
+ Insn::Or { out, .. } |
+ Insn::RShift { out, .. } |
+ Insn::Sub { out, .. } |
+ Insn::Mul { out, .. } |
+ Insn::URShift { out, .. } |
+ Insn::Xor { out, .. } => Some(out),
+ _ => None
+ }
+ }
+
+ /// Returns the target for this instruction if there is one.
+ pub fn target(&self) -> Option<&Target> {
+ match self {
+ Insn::Jbe(target) |
+ Insn::Jb(target) |
+ Insn::Je(target) |
+ Insn::Jl(target) |
+ Insn::Jg(target) |
+ Insn::Jge(target) |
+ Insn::Jmp(target) |
+ Insn::Jne(target) |
+ Insn::Jnz(target) |
+ Insn::Jo(target) |
+ Insn::Jz(target) |
+ Insn::LeaJumpTarget { target, .. } => Some(target),
+ _ => None
+ }
+ }
+
+ /// Returns the text associated with this instruction if there is some.
+ pub fn text(&self) -> Option<&String> {
+ match self {
+ Insn::BakeString(text) |
+ Insn::Comment(text) => Some(text),
+ _ => None
+ }
+ }
+}
+
+/// An iterator that will yield a non-mutable reference to each operand in turn
+/// for the given instruction.
+pub(super) struct InsnOpndIterator<'a> {
+ insn: &'a Insn,
+ idx: usize,
+}
+
+impl<'a> InsnOpndIterator<'a> {
+ fn new(insn: &'a Insn) -> Self {
+ Self { insn, idx: 0 }
+ }
+}
+
+impl<'a> Iterator for InsnOpndIterator<'a> {
+ type Item = &'a Opnd;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ match self.insn {
+ Insn::BakeString(_) |
+ Insn::Breakpoint |
+ Insn::Comment(_) |
+ Insn::CPop { .. } |
+ Insn::CPopAll |
+ Insn::CPushAll |
+ Insn::FrameSetup |
+ Insn::FrameTeardown |
+ Insn::Jbe(_) |
+ Insn::Jb(_) |
+ Insn::Je(_) |
+ Insn::Jl(_) |
+ Insn::Jg(_) |
+ Insn::Jge(_) |
+ Insn::Jmp(_) |
+ Insn::Jne(_) |
+ Insn::Jnz(_) |
+ Insn::Jo(_) |
+ Insn::JoMul(_) |
+ Insn::Jz(_) |
+ Insn::Label(_) |
+ Insn::LeaJumpTarget { .. } |
+ Insn::PadInvalPatch |
+ Insn::PosMarker(_) => None,
+
+ Insn::CPopInto(opnd) |
+ Insn::CPush(opnd) |
+ Insn::CRet(opnd) |
+ Insn::JmpOpnd(opnd) |
+ Insn::Lea { opnd, .. } |
+ Insn::LiveReg { opnd, .. } |
+ Insn::Load { opnd, .. } |
+ Insn::LoadSExt { opnd, .. } |
+ Insn::Joz(opnd, _) |
+ Insn::Jonz(opnd, _) |
+ Insn::Not { opnd, .. } => {
+ match self.idx {
+ 0 => {
+ self.idx += 1;
+ Some(&opnd)
+ },
+ _ => None
+ }
+ },
+ Insn::Add { left: opnd0, right: opnd1, .. } |
+ Insn::And { left: opnd0, right: opnd1, .. } |
+ Insn::Cmp { left: opnd0, right: opnd1 } |
+ Insn::CSelE { truthy: opnd0, falsy: opnd1, .. } |
+ Insn::CSelG { truthy: opnd0, falsy: opnd1, .. } |
+ Insn::CSelGE { truthy: opnd0, falsy: opnd1, .. } |
+ Insn::CSelL { truthy: opnd0, falsy: opnd1, .. } |
+ Insn::CSelLE { truthy: opnd0, falsy: opnd1, .. } |
+ Insn::CSelNE { truthy: opnd0, falsy: opnd1, .. } |
+ Insn::CSelNZ { truthy: opnd0, falsy: opnd1, .. } |
+ Insn::CSelZ { truthy: opnd0, falsy: opnd1, .. } |
+ Insn::IncrCounter { mem: opnd0, value: opnd1, .. } |
+ Insn::LoadInto { dest: opnd0, opnd: opnd1 } |
+ Insn::LShift { opnd: opnd0, shift: opnd1, .. } |
+ Insn::Mov { dest: opnd0, src: opnd1 } |
+ Insn::Or { left: opnd0, right: opnd1, .. } |
+ Insn::RShift { opnd: opnd0, shift: opnd1, .. } |
+ Insn::Store { dest: opnd0, src: opnd1 } |
+ Insn::Sub { left: opnd0, right: opnd1, .. } |
+ Insn::Mul { left: opnd0, right: opnd1, .. } |
+ Insn::Test { left: opnd0, right: opnd1 } |
+ Insn::URShift { opnd: opnd0, shift: opnd1, .. } |
+ Insn::Xor { left: opnd0, right: opnd1, .. } => {
+ match self.idx {
+ 0 => {
+ self.idx += 1;
+ Some(&opnd0)
+ }
+ 1 => {
+ self.idx += 1;
+ Some(&opnd1)
+ }
+ _ => None
+ }
+ },
+ Insn::CCall { opnds, .. } => {
+ if self.idx < opnds.len() {
+ let opnd = &opnds[self.idx];
+ self.idx += 1;
+ Some(opnd)
+ } else {
+ None
+ }
+ }
+ }
+ }
+}
+
+/// An iterator that will yield a mutable reference to each operand in turn
+/// for the given instruction.
+pub(super) struct InsnOpndMutIterator<'a> {
+ insn: &'a mut Insn,
+ idx: usize,
+}
+
+impl<'a> InsnOpndMutIterator<'a> {
+ fn new(insn: &'a mut Insn) -> Self {
+ Self { insn, idx: 0 }
+ }
+
+ pub(super) fn next(&mut self) -> Option<&mut Opnd> {
+ match self.insn {
+ Insn::BakeString(_) |
+ Insn::Breakpoint |
+ Insn::Comment(_) |
+ Insn::CPop { .. } |
+ Insn::CPopAll |
+ Insn::CPushAll |
+ Insn::FrameSetup |
+ Insn::FrameTeardown |
+ Insn::Jbe(_) |
+ Insn::Jb(_) |
+ Insn::Je(_) |
+ Insn::Jl(_) |
+ Insn::Jg(_) |
+ Insn::Jge(_) |
+ Insn::Jmp(_) |
+ Insn::Jne(_) |
+ Insn::Jnz(_) |
+ Insn::Jo(_) |
+ Insn::JoMul(_) |
+ Insn::Jz(_) |
+ Insn::Label(_) |
+ Insn::LeaJumpTarget { .. } |
+ Insn::PadInvalPatch |
+ Insn::PosMarker(_) => None,
+
+ Insn::CPopInto(opnd) |
+ Insn::CPush(opnd) |
+ Insn::CRet(opnd) |
+ Insn::JmpOpnd(opnd) |
+ Insn::Lea { opnd, .. } |
+ Insn::LiveReg { opnd, .. } |
+ Insn::Load { opnd, .. } |
+ Insn::LoadSExt { opnd, .. } |
+ Insn::Joz(opnd, _) |
+ Insn::Jonz(opnd, _) |
+ Insn::Not { opnd, .. } => {
+ match self.idx {
+ 0 => {
+ self.idx += 1;
+ Some(opnd)
+ },
+ _ => None
+ }
+ },
+ Insn::Add { left: opnd0, right: opnd1, .. } |
+ Insn::And { left: opnd0, right: opnd1, .. } |
+ Insn::Cmp { left: opnd0, right: opnd1 } |
+ Insn::CSelE { truthy: opnd0, falsy: opnd1, .. } |
+ Insn::CSelG { truthy: opnd0, falsy: opnd1, .. } |
+ Insn::CSelGE { truthy: opnd0, falsy: opnd1, .. } |
+ Insn::CSelL { truthy: opnd0, falsy: opnd1, .. } |
+ Insn::CSelLE { truthy: opnd0, falsy: opnd1, .. } |
+ Insn::CSelNE { truthy: opnd0, falsy: opnd1, .. } |
+ Insn::CSelNZ { truthy: opnd0, falsy: opnd1, .. } |
+ Insn::CSelZ { truthy: opnd0, falsy: opnd1, .. } |
+ Insn::IncrCounter { mem: opnd0, value: opnd1, .. } |
+ Insn::LoadInto { dest: opnd0, opnd: opnd1 } |
+ Insn::LShift { opnd: opnd0, shift: opnd1, .. } |
+ Insn::Mov { dest: opnd0, src: opnd1 } |
+ Insn::Or { left: opnd0, right: opnd1, .. } |
+ Insn::RShift { opnd: opnd0, shift: opnd1, .. } |
+ Insn::Store { dest: opnd0, src: opnd1 } |
+ Insn::Sub { left: opnd0, right: opnd1, .. } |
+ Insn::Mul { left: opnd0, right: opnd1, .. } |
+ Insn::Test { left: opnd0, right: opnd1 } |
+ Insn::URShift { opnd: opnd0, shift: opnd1, .. } |
+ Insn::Xor { left: opnd0, right: opnd1, .. } => {
+ match self.idx {
+ 0 => {
+ self.idx += 1;
+ Some(opnd0)
+ }
+ 1 => {
+ self.idx += 1;
+ Some(opnd1)
+ }
+ _ => None
+ }
+ },
+ Insn::CCall { opnds, .. } => {
+ if self.idx < opnds.len() {
+ let opnd = &mut opnds[self.idx];
+ self.idx += 1;
+ Some(opnd)
+ } else {
+ None
+ }
+ }
+ }
+ }
+}
+
+impl fmt::Debug for Insn {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(fmt, "{}(", self.op())?;
+
+ // Print list of operands
+ let mut opnd_iter = self.opnd_iter();
+ if let Some(first_opnd) = opnd_iter.next() {
+ write!(fmt, "{first_opnd:?}")?;
+ }
+ for opnd in opnd_iter {
+ write!(fmt, ", {opnd:?}")?;
+ }
+ write!(fmt, ")")?;
+
+ // Print text, target, and pos if they are present
+ if let Some(text) = self.text() {
+ write!(fmt, " {text:?}")?
+ }
+ if let Some(target) = self.target() {
+ write!(fmt, " target={target:?}")?;
+ }
+
+ write!(fmt, " -> {:?}", self.out_opnd().unwrap_or(&Opnd::None))
+ }
+}
+
+/// Set of variables used for generating side exits
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
+pub struct SideExitContext {
+ /// PC of the instruction being compiled
+ pub pc: *mut VALUE,
+
+ /// Context fields used by get_generic_ctx()
+ pub stack_size: u8,
+ pub sp_offset: i8,
+ pub reg_temps: RegTemps,
+ pub is_return_landing: bool,
+ pub is_deferred: bool,
+}
+
+impl SideExitContext {
+ /// Convert PC and Context into SideExitContext
+ pub fn new(pc: *mut VALUE, ctx: Context) -> Self {
+ let exit_ctx = SideExitContext {
+ pc,
+ stack_size: ctx.get_stack_size(),
+ sp_offset: ctx.get_sp_offset(),
+ reg_temps: ctx.get_reg_temps(),
+ is_return_landing: ctx.is_return_landing(),
+ is_deferred: ctx.is_deferred(),
+ };
+ if cfg!(debug_assertions) {
+ // Assert that we're not losing any mandatory metadata
+ assert_eq!(exit_ctx.get_ctx(), ctx.get_generic_ctx());
+ }
+ exit_ctx
+ }
+
+ /// Convert SideExitContext to Context
+ fn get_ctx(&self) -> Context {
+ let mut ctx = Context::default();
+ ctx.set_stack_size(self.stack_size);
+ ctx.set_sp_offset(self.sp_offset);
+ ctx.set_reg_temps(self.reg_temps);
+ if self.is_return_landing {
+ ctx.set_as_return_landing();
+ }
+ if self.is_deferred {
+ ctx.mark_as_deferred();
+ }
+ ctx
+ }
+}
+
+/// Initial capacity for asm.insns vector
+const ASSEMBLER_INSNS_CAPACITY: usize = 256;
+
+/// Object into which we assemble instructions to be
+/// optimized and lowered
+pub struct Assembler {
+ pub(super) insns: Vec<Insn>,
+
+ /// Parallel vec with insns
+ /// Index of the last insn using the output of this insn
+ pub(super) live_ranges: Vec<usize>,
+
+ /// Names of labels
+ pub(super) label_names: Vec<String>,
+
+ /// Context for generating the current insn
+ pub ctx: Context,
+
+ /// Side exit caches for each SideExitContext
+ pub(super) side_exits: HashMap<SideExitContext, CodePtr>,
+
+ /// PC for Target::SideExit
+ side_exit_pc: Option<*mut VALUE>,
+
+ /// Stack size for Target::SideExit
+ side_exit_stack_size: Option<u8>,
+
+ /// If true, the next ccall() should verify its leafness
+ leaf_ccall: bool,
+}
+
+impl Assembler
+{
+ pub fn new() -> Self {
+ Self::new_with_label_names(Vec::default(), HashMap::default())
+ }
+
+ pub fn new_with_label_names(label_names: Vec<String>, side_exits: HashMap<SideExitContext, CodePtr>) -> Self {
+ Self {
+ insns: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY),
+ live_ranges: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY),
+ label_names,
+ ctx: Context::default(),
+ side_exits,
+ side_exit_pc: None,
+ side_exit_stack_size: None,
+ leaf_ccall: false,
+ }
+ }
+
+ /// Get the list of registers that can be used for stack temps.
+ pub fn get_temp_regs() -> &'static [Reg] {
+ let num_regs = get_option!(num_temp_regs);
+ &TEMP_REGS[0..num_regs]
+ }
+
+ /// Set a context for generating side exits
+ pub fn set_side_exit_context(&mut self, pc: *mut VALUE, stack_size: u8) {
+ self.side_exit_pc = Some(pc);
+ self.side_exit_stack_size = Some(stack_size);
+ }
+
+ /// Build an Opnd::InsnOut from the current index of the assembler and the
+ /// given number of bits.
+ pub(super) fn next_opnd_out(&self, num_bits: u8) -> Opnd {
+ Opnd::InsnOut { idx: self.insns.len(), num_bits }
+ }
+
+ /// Append an instruction onto the current list of instructions and update
+ /// the live ranges of any instructions whose outputs are being used as
+ /// operands to this instruction.
+ pub fn push_insn(&mut self, mut insn: Insn) {
+ // Index of this instruction
+ let insn_idx = self.insns.len();
+
+ let mut opnd_iter = insn.opnd_iter_mut();
+ while let Some(opnd) = opnd_iter.next() {
+ match opnd {
+ // If we find any InsnOut from previous instructions, we're going to update
+ // the live range of the previous instruction to point to this one.
+ Opnd::InsnOut { idx, .. } => {
+ assert!(*idx < self.insns.len());
+ self.live_ranges[*idx] = insn_idx;
+ }
+ Opnd::Mem(Mem { base: MemBase::InsnOut(idx), .. }) => {
+ assert!(*idx < self.insns.len());
+ self.live_ranges[*idx] = insn_idx;
+ }
+ // Set current ctx.reg_temps to Opnd::Stack.
+ Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps: None } => {
+ assert_eq!(
+ self.ctx.get_stack_size() as i16 - self.ctx.get_sp_offset() as i16,
+ *stack_size as i16 - *sp_offset as i16,
+ "Opnd::Stack (stack_size: {}, sp_offset: {}) expects a different SP position from asm.ctx (stack_size: {}, sp_offset: {})",
+ *stack_size, *sp_offset, self.ctx.get_stack_size(), self.ctx.get_sp_offset(),
+ );
+ *opnd = Opnd::Stack {
+ idx: *idx,
+ num_bits: *num_bits,
+ stack_size: *stack_size,
+ sp_offset: *sp_offset,
+ reg_temps: Some(self.ctx.get_reg_temps()),
+ };
+ }
+ _ => {}
+ }
+ }
+
+ // Set a side exit context to Target::SideExit
+ if let Some(Target::SideExit { context, .. }) = insn.target_mut() {
+ // We should skip this when this instruction is being copied from another Assembler.
+ if context.is_none() {
+ *context = Some(SideExitContext::new(
+ self.side_exit_pc.unwrap(),
+ self.ctx.with_stack_size(self.side_exit_stack_size.unwrap()),
+ ));
+ }
+ }
+
+ self.insns.push(insn);
+ self.live_ranges.push(insn_idx);
+ }
+
+ /// Get a cached side exit, wrapping a counter if specified
+ pub fn get_side_exit(&mut self, side_exit_context: &SideExitContext, counter: Option<Counter>, ocb: &mut OutlinedCb) -> Option<CodePtr> {
+ // Get a cached side exit
+ let side_exit = match self.side_exits.get(&side_exit_context) {
+ None => {
+ let exit_code = gen_outlined_exit(side_exit_context.pc, &side_exit_context.get_ctx(), ocb)?;
+ self.side_exits.insert(*side_exit_context, exit_code);
+ exit_code
+ }
+ Some(code_ptr) => *code_ptr,
+ };
+
+ // Wrap a counter if needed
+ gen_counted_exit(side_exit_context.pc, side_exit, ocb, counter)
+ }
+
+ /// Create a new label instance that we can jump to
+ pub fn new_label(&mut self, name: &str) -> Target
+ {
+ assert!(!name.contains(' '), "use underscores in label names, not spaces");
+
+ let label_idx = self.label_names.len();
+ self.label_names.push(name.to_string());
+ Target::Label(label_idx)
+ }
+
+ /// Convert Opnd::Stack to Opnd::Mem or Opnd::Reg
+ pub fn lower_stack_opnd(&self, opnd: &Opnd) -> Opnd {
+ // Convert Opnd::Stack to Opnd::Mem
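+ // idx counts down from the stack top, so the slot lives at
+ // SP + (sp_offset - idx - 1) * SIZEOF_VALUE.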
+ fn mem_opnd(opnd: &Opnd) -> Opnd {
+ if let Opnd::Stack { idx, sp_offset, num_bits, .. } = *opnd {
+ incr_counter!(temp_mem_opnd);
+ Opnd::mem(num_bits, SP, (sp_offset as i32 - idx - 1) * SIZEOF_VALUE_I32)
+ } else {
+ unreachable!()
+ }
+ }
+
+ // Convert Opnd::Stack to Opnd::Reg
+ fn reg_opnd(opnd: &Opnd) -> Opnd {
+ let regs = Assembler::get_temp_regs();
+ if let Opnd::Stack { num_bits, .. } = *opnd {
+ incr_counter!(temp_reg_opnd);
+ Opnd::Reg(regs[opnd.reg_idx()]).with_num_bits(num_bits).unwrap()
+ } else {
+ unreachable!()
+ }
+ }
+
+ match opnd {
+ Opnd::Stack { reg_temps, .. } => {
+ if opnd.stack_idx() < MAX_REG_TEMPS && reg_temps.unwrap().get(opnd.stack_idx()) {
+ reg_opnd(opnd)
+ } else {
+ mem_opnd(opnd)
+ }
+ }
+ _ => unreachable!(),
+ }
+ }
+
+ /// Allocate a register to a stack temp if available.
+ pub fn alloc_temp_reg(&mut self, stack_idx: u8) {
+ if get_option!(num_temp_regs) == 0 {
+ return;
+ }
+
+ // Allocate a register if there's no conflict.
+ let mut reg_temps = self.ctx.get_reg_temps();
+ if reg_temps.conflicts_with(stack_idx) {
+ assert!(!reg_temps.get(stack_idx));
+ } else {
+ reg_temps.set(stack_idx, true);
+ self.set_reg_temps(reg_temps);
+ }
+ }
+
+ /// Erase local variable type information
+ /// e.g., because of a call we can't track
+ pub fn clear_local_types(&mut self) {
+ asm_comment!(self, "clear local variable types");
+ self.ctx.clear_local_types();
+ }
+
+ /// Spill all live stack temps from registers to the stack
+ pub fn spill_temps(&mut self) {
+ // Forget registers above the stack top
+ let mut reg_temps = self.ctx.get_reg_temps();
+ for stack_idx in self.ctx.get_stack_size()..MAX_REG_TEMPS {
+ reg_temps.set(stack_idx, false);
+ }
+ self.set_reg_temps(reg_temps);
+
+ // Spill live stack temps
+ if self.ctx.get_reg_temps() != RegTemps::default() {
+ asm_comment!(self, "spill_temps: {:08b} -> {:08b}", self.ctx.get_reg_temps().as_u8(), RegTemps::default().as_u8());
+ for stack_idx in 0..u8::min(MAX_REG_TEMPS, self.ctx.get_stack_size()) {
+ if self.ctx.get_reg_temps().get(stack_idx) {
+ let idx = self.ctx.get_stack_size() - 1 - stack_idx;
+ self.spill_temp(self.stack_opnd(idx.into()));
+ reg_temps.set(stack_idx, false);
+ }
+ }
+ self.ctx.set_reg_temps(reg_temps);
+ }
+
+ // Every stack temp should have been spilled
+ assert_eq!(self.ctx.get_reg_temps(), RegTemps::default());
+ }
+
+ /// Spill a stack temp from a register to the stack
+ fn spill_temp(&mut self, opnd: Opnd) {
+ assert!(self.ctx.get_reg_temps().get(opnd.stack_idx()));
+
+ // Use different RegTemps for dest and src operands
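+ // Clearing this temp's bit in the destination's RegTemps makes
+ // lower_stack_opnd() pick the memory form for the destination, while
+ // the source (bit still set) is still lowered to its register.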
+ let reg_temps = self.ctx.get_reg_temps();
+ let mut mem_temps = reg_temps;
+ mem_temps.set(opnd.stack_idx(), false);
+
+ // Move the stack operand from a register to memory
+ match opnd {
+ Opnd::Stack { idx, num_bits, stack_size, sp_offset, .. } => {
+ self.mov(
+ Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps: Some(mem_temps) },
+ Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps: Some(reg_temps) },
+ );
+ }
+ _ => unreachable!(),
+ }
+ incr_counter!(temp_spill);
+ }
+
+ /// Update which stack temps are in a register
+ pub fn set_reg_temps(&mut self, reg_temps: RegTemps) {
+ if self.ctx.get_reg_temps() != reg_temps {
+ asm_comment!(self, "reg_temps: {:08b} -> {:08b}", self.ctx.get_reg_temps().as_u8(), reg_temps.as_u8());
+ self.ctx.set_reg_temps(reg_temps);
+ self.verify_reg_temps();
+ }
+ }
+
+ /// Assert there's no conflict in stack temp register allocation
+ fn verify_reg_temps(&self) {
+ for stack_idx in 0..MAX_REG_TEMPS {
+ if self.ctx.get_reg_temps().get(stack_idx) {
+ assert!(!self.ctx.get_reg_temps().conflicts_with(stack_idx));
+ }
+ }
+ }
+
+ /// Sets the out field on the various instructions that require allocated
+ /// registers because their output is used as the operand on a subsequent
+ /// instruction. This is our implementation of the linear scan algorithm.
+ pub(super) fn alloc_regs(mut self, regs: Vec<Reg>) -> Assembler
+ {
+ //dbg!(&self);
+
+ // First, create the pool of registers.
+ let mut pool: u32 = 0;
+
+ // Mutate the pool bitmap to indicate that the register at that index
+ // has been allocated and is live.
+ fn alloc_reg(pool: &mut u32, regs: &Vec<Reg>) -> Option<Reg> {
+ for (index, reg) in regs.iter().enumerate() {
+ if (*pool & (1 << index)) == 0 {
+ *pool |= 1 << index;
+ return Some(*reg);
+ }
+ }
+ None
+ }
+
+ // Allocate a specific register
+ fn take_reg(pool: &mut u32, regs: &Vec<Reg>, reg: &Reg) -> Reg {
+ let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no);
+
+ if let Some(reg_index) = reg_index {
+ assert_eq!(*pool & (1 << reg_index), 0, "register already allocated");
+ *pool |= 1 << reg_index;
+ }
+
+ return *reg;
+ }
+
+ // Mutate the pool bitmap to indicate that the given register is being
+ // returned as it is no longer used by the instruction that previously
+ // held it.
+ fn dealloc_reg(pool: &mut u32, regs: &Vec<Reg>, reg: &Reg) {
+ let reg_index = regs.iter().position(|elem| elem.reg_no == reg.reg_no);
+
+ if let Some(reg_index) = reg_index {
+ *pool &= !(1 << reg_index);
+ }
+ }
+
+ // Reorder C argument moves, sometimes adding extra moves using SCRATCH_REG,
+ // so that they will not rewrite each other before they are used.
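+ // For example, the moves {rA <- rB, rB <- rA} form a cycle where
+ // neither move is safe to emit first, so one source is parked in
+ // SCRATCH_REG and its destination move is re-queued until it is safe.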
+ fn reorder_c_args(c_args: &Vec<(Reg, Opnd)>) -> Vec<(Reg, Opnd)> {
+ // Return the index of a move whose destination is not used as a source if any.
+ fn find_safe_arg(c_args: &Vec<(Reg, Opnd)>) -> Option<usize> {
+ c_args.iter().enumerate().find(|(_, &(dest_reg, _))| {
+ c_args.iter().all(|&(_, src_opnd)| src_opnd != Opnd::Reg(dest_reg))
+ }).map(|(index, _)| index)
+ }
+
+ // Remove moves whose source and destination are the same
+ let mut c_args: Vec<(Reg, Opnd)> = c_args.clone().into_iter()
+ .filter(|&(reg, opnd)| Opnd::Reg(reg) != opnd).collect();
+
+ let mut moves = vec![];
+ while c_args.len() > 0 {
+ // Keep taking safe moves
+ while let Some(index) = find_safe_arg(&c_args) {
+ moves.push(c_args.remove(index));
+ }
+
+ // No safe move. Load the source of one move into SCRATCH_REG, and
+ // then load SCRATCH_REG into the destination when it's safe.
+ if c_args.len() > 0 {
+ // Make sure it's safe to use SCRATCH_REG
+ assert!(c_args.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG)));
+
+ // Move SCRATCH <- opnd, and delay reg <- SCRATCH
+ let (reg, opnd) = c_args.remove(0);
+ moves.push((Assembler::SCRATCH_REG, opnd));
+ c_args.push((reg, Opnd::Reg(Assembler::SCRATCH_REG)));
+ }
+ }
+ moves
+ }
+
+ // Adjust the number of entries in live_ranges so that it can be indexed by mapped indexes.
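+    // For example: if 3 buffered C argument moves become 4 moves after reorder_c_args,
+    // shift_offset is +1 and one filler entry (whose live range ends at itself) is
+    // inserted at start_index; a negative offset removes entries instead.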
+ fn shift_live_ranges(live_ranges: &mut Vec<usize>, start_index: usize, shift_offset: isize) {
+ if shift_offset >= 0 {
+ for index in 0..(shift_offset as usize) {
+ live_ranges.insert(start_index + index, start_index + index);
+ }
+ } else {
+ for _ in 0..-shift_offset {
+ live_ranges.remove(start_index);
+ }
+ }
+ }
+
+ // Dump live registers for register spill debugging.
+ fn dump_live_regs(insns: Vec<Insn>, live_ranges: Vec<usize>, num_regs: usize, spill_index: usize) {
+ // Convert live_ranges to live_regs: the number of live registers at each index
+ let mut live_regs: Vec<usize> = vec![];
+ let mut end_idxs: Vec<usize> = vec![];
+ for (cur_idx, &end_idx) in live_ranges.iter().enumerate() {
+ end_idxs.push(end_idx);
+ while let Some(end_idx) = end_idxs.iter().position(|&end_idx| cur_idx == end_idx) {
+ end_idxs.remove(end_idx);
+ }
+ live_regs.push(end_idxs.len());
+ }
+
+ // Dump insns along with live registers
+ for (insn_idx, insn) in insns.iter().enumerate() {
+ eprint!("{:3} ", if spill_index == insn_idx { "==>" } else { "" });
+ for reg in 0..=num_regs {
+ eprint!("{:1}", if reg < live_regs[insn_idx] { "|" } else { "" });
+ }
+ eprintln!(" [{:3}] {:?}", insn_idx, insn);
+ }
+ }
+
+ // We may need to reorder LoadInto instructions with a C argument operand.
+ // This buffers the operands of such instructions to process them in batches.
+ let mut c_args: Vec<(Reg, Opnd)> = vec![];
+
+ // live_ranges is indexed by original `index` given by the iterator.
+ let live_ranges: Vec<usize> = take(&mut self.live_ranges);
+ // shifted_live_ranges is indexed by mapped indexes in insn operands.
+ let mut shifted_live_ranges: Vec<usize> = live_ranges.clone();
+ let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits));
+ let mut iterator = self.into_draining_iter();
+
+ while let Some((index, mut insn)) = iterator.next_mapped() {
+ // Check if this is the last instruction that uses an operand that
+ // spans more than one instruction. In that case, return the
+ // allocated register to the pool.
+ for opnd in insn.opnd_iter() {
+ match opnd {
+ Opnd::InsnOut { idx, .. } |
+ Opnd::Mem(Mem { base: MemBase::InsnOut(idx), .. }) => {
+                        // Since we have an InsnOut, we know it spans more than one
+ // instruction.
+ let start_index = *idx;
+
+ // We're going to check if this is the last instruction that
+ // uses this operand. If it is, we can return the allocated
+ // register to the pool.
+ if shifted_live_ranges[start_index] == index {
+ if let Some(Opnd::Reg(reg)) = asm.insns[start_index].out_opnd() {
+ dealloc_reg(&mut pool, &regs, reg);
+ } else {
+ unreachable!("no register allocated for insn {:?}", insn);
+ }
+ }
+ }
+ _ => {}
+ }
+ }
+
+            // No register should still be allocated across a C function call,
+            // since the C call would clobber caller-saved registers.
+ if matches!(insn, Insn::CCall { .. }) {
+ assert_eq!(pool, 0, "register lives past C function call");
+ }
+
+ // If this instruction is used by another instruction,
+ // we need to allocate a register to it
+ if live_ranges[index] != index {
+ // If we get to this point where the end of the live range is
+ // not equal to the index of the instruction, then it must be
+ // true that we set an output operand for this instruction. If
+ // it's not true, something has gone wrong.
+ assert!(
+ !matches!(insn.out_opnd(), None),
+ "Instruction output reused but no output operand set"
+ );
+
+ // This is going to be the output operand that we will set on
+ // the instruction.
+ let mut out_reg: Option<Reg> = None;
+
+ // C return values need to be mapped to the C return register
+ if matches!(insn, Insn::CCall { .. }) {
+ out_reg = Some(take_reg(&mut pool, &regs, &C_RET_REG));
+ }
+
+ // If this instruction's first operand maps to a register and
+ // this is the last use of the register, reuse the register
+ // We do this to improve register allocation on x86
+ // e.g. out = add(reg0, reg1)
+ // reg0 = add(reg0, reg1)
+ if out_reg.is_none() {
+ let mut opnd_iter = insn.opnd_iter();
+
+ if let Some(Opnd::InsnOut{ idx, .. }) = opnd_iter.next() {
+ if shifted_live_ranges[*idx] == index {
+ if let Some(Opnd::Reg(reg)) = asm.insns[*idx].out_opnd() {
+ out_reg = Some(take_reg(&mut pool, &regs, reg));
+ }
+ }
+ }
+ }
+
+ // Allocate a new register for this instruction if one is not
+ // already allocated.
+ if out_reg.is_none() {
+ out_reg = match &insn {
+ Insn::LiveReg { opnd, .. } => {
+ // Allocate a specific register
+ let reg = opnd.unwrap_reg();
+ Some(take_reg(&mut pool, &regs, &reg))
+ },
+ _ => match alloc_reg(&mut pool, &regs) {
+ Some(reg) => Some(reg),
+ None => {
+ let mut insns = asm.insns;
+ insns.push(insn);
+ for insn in iterator.insns {
+ insns.push(insn);
+ }
+ dump_live_regs(insns, live_ranges, regs.len(), index);
+ unreachable!("Register spill not supported");
+ }
+ }
+ };
+ }
+
+ // Set the output operand on the instruction
+ let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter());
+
+ // If we have gotten to this point, then we're sure we have an
+ // output operand on this instruction because the live range
+ // extends beyond the index of the instruction.
+ let out = insn.out_opnd_mut().unwrap();
+ *out = Opnd::Reg(out_reg.unwrap().with_num_bits(out_num_bits));
+ }
+
+ // Replace InsnOut operands by their corresponding register
+ let mut opnd_iter = insn.opnd_iter_mut();
+ while let Some(opnd) = opnd_iter.next() {
+ match *opnd {
+ Opnd::InsnOut { idx, num_bits } => {
+ *opnd = (*asm.insns[idx].out_opnd().unwrap()).with_num_bits(num_bits).unwrap();
+ },
+ Opnd::Mem(Mem { base: MemBase::InsnOut(idx), disp, num_bits }) => {
+ let base = MemBase::Reg(asm.insns[idx].out_opnd().unwrap().unwrap_reg().reg_no);
+ *opnd = Opnd::Mem(Mem { base, disp, num_bits });
+ }
+ _ => {},
+ }
+ }
+
+ // Push instruction(s). Batch and reorder C argument operations if needed.
+ if let Insn::LoadInto { dest: Opnd::CArg(reg), opnd } = insn {
+ // Buffer C arguments
+ c_args.push((reg, opnd));
+ } else {
+ // C arguments are buffered until CCall
+ if c_args.len() > 0 {
+ // Resolve C argument dependencies
+ let c_args_len = c_args.len() as isize;
+ let moves = reorder_c_args(&c_args.drain(..).into_iter().collect());
+ shift_live_ranges(&mut shifted_live_ranges, asm.insns.len(), moves.len() as isize - c_args_len);
+
+ // Push batched C arguments
+ for (reg, opnd) in moves {
+ asm.load_into(Opnd::Reg(reg), opnd);
+ }
+ }
+ // Other instructions are pushed as is
+ asm.push_insn(insn);
+ }
+ iterator.map_insn_index(&mut asm);
+ }
+
+ assert_eq!(pool, 0, "Expected all registers to be returned to the pool");
+ asm
+ }
+
+ /// Compile the instructions down to machine code.
+ /// Can fail due to lack of code memory and inopportune code placement, among other reasons.
+ #[must_use]
+ pub fn compile(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>) -> Option<(CodePtr, Vec<u32>)>
+ {
+ #[cfg(feature = "disasm")]
+ let start_addr = cb.get_write_ptr();
+
+ let alloc_regs = Self::get_alloc_regs();
+ let ret = self.compile_with_regs(cb, ocb, alloc_regs);
+
+ #[cfg(feature = "disasm")]
+ if let Some(dump_disasm) = get_option_ref!(dump_disasm) {
+ use crate::disasm::dump_disasm_addr_range;
+ let end_addr = cb.get_write_ptr();
+ dump_disasm_addr_range(cb, start_addr, end_addr, dump_disasm)
+ }
+ ret
+ }
+
+ /// Compile with a limited number of registers. Used only for unit tests.
+ #[cfg(test)]
+ pub fn compile_with_num_regs(self, cb: &mut CodeBlock, num_regs: usize) -> (CodePtr, Vec<u32>)
+ {
+ let mut alloc_regs = Self::get_alloc_regs();
+ let alloc_regs = alloc_regs.drain(0..num_regs).collect();
+ self.compile_with_regs(cb, None, alloc_regs).unwrap()
+ }
+
+ /// Consume the assembler by creating a new draining iterator.
+ pub fn into_draining_iter(self) -> AssemblerDrainingIterator {
+ AssemblerDrainingIterator::new(self)
+ }
+
+ /// Return true if the next ccall() is expected to be leaf.
+ pub fn get_leaf_ccall(&mut self) -> bool {
+ self.leaf_ccall
+ }
+
+ /// Assert that the next ccall() is going to be leaf.
+ pub fn expect_leaf_ccall(&mut self) {
+ self.leaf_ccall = true;
+ }
+}
+
+/// A struct that allows iterating through an assembler's instructions and
+/// consuming them as it iterates.
+pub struct AssemblerDrainingIterator {
+ insns: std::iter::Peekable<std::vec::IntoIter<Insn>>,
+ index: usize,
+ indices: Vec<usize>
+}
+
+impl AssemblerDrainingIterator {
+ fn new(asm: Assembler) -> Self {
+ Self {
+ insns: asm.insns.into_iter().peekable(),
+ index: 0,
+ indices: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY),
+ }
+ }
+
+ /// When you're working with two lists of instructions, you need to make
+ /// sure you do some bookkeeping to align the indices contained within the
+ /// operands of the two lists.
+ ///
+ /// This function accepts the assembler that is being built and tracks the
+ /// end of the current list of instructions in order to maintain that
+ /// alignment.
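+    ///
+    /// For example: if the original instruction 3 was split into two instructions
+    /// ending at new index 5, then `indices[3] == 5`, and an
+    /// `Opnd::InsnOut { idx: 3, .. }` used later maps to index 5.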
+ pub fn map_insn_index(&mut self, asm: &mut Assembler) {
+ self.indices.push(asm.insns.len().saturating_sub(1));
+ }
+
+ /// Map an operand by using this iterator's list of mapped indices.
+ #[cfg(target_arch = "x86_64")]
+ pub fn map_opnd(&self, opnd: Opnd) -> Opnd {
+ opnd.map_index(&self.indices)
+ }
+
+ /// Returns the next instruction in the list with the indices corresponding
+ /// to the next list of instructions.
+ pub fn next_mapped(&mut self) -> Option<(usize, Insn)> {
+ self.next_unmapped().map(|(index, mut insn)| {
+ let mut opnd_iter = insn.opnd_iter_mut();
+ while let Some(opnd) = opnd_iter.next() {
+ *opnd = opnd.map_index(&self.indices);
+ }
+
+ (index, insn)
+ })
+ }
+
+ /// Returns the next instruction in the list with the indices corresponding
+ /// to the previous list of instructions.
+ pub fn next_unmapped(&mut self) -> Option<(usize, Insn)> {
+ let index = self.index;
+ self.index += 1;
+ self.insns.next().map(|insn| (index, insn))
+ }
+
+ /// Returns the next instruction without incrementing the iterator's index.
+ pub fn peek(&mut self) -> Option<&Insn> {
+ self.insns.peek()
+ }
+}
+
+impl fmt::Debug for Assembler {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ writeln!(fmt, "Assembler")?;
+
+ for (idx, insn) in self.insns.iter().enumerate() {
+ writeln!(fmt, " {idx:03} {insn:?}")?;
+ }
+
+ Ok(())
+ }
+}
+
+impl Assembler {
+ #[must_use]
+ pub fn add(&mut self, left: Opnd, right: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right]));
+ self.push_insn(Insn::Add { left, right, out });
+ out
+ }
+
+ #[must_use]
+ pub fn and(&mut self, left: Opnd, right: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right]));
+ self.push_insn(Insn::And { left, right, out });
+ out
+ }
+
+ pub fn bake_string(&mut self, text: &str) {
+ self.push_insn(Insn::BakeString(text.to_string()));
+ }
+
+ #[allow(dead_code)]
+ pub fn breakpoint(&mut self) {
+ self.push_insn(Insn::Breakpoint);
+ }
+
+ pub fn ccall(&mut self, fptr: *const u8, opnds: Vec<Opnd>) -> Opnd {
+ // Let vm_check_canary() assert this ccall's leafness if leaf_ccall is set
+ let canary_opnd = self.set_stack_canary(&opnds);
+
+ let old_temps = self.ctx.get_reg_temps(); // with registers
+ // Spill stack temp registers since they are caller-saved registers.
+ // Note that this doesn't spill stack temps that are already popped
+ // but may still be used in the C arguments.
+ self.spill_temps();
+ let new_temps = self.ctx.get_reg_temps(); // all spilled
+
+ // Temporarily manipulate RegTemps so that we can use registers
+ // to pass stack operands that are already spilled above.
+ self.ctx.set_reg_temps(old_temps);
+
+ // Call a C function
+ let out = self.next_opnd_out(Opnd::match_num_bits(&opnds));
+ self.push_insn(Insn::CCall { fptr, opnds, out });
+
+        // Registers in old_temps may be clobbered by the above C call,
+        // so roll back the manipulated RegTemps to the spilled version.
+ self.ctx.set_reg_temps(new_temps);
+
+ // Clear the canary after use
+ if let Some(canary_opnd) = canary_opnd {
+ self.mov(canary_opnd, 0.into());
+ }
+
+ out
+ }
+
+ /// Let vm_check_canary() assert the leafness of this ccall if leaf_ccall is set
+ fn set_stack_canary(&mut self, opnds: &Vec<Opnd>) -> Option<Opnd> {
+ // Use the slot right above the stack top for verifying leafness.
+ let canary_opnd = self.stack_opnd(-1);
+
+        // If the slot is already in use, which is a valid optimization to avoid spills,
+        // skip the verification.
+ let canary_opnd = if cfg!(debug_assertions) && self.leaf_ccall && opnds.iter().all(|opnd|
+ opnd.get_stack_idx() != canary_opnd.get_stack_idx()
+ ) {
+ asm_comment!(self, "set stack canary");
+ self.mov(canary_opnd, vm_stack_canary().into());
+ Some(canary_opnd)
+ } else {
+ None
+ };
+
+ // Avoid carrying the flag to the next instruction whether we verified it or not.
+ self.leaf_ccall = false;
+
+ canary_opnd
+ }
+
+ pub fn cmp(&mut self, left: Opnd, right: Opnd) {
+ self.push_insn(Insn::Cmp { left, right });
+ }
+
+ #[must_use]
+ pub fn cpop(&mut self) -> Opnd {
+ let out = self.next_opnd_out(Opnd::DEFAULT_NUM_BITS);
+ self.push_insn(Insn::CPop { out });
+ out
+ }
+
+ pub fn cpop_all(&mut self) {
+ self.push_insn(Insn::CPopAll);
+
+ // Re-enable ccall's RegTemps assertion disabled by cpush_all.
+ // cpush_all + cpop_all preserve all stack temp registers, so it's safe.
+ self.set_reg_temps(self.ctx.get_reg_temps());
+ }
+
+ pub fn cpop_into(&mut self, opnd: Opnd) {
+ self.push_insn(Insn::CPopInto(opnd));
+ }
+
+ pub fn cpush(&mut self, opnd: Opnd) {
+ self.push_insn(Insn::CPush(opnd));
+ }
+
+ pub fn cpush_all(&mut self) {
+ self.push_insn(Insn::CPushAll);
+
+ // Mark all temps as not being in registers.
+ // Temps will be marked back as being in registers by cpop_all.
+ // We assume that cpush_all + cpop_all are used for C functions in utils.rs
+ // that don't require spill_temps for GC.
+ self.set_reg_temps(RegTemps::default());
+ }
+
+ pub fn cret(&mut self, opnd: Opnd) {
+ self.push_insn(Insn::CRet(opnd));
+ }
+
+ #[must_use]
+ pub fn csel_e(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy]));
+ self.push_insn(Insn::CSelE { truthy, falsy, out });
+ out
+ }
+
+ #[must_use]
+ pub fn csel_g(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy]));
+ self.push_insn(Insn::CSelG { truthy, falsy, out });
+ out
+ }
+
+ #[must_use]
+ pub fn csel_ge(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy]));
+ self.push_insn(Insn::CSelGE { truthy, falsy, out });
+ out
+ }
+
+ #[must_use]
+ pub fn csel_l(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy]));
+ self.push_insn(Insn::CSelL { truthy, falsy, out });
+ out
+ }
+
+ #[must_use]
+ pub fn csel_le(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy]));
+ self.push_insn(Insn::CSelLE { truthy, falsy, out });
+ out
+ }
+
+ #[must_use]
+ pub fn csel_ne(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy]));
+ self.push_insn(Insn::CSelNE { truthy, falsy, out });
+ out
+ }
+
+ #[must_use]
+ pub fn csel_nz(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy]));
+ self.push_insn(Insn::CSelNZ { truthy, falsy, out });
+ out
+ }
+
+ #[must_use]
+ pub fn csel_z(&mut self, truthy: Opnd, falsy: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[truthy, falsy]));
+ self.push_insn(Insn::CSelZ { truthy, falsy, out });
+ out
+ }
+
+ pub fn frame_setup(&mut self) {
+ self.push_insn(Insn::FrameSetup);
+ }
+
+ pub fn frame_teardown(&mut self) {
+ self.push_insn(Insn::FrameTeardown);
+ }
+
+ pub fn incr_counter(&mut self, mem: Opnd, value: Opnd) {
+ self.push_insn(Insn::IncrCounter { mem, value });
+ }
+
+ pub fn jbe(&mut self, target: Target) {
+ self.push_insn(Insn::Jbe(target));
+ }
+
+ pub fn jb(&mut self, target: Target) {
+ self.push_insn(Insn::Jb(target));
+ }
+
+ pub fn je(&mut self, target: Target) {
+ self.push_insn(Insn::Je(target));
+ }
+
+ pub fn jl(&mut self, target: Target) {
+ self.push_insn(Insn::Jl(target));
+ }
+
+ #[allow(dead_code)]
+ pub fn jg(&mut self, target: Target) {
+ self.push_insn(Insn::Jg(target));
+ }
+
+ #[allow(dead_code)]
+ pub fn jge(&mut self, target: Target) {
+ self.push_insn(Insn::Jge(target));
+ }
+
+ pub fn jmp(&mut self, target: Target) {
+ self.push_insn(Insn::Jmp(target));
+ }
+
+ pub fn jmp_opnd(&mut self, opnd: Opnd) {
+ self.push_insn(Insn::JmpOpnd(opnd));
+ }
+
+ pub fn jne(&mut self, target: Target) {
+ self.push_insn(Insn::Jne(target));
+ }
+
+ pub fn jnz(&mut self, target: Target) {
+ self.push_insn(Insn::Jnz(target));
+ }
+
+ pub fn jo(&mut self, target: Target) {
+ self.push_insn(Insn::Jo(target));
+ }
+
+ pub fn jo_mul(&mut self, target: Target) {
+ self.push_insn(Insn::JoMul(target));
+ }
+
+ pub fn jz(&mut self, target: Target) {
+ self.push_insn(Insn::Jz(target));
+ }
+
+ #[must_use]
+ pub fn lea(&mut self, opnd: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd]));
+ self.push_insn(Insn::Lea { opnd, out });
+ out
+ }
+
+ #[must_use]
+ pub fn lea_jump_target(&mut self, target: Target) -> Opnd {
+ let out = self.next_opnd_out(Opnd::DEFAULT_NUM_BITS);
+ self.push_insn(Insn::LeaJumpTarget { target, out });
+ out
+ }
+
+ #[must_use]
+ pub fn live_reg_opnd(&mut self, opnd: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd]));
+ self.push_insn(Insn::LiveReg { opnd, out });
+ out
+ }
+
+ #[must_use]
+ pub fn load(&mut self, opnd: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd]));
+ self.push_insn(Insn::Load { opnd, out });
+ out
+ }
+
+ pub fn load_into(&mut self, dest: Opnd, opnd: Opnd) {
+ match (dest, opnd) {
+ (Opnd::Reg(dest), Opnd::Reg(opnd)) if dest == opnd => {}, // skip if noop
+ _ => self.push_insn(Insn::LoadInto { dest, opnd }),
+ }
+ }
+
+ #[must_use]
+ pub fn load_sext(&mut self, opnd: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd]));
+ self.push_insn(Insn::LoadSExt { opnd, out });
+ out
+ }
+
+ #[must_use]
+ pub fn lshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd, shift]));
+ self.push_insn(Insn::LShift { opnd, shift, out });
+ out
+ }
+
+ pub fn mov(&mut self, dest: Opnd, src: Opnd) {
+ self.push_insn(Insn::Mov { dest, src });
+ }
+
+ #[must_use]
+ pub fn not(&mut self, opnd: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd]));
+ self.push_insn(Insn::Not { opnd, out });
+ out
+ }
+
+ #[must_use]
+ pub fn or(&mut self, left: Opnd, right: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right]));
+ self.push_insn(Insn::Or { left, right, out });
+ out
+ }
+
+ pub fn pad_inval_patch(&mut self) {
+ self.push_insn(Insn::PadInvalPatch);
+ }
+
+ //pub fn pos_marker<F: FnMut(CodePtr)>(&mut self, marker_fn: F)
+ pub fn pos_marker(&mut self, marker_fn: impl Fn(CodePtr, &CodeBlock) + 'static) {
+ self.push_insn(Insn::PosMarker(Box::new(marker_fn)));
+ }
+
+ #[must_use]
+ pub fn rshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd, shift]));
+ self.push_insn(Insn::RShift { opnd, shift, out });
+ out
+ }
+
+ pub fn store(&mut self, dest: Opnd, src: Opnd) {
+ self.push_insn(Insn::Store { dest, src });
+ }
+
+ #[must_use]
+ pub fn sub(&mut self, left: Opnd, right: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right]));
+ self.push_insn(Insn::Sub { left, right, out });
+ out
+ }
+
+ #[must_use]
+ pub fn mul(&mut self, left: Opnd, right: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right]));
+ self.push_insn(Insn::Mul { left, right, out });
+ out
+ }
+
+ pub fn test(&mut self, left: Opnd, right: Opnd) {
+ self.push_insn(Insn::Test { left, right });
+ }
+
+ #[must_use]
+ #[allow(dead_code)]
+ pub fn urshift(&mut self, opnd: Opnd, shift: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[opnd, shift]));
+ self.push_insn(Insn::URShift { opnd, shift, out });
+ out
+ }
+
+ /// Verify the leafness of the given block
+ pub fn with_leaf_ccall<F, R>(&mut self, mut block: F) -> R
+ where F: FnMut(&mut Self) -> R {
+ let old_leaf_ccall = self.leaf_ccall;
+ self.leaf_ccall = true;
+ let ret = block(self);
+ self.leaf_ccall = old_leaf_ccall;
+ ret
+ }
+
+ /// Add a label at the current position
+ pub fn write_label(&mut self, target: Target) {
+ assert!(target.unwrap_label_idx() < self.label_names.len());
+ self.push_insn(Insn::Label(target));
+ }
+
+ #[must_use]
+ pub fn xor(&mut self, left: Opnd, right: Opnd) -> Opnd {
+ let out = self.next_opnd_out(Opnd::match_num_bits(&[left, right]));
+ self.push_insn(Insn::Xor { left, right, out });
+ out
+ }
+}
+
+/// Macro that wraps format! for Insn::Comment, skipping the format! call
+/// when the "disasm" feature is disabled.
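+///
+/// Usage example: `asm_comment!(asm, "reg_temps: {:08b}", temps.as_u8());`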
+macro_rules! asm_comment {
+ ($asm:expr, $($fmt:tt)*) => {
+ if cfg!(feature = "disasm") {
+ $asm.push_insn(Insn::Comment(format!($($fmt)*)));
+ }
+ };
+}
+pub(crate) use asm_comment;
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_opnd_iter() {
+ let insn = Insn::Add { left: Opnd::None, right: Opnd::None, out: Opnd::None };
+
+ let mut opnd_iter = insn.opnd_iter();
+ assert!(matches!(opnd_iter.next(), Some(Opnd::None)));
+ assert!(matches!(opnd_iter.next(), Some(Opnd::None)));
+
+ assert!(matches!(opnd_iter.next(), None));
+ }
+
+ #[test]
+ fn test_opnd_iter_mut() {
+ let mut insn = Insn::Add { left: Opnd::None, right: Opnd::None, out: Opnd::None };
+
+ let mut opnd_iter = insn.opnd_iter_mut();
+ assert!(matches!(opnd_iter.next(), Some(Opnd::None)));
+ assert!(matches!(opnd_iter.next(), Some(Opnd::None)));
+
+ assert!(matches!(opnd_iter.next(), None));
+ }
+}
diff --git a/yjit/src/backend/mod.rs b/yjit/src/backend/mod.rs
new file mode 100644
index 0000000000..6921244c72
--- /dev/null
+++ b/yjit/src/backend/mod.rs
@@ -0,0 +1,14 @@
+#[cfg(target_arch = "x86_64")]
+pub mod x86_64;
+
+#[cfg(target_arch = "aarch64")]
+pub mod arm64;
+
+#[cfg(target_arch = "x86_64")]
+pub use x86_64 as current;
+
+#[cfg(target_arch = "aarch64")]
+pub use arm64 as current;
+
+pub mod ir;
+mod tests;
diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs
new file mode 100644
index 0000000000..01e87fe26c
--- /dev/null
+++ b/yjit/src/backend/tests.rs
@@ -0,0 +1,330 @@
+#![cfg(test)]
+use crate::asm::{CodeBlock};
+use crate::backend::ir::*;
+use crate::cruby::*;
+use crate::utils::c_callable;
+
+#[test]
+fn test_add() {
+ let mut asm = Assembler::new();
+ let out = asm.add(SP, Opnd::UImm(1));
+ let _ = asm.add(out, Opnd::UImm(2));
+}
+
+#[test]
+fn test_alloc_regs() {
+ let mut asm = Assembler::new();
+
+ // Get the first output that we're going to reuse later.
+ let out1 = asm.add(EC, Opnd::UImm(1));
+
+ // Pad some instructions in to make sure it can handle that.
+ let _ = asm.add(EC, Opnd::UImm(2));
+
+ // Get the second output we're going to reuse.
+ let out2 = asm.add(EC, Opnd::UImm(3));
+
+ // Pad another instruction.
+ let _ = asm.add(EC, Opnd::UImm(4));
+
+ // Reuse both the previously captured outputs.
+ let _ = asm.add(out1, out2);
+
+ // Now get a third output to make sure that the pool has registers to
+ // allocate now that the previous ones have been returned.
+ let out3 = asm.add(EC, Opnd::UImm(5));
+ let _ = asm.add(out3, Opnd::UImm(6));
+
+ // Here we're going to allocate the registers.
+ let result = asm.alloc_regs(Assembler::get_alloc_regs());
+
+ // Now we're going to verify that the out field has been appropriately
+ // updated for each of the instructions that needs it.
+ let regs = Assembler::get_alloc_regs();
+ let reg0 = regs[0];
+ let reg1 = regs[1];
+
+ match result.insns[0].out_opnd() {
+ Some(Opnd::Reg(value)) => assert_eq!(value, &reg0),
+ val => panic!("Unexpected register value {:?}", val),
+ }
+
+ match result.insns[2].out_opnd() {
+ Some(Opnd::Reg(value)) => assert_eq!(value, &reg1),
+ val => panic!("Unexpected register value {:?}", val),
+ }
+
+ match result.insns[5].out_opnd() {
+ Some(Opnd::Reg(value)) => assert_eq!(value, &reg0),
+ val => panic!("Unexpected register value {:?}", val),
+ }
+}
+
+fn setup_asm() -> (Assembler, CodeBlock) {
+ return (
+ Assembler::new(),
+ CodeBlock::new_dummy(1024)
+ );
+}
+
+// Test full codegen pipeline
+#[test]
+fn test_compile()
+{
+ let (mut asm, mut cb) = setup_asm();
+ let regs = Assembler::get_alloc_regs();
+
+ let out = asm.add(Opnd::Reg(regs[0]), Opnd::UImm(2));
+ let out2 = asm.add(out, Opnd::UImm(2));
+ asm.store(Opnd::mem(64, SP, 0), out2);
+
+ asm.compile_with_num_regs(&mut cb, 1);
+}
+
+// Test memory-to-memory move
+#[test]
+fn test_mov_mem2mem()
+{
+ let (mut asm, mut cb) = setup_asm();
+
+ asm_comment!(asm, "check that comments work too");
+ asm.mov(Opnd::mem(64, SP, 0), Opnd::mem(64, SP, 8));
+
+ asm.compile_with_num_regs(&mut cb, 1);
+}
+
+// Test load of register into new register
+#[test]
+fn test_load_reg()
+{
+ let (mut asm, mut cb) = setup_asm();
+
+ let out = asm.load(SP);
+ asm.mov(Opnd::mem(64, SP, 0), out);
+
+ asm.compile_with_num_regs(&mut cb, 1);
+}
+
+// Test load of a GC'd value
+#[test]
+fn test_load_value()
+{
+ let (mut asm, mut cb) = setup_asm();
+
+ let gcd_value = VALUE(0xFFFFFFFFFFFF00);
+ assert!(!gcd_value.special_const_p());
+
+ let out = asm.load(Opnd::Value(gcd_value));
+ asm.mov(Opnd::mem(64, SP, 0), out);
+
+ asm.compile_with_num_regs(&mut cb, 1);
+}
+
+// Multiple registers needed and register reuse
+#[test]
+fn test_reuse_reg()
+{
+ let (mut asm, mut cb) = setup_asm();
+
+ let v0 = asm.add(Opnd::mem(64, SP, 0), Opnd::UImm(1));
+ let v1 = asm.add(Opnd::mem(64, SP, 8), Opnd::UImm(1));
+
+ let v2 = asm.add(v1, Opnd::UImm(1)); // Reuse v1 register
+ let v3 = asm.add(v0, v2);
+
+ asm.store(Opnd::mem(64, SP, 0), v2);
+ asm.store(Opnd::mem(64, SP, 8), v3);
+
+ asm.compile_with_num_regs(&mut cb, 2);
+}
+
+// 64-bit values can't be written directly to memory;
+// they need to be split into one or more register movs first
+#[test]
+fn test_store_u64()
+{
+ let (mut asm, mut cb) = setup_asm();
+ asm.store(Opnd::mem(64, SP, 0), u64::MAX.into());
+
+ asm.compile_with_num_regs(&mut cb, 1);
+}
+
+// Use instruction output as base register for memory operand
+#[test]
+fn test_base_insn_out()
+{
+ let (mut asm, mut cb) = setup_asm();
+
+    // Force a register to be reused.
+    // This also causes the insn sequence to change length.
+ asm.mov(
+ Opnd::mem(64, SP, 8),
+ Opnd::mem(64, SP, 0)
+ );
+
+ // Load the pointer into a register
+ let ptr_reg = asm.load(Opnd::const_ptr(4351776248 as *const u8));
+ let counter_opnd = Opnd::mem(64, ptr_reg, 0);
+
+ // Increment and store the updated value
+ asm.incr_counter(counter_opnd, 1.into());
+
+ asm.compile_with_num_regs(&mut cb, 2);
+}
+
+#[test]
+fn test_c_call()
+{
+ c_callable! {
+ fn dummy_c_fun(_v0: usize, _v1: usize) {}
+ }
+
+ let (mut asm, mut cb) = setup_asm();
+
+ let ret_val = asm.ccall(
+ dummy_c_fun as *const u8,
+ vec![Opnd::mem(64, SP, 0), Opnd::UImm(1)]
+ );
+
+ // Make sure that the call's return value is usable
+ asm.mov(Opnd::mem(64, SP, 0), ret_val);
+
+ asm.compile_with_num_regs(&mut cb, 1);
+}
+
+#[test]
+fn test_alloc_ccall_regs() {
+ let mut asm = Assembler::new();
+ let out1 = asm.ccall(0 as *const u8, vec![]);
+ let out2 = asm.ccall(0 as *const u8, vec![out1]);
+ asm.mov(EC, out2);
+ let mut cb = CodeBlock::new_dummy(1024);
+ asm.compile_with_regs(&mut cb, None, Assembler::get_alloc_regs());
+}
+
+#[test]
+fn test_lea_ret()
+{
+ let (mut asm, mut cb) = setup_asm();
+
+ let addr = asm.lea(Opnd::mem(64, SP, 0));
+ asm.cret(addr);
+
+ asm.compile_with_num_regs(&mut cb, 1);
+}
+
+#[test]
+fn test_jcc_label()
+{
+ let (mut asm, mut cb) = setup_asm();
+
+ let label = asm.new_label("foo");
+ asm.cmp(EC, EC);
+ asm.je(label);
+ asm.write_label(label);
+
+ asm.compile_with_num_regs(&mut cb, 1);
+}
+
+#[test]
+fn test_jcc_ptr()
+{
+ let (mut asm, mut cb) = setup_asm();
+
+ let side_exit = Target::CodePtr(cb.get_write_ptr().add_bytes(4));
+ let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK));
+ asm.test(
+ Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG),
+ not_mask,
+ );
+ asm.jnz(side_exit);
+
+ asm.compile_with_num_regs(&mut cb, 2);
+}
+
+/// Direct jump to a stub e.g. for deferred compilation
+#[test]
+fn test_jmp_ptr()
+{
+ let (mut asm, mut cb) = setup_asm();
+
+ let stub = Target::CodePtr(cb.get_write_ptr().add_bytes(4));
+ asm.jmp(stub);
+
+ asm.compile_with_num_regs(&mut cb, 0);
+}
+
+#[test]
+fn test_jo()
+{
+ let (mut asm, mut cb) = setup_asm();
+
+ let side_exit = Target::CodePtr(cb.get_write_ptr().add_bytes(4));
+
+ let arg1 = Opnd::mem(64, SP, 0);
+ let arg0 = Opnd::mem(64, SP, 8);
+
+ let arg0_untag = asm.sub(arg0, Opnd::Imm(1));
+ let out_val = asm.add(arg0_untag, arg1);
+ asm.jo(side_exit);
+
+ asm.mov(Opnd::mem(64, SP, 0), out_val);
+
+ asm.compile_with_num_regs(&mut cb, 2);
+}
+
+#[test]
+fn test_bake_string() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.bake_string("Hello, world!");
+ asm.compile_with_num_regs(&mut cb, 0);
+}
+
+#[test]
+fn test_draining_iterator() {
+
+ let mut asm = Assembler::new();
+
+ let _ = asm.load(Opnd::None);
+ asm.store(Opnd::None, Opnd::None);
+ let _ = asm.add(Opnd::None, Opnd::None);
+
+ let mut iter = asm.into_draining_iter();
+
+ while let Some((index, insn)) = iter.next_unmapped() {
+ match index {
+ 0 => assert!(matches!(insn, Insn::Load { .. })),
+ 1 => assert!(matches!(insn, Insn::Store { .. })),
+ 2 => assert!(matches!(insn, Insn::Add { .. })),
+ _ => panic!("Unexpected instruction index"),
+ };
+ }
+}
+
+#[test]
+fn test_cmp_8_bit() {
+ let (mut asm, mut cb) = setup_asm();
+ let reg = Assembler::get_alloc_regs()[0];
+ asm.cmp(Opnd::Reg(reg).with_num_bits(8).unwrap(), Opnd::UImm(RUBY_SYMBOL_FLAG as u64));
+
+ asm.compile_with_num_regs(&mut cb, 1);
+}
+
+#[test]
+fn test_no_pos_marker_callback_when_compile_fails() {
+ // When compilation fails (e.g. when out of memory), the code written out is malformed.
+ // We don't want to invoke the pos_marker callbacks with positions of malformed code.
+ let mut asm = Assembler::new();
+
+    // Markers around code that exhausts the memory limit
+ let fail_if_called = |_code_ptr, _cb: &_| panic!("pos_marker callback should not be called");
+ asm.pos_marker(fail_if_called);
+ let zero = asm.load(0.into());
+ let sum = asm.add(zero, 500.into());
+ asm.store(Opnd::mem(64, SP, 8), sum);
+ asm.pos_marker(fail_if_called);
+
+ let cb = &mut CodeBlock::new_dummy(8);
+ assert!(asm.compile(cb, None).is_none(), "should fail due to tiny size limit");
+}
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
new file mode 100644
index 0000000000..4ca5e9be9c
--- /dev/null
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -0,0 +1,1322 @@
+use std::mem::take;
+
+use crate::asm::*;
+use crate::asm::x86_64::*;
+use crate::codegen::CodePtr;
+use crate::cruby::*;
+use crate::backend::ir::*;
+use crate::options::*;
+use crate::utils::*;
+
+// Use the x86 register type for this platform
+pub type Reg = X86Reg;
+
+// Callee-saved registers
+pub const _CFP: Opnd = Opnd::Reg(R13_REG);
+pub const _EC: Opnd = Opnd::Reg(R12_REG);
+pub const _SP: Opnd = Opnd::Reg(RBX_REG);
+
+// C argument registers on this platform
+pub const _C_ARG_OPNDS: [Opnd; 6] = [
+ Opnd::Reg(RDI_REG),
+ Opnd::Reg(RSI_REG),
+ Opnd::Reg(RDX_REG),
+ Opnd::Reg(RCX_REG),
+ Opnd::Reg(R8_REG),
+ Opnd::Reg(R9_REG)
+];
+
+// C return value register on this platform
+pub const C_RET_REG: Reg = RAX_REG;
+pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG);
+
+impl CodeBlock {
+ // The number of bytes that are generated by jmp_ptr
+ pub fn jmp_ptr_bytes(&self) -> usize { 5 }
+}
+
+/// Map Opnd to X86Opnd
+impl From<Opnd> for X86Opnd {
+ fn from(opnd: Opnd) -> Self {
+ match opnd {
+ // NOTE: these operand types need to be lowered first
+ //Value(VALUE), // Immediate Ruby value, may be GC'd, movable
+ //InsnOut(usize), // Output of a preceding instruction in this block
+
+ Opnd::InsnOut{..} => panic!("InsnOut operand made it past register allocation"),
+
+ Opnd::UImm(val) => uimm_opnd(val),
+ Opnd::Imm(val) => imm_opnd(val),
+ Opnd::Value(VALUE(uimm)) => uimm_opnd(uimm as u64),
+
+ // General-purpose register
+ Opnd::Reg(reg) => X86Opnd::Reg(reg),
+
+ // Memory operand with displacement
+ Opnd::Mem(Mem{ base: MemBase::Reg(reg_no), num_bits, disp }) => {
+ let reg = X86Reg {
+ reg_no,
+ num_bits: 64,
+ reg_type: RegType::GP
+ };
+
+ mem_opnd(num_bits, X86Opnd::Reg(reg), disp)
+ }
+
+ Opnd::None => panic!(
+ "Attempted to lower an Opnd::None. This often happens when an out operand was not allocated for an instruction because the output of the instruction was not used. Please ensure you are using the output."
+ ),
+
+ _ => panic!("unsupported x86 operand type")
+ }
+ }
+}
+
+/// Also implement going from a reference to an operand for convenience.
+impl From<&Opnd> for X86Opnd {
+ fn from(opnd: &Opnd) -> Self {
+ X86Opnd::from(*opnd)
+ }
+}
+
+/// List of registers that can be used for stack temps.
+pub static TEMP_REGS: [Reg; 5] = [RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG];
+
+impl Assembler
+{
+ // A special scratch register for intermediate processing.
+ // This register is caller-saved (so we don't have to save it before using it)
+ pub const SCRATCH_REG: Reg = R11_REG;
+ const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG);
+
+
+ /// Get the list of registers from which we can allocate on this platform
+ pub fn get_alloc_regs() -> Vec<Reg>
+ {
+ vec![
+ RAX_REG,
+ RCX_REG,
+ RDX_REG,
+ ]
+ }
+
+ /// Get a list of all of the caller-save registers
+ pub fn get_caller_save_regs() -> Vec<Reg> {
+ vec![RAX_REG, RCX_REG, RDX_REG, RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG, R11_REG]
+ }
+
+ // These are the callee-saved registers in the x86-64 SysV ABI
+ // RBX, RSP, RBP, and R12–R15
+
+ /// Split IR instructions for the x86 platform
+ fn x86_split(mut self) -> Assembler
+ {
+ let live_ranges: Vec<usize> = take(&mut self.live_ranges);
+ let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits));
+ let mut iterator = self.into_draining_iter();
+
+ while let Some((index, mut insn)) = iterator.next_unmapped() {
+ // When we're iterating through the instructions with x86_split, we
+ // need to know the previous live ranges in order to tell if a
+ // register lasts beyond the current instruction. So instead of
+ // using next_mapped, we call next_unmapped. When you're using the
+ // next_unmapped API, you need to make sure that you map each
+ // operand that could reference an old index, which means both
+ // Opnd::InsnOut operands and Opnd::Mem operands with a base of
+ // MemBase::InsnOut.
+ //
+ // You need to ensure that you only map it _once_, because otherwise
+            // you'll end up mapping an incorrect index, which could be out of
+            // bounds of the old set of indices.
+ //
+ // We handle all of that mapping here to ensure that it's only
+ // mapped once. We also handle loading Opnd::Value operands into
+ // registers here so that all mapping happens in one place. We load
+ // Opnd::Value operands into registers here because:
+ //
+ // - Most instructions can't be encoded with 64-bit immediates.
+ // - We look for Op::Load specifically when emitting to keep GC'ed
+ // VALUEs alive. This is a sort of canonicalization.
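+            //
+            // A sketch of the canonicalization: `Mov { dest: mem, src: Value(heap_obj) }`
+            // becomes `Load { opnd: Value(heap_obj), out }` followed by
+            // `Mov { dest: mem, src: out }`, and x86_emit later records a GC offset for
+            // the movabs that materializes the VALUE.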
+ let mut unmapped_opnds: Vec<Opnd> = vec![];
+
+ let is_load = matches!(insn, Insn::Load { .. } | Insn::LoadInto { .. });
+ let mut opnd_iter = insn.opnd_iter_mut();
+
+ while let Some(opnd) = opnd_iter.next() {
+ if let Opnd::Stack { .. } = opnd {
+ *opnd = asm.lower_stack_opnd(opnd);
+ }
+ unmapped_opnds.push(*opnd);
+
+ *opnd = match opnd {
+ Opnd::Value(value) if !is_load => {
+ // Since mov(mem64, imm32) sign extends, as_i64() makes sure
+ // we split when the extended value is different.
+ if !value.special_const_p() || imm_num_bits(value.as_i64()) > 32 {
+ asm.load(iterator.map_opnd(*opnd))
+ } else {
+ Opnd::UImm(value.as_u64())
+ }
+ }
+ _ => iterator.map_opnd(*opnd),
+ };
+ }
+
+ // We are replacing instructions here so we know they are already
+ // being used. It is okay not to use their output here.
+ #[allow(unused_must_use)]
+ match &mut insn {
+ Insn::Add { left, right, out } |
+ Insn::Sub { left, right, out } |
+ Insn::Mul { left, right, out } |
+ Insn::And { left, right, out } |
+ Insn::Or { left, right, out } |
+ Insn::Xor { left, right, out } => {
+ match (&left, &right, iterator.peek()) {
+ // Merge this insn, e.g. `add REG, right -> out`, and `mov REG, out` if possible
+ (Opnd::Reg(_), Opnd::UImm(value), Some(Insn::Mov { dest, src }))
+ if out == src && left == dest && live_ranges[index] == index + 1 && uimm_num_bits(*value) <= 32 => {
+ *out = *dest;
+ asm.push_insn(insn);
+ iterator.map_insn_index(&mut asm);
+ iterator.next_unmapped(); // Pop merged Insn::Mov
+ }
+ (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src }))
+ if out == src && live_ranges[index] == index + 1 && {
+ // We want to do `dest == left`, but `left` has already gone
+ // through lower_stack_opnd() while `dest` has not. So we
+ // lower `dest` before comparing.
+ let lowered_dest = if let Opnd::Stack { .. } = dest {
+ asm.lower_stack_opnd(dest)
+ } else {
+ *dest
+ };
+ lowered_dest == *left
+ } => {
+ *out = *dest;
+ asm.push_insn(insn);
+ iterator.map_insn_index(&mut asm);
+ iterator.next_unmapped(); // Pop merged Insn::Mov
+ }
+ _ => {
+ match (unmapped_opnds[0], unmapped_opnds[1]) {
+ (Opnd::Mem(_), Opnd::Mem(_)) => {
+ *left = asm.load(*left);
+ *right = asm.load(*right);
+ },
+ (Opnd::Mem(_), Opnd::UImm(_) | Opnd::Imm(_)) => {
+ *left = asm.load(*left);
+ },
+ // Instruction output whose live range spans beyond this instruction
+ (Opnd::InsnOut { idx, .. }, _) => {
+ if live_ranges[idx] > index {
+ *left = asm.load(*left);
+ }
+ },
+ // We have to load memory operands to avoid corrupting them
+ (Opnd::Mem(_) | Opnd::Reg(_), _) => {
+ *left = asm.load(*left);
+ },
+ _ => {}
+ };
+
+ *out = asm.next_opnd_out(Opnd::match_num_bits(&[*left, *right]));
+ asm.push_insn(insn);
+ }
+ }
+ },
+ Insn::Cmp { left, right } => {
+ // Replace `cmp REG, 0` (4 bytes) with `test REG, REG` (3 bytes)
+ // when next IR is `je`, `jne`, `csel_e`, or `csel_ne`
+ match (&left, &right, iterator.peek()) {
+ (Opnd::InsnOut { .. },
+ Opnd::UImm(0) | Opnd::Imm(0),
+ Some(Insn::Je(_) | Insn::Jne(_) | Insn::CSelE { .. } | Insn::CSelNE { .. })) => {
+ asm.push_insn(Insn::Test { left: *left, right: *left });
+ }
+ _ => {
+ if let (Opnd::Mem(_), Opnd::Mem(_)) = (&left, &right) {
+ let loaded = asm.load(*right);
+ *right = loaded;
+ }
+ asm.push_insn(insn);
+ }
+ }
+ },
+ Insn::Test { left, right } => {
+ if let (Opnd::Mem(_), Opnd::Mem(_)) = (&left, &right) {
+ let loaded = asm.load(*right);
+ *right = loaded;
+ }
+ asm.push_insn(insn);
+ },
+ // These instructions modify their input operand in-place, so we
+ // may need to load the input value to preserve it
+ Insn::LShift { opnd, shift, out } |
+ Insn::RShift { opnd, shift, out } |
+ Insn::URShift { opnd, shift, out } => {
+ match (&unmapped_opnds[0], &unmapped_opnds[1]) {
+ // Instruction output whose live range spans beyond this instruction
+ (Opnd::InsnOut { idx, .. }, _) => {
+ if live_ranges[*idx] > index {
+ *opnd = asm.load(*opnd);
+ }
+ },
+ // We have to load memory operands to avoid corrupting them
+ (Opnd::Mem(_) | Opnd::Reg(_), _) => {
+ *opnd = asm.load(*opnd);
+ },
+ _ => {}
+ };
+
+ *out = asm.next_opnd_out(Opnd::match_num_bits(&[*opnd, *shift]));
+ asm.push_insn(insn);
+ },
+ Insn::CSelZ { truthy, falsy, out } |
+ Insn::CSelNZ { truthy, falsy, out } |
+ Insn::CSelE { truthy, falsy, out } |
+ Insn::CSelNE { truthy, falsy, out } |
+ Insn::CSelL { truthy, falsy, out } |
+ Insn::CSelLE { truthy, falsy, out } |
+ Insn::CSelG { truthy, falsy, out } |
+ Insn::CSelGE { truthy, falsy, out } => {
+ match unmapped_opnds[0] {
+ // If we have an instruction output whose live range
+ // spans beyond this instruction, we have to load it.
+ Opnd::InsnOut { idx, .. } => {
+ if live_ranges[idx] > index {
+ *truthy = asm.load(*truthy);
+ }
+ },
+ Opnd::UImm(_) | Opnd::Imm(_) => {
+ *truthy = asm.load(*truthy);
+ },
+ // Opnd::Value could have already been split
+ Opnd::Value(_) if !matches!(truthy, Opnd::InsnOut { .. }) => {
+ *truthy = asm.load(*truthy);
+ },
+ _ => {}
+ };
+
+ match falsy {
+ Opnd::UImm(_) | Opnd::Imm(_) => {
+ *falsy = asm.load(*falsy);
+ },
+ _ => {}
+ };
+
+ *out = asm.next_opnd_out(Opnd::match_num_bits(&[*truthy, *falsy]));
+ asm.push_insn(insn);
+ },
+ Insn::Mov { dest, src } | Insn::Store { dest, src } => {
+ match (&dest, &src) {
+ (Opnd::Mem(_), Opnd::Mem(_)) => {
+ // We load opnd1 because for mov, opnd0 is the output
+ let opnd1 = asm.load(*src);
+ asm.mov(*dest, opnd1);
+ },
+ (Opnd::Mem(_), Opnd::UImm(value)) => {
+ // 32-bit values will be sign-extended
+ if imm_num_bits(*value as i64) > 32 {
+ let opnd1 = asm.load(*src);
+ asm.mov(*dest, opnd1);
+ } else {
+ asm.mov(*dest, *src);
+ }
+ },
+ (Opnd::Mem(_), Opnd::Imm(value)) => {
+ if imm_num_bits(*value) > 32 {
+ let opnd1 = asm.load(*src);
+ asm.mov(*dest, opnd1);
+ } else {
+ asm.mov(*dest, *src);
+ }
+ },
+ _ => {
+ asm.mov(*dest, *src);
+ }
+ }
+ },
+ Insn::Not { opnd, .. } => {
+ let opnd0 = match unmapped_opnds[0] {
+ // If we have an instruction output whose live range
+ // spans beyond this instruction, we have to load it.
+ Opnd::InsnOut { idx, .. } => {
+ if live_ranges[idx] > index {
+ asm.load(*opnd)
+ } else {
+ *opnd
+ }
+ },
+ // We have to load memory and register operands to avoid
+ // corrupting them.
+ Opnd::Mem(_) | Opnd::Reg(_) => {
+ asm.load(*opnd)
+ },
+ // Otherwise we can just reuse the existing operand.
+ _ => *opnd
+ };
+
+ asm.not(opnd0);
+ },
+ Insn::CCall { opnds, fptr, .. } => {
+ assert!(opnds.len() <= C_ARG_OPNDS.len());
+
+ // Load each operand into the corresponding argument
+ // register.
+ for (idx, opnd) in opnds.into_iter().enumerate() {
+ asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), *opnd);
+ }
+
+ // Now we push the CCall without any arguments so that it
+ // just performs the call.
+ asm.ccall(*fptr, vec![]);
+ },
+ Insn::Lea { .. } => {
+ // Merge `lea` and `mov` into a single `lea` when possible
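+                    // e.g. `lea out, [SP+8]; mov rdi, out` becomes `lea rdi, [SP+8]`
+                    // when `out` is not used afterwards (registers illustrative).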
+ match (&insn, iterator.peek()) {
+ (Insn::Lea { opnd, out }, Some(Insn::Mov { dest: Opnd::Reg(reg), src }))
+ if matches!(out, Opnd::InsnOut { .. }) && out == src && live_ranges[index] == index + 1 => {
+ asm.push_insn(Insn::Lea { opnd: *opnd, out: Opnd::Reg(*reg) });
+ iterator.map_insn_index(&mut asm);
+ iterator.next_unmapped(); // Pop merged Insn::Mov
+ }
+ _ => asm.push_insn(insn),
+ }
+ },
+ _ => {
+ if insn.out_opnd().is_some() {
+ let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter());
+ let out = insn.out_opnd_mut().unwrap();
+ *out = asm.next_opnd_out(out_num_bits);
+ }
+
+ asm.push_insn(insn);
+ }
+ };
+
+ iterator.map_insn_index(&mut asm);
+ }
+
+ asm
+ }
+
+ /// Emit platform-specific machine code
+ pub fn x86_emit(&mut self, cb: &mut CodeBlock, ocb: &mut Option<&mut OutlinedCb>) -> Option<Vec<u32>>
+ {
+ /// For some instructions, we want to be able to lower a 64-bit operand
+ /// without requiring more registers to be available in the register
+ /// allocator. So we just use the SCRATCH0 register temporarily to hold
+ /// the value before we immediately use it.
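+        ///
+        /// For example (see test_emit_add_gt_32_bits below): `add rax, 0xFFFF_FFFF_FFFF`
+        /// is emitted as `movabs r11, 0xFFFF_FFFF_FFFF; add rax, r11`, while immediates
+        /// that fit in 32 bits are encoded directly.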
+ fn emit_64bit_immediate(cb: &mut CodeBlock, opnd: &Opnd) -> X86Opnd {
+ match opnd {
+ Opnd::Imm(value) => {
+ // 32-bit values will be sign-extended
+ if imm_num_bits(*value) > 32 {
+ mov(cb, Assembler::SCRATCH0, opnd.into());
+ Assembler::SCRATCH0
+ } else {
+ opnd.into()
+ }
+ },
+ Opnd::UImm(value) => {
+ // 32-bit values will be sign-extended
+ if imm_num_bits(*value as i64) > 32 {
+ mov(cb, Assembler::SCRATCH0, opnd.into());
+ Assembler::SCRATCH0
+ } else {
+ opnd.into()
+ }
+ },
+ _ => opnd.into()
+ }
+ }
+
+ /// Compile a side exit if Target::SideExit is given.
+ fn compile_side_exit(
+ target: Target,
+ asm: &mut Assembler,
+ ocb: &mut Option<&mut OutlinedCb>,
+ ) -> Option<Target> {
+ if let Target::SideExit { counter, context } = target {
+ let side_exit = asm.get_side_exit(&context.unwrap(), Some(counter), ocb.as_mut().unwrap());
+ Some(Target::SideExitPtr(side_exit?))
+ } else {
+ Some(target)
+ }
+ }
+
+ fn emit_csel(
+ cb: &mut CodeBlock,
+ truthy: Opnd,
+ falsy: Opnd,
+ out: Opnd,
+ cmov_fn: fn(&mut CodeBlock, X86Opnd, X86Opnd),
+ cmov_neg: fn(&mut CodeBlock, X86Opnd, X86Opnd)){
+
+ // Assert that output is a register
+ out.unwrap_reg();
+
+ // If the truthy value is a memory operand
+ if let Opnd::Mem(_) = truthy {
+ if out != falsy {
+ mov(cb, out.into(), falsy.into());
+ }
+
+ cmov_fn(cb, out.into(), truthy.into());
+ } else {
+ if out != truthy {
+ mov(cb, out.into(), truthy.into());
+ }
+
+ cmov_neg(cb, out.into(), falsy.into());
+ }
+ }
+
+ //dbg!(&self.insns);
+
+ // List of GC offsets
+ let mut gc_offsets: Vec<u32> = Vec::new();
+
+ // Buffered list of PosMarker callbacks to fire if codegen is successful
+ let mut pos_markers: Vec<(usize, CodePtr)> = vec![];
+
+ // For each instruction
+ let start_write_pos = cb.get_write_pos();
+ let mut insn_idx: usize = 0;
+ while let Some(insn) = self.insns.get(insn_idx) {
+ let src_ptr = cb.get_write_ptr();
+ let had_dropped_bytes = cb.has_dropped_bytes();
+ let old_label_state = cb.get_label_state();
+ let mut insn_gc_offsets: Vec<u32> = Vec::new();
+
+ match insn {
+ Insn::Comment(text) => {
+ if cfg!(feature = "disasm") {
+ cb.add_comment(text);
+ }
+ },
+
+ // Write the label at the current position
+ Insn::Label(target) => {
+ cb.write_label(target.unwrap_label_idx());
+ },
+
+ // Report back the current position in the generated code
+ Insn::PosMarker(..) => {
+ pos_markers.push((insn_idx, cb.get_write_ptr()));
+ },
+
+ Insn::BakeString(text) => {
+ for byte in text.as_bytes() {
+ cb.write_byte(*byte);
+ }
+
+ // Add a null-terminator byte for safety (in case we pass
+ // this to C code)
+ cb.write_byte(0);
+ },
+
+ // Set up RBP to work with frame pointer unwinding
+ // (e.g. with Linux `perf record --call-graph fp`)
+ Insn::FrameSetup => {
+ if get_option!(frame_pointer) {
+ push(cb, RBP);
+ mov(cb, RBP, RSP);
+ push(cb, RBP);
+ }
+ },
+ Insn::FrameTeardown => {
+ if get_option!(frame_pointer) {
+ pop(cb, RBP);
+ pop(cb, RBP);
+ }
+ },
+
+ Insn::Add { left, right, .. } => {
+ let opnd1 = emit_64bit_immediate(cb, right);
+ add(cb, left.into(), opnd1);
+ },
+
+ Insn::Sub { left, right, .. } => {
+ let opnd1 = emit_64bit_immediate(cb, right);
+ sub(cb, left.into(), opnd1);
+ },
+
+ Insn::Mul { left, right, .. } => {
+ let opnd1 = emit_64bit_immediate(cb, right);
+ imul(cb, left.into(), opnd1);
+ },
+
+ Insn::And { left, right, .. } => {
+ let opnd1 = emit_64bit_immediate(cb, right);
+ and(cb, left.into(), opnd1);
+ },
+
+ Insn::Or { left, right, .. } => {
+ let opnd1 = emit_64bit_immediate(cb, right);
+ or(cb, left.into(), opnd1);
+ },
+
+ Insn::Xor { left, right, .. } => {
+ let opnd1 = emit_64bit_immediate(cb, right);
+ xor(cb, left.into(), opnd1);
+ },
+
+ Insn::Not { opnd, .. } => {
+ not(cb, opnd.into());
+ },
+
+ Insn::LShift { opnd, shift , ..} => {
+ shl(cb, opnd.into(), shift.into())
+ },
+
+ Insn::RShift { opnd, shift , ..} => {
+ sar(cb, opnd.into(), shift.into())
+ },
+
+ Insn::URShift { opnd, shift, .. } => {
+ shr(cb, opnd.into(), shift.into())
+ },
+
+ Insn::Store { dest, src } => {
+ mov(cb, dest.into(), src.into());
+ },
+
+ // This assumes only load instructions can contain references to GC'd Value operands
+ Insn::Load { opnd, out } |
+ Insn::LoadInto { dest: out, opnd } => {
+ match opnd {
+ Opnd::Value(val) if val.heap_object_p() => {
+                            // Using movabs because mov might write the value in only 32 bits
+ movabs(cb, out.into(), val.0 as _);
+ // The pointer immediate is encoded as the last part of the mov written out
+ let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
+ insn_gc_offsets.push(ptr_offset);
+ }
+ _ => mov(cb, out.into(), opnd.into())
+ }
+ },
+
+ Insn::LoadSExt { opnd, out } => {
+ movsx(cb, out.into(), opnd.into());
+ },
+
+ Insn::Mov { dest, src } => {
+ mov(cb, dest.into(), src.into());
+ },
+
+ // Load effective address
+ Insn::Lea { opnd, out } => {
+ lea(cb, out.into(), opnd.into());
+ },
+
+ // Load address of jump target
+ Insn::LeaJumpTarget { target, out } => {
+ if let Target::Label(label_idx) = target {
+ // Set output to the raw address of the label
+ cb.label_ref(*label_idx, 7, |cb, src_addr, dst_addr| {
+ let disp = dst_addr - src_addr;
+ lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap()));
+ });
+
+ mov(cb, out.into(), Self::SCRATCH0);
+ } else {
+ // Set output to the jump target's raw address
+ let target_code = target.unwrap_code_ptr();
+ let target_addr = target_code.raw_addr(cb).as_u64();
+ // Constant encoded length important for patching
+ movabs(cb, out.into(), target_addr);
+ }
+ },
+
+ // Push and pop to/from the C stack
+ Insn::CPush(opnd) => {
+ push(cb, opnd.into());
+ },
+ Insn::CPop { out } => {
+ pop(cb, out.into());
+ },
+ Insn::CPopInto(opnd) => {
+ pop(cb, opnd.into());
+ },
+
+                // Push/pop all caller-save registers and the flags to/from the C stack
+ Insn::CPushAll => {
+ let regs = Assembler::get_caller_save_regs();
+
+ for reg in regs {
+ push(cb, X86Opnd::Reg(reg));
+ }
+ pushfq(cb);
+ },
+ Insn::CPopAll => {
+ let regs = Assembler::get_caller_save_regs();
+
+ popfq(cb);
+ for reg in regs.into_iter().rev() {
+ pop(cb, X86Opnd::Reg(reg));
+ }
+ },
+
+ // C function call
+ Insn::CCall { fptr, .. } => {
+ call_ptr(cb, RAX, *fptr);
+ },
+
+ Insn::CRet(opnd) => {
+ // TODO: bias allocation towards return register
+ if *opnd != Opnd::Reg(C_RET_REG) {
+ mov(cb, RAX, opnd.into());
+ }
+
+ ret(cb);
+ },
+
+ // Compare
+ Insn::Cmp { left, right } => {
+ let num_bits = match right {
+ Opnd::Imm(value) => Some(imm_num_bits(*value)),
+ Opnd::UImm(value) => Some(uimm_num_bits(*value)),
+ _ => None
+ };
+
+ // If the immediate is less than 64 bits (like 32, 16, 8), and the operand
+ // sizes match, then we can represent it as an immediate in the instruction
+ // without moving it to a register first.
+                    // In other words, 64-bit immediates must always be moved to a register
+                    // before comparisons, while smaller immediates may be encoded
+                    // directly in the instruction.
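+                    //
+                    // For example (see test_emit_cmp_* below): `cmp rax, 0xFF` encodes the
+                    // immediate directly, while `cmp rax, 0xFFFF_FFFF_FFFF` becomes
+                    // `movabs r11, 0xFFFF_FFFF_FFFF; cmp rax, r11`.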
+ if num_bits.is_some() && left.num_bits() == num_bits && num_bits.unwrap() < 64 {
+ cmp(cb, left.into(), right.into());
+ } else {
+ let emitted = emit_64bit_immediate(cb, right);
+ cmp(cb, left.into(), emitted);
+ }
+ }
+
+ // Test and set flags
+ Insn::Test { left, right } => {
+ let emitted = emit_64bit_immediate(cb, right);
+ test(cb, left.into(), emitted);
+ }
+
+ Insn::JmpOpnd(opnd) => {
+ jmp_rm(cb, opnd.into());
+ }
+
+ // Conditional jump to a label
+ Insn::Jmp(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jmp_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jmp_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ }
+
+ Insn::Je(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => je_ptr(cb, code_ptr),
+ Target::Label(label_idx) => je_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ }
+
+ Insn::Jne(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jne_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jne_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ }
+
+ Insn::Jl(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jl_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jl_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ },
+
+ Insn::Jg(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jg_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jg_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ },
+
+ Insn::Jge(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jge_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jge_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ },
+
+ Insn::Jbe(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jbe_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jbe_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ },
+
+ Insn::Jb(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jb_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jb_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ },
+
+ Insn::Jz(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jz_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jz_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ }
+
+ Insn::Jnz(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jnz_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jnz_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ }
+
+ Insn::Jo(target) |
+ Insn::JoMul(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jo_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jo_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ }
+
+ Insn::Joz(..) | Insn::Jonz(..) => unreachable!("Joz/Jonz should be unused for now"),
+
+ // Atomically increment a counter at a given memory location
+ Insn::IncrCounter { mem, value } => {
+ assert!(matches!(mem, Opnd::Mem(_)));
+ assert!(matches!(value, Opnd::UImm(_) | Opnd::Imm(_) ) );
+ write_lock_prefix(cb);
+ add(cb, mem.into(), value.into());
+ },
+
+ Insn::Breakpoint => int3(cb),
+
+ Insn::CSelZ { truthy, falsy, out } => {
+ emit_csel(cb, *truthy, *falsy, *out, cmovz, cmovnz);
+ },
+ Insn::CSelNZ { truthy, falsy, out } => {
+ emit_csel(cb, *truthy, *falsy, *out, cmovnz, cmovz);
+ },
+ Insn::CSelE { truthy, falsy, out } => {
+ emit_csel(cb, *truthy, *falsy, *out, cmove, cmovne);
+ },
+ Insn::CSelNE { truthy, falsy, out } => {
+ emit_csel(cb, *truthy, *falsy, *out, cmovne, cmove);
+ },
+ Insn::CSelL { truthy, falsy, out } => {
+ emit_csel(cb, *truthy, *falsy, *out, cmovl, cmovge);
+ },
+ Insn::CSelLE { truthy, falsy, out } => {
+ emit_csel(cb, *truthy, *falsy, *out, cmovle, cmovg);
+ },
+ Insn::CSelG { truthy, falsy, out } => {
+ emit_csel(cb, *truthy, *falsy, *out, cmovg, cmovle);
+ },
+ Insn::CSelGE { truthy, falsy, out } => {
+ emit_csel(cb, *truthy, *falsy, *out, cmovge, cmovl);
+ }
+ Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
+ Insn::PadInvalPatch => {
+ let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()));
+ if code_size < cb.jmp_ptr_bytes() {
+ nop(cb, (cb.jmp_ptr_bytes() - code_size) as u32);
+ }
+ }
+ };
+
+ // On failure, jump to the next page and retry the current insn
+ if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, jmp_ptr) {
+ // Reset cb states before retrying the current Insn
+ cb.set_label_state(old_label_state);
+ } else {
+ insn_idx += 1;
+ gc_offsets.append(&mut insn_gc_offsets);
+ }
+ }
+
+ // Error if we couldn't write out everything
+ if cb.has_dropped_bytes() {
+ return None
+ } else {
+ // No bytes dropped, so the pos markers point to valid code
+ for (insn_idx, pos) in pos_markers {
+ if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() {
+ callback(pos, &cb);
+ } else {
+ panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}");
+ }
+ }
+
+ return Some(gc_offsets)
+ }
+ }
+
+ /// Optimize and compile the stored instructions
+ pub fn compile_with_regs(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>, regs: Vec<Reg>) -> Option<(CodePtr, Vec<u32>)> {
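+ // Lower the platform-agnostic IR into x86-encodable forms (e.g. splitting
+ // immediates wider than 32 bits into a scratch-register load), then map
+ // virtual registers onto the supplied physical registers.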
+ let asm = self.x86_split();
+ let mut asm = asm.alloc_regs(regs);
+
+ // Create label instances in the code block
+ for (idx, name) in asm.label_names.iter().enumerate() {
+ let label_idx = cb.new_label(name.to_string());
+ assert!(label_idx == idx);
+ }
+
+ let mut ocb = ocb; // for &mut
+ let start_ptr = cb.get_write_ptr();
+ let gc_offsets = asm.x86_emit(cb, &mut ocb);
+
+ if let (Some(gc_offsets), false) = (gc_offsets, cb.has_dropped_bytes()) {
+ cb.link_labels();
+
+ Some((start_ptr, gc_offsets))
+ } else {
+ cb.clear_labels();
+
+ None
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::disasm::{assert_disasm};
+ #[cfg(feature = "disasm")]
+ use crate::disasm::{unindent, disasm_addr_range};
+
+ use super::*;
+
+ fn setup_asm() -> (Assembler, CodeBlock) {
+ (Assembler::new(), CodeBlock::new_dummy(1024))
+ }
+
+ #[test]
+ fn test_emit_add_lt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.add(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c04881c0ff000000");
+ }
+
+ #[test]
+ fn test_emit_add_gt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.add(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
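+ // 0xFFFF_FFFF_FFFF doesn't fit in a 32-bit immediate, so the splitter
+ // materializes it in a scratch register (movabs r11) before the 64-bit add.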
+ assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c01d8");
+ }
+
+ #[test]
+ fn test_emit_and_lt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.and(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c04881e0ff000000");
+ }
+
+ #[test]
+ fn test_emit_and_gt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.and(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c21d8");
+ }
+
+ #[test]
+ fn test_emit_cmp_lt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.cmp(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_eq!(format!("{:x}", cb), "4881f8ff000000");
+ }
+
+ #[test]
+ fn test_emit_cmp_gt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.cmp(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_eq!(format!("{:x}", cb), "49bbffffffffffff00004c39d8");
+ }
+
+ #[test]
+ fn test_emit_cmp_mem_16_bits_with_imm_16() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let shape_opnd = Opnd::mem(16, Opnd::Reg(RAX_REG), 6);
+
+ asm.cmp(shape_opnd, Opnd::UImm(0xF000));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_eq!(format!("{:x}", cb), "6681780600f0");
+ }
+
+ #[test]
+ fn test_emit_cmp_mem_32_bits_with_imm_32() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let shape_opnd = Opnd::mem(32, Opnd::Reg(RAX_REG), 4);
+
+ asm.cmp(shape_opnd, Opnd::UImm(0xF000_0000));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_eq!(format!("{:x}", cb), "817804000000f0");
+ }
+
+ #[test]
+ fn test_emit_or_lt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.or(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c04881c8ff000000");
+ }
+
+ #[test]
+ fn test_emit_or_gt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.or(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c09d8");
+ }
+
+ #[test]
+ fn test_emit_sub_lt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.sub(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c04881e8ff000000");
+ }
+
+ #[test]
+ fn test_emit_sub_gt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.sub(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c29d8");
+ }
+
+ #[test]
+ fn test_emit_test_lt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.test(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_eq!(format!("{:x}", cb), "f6c0ff");
+ }
+
+ #[test]
+ fn test_emit_test_gt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.test(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_eq!(format!("{:x}", cb), "49bbffffffffffff00004c85d8");
+ }
+
+ #[test]
+ fn test_emit_xor_lt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.xor(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c04881f0ff000000");
+ }
+
+ #[test]
+ fn test_emit_xor_gt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.xor(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c31d8");
+ }
+
+ #[test]
+ fn test_merge_lea_reg() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.lea(Opnd::mem(64, SP, 8));
+ asm.mov(SP, sp); // should be merged to lea
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_disasm!(cb, "488d5b08", {"
+ 0x0: lea rbx, [rbx + 8]
+ "});
+ }
+
+ #[test]
+ fn test_merge_lea_mem() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.lea(Opnd::mem(64, SP, 8));
+ asm.mov(Opnd::mem(64, SP, 0), sp); // should NOT be merged to lea
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_disasm!(cb, "488d4308488903", {"
+ 0x0: lea rax, [rbx + 8]
+ 0x4: mov qword ptr [rbx], rax
+ "});
+ }
+
+ #[test]
+ fn test_replace_cmp_0() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let val = asm.load(Opnd::mem(64, SP, 8));
+ asm.cmp(val, 0.into());
+ let result = asm.csel_e(Qtrue.into(), Qfalse.into());
+ asm.mov(Opnd::Reg(RAX_REG), result);
+ asm.compile_with_num_regs(&mut cb, 2);
+
+ assert_eq!(format!("{:x}", cb), "488b43084885c0b814000000b900000000480f45c14889c0");
+ }
+
+ #[test]
+ fn test_merge_add_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.add(CFP, Opnd::UImm(0x40));
+ asm.mov(CFP, sp); // should be merged to add
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4983c540");
+ }
+
+ #[test]
+ fn test_merge_sub_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.sub(CFP, Opnd::UImm(0x40));
+ asm.mov(CFP, sp); // should be merged to sub
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4983ed40");
+ }
+
+ #[test]
+ fn test_merge_and_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.and(CFP, Opnd::UImm(0x40));
+ asm.mov(CFP, sp); // should be merged to and
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4983e540");
+ }
+
+ #[test]
+ fn test_merge_or_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.or(CFP, Opnd::UImm(0x40));
+ asm.mov(CFP, sp); // should be merged to or
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4983cd40");
+ }
+
+ #[test]
+ fn test_merge_xor_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.xor(CFP, Opnd::UImm(0x40));
+ asm.mov(CFP, sp); // should be merged to xor
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4983f540");
+ }
+
+ #[test]
+ fn test_reorder_c_args_no_cycle() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.ccall(0 as _, vec![
+ C_ARG_OPNDS[0], // mov rdi, rdi (optimized away)
+ C_ARG_OPNDS[1], // mov rsi, rsi (optimized away)
+ ]);
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_disasm!(cb, "b800000000ffd0", {"
+ 0x0: mov eax, 0
+ 0x5: call rax
+ "});
+ }
+
+ #[test]
+ fn test_reorder_c_args_single_cycle() {
+ let (mut asm, mut cb) = setup_asm();
+
+ // rdi and rsi form a cycle
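+ // A scratch register breaks the cycle: rsi is stashed in r11, rdi moves
+ // into rsi, then the stashed value lands in rdi (see the disasm below).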
+ asm.ccall(0 as _, vec![
+ C_ARG_OPNDS[1], // mov rdi, rsi
+ C_ARG_OPNDS[0], // mov rsi, rdi
+ C_ARG_OPNDS[2], // mov rdx, rdx (optimized away)
+ ]);
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_disasm!(cb, "4989f34889fe4c89dfb800000000ffd0", {"
+ 0x0: mov r11, rsi
+ 0x3: mov rsi, rdi
+ 0x6: mov rdi, r11
+ 0x9: mov eax, 0
+ 0xe: call rax
+ "});
+ }
+
+ #[test]
+ fn test_reorder_c_args_two_cycles() {
+ let (mut asm, mut cb) = setup_asm();
+
+ // rdi and rsi form a cycle, and rdx and rcx form another cycle
+ asm.ccall(0 as _, vec![
+ C_ARG_OPNDS[1], // mov rdi, rsi
+ C_ARG_OPNDS[0], // mov rsi, rdi
+ C_ARG_OPNDS[3], // mov rdx, rcx
+ C_ARG_OPNDS[2], // mov rcx, rdx
+ ]);
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_disasm!(cb, "4989f34889fe4c89df4989cb4889d14c89dab800000000ffd0", {"
+ 0x0: mov r11, rsi
+ 0x3: mov rsi, rdi
+ 0x6: mov rdi, r11
+ 0x9: mov r11, rcx
+ 0xc: mov rcx, rdx
+ 0xf: mov rdx, r11
+ 0x12: mov eax, 0
+ 0x17: call rax
+ "});
+ }
+
+ #[test]
+ fn test_reorder_c_args_large_cycle() {
+ let (mut asm, mut cb) = setup_asm();
+
+ // rdi, rsi, and rdx form a cycle
+ asm.ccall(0 as _, vec![
+ C_ARG_OPNDS[1], // mov rdi, rsi
+ C_ARG_OPNDS[2], // mov rsi, rdx
+ C_ARG_OPNDS[0], // mov rdx, rdi
+ ]);
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_disasm!(cb, "4989f34889d64889fa4c89dfb800000000ffd0", {"
+ 0x0: mov r11, rsi
+ 0x3: mov rsi, rdx
+ 0x6: mov rdx, rdi
+ 0x9: mov rdi, r11
+ 0xc: mov eax, 0
+ 0x11: call rax
+ "});
+ }
+
+ #[test]
+ fn test_reorder_c_args_with_insn_out() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let rax = asm.load(Opnd::UImm(1));
+ let rcx = asm.load(Opnd::UImm(2));
+ let rdx = asm.load(Opnd::UImm(3));
+ // rcx and rdx form a cycle
+ asm.ccall(0 as _, vec![
+ rax, // mov rdi, rax
+ rcx, // mov rsi, rcx
+ rcx, // mov rdx, rcx
+ rdx, // mov rcx, rdx
+ ]);
+ asm.compile_with_num_regs(&mut cb, 3);
+
+ assert_disasm!(cb, "b801000000b902000000ba030000004889c74889ce4989cb4889d14c89dab800000000ffd0", {"
+ 0x0: mov eax, 1
+ 0x5: mov ecx, 2
+ 0xa: mov edx, 3
+ 0xf: mov rdi, rax
+ 0x12: mov rsi, rcx
+ 0x15: mov r11, rcx
+ 0x18: mov rcx, rdx
+ 0x1b: mov rdx, r11
+ 0x1e: mov eax, 0
+ 0x23: call rax
+ "});
+ }
+
+ #[test]
+ fn test_cmov_mem() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let top = Opnd::mem(64, SP, 0);
+ let ary_opnd = SP;
+ let array_len_opnd = Opnd::mem(64, SP, 16);
+
+ asm.cmp(array_len_opnd, 1.into());
+ let elem_opnd = asm.csel_g(Opnd::mem(64, ary_opnd, 0), Qnil.into());
+ asm.mov(top, elem_opnd);
+
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_disasm!(cb, "48837b1001b804000000480f4f03488903", {"
+ 0x0: cmp qword ptr [rbx + 0x10], 1
+ 0x5: mov eax, 4
+ 0xa: cmovg rax, qword ptr [rbx]
+ 0xe: mov qword ptr [rbx], rax
+ "});
+ }
+
+ #[test]
+ fn test_csel_split() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let stack_top = Opnd::mem(64, SP, 0);
+ let elem_opnd = asm.csel_ne(VALUE(0x7f22c88d1930).into(), Qnil.into());
+ asm.mov(stack_top, elem_opnd);
+
+ asm.compile_with_num_regs(&mut cb, 3);
+
+ assert_disasm!(cb, "48b830198dc8227f0000b904000000480f44c1488903", {"
+ 0x0: movabs rax, 0x7f22c88d1930
+ 0xa: mov ecx, 4
+ 0xf: cmove rax, rcx
+ 0x13: mov qword ptr [rbx], rax
+ "});
+ }
+}
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index 1b8b99d530..000f9fb516 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -1,8 +1,8 @@
// We use the YARV bytecode constants which have a CRuby-style name
#![allow(non_upper_case_globals)]
-use crate::asm::x86_64::*;
use crate::asm::*;
+use crate::backend::ir::*;
use crate::core::*;
use crate::cruby::*;
use crate::invariants::*;
@@ -10,103 +10,137 @@ use crate::options::*;
use crate::stats::*;
use crate::utils::*;
use CodegenStatus::*;
-use InsnOpnd::*;
+use YARVOpnd::*;
-use std::cell::RefMut;
+use std::cell::Cell;
use std::cmp;
+use std::cmp::min;
use std::collections::HashMap;
+use std::ffi::c_void;
use std::ffi::CStr;
-use std::mem::{self, size_of};
-use std::os::raw::c_uint;
+use std::mem;
+use std::os::raw::c_int;
use std::ptr;
+use std::rc::Rc;
+use std::cell::RefCell;
use std::slice;
pub use crate::virtualmem::CodePtr;
-// Callee-saved registers
-pub const REG_CFP: X86Opnd = R13;
-pub const REG_EC: X86Opnd = R12;
-pub const REG_SP: X86Opnd = RBX;
-
-// Scratch registers used by YJIT
-pub const REG0: X86Opnd = RAX;
-pub const REG0_32: X86Opnd = EAX;
-pub const REG0_8: X86Opnd = AL;
-pub const REG1: X86Opnd = RCX;
-// pub const REG1_32: X86Opnd = ECX;
-
-// A block that can be invalidated needs space to write a jump.
-// We'll reserve a minimum size for any block that could
-// be invalidated. In this case the JMP takes 5 bytes, but
-// gen_send_general will always MOV the receiving object
-// into place, so 2 bytes are always written automatically.
-pub const JUMP_SIZE_IN_BYTES:usize = 3;
-
/// Status returned by code generation functions
#[derive(PartialEq, Debug)]
enum CodegenStatus {
- EndBlock,
+ SkipNextInsn,
KeepCompiling,
- CantCompile,
+ EndBlock,
}
/// Code generation function signature
type InsnGenFn = fn(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus;
+) -> Option<CodegenStatus>;
-/// Code generation state
-/// This struct only lives while code is being generated
+/// Ephemeral code generation state.
+/// Represents a [core::Block] while we build it.
pub struct JITState {
- // Block version being compiled
- block: BlockRef,
+ /// Instruction sequence for the compiling block
+ pub iseq: IseqPtr,
- // Instruction sequence this is associated with
- iseq: IseqPtr,
+ /// The iseq index of the first instruction in the block
+ starting_insn_idx: IseqIdx,
+
+ /// The [Context] entering into the first instruction of the block
+ starting_ctx: Context,
- // Index of the current instruction being compiled
- insn_idx: u32,
+ /// The placement for the machine code of the [Block]
+ output_ptr: CodePtr,
- // Opcode for the instruction being compiled
+ /// Index of the current instruction being compiled
+ insn_idx: IseqIdx,
+
+ /// Opcode for the instruction being compiled
opcode: usize,
- // PC of the instruction being compiled
+ /// PC of the instruction being compiled
pc: *mut VALUE,
- // Side exit to the instruction being compiled. See :side-exit:.
- side_exit_for_pc: Option<CodePtr>,
+ /// The stack_size when compilation of the current instruction started.
+ stack_size_for_pc: u8,
+
+ /// Execution context when compilation started
+ /// This allows us to peek at run-time values
+ ec: EcPtr,
+
+ /// The outgoing branches the block will have
+ pub pending_outgoing: Vec<PendingBranchRef>,
+
+ // --- Fields for block invalidation and invariants tracking below:
+ // Public mostly so into_block defined in the sibling module core
+ // can partially move out of Self.
- // Execution context when compilation started
- // This allows us to peek at run-time values
- ec: Option<EcPtr>,
+ /// Whether we need to record the code address at
+ /// the end of this bytecode instruction for global invalidation
+ pub record_boundary_patch_point: bool,
- // Whether we need to record the code address at
- // the end of this bytecode instruction for global invalidation
- record_boundary_patch_point: bool,
+ /// Code for immediately exiting upon entry to the block.
+ /// Required for invalidation.
+ pub block_entry_exit: Option<CodePtr>,
+
+ /// A list of callable method entries that must be valid for the block to be valid.
+ pub method_lookup_assumptions: Vec<CmePtr>,
+
+ /// A list of basic operators that must not be redefined for the block to be valid.
+ pub bop_assumptions: Vec<(RedefinitionFlag, ruby_basic_operators)>,
+
+ /// A list of constant expression path segments that must not have
+ /// been written to for the block to be valid.
+ pub stable_constant_names_assumption: Option<*const ID>,
+
+ /// A list of classes that are not supposed to have a singleton class.
+ pub no_singleton_class_assumptions: Vec<VALUE>,
+
+ /// When true, the block is valid only when the base pointer is equal to the environment pointer.
+ pub no_ep_escape: bool,
+
+ /// When true, the block is valid only when there is a total of one ractor running
+ pub block_assumes_single_ractor: bool,
+
+ /// Address range for Linux perf's [JIT interface](https://github.com/torvalds/linux/blob/master/tools/perf/Documentation/jit-interface.txt)
+ perf_map: Rc::<RefCell::<Vec<(CodePtr, Option<CodePtr>, String)>>>,
+
+ /// Stack of symbol names for --yjit-perf
+ perf_stack: Vec<String>,
}
impl JITState {
- pub fn new(blockref: &BlockRef) -> Self {
+ pub fn new(blockid: BlockId, starting_ctx: Context, output_ptr: CodePtr, ec: EcPtr) -> Self {
JITState {
- block: blockref.clone(),
- iseq: ptr::null(), // TODO: initialize this from the blockid
+ iseq: blockid.iseq,
+ starting_insn_idx: blockid.idx,
+ starting_ctx,
+ output_ptr,
insn_idx: 0,
opcode: 0,
pc: ptr::null_mut::<VALUE>(),
- side_exit_for_pc: None,
- ec: None,
+ stack_size_for_pc: starting_ctx.get_stack_size(),
+ pending_outgoing: vec![],
+ ec,
record_boundary_patch_point: false,
+ block_entry_exit: None,
+ method_lookup_assumptions: vec![],
+ bop_assumptions: vec![],
+ stable_constant_names_assumption: None,
+ no_singleton_class_assumptions: vec![],
+ no_ep_escape: false,
+ block_assumes_single_ractor: false,
+ perf_map: Rc::default(),
+ perf_stack: vec![],
}
}
- pub fn get_block(&self) -> BlockRef {
- self.block.clone()
- }
-
- pub fn get_insn_idx(&self) -> u32 {
+ pub fn get_insn_idx(&self) -> IseqIdx {
self.insn_idx
}
@@ -118,207 +152,412 @@ impl JITState {
self.opcode
}
- pub fn add_gc_object_offset(self: &mut JITState, ptr_offset: u32) {
- let mut gc_obj_vec: RefMut<_> = self.block.borrow_mut();
- gc_obj_vec.add_gc_object_offset(ptr_offset);
- }
-
pub fn get_pc(self: &JITState) -> *mut VALUE {
self.pc
}
-}
-use crate::codegen::JCCKinds::*;
+ pub fn get_starting_insn_idx(&self) -> IseqIdx {
+ self.starting_insn_idx
+ }
-#[allow(non_camel_case_types, unused)]
-pub enum JCCKinds {
- JCC_JNE,
- JCC_JNZ,
- JCC_JZ,
- JCC_JE,
- JCC_JBE,
- JCC_JNA,
-}
+ pub fn get_block_entry_exit(&self) -> Option<CodePtr> {
+ self.block_entry_exit
+ }
-pub fn jit_get_arg(jit: &JITState, arg_idx: isize) -> VALUE {
- // insn_len require non-test config
- #[cfg(not(test))]
- assert!(insn_len(jit.get_opcode()) > (arg_idx + 1).try_into().unwrap());
- unsafe { *(jit.pc.offset(arg_idx + 1)) }
-}
+ pub fn get_starting_ctx(&self) -> Context {
+ self.starting_ctx
+ }
-// Load a VALUE into a register and keep track of the reference if it is on the GC heap.
-pub fn jit_mov_gc_ptr(jit: &mut JITState, cb: &mut CodeBlock, reg: X86Opnd, ptr: VALUE) {
- assert!(matches!(reg, X86Opnd::Reg(_)));
- assert!(reg.num_bits() == 64);
+ pub fn get_arg(&self, arg_idx: isize) -> VALUE {
+ // insn_len require non-test config
+ #[cfg(not(test))]
+ assert!(insn_len(self.get_opcode()) > (arg_idx + 1).try_into().unwrap());
+ unsafe { *(self.pc.offset(arg_idx + 1)) }
+ }
+
+ /// Return true if the current ISEQ could escape an environment.
+ ///
+ /// As of vm_push_frame(), EP is always equal to BP. However, after pushing
+ /// a frame, some ISEQ setups call vm_bind_update_env(), which redirects EP.
+ /// Also, some method calls escape the environment to the heap.
+ fn escapes_ep(&self) -> bool {
+ match unsafe { get_iseq_body_type(self.iseq) } {
+ // <main> frame is always associated to TOPLEVEL_BINDING.
+ ISEQ_TYPE_MAIN |
+ // Kernel#eval uses a heap EP when a Binding argument is not nil.
+ ISEQ_TYPE_EVAL => true,
+ // If this ISEQ has previously escaped EP, give up the optimization.
+ _ if iseq_escapes_ep(self.iseq) => true,
+ _ => false,
+ }
+ }
- // Load the pointer constant into the specified register
- mov(cb, reg, const_ptr_opnd(ptr.as_ptr()));
+ // Get the index of the next instruction
+ fn next_insn_idx(&self) -> u16 {
+ self.insn_idx + insn_len(self.get_opcode()) as u16
+ }
- // The pointer immediate is encoded as the last part of the mov written out
- let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
+ // Check if we are compiling the instruction at the stub PC
+ // Meaning we are compiling the instruction that is next to execute
+ pub fn at_current_insn(&self) -> bool {
+ let ec_pc: *mut VALUE = unsafe { get_cfp_pc(self.get_cfp()) };
+ ec_pc == self.pc
+ }
- if !ptr.special_const_p() {
- jit.add_gc_object_offset(ptr_offset);
+ // Peek at the nth topmost value on the Ruby stack.
+ // Returns the topmost value when n == 0.
+ pub fn peek_at_stack(&self, ctx: &Context, n: isize) -> VALUE {
+ assert!(self.at_current_insn());
+ assert!(n < ctx.get_stack_size() as isize);
+
+ // Note: this does not account for ctx->sp_offset because
+ // this is only available when hitting a stub, and while
+ // hitting a stub, cfp->sp needs to be up to date in case
+ // codegen functions trigger GC. See :stub-sp-flush:.
+ return unsafe {
+ let sp: *mut VALUE = get_cfp_sp(self.get_cfp());
+
+ *(sp.offset(-1 - n))
+ };
}
-}
-// Get the index of the next instruction
-fn jit_next_insn_idx(jit: &JITState) -> u32 {
- jit.insn_idx + insn_len(jit.get_opcode())
-}
+ fn peek_at_self(&self) -> VALUE {
+ unsafe { get_cfp_self(self.get_cfp()) }
+ }
-// Check if we are compiling the instruction at the stub PC
-// Meaning we are compiling the instruction that is next to execute
-fn jit_at_current_insn(jit: &JITState) -> bool {
- let ec_pc: *mut VALUE = unsafe { get_cfp_pc(get_ec_cfp(jit.ec.unwrap())) };
- ec_pc == jit.pc
-}
+ fn peek_at_local(&self, n: i32) -> VALUE {
+ assert!(self.at_current_insn());
-// Peek at the nth topmost value on the Ruby stack.
-// Returns the topmost value when n == 0.
-fn jit_peek_at_stack(jit: &JITState, ctx: &Context, n: isize) -> VALUE {
- assert!(jit_at_current_insn(jit));
- assert!(n < ctx.get_stack_size() as isize);
+ let local_table_size: isize = unsafe { get_iseq_body_local_table_size(self.iseq) }
+ .try_into()
+ .unwrap();
+ assert!(n < local_table_size.try_into().unwrap());
- // Note: this does not account for ctx->sp_offset because
- // this is only available when hitting a stub, and while
- // hitting a stub, cfp->sp needs to be up to date in case
- // codegen functions trigger GC. See :stub-sp-flush:.
- return unsafe {
- let sp: *mut VALUE = get_cfp_sp(get_ec_cfp(jit.ec.unwrap()));
+ unsafe {
+ let ep = get_cfp_ep(self.get_cfp());
+ let n_isize: isize = n.try_into().unwrap();
+ let offs: isize = -(VM_ENV_DATA_SIZE as isize) - local_table_size + n_isize + 1;
+ *ep.offset(offs)
+ }
+ }
- *(sp.offset(-1 - n))
- };
-}
+ fn peek_at_block_handler(&self, level: u32) -> VALUE {
+ assert!(self.at_current_insn());
-fn jit_peek_at_self(jit: &JITState) -> VALUE {
- unsafe { get_cfp_self(get_ec_cfp(jit.ec.unwrap())) }
-}
+ unsafe {
+ let ep = get_cfp_ep_level(self.get_cfp(), level);
+ *ep.offset(VM_ENV_DATA_INDEX_SPECVAL as isize)
+ }
+ }
-fn jit_peek_at_local(jit: &JITState, n: i32) -> VALUE {
- assert!(jit_at_current_insn(jit));
+ pub fn assume_method_lookup_stable(&mut self, asm: &mut Assembler, ocb: &mut OutlinedCb, cme: CmePtr) -> Option<()> {
+ jit_ensure_block_entry_exit(self, asm, ocb)?;
+ self.method_lookup_assumptions.push(cme);
- let local_table_size: isize = unsafe { get_iseq_body_local_table_size(jit.iseq) }
- .try_into()
- .unwrap();
- assert!(n < local_table_size.try_into().unwrap());
+ Some(())
+ }
- unsafe {
- let ep = get_cfp_ep(get_ec_cfp(jit.ec.unwrap()));
- let n_isize: isize = n.try_into().unwrap();
- let offs: isize = -(VM_ENV_DATA_SIZE as isize) - local_table_size + n_isize + 1;
- *ep.offset(offs)
+ /// Assume that objects of a given class will have no singleton class.
+ /// Return true if there has been no such singleton class since boot
+ /// and we can safely invalidate it.
+ pub fn assume_no_singleton_class(&mut self, asm: &mut Assembler, ocb: &mut OutlinedCb, klass: VALUE) -> bool {
+ if jit_ensure_block_entry_exit(self, asm, ocb).is_none() {
+ return false; // out of space, give up
+ }
+ if has_singleton_class_of(klass) {
+ return false; // we've seen a singleton class. disable the optimization to avoid an invalidation loop.
+ }
+ self.no_singleton_class_assumptions.push(klass);
+ true
+ }
+
+ /// Assume that the base pointer is equal to the environment pointer in the current ISEQ.
+ /// Return true if it's safe to assume so.
+ fn assume_no_ep_escape(&mut self, asm: &mut Assembler, ocb: &mut OutlinedCb) -> bool {
+ if jit_ensure_block_entry_exit(self, asm, ocb).is_none() {
+ return false; // out of space, give up
+ }
+ if self.escapes_ep() {
+ return false; // EP has been escaped in this ISEQ. disable the optimization to avoid an invalidation loop.
+ }
+ self.no_ep_escape = true;
+ true
+ }
+
+ fn get_cfp(&self) -> *mut rb_control_frame_struct {
+ unsafe { get_ec_cfp(self.ec) }
+ }
+
+ pub fn assume_stable_constant_names(&mut self, asm: &mut Assembler, ocb: &mut OutlinedCb, id: *const ID) -> Option<()> {
+ jit_ensure_block_entry_exit(self, asm, ocb)?;
+ self.stable_constant_names_assumption = Some(id);
+
+ Some(())
+ }
+
+ pub fn queue_outgoing_branch(&mut self, branch: PendingBranchRef) {
+ self.pending_outgoing.push(branch)
+ }
+
+ /// Push a symbol for --yjit-perf
+ fn perf_symbol_push(&mut self, asm: &mut Assembler, symbol_name: &str) {
+ if !self.perf_stack.is_empty() {
+ self.perf_symbol_range_end(asm);
+ }
+ self.perf_stack.push(symbol_name.to_string());
+ self.perf_symbol_range_start(asm, symbol_name);
+ }
+
+ /// Pop the stack-top symbol for --yjit-perf
+ fn perf_symbol_pop(&mut self, asm: &mut Assembler) {
+ self.perf_symbol_range_end(asm);
+ self.perf_stack.pop();
+ if let Some(symbol_name) = self.perf_stack.get(0) {
+ self.perf_symbol_range_start(asm, symbol_name);
+ }
+ }
+
+ /// Mark the start address of a symbol to be reported to perf
+ fn perf_symbol_range_start(&self, asm: &mut Assembler, symbol_name: &str) {
+ let symbol_name = format!("[JIT] {}", symbol_name);
+ let syms = self.perf_map.clone();
+ asm.pos_marker(move |start, _| syms.borrow_mut().push((start, None, symbol_name.clone())));
}
-}
-// Add a comment at the current position in the code block
-fn add_comment(cb: &mut CodeBlock, comment_str: &str) {
- if cfg!(feature = "asm_comments") {
- cb.add_comment(comment_str);
+ /// Mark the end address of a symbol to be reported to perf
+ fn perf_symbol_range_end(&self, asm: &mut Assembler) {
+ let syms = self.perf_map.clone();
+ asm.pos_marker(move |end, _| {
+ if let Some((_, ref mut end_store, _)) = syms.borrow_mut().last_mut() {
+ assert_eq!(None, *end_store);
+ *end_store = Some(end);
+ }
+ });
+ }
+
+ /// Flush addresses and symbols to /tmp/perf-{pid}.map
+ fn flush_perf_symbols(&self, cb: &CodeBlock) {
+ assert_eq!(0, self.perf_stack.len());
+ let path = format!("/tmp/perf-{}.map", std::process::id());
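+ // Each entry follows Linux perf's JIT map format: start address and size in hex,
+ // then the symbol name, e.g. (illustrative) "7f3a2c001000 24 [JIT] gen_send".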
+ let mut f = std::fs::File::options().create(true).append(true).open(path).unwrap();
+ for sym in self.perf_map.borrow().iter() {
+ if let (start, Some(end), name) = sym {
+ // The code may straddle two pages; emit a map entry for each contiguous part of the symbol.
+ for (inline_start, inline_end) in cb.writable_addrs(*start, *end) {
+ use std::io::Write;
+ let code_size = inline_end - inline_start;
+ writeln!(f, "{inline_start:x} {code_size:x} {name}").unwrap();
+ }
+ }
+ }
+ }
+
+ /// Return true if we're compiling a send-like instruction, not an opt_* instruction.
+ pub fn is_sendish(&self) -> bool {
+ match unsafe { rb_iseq_opcode_at_pc(self.iseq, self.pc) } as u32 {
+ YARVINSN_send |
+ YARVINSN_opt_send_without_block |
+ YARVINSN_invokesuper => true,
+ _ => false,
+ }
}
}
-/// Increment a profiling counter with counter_name
-#[cfg(not(feature = "stats"))]
-macro_rules! gen_counter_incr {
- ($cb:tt, $counter_name:ident) => {};
+/// Macro to call jit.perf_symbol_push() without evaluating arguments when
+/// the option is turned off, which is useful for avoiding string allocation.
+macro_rules! jit_perf_symbol_push {
+ ($jit:expr, $asm:expr, $symbol_name:expr, $perf_map:expr) => {
+ if get_option!(perf_map) == Some($perf_map) {
+ $jit.perf_symbol_push($asm, $symbol_name);
+ }
+ };
}
-#[cfg(feature = "stats")]
-macro_rules! gen_counter_incr {
- ($cb:tt, $counter_name:ident) => {
- if (get_option!(gen_stats)) {
- // Get a pointer to the counter variable
- let ptr = ptr_to_counter!($counter_name);
- // Use REG1 because there might be return value in REG0
- mov($cb, REG1, const_ptr_opnd(ptr as *const u8));
- write_lock_prefix($cb); // for ractors.
- add($cb, mem_opnd(64, REG1, 0), imm_opnd(1));
+/// Macro to call jit.perf_symbol_pop(), for consistency with jit_perf_symbol_push!().
+macro_rules! jit_perf_symbol_pop {
+ ($jit:expr, $asm:expr, $perf_map:expr) => {
+ if get_option!(perf_map) == Some($perf_map) {
+ $jit.perf_symbol_pop($asm);
}
};
}
-/// Increment a counter then take an existing side exit
-#[cfg(not(feature = "stats"))]
-macro_rules! counted_exit {
- ($ocb:tt, $existing_side_exit:tt, $counter_name:ident) => {{
- let _ = $ocb;
- $existing_side_exit
- }};
-}
-#[cfg(feature = "stats")]
-macro_rules! counted_exit {
- ($ocb:tt, $existing_side_exit:tt, $counter_name:ident) => {
- // The counter is only incremented when stats are enabled
- if (!get_option!(gen_stats)) {
- $existing_side_exit
- } else {
- let ocb = $ocb.unwrap();
- let code_ptr = ocb.get_write_ptr();
+/// Macro to push and pop a perf symbol around a function call.
+macro_rules! perf_call {
+ // perf_call!("prefix: ", func(...)) uses "prefix: func" as a symbol.
+ ($prefix:expr, $func_name:ident($jit:expr, $asm:expr$(, $arg:expr)*$(,)?) ) => {
+ {
+ jit_perf_symbol_push!($jit, $asm, &format!("{}{}", $prefix, stringify!($func_name)), PerfMap::Codegen);
+ let ret = $func_name($jit, $asm, $($arg),*);
+ jit_perf_symbol_pop!($jit, $asm, PerfMap::Codegen);
+ ret
+ }
+ };
+ // perf_call! { func(...) } uses "func" as a symbol.
+ { $func_name:ident($jit:expr, $asm:expr$(, $arg:expr)*$(,)?) } => {
+ perf_call!("", $func_name($jit, $asm, $($arg),*))
+ };
+}
- // Increment the counter
- gen_counter_incr!(ocb, $counter_name);
+use crate::codegen::JCCKinds::*;
- // Jump to the existing side exit
- jmp_ptr(ocb, $existing_side_exit);
+#[allow(non_camel_case_types, unused)]
+pub enum JCCKinds {
+ JCC_JNE,
+ JCC_JNZ,
+ JCC_JZ,
+ JCC_JE,
+ JCC_JB,
+ JCC_JBE,
+ JCC_JNA,
+ JCC_JNAE,
+ JCC_JO_MUL,
+}
- // Pointer to the side-exit code
- code_ptr
- }
- };
+#[inline(always)]
+fn gen_counter_incr(asm: &mut Assembler, counter: Counter) {
+ // Assert that default counters are not incremented by generated code as this would impact performance
+ assert!(!DEFAULT_COUNTERS.contains(&counter), "gen_counter_incr incremented {:?}", counter);
+
+ if get_option!(gen_stats) {
+ asm_comment!(asm, "increment counter {}", counter.get_name());
+ let ptr = get_counter_ptr(&counter.get_name());
+ let ptr_reg = asm.load(Opnd::const_ptr(ptr as *const u8));
+ let counter_opnd = Opnd::mem(64, ptr_reg, 0);
+
+ // Increment and store the updated value
+ asm.incr_counter(counter_opnd, Opnd::UImm(1));
+ }
}
// Save the incremented PC on the CFP
// This is necessary when callees can raise or allocate
-fn jit_save_pc(jit: &JITState, cb: &mut CodeBlock, scratch_reg: X86Opnd) {
+fn jit_save_pc(jit: &JITState, asm: &mut Assembler) {
let pc: *mut VALUE = jit.get_pc();
let ptr: *mut VALUE = unsafe {
let cur_insn_len = insn_len(jit.get_opcode()) as isize;
pc.offset(cur_insn_len)
};
- mov(cb, scratch_reg, const_ptr_opnd(ptr as *const u8));
- mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC), scratch_reg);
+
+ asm_comment!(asm, "save PC to CFP");
+ asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), Opnd::const_ptr(ptr as *const u8));
}
/// Save the current SP on the CFP
/// This realigns the interpreter SP with the JIT SP
/// Note: this will change the current value of REG_SP,
/// which could invalidate memory operands
-fn gen_save_sp(cb: &mut CodeBlock, ctx: &mut Context) {
- if ctx.get_sp_offset() != 0 {
- let stack_pointer = ctx.sp_opnd(0);
- lea(cb, REG_SP, stack_pointer);
- let cfp_sp_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP);
- mov(cb, cfp_sp_opnd, REG_SP);
- ctx.set_sp_offset(0);
+fn gen_save_sp(asm: &mut Assembler) {
+ gen_save_sp_with_offset(asm, 0);
+}
+
+/// Save the current SP + offset on the CFP
+fn gen_save_sp_with_offset(asm: &mut Assembler, offset: i8) {
+ if asm.ctx.get_sp_offset() != -offset {
+ asm_comment!(asm, "save SP to CFP");
+ let stack_pointer = asm.ctx.sp_opnd(offset as i32);
+ let sp_addr = asm.lea(stack_pointer);
+ asm.mov(SP, sp_addr);
+ let cfp_sp_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP);
+ asm.mov(cfp_sp_opnd, SP);
+ asm.ctx.set_sp_offset(-offset);
}
}
-/// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that
-/// could:
+/// Basically jit_prepare_non_leaf_call(), but this registers the current PC
+/// to lazily push a C method frame when it's necessary.
+fn jit_prepare_lazy_frame_call(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ cme: *const rb_callable_method_entry_t,
+ recv_opnd: YARVOpnd,
+) -> bool {
+ // We can use this only when the receiver is on stack.
+ let recv_idx = match recv_opnd {
+ StackOpnd(recv_idx) => recv_idx,
+ _ => unreachable!("recv_opnd must be on stack, but got: {:?}", recv_opnd),
+ };
+
+ // Get the next PC. jit_save_pc() saves that PC.
+ let pc: *mut VALUE = unsafe {
+ let cur_insn_len = insn_len(jit.get_opcode()) as isize;
+ jit.get_pc().offset(cur_insn_len)
+ };
+
+ let pc_to_cfunc = CodegenGlobals::get_pc_to_cfunc();
+ match pc_to_cfunc.get(&pc) {
+ Some(&(other_cme, _)) if other_cme != cme => {
+ // Bail out if it's not the only cme on this callsite.
+ incr_counter!(lazy_frame_failure);
+ return false;
+ }
+ _ => {
+ // Let rb_yjit_lazy_push_frame() lazily push a C frame on this PC.
+ incr_counter!(lazy_frame_count);
+ pc_to_cfunc.insert(pc, (cme, recv_idx));
+ }
+ }
+
+ // Save the PC to trigger a lazy frame push, and save the SP to get the receiver.
+ // The C func may call a method that doesn't raise, so prepare for invalidation too.
+ jit_prepare_non_leaf_call(jit, asm);
+
+ // Make sure we're ready for calling rb_vm_push_cfunc_frame().
+ let cfunc_argc = unsafe { get_mct_argc(get_cme_def_body_cfunc(cme)) };
+ if cfunc_argc != -1 {
+ assert_eq!(recv_idx as i32, cfunc_argc); // verify the receiver index if possible
+ }
+ assert!(asm.get_leaf_ccall()); // It checks the stack canary we set for known_cfunc_codegen.
+
+ true
+}
+
+/// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that could:
/// - Perform GC allocation
/// - Take the VM lock through RB_VM_LOCK_ENTER()
/// - Perform Ruby method call
-fn jit_prepare_routine_call(
+///
+/// If the routine doesn't call arbitrary methods, use jit_prepare_call_with_gc() instead.
+fn jit_prepare_non_leaf_call(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
- scratch_reg: X86Opnd,
+ asm: &mut Assembler
) {
- jit.record_boundary_patch_point = true;
- jit_save_pc(jit, cb, scratch_reg);
- gen_save_sp(cb, ctx);
+ // Prepare for GC. Setting PC also prepares for showing a backtrace.
+ jit.record_boundary_patch_point = true; // VM lock could trigger invalidation
+ jit_save_pc(jit, asm); // for allocation tracing
+ gen_save_sp(asm); // protect objects from GC
// In case the routine calls Ruby methods, it can set local variables
- // through Kernel#binding and other means.
- ctx.clear_local_types();
+ // through Kernel#binding, rb_debug_inspector API, and other means.
+ asm.clear_local_types();
+}
+
+/// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that could:
+/// - Perform GC allocation
+/// - Take the VM lock through RB_VM_LOCK_ENTER()
+fn jit_prepare_call_with_gc(
+ jit: &mut JITState,
+ asm: &mut Assembler
+) {
+ jit.record_boundary_patch_point = true; // VM lock could trigger invalidation
+ jit_save_pc(jit, asm); // for allocation tracing
+ gen_save_sp(asm); // protect objects from GC
+
+ // Expect a leaf ccall(). You should use jit_prepare_non_leaf_call() if otherwise.
+ asm.expect_leaf_ccall();
}
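+
+// Illustrative (hypothetical) callsite: a helper that only allocates would do
+//     jit_prepare_call_with_gc(jit, asm);
+//     let new_ary = asm.ccall(rb_ary_new as *const u8, vec![]);
+// while anything that may re-enter Ruby methods needs jit_prepare_non_leaf_call().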
/// Record the current codeblock write position for rewriting into a jump into
/// the outlined block later. Used to implement global code invalidation.
-fn record_global_inval_patch(cb: &mut CodeBlock, outline_block_target_pos: CodePtr) {
- CodegenGlobals::push_global_inval_patch(cb.get_write_ptr(), outline_block_target_pos);
+fn record_global_inval_patch(asm: &mut Assembler, outline_block_target_pos: CodePtr) {
+ // We add a padding before pos_marker so that the previous patch will not overlap this.
+ // jump_to_next_insn() puts a patch point at the end of the block in fallthrough cases.
+ // In the fallthrough case, the next block should start with the same Context, so the
+ // patch is fine, but it should not overlap another patch.
+ asm.pad_inval_patch();
+ asm.pos_marker(move |code_ptr, cb| {
+ CodegenGlobals::push_global_inval_patch(code_ptr, outline_block_target_pos, cb);
+ });
}
/// Verify the ctx's types and mappings against the compile-time stack, self,
@@ -328,14 +567,36 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
unsafe { CStr::from_ptr(rb_obj_info(val)).to_str().unwrap() }
}
+ // Some types such as CString only assert the class field of the object
+ // when there has never been a singleton class created for objects of that class.
+ // Once a singleton class has been created, they become their weaker
+ // `T*` variant, and more objects should pass the verification.
+ fn relax_type_with_singleton_class_assumption(ty: Type) -> Type {
+ if let Type::CString | Type::CArray | Type::CHash = ty {
+ if has_singleton_class_of(ty.known_class().unwrap()) {
+ match ty {
+ Type::CString => return Type::TString,
+ Type::CArray => return Type::TArray,
+ Type::CHash => return Type::THash,
+ _ => (),
+ }
+ }
+ }
+
+ ty
+ }
+
// Only able to check types when at current insn
- assert!(jit_at_current_insn(jit));
+ assert!(jit.at_current_insn());
- let self_val = jit_peek_at_self(jit);
+ let self_val = jit.peek_at_self();
let self_val_type = Type::from(self_val);
+ let learned_self_type = ctx.get_opnd_type(SelfOpnd);
+ let learned_self_type = relax_type_with_singleton_class_assumption(learned_self_type);
+
// Verify self operand type
- if self_val_type.diff(ctx.get_opnd_type(SelfOpnd)) == usize::MAX {
+ if self_val_type.diff(learned_self_type) == TypeDiff::Incompatible {
panic!(
"verify_ctx: ctx self type ({:?}) incompatible with actual value of self {}",
ctx.get_opnd_type(SelfOpnd),
@@ -344,14 +605,17 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
}
// Verify stack operand types
- let top_idx = cmp::min(ctx.get_stack_size(), MAX_TEMP_TYPES as u16);
+ let top_idx = cmp::min(ctx.get_stack_size(), MAX_TEMP_TYPES as u8);
for i in 0..top_idx {
- let (learned_mapping, learned_type) = ctx.get_opnd_mapping(StackOpnd(i));
- let stack_val = jit_peek_at_stack(jit, ctx, i as isize);
+ let learned_mapping = ctx.get_opnd_mapping(StackOpnd(i));
+ let learned_type = ctx.get_opnd_type(StackOpnd(i));
+ let learned_type = relax_type_with_singleton_class_assumption(learned_type);
+
+ let stack_val = jit.peek_at_stack(ctx, i as isize);
let val_type = Type::from(stack_val);
- match learned_mapping {
- TempMapping::MapToSelf => {
+ match learned_mapping.get_kind() {
+ TempMappingKind::MapToSelf => {
if self_val != stack_val {
panic!(
"verify_ctx: stack value was mapped to self, but values did not match!\n stack: {}\n self: {}",
@@ -360,8 +624,9 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
);
}
}
- TempMapping::MapToLocal(local_idx) => {
- let local_val = jit_peek_at_local(jit, local_idx.into());
+ TempMappingKind::MapToLocal => {
+ let local_idx: u8 = learned_mapping.get_local_idx();
+ let local_val = jit.peek_at_local(local_idx.into());
if local_val != stack_val {
panic!(
"verify_ctx: stack value was mapped to local, but values did not match\n stack: {}\n local {}: {}",
@@ -371,15 +636,16 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
);
}
}
- TempMapping::MapToStack => {}
+ TempMappingKind::MapToStack => {}
}
// If the actual type differs from the learned type
- if val_type.diff(learned_type) == usize::MAX {
+ if val_type.diff(learned_type) == TypeDiff::Incompatible {
panic!(
- "verify_ctx: ctx type ({:?}) incompatible with actual value on stack: {}",
+ "verify_ctx: ctx type ({:?}) incompatible with actual value on stack: {} ({:?})",
learned_type,
- obj_info_str(stack_val)
+ obj_info_str(stack_val),
+ val_type,
);
}
}
@@ -389,10 +655,11 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
let top_idx: usize = cmp::min(local_table_size as usize, MAX_TEMP_TYPES);
for i in 0..top_idx {
let learned_type = ctx.get_local_type(i);
- let local_val = jit_peek_at_local(jit, i as i32);
+ let learned_type = relax_type_with_singleton_class_assumption(learned_type);
+ let local_val = jit.peek_at_local(i as i32);
let local_type = Type::from(local_val);
- if local_type.diff(learned_type) == usize::MAX {
+ if local_type.diff(learned_type) == TypeDiff::Incompatible {
panic!(
"verify_ctx: ctx type ({:?}) incompatible with actual value of local: {} (type {:?})",
learned_type,
@@ -403,295 +670,429 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
}
}
+// Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit
+// to the interpreter when it cannot service a stub by generating new code.
+// Before coming here, branch_stub_hit() takes care of fully reconstructing
+// interpreter state.
+fn gen_stub_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> {
+ let ocb = ocb.unwrap();
+ let mut asm = Assembler::new();
+
+ gen_counter_incr(&mut asm, Counter::exit_from_branch_stub);
+
+ asm_comment!(asm, "exit from branch stub");
+ asm.cpop_into(SP);
+ asm.cpop_into(EC);
+ asm.cpop_into(CFP);
+
+ asm.frame_teardown();
+
+ asm.cret(Qundef.into());
+
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
+}
+
/// Generate an exit to return to the interpreter
-fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, cb: &mut CodeBlock) -> CodePtr {
- let code_ptr = cb.get_write_ptr();
+fn gen_exit(exit_pc: *mut VALUE, asm: &mut Assembler) {
+ #[cfg(all(feature = "disasm", not(test)))]
+ {
+ let opcode = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) };
+ asm_comment!(asm, "exit to interpreter on {}", insn_name(opcode as usize));
+ }
- add_comment(cb, "exit to interpreter");
+ if asm.ctx.is_return_landing() {
+ asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP));
+ let top = asm.stack_push(Type::Unknown);
+ asm.mov(top, C_RET_OPND);
+ }
+
+ // Spill stack temps before returning to the interpreter
+ asm.spill_temps();
// Generate the code to exit to the interpreters
// Write the adjusted SP back into the CFP
- if ctx.get_sp_offset() != 0 {
- let stack_pointer = ctx.sp_opnd(0);
- lea(cb, REG_SP, stack_pointer);
- mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG_SP);
+ if asm.ctx.get_sp_offset() != 0 {
+ let sp_opnd = asm.lea(asm.ctx.sp_opnd(0));
+ asm.mov(
+ Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP),
+ sp_opnd
+ );
}
// Update CFP->PC
- mov(cb, RAX, const_ptr_opnd(exit_pc as *const u8));
- mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC), RAX);
+ asm.mov(
+ Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC),
+ Opnd::const_ptr(exit_pc as *const u8)
+ );
// Accumulate stats about interpreter exits
- #[cfg(feature = "stats")]
if get_option!(gen_stats) {
- mov(cb, RDI, const_ptr_opnd(exit_pc as *const u8));
- call_ptr(cb, RSI, rb_yjit_count_side_exit_op as *const u8);
+ asm.ccall(
+ rb_yjit_count_side_exit_op as *const u8,
+ vec![Opnd::const_ptr(exit_pc as *const u8)]
+ );
- // If --yjit-trace-exits option is enabled, record the exit stack
- // while recording the side exits.
- if get_option!(gen_trace_exits) {
- mov(cb, C_ARG_REGS[0], const_ptr_opnd(exit_pc as *const u8));
- call_ptr(cb, REG0, rb_yjit_record_exit_stack as *const u8);
+ // If --yjit-trace-exits is enabled, record the exit stack while recording
+ // the side exits. TraceExits::Counter is handled by gen_counted_exit().
+ if get_option!(trace_exits) == Some(TraceExits::All) {
+ asm.ccall(
+ rb_yjit_record_exit_stack as *const u8,
+ vec![Opnd::const_ptr(exit_pc as *const u8)]
+ );
}
}
- pop(cb, REG_SP);
- pop(cb, REG_EC);
- pop(cb, REG_CFP);
+ asm.cpop_into(SP);
+ asm.cpop_into(EC);
+ asm.cpop_into(CFP);
- mov(cb, RAX, uimm_opnd(Qundef.into()));
- ret(cb);
+ asm.frame_teardown();
- return code_ptr;
+ asm.cret(Qundef.into());
}
-// Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit
-// to the interpreter when it cannot service a stub by generating new code.
-// Before coming here, branch_stub_hit() takes care of fully reconstructing
-// interpreter state.
-fn gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr {
- let ocb = ocb.unwrap();
- let code_ptr = ocb.get_write_ptr();
+/// :side-exit:
+/// Get an exit for the current instruction in the outlined block. The code
+/// for each instruction often begins with several guards before proceeding
+/// to do work. When guards fail, an option we have is to exit to the
+/// interpreter at an instruction boundary. The piece of code that takes
+/// care of reconstructing interpreter state and exiting out of generated
+/// code is called the side exit.
+///
+/// No guards change the logic for reconstructing interpreter state at the
+/// moment, so there is one unique side exit for each context. Note that
+/// it's incorrect to jump to the side exit after any ctx stack push operations
+/// since they change the logic required for reconstructing interpreter state.
+pub fn gen_outlined_exit(exit_pc: *mut VALUE, ctx: &Context, ocb: &mut OutlinedCb) -> Option<CodePtr> {
+ let mut cb = ocb.unwrap();
+ let mut asm = Assembler::new();
+ asm.ctx = *ctx;
+ asm.set_reg_temps(ctx.get_reg_temps());
+
+ gen_exit(exit_pc, &mut asm);
+
+ asm.compile(&mut cb, None).map(|(code_ptr, _)| code_ptr)
+}
+
+/// Get a side exit. Increment a counter in it if --yjit-stats is enabled.
+pub fn gen_counted_exit(exit_pc: *mut VALUE, side_exit: CodePtr, ocb: &mut OutlinedCb, counter: Option<Counter>) -> Option<CodePtr> {
+ // The counter is only incremented when stats are enabled
+ if !get_option!(gen_stats) {
+ return Some(side_exit);
+ }
+ let counter = match counter {
+ Some(counter) => counter,
+ None => return Some(side_exit),
+ };
- gen_counter_incr!(ocb, exit_from_branch_stub);
+ let mut asm = Assembler::new();
- pop(ocb, REG_SP);
- pop(ocb, REG_EC);
- pop(ocb, REG_CFP);
+ // Increment a counter
+ gen_counter_incr(&mut asm, counter);
- mov(ocb, RAX, uimm_opnd(Qundef.into()));
- ret(ocb);
+ // Trace a counted exit if --yjit-trace-exits=counter is given.
+ // TraceExits::All is handled by gen_exit().
+ if get_option!(trace_exits) == Some(TraceExits::CountedExit(counter)) {
+ with_caller_saved_temp_regs(&mut asm, |asm| {
+ asm.ccall(rb_yjit_record_exit_stack as *const u8, vec![Opnd::const_ptr(exit_pc as *const u8)]);
+ });
+ }
- return code_ptr;
+ // Jump to the existing side exit
+ asm.jmp(Target::CodePtr(side_exit));
+
+ let ocb = ocb.unwrap();
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
}
-// :side-exit:
-// Get an exit for the current instruction in the outlined block. The code
-// for each instruction often begins with several guards before proceeding
-// to do work. When guards fail, an option we have is to exit to the
-// interpreter at an instruction boundary. The piece of code that takes
-// care of reconstructing interpreter state and exiting out of generated
-// code is called the side exit.
-//
-// No guards change the logic for reconstructing interpreter state at the
-// moment, so there is one unique side exit for each context. Note that
-// it's incorrect to jump to the side exit after any ctx stack push/pop operations
-// since they change the logic required for reconstructing interpreter state.
-fn get_side_exit(jit: &mut JITState, ocb: &mut OutlinedCb, ctx: &Context) -> CodePtr {
- match jit.side_exit_for_pc {
- None => {
- let exit_code = gen_exit(jit.pc, ctx, ocb.unwrap());
- jit.side_exit_for_pc = Some(exit_code);
- exit_code
- }
- Some(code_ptr) => code_ptr,
+/// Preserve caller-saved stack temp registers during the call of a given block
+fn with_caller_saved_temp_regs<F, R>(asm: &mut Assembler, block: F) -> R where F: FnOnce(&mut Assembler) -> R {
+ for &reg in caller_saved_temp_regs() {
+ asm.cpush(Opnd::Reg(reg)); // save stack temps
}
+ let ret = block(asm);
+ for &reg in caller_saved_temp_regs().rev() {
+ asm.cpop_into(Opnd::Reg(reg)); // restore stack temps
+ }
+ ret
}
// Ensure that there is an exit for the start of the block being compiled.
// Block invalidation uses this exit.
-pub fn jit_ensure_block_entry_exit(jit: &mut JITState, ocb: &mut OutlinedCb) {
- let blockref = jit.block.clone();
- let mut block = blockref.borrow_mut();
- let block_ctx = block.get_ctx();
- let blockid = block.get_blockid();
-
- if block.entry_exit.is_some() {
- return;
+#[must_use]
+pub fn jit_ensure_block_entry_exit(jit: &mut JITState, asm: &mut Assembler, ocb: &mut OutlinedCb) -> Option<()> {
+ if jit.block_entry_exit.is_some() {
+ return Some(());
}
- if jit.insn_idx == blockid.idx {
- // We are compiling the first instruction in the block.
- // Generate the exit with the cache in jitstate.
- block.entry_exit = Some(get_side_exit(jit, ocb, &block_ctx));
+ let block_starting_context = &jit.get_starting_ctx();
+
+ // If we're compiling the first instruction in the block.
+ if jit.insn_idx == jit.starting_insn_idx {
+ // Generate the exit with the cache in Assembler.
+ let side_exit_context = SideExitContext::new(jit.pc, *block_starting_context);
+ let entry_exit = asm.get_side_exit(&side_exit_context, None, ocb);
+ jit.block_entry_exit = Some(entry_exit?);
} else {
- let pc = unsafe { rb_iseq_pc_at_idx(blockid.iseq, blockid.idx) };
- block.entry_exit = Some(gen_exit(pc, &block_ctx, ocb.unwrap()));
+ let block_entry_pc = unsafe { rb_iseq_pc_at_idx(jit.iseq, jit.starting_insn_idx.into()) };
+ jit.block_entry_exit = Some(gen_outlined_exit(block_entry_pc, block_starting_context, ocb)?);
}
+
+ Some(())
}
-// Generate a runtime guard that ensures the PC is at the expected
-// instruction index in the iseq, otherwise takes a side-exit.
-// This is to handle the situation of optional parameters.
-// When a function with optional parameters is called, the entry
-// PC for the method isn't necessarily 0.
-fn gen_pc_guard(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) {
- //RUBY_ASSERT(cb != NULL);
+// Landing code for when c_return tracing is enabled. See full_cfunc_return().
+fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> Option<CodePtr> {
+ let ocb = ocb.unwrap();
+ let mut asm = Assembler::new();
+
+ // This chunk of code expects REG_EC to be filled properly and
+ // RAX to contain the return value of the C method.
- let pc_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC);
- let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) };
- let expected_pc_opnd = const_ptr_opnd(expected_pc as *const u8);
- mov(cb, REG0, pc_opnd);
- mov(cb, REG1, expected_pc_opnd);
- cmp(cb, REG0, REG1);
+ asm_comment!(asm, "full cfunc return");
+ asm.ccall(
+ rb_full_cfunc_return as *const u8,
+ vec![EC, C_RET_OPND]
+ );
- let pc_match = cb.new_label("pc_match".to_string());
- je_label(cb, pc_match);
+ // Count the exit
+ gen_counter_incr(&mut asm, Counter::traced_cfunc_return);
- // We're not starting at the first PC, so we need to exit.
- gen_counter_incr!(cb, leave_start_pc_non_zero);
+ // Return to the interpreter
+ asm.cpop_into(SP);
+ asm.cpop_into(EC);
+ asm.cpop_into(CFP);
- pop(cb, REG_SP);
- pop(cb, REG_EC);
- pop(cb, REG_CFP);
+ asm.frame_teardown();
- mov(cb, RAX, imm_opnd(Qundef.into()));
- ret(cb);
+ asm.cret(Qundef.into());
- // PC should match the expected insn_idx
- cb.write_label(pc_match);
- cb.link_labels();
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
}
-// Landing code for when c_return tracing is enabled. See full_cfunc_return().
-fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr {
- let cb = ocb.unwrap();
- let code_ptr = cb.get_write_ptr();
+/// Generate a continuation for leave that exits to the interpreter at REG_CFP->pc.
+/// This is used by gen_leave() and gen_entry_prologue()
+fn gen_leave_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> {
+ let ocb = ocb.unwrap();
+ let mut asm = Assembler::new();
- // This chunk of code expect REG_EC to be filled properly and
- // RAX to contain the return value of the C method.
+ // gen_leave() fully reconstructs interpreter state and leaves the
+ // return value in C_RET_OPND before coming here.
+ let ret_opnd = asm.live_reg_opnd(C_RET_OPND);
- // Call full_cfunc_return()
- mov(cb, C_ARG_REGS[0], REG_EC);
- mov(cb, C_ARG_REGS[1], RAX);
- call_ptr(cb, REG0, rb_full_cfunc_return as *const u8);
+ // Every exit to the interpreter should be counted
+ gen_counter_incr(&mut asm, Counter::leave_interp_return);
- // Count the exit
- gen_counter_incr!(cb, traced_cfunc_return);
+ asm_comment!(asm, "exit from leave");
+ asm.cpop_into(SP);
+ asm.cpop_into(EC);
+ asm.cpop_into(CFP);
- // Return to the interpreter
- pop(cb, REG_SP);
- pop(cb, REG_EC);
- pop(cb, REG_CFP);
+ asm.frame_teardown();
- mov(cb, RAX, uimm_opnd(Qundef.into()));
- ret(cb);
+ asm.cret(ret_opnd);
- return code_ptr;
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
}
-/// Generate a continuation for leave that exits to the interpreter at REG_CFP->pc.
-/// This is used by gen_leave() and gen_entry_prologue()
-fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr {
+// Increment SP and transfer the execution to the interpreter after jit_exec_exception().
+// On jit_exec_exception(), you need to return Qundef to keep executing caller non-FINISH
+// frames on the interpreter. You also need to increment SP to push the return value to
+// the caller's stack, which is different from gen_stub_exit().
+fn gen_leave_exception(ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
- let code_ptr = ocb.get_write_ptr();
+ let mut asm = Assembler::new();
- // Note, gen_leave() fully reconstructs interpreter state and leaves the
- // return value in RAX before coming here.
+ // gen_leave() leaves the return value in C_RET_OPND before coming here.
+ let ruby_ret_val = asm.live_reg_opnd(C_RET_OPND);
// Every exit to the interpreter should be counted
- gen_counter_incr!(ocb, leave_interp_return);
+ gen_counter_incr(&mut asm, Counter::leave_interp_return);
+
+ asm_comment!(asm, "push return value through cfp->sp");
+ let cfp_sp = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP);
+ let sp = asm.load(cfp_sp);
+ asm.mov(Opnd::mem(64, sp, 0), ruby_ret_val);
+ let new_sp = asm.add(sp, SIZEOF_VALUE.into());
+ asm.mov(cfp_sp, new_sp);
+
+ asm_comment!(asm, "exit from exception");
+ asm.cpop_into(SP);
+ asm.cpop_into(EC);
+ asm.cpop_into(CFP);
- pop(ocb, REG_SP);
- pop(ocb, REG_EC);
- pop(ocb, REG_CFP);
+ asm.frame_teardown();
- ret(ocb);
+ // Execute vm_exec_core
+ asm.cret(Qundef.into());
- return code_ptr;
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
}
-/// Compile an interpreter entry block to be inserted into an iseq
-/// Returns None if compilation fails.
-pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> Option<CodePtr> {
- const MAX_PROLOGUE_SIZE: usize = 1024;
+// Generate a runtime guard that ensures the PC is at the expected
+// instruction index in the iseq, otherwise takes an entry stub
+// that generates another check and entry.
+// This is to handle the situation of optional parameters.
+// When a function with optional parameters is called, the entry
+// PC for the method isn't necessarily 0.
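+// For example (illustrative), with `def foo(a = expensive_default)` the interpreter
+// enters past the default-computing bytecode when the caller supplies `a`, so the
+// method body has more than one possible entry PC.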
+pub fn gen_entry_chain_guard(
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ iseq: IseqPtr,
+ insn_idx: u16,
+) -> Option<PendingEntryRef> {
+ let entry = new_pending_entry();
+ let stub_addr = gen_entry_stub(entry.uninit_entry.as_ptr() as usize, ocb)?;
- // Check if we have enough executable memory
- if !cb.has_capacity(MAX_PROLOGUE_SIZE) {
- return None;
- }
+ let pc_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC);
+ let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) };
+ let expected_pc_opnd = Opnd::const_ptr(expected_pc as *const u8);
- let old_write_pos = cb.get_write_pos();
+ asm_comment!(asm, "guard expected PC");
+ asm.cmp(pc_opnd, expected_pc_opnd);
- // Align the current write position to cache line boundaries
- cb.align_pos(64);
+ asm.mark_entry_start(&entry);
+ asm.jne(stub_addr.into());
+ asm.mark_entry_end(&entry);
+ return Some(entry);
+}
+/// Compile an interpreter entry block to be inserted into an iseq
+/// Returns None if compilation fails.
+/// If jit_exception is true, compile JIT code for handling exceptions.
+/// See [jit_compile_exception] for details.
+pub fn gen_entry_prologue(
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ iseq: IseqPtr,
+ insn_idx: u16,
+ jit_exception: bool,
+) -> Option<CodePtr> {
let code_ptr = cb.get_write_ptr();
- add_comment(cb, "yjit entry");
- push(cb, REG_CFP);
- push(cb, REG_EC);
- push(cb, REG_SP);
+ let mut asm = Assembler::new();
+ if get_option_ref!(dump_disasm).is_some() {
+ asm_comment!(asm, "YJIT entry point: {}", iseq_get_location(iseq, 0));
+ } else {
+ asm_comment!(asm, "YJIT entry");
+ }
+
+ asm.frame_setup();
- // We are passed EC and CFP
- mov(cb, REG_EC, C_ARG_REGS[0]);
- mov(cb, REG_CFP, C_ARG_REGS[1]);
+ // Save the CFP, EC, SP registers to the C stack
+ asm.cpush(CFP);
+ asm.cpush(EC);
+ asm.cpush(SP);
+
+ // We are passed EC and CFP as arguments
+ asm.mov(EC, C_ARG_OPNDS[0]);
+ asm.mov(CFP, C_ARG_OPNDS[1]);
// Load the current SP from the CFP into REG_SP
- mov(cb, REG_SP, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP));
+ asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP));
// Setup cfp->jit_return
- mov(
- cb,
- REG0,
- code_ptr_opnd(CodegenGlobals::get_leave_exit_code()),
- );
- mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_JIT_RETURN), REG0);
+ // If this is an exception handler entry point
+ if jit_exception {
+ // On jit_exec_exception(), it's NOT safe to return a non-Qundef value
+ // from a non-FINISH frame. This function fixes that problem.
+ // See [jit_compile_exception] for details.
+ asm.ccall(
+ rb_yjit_set_exception_return as *mut u8,
+ vec![
+ CFP,
+ Opnd::const_ptr(CodegenGlobals::get_leave_exit_code().raw_ptr(cb)),
+ Opnd::const_ptr(CodegenGlobals::get_leave_exception_code().raw_ptr(cb)),
+ ],
+ );
+ } else {
+ // On jit_exec() or JIT_EXEC(), it's safe to return a non-Qundef value
+ // on the entry frame. See [jit_compile] for details.
+ asm.mov(
+ Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN),
+ Opnd::const_ptr(CodegenGlobals::get_leave_exit_code().raw_ptr(cb)),
+ );
+ }
- // We're compiling iseqs that we *expect* to start at `insn_idx`. But in
- // the case of optional parameters, the interpreter can set the pc to a
- // different location depending on the optional parameters. If an iseq
- // has optional parameters, we'll add a runtime check that the PC we've
+ // We're compiling iseqs that we *expect* to start at `insn_idx`.
+ // But in the case of optional parameters or when handling exceptions,
+ // the interpreter can set the pc to a different location. For
+ // such scenarios, we'll add a runtime check that the PC we've
// compiled for is the same PC that the interpreter wants us to run with.
- // If they don't match, then we'll take a side exit.
- if unsafe { get_iseq_flags_has_opt(iseq) } {
- gen_pc_guard(cb, iseq, insn_idx);
- }
+ // If they don't match, then we'll jump to an entry stub and generate
+ // another PC check and entry there.
+ let pending_entry = if unsafe { get_iseq_flags_has_opt(iseq) } || jit_exception {
+ Some(gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?)
+ } else {
+ None
+ };
- // Verify MAX_PROLOGUE_SIZE
- assert!(cb.get_write_pos() - old_write_pos <= MAX_PROLOGUE_SIZE);
+ asm.compile(cb, Some(ocb))?;
- return Some(code_ptr);
+ if cb.has_dropped_bytes() {
+ None
+ } else {
+ // Mark code pages for code GC
+ let iseq_payload = get_or_create_iseq_payload(iseq);
+ for page in cb.addrs_to_pages(code_ptr, cb.get_write_ptr()) {
+ iseq_payload.pages.insert(page);
+ }
+ // Write an entry to the heap and push it to the ISEQ
+ if let Some(pending_entry) = pending_entry {
+ let pending_entry = Rc::try_unwrap(pending_entry)
+ .ok().expect("PendingEntry should be unique");
+ iseq_payload.entries.push(pending_entry.into_entry());
+ }
+ Some(code_ptr)
+ }
}
// Generate code to check for interrupts and take a side-exit.
// Warning: this function clobbers REG0
-fn gen_check_ints(cb: &mut CodeBlock, side_exit: CodePtr) {
+fn gen_check_ints(
+ asm: &mut Assembler,
+ counter: Counter,
+) {
// Check for interrupts
// see RUBY_VM_CHECK_INTS(ec) macro
- add_comment(cb, "RUBY_VM_CHECK_INTS(ec)");
- mov(
- cb,
- REG0_32,
- mem_opnd(32, REG_EC, RUBY_OFFSET_EC_INTERRUPT_MASK),
- );
- not(cb, REG0_32);
- test(
- cb,
- mem_opnd(32, REG_EC, RUBY_OFFSET_EC_INTERRUPT_FLAG),
- REG0_32,
- );
- jnz_ptr(cb, side_exit);
+ asm_comment!(asm, "RUBY_VM_CHECK_INTS(ec)");
+
+ // Not checking interrupt_mask since it's zero outside finalize_deferred_heap_pages,
+ // signal_exec, or rb_postponed_job_flush.
+ let interrupt_flag = asm.load(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG));
+ asm.test(interrupt_flag, interrupt_flag);
+
+ asm.jnz(Target::side_exit(counter));
}
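For comparison, here are the two interrupt checks stated as plain Rust: the first mirrors the mask-aware test the removed code emitted, the second mirrors the simplified test emitted above. This is a standalone sketch of the condition only; the field names and u32 widths are assumptions for illustration, not CRuby's actual definitions.

    // Mask-aware form, matching the removed code: (flag & !mask) != 0
    fn pending_interrupt_masked(interrupt_flag: u32, interrupt_mask: u32) -> bool {
        (interrupt_flag & !interrupt_mask) != 0
    }

    // Simplified form emitted above; equivalent whenever interrupt_mask == 0,
    // which holds outside the few functions named in the comment.
    fn pending_interrupt(interrupt_flag: u32) -> bool {
        interrupt_flag != 0
    }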
// Generate a stubbed unconditional jump to the next bytecode instruction.
// Blocks that are part of a guard chain can use this to share the same successor.
fn jump_to_next_insn(
jit: &mut JITState,
- current_context: &Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) {
- // Reset the depth since in current usages we only ever jump to to
+) -> Option<()> {
+ // Reset the depth since in current usages we only ever jump to
// chain_depth > 0 from the same instruction.
- let mut reset_depth = *current_context;
- reset_depth.reset_chain_depth();
+ let mut reset_depth = asm.ctx;
+ reset_depth.reset_chain_depth_and_defer();
let jump_block = BlockId {
iseq: jit.iseq,
- idx: jit_next_insn_idx(jit),
+ idx: jit.next_insn_idx(),
};
// We are at the end of the current instruction. Record the boundary.
if jit.record_boundary_patch_point {
- let next_insn = unsafe { jit.pc.offset(insn_len(jit.opcode).try_into().unwrap()) };
- let exit_pos = gen_exit(next_insn, &reset_depth, ocb.unwrap());
- record_global_inval_patch(cb, exit_pos);
jit.record_boundary_patch_point = false;
+ let exit_pc = unsafe { jit.pc.offset(insn_len(jit.opcode).try_into().unwrap()) };
+ let exit_pos = gen_outlined_exit(exit_pc, &reset_depth, ocb);
+ record_global_inval_patch(asm, exit_pos?);
}
// Generate the jump instruction
- gen_direct_jump(jit, &reset_depth, jump_block, cb);
+ gen_direct_jump(jit, &reset_depth, jump_block, asm);
+ Some(())
}
// Compile a sequence of bytecode instructions for a given basic block version.
@@ -706,42 +1107,72 @@ pub fn gen_single_block(
ocb: &mut OutlinedCb,
) -> Result<BlockRef, ()> {
// Limit the number of specialized versions for this block
- let mut ctx = limit_block_versions(blockid, start_ctx);
+ let ctx = limit_block_versions(blockid, start_ctx);
verify_blockid(blockid);
assert!(!(blockid.idx == 0 && ctx.get_stack_size() > 0));
+ // Save machine code placement of the block. `cb` might page switch when we
+ // generate code in `ocb`.
+ let block_start_addr = cb.get_write_ptr();
+
// Instruction sequence to compile
let iseq = blockid.iseq;
let iseq_size = unsafe { get_iseq_encoded_size(iseq) };
- let mut insn_idx: c_uint = blockid.idx;
- let starting_insn_idx = insn_idx;
-
- // Allocate the new block
- let blockref = Block::new(blockid, &ctx);
+ let iseq_size: IseqIdx = if let Ok(size) = iseq_size.try_into() {
+ size
+ } else {
+ // ISeq too large to compile
+ return Err(());
+ };
+ let mut insn_idx: IseqIdx = blockid.idx;
// Initialize a JIT state object
- let mut jit = JITState::new(&blockref);
+ let mut jit = JITState::new(blockid, ctx, cb.get_write_ptr(), ec);
jit.iseq = blockid.iseq;
- jit.ec = Some(ec);
- // Mark the start position of the block
- blockref.borrow_mut().set_start_addr(cb.get_write_ptr());
+ // Create a backend assembler instance
+ let mut asm = Assembler::new();
+ asm.ctx = ctx;
+
+ #[cfg(feature = "disasm")]
+ if get_option_ref!(dump_disasm).is_some() {
+ let blockid_idx = blockid.idx;
+ let chain_depth = if asm.ctx.get_chain_depth() > 0 { format!("(chain_depth: {})", asm.ctx.get_chain_depth()) } else { "".to_string() };
+ asm_comment!(asm, "Block: {} {}", iseq_get_location(blockid.iseq, blockid_idx), chain_depth);
+ asm_comment!(asm, "reg_temps: {:08b}", asm.ctx.get_reg_temps().as_u8());
+ }
+
+ // Mark the start of an ISEQ for --yjit-perf
+ jit_perf_symbol_push!(jit, &mut asm, &get_iseq_name(iseq), PerfMap::ISEQ);
+
+ if asm.ctx.is_return_landing() {
+ // Continuation of the end of gen_leave().
+ // Reload REG_SP for the current frame and transfer the return value
+ // to the stack top.
+ asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP));
+
+ let top = asm.stack_push(Type::Unknown);
+ asm.mov(top, C_RET_OPND);
+
+ asm.ctx.clear_return_landing();
+ }
// For each instruction to compile
// NOTE: could rewrite this loop with a std::iter::Iterator
while insn_idx < iseq_size {
// Get the current pc and opcode
- let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) };
+ let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) };
// try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes.
let opcode: usize = unsafe { rb_iseq_opcode_at_pc(iseq, pc) }
.try_into()
.unwrap();
- // opt_getinlinecache wants to be in a block all on its own. Cut the block short
- // if we run into it. See gen_opt_getinlinecache() for details.
- if opcode == YARVINSN_opt_getinlinecache.as_usize() && insn_idx > starting_insn_idx {
- jump_to_next_insn(&mut jit, &ctx, cb, ocb);
+ // We need opt_getconstant_path to be in a block all on its own. Cut the block short
+ // if we run into it. This is necessary because we want to invalidate based on the
+ // instruction's index.
+ if opcode == YARVINSN_opt_getconstant_path.as_usize() && insn_idx > jit.starting_insn_idx {
+ jump_to_next_insn(&mut jit, &mut asm, ocb);
break;
}
@@ -749,56 +1180,70 @@ pub fn gen_single_block(
jit.insn_idx = insn_idx;
jit.opcode = opcode;
jit.pc = pc;
- jit.side_exit_for_pc = None;
+ jit.stack_size_for_pc = asm.ctx.get_stack_size();
+ asm.set_side_exit_context(pc, asm.ctx.get_stack_size());
+
+ // stack_pop doesn't immediately deallocate a register for stack temps,
+ // but it's safe to do so at this instruction boundary.
+ for stack_idx in asm.ctx.get_stack_size()..MAX_REG_TEMPS {
+ asm.ctx.dealloc_temp_reg(stack_idx);
+ }
// If previous instruction requested to record the boundary
if jit.record_boundary_patch_point {
// Generate an exit to this instruction and record it
- let exit_pos = gen_exit(jit.pc, &ctx, ocb.unwrap());
- record_global_inval_patch(cb, exit_pos);
+ let exit_pos = gen_outlined_exit(jit.pc, &asm.ctx, ocb).ok_or(())?;
+ record_global_inval_patch(&mut asm, exit_pos);
jit.record_boundary_patch_point = false;
}
// In debug mode, verify our existing assumption
- if cfg!(debug_assertions) && get_option!(verify_ctx) && jit_at_current_insn(&jit) {
- verify_ctx(&jit, &ctx);
+ if cfg!(debug_assertions) && get_option!(verify_ctx) && jit.at_current_insn() {
+ verify_ctx(&jit, &asm.ctx);
}
+ // :count-placement:
+ // Count bytecode instructions that execute in generated code.
+ // Note that the increment happens even when the output takes side exit.
+ gen_counter_incr(&mut asm, Counter::yjit_insns_count);
+
// Lookup the codegen function for this instruction
- let mut status = CantCompile;
+ let mut status = None;
if let Some(gen_fn) = get_gen_fn(VALUE(opcode)) {
- // :count-placement:
- // Count bytecode instructions that execute in generated code.
- // Note that the increment happens even when the output takes side exit.
- gen_counter_incr!(cb, exec_instruction);
-
// Add a comment for the name of the YARV instruction
- add_comment(cb, &insn_name(opcode));
+ asm_comment!(asm, "Insn: {:04} {} (stack_size: {})", insn_idx, insn_name(opcode), asm.ctx.get_stack_size());
// If requested, dump instructions for debugging
if get_option!(dump_insns) {
println!("compiling {}", insn_name(opcode));
- print_str(cb, &format!("executing {}", insn_name(opcode)));
+ print_str(&mut asm, &format!("executing {}", insn_name(opcode)));
}
// Call the code generation function
- status = gen_fn(&mut jit, &mut ctx, cb, ocb);
+ jit_perf_symbol_push!(jit, &mut asm, &insn_name(opcode), PerfMap::Codegen);
+ status = gen_fn(&mut jit, &mut asm, ocb);
+ jit_perf_symbol_pop!(jit, &mut asm, PerfMap::Codegen);
+
+ #[cfg(debug_assertions)]
+ assert!(!asm.get_leaf_ccall(), "ccall() wasn't used after leaf_ccall was set in {}", insn_name(opcode));
}
// If we can't compile this instruction
// exit to the interpreter and stop compiling
- if status == CantCompile {
- let mut block = jit.block.borrow_mut();
-
- // TODO: if the codegen function makes changes to ctx and then return YJIT_CANT_COMPILE,
- // the exit this generates would be wrong. We could save a copy of the entry context
- // and assert that ctx is the same here.
- let exit = gen_exit(jit.pc, &ctx, cb);
-
- // If this is the first instruction in the block, then we can use
- // the exit for block->entry_exit.
- if insn_idx == block.get_blockid().idx {
- block.entry_exit = Some(exit);
+ if status == None {
+ if get_option!(dump_insns) {
+ println!("can't compile {}", insn_name(opcode));
+ }
+
+ // Rewind stack_size using ctx.with_stack_size to allow stack_size changes
+ // before you return None.
+ asm.ctx = asm.ctx.with_stack_size(jit.stack_size_for_pc);
+ gen_exit(jit.pc, &mut asm);
+
+ // If this is the first instruction in the block, then
+ // the entry address is the address for block_entry_exit
+ if insn_idx == jit.starting_insn_idx {
+ jit.block_entry_exit = Some(jit.output_ptr);
}
break;
@@ -806,661 +1251,955 @@ pub fn gen_single_block(
// For now, reset the chain depth after each instruction as only the
// first instruction in the block can concern itself with the depth.
- ctx.reset_chain_depth();
+ asm.ctx.reset_chain_depth_and_defer();
// Move to the next instruction to compile
- insn_idx += insn_len(opcode);
+ insn_idx += insn_len(opcode) as u16;
+
+ // Move past next instruction when instructed
+ if status == Some(SkipNextInsn) {
+ let next_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) };
+ let next_opcode: usize = unsafe { rb_iseq_opcode_at_pc(iseq, next_pc) }.try_into().unwrap();
+ insn_idx += insn_len(next_opcode) as u16;
+ }
// If the instruction terminates this block
- if status == EndBlock {
+ if status == Some(EndBlock) {
break;
}
}
+ let end_insn_idx = insn_idx;
- // Finish filling out the block
- {
- let mut block = jit.block.borrow_mut();
-
- // Mark the end position of the block
- block.set_end_addr(cb.get_write_ptr());
+ // We currently can't handle cases where the request is for a block that
+ // doesn't go to the next instruction in the same iseq.
+ assert!(!jit.record_boundary_patch_point);
- // Store the index of the last instruction in the block
- block.set_end_idx(insn_idx);
+ // Pad the block if it has the potential to be invalidated
+ if jit.block_entry_exit.is_some() {
+ asm.pad_inval_patch();
}
- // We currently can't handle cases where the request is for a block that
- // doesn't go to the next instruction.
- //assert!(!jit.record_boundary_patch_point);
+ // Mark the end of an ISEQ for --yjit-perf
+ jit_perf_symbol_pop!(jit, &mut asm, PerfMap::ISEQ);
+
+ // Compile code into the code block
+ let (_, gc_offsets) = asm.compile(cb, Some(ocb)).ok_or(())?;
+ let end_addr = cb.get_write_ptr();
+
+ // Flush perf symbols after asm.compile() writes addresses
+ if get_option!(perf_map).is_some() {
+ jit.flush_perf_symbols(cb);
+ }
// If code for the block doesn't fit, fail
if cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes() {
return Err(());
}
- // TODO: we may want a feature for this called dump_insns? Can leave commented for now
- /*
- if (YJIT_DUMP_MODE >= 2) {
- // Dump list of compiled instrutions
- fprintf(stderr, "Compiled the following for iseq=%p:\n", (void *)iseq);
- for (uint32_t idx = block->blockid.idx; idx < insn_idx; ) {
- int opcode = yjit_opcode_at_pc(iseq, yjit_iseq_pc_at_idx(iseq, idx));
- fprintf(stderr, " %04d %s\n", idx, insn_name(opcode));
- idx += insn_len(opcode);
- }
- }
- */
-
// Block compiled successfully
- Ok(blockref)
+ Ok(jit.into_block(end_insn_idx, block_start_addr, end_addr, gc_offsets))
}
fn gen_nop(
_jit: &mut JITState,
- _ctx: &mut Context,
- _cb: &mut CodeBlock,
+ _asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
// Do nothing
- KeepCompiling
+ Some(KeepCompiling)
}
fn gen_pop(
_jit: &mut JITState,
- ctx: &mut Context,
- _cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
// Decrement SP
- ctx.stack_pop(1);
- KeepCompiling
+ asm.stack_pop(1);
+ Some(KeepCompiling)
}
fn gen_dup(
_jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let dup_val = ctx.stack_pop(0);
- let (mapping, tmp_type) = ctx.get_opnd_mapping(StackOpnd(0));
+) -> Option<CodegenStatus> {
+ let dup_val = asm.stack_opnd(0);
+ let mapping = asm.ctx.get_opnd_mapping(dup_val.into());
- let loc0 = ctx.stack_push_mapping((mapping, tmp_type));
- mov(cb, REG0, dup_val);
- mov(cb, loc0, REG0);
+ let loc0 = asm.stack_push_mapping(mapping);
+ asm.mov(loc0, dup_val);
- KeepCompiling
+ Some(KeepCompiling)
}
// duplicate stack top n elements
fn gen_dupn(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let nval: VALUE = jit_get_arg(jit, 0);
- let VALUE(n) = nval;
+) -> Option<CodegenStatus> {
+ let n = jit.get_arg(0).as_usize();
// In practice, seems to be only used for n==2
if n != 2 {
- return CantCompile;
+ return None;
}
- let opnd1: X86Opnd = ctx.stack_opnd(1);
- let opnd0: X86Opnd = ctx.stack_opnd(0);
+ let opnd1: Opnd = asm.stack_opnd(1);
+ let opnd0: Opnd = asm.stack_opnd(0);
- let mapping1 = ctx.get_opnd_mapping(StackOpnd(1));
- let mapping0 = ctx.get_opnd_mapping(StackOpnd(0));
+ let mapping1 = asm.ctx.get_opnd_mapping(opnd1.into());
+ let mapping0 = asm.ctx.get_opnd_mapping(opnd0.into());
- let dst1: X86Opnd = ctx.stack_push_mapping(mapping1);
- mov(cb, REG0, opnd1);
- mov(cb, dst1, REG0);
+ let dst1: Opnd = asm.stack_push_mapping(mapping1);
+ asm.mov(dst1, opnd1);
- let dst0: X86Opnd = ctx.stack_push_mapping(mapping0);
- mov(cb, REG0, opnd0);
- mov(cb, dst0, REG0);
+ let dst0: Opnd = asm.stack_push_mapping(mapping0);
+ asm.mov(dst0, opnd0);
- KeepCompiling
+ Some(KeepCompiling)
}
// Swap top 2 stack entries
fn gen_swap(
_jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- stack_swap(ctx, cb, 0, 1, REG0, REG1);
- KeepCompiling
+) -> Option<CodegenStatus> {
+ stack_swap(asm, 0, 1);
+ Some(KeepCompiling)
}
fn stack_swap(
- ctx: &mut Context,
- cb: &mut CodeBlock,
- offset0: u16,
- offset1: u16,
- _reg0: X86Opnd,
- _reg1: X86Opnd,
+ asm: &mut Assembler,
+ offset0: i32,
+ offset1: i32,
) {
- let opnd0 = ctx.stack_opnd(offset0 as i32);
- let opnd1 = ctx.stack_opnd(offset1 as i32);
+ let stack0_mem = asm.stack_opnd(offset0);
+ let stack1_mem = asm.stack_opnd(offset1);
- let mapping0 = ctx.get_opnd_mapping(StackOpnd(offset0));
- let mapping1 = ctx.get_opnd_mapping(StackOpnd(offset1));
+ let mapping0 = asm.ctx.get_opnd_mapping(stack0_mem.into());
+ let mapping1 = asm.ctx.get_opnd_mapping(stack1_mem.into());
- mov(cb, REG0, opnd0);
- mov(cb, REG1, opnd1);
- mov(cb, opnd0, REG1);
- mov(cb, opnd1, REG0);
+ let stack0_reg = asm.load(stack0_mem);
+ let stack1_reg = asm.load(stack1_mem);
+ asm.mov(stack0_mem, stack1_reg);
+ asm.mov(stack1_mem, stack0_reg);
- ctx.set_opnd_mapping(StackOpnd(offset0), mapping1);
- ctx.set_opnd_mapping(StackOpnd(offset1), mapping0);
+ asm.ctx.set_opnd_mapping(stack0_mem.into(), mapping1);
+ asm.ctx.set_opnd_mapping(stack1_mem.into(), mapping0);
}
fn gen_putnil(
- jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ _jit: &mut JITState,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- jit_putobject(jit, ctx, cb, Qnil);
- KeepCompiling
+) -> Option<CodegenStatus> {
+ jit_putobject(asm, Qnil);
+ Some(KeepCompiling)
}
-fn jit_putobject(jit: &mut JITState, ctx: &mut Context, cb: &mut CodeBlock, arg: VALUE) {
+fn jit_putobject(asm: &mut Assembler, arg: VALUE) {
let val_type: Type = Type::from(arg);
- let stack_top = ctx.stack_push(val_type);
-
- if arg.special_const_p() {
- // Immediates will not move and do not need to be tracked for GC
- // Thanks to this we can mov directly to memory when possible.
- let imm = imm_opnd(arg.as_i64());
-
- // 64-bit immediates can't be directly written to memory
- if imm.num_bits() <= 32 {
- mov(cb, stack_top, imm);
- } else {
- mov(cb, REG0, imm);
- mov(cb, stack_top, REG0);
- }
- } else {
- // Load the value to push into REG0
- // Note that this value may get moved by the GC
- jit_mov_gc_ptr(jit, cb, REG0, arg);
-
- // Write argument at SP
- mov(cb, stack_top, REG0);
- }
+ let stack_top = asm.stack_push(val_type);
+ asm.mov(stack_top, arg.into());
}
fn gen_putobject_int2fix(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
- _ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
let opcode = jit.opcode;
let cst_val: usize = if opcode == YARVINSN_putobject_INT2FIX_0_.as_usize() {
0
} else {
1
};
+ let cst_val = VALUE::fixnum_from_usize(cst_val);
- jit_putobject(jit, ctx, cb, VALUE::fixnum_from_usize(cst_val));
- KeepCompiling
+ if let Some(result) = fuse_putobject_opt_ltlt(jit, asm, cst_val, ocb) {
+ return Some(result);
+ }
+
+ jit_putobject(asm, cst_val);
+ Some(KeepCompiling)
}
fn gen_putobject(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
- _ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let arg: VALUE = jit_get_arg(jit, 0);
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ let arg: VALUE = jit.get_arg(0);
+
+ if let Some(result) = fuse_putobject_opt_ltlt(jit, asm, arg, ocb) {
+ return Some(result);
+ }
- jit_putobject(jit, ctx, cb, arg);
- KeepCompiling
+ jit_putobject(asm, arg);
+ Some(KeepCompiling)
+}
+
+/// Combine `putobject` and `opt_ltlt` together if profitable, for example when
+/// left shifting an integer by a constant amount.
+fn fuse_putobject_opt_ltlt(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ constant_object: VALUE,
+ ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ let next_opcode = unsafe { rb_vm_insn_addr2opcode(jit.pc.add(insn_len(jit.opcode).as_usize()).read().as_ptr()) };
+ if next_opcode == YARVINSN_opt_ltlt as i32 && constant_object.fixnum_p() {
+ // Untag the fixnum shift amount
+ let shift_amt = constant_object.as_isize() >> 1;
+ if shift_amt > 63 || shift_amt < 0 {
+ return None;
+ }
+ if !jit.at_current_insn() {
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
+ }
+
+ let lhs = jit.peek_at_stack(&asm.ctx, 0);
+ if !lhs.fixnum_p() {
+ return None;
+ }
+
+ if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_LTLT) {
+ return None;
+ }
+
+ asm_comment!(asm, "integer left shift with rhs={shift_amt}");
+ let lhs = asm.stack_opnd(0);
+
+ // Guard that lhs is a fixnum if necessary
+ let lhs_type = asm.ctx.get_opnd_type(lhs.into());
+ if lhs_type != Type::Fixnum {
+ asm_comment!(asm, "guard arg0 fixnum");
+ asm.test(lhs, Opnd::UImm(RUBY_FIXNUM_FLAG as u64));
+
+ jit_chain_guard(
+ JCC_JZ,
+ jit,
+ asm,
+ ocb,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_not_fixnums,
+ );
+ }
+
+ asm.stack_pop(1);
+ fixnum_left_shift_body(asm, lhs, shift_amt as u64);
+ return Some(SkipNextInsn);
+ }
+ return None;
}
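A standalone sketch of the fixnum tagging arithmetic that the `>> 1` untagging and the 0..=63 range check above rely on, assuming the usual `(n << 1) | 1` fixnum encoding. This is plain Rust for illustration, not YJIT code.

    // Ruby fixnums are assumed to be encoded as (n << 1) | 1.
    fn tag_fixnum(n: isize) -> isize { (n << 1) | 1 }
    fn untag_fixnum(tagged: isize) -> isize { tagged >> 1 }

    fn main() {
        // The same `>> 1` used on constant_object.as_isize() above
        let shift_amt = untag_fixnum(tag_fixnum(5));
        assert_eq!(shift_amt, 5);
        // Mirrors the guard that bails on shift amounts outside 0..=63
        assert!((0..=63).contains(&shift_amt));
    }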
fn gen_putself(
_jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- // Load self from CFP
- let cf_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG_CFP, RUBY_OFFSET_CFP_SELF);
- mov(cb, REG0, cf_opnd);
+) -> Option<CodegenStatus> {
// Write it on the stack
- let stack_top: X86Opnd = ctx.stack_push_self();
- mov(cb, stack_top, REG0);
+ let stack_top = asm.stack_push_self();
+ asm.mov(
+ stack_top,
+ Opnd::mem(VALUE_BITS, CFP, RUBY_OFFSET_CFP_SELF)
+ );
- KeepCompiling
+ Some(KeepCompiling)
}
fn gen_putspecialobject(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let object_type = jit_get_arg(jit, 0);
-
- if object_type == VALUE(VM_SPECIAL_OBJECT_VMCORE.as_usize()) {
- let stack_top: X86Opnd = ctx.stack_push(Type::UnknownHeap);
- jit_mov_gc_ptr(jit, cb, REG0, unsafe { rb_mRubyVMFrozenCore });
- mov(cb, stack_top, REG0);
- KeepCompiling
+) -> Option<CodegenStatus> {
+ let object_type = jit.get_arg(0).as_usize();
+
+ if object_type == VM_SPECIAL_OBJECT_VMCORE.as_usize() {
+ let stack_top = asm.stack_push(Type::UnknownHeap);
+ let frozen_core = unsafe { rb_mRubyVMFrozenCore };
+ asm.mov(stack_top, frozen_core.into());
+ Some(KeepCompiling)
} else {
// TODO: implement for VM_SPECIAL_OBJECT_CBASE and
// VM_SPECIAL_OBJECT_CONST_BASE
- CantCompile
+ None
}
}
// set Nth stack entry to stack top
fn gen_setn(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let nval: VALUE = jit_get_arg(jit, 0);
- let VALUE(n) = nval;
-
- let top_val: X86Opnd = ctx.stack_pop(0);
- let dst_opnd: X86Opnd = ctx.stack_opnd(n.try_into().unwrap());
- mov(cb, REG0, top_val);
- mov(cb, dst_opnd, REG0);
+) -> Option<CodegenStatus> {
+ let n = jit.get_arg(0).as_usize();
+
+ let top_val = asm.stack_opnd(0);
+ let dst_opnd = asm.stack_opnd(n.try_into().unwrap());
+ asm.mov(
+ dst_opnd,
+ top_val
+ );
- let mapping = ctx.get_opnd_mapping(StackOpnd(0));
- ctx.set_opnd_mapping(StackOpnd(n.try_into().unwrap()), mapping);
+ let mapping = asm.ctx.get_opnd_mapping(top_val.into());
+ asm.ctx.set_opnd_mapping(dst_opnd.into(), mapping);
- KeepCompiling
+ Some(KeepCompiling)
}
// get nth stack value, then push it
fn gen_topn(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let nval: VALUE = jit_get_arg(jit, 0);
- let VALUE(n) = nval;
+) -> Option<CodegenStatus> {
+ let n = jit.get_arg(0).as_usize();
- let top_n_val = ctx.stack_opnd(n.try_into().unwrap());
- let mapping = ctx.get_opnd_mapping(StackOpnd(n.try_into().unwrap()));
+ let top_n_val = asm.stack_opnd(n.try_into().unwrap());
+ let mapping = asm.ctx.get_opnd_mapping(top_n_val.into());
+ let loc0 = asm.stack_push_mapping(mapping);
+ asm.mov(loc0, top_n_val);
- let loc0 = ctx.stack_push_mapping(mapping);
- mov(cb, REG0, top_n_val);
- mov(cb, loc0, REG0);
-
- KeepCompiling
+ Some(KeepCompiling)
}
// Pop n values off the stack
fn gen_adjuststack(
jit: &mut JITState,
- ctx: &mut Context,
- _cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let nval: VALUE = jit_get_arg(jit, 0);
- let VALUE(n) = nval;
-
- ctx.stack_pop(n);
- KeepCompiling
+) -> Option<CodegenStatus> {
+ let n = jit.get_arg(0).as_usize();
+ asm.stack_pop(n);
+ Some(KeepCompiling)
}
fn gen_opt_plus(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- if !jit_at_current_insn(jit) {
- defer_compilation(jit, ctx, cb, ocb);
- return EndBlock;
- }
-
- let comptime_a = jit_peek_at_stack(jit, ctx, 1);
- let comptime_b = jit_peek_at_stack(jit, ctx, 0);
-
- if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
- // Create a side-exit to fall back to the interpreter
- // Note: we generate the side-exit before popping operands from the stack
- let side_exit = get_side_exit(jit, ocb, ctx);
+) -> Option<CodegenStatus> {
+ let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) {
+ Some(two_fixnums) => two_fixnums,
+ None => {
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
+ }
+ };
- if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS) {
- return CantCompile;
+ if two_fixnums {
+ if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS) {
+ return None;
}
// Check that both operands are fixnums
- guard_two_fixnums(ctx, cb, side_exit);
+ guard_two_fixnums(jit, asm, ocb);
- // Get the operands and destination from the stack
- let arg1 = ctx.stack_pop(1);
- let arg0 = ctx.stack_pop(1);
+ // Get the operands from the stack
+ let arg1 = asm.stack_pop(1);
+ let arg0 = asm.stack_pop(1);
// Add arg0 + arg1 and test for overflow
- mov(cb, REG0, arg0);
- sub(cb, REG0, imm_opnd(1));
- add(cb, REG0, arg1);
- jo_ptr(cb, side_exit);
+ let arg0_untag = asm.sub(arg0, Opnd::Imm(1));
+ let out_val = asm.add(arg0_untag, arg1);
+ asm.jo(Target::side_exit(Counter::opt_plus_overflow));
// Push the output on the stack
- let dst = ctx.stack_push(Type::Fixnum);
- mov(cb, dst, REG0);
+ let dst = asm.stack_push(Type::Fixnum);
+ asm.mov(dst, out_val);
- KeepCompiling
+ Some(KeepCompiling)
} else {
- gen_opt_send_without_block(jit, ctx, cb, ocb)
+ gen_opt_send_without_block(jit, asm, ocb)
}
}
// new array initialized from top N values
fn gen_newarray(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let n = jit_get_arg(jit, 0).as_u32();
+) -> Option<CodegenStatus> {
+ let n = jit.get_arg(0).as_u32();
// Save the PC and SP because we are allocating
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_call_with_gc(jit, asm);
- let offset_magnitude = SIZEOF_VALUE as u32 * n;
- let values_ptr = ctx.sp_opnd(-(offset_magnitude as isize));
+ // If n is 0, then elts is never going to be read, so we can just pass null
+ let values_ptr = if n == 0 {
+ Opnd::UImm(0)
+ } else {
+ asm_comment!(asm, "load pointer to array elements");
+ let values_opnd = asm.ctx.sp_opnd(-(n as i32));
+ asm.lea(values_opnd)
+ };
// call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts);
- mov(cb, C_ARG_REGS[0], REG_EC);
- mov(cb, C_ARG_REGS[1], imm_opnd(n.into()));
- lea(cb, C_ARG_REGS[2], values_ptr);
- call_ptr(cb, REG0, rb_ec_ary_new_from_values as *const u8);
+ let new_ary = asm.ccall(
+ rb_ec_ary_new_from_values as *const u8,
+ vec![
+ EC,
+ Opnd::UImm(n.into()),
+ values_ptr
+ ]
+ );
- ctx.stack_pop(n.as_usize());
- let stack_ret = ctx.stack_push(Type::Array);
- mov(cb, stack_ret, RAX);
+ asm.stack_pop(n.as_usize());
+ let stack_ret = asm.stack_push(Type::CArray);
+ asm.mov(stack_ret, new_ary);
- KeepCompiling
+ Some(KeepCompiling)
}
// dup array
fn gen_duparray(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let ary = jit_get_arg(jit, 0);
+) -> Option<CodegenStatus> {
+ let ary = jit.get_arg(0);
// Save the PC and SP because we are allocating
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_call_with_gc(jit, asm);
// call rb_ary_resurrect(VALUE ary);
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], ary);
- call_ptr(cb, REG0, rb_ary_resurrect as *const u8);
+ let new_ary = asm.ccall(
+ rb_ary_resurrect as *const u8,
+ vec![ary.into()],
+ );
- let stack_ret = ctx.stack_push(Type::Array);
- mov(cb, stack_ret, RAX);
+ let stack_ret = asm.stack_push(Type::CArray);
+ asm.mov(stack_ret, new_ary);
- KeepCompiling
+ Some(KeepCompiling)
}
// dup hash
fn gen_duphash(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let hash = jit_get_arg(jit, 0);
+) -> Option<CodegenStatus> {
+ let hash = jit.get_arg(0);
// Save the PC and SP because we are allocating
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_call_with_gc(jit, asm);
// call rb_hash_resurrect(VALUE hash);
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], hash);
- call_ptr(cb, REG0, rb_hash_resurrect as *const u8);
+ let hash = asm.ccall(rb_hash_resurrect as *const u8, vec![hash.into()]);
- let stack_ret = ctx.stack_push(Type::Hash);
- mov(cb, stack_ret, RAX);
+ let stack_ret = asm.stack_push(Type::CHash);
+ asm.mov(stack_ret, hash);
- KeepCompiling
+ Some(KeepCompiling)
}
// call to_a on the array on the stack
fn gen_splatarray(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let flag = jit_get_arg(jit, 0);
+) -> Option<CodegenStatus> {
+ let flag = jit.get_arg(0).as_usize();
- // Save the PC and SP because the callee may allocate
+ // Save the PC and SP because the callee may call #to_a
// Note that this modifies REG_SP, which is why we do it first
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_non_leaf_call(jit, asm);
// Get the operands from the stack
- let ary_opnd = ctx.stack_pop(1);
+ let ary_opnd = asm.stack_opnd(0);
// Call rb_vm_splat_array(flag, ary)
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], flag);
- mov(cb, C_ARG_REGS[1], ary_opnd);
- call_ptr(cb, REG1, rb_vm_splat_array as *const u8);
+ let ary = asm.ccall(rb_vm_splat_array as *const u8, vec![flag.into(), ary_opnd]);
+ asm.stack_pop(1); // Keep it on stack during ccall for GC
- let stack_ret = ctx.stack_push(Type::Array);
- mov(cb, stack_ret, RAX);
+ let stack_ret = asm.stack_push(Type::TArray);
+ asm.mov(stack_ret, ary);
- KeepCompiling
+ Some(KeepCompiling)
+}
+
+// call to_hash on the object being keyword-splatted before converting the block
+// e.g. foo(**object, &block)
+fn gen_splatkw(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ // Defer compilation so we can specialize on a runtime hash operand
+ if !jit.at_current_insn() {
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
+ }
+
+ let comptime_hash = jit.peek_at_stack(&asm.ctx, 1);
+ if comptime_hash.hash_p() {
+ // If a compile-time hash operand is T_HASH, just guard that it's T_HASH.
+ let hash_opnd = asm.stack_opnd(1);
+ guard_object_is_hash(asm, hash_opnd, hash_opnd.into(), Counter::splatkw_not_hash);
+ } else if comptime_hash.nil_p() {
+ // Speculate we'll see nil if compile-time hash operand is nil
+ let hash_opnd = asm.stack_opnd(1);
+ let hash_opnd_type = asm.ctx.get_opnd_type(hash_opnd.into());
+
+ if hash_opnd_type != Type::Nil {
+ asm.cmp(hash_opnd, Qnil.into());
+ asm.jne(Target::side_exit(Counter::splatkw_not_nil));
+
+ if Type::Nil.diff(hash_opnd_type) != TypeDiff::Incompatible {
+ asm.ctx.upgrade_opnd_type(hash_opnd.into(), Type::Nil);
+ }
+ }
+ } else {
+ // Otherwise, call #to_hash on the operand if it's not nil.
+
+ // Save the PC and SP because the callee may call #to_hash
+ jit_prepare_non_leaf_call(jit, asm);
+
+ // Get the operands from the stack
+ let block_opnd = asm.stack_opnd(0);
+ let block_type = asm.ctx.get_opnd_type(block_opnd.into());
+ let hash_opnd = asm.stack_opnd(1);
+
+ c_callable! {
+ fn to_hash_if_not_nil(mut obj: VALUE) -> VALUE {
+ if obj != Qnil {
+ obj = unsafe { rb_to_hash_type(obj) };
+ }
+ obj
+ }
+ }
+
+ let hash = asm.ccall(to_hash_if_not_nil as _, vec![hash_opnd]);
+ asm.stack_pop(2); // Keep it on stack during ccall for GC
+
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, hash);
+ asm.stack_push(block_type);
+ // Leave block_opnd spilled by ccall as is
+ asm.ctx.dealloc_temp_reg(asm.ctx.get_stack_size() - 1);
+ }
+
+ Some(KeepCompiling)
+}
+
+// concat two arrays
+fn gen_concatarray(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ // Save the PC and SP because the callee may call #to_a
+ // Note that this modifies REG_SP, which is why we do it first
+ jit_prepare_non_leaf_call(jit, asm);
+
+ // Get the operands from the stack
+ let ary2st_opnd = asm.stack_opnd(0);
+ let ary1_opnd = asm.stack_opnd(1);
+
+ // Call rb_vm_concat_array(ary1, ary2st)
+ let ary = asm.ccall(rb_vm_concat_array as *const u8, vec![ary1_opnd, ary2st_opnd]);
+ asm.stack_pop(2); // Keep them on stack during ccall for GC
+
+ let stack_ret = asm.stack_push(Type::TArray);
+ asm.mov(stack_ret, ary);
+
+ Some(KeepCompiling)
+}
+
+// concat the second array onto the first array.
+// the first argument must already be an array.
+// attempts to convert the second object to an array using to_a.
+fn gen_concattoarray(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ // Save the PC and SP because the callee may call #to_a
+ jit_prepare_non_leaf_call(jit, asm);
+
+ // Get the operands from the stack
+ let ary2_opnd = asm.stack_opnd(0);
+ let ary1_opnd = asm.stack_opnd(1);
+
+ let ary = asm.ccall(rb_vm_concat_to_array as *const u8, vec![ary1_opnd, ary2_opnd]);
+ asm.stack_pop(2); // Keep them on stack during ccall for GC
+
+ let stack_ret = asm.stack_push(Type::TArray);
+ asm.mov(stack_ret, ary);
+
+ Some(KeepCompiling)
+}
+
+// push the given number of objects onto the array directly below them on the stack.
+fn gen_pushtoarray(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ let num = jit.get_arg(0).as_u64();
+
+ // Save the PC and SP because the callee may allocate
+ jit_prepare_call_with_gc(jit, asm);
+
+ // Get the operands from the stack
+ let ary_opnd = asm.stack_opnd(num as i32);
+ let objp_opnd = asm.lea(asm.ctx.sp_opnd(-(num as i32)));
+
+ let ary = asm.ccall(rb_ary_cat as *const u8, vec![ary_opnd, objp_opnd, num.into()]);
+ asm.stack_pop(num as usize + 1); // Keep it on stack during ccall for GC
+
+ let stack_ret = asm.stack_push(Type::TArray);
+ asm.mov(stack_ret, ary);
+
+ Some(KeepCompiling)
}
// new range initialized from top 2 values
fn gen_newrange(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let flag = jit_get_arg(jit, 0);
+) -> Option<CodegenStatus> {
+ let flag = jit.get_arg(0).as_usize();
// rb_range_new() allocates and can raise
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_non_leaf_call(jit, asm);
// val = rb_range_new(low, high, (int)flag);
- mov(cb, C_ARG_REGS[0], ctx.stack_opnd(1));
- mov(cb, C_ARG_REGS[1], ctx.stack_opnd(0));
- mov(cb, C_ARG_REGS[2], uimm_opnd(flag.into()));
- call_ptr(cb, REG0, rb_range_new as *const u8);
+ let range_opnd = asm.ccall(
+ rb_range_new as *const u8,
+ vec![
+ asm.stack_opnd(1),
+ asm.stack_opnd(0),
+ flag.into()
+ ]
+ );
- ctx.stack_pop(2);
- let stack_ret = ctx.stack_push(Type::UnknownHeap);
- mov(cb, stack_ret, RAX);
+ asm.stack_pop(2);
+ let stack_ret = asm.stack_push(Type::UnknownHeap);
+ asm.mov(stack_ret, range_opnd);
- KeepCompiling
+ Some(KeepCompiling)
}
fn guard_object_is_heap(
- cb: &mut CodeBlock,
- object_opnd: X86Opnd,
- _ctx: &mut Context,
- side_exit: CodePtr,
+ asm: &mut Assembler,
+ object: Opnd,
+ object_opnd: YARVOpnd,
+ counter: Counter,
) {
- add_comment(cb, "guard object is heap");
+ let object_type = asm.ctx.get_opnd_type(object_opnd);
+ if object_type.is_heap() {
+ return;
+ }
+
+ asm_comment!(asm, "guard object is heap");
// Test that the object is not an immediate
- test(cb, object_opnd, uimm_opnd(RUBY_IMMEDIATE_MASK as u64));
- jnz_ptr(cb, side_exit);
+ asm.test(object, (RUBY_IMMEDIATE_MASK as u64).into());
+ asm.jnz(Target::side_exit(counter));
+
+ // Test that the object is not false
+ asm.cmp(object, Qfalse.into());
+ asm.je(Target::side_exit(counter));
- // Test that the object is not false or nil
- cmp(cb, object_opnd, uimm_opnd(Qnil.into()));
- jbe_ptr(cb, side_exit);
+ if Type::UnknownHeap.diff(object_type) != TypeDiff::Incompatible {
+ asm.ctx.upgrade_opnd_type(object_opnd, Type::UnknownHeap);
+ }
}
fn guard_object_is_array(
- cb: &mut CodeBlock,
- object_opnd: X86Opnd,
- flags_opnd: X86Opnd,
- _ctx: &mut Context,
- side_exit: CodePtr,
+ asm: &mut Assembler,
+ object: Opnd,
+ object_opnd: YARVOpnd,
+ counter: Counter,
) {
- add_comment(cb, "guard object is array");
+ let object_type = asm.ctx.get_opnd_type(object_opnd);
+ if object_type.is_array() {
+ return;
+ }
+
+ let object_reg = match object {
+ Opnd::InsnOut { .. } => object,
+ _ => asm.load(object),
+ };
+ guard_object_is_heap(asm, object_reg, object_opnd, counter);
+
+ asm_comment!(asm, "guard object is array");
// Pull out the type mask
- mov(
- cb,
- flags_opnd,
- mem_opnd(
- 8 * SIZEOF_VALUE as u8,
- object_opnd,
- RUBY_OFFSET_RBASIC_FLAGS,
- ),
- );
- and(cb, flags_opnd, uimm_opnd(RUBY_T_MASK as u64));
+ let flags_opnd = Opnd::mem(VALUE_BITS, object_reg, RUBY_OFFSET_RBASIC_FLAGS);
+ let flags_opnd = asm.and(flags_opnd, (RUBY_T_MASK as u64).into());
// Compare the result with T_ARRAY
- cmp(cb, flags_opnd, uimm_opnd(RUBY_T_ARRAY as u64));
- jne_ptr(cb, side_exit);
+ asm.cmp(flags_opnd, (RUBY_T_ARRAY as u64).into());
+ asm.jne(Target::side_exit(counter));
+
+ if Type::TArray.diff(object_type) != TypeDiff::Incompatible {
+ asm.ctx.upgrade_opnd_type(object_opnd, Type::TArray);
+ }
}
-// push enough nils onto the stack to fill out an array
+fn guard_object_is_hash(
+ asm: &mut Assembler,
+ object: Opnd,
+ object_opnd: YARVOpnd,
+ counter: Counter,
+) {
+ let object_type = asm.ctx.get_opnd_type(object_opnd);
+ if object_type.is_hash() {
+ return;
+ }
+
+ let object_reg = match object {
+ Opnd::InsnOut { .. } => object,
+ _ => asm.load(object),
+ };
+ guard_object_is_heap(asm, object_reg, object_opnd, counter);
+
+ asm_comment!(asm, "guard object is hash");
+
+ // Pull out the type mask
+ let flags_opnd = Opnd::mem(VALUE_BITS, object_reg, RUBY_OFFSET_RBASIC_FLAGS);
+ let flags_opnd = asm.and(flags_opnd, (RUBY_T_MASK as u64).into());
+
+ // Compare the result with T_HASH
+ asm.cmp(flags_opnd, (RUBY_T_HASH as u64).into());
+ asm.jne(Target::side_exit(counter));
+
+ if Type::THash.diff(object_type) != TypeDiff::Incompatible {
+ asm.ctx.upgrade_opnd_type(object_opnd, Type::THash);
+ }
+}
+
+fn guard_object_is_string(
+ asm: &mut Assembler,
+ object: Opnd,
+ object_opnd: YARVOpnd,
+ counter: Counter,
+) {
+ let object_type = asm.ctx.get_opnd_type(object_opnd);
+ if object_type.is_string() {
+ return;
+ }
+
+ let object_reg = match object {
+ Opnd::InsnOut { .. } => object,
+ _ => asm.load(object),
+ };
+ guard_object_is_heap(asm, object_reg, object_opnd, counter);
+
+ asm_comment!(asm, "guard object is string");
+
+ // Pull out the type mask
+ let flags_reg = asm.load(Opnd::mem(VALUE_BITS, object_reg, RUBY_OFFSET_RBASIC_FLAGS));
+ let flags_reg = asm.and(flags_reg, Opnd::UImm(RUBY_T_MASK as u64));
+
+ // Compare the result with T_STRING
+ asm.cmp(flags_reg, Opnd::UImm(RUBY_T_STRING as u64));
+ asm.jne(Target::side_exit(counter));
+
+ if Type::TString.diff(object_type) != TypeDiff::Incompatible {
+ asm.ctx.upgrade_opnd_type(object_opnd, Type::TString);
+ }
+}
+
+/// This guards that a special flag is not set on a hash.
+/// By passing a hash with this flag set as the last argument
+/// in a splat call, you can change the way keywords are handled
+/// to behave like Ruby 2. We don't currently support this.
+fn guard_object_is_not_ruby2_keyword_hash(
+ asm: &mut Assembler,
+ object_opnd: Opnd,
+ counter: Counter,
+) {
+ asm_comment!(asm, "guard object is not ruby2 keyword hash");
+
+ let not_ruby2_keyword = asm.new_label("not_ruby2_keyword");
+ asm.test(object_opnd, (RUBY_IMMEDIATE_MASK as u64).into());
+ asm.jnz(not_ruby2_keyword);
+
+ asm.cmp(object_opnd, Qfalse.into());
+ asm.je(not_ruby2_keyword);
+
+ let flags_opnd = asm.load(Opnd::mem(
+ VALUE_BITS,
+ object_opnd,
+ RUBY_OFFSET_RBASIC_FLAGS,
+ ));
+ let type_opnd = asm.and(flags_opnd, (RUBY_T_MASK as u64).into());
+
+ asm.cmp(type_opnd, (RUBY_T_HASH as u64).into());
+ asm.jne(not_ruby2_keyword);
+
+ asm.test(flags_opnd, (RHASH_PASS_AS_KEYWORDS as u64).into());
+ asm.jnz(Target::side_exit(counter));
+
+ asm.write_label(not_ruby2_keyword);
+}
+
+/// This instruction pops a single value off the stack, converts it to an
+/// arrayif it isn’t already one using the #to_ary method, and then pushes
+/// the values from the array back onto the stack.
fn gen_expandarray(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let flag = jit_get_arg(jit, 1);
- let VALUE(flag_value) = flag;
+) -> Option<CodegenStatus> {
+ // Both arguments are rb_num_t which is unsigned
+ let num = jit.get_arg(0).as_u32();
+ let flag = jit.get_arg(1).as_usize();
// If this instruction has the splat flag, then bail out.
- if flag_value & 0x01 != 0 {
- incr_counter!(expandarray_splat);
- return CantCompile;
+ if flag & 0x01 != 0 {
+ gen_counter_incr(asm, Counter::expandarray_splat);
+ return None;
}
// If this instruction has the postarg flag, then bail out.
- if flag_value & 0x02 != 0 {
- incr_counter!(expandarray_postarg);
- return CantCompile;
+ if flag & 0x02 != 0 {
+ gen_counter_incr(asm, Counter::expandarray_postarg);
+ return None;
}
- let side_exit = get_side_exit(jit, ocb, ctx);
+ let array_opnd = asm.stack_opnd(0);
+
+    // Defer compilation so we can specialize on the runtime operand
+ if !jit.at_current_insn() {
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
+ }
+
+ let comptime_recv = jit.peek_at_stack(&asm.ctx, 0);
+
+ // If the comptime receiver is not an array
+ if !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_ARRAY) } {
+ // at compile time, ensure to_ary is not defined
+ let target_cme = unsafe { rb_callable_method_entry_or_negative(comptime_recv.class_of(), ID!(to_ary)) };
+ let cme_def_type = unsafe { get_cme_def_type(target_cme) };
+
+        // if to_ary is defined, bail on compilation so that to_ary can be called at runtime
+ if cme_def_type != VM_METHOD_TYPE_UNDEF {
+ gen_counter_incr(asm, Counter::expandarray_to_ary);
+ return None;
+ }
+
+        // invalidate the compiled block if to_ary is later defined
+ jit.assume_method_lookup_stable(asm, ocb, target_cme);
+
+ jit_guard_known_klass(
+ jit,
+ asm,
+ ocb,
+ comptime_recv.class_of(),
+ array_opnd,
+ array_opnd.into(),
+ comptime_recv,
+ SEND_MAX_DEPTH,
+ Counter::expandarray_not_array,
+ );
+
+ let opnd = asm.stack_pop(1); // pop after using the type info
+
+ // If we don't actually want any values, then just keep going
+ if num == 0 {
+ return Some(KeepCompiling);
+ }
- // num is the number of requested values. If there aren't enough in the
- // array then we're going to push on nils.
- let num = jit_get_arg(jit, 0);
- let array_type = ctx.get_opnd_type(StackOpnd(0));
- let array_opnd = ctx.stack_pop(1);
+ // load opnd to avoid a race because we are also pushing onto the stack
+ let opnd = asm.load(opnd);
- if matches!(array_type, Type::Nil) {
- // special case for a, b = nil pattern
- // push N nils onto the stack
- for _i in 0..(num.into()) {
- let push_opnd = ctx.stack_push(Type::Nil);
- mov(cb, push_opnd, uimm_opnd(Qnil.into()));
+ for _ in 1..num {
+ let push_opnd = asm.stack_push(Type::Nil);
+ asm.mov(push_opnd, Qnil.into());
}
- return KeepCompiling;
+
+ let push_opnd = asm.stack_push(Type::Unknown);
+ asm.mov(push_opnd, opnd);
+
+ return Some(KeepCompiling);
}
- // Move the array from the stack into REG0 and check that it's an array.
- mov(cb, REG0, array_opnd);
- guard_object_is_heap(
- cb,
- REG0,
- ctx,
- counted_exit!(ocb, side_exit, expandarray_not_array),
- );
+ // Get the compile-time array length
+ let comptime_len = unsafe { rb_yjit_array_len(comptime_recv) as u32 };
+
+ // Move the array from the stack and check that it's an array.
guard_object_is_array(
- cb,
- REG0,
- REG1,
- ctx,
- counted_exit!(ocb, side_exit, expandarray_not_array),
+ asm,
+ array_opnd,
+ array_opnd.into(),
+ Counter::expandarray_not_array,
);
// If we don't actually want any values, then just return.
- if num == VALUE(0) {
- return KeepCompiling;
+ if num == 0 {
+ asm.stack_pop(1); // pop the array
+ return Some(KeepCompiling);
}
- // Pull out the embed flag to check if it's an embedded array.
- let flags_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG0, RUBY_OFFSET_RBASIC_FLAGS);
- mov(cb, REG1, flags_opnd);
-
- // Move the length of the embedded array into REG1.
- and(cb, REG1, uimm_opnd(RARRAY_EMBED_LEN_MASK as u64));
- shr(cb, REG1, uimm_opnd(RARRAY_EMBED_LEN_SHIFT as u64));
-
- // Conditionally move the length of the heap array into REG1.
- test(cb, flags_opnd, uimm_opnd(RARRAY_EMBED_FLAG as u64));
- let array_len_opnd = mem_opnd(
- (8 * size_of::<std::os::raw::c_long>()) as u8,
- REG0,
- RUBY_OFFSET_RARRAY_AS_HEAP_LEN,
- );
- cmovz(cb, REG1, array_len_opnd);
-
- // Only handle the case where the number of values in the array is greater
- // than or equal to the number of values requested.
- cmp(cb, REG1, uimm_opnd(num.into()));
- jl_ptr(cb, counted_exit!(ocb, side_exit, expandarray_rhs_too_small));
+ let array_opnd = asm.stack_opnd(0);
+ let array_reg = asm.load(array_opnd);
+ let array_len_opnd = get_array_len(asm, array_reg);
- // Load the address of the embedded array into REG1.
- // (struct RArray *)(obj)->as.ary
- let ary_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG0, RUBY_OFFSET_RARRAY_AS_ARY);
- lea(cb, REG1, ary_opnd);
-
- // Conditionally load the address of the heap array into REG1.
- // (struct RArray *)(obj)->as.heap.ptr
- test(cb, flags_opnd, uimm_opnd(RARRAY_EMBED_FLAG as u64));
- let heap_ptr_opnd = mem_opnd(
- (8 * size_of::<usize>()) as u8,
- REG0,
- RUBY_OFFSET_RARRAY_AS_HEAP_PTR,
- );
- cmovz(cb, REG1, heap_ptr_opnd);
+ // Guard on the comptime/expected array length
+ if comptime_len >= num {
+ asm_comment!(asm, "guard array length >= {}", num);
+ asm.cmp(array_len_opnd, num.into());
+ jit_chain_guard(
+ JCC_JB,
+ jit,
+ asm,
+ ocb,
+ EXPANDARRAY_MAX_CHAIN_DEPTH,
+ Counter::expandarray_chain_max_depth,
+ );
- // Loop backward through the array and push each element onto the stack.
- for i in (0..(num.as_i32())).rev() {
- let top = ctx.stack_push(Type::Unknown);
- mov(cb, REG0, mem_opnd(64, REG1, i * (SIZEOF_VALUE as i32)));
- mov(cb, top, REG0);
+ } else {
+ asm_comment!(asm, "guard array length == {}", comptime_len);
+ asm.cmp(array_len_opnd, comptime_len.into());
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ asm,
+ ocb,
+ EXPANDARRAY_MAX_CHAIN_DEPTH,
+ Counter::expandarray_chain_max_depth,
+ );
}
- KeepCompiling
-}
+ let array_opnd = asm.stack_pop(1); // pop after using the type info
-fn gen_getlocal_wc0(
- jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
- _ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- // Compute the offset from BP to the local
- let slot_idx = jit_get_arg(jit, 0).as_i32();
- let offs: i32 = -(SIZEOF_VALUE as i32) * slot_idx;
- let local_idx = slot_to_local_idx(jit.get_iseq(), slot_idx);
-
- // Load environment pointer EP (level 0) from CFP
- gen_get_ep(cb, REG0, 0);
+ // Load the pointer to the embedded or heap array
+ let ary_opnd = if comptime_len > 0 {
+ let array_reg = asm.load(array_opnd);
+ Some(get_array_ptr(asm, array_reg))
+ } else {
+ None
+ };
- // Load the local from the EP
- mov(cb, REG0, mem_opnd(64, REG0, offs));
+ // Loop backward through the array and push each element onto the stack.
+ for i in (0..num).rev() {
+ let top = asm.stack_push(if i < comptime_len { Type::Unknown } else { Type::Nil });
+ let offset = i32::try_from(i * (SIZEOF_VALUE as u32)).unwrap();
- // Write the local at SP
- let stack_top = ctx.stack_push_local(local_idx.as_usize());
- mov(cb, stack_top, REG0);
+ // Missing elements are Qnil
+ asm_comment!(asm, "load array[{}]", i);
+ let elem_opnd = if i < comptime_len { Opnd::mem(64, ary_opnd.unwrap(), offset) } else { Qnil.into() };
+ asm.mov(top, elem_opnd);
+ }
- KeepCompiling
+ Some(KeepCompiling)
}
// Compute the index of a local variable from its slot index
-fn slot_to_local_idx(iseq: IseqPtr, slot_idx: i32) -> u32 {
+fn ep_offset_to_local_idx(iseq: IseqPtr, ep_offset: u32) -> u32 {
// Layout illustration
// This is an array of VALUE
// | VM_ENV_DATA_SIZE |
@@ -1471,7 +2210,7 @@ fn slot_to_local_idx(iseq: IseqPtr, slot_idx: i32) -> u32 {
// ^ ^ ^ ^
// +-------+---local_table_size----+ cfp->ep--+
// | |
- // +------------------slot_idx----------------+
+ // +------------------ep_offset---------------+
//
// See usages of local_var_name() from iseq.c for similar calculation.
@@ -1479,419 +2218,449 @@ fn slot_to_local_idx(iseq: IseqPtr, slot_idx: i32) -> u32 {
let local_table_size: i32 = unsafe { get_iseq_body_local_table_size(iseq) }
.try_into()
.unwrap();
- let op = slot_idx - (VM_ENV_DATA_SIZE as i32);
+ let op = (ep_offset - VM_ENV_DATA_SIZE) as i32;
let local_idx = local_table_size - op - 1;
assert!(local_idx >= 0 && local_idx < local_table_size);
local_idx.try_into().unwrap()
}
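A worked example of the arithmetic above as a standalone sketch, assuming VM_ENV_DATA_SIZE == 3 and an iseq with two locals. The helper below restates the formula for illustration; it is not the real function.

    fn sketch_ep_offset_to_local_idx(local_table_size: i32, ep_offset: u32) -> i32 {
        const VM_ENV_DATA_SIZE: u32 = 3; // assumed value, for illustration only
        let op = (ep_offset - VM_ENV_DATA_SIZE) as i32;
        local_table_size - op - 1
    }

    fn main() {
        // The first local is furthest from the EP: ep_offset 4 -> local 0
        assert_eq!(sketch_ep_offset_to_local_idx(2, 4), 0);
        // The last local sits just below the env data: ep_offset 3 -> local 1
        assert_eq!(sketch_ep_offset_to_local_idx(2, 3), 1);
    }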
// Get EP at level from CFP
-fn gen_get_ep(cb: &mut CodeBlock, reg: X86Opnd, level: u32) {
- // Load environment pointer EP from CFP
- let ep_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP);
- mov(cb, reg, ep_opnd);
+fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd {
+ // Load environment pointer EP from CFP into a register
+ let ep_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP);
+ let mut ep_opnd = asm.load(ep_opnd);
for _ in (0..level).rev() {
// Get the previous EP from the current EP
// See GET_PREV_EP(ep) macro
// VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03))
- let offs = (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32);
- mov(cb, reg, mem_opnd(64, reg, offs));
- and(cb, reg, imm_opnd(!0x03));
+ let offs = SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL;
+ ep_opnd = asm.load(Opnd::mem(64, ep_opnd, offs));
+ ep_opnd = asm.and(ep_opnd, Opnd::Imm(!0x03));
}
+
+ ep_opnd
+}
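For reference, a standalone sketch of the pointer chase that gen_get_ep() compiles, mirroring the GET_PREV_EP(ep) macro quoted above. The specval slot index of -1 is an assumption made for illustration; the point here is only the `& !0x03` untagging of the previous-EP pointer.

    // Walk `level` environments up by following the specval slot and masking
    // off the two low tag bits, as in GET_PREV_EP(ep).
    unsafe fn sketch_get_ep(mut ep: *const usize, level: u32) -> *const usize {
        const VM_ENV_DATA_INDEX_SPECVAL: isize = -1; // assumed slot index
        for _ in 0..level {
            let specval = *ep.offset(VM_ENV_DATA_INDEX_SPECVAL);
            ep = (specval & !0x03) as *const usize;
        }
        ep
    }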
+
+// Gets the EP of the ISeq of the containing method, or "local level".
+// Equivalent of GET_LEP() macro.
+fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd {
+ // Equivalent of get_lvar_level() in compile.c
+ fn get_lvar_level(iseq: IseqPtr) -> u32 {
+ if iseq == unsafe { rb_get_iseq_body_local_iseq(iseq) } {
+ 0
+ } else {
+ 1 + get_lvar_level(unsafe { rb_get_iseq_body_parent_iseq(iseq) })
+ }
+ }
+
+ let level = get_lvar_level(jit.get_iseq());
+ gen_get_ep(asm, level)
}
fn gen_getlocal_generic(
- ctx: &mut Context,
- cb: &mut CodeBlock,
- local_idx: u32,
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ ep_offset: u32,
level: u32,
-) -> CodegenStatus {
- gen_get_ep(cb, REG0, level);
+) -> Option<CodegenStatus> {
+ let local_opnd = if level == 0 && jit.assume_no_ep_escape(asm, ocb) {
+ // Load the local using SP register
+ asm.ctx.ep_opnd(-(ep_offset as i32))
+ } else {
+ // Load environment pointer EP (level 0) from CFP
+ let ep_opnd = gen_get_ep(asm, level);
- // Load the local from the block
- // val = *(vm_get_ep(GET_EP(), level) - idx);
- let offs = -(SIZEOF_VALUE as i32 * local_idx as i32);
- mov(cb, REG0, mem_opnd(64, REG0, offs));
+ // Load the local from the block
+ // val = *(vm_get_ep(GET_EP(), level) - idx);
+ let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32);
+ Opnd::mem(64, ep_opnd, offs)
+ };
// Write the local at SP
- let stack_top = ctx.stack_push(Type::Unknown);
- mov(cb, stack_top, REG0);
+ let stack_top = if level == 0 {
+ let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset);
+ asm.stack_push_local(local_idx.as_usize())
+ } else {
+ asm.stack_push(Type::Unknown)
+ };
+
+ asm.mov(stack_top, local_opnd);
- KeepCompiling
+ Some(KeepCompiling)
}
fn gen_getlocal(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
- _ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let idx = jit_get_arg(jit, 0);
- let level = jit_get_arg(jit, 1);
- gen_getlocal_generic(ctx, cb, idx.as_u32(), level.as_u32())
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ let idx = jit.get_arg(0).as_u32();
+ let level = jit.get_arg(1).as_u32();
+ gen_getlocal_generic(jit, asm, ocb, idx, level)
}
-fn gen_getlocal_wc1(
+fn gen_getlocal_wc0(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
- _ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let idx = jit_get_arg(jit, 0);
- gen_getlocal_generic(ctx, cb, idx.as_u32(), 1)
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ let idx = jit.get_arg(0).as_u32();
+ gen_getlocal_generic(jit, asm, ocb, idx, 0)
}
-fn gen_setlocal_wc0(
+fn gen_getlocal_wc1(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- /*
- vm_env_write(const VALUE *ep, int index, VALUE v)
- {
- VALUE flags = ep[VM_ENV_DATA_INDEX_FLAGS];
- if (LIKELY((flags & VM_ENV_FLAG_WB_REQUIRED) == 0)) {
- VM_STACK_ENV_WRITE(ep, index, v);
- }
- else {
- vm_env_write_slowpath(ep, index, v);
- }
- }
- */
-
- let slot_idx = jit_get_arg(jit, 0).as_i32();
- let local_idx = slot_to_local_idx(jit.get_iseq(), slot_idx).as_usize();
-
- // Load environment pointer EP (level 0) from CFP
- gen_get_ep(cb, REG0, 0);
-
- // flags & VM_ENV_FLAG_WB_REQUIRED
- let flags_opnd = mem_opnd(
- 64,
- REG0,
- SIZEOF_VALUE as i32 * VM_ENV_DATA_INDEX_FLAGS as i32,
- );
- test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED as i64));
-
- // Create a side-exit to fall back to the interpreter
- let side_exit = get_side_exit(jit, ocb, ctx);
-
- // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
- jnz_ptr(cb, side_exit);
-
- // Set the type of the local variable in the context
- let temp_type = ctx.get_opnd_type(StackOpnd(0));
- ctx.set_local_type(local_idx, temp_type);
-
- // Pop the value to write from the stack
- let stack_top = ctx.stack_pop(1);
- mov(cb, REG1, stack_top);
-
- // Write the value at the environment pointer
- let offs: i32 = -8 * slot_idx;
- mov(cb, mem_opnd(64, REG0, offs), REG1);
-
- KeepCompiling
+) -> Option<CodegenStatus> {
+ let idx = jit.get_arg(0).as_u32();
+ gen_getlocal_generic(jit, asm, ocb, idx, 1)
}
fn gen_setlocal_generic(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
- local_idx: i32,
+ ep_offset: u32,
level: u32,
-) -> CodegenStatus {
- // Load environment pointer EP at level
- gen_get_ep(cb, REG0, level);
+) -> Option<CodegenStatus> {
+ let value_type = asm.ctx.get_opnd_type(StackOpnd(0));
+
+ // Fallback because of write barrier
+ if asm.ctx.get_chain_depth() > 0 {
+ // Load environment pointer EP at level
+ let ep_opnd = gen_get_ep(asm, level);
+
+ // This function should not yield to the GC.
+ // void rb_vm_env_write(const VALUE *ep, int index, VALUE v)
+ let index = -(ep_offset as i64);
+ let value_opnd = asm.stack_opnd(0);
+ asm.ccall(
+ rb_vm_env_write as *const u8,
+ vec![
+ ep_opnd,
+ index.into(),
+ value_opnd,
+ ]
+ );
+ asm.stack_pop(1);
- // flags & VM_ENV_FLAG_WB_REQUIRED
- let flags_opnd = mem_opnd(
- 64,
- REG0,
- SIZEOF_VALUE as i32 * VM_ENV_DATA_INDEX_FLAGS as i32,
- );
- test(cb, flags_opnd, uimm_opnd(VM_ENV_FLAG_WB_REQUIRED.into()));
+ return Some(KeepCompiling);
+ }
- // Create a side-exit to fall back to the interpreter
- let side_exit = get_side_exit(jit, ocb, ctx);
+ let (flags_opnd, local_opnd) = if level == 0 && jit.assume_no_ep_escape(asm, ocb) {
+ // Load flags and the local using SP register
+ let local_opnd = asm.ctx.ep_opnd(-(ep_offset as i32));
+ let flags_opnd = asm.ctx.ep_opnd(VM_ENV_DATA_INDEX_FLAGS as i32);
+ (flags_opnd, local_opnd)
+ } else {
+ // Load flags and the local for the level
+ let ep_opnd = gen_get_ep(asm, level);
+ let flags_opnd = Opnd::mem(
+ 64,
+ ep_opnd,
+ SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_FLAGS as i32,
+ );
+ (flags_opnd, Opnd::mem(64, ep_opnd, -SIZEOF_VALUE_I32 * ep_offset as i32))
+ };
- // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
- jnz_ptr(cb, side_exit);
+ // Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however write barriers
+ // only affect heap objects being written. If we know an immediate value is being written we
+ // can skip this check.
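+    // (Immediates such as Fixnums, Symbols, nil, true and false are never heap-allocated,
+    // so storing one cannot create a reference the GC has to track.)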
+ if !value_type.is_imm() {
+ // flags & VM_ENV_FLAG_WB_REQUIRED
+ asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into());
+
+ // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
+ assert!(asm.ctx.get_chain_depth() == 0);
+ jit_chain_guard(
+ JCC_JNZ,
+ jit,
+ asm,
+ ocb,
+ 1,
+ Counter::setlocal_wb_required,
+ );
+ }
+
+ if level == 0 {
+ let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset).as_usize();
+ asm.ctx.set_local_type(local_idx, value_type);
+ }
// Pop the value to write from the stack
- let stack_top = ctx.stack_pop(1);
- mov(cb, REG1, stack_top);
+ let stack_top = asm.stack_pop(1);
// Write the value at the environment pointer
- let offs = -(SIZEOF_VALUE as i32 * local_idx);
- mov(cb, mem_opnd(64, REG0, offs), REG1);
+ asm.mov(local_opnd, stack_top);
- KeepCompiling
+ Some(KeepCompiling)
}
fn gen_setlocal(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let idx = jit_get_arg(jit, 0).as_i32();
- let level = jit_get_arg(jit, 1).as_u32();
- gen_setlocal_generic(jit, ctx, cb, ocb, idx, level)
+) -> Option<CodegenStatus> {
+ let idx = jit.get_arg(0).as_u32();
+ let level = jit.get_arg(1).as_u32();
+ gen_setlocal_generic(jit, asm, ocb, idx, level)
+}
+
+fn gen_setlocal_wc0(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ let idx = jit.get_arg(0).as_u32();
+ gen_setlocal_generic(jit, asm, ocb, idx, 0)
}
fn gen_setlocal_wc1(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let idx = jit_get_arg(jit, 0).as_i32();
- gen_setlocal_generic(jit, ctx, cb, ocb, idx, 1)
+) -> Option<CodegenStatus> {
+ let idx = jit.get_arg(0).as_u32();
+ gen_setlocal_generic(jit, asm, ocb, idx, 1)
}
// new hash initialized from top N values
fn gen_newhash(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let num: i64 = jit_get_arg(jit, 0).as_i64();
+) -> Option<CodegenStatus> {
+ let num: u64 = jit.get_arg(0).as_u64();
// Save the PC and SP because we are allocating
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_call_with_gc(jit, asm);
if num != 0 {
// val = rb_hash_new_with_size(num / 2);
- mov(cb, C_ARG_REGS[0], imm_opnd(num / 2));
- call_ptr(cb, REG0, rb_hash_new_with_size as *const u8);
+ let new_hash = asm.ccall(
+ rb_hash_new_with_size as *const u8,
+ vec![Opnd::UImm(num / 2)]
+ );
+
+ // Save the allocated hash as we want to push it after insertion
+ asm.cpush(new_hash);
+ asm.cpush(new_hash); // x86 alignment
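+        // Pushing the value twice keeps the stack 16-byte aligned across the ccall, as the
+        // x86-64 SysV ABI requires; both slots are popped again below.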
- // save the allocated hash as we want to push it after insertion
- push(cb, RAX);
- push(cb, RAX); // alignment
+ // Get a pointer to the values to insert into the hash
+ let stack_addr_from_top = asm.lea(asm.stack_opnd((num - 1) as i32));
// rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val);
- mov(cb, C_ARG_REGS[0], imm_opnd(num));
- lea(
- cb,
- C_ARG_REGS[1],
- ctx.stack_opnd((num - 1).try_into().unwrap()),
+ asm.ccall(
+ rb_hash_bulk_insert as *const u8,
+ vec![
+ Opnd::UImm(num),
+ stack_addr_from_top,
+ new_hash
+ ]
);
- mov(cb, C_ARG_REGS[2], RAX);
- call_ptr(cb, REG0, rb_hash_bulk_insert as *const u8);
- pop(cb, RAX); // alignment
- pop(cb, RAX);
+ let new_hash = asm.cpop();
+ asm.cpop_into(new_hash); // x86 alignment
- ctx.stack_pop(num.try_into().unwrap());
- let stack_ret = ctx.stack_push(Type::Hash);
- mov(cb, stack_ret, RAX);
+ asm.stack_pop(num.try_into().unwrap());
+ let stack_ret = asm.stack_push(Type::CHash);
+ asm.mov(stack_ret, new_hash);
} else {
// val = rb_hash_new();
- call_ptr(cb, REG0, rb_hash_new as *const u8);
-
- let stack_ret = ctx.stack_push(Type::Hash);
- mov(cb, stack_ret, RAX);
+ let new_hash = asm.ccall(rb_hash_new as *const u8, vec![]);
+ let stack_ret = asm.stack_push(Type::CHash);
+ asm.mov(stack_ret, new_hash);
}
- KeepCompiling
+ Some(KeepCompiling)
}
fn gen_putstring(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ let put_val = jit.get_arg(0);
+
+ // Save the PC and SP because the callee will allocate
+ jit_prepare_call_with_gc(jit, asm);
+
+ let str_opnd = asm.ccall(
+ rb_ec_str_resurrect as *const u8,
+ vec![EC, put_val.into(), 0.into()]
+ );
+
+ let stack_top = asm.stack_push(Type::CString);
+ asm.mov(stack_top, str_opnd);
+
+ Some(KeepCompiling)
+}
+
+fn gen_putchilledstring(
+ jit: &mut JITState,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let put_val = jit_get_arg(jit, 0);
+) -> Option<CodegenStatus> {
+ let put_val = jit.get_arg(0);
// Save the PC and SP because the callee will allocate
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_call_with_gc(jit, asm);
- mov(cb, C_ARG_REGS[0], REG_EC);
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], put_val);
- call_ptr(cb, REG0, rb_ec_str_resurrect as *const u8);
+ let str_opnd = asm.ccall(
+ rb_ec_str_resurrect as *const u8,
+ vec![EC, put_val.into(), 1.into()]
+ );
- let stack_top = ctx.stack_push(Type::CString);
- mov(cb, stack_top, RAX);
+ let stack_top = asm.stack_push(Type::CString);
+ asm.mov(stack_top, str_opnd);
- KeepCompiling
+ Some(KeepCompiling)
+}
+
+fn gen_checkmatch(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ let flag = jit.get_arg(0).as_u32();
+
+ // rb_vm_check_match is not leaf unless flag is VM_CHECKMATCH_TYPE_WHEN.
+ // See also: leafness_of_checkmatch() and check_match()
+ if flag != VM_CHECKMATCH_TYPE_WHEN {
+ jit_prepare_non_leaf_call(jit, asm);
+ }
+
+ let pattern = asm.stack_opnd(0);
+ let target = asm.stack_opnd(1);
+
+ extern "C" {
+ fn rb_vm_check_match(ec: EcPtr, target: VALUE, pattern: VALUE, num: u32) -> VALUE;
+ }
+ let result = asm.ccall(rb_vm_check_match as *const u8, vec![EC, target, pattern, flag.into()]);
+ asm.stack_pop(2); // Keep them on stack during ccall for GC
+
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, result);
+
+ Some(KeepCompiling)
}
// Push Qtrue or Qfalse depending on whether the given keyword was supplied by
// the caller
fn gen_checkkeyword(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
// When a keyword is unspecified past index 32, a hash will be used
// instead. This can only happen in iseqs taking more than 32 keywords.
if unsafe { (*get_iseq_body_param_keyword(jit.iseq)).num >= 32 } {
- return CantCompile;
+ return None;
}
// The EP offset to the undefined bits local
- let bits_offset = jit_get_arg(jit, 0).as_i32();
+ let bits_offset = jit.get_arg(0).as_i32();
// The index of the keyword we want to check
- let index: i64 = jit_get_arg(jit, 1).as_i64();
+ let index: i64 = jit.get_arg(1).as_i64();
// Load environment pointer EP
- gen_get_ep(cb, REG0, 0);
+ let ep_opnd = gen_get_ep(asm, 0);
// VALUE kw_bits = *(ep - bits);
- let bits_opnd = mem_opnd(64, REG0, (SIZEOF_VALUE as i32) * -bits_offset);
+ let bits_opnd = Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * -bits_offset);
// unsigned int b = (unsigned int)FIX2ULONG(kw_bits);
// if ((b & (0x01 << idx))) {
//
// We can skip the FIX2ULONG conversion by shifting the bit we test
let bit_test: i64 = 0x01 << (index + 1);
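+    // kw_bits is a Fixnum, so its integer payload sits one bit to the left of the tag
+    // bit; testing bit (index + 1) of the tagged value is the same as testing bit
+    // `index` of the untagged bitmap.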
- test(cb, bits_opnd, imm_opnd(bit_test));
- mov(cb, REG0, uimm_opnd(Qfalse.into()));
- mov(cb, REG1, uimm_opnd(Qtrue.into()));
- cmovz(cb, REG0, REG1);
-
- let stack_ret = ctx.stack_push(Type::UnknownImm);
- mov(cb, stack_ret, REG0);
-
- KeepCompiling
-}
-
-fn gen_jnz_to_target0(
- cb: &mut CodeBlock,
- target0: CodePtr,
- _target1: Option<CodePtr>,
- shape: BranchShape,
-) {
- match shape {
- BranchShape::Next0 | BranchShape::Next1 => unreachable!(),
- BranchShape::Default => jnz_ptr(cb, target0),
- }
-}
+ asm.test(bits_opnd, Opnd::Imm(bit_test));
+ let ret_opnd = asm.csel_z(Qtrue.into(), Qfalse.into());
-fn gen_jz_to_target0(
- cb: &mut CodeBlock,
- target0: CodePtr,
- _target1: Option<CodePtr>,
- shape: BranchShape,
-) {
- match shape {
- BranchShape::Next0 | BranchShape::Next1 => unreachable!(),
- BranchShape::Default => jz_ptr(cb, target0),
- }
-}
+ let stack_ret = asm.stack_push(Type::UnknownImm);
+ asm.mov(stack_ret, ret_opnd);
-fn gen_jbe_to_target0(
- cb: &mut CodeBlock,
- target0: CodePtr,
- _target1: Option<CodePtr>,
- shape: BranchShape,
-) {
- match shape {
- BranchShape::Next0 | BranchShape::Next1 => unreachable!(),
- BranchShape::Default => jbe_ptr(cb, target0),
- }
+ Some(KeepCompiling)
}
// Generate a jump to a stub that recompiles the current YARV instruction on failure.
-// When depth_limitk is exceeded, generate a jump to a side exit.
+// When depth_limit is exceeded, generate a jump to a side exit.
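+// Each guard failure recompiles the instruction with chain_depth + 1, producing a new
+// version specialized on what was actually seen; past depth_limit the guard becomes a
+// plain jump to a counted side exit.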
fn jit_chain_guard(
jcc: JCCKinds,
- jit: &JITState,
- ctx: &Context,
- cb: &mut CodeBlock,
+ jit: &mut JITState,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
- depth_limit: i32,
- side_exit: CodePtr,
+ depth_limit: u8,
+ counter: Counter,
) {
let target0_gen_fn = match jcc {
- JCC_JNE | JCC_JNZ => gen_jnz_to_target0,
- JCC_JZ | JCC_JE => gen_jz_to_target0,
- JCC_JBE | JCC_JNA => gen_jbe_to_target0,
+ JCC_JNE | JCC_JNZ => BranchGenFn::JNZToTarget0,
+ JCC_JZ | JCC_JE => BranchGenFn::JZToTarget0,
+ JCC_JBE | JCC_JNA => BranchGenFn::JBEToTarget0,
+ JCC_JB | JCC_JNAE => BranchGenFn::JBToTarget0,
+ JCC_JO_MUL => BranchGenFn::JOMulToTarget0,
};
- if (ctx.get_chain_depth() as i32) < depth_limit {
- let mut deeper = *ctx;
+ if asm.ctx.get_chain_depth() < depth_limit {
+ // Rewind Context to use the stack_size at the beginning of this instruction.
+ let mut deeper = asm.ctx.with_stack_size(jit.stack_size_for_pc);
deeper.increment_chain_depth();
let bid = BlockId {
iseq: jit.iseq,
idx: jit.insn_idx,
};
- gen_branch(jit, ctx, cb, ocb, bid, &deeper, None, None, target0_gen_fn);
+ gen_branch(jit, asm, ocb, bid, &deeper, None, None, target0_gen_fn);
} else {
- target0_gen_fn(cb, side_exit, None, BranchShape::Default);
+ target0_gen_fn.call(asm, Target::side_exit(counter), None);
}
}
-// up to 5 different classes, and embedded or not for each
-pub const GET_IVAR_MAX_DEPTH: i32 = 10;
+// up to 8 different shapes for each ivar read site
+pub const GET_IVAR_MAX_DEPTH: u8 = 8;
-// hashes and arrays
-pub const OPT_AREF_MAX_CHAIN_DEPTH: i32 = 2;
-
-// up to 5 different classes
-pub const SEND_MAX_DEPTH: i32 = 5;
-
-// Codegen for setting an instance variable.
-// Preconditions:
-// - receiver is in REG0
-// - receiver has the same class as CLASS_OF(comptime_receiver)
-// - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
-fn gen_set_ivar(
- jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
- recv: VALUE,
- ivar_name: ID,
-) -> CodegenStatus {
- // Save the PC and SP because the callee may allocate
- // Note that this modifies REG_SP, which is why we do it first
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+// up to 8 different shapes for each ivar write site
+pub const SET_IVAR_MAX_DEPTH: u8 = 8;
- // Get the operands from the stack
- let val_opnd = ctx.stack_pop(1);
- let recv_opnd = ctx.stack_pop(1);
+// hashes and arrays
+pub const OPT_AREF_MAX_CHAIN_DEPTH: u8 = 2;
- let ivar_index: u32 = unsafe { rb_obj_ensure_iv_index_mapping(recv, ivar_name) };
+// expandarray
+pub const EXPANDARRAY_MAX_CHAIN_DEPTH: u8 = 4;
- // Call rb_vm_set_ivar_idx with the receiver, the index of the ivar, and the value
- mov(cb, C_ARG_REGS[0], recv_opnd);
- mov(cb, C_ARG_REGS[1], imm_opnd(ivar_index.into()));
- mov(cb, C_ARG_REGS[2], val_opnd);
- call_ptr(cb, REG0, rb_vm_set_ivar_idx as *const u8);
+// up to 5 different methods for send
+pub const SEND_MAX_DEPTH: u8 = 5;
- let out_opnd = ctx.stack_push(Type::Unknown);
- mov(cb, out_opnd, RAX);
+// up to 20 different offsets for case-when
+pub const CASE_WHEN_MAX_DEPTH: u8 = 20;
- KeepCompiling
-}
+pub const MAX_SPLAT_LENGTH: i32 = 127;
// Codegen for getting an instance variable.
// Preconditions:
-// - receiver is in REG0
// - receiver has the same class as CLASS_OF(comptime_receiver)
// - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
fn gen_get_ivar(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
- max_chain_depth: i32,
+ max_chain_depth: u8,
comptime_receiver: VALUE,
ivar_name: ID,
- reg0_opnd: InsnOpnd,
- side_exit: CodePtr,
-) -> CodegenStatus {
+ recv: Opnd,
+ recv_opnd: YARVOpnd,
+) -> Option<CodegenStatus> {
let comptime_val_klass = comptime_receiver.class_of();
- let starting_context = *ctx; // make a copy for use with jit_chain_guard
+
+ // If recv isn't already a register, load it.
+ let recv = match recv {
+ Opnd::InsnOut { .. } => recv,
+ _ => asm.load(recv),
+ };
// Check if the comptime class uses a custom allocator
let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) };
@@ -1905,386 +2674,687 @@ fn gen_get_ivar(
// Check if the comptime receiver is a T_OBJECT
let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) };
+ // Use a general C call at the last chain to avoid exits on megamorphic shapes
+ let megamorphic = asm.ctx.get_chain_depth() >= max_chain_depth;
+ if megamorphic {
+ gen_counter_incr(asm, Counter::num_getivar_megamorphic);
+ }
// If the class uses the default allocator, instances should all be T_OBJECT
// NOTE: This assumes nobody changes the allocator of the class after allocation.
// Eventually, we can encode whether an object is T_OBJECT or not
// inside object shapes.
- if !receiver_t_object || uses_custom_allocator {
+ // too-complex shapes can't use index access, so we use rb_ivar_get for them too.
+ if !receiver_t_object || uses_custom_allocator || comptime_receiver.shape_too_complex() || megamorphic {
// General case. Call rb_ivar_get().
// VALUE rb_ivar_get(VALUE obj, ID id)
- add_comment(cb, "call rb_ivar_get()");
+ asm_comment!(asm, "call rb_ivar_get()");
- // The function could raise exceptions.
- jit_prepare_routine_call(jit, ctx, cb, REG1);
+ // The function could raise RactorIsolationError.
+ jit_prepare_non_leaf_call(jit, asm);
- mov(cb, C_ARG_REGS[0], REG0);
- mov(cb, C_ARG_REGS[1], uimm_opnd(ivar_name));
- call_ptr(cb, REG1, rb_ivar_get as *const u8);
+ let ivar_val = asm.ccall(rb_ivar_get as *const u8, vec![recv, Opnd::UImm(ivar_name)]);
- if reg0_opnd != SelfOpnd {
- ctx.stack_pop(1);
+ if recv_opnd != SelfOpnd {
+ asm.stack_pop(1);
}
+
// Push the ivar on the stack
- let out_opnd = ctx.stack_push(Type::Unknown);
- mov(cb, out_opnd, RAX);
+ let out_opnd = asm.stack_push(Type::Unknown);
+ asm.mov(out_opnd, ivar_val);
// Jump to next instruction. This allows guard chains to share the same successor.
- jump_to_next_insn(jit, ctx, cb, ocb);
- return EndBlock;
- }
-
- /*
- // FIXME:
- // This check was added because of a failure in a test involving the
- // Nokogiri Document class where we see a T_DATA that still has the default
- // allocator.
- // Aaron Patterson argues that this is a bug in the C extension, because
- // people could call .allocate() on the class and still get a T_OBJECT
- // For now I added an extra dynamic check that the receiver is T_OBJECT
- // so we can safely pass all the tests in Shopify Core.
- //
- // Guard that the receiver is T_OBJECT
- // #define RB_BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & RUBY_T_MASK)
- add_comment(cb, "guard receiver is T_OBJECT");
- mov(cb, REG1, member_opnd(REG0, struct RBasic, flags));
- and(cb, REG1, imm_opnd(RUBY_T_MASK));
- cmp(cb, REG1, imm_opnd(T_OBJECT));
- jit_chain_guard(JCC_JNE, jit, &starting_context, cb, ocb, max_chain_depth, side_exit);
- */
-
- // FIXME: Mapping the index could fail when there is too many ivar names. If we're
- // compiling for a branch stub that can cause the exception to be thrown from the
- // wrong PC.
- let ivar_index =
- unsafe { rb_obj_ensure_iv_index_mapping(comptime_receiver, ivar_name) }.as_usize();
-
- // Pop receiver if it's on the temp stack
- if reg0_opnd != SelfOpnd {
- ctx.stack_pop(1);
+ jump_to_next_insn(jit, asm, ocb);
+ return Some(EndBlock);
}
- // Compile time self is embedded and the ivar index lands within the object
- let test_result = unsafe { FL_TEST_RAW(comptime_receiver, VALUE(ROBJECT_EMBED.as_usize())) != VALUE(0) };
- if test_result && ivar_index < (ROBJECT_EMBED_LEN_MAX.as_usize()) {
- // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
-
- // Guard that self is embedded
- // TODO: BT and JC is shorter
- add_comment(cb, "guard embedded getivar");
- let flags_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS);
- test(cb, flags_opnd, uimm_opnd(ROBJECT_EMBED as u64));
- let side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic);
- jit_chain_guard(
- JCC_JZ,
- jit,
- &starting_context,
- cb,
- ocb,
- max_chain_depth,
- side_exit,
- );
+ let ivar_index = unsafe {
+ let shape_id = comptime_receiver.shape_id_of();
+ let shape = rb_shape_get_shape_by_id(shape_id);
+ let mut ivar_index: u32 = 0;
+ if rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) {
+ Some(ivar_index as usize)
+ } else {
+ None
+ }
+ };
- // Load the variable
- let offs = RUBY_OFFSET_ROBJECT_AS_ARY + (ivar_index * SIZEOF_VALUE) as i32;
- let ivar_opnd = mem_opnd(64, REG0, offs);
- mov(cb, REG1, ivar_opnd);
+ // Guard heap object (recv_opnd must be used before stack_pop)
+ guard_object_is_heap(asm, recv, recv_opnd, Counter::getivar_not_heap);
- // Guard that the variable is not Qundef
- cmp(cb, REG1, uimm_opnd(Qundef.into()));
- mov(cb, REG0, uimm_opnd(Qnil.into()));
- cmove(cb, REG1, REG0);
+ // Compile time self is embedded and the ivar index lands within the object
+ let embed_test_result = unsafe { FL_TEST_RAW(comptime_receiver, VALUE(ROBJECT_EMBED.as_usize())) != VALUE(0) };
- // Push the ivar on the stack
- let out_opnd = ctx.stack_push(Type::Unknown);
- mov(cb, out_opnd, REG1);
- } else {
- // Compile time value is *not* embedded.
+ let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) };
+ let shape_id_offset = unsafe { rb_shape_id_offset() };
+ let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset);
- // Guard that value is *not* embedded
- // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
- add_comment(cb, "guard extended getivar");
- let flags_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS);
- test(cb, flags_opnd, uimm_opnd(ROBJECT_EMBED as u64));
- let side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic);
- jit_chain_guard(
- JCC_JNZ,
- jit,
- &starting_context,
- cb,
- ocb,
- max_chain_depth,
- side_exit,
- );
+ asm_comment!(asm, "guard shape");
+ asm.cmp(shape_opnd, Opnd::UImm(expected_shape as u64));
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ asm,
+ ocb,
+ max_chain_depth,
+ Counter::getivar_megamorphic,
+ );
- // Check that the extended table is big enough
- if ivar_index > (ROBJECT_EMBED_LEN_MAX.as_usize()) {
- // Check that the slot is inside the extended table (num_slots > index)
- let num_slots = mem_opnd(32, REG0, RUBY_OFFSET_ROBJECT_AS_HEAP_NUMIV);
+ // Pop receiver if it's on the temp stack
+ if recv_opnd != SelfOpnd {
+ asm.stack_pop(1);
+ }
- cmp(cb, num_slots, uimm_opnd(ivar_index as u64));
- jle_ptr(cb, counted_exit!(ocb, side_exit, getivar_idx_out_of_range));
+ match ivar_index {
+ // If there is no IVAR index, then the ivar was undefined
+ // when we entered the compiler. That means we can just return
+ // nil for this shape + iv name
+ None => {
+ let out_opnd = asm.stack_push(Type::Nil);
+ asm.mov(out_opnd, Qnil.into());
}
+ Some(ivar_index) => {
+ if embed_test_result {
+ // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
- // Get a pointer to the extended table
- let tbl_opnd = mem_opnd(64, REG0, RUBY_OFFSET_ROBJECT_AS_HEAP_IVPTR);
- mov(cb, REG0, tbl_opnd);
+ // Load the variable
+ let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32;
+ let ivar_opnd = Opnd::mem(64, recv, offs);
- // Read the ivar from the extended table
- let ivar_opnd = mem_opnd(64, REG0, (SIZEOF_VALUE * ivar_index) as i32);
- mov(cb, REG0, ivar_opnd);
+ // Push the ivar on the stack
+ let out_opnd = asm.stack_push(Type::Unknown);
+ asm.mov(out_opnd, ivar_opnd);
+ } else {
+ // Compile time value is *not* embedded.
- // Check that the ivar is not Qundef
- cmp(cb, REG0, uimm_opnd(Qundef.into()));
- mov(cb, REG1, uimm_opnd(Qnil.into()));
- cmove(cb, REG0, REG1);
+ // Get a pointer to the extended table
+ let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR as i32));
- // Push the ivar on the stack
- let out_opnd = ctx.stack_push(Type::Unknown);
- mov(cb, out_opnd, REG0);
+ // Read the ivar from the extended table
+ let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32);
+
+ let out_opnd = asm.stack_push(Type::Unknown);
+ asm.mov(out_opnd, ivar_opnd);
+ }
+ }
}
// Jump to next instruction. This allows guard chains to share the same successor.
- jump_to_next_insn(jit, ctx, cb, ocb);
- EndBlock
+ jump_to_next_insn(jit, asm, ocb);
+ Some(EndBlock)
}
fn gen_getinstancevariable(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
- if !jit_at_current_insn(jit) {
- defer_compilation(jit, ctx, cb, ocb);
- return EndBlock;
+ if !jit.at_current_insn() {
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
}
- let ivar_name = jit_get_arg(jit, 0).as_u64();
-
- let comptime_val = jit_peek_at_self(jit);
- let comptime_val_klass = comptime_val.class_of();
+ let ivar_name = jit.get_arg(0).as_u64();
- // Generate a side exit
- let side_exit = get_side_exit(jit, ocb, ctx);
+ let comptime_val = jit.peek_at_self();
// Guard that the receiver has the same class as the one from compile time.
- mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF));
-
- jit_guard_known_klass(
- jit,
- ctx,
- cb,
- ocb,
- comptime_val_klass,
- SelfOpnd,
- comptime_val,
- GET_IVAR_MAX_DEPTH,
- side_exit,
- );
+ let self_asm_opnd = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF);
gen_get_ivar(
jit,
- ctx,
- cb,
+ asm,
ocb,
GET_IVAR_MAX_DEPTH,
comptime_val,
ivar_name,
+ self_asm_opnd,
SelfOpnd,
- side_exit,
)
}
+// Generate an IV write.
+// This function doesn't deal with writing the shape, or expanding an object
+// to use an IV buffer if necessary. That is the caller's responsibility.
+fn gen_write_iv(
+ asm: &mut Assembler,
+ comptime_receiver: VALUE,
+ recv: Opnd,
+ ivar_index: usize,
+ set_value: Opnd,
+ extension_needed: bool)
+{
+ // Compile time self is embedded and the ivar index lands within the object
+ let embed_test_result = comptime_receiver.embedded_p() && !extension_needed;
+
+ if embed_test_result {
+ // Find the IV offset
+ let offs = ROBJECT_OFFSET_AS_ARY as i32 + (ivar_index * SIZEOF_VALUE) as i32;
+ let ivar_opnd = Opnd::mem(64, recv, offs);
+
+ // Write the IV
+ asm_comment!(asm, "write IV");
+ asm.mov(ivar_opnd, set_value);
+ } else {
+ // Compile time value is *not* embedded.
+
+ // Get a pointer to the extended table
+ let tbl_opnd = asm.load(Opnd::mem(64, recv, ROBJECT_OFFSET_AS_HEAP_IVPTR as i32));
+
+ // Write the ivar in to the extended table
+ let ivar_opnd = Opnd::mem(64, tbl_opnd, (SIZEOF_VALUE * ivar_index) as i32);
+
+ asm_comment!(asm, "write IV");
+ asm.mov(ivar_opnd, set_value);
+ }
+}
+
fn gen_setinstancevariable(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
- _ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let id = jit_get_arg(jit, 0);
- let ic = jit_get_arg(jit, 1).as_u64(); // type IVC
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ // Defer compilation so we can specialize on a runtime `self`
+ if !jit.at_current_insn() {
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
+ }
- // Save the PC and SP because the callee may allocate
- // Note that this modifies REG_SP, which is why we do it first
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ let ivar_name = jit.get_arg(0).as_u64();
+ let ic = jit.get_arg(1).as_ptr();
+ let comptime_receiver = jit.peek_at_self();
+ gen_set_ivar(
+ jit,
+ asm,
+ ocb,
+ comptime_receiver,
+ ivar_name,
+ SelfOpnd,
+ Some(ic),
+ )
+}
- // Get the operands from the stack
- let val_opnd = ctx.stack_pop(1);
+/// Set an instance variable on setinstancevariable or attr_writer.
+/// It switches the behavior based on what recv_opnd is given.
+/// * SelfOpnd: setinstancevariable, which doesn't push a result onto the stack.
+/// * StackOpnd: attr_writer, which pushes a result onto the stack.
+fn gen_set_ivar(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ comptime_receiver: VALUE,
+ ivar_name: ID,
+ recv_opnd: YARVOpnd,
+ ic: Option<*const iseq_inline_iv_cache_entry>,
+) -> Option<CodegenStatus> {
+ let comptime_val_klass = comptime_receiver.class_of();
- // Call rb_vm_setinstancevariable(iseq, obj, id, val, ic);
- mov(
- cb,
- C_ARG_REGS[1],
- mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF),
- );
- mov(cb, C_ARG_REGS[3], val_opnd);
- mov(cb, C_ARG_REGS[2], uimm_opnd(id.into()));
- mov(cb, C_ARG_REGS[4], const_ptr_opnd(ic as *const u8));
- let iseq = VALUE(jit.iseq as usize);
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], iseq);
- call_ptr(cb, REG0, rb_vm_setinstancevariable as *const u8);
+ // If the comptime receiver is frozen, writing an IV will raise an exception
+    // and we don't want to generate JIT code that has to deal with that situation.
+ if comptime_receiver.is_frozen() {
+ gen_counter_incr(asm, Counter::setivar_frozen);
+ return None;
+ }
+
+ let stack_type = asm.ctx.get_opnd_type(StackOpnd(0));
+
+ // Check if the comptime class uses a custom allocator
+ let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) };
+ let uses_custom_allocator = match custom_allocator {
+ Some(alloc_fun) => {
+ let allocate_instance = rb_class_allocate_instance as *const u8;
+ alloc_fun as *const u8 != allocate_instance
+ }
+ None => false,
+ };
+
+ // Check if the comptime receiver is a T_OBJECT
+ let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) };
+ // Use a general C call at the last chain to avoid exits on megamorphic shapes
+ let megamorphic = asm.ctx.get_chain_depth() >= SET_IVAR_MAX_DEPTH;
+ if megamorphic {
+ gen_counter_incr(asm, Counter::num_setivar_megamorphic);
+ }
+
+ // Get the iv index
+ let shape_too_complex = comptime_receiver.shape_too_complex();
+ let ivar_index = if !shape_too_complex {
+ let shape_id = comptime_receiver.shape_id_of();
+ let shape = unsafe { rb_shape_get_shape_by_id(shape_id) };
+ let mut ivar_index: u32 = 0;
+ if unsafe { rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index) } {
+ Some(ivar_index as usize)
+ } else {
+ None
+ }
+ } else {
+ None
+ };
+
+ // The current shape doesn't contain this iv, we need to transition to another shape.
+ let new_shape = if !shape_too_complex && receiver_t_object && ivar_index.is_none() {
+ let current_shape = comptime_receiver.shape_of();
+ let next_shape = unsafe { rb_shape_get_next(current_shape, comptime_receiver, ivar_name) };
+ let next_shape_id = unsafe { rb_shape_id(next_shape) };
+
+        // If the VM ran out of shapes, or this class generated too many leaf shapes,
+        // it may be de-optimized into OBJ_TOO_COMPLEX_SHAPE (a hash table).
+ if next_shape_id == OBJ_TOO_COMPLEX_SHAPE_ID {
+ Some((next_shape_id, None, 0_usize))
+ } else {
+ let current_capacity = unsafe { (*current_shape).capacity };
+
+ // If the new shape has a different capacity, or is TOO_COMPLEX, we'll have to
+ // reallocate it.
+ let needs_extension = unsafe { (*current_shape).capacity != (*next_shape).capacity };
+
+ // We can write to the object, but we need to transition the shape
+ let ivar_index = unsafe { (*current_shape).next_iv_index } as usize;
+
+ let needs_extension = if needs_extension {
+ Some((current_capacity, unsafe { (*next_shape).capacity }))
+ } else {
+ None
+ };
+ Some((next_shape_id, needs_extension, ivar_index))
+ }
+ } else {
+ None
+ };
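+    // new_shape is (next shape id, Some((old, new)) capacities if the ivar buffer must
+    // grow, and the ivar index to write at), or None when no shape transition is needed.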
+ let new_shape_too_complex = matches!(new_shape, Some((OBJ_TOO_COMPLEX_SHAPE_ID, _, _)));
+
+    // If the receiver isn't a T_OBJECT, or uses a custom allocator,
+    // then just emit the IV write as a C function call.
+    // Too-complex shapes can't use index access, so they take the C call path too.
+ if !receiver_t_object || uses_custom_allocator || shape_too_complex || new_shape_too_complex || megamorphic {
+ // The function could raise FrozenError.
+ // Note that this modifies REG_SP, which is why we do it first
+ jit_prepare_non_leaf_call(jit, asm);
+
+ // Get the operands from the stack
+ let val_opnd = asm.stack_opnd(0);
+
+ if let StackOpnd(index) = recv_opnd { // attr_writer
+ let recv = asm.stack_opnd(index as i32);
+ asm_comment!(asm, "call rb_vm_set_ivar_id()");
+ asm.ccall(
+ rb_vm_set_ivar_id as *const u8,
+ vec![
+ recv,
+ Opnd::UImm(ivar_name),
+ val_opnd,
+ ],
+ );
+ } else { // setinstancevariable
+ asm_comment!(asm, "call rb_vm_setinstancevariable()");
+ asm.ccall(
+ rb_vm_setinstancevariable as *const u8,
+ vec![
+ Opnd::const_ptr(jit.iseq as *const u8),
+ Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF),
+ ivar_name.into(),
+ val_opnd,
+ Opnd::const_ptr(ic.unwrap() as *const u8),
+ ],
+ );
+ }
+ } else {
+ // Get the receiver
+ let mut recv = asm.load(if let StackOpnd(index) = recv_opnd {
+ asm.stack_opnd(index as i32)
+ } else {
+ Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)
+ });
+
+ // Upgrade type
+ guard_object_is_heap(asm, recv, recv_opnd, Counter::setivar_not_heap);
+
+ let expected_shape = unsafe { rb_shape_get_shape_id(comptime_receiver) };
+ let shape_id_offset = unsafe { rb_shape_id_offset() };
+ let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset);
+
+ asm_comment!(asm, "guard shape");
+ asm.cmp(shape_opnd, Opnd::UImm(expected_shape as u64));
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ asm,
+ ocb,
+ SET_IVAR_MAX_DEPTH,
+ Counter::setivar_megamorphic,
+ );
+
+ let write_val;
+
+ match ivar_index {
+ // If we don't have an instance variable index, then we need to
+ // transition out of the current shape.
+ None => {
+ let (new_shape_id, needs_extension, ivar_index) = new_shape.unwrap();
+ if let Some((current_capacity, new_capacity)) = needs_extension {
+ // Generate the C call so that runtime code will increase
+ // the capacity and set the buffer.
+ asm_comment!(asm, "call rb_ensure_iv_list_size");
+
+                    // It allocates, so it can trigger GC, which takes the VM lock
+                    // and could therefore yield to a different ractor.
+ jit_prepare_call_with_gc(jit, asm);
+ asm.ccall(rb_ensure_iv_list_size as *const u8,
+ vec![
+ recv,
+ Opnd::UImm(current_capacity.into()),
+ Opnd::UImm(new_capacity.into())
+ ]
+ );
+
+ // Load the receiver again after the function call
+ recv = asm.load(if let StackOpnd(index) = recv_opnd {
+ asm.stack_opnd(index as i32)
+ } else {
+ Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)
+ });
+ }
+
+ write_val = asm.stack_opnd(0);
+ gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, needs_extension.is_some());
+
+ asm_comment!(asm, "write shape");
+
+ let shape_id_offset = unsafe { rb_shape_id_offset() };
+ let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset);
+
+ // Store the new shape
+ asm.store(shape_opnd, Opnd::UImm(new_shape_id as u64));
+ },
+
+ Some(ivar_index) => {
+                // If the iv index already exists, then we don't need to
+                // transition to a new shape. This is because we find the
+                // iv index by searching up the shape tree. If we've already
+                // made the transition, there's no reason to update the
+                // shape on the object. Just set the IV.
+ write_val = asm.stack_opnd(0);
+ gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, false);
+ },
+ }
+
+ // If we know the stack value is an immediate, there's no need to
+ // generate WB code.
+ if !stack_type.is_imm() {
+ asm.spill_temps(); // for ccall (unconditionally spill them for RegTemps consistency)
+ let skip_wb = asm.new_label("skip_wb");
+ // If the value we're writing is an immediate, we don't need to WB
+ asm.test(write_val, (RUBY_IMMEDIATE_MASK as u64).into());
+ asm.jnz(skip_wb);
+
+ // If the value we're writing is nil or false, we don't need to WB
+ asm.cmp(write_val, Qnil.into());
+ asm.jbe(skip_wb);
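+            // Qfalse and Qnil are the only special constants whose tag bits don't trip the
+            // immediate-mask test above; both compare below-or-equal to Qnil unsigned, while
+            // heap pointers never do.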
+
+ asm_comment!(asm, "write barrier");
+ asm.ccall(
+ rb_gc_writebarrier as *const u8,
+ vec![
+ recv,
+ write_val,
+ ]
+ );
+
+ asm.write_label(skip_wb);
+ }
+ }
+ let write_val = asm.stack_pop(1); // Keep write_val on stack during ccall for GC
- KeepCompiling
+ // If it's attr_writer, i.e. recv_opnd is StackOpnd, we need to pop
+ // the receiver and push the written value onto the stack.
+ if let StackOpnd(_) = recv_opnd {
+ asm.stack_pop(1); // Pop receiver
+
+ let out_opnd = asm.stack_push(Type::Unknown); // Push a return value
+ asm.mov(out_opnd, write_val);
+ }
+
+ Some(KeepCompiling)
}
fn gen_defined(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let op_type = jit_get_arg(jit, 0);
- let obj = jit_get_arg(jit, 1);
- let pushval = jit_get_arg(jit, 2);
+) -> Option<CodegenStatus> {
+ let op_type = jit.get_arg(0).as_u64();
+ let obj = jit.get_arg(1);
+ let pushval = jit.get_arg(2);
- // Save the PC and SP because the callee may allocate
- // Note that this modifies REG_SP, which is why we do it first
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ match op_type as u32 {
+ DEFINED_YIELD => {
+ asm.stack_pop(1); // v operand is not used
+ let out_opnd = asm.stack_push(Type::Unknown); // nil or "yield"
- // Get the operands from the stack
- let v_opnd = ctx.stack_pop(1);
-
- // Call vm_defined(ec, reg_cfp, op_type, obj, v)
- mov(cb, C_ARG_REGS[0], REG_EC);
- mov(cb, C_ARG_REGS[1], REG_CFP);
- mov(cb, C_ARG_REGS[2], uimm_opnd(op_type.into()));
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], obj);
- mov(cb, C_ARG_REGS[4], v_opnd);
- call_ptr(cb, REG0, rb_vm_defined as *const u8);
-
- // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) {
- // val = pushval;
- // }
- jit_mov_gc_ptr(jit, cb, REG1, pushval);
- cmp(cb, AL, imm_opnd(0));
- mov(cb, RAX, uimm_opnd(Qnil.into()));
- cmovnz(cb, RAX, REG1);
+ gen_block_given(jit, asm, out_opnd, pushval.into(), Qnil.into());
+ }
+ _ => {
+ // Save the PC and SP because the callee may allocate or call #respond_to?
+ // Note that this modifies REG_SP, which is why we do it first
+ jit_prepare_non_leaf_call(jit, asm);
- // Push the return value onto the stack
- let out_type = if pushval.special_const_p() {
- Type::UnknownImm
- } else {
- Type::Unknown
+ // Get the operands from the stack
+ let v_opnd = asm.stack_opnd(0);
+
+ // Call vm_defined(ec, reg_cfp, op_type, obj, v)
+ let def_result = asm.ccall(rb_vm_defined as *const u8, vec![EC, CFP, op_type.into(), obj.into(), v_opnd]);
+ asm.stack_pop(1); // Keep it on stack during ccall for GC
+
+ // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) {
+ // val = pushval;
+ // }
+ asm.test(def_result, Opnd::UImm(255));
+ let out_value = asm.csel_nz(pushval.into(), Qnil.into());
+
+ // Push the return value onto the stack
+ let out_type = if pushval.special_const_p() {
+ Type::UnknownImm
+ } else {
+ Type::Unknown
+ };
+ let stack_ret = asm.stack_push(out_type);
+ asm.mov(stack_ret, out_value);
+ }
+ }
+
+ Some(KeepCompiling)
+}
+
+fn gen_definedivar(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+    // Defer compilation so we can specialize based on a runtime receiver
+ if !jit.at_current_insn() {
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
+ }
+
+ let ivar_name = jit.get_arg(0).as_u64();
+ // Value that will be pushed on the stack if the ivar is defined. In practice this is always the
+ // string "instance-variable". If the ivar is not defined, nil will be pushed instead.
+ let pushval = jit.get_arg(2);
+
+ // Get the receiver
+ let recv = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF));
+
+    // Specialize based on compile time values
+ let comptime_receiver = jit.peek_at_self();
+
+ if comptime_receiver.shape_too_complex() || asm.ctx.get_chain_depth() >= GET_IVAR_MAX_DEPTH {
+ // Fall back to calling rb_ivar_defined
+
+ // Save the PC and SP because the callee may allocate
+ // Note that this modifies REG_SP, which is why we do it first
+ jit_prepare_call_with_gc(jit, asm);
+
+ // Call rb_ivar_defined(recv, ivar_name)
+ let def_result = asm.ccall(rb_ivar_defined as *const u8, vec![recv, ivar_name.into()]);
+
+ // if (rb_ivar_defined(recv, ivar_name)) {
+ // val = pushval;
+ // }
+ asm.test(def_result, Opnd::UImm(255));
+ let out_value = asm.csel_nz(pushval.into(), Qnil.into());
+
+ // Push the return value onto the stack
+ let out_type = if pushval.special_const_p() { Type::UnknownImm } else { Type::Unknown };
+ let stack_ret = asm.stack_push(out_type);
+ asm.mov(stack_ret, out_value);
+
+ return Some(KeepCompiling)
+ }
+
+ let shape_id = comptime_receiver.shape_id_of();
+ let ivar_exists = unsafe {
+ let shape = rb_shape_get_shape_by_id(shape_id);
+ let mut ivar_index: u32 = 0;
+ rb_shape_get_iv_index(shape, ivar_name, &mut ivar_index)
};
- let stack_ret = ctx.stack_push(out_type);
- mov(cb, stack_ret, RAX);
- KeepCompiling
+ // Guard heap object (recv_opnd must be used before stack_pop)
+ guard_object_is_heap(asm, recv, SelfOpnd, Counter::definedivar_not_heap);
+
+ let shape_id_offset = unsafe { rb_shape_id_offset() };
+ let shape_opnd = Opnd::mem(SHAPE_ID_NUM_BITS as u8, recv, shape_id_offset);
+
+ asm_comment!(asm, "guard shape");
+ asm.cmp(shape_opnd, Opnd::UImm(shape_id as u64));
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ asm,
+ ocb,
+ GET_IVAR_MAX_DEPTH,
+ Counter::definedivar_megamorphic,
+ );
+
+ let result = if ivar_exists { pushval } else { Qnil };
+ jit_putobject(asm, result);
+
+ // Jump to next instruction. This allows guard chains to share the same successor.
+ jump_to_next_insn(jit, asm, ocb);
+
+ return Some(EndBlock);
}
fn gen_checktype(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let type_val = jit_get_arg(jit, 0).as_u32();
+) -> Option<CodegenStatus> {
+ let type_val = jit.get_arg(0).as_u32();
// Only three types are emitted by compile.c at the moment
if let RUBY_T_STRING | RUBY_T_ARRAY | RUBY_T_HASH = type_val {
- let val_type = ctx.get_opnd_type(StackOpnd(0));
- let val = ctx.stack_pop(1);
+ let val_type = asm.ctx.get_opnd_type(StackOpnd(0));
+ let val = asm.stack_pop(1);
// Check if we know from type information
- match (type_val, val_type) {
- (RUBY_T_STRING, Type::TString)
- | (RUBY_T_STRING, Type::CString)
- | (RUBY_T_ARRAY, Type::Array)
- | (RUBY_T_HASH, Type::Hash) => {
- // guaranteed type match
- let stack_ret = ctx.stack_push(Type::True);
- mov(cb, stack_ret, uimm_opnd(Qtrue.as_u64()));
- return KeepCompiling;
- }
- _ if val_type.is_imm() || val_type.is_specific() => {
- // guaranteed not to match T_STRING/T_ARRAY/T_HASH
- let stack_ret = ctx.stack_push(Type::False);
- mov(cb, stack_ret, uimm_opnd(Qfalse.as_u64()));
- return KeepCompiling;
- }
+ match val_type.known_value_type() {
+ Some(value_type) => {
+ if value_type == type_val {
+ jit_putobject(asm, Qtrue);
+ return Some(KeepCompiling);
+ } else {
+ jit_putobject(asm, Qfalse);
+ return Some(KeepCompiling);
+ }
+ },
_ => (),
}
- mov(cb, REG0, val);
- mov(cb, REG1, uimm_opnd(Qfalse.as_u64()));
-
- let ret = cb.new_label("ret".to_string());
+ let ret = asm.new_label("ret");
+ let val = asm.load(val);
if !val_type.is_heap() {
// if (SPECIAL_CONST_P(val)) {
// Return Qfalse via REG1 if not on heap
- test(cb, REG0, uimm_opnd(RUBY_IMMEDIATE_MASK as u64));
- jnz_label(cb, ret);
- cmp(cb, REG0, uimm_opnd(Qnil.as_u64()));
- jbe_label(cb, ret);
+ asm.test(val, (RUBY_IMMEDIATE_MASK as u64).into());
+ asm.jnz(ret);
+ asm.cmp(val, Qfalse.into());
+ asm.je(ret);
}
// Check type on object
- mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS));
- and(cb, REG0, uimm_opnd(RUBY_T_MASK as u64));
- cmp(cb, REG0, uimm_opnd(type_val as u64));
- mov(cb, REG0, uimm_opnd(Qtrue.as_u64()));
- // REG1 contains Qfalse from above
- cmove(cb, REG1, REG0);
-
- cb.write_label(ret);
- let stack_ret = ctx.stack_push(Type::UnknownImm);
- mov(cb, stack_ret, REG1);
- cb.link_labels();
-
- KeepCompiling
+ let object_type = asm.and(
+ Opnd::mem(64, val, RUBY_OFFSET_RBASIC_FLAGS),
+ Opnd::UImm(RUBY_T_MASK.into()));
+ asm.cmp(object_type, Opnd::UImm(type_val.into()));
+ let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into());
+
+ asm.write_label(ret);
+ let stack_ret = asm.stack_push(Type::UnknownImm);
+ asm.mov(stack_ret, ret_opnd);
+
+ Some(KeepCompiling)
} else {
- CantCompile
+ None
}
}
fn gen_concatstrings(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let n = jit_get_arg(jit, 0);
+) -> Option<CodegenStatus> {
+ let n = jit.get_arg(0).as_usize();
- // Save the PC and SP because we are allocating
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ // rb_str_concat_literals may raise Encoding::CompatibilityError
+ jit_prepare_non_leaf_call(jit, asm);
- let values_ptr = ctx.sp_opnd(-((SIZEOF_VALUE as isize) * n.as_isize()));
+ let values_ptr = asm.lea(asm.ctx.sp_opnd(-(n as i32)));
- // call rb_str_concat_literals(long n, const VALUE *strings);
- mov(cb, C_ARG_REGS[0], imm_opnd(n.into()));
- lea(cb, C_ARG_REGS[1], values_ptr);
- call_ptr(cb, REG0, rb_str_concat_literals as *const u8);
+ // call rb_str_concat_literals(size_t n, const VALUE *strings);
+ let return_value = asm.ccall(
+ rb_str_concat_literals as *const u8,
+ vec![n.into(), values_ptr]
+ );
- ctx.stack_pop(n.as_usize());
- let stack_ret = ctx.stack_push(Type::CString);
- mov(cb, stack_ret, RAX);
+ asm.stack_pop(n);
+ let stack_ret = asm.stack_push(Type::TString);
+ asm.mov(stack_ret, return_value);
- KeepCompiling
+ Some(KeepCompiling)
}
-fn guard_two_fixnums(ctx: &mut Context, cb: &mut CodeBlock, side_exit: CodePtr) {
+fn guard_two_fixnums(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+) {
+ let counter = Counter::guard_send_not_fixnums;
+
+ // Get stack operands without popping them
+ let arg1 = asm.stack_opnd(0);
+ let arg0 = asm.stack_opnd(1);
+
// Get the stack operand types
- let arg1_type = ctx.get_opnd_type(StackOpnd(0));
- let arg0_type = ctx.get_opnd_type(StackOpnd(1));
+ let arg1_type = asm.ctx.get_opnd_type(arg1.into());
+ let arg0_type = asm.ctx.get_opnd_type(arg0.into());
if arg0_type.is_heap() || arg1_type.is_heap() {
- add_comment(cb, "arg is heap object");
- jmp_ptr(cb, side_exit);
+ asm_comment!(asm, "arg is heap object");
+ asm.jmp(Target::side_exit(counter));
return;
}
if arg0_type != Type::Fixnum && arg0_type.is_specific() {
- add_comment(cb, "arg0 not fixnum");
- jmp_ptr(cb, side_exit);
+ asm_comment!(asm, "arg0 not fixnum");
+ asm.jmp(Target::side_exit(counter));
return;
}
if arg1_type != Type::Fixnum && arg1_type.is_specific() {
- add_comment(cb, "arg1 not fixnum");
- jmp_ptr(cb, side_exit);
+ asm_comment!(asm, "arg1 not fixnum");
+ asm.jmp(Target::side_exit(counter));
return;
}
@@ -2293,812 +3363,961 @@ fn guard_two_fixnums(ctx: &mut Context, cb: &mut CodeBlock, side_exit: CodePtr)
assert!(arg0_type == Type::Fixnum || arg0_type.is_unknown());
assert!(arg1_type == Type::Fixnum || arg1_type.is_unknown());
- // Get stack operands without popping them
- let arg1 = ctx.stack_opnd(0);
- let arg0 = ctx.stack_opnd(1);
-
- // If not fixnums, fall back
+ // If not fixnums at run-time, fall back
if arg0_type != Type::Fixnum {
- add_comment(cb, "guard arg0 fixnum");
- test(cb, arg0, uimm_opnd(RUBY_FIXNUM_FLAG as u64));
- jz_ptr(cb, side_exit);
+ asm_comment!(asm, "guard arg0 fixnum");
+ asm.test(arg0, Opnd::UImm(RUBY_FIXNUM_FLAG as u64));
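+        // RUBY_FIXNUM_FLAG is the low tag bit, so a zero result means arg0 is not a Fixnum
+        // and the guard chain (or eventually a side exit) takes over.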
+
+ jit_chain_guard(
+ JCC_JZ,
+ jit,
+ asm,
+ ocb,
+ SEND_MAX_DEPTH,
+ counter,
+ );
}
if arg1_type != Type::Fixnum {
- add_comment(cb, "guard arg1 fixnum");
- test(cb, arg1, uimm_opnd(RUBY_FIXNUM_FLAG as u64));
- jz_ptr(cb, side_exit);
+ asm_comment!(asm, "guard arg1 fixnum");
+ asm.test(arg1, Opnd::UImm(RUBY_FIXNUM_FLAG as u64));
+
+ jit_chain_guard(
+ JCC_JZ,
+ jit,
+ asm,
+ ocb,
+ SEND_MAX_DEPTH,
+ counter,
+ );
}
// Set stack types in context
- ctx.upgrade_opnd_type(StackOpnd(0), Type::Fixnum);
- ctx.upgrade_opnd_type(StackOpnd(1), Type::Fixnum);
+ asm.ctx.upgrade_opnd_type(arg1.into(), Type::Fixnum);
+ asm.ctx.upgrade_opnd_type(arg0.into(), Type::Fixnum);
}
// Conditional move operation used by comparison operators
-type CmovFn = fn(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) -> ();
+type CmovFn = fn(cb: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> Opnd;
fn gen_fixnum_cmp(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
cmov_op: CmovFn,
-) -> CodegenStatus {
- // Defer compilation so we can specialize base on a runtime receiver
- if !jit_at_current_insn(jit) {
- defer_compilation(jit, ctx, cb, ocb);
- return EndBlock;
- }
-
- let comptime_a = jit_peek_at_stack(jit, ctx, 1);
- let comptime_b = jit_peek_at_stack(jit, ctx, 0);
-
- if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
- // Create a side-exit to fall back to the interpreter
- // Note: we generate the side-exit before popping operands from the stack
- let side_exit = get_side_exit(jit, ocb, ctx);
+ bop: ruby_basic_operators,
+) -> Option<CodegenStatus> {
+ let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) {
+ Some(two_fixnums) => two_fixnums,
+ None => {
+ // Defer compilation so we can specialize based on a runtime receiver
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
+ }
+ };
- if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_LT) {
- return CantCompile;
+ if two_fixnums {
+ if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, bop) {
+ return None;
}
// Check that both operands are fixnums
- guard_two_fixnums(ctx, cb, side_exit);
+ guard_two_fixnums(jit, asm, ocb);
// Get the operands from the stack
- let arg1 = ctx.stack_pop(1);
- let arg0 = ctx.stack_pop(1);
+ let arg1 = asm.stack_pop(1);
+ let arg0 = asm.stack_pop(1);
// Compare the arguments
- xor(cb, REG0_32, REG0_32); // REG0 = Qfalse
- mov(cb, REG1, arg0);
- cmp(cb, REG1, arg1);
- mov(cb, REG1, uimm_opnd(Qtrue.into()));
- cmov_op(cb, REG0, REG1);
+ asm.cmp(arg0, arg1);
+ let bool_opnd = cmov_op(asm, Qtrue.into(), Qfalse.into());
// Push the output on the stack
- let dst = ctx.stack_push(Type::Unknown);
- mov(cb, dst, REG0);
+ let dst = asm.stack_push(Type::UnknownImm);
+ asm.mov(dst, bool_opnd);
- KeepCompiling
+ Some(KeepCompiling)
} else {
- gen_opt_send_without_block(jit, ctx, cb, ocb)
+ gen_opt_send_without_block(jit, asm, ocb)
}
}
fn gen_opt_lt(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- gen_fixnum_cmp(jit, ctx, cb, ocb, cmovl)
+) -> Option<CodegenStatus> {
+ gen_fixnum_cmp(jit, asm, ocb, Assembler::csel_l, BOP_LT)
}
fn gen_opt_le(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- gen_fixnum_cmp(jit, ctx, cb, ocb, cmovle)
+) -> Option<CodegenStatus> {
+ gen_fixnum_cmp(jit, asm, ocb, Assembler::csel_le, BOP_LE)
}
fn gen_opt_ge(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- gen_fixnum_cmp(jit, ctx, cb, ocb, cmovge)
+) -> Option<CodegenStatus> {
+ gen_fixnum_cmp(jit, asm, ocb, Assembler::csel_ge, BOP_GE)
}
fn gen_opt_gt(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- gen_fixnum_cmp(jit, ctx, cb, ocb, cmovg)
+) -> Option<CodegenStatus> {
+ gen_fixnum_cmp(jit, asm, ocb, Assembler::csel_g, BOP_GT)
}
// Implements specialized equality for either two fixnum or two strings
-// Returns true if code was generated, otherwise false
+// Returns None if not enough type information is available, Some(true)
+// if code was generated, otherwise Some(false).
fn gen_equality_specialized(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
- side_exit: CodePtr,
-) -> bool {
- let comptime_a = jit_peek_at_stack(jit, ctx, 1);
- let comptime_b = jit_peek_at_stack(jit, ctx, 0);
-
- let a_opnd = ctx.stack_opnd(1);
- let b_opnd = ctx.stack_opnd(0);
+ gen_eq: bool,
+) -> Option<bool> {
+ let a_opnd = asm.stack_opnd(1);
+ let b_opnd = asm.stack_opnd(0);
+
+ let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) {
+ Some(two_fixnums) => two_fixnums,
+ None => return None,
+ };
- if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
- if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_EQ) {
+ if two_fixnums {
+ if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_EQ) {
// if overridden, emit the generic version
- return false;
+ return Some(false);
}
- guard_two_fixnums(ctx, cb, side_exit);
-
- mov(cb, REG0, a_opnd);
- cmp(cb, REG0, b_opnd);
+ guard_two_fixnums(jit, asm, ocb);
- mov(cb, REG0, imm_opnd(Qfalse.into()));
- mov(cb, REG1, imm_opnd(Qtrue.into()));
- cmove(cb, REG0, REG1);
+ asm.cmp(a_opnd, b_opnd);
+ let val = if gen_eq {
+ asm.csel_e(Qtrue.into(), Qfalse.into())
+ } else {
+ asm.csel_ne(Qtrue.into(), Qfalse.into())
+ };
// Push the output on the stack
- ctx.stack_pop(2);
- let dst = ctx.stack_push(Type::UnknownImm);
- mov(cb, dst, REG0);
+ asm.stack_pop(2);
+ let dst = asm.stack_push(Type::UnknownImm);
+ asm.mov(dst, val);
- true
- } else if unsafe { comptime_a.class_of() == rb_cString && comptime_b.class_of() == rb_cString }
- {
- if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_EQ) {
+ return Some(true);
+ }
+
+ if !jit.at_current_insn() {
+ return None;
+ }
+ let comptime_a = jit.peek_at_stack(&asm.ctx, 1);
+ let comptime_b = jit.peek_at_stack(&asm.ctx, 0);
+
+ if unsafe { comptime_a.class_of() == rb_cString && comptime_b.class_of() == rb_cString } {
+ if !assume_bop_not_redefined(jit, asm, ocb, STRING_REDEFINED_OP_FLAG, BOP_EQ) {
// if overridden, emit the generic version
- return false;
+ return Some(false);
}
- // Load a and b in preparation for call later
- mov(cb, C_ARG_REGS[0], a_opnd);
- mov(cb, C_ARG_REGS[1], b_opnd);
-
// Guard that a is a String
- mov(cb, REG0, C_ARG_REGS[0]);
jit_guard_known_klass(
jit,
- ctx,
- cb,
+ asm,
ocb,
unsafe { rb_cString },
- StackOpnd(1),
+ a_opnd,
+ a_opnd.into(),
comptime_a,
SEND_MAX_DEPTH,
- side_exit,
+ Counter::guard_send_not_string,
);
- let ret = cb.new_label("ret".to_string());
+ let equal = asm.new_label("equal");
+ let ret = asm.new_label("ret");
+
+ // Spill for ccall. For safety, unconditionally spill temps before branching.
+ asm.spill_temps();
// If they are equal by identity, return true
- cmp(cb, C_ARG_REGS[0], C_ARG_REGS[1]);
- mov(cb, RAX, imm_opnd(Qtrue.into()));
- je_label(cb, ret);
+ asm.cmp(a_opnd, b_opnd);
+ asm.je(equal);
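+        // Identical VALUEs are equal by definition, so the C call is skipped; the `equal`
+        // label below just writes the constant result into the output slot.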
// Otherwise guard that b is a T_STRING (from type info) or String (from runtime guard)
- let btype = ctx.get_opnd_type(StackOpnd(0));
- if btype != Type::TString && btype != Type::CString {
- mov(cb, REG0, C_ARG_REGS[1]);
+ let btype = asm.ctx.get_opnd_type(b_opnd.into());
+ if btype.known_value_type() != Some(RUBY_T_STRING) {
// Note: any T_STRING is valid here, but we check for a ::String for simplicity
// To pass a mutable static variable (rb_cString) requires an unsafe block
jit_guard_known_klass(
jit,
- ctx,
- cb,
+ asm,
ocb,
unsafe { rb_cString },
- StackOpnd(0),
+ b_opnd,
+ b_opnd.into(),
comptime_b,
SEND_MAX_DEPTH,
- side_exit,
+ Counter::guard_send_not_string,
);
}
// Call rb_str_eql_internal(a, b)
- call_ptr(cb, REG0, rb_str_eql_internal as *const u8);
+ let val = asm.ccall(
+ if gen_eq { rb_str_eql_internal } else { rb_str_neq_internal } as *const u8,
+ vec![a_opnd, b_opnd],
+ );
// Push the output on the stack
- cb.write_label(ret);
- ctx.stack_pop(2);
- let dst = ctx.stack_push(Type::UnknownImm);
- mov(cb, dst, RAX);
- cb.link_labels();
+ asm.stack_pop(2);
+ let dst = asm.stack_push(Type::UnknownImm);
+ asm.mov(dst, val);
+ asm.jmp(ret);
- true
+ asm.write_label(equal);
+ asm.mov(dst, if gen_eq { Qtrue } else { Qfalse }.into());
+
+ asm.write_label(ret);
+
+ Some(true)
} else {
- false
+ Some(false)
}
}
fn gen_opt_eq(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- // Defer compilation so we can specialize base on a runtime receiver
- if !jit_at_current_insn(jit) {
- defer_compilation(jit, ctx, cb, ocb);
- return EndBlock;
- }
-
- // Create a side-exit to fall back to the interpreter
- let side_exit = get_side_exit(jit, ocb, ctx);
+) -> Option<CodegenStatus> {
+ let specialized = match gen_equality_specialized(jit, asm, ocb, true) {
+ Some(specialized) => specialized,
+ None => {
+            // Defer compilation so we can specialize based on a runtime receiver
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
+ }
+ };
- if gen_equality_specialized(jit, ctx, cb, ocb, side_exit) {
- jump_to_next_insn(jit, ctx, cb, ocb);
- EndBlock
+ if specialized {
+ jump_to_next_insn(jit, asm, ocb);
+ Some(EndBlock)
} else {
- gen_opt_send_without_block(jit, ctx, cb, ocb)
+ gen_opt_send_without_block(jit, asm, ocb)
}
}
fn gen_opt_neq(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
// opt_neq is passed two rb_call_data as arguments:
// first for ==, second for !=
- let cd = jit_get_arg(jit, 1).as_ptr();
- return gen_send_general(jit, ctx, cb, ocb, cd, None);
+ let cd = jit.get_arg(1).as_ptr();
+ perf_call! { gen_send_general(jit, asm, ocb, cd, None) }
}
fn gen_opt_aref(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let cd: *const rb_call_data = jit_get_arg(jit, 0).as_ptr();
+) -> Option<CodegenStatus> {
+ let cd: *const rb_call_data = jit.get_arg(0).as_ptr();
let argc = unsafe { vm_ci_argc((*cd).ci) };
// Only JIT one arg calls like `ary[6]`
if argc != 1 {
- gen_counter_incr!(cb, oaref_argc_not_one);
- return CantCompile;
+ gen_counter_incr(asm, Counter::opt_aref_argc_not_one);
+ return None;
}
     // Defer compilation so we can specialize based on a runtime receiver
- if !jit_at_current_insn(jit) {
- defer_compilation(jit, ctx, cb, ocb);
- return EndBlock;
+ if !jit.at_current_insn() {
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
}
- // Remember the context on entry for adding guard chains
- let starting_context = *ctx;
-
// Specialize based on compile-time values
- let comptime_idx = jit_peek_at_stack(jit, ctx, 0);
- let comptime_recv = jit_peek_at_stack(jit, ctx, 1);
-
- // Create a side-exit to fall back to the interpreter
- let side_exit = get_side_exit(jit, ocb, ctx);
+ let comptime_idx = jit.peek_at_stack(&asm.ctx, 0);
+ let comptime_recv = jit.peek_at_stack(&asm.ctx, 1);
if comptime_recv.class_of() == unsafe { rb_cArray } && comptime_idx.fixnum_p() {
- if !assume_bop_not_redefined(jit, ocb, ARRAY_REDEFINED_OP_FLAG, BOP_AREF) {
- return CantCompile;
+ if !assume_bop_not_redefined(jit, asm, ocb, ARRAY_REDEFINED_OP_FLAG, BOP_AREF) {
+ return None;
}
- // Pop the stack operands
- let idx_opnd = ctx.stack_pop(1);
- let recv_opnd = ctx.stack_pop(1);
- mov(cb, REG0, recv_opnd);
-
- // if (SPECIAL_CONST_P(recv)) {
- // Bail if receiver is not a heap object
- test(cb, REG0, uimm_opnd(RUBY_IMMEDIATE_MASK as u64));
- jnz_ptr(cb, side_exit);
- cmp(cb, REG0, uimm_opnd(Qfalse.into()));
- je_ptr(cb, side_exit);
- cmp(cb, REG0, uimm_opnd(Qnil.into()));
- je_ptr(cb, side_exit);
+ // Get the stack operands
+ let idx_opnd = asm.stack_opnd(0);
+ let recv_opnd = asm.stack_opnd(1);
- // Bail if recv has a class other than ::Array.
+ // Guard that the receiver is an ::Array
// BOP_AREF check above is only good for ::Array.
- mov(cb, REG1, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_KLASS));
- mov(cb, REG0, uimm_opnd(unsafe { rb_cArray }.into()));
- cmp(cb, REG0, REG1);
- jit_chain_guard(
- JCC_JNE,
+ jit_guard_known_klass(
jit,
- &starting_context,
- cb,
+ asm,
ocb,
+ unsafe { rb_cArray },
+ recv_opnd,
+ recv_opnd.into(),
+ comptime_recv,
OPT_AREF_MAX_CHAIN_DEPTH,
- side_exit,
+ Counter::opt_aref_not_array,
);
// Bail if idx is not a FIXNUM
- mov(cb, REG1, idx_opnd);
- test(cb, REG1, uimm_opnd(RUBY_FIXNUM_FLAG as u64));
- jz_ptr(cb, counted_exit!(ocb, side_exit, oaref_arg_not_fixnum));
+ let idx_reg = asm.load(idx_opnd);
+ asm.test(idx_reg, (RUBY_FIXNUM_FLAG as u64).into());
+ asm.jz(Target::side_exit(Counter::opt_aref_arg_not_fixnum));
// Call VALUE rb_ary_entry_internal(VALUE ary, long offset).
// It never raises or allocates, so we don't need to write to cfp->pc.
{
- mov(cb, RDI, recv_opnd);
- sar(cb, REG1, uimm_opnd(1)); // Convert fixnum to int
- mov(cb, RSI, REG1);
- call_ptr(cb, REG0, rb_ary_entry_internal as *const u8);
+ // Pop the argument and the receiver
+ asm.stack_pop(2);
+
+ let idx_reg = asm.rshift(idx_reg, Opnd::UImm(1)); // Convert fixnum to int
+ let val = asm.ccall(rb_ary_entry_internal as *const u8, vec![recv_opnd, idx_reg]);
// Push the return value onto the stack
- let stack_ret = ctx.stack_push(Type::Unknown);
- mov(cb, stack_ret, RAX);
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, val);
}
// Jump to next instruction. This allows guard chains to share the same successor.
- jump_to_next_insn(jit, ctx, cb, ocb);
- return EndBlock;
+ jump_to_next_insn(jit, asm, ocb);
+ return Some(EndBlock);
} else if comptime_recv.class_of() == unsafe { rb_cHash } {
- if !assume_bop_not_redefined(jit, ocb, HASH_REDEFINED_OP_FLAG, BOP_AREF) {
- return CantCompile;
+ if !assume_bop_not_redefined(jit, asm, ocb, HASH_REDEFINED_OP_FLAG, BOP_AREF) {
+ return None;
}
- let key_opnd = ctx.stack_opnd(0);
- let recv_opnd = ctx.stack_opnd(1);
+ let recv_opnd = asm.stack_opnd(1);
// Guard that the receiver is a hash
- mov(cb, REG0, recv_opnd);
jit_guard_known_klass(
jit,
- ctx,
- cb,
+ asm,
ocb,
unsafe { rb_cHash },
- StackOpnd(1),
+ recv_opnd,
+ recv_opnd.into(),
comptime_recv,
OPT_AREF_MAX_CHAIN_DEPTH,
- side_exit,
+ Counter::opt_aref_not_hash,
);
- // Setup arguments for rb_hash_aref().
- mov(cb, C_ARG_REGS[0], REG0);
- mov(cb, C_ARG_REGS[1], key_opnd);
-
// Prepare to call rb_hash_aref(). It might call #hash on the key.
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_non_leaf_call(jit, asm);
- call_ptr(cb, REG0, rb_hash_aref as *const u8);
+ // Call rb_hash_aref
+ let key_opnd = asm.stack_opnd(0);
+ let recv_opnd = asm.stack_opnd(1);
+ let val = asm.ccall(rb_hash_aref as *const u8, vec![recv_opnd, key_opnd]);
// Pop the key and the receiver
- ctx.stack_pop(2);
+ asm.stack_pop(2);
// Push the return value onto the stack
- let stack_ret = ctx.stack_push(Type::Unknown);
- mov(cb, stack_ret, RAX);
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, val);
// Jump to next instruction. This allows guard chains to share the same successor.
- jump_to_next_insn(jit, ctx, cb, ocb);
- EndBlock
+ jump_to_next_insn(jit, asm, ocb);
+ Some(EndBlock)
} else {
// General case. Call the [] method.
- gen_opt_send_without_block(jit, ctx, cb, ocb)
+ gen_opt_send_without_block(jit, asm, ocb)
}
}
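The `rshift` by 1 above (the old code's `sar`) is standard fixnum untagging: a fixnum `v` is stored as `(v << 1) | 1`, so an arithmetic right shift by one recovers `v` before it is passed to `rb_ary_entry_internal`. A minimal sketch of that identity, with an illustrative `tag` helper that is not part of YJIT:

    fn tag(v: i64) -> i64 {
        (v << 1) | 1 // fixnum tagging: value shifted left with the low tag bit set
    }

    fn main() {
        let idx = 6_i64;
        assert_eq!(tag(idx), 13);       // tagged index as it sits on the VM stack
        assert_eq!(tag(idx) >> 1, idx); // the arithmetic shift drops the tag bit
    }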
fn gen_opt_aset(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
- if !jit_at_current_insn(jit) {
- defer_compilation(jit, ctx, cb, ocb);
- return EndBlock;
+ if !jit.at_current_insn() {
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
}
- let comptime_recv = jit_peek_at_stack(jit, ctx, 2);
- let comptime_key = jit_peek_at_stack(jit, ctx, 1);
+ let comptime_recv = jit.peek_at_stack(&asm.ctx, 2);
+ let comptime_key = jit.peek_at_stack(&asm.ctx, 1);
// Get the operands from the stack
- let recv = ctx.stack_opnd(2);
- let key = ctx.stack_opnd(1);
- let val = ctx.stack_opnd(0);
+ let recv = asm.stack_opnd(2);
+ let key = asm.stack_opnd(1);
+ let _val = asm.stack_opnd(0);
if comptime_recv.class_of() == unsafe { rb_cArray } && comptime_key.fixnum_p() {
- let side_exit = get_side_exit(jit, ocb, ctx);
-
// Guard receiver is an Array
- mov(cb, REG0, recv);
jit_guard_known_klass(
jit,
- ctx,
- cb,
+ asm,
ocb,
unsafe { rb_cArray },
- StackOpnd(2),
+ recv,
+ recv.into(),
comptime_recv,
SEND_MAX_DEPTH,
- side_exit,
+ Counter::opt_aset_not_array,
);
// Guard key is a fixnum
- mov(cb, REG0, key);
jit_guard_known_klass(
jit,
- ctx,
- cb,
+ asm,
ocb,
unsafe { rb_cInteger },
- StackOpnd(1),
+ key,
+ key.into(),
comptime_key,
SEND_MAX_DEPTH,
- side_exit,
+ Counter::opt_aset_not_fixnum,
);
- // Call rb_ary_store
- mov(cb, C_ARG_REGS[0], recv);
- mov(cb, C_ARG_REGS[1], key);
- sar(cb, C_ARG_REGS[1], uimm_opnd(1)); // FIX2LONG(key)
- mov(cb, C_ARG_REGS[2], val);
-
// We might allocate or raise
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_non_leaf_call(jit, asm);
- call_ptr(cb, REG0, rb_ary_store as *const u8);
+ // Call rb_ary_store
+ let recv = asm.stack_opnd(2);
+ let key = asm.load(asm.stack_opnd(1));
+ let key = asm.rshift(key, Opnd::UImm(1)); // FIX2LONG(key)
+ let val = asm.stack_opnd(0);
+ asm.ccall(rb_ary_store as *const u8, vec![recv, key, val]);
// rb_ary_store returns void
// stored value should still be on stack
- mov(cb, REG0, ctx.stack_opnd(0));
+ let val = asm.load(asm.stack_opnd(0));
// Push the return value onto the stack
- ctx.stack_pop(3);
- let stack_ret = ctx.stack_push(Type::Unknown);
- mov(cb, stack_ret, REG0);
+ asm.stack_pop(3);
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, val);
- jump_to_next_insn(jit, ctx, cb, ocb);
- return EndBlock;
+ jump_to_next_insn(jit, asm, ocb);
+ return Some(EndBlock);
} else if comptime_recv.class_of() == unsafe { rb_cHash } {
- let side_exit = get_side_exit(jit, ocb, ctx);
-
// Guard receiver is a Hash
- mov(cb, REG0, recv);
jit_guard_known_klass(
jit,
- ctx,
- cb,
+ asm,
ocb,
unsafe { rb_cHash },
- StackOpnd(2),
+ recv,
+ recv.into(),
comptime_recv,
SEND_MAX_DEPTH,
- side_exit,
+ Counter::opt_aset_not_hash,
);
- // Call rb_hash_aset
- mov(cb, C_ARG_REGS[0], recv);
- mov(cb, C_ARG_REGS[1], key);
- mov(cb, C_ARG_REGS[2], val);
-
// We might allocate or raise
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_non_leaf_call(jit, asm);
- call_ptr(cb, REG0, rb_hash_aset as *const u8);
+ // Call rb_hash_aset
+ let recv = asm.stack_opnd(2);
+ let key = asm.stack_opnd(1);
+ let val = asm.stack_opnd(0);
+ let ret = asm.ccall(rb_hash_aset as *const u8, vec![recv, key, val]);
// Push the return value onto the stack
- ctx.stack_pop(3);
- let stack_ret = ctx.stack_push(Type::Unknown);
- mov(cb, stack_ret, RAX);
+ asm.stack_pop(3);
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, ret);
- jump_to_next_insn(jit, ctx, cb, ocb);
- EndBlock
+ jump_to_next_insn(jit, asm, ocb);
+ Some(EndBlock)
} else {
- gen_opt_send_without_block(jit, ctx, cb, ocb)
+ gen_opt_send_without_block(jit, asm, ocb)
}
}
-fn gen_opt_and(
+fn gen_opt_aref_with(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
- ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- // Defer compilation so we can specialize on a runtime `self`
- if !jit_at_current_insn(jit) {
- defer_compilation(jit, ctx, cb, ocb);
- return EndBlock;
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ // We might allocate or raise
+ jit_prepare_non_leaf_call(jit, asm);
+
+ let key_opnd = Opnd::Value(jit.get_arg(0));
+ let recv_opnd = asm.stack_opnd(0);
+
+ extern "C" {
+ fn rb_vm_opt_aref_with(recv: VALUE, key: VALUE) -> VALUE;
}
- let comptime_a = jit_peek_at_stack(jit, ctx, 1);
- let comptime_b = jit_peek_at_stack(jit, ctx, 0);
+ let val_opnd = asm.ccall(
+ rb_vm_opt_aref_with as *const u8,
+ vec![
+ recv_opnd,
+ key_opnd
+ ],
+ );
+ asm.stack_pop(1); // Pop after the call to keep the receiver on the stack during GC
+
+ asm.cmp(val_opnd, Qundef.into());
+ asm.je(Target::side_exit(Counter::opt_aref_with_qundef));
- if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
- // Create a side-exit to fall back to the interpreter
- // Note: we generate the side-exit before popping operands from the stack
- let side_exit = get_side_exit(jit, ocb, ctx);
+ let top = asm.stack_push(Type::Unknown);
+ asm.mov(top, val_opnd);
- if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_AND) {
- return CantCompile;
+ return Some(KeepCompiling);
+}
+
+fn gen_opt_and(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) {
+ Some(two_fixnums) => two_fixnums,
+ None => {
+ // Defer compilation so we can specialize on a runtime `self`
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
+ }
+ };
+
+ if two_fixnums {
+ if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_AND) {
+ return None;
}
// Check that both operands are fixnums
- guard_two_fixnums(ctx, cb, side_exit);
+ guard_two_fixnums(jit, asm, ocb);
// Get the operands and destination from the stack
- let arg1 = ctx.stack_pop(1);
- let arg0 = ctx.stack_pop(1);
+ let arg1 = asm.stack_pop(1);
+ let arg0 = asm.stack_pop(1);
// Do the bitwise and arg0 & arg1
- mov(cb, REG0, arg0);
- and(cb, REG0, arg1);
+ let val = asm.and(arg0, arg1);
// Push the output on the stack
- let dst = ctx.stack_push(Type::Fixnum);
- mov(cb, dst, REG0);
+ let dst = asm.stack_push(Type::Fixnum);
+ asm.mov(dst, val);
- KeepCompiling
+ Some(KeepCompiling)
} else {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, ctx, cb, ocb)
+ gen_opt_send_without_block(jit, asm, ocb)
}
}
fn gen_opt_or(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- // Defer compilation so we can specialize on a runtime `self`
- if !jit_at_current_insn(jit) {
- defer_compilation(jit, ctx, cb, ocb);
- return EndBlock;
- }
-
- let comptime_a = jit_peek_at_stack(jit, ctx, 1);
- let comptime_b = jit_peek_at_stack(jit, ctx, 0);
-
- if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
- // Create a side-exit to fall back to the interpreter
- // Note: we generate the side-exit before popping operands from the stack
- let side_exit = get_side_exit(jit, ocb, ctx);
+) -> Option<CodegenStatus> {
+ let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) {
+ Some(two_fixnums) => two_fixnums,
+ None => {
+ // Defer compilation so we can specialize on a runtime `self`
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
+ }
+ };
- if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_OR) {
- return CantCompile;
+ if two_fixnums {
+ if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_OR) {
+ return None;
}
// Check that both operands are fixnums
- guard_two_fixnums(ctx, cb, side_exit);
+ guard_two_fixnums(jit, asm, ocb);
// Get the operands and destination from the stack
- let arg1 = ctx.stack_pop(1);
- let arg0 = ctx.stack_pop(1);
+ let arg1 = asm.stack_pop(1);
+ let arg0 = asm.stack_pop(1);
// Do the bitwise or arg0 | arg1
- mov(cb, REG0, arg0);
- or(cb, REG0, arg1);
+ let val = asm.or(arg0, arg1);
// Push the output on the stack
- let dst = ctx.stack_push(Type::Fixnum);
- mov(cb, dst, REG0);
+ let dst = asm.stack_push(Type::Fixnum);
+ asm.mov(dst, val);
- KeepCompiling
+ Some(KeepCompiling)
} else {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, ctx, cb, ocb)
+ gen_opt_send_without_block(jit, asm, ocb)
}
}
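`gen_opt_and` and `gen_opt_or` above apply `&`/`|` directly to the tagged operands, with no untagging and no overflow check. That is sound because both fixnums have the tag bit set, so the result of either operation carries it too and is already a valid fixnum; a small sketch of the identity (illustrative `tag` helper, not YJIT code):

    fn tag(v: i64) -> i64 {
        (v << 1) | 1
    }

    fn main() {
        let (a, b) = (6_i64, 3_i64);
        assert_eq!(tag(a) & tag(b), tag(a & b)); // 13 & 7 == 5 == tag(2)
        assert_eq!(tag(a) | tag(b), tag(a | b)); // 13 | 7 == 15 == tag(7)
    }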
fn gen_opt_minus(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- // Defer compilation so we can specialize on a runtime `self`
- if !jit_at_current_insn(jit) {
- defer_compilation(jit, ctx, cb, ocb);
- return EndBlock;
- }
-
- let comptime_a = jit_peek_at_stack(jit, ctx, 1);
- let comptime_b = jit_peek_at_stack(jit, ctx, 0);
-
- if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
- // Create a side-exit to fall back to the interpreter
- // Note: we generate the side-exit before popping operands from the stack
- let side_exit = get_side_exit(jit, ocb, ctx);
+) -> Option<CodegenStatus> {
+ let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) {
+ Some(two_fixnums) => two_fixnums,
+ None => {
+ // Defer compilation so we can specialize on a runtime `self`
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
+ }
+ };
- if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS) {
- return CantCompile;
+ if two_fixnums {
+ if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS) {
+ return None;
}
// Check that both operands are fixnums
- guard_two_fixnums(ctx, cb, side_exit);
+ guard_two_fixnums(jit, asm, ocb);
// Get the operands and destination from the stack
- let arg1 = ctx.stack_pop(1);
- let arg0 = ctx.stack_pop(1);
+ let arg1 = asm.stack_pop(1);
+ let arg0 = asm.stack_pop(1);
// Subtract arg0 - arg1 and test for overflow
- mov(cb, REG0, arg0);
- sub(cb, REG0, arg1);
- jo_ptr(cb, side_exit);
- add(cb, REG0, imm_opnd(1));
+ let val_untag = asm.sub(arg0, arg1);
+ asm.jo(Target::side_exit(Counter::opt_minus_overflow));
+ let val = asm.add(val_untag, Opnd::Imm(1));
// Push the output on the stack
- let dst = ctx.stack_push(Type::Fixnum);
- mov(cb, dst, REG0);
+ let dst = asm.stack_push(Type::Fixnum);
+ asm.mov(dst, val);
- KeepCompiling
+ Some(KeepCompiling)
} else {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, ctx, cb, ocb)
+ gen_opt_send_without_block(jit, asm, ocb)
}
}
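The subtract-then-add-one sequence in `gen_opt_minus` is the tagged subtraction identity: `tag(a) - tag(b)` cancels the two tag bits and leaves `(a - b) << 1`, so adding 1 re-tags the result, and the `jo` side-exits if the subtraction leaves the fixnum range. Worked numbers:

    fn main() {
        // tag(10) == 21 and tag(4) == 9 under the (v << 1) | 1 encoding
        let untagged = 21 - 9;                  // == (10 - 4) << 1; the tag bits cancel
        assert_eq!(untagged + 1, (6 << 1) | 1); // + 1 re-tags: 13 == tag(6)
    }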
fn gen_opt_mult(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- // Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, ctx, cb, ocb)
+) -> Option<CodegenStatus> {
+ let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) {
+ Some(two_fixnums) => two_fixnums,
+ None => {
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
+ }
+ };
+
+ // Fall back to a method call if the multiplication overflows
+ if two_fixnums && asm.ctx.get_chain_depth() == 0 {
+ if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MULT) {
+ return None;
+ }
+
+ // Check that both operands are fixnums
+ guard_two_fixnums(jit, asm, ocb);
+
+ // Get the operands from the stack
+ let arg1 = asm.stack_pop(1);
+ let arg0 = asm.stack_pop(1);
+
+ // Do some bitwise gymnastics to handle tag bits
+ // x * y is translated to (x >> 1) * (y - 1) + 1
+ let arg0_untag = asm.rshift(arg0, Opnd::UImm(1));
+ let arg1_untag = asm.sub(arg1, Opnd::UImm(1));
+ let out_val = asm.mul(arg0_untag, arg1_untag);
+ jit_chain_guard(JCC_JO_MUL, jit, asm, ocb, 1, Counter::opt_mult_overflow);
+ let out_val = asm.add(out_val, Opnd::UImm(1));
+
+ // Push the output on the stack
+ let dst = asm.stack_push(Type::Fixnum);
+ asm.mov(dst, out_val);
+
+ Some(KeepCompiling)
+ } else {
+ gen_opt_send_without_block(jit, asm, ocb)
+ }
}
fn gen_opt_div(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, ctx, cb, ocb)
+ gen_opt_send_without_block(jit, asm, ocb)
}
fn gen_opt_mod(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- // Save the PC and SP because the callee may allocate bignums
- // Note that this modifies REG_SP, which is why we do it first
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+) -> Option<CodegenStatus> {
+ let two_fixnums = match asm.ctx.two_fixnums_on_stack(jit) {
+ Some(two_fixnums) => two_fixnums,
+ None => {
+ // Defer compilation so we can specialize on a runtime `self`
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
+ }
+ };
- let side_exit = get_side_exit(jit, ocb, ctx);
+ if two_fixnums {
+ if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MOD) {
+ return None;
+ }
- // Get the operands from the stack
- let arg1 = ctx.stack_pop(1);
- let arg0 = ctx.stack_pop(1);
+ // Check that both operands are fixnums
+ guard_two_fixnums(jit, asm, ocb);
+
+ // Get the operands and destination from the stack
+ let arg1 = asm.stack_pop(1);
+ let arg0 = asm.stack_pop(1);
- // Call rb_vm_opt_mod(VALUE recv, VALUE obj)
- mov(cb, C_ARG_REGS[0], arg0);
- mov(cb, C_ARG_REGS[1], arg1);
- call_ptr(cb, REG0, rb_vm_opt_mod as *const u8);
+ // Check for arg0 % 0
+ asm.cmp(arg1, Opnd::Imm(VALUE::fixnum_from_usize(0).as_i64()));
+ asm.je(Target::side_exit(Counter::opt_mod_zero));
- // If val == Qundef, bail to do a method call
- cmp(cb, RAX, imm_opnd(Qundef.as_i64()));
- je_ptr(cb, side_exit);
+ // Call rb_fix_mod_fix(VALUE recv, VALUE obj)
+ let ret = asm.ccall(rb_fix_mod_fix as *const u8, vec![arg0, arg1]);
- // Push the return value onto the stack
- let stack_ret = ctx.stack_push(Type::Unknown);
- mov(cb, stack_ret, RAX);
+ // Push the return value onto the stack
+ // When the two arguments are fixnums, the modulo output is always a fixnum
+ let stack_ret = asm.stack_push(Type::Fixnum);
+ asm.mov(stack_ret, ret);
- KeepCompiling
+ Some(KeepCompiling)
+ } else {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, asm, ocb)
+ }
}
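The divisor check in `gen_opt_mod` compares against `VALUE::fixnum_from_usize(0)`, which is the tagged representation of zero (the value 1 under `(v << 1) | 1` tagging), so a zero modulus side-exits before `rb_fix_mod_fix` is called. The constant being compared:

    fn main() {
        assert_eq!((0_i64 << 1) | 1, 1); // tagged fixnum 0, the immediate used in the cmp above
    }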
fn gen_opt_ltlt(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, ctx, cb, ocb)
+ gen_opt_send_without_block(jit, asm, ocb)
}
fn gen_opt_nil_p(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, ctx, cb, ocb)
+ gen_opt_send_without_block(jit, asm, ocb)
}
fn gen_opt_empty_p(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, ctx, cb, ocb)
+ gen_opt_send_without_block(jit, asm, ocb)
}
fn gen_opt_succ(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
// Delegate to send, call the method on the recv
- gen_opt_send_without_block(jit, ctx, cb, ocb)
+ gen_opt_send_without_block(jit, asm, ocb)
}
fn gen_opt_str_freeze(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_FREEZE) {
- return CantCompile;
+) -> Option<CodegenStatus> {
+ if !assume_bop_not_redefined(jit, asm, ocb, STRING_REDEFINED_OP_FLAG, BOP_FREEZE) {
+ return None;
}
- let str = jit_get_arg(jit, 0);
- jit_mov_gc_ptr(jit, cb, REG0, str);
+ let str = jit.get_arg(0);
// Push the return value onto the stack
- let stack_ret = ctx.stack_push(Type::CString);
- mov(cb, stack_ret, REG0);
+ let stack_ret = asm.stack_push(Type::CString);
+ asm.mov(stack_ret, str.into());
- KeepCompiling
+ Some(KeepCompiling)
}
fn gen_opt_str_uminus(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_UMINUS) {
- return CantCompile;
+) -> Option<CodegenStatus> {
+ if !assume_bop_not_redefined(jit, asm, ocb, STRING_REDEFINED_OP_FLAG, BOP_UMINUS) {
+ return None;
}
- let str = jit_get_arg(jit, 0);
- jit_mov_gc_ptr(jit, cb, REG0, str);
+ let str = jit.get_arg(0);
// Push the return value onto the stack
- let stack_ret = ctx.stack_push(Type::CString);
- mov(cb, stack_ret, REG0);
+ let stack_ret = asm.stack_push(Type::CString);
+ asm.mov(stack_ret, str.into());
- KeepCompiling
+ Some(KeepCompiling)
+}
+
+fn gen_opt_newarray_max(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ let num = jit.get_arg(0).as_u32();
+
+ // Save the PC and SP because we may call #max
+ jit_prepare_non_leaf_call(jit, asm);
+
+ extern "C" {
+ fn rb_vm_opt_newarray_max(ec: EcPtr, num: u32, elts: *const VALUE) -> VALUE;
+ }
+
+ let values_opnd = asm.ctx.sp_opnd(-(num as i32));
+ let values_ptr = asm.lea(values_opnd);
+
+ let val_opnd = asm.ccall(
+ rb_vm_opt_newarray_max as *const u8,
+ vec![
+ EC,
+ num.into(),
+ values_ptr
+ ],
+ );
+
+ asm.stack_pop(num.as_usize());
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, val_opnd);
+
+ Some(KeepCompiling)
+}
+
+fn gen_opt_newarray_send(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ let method = jit.get_arg(1).as_u64();
+
+ if method == ID!(min) {
+ gen_opt_newarray_min(jit, asm, _ocb)
+ } else if method == ID!(max) {
+ gen_opt_newarray_max(jit, asm, _ocb)
+ } else if method == ID!(hash) {
+ gen_opt_newarray_hash(jit, asm, _ocb)
+ } else {
+ None
+ }
+}
+
+fn gen_opt_newarray_hash(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+
+ let num = jit.get_arg(0).as_u32();
+
+ // Save the PC and SP because we may call #hash
+ jit_prepare_non_leaf_call(jit, asm);
+
+ extern "C" {
+ fn rb_vm_opt_newarray_hash(ec: EcPtr, num: u32, elts: *const VALUE) -> VALUE;
+ }
+
+ let values_opnd = asm.ctx.sp_opnd(-(num as i32));
+ let values_ptr = asm.lea(values_opnd);
+
+ let val_opnd = asm.ccall(
+ rb_vm_opt_newarray_hash as *const u8,
+ vec![
+ EC,
+ num.into(),
+ values_ptr
+ ],
+ );
+
+ asm.stack_pop(num.as_usize());
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, val_opnd);
+
+ Some(KeepCompiling)
+}
+
+fn gen_opt_newarray_min(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+
+ let num = jit.get_arg(0).as_u32();
+
+ // Save the PC and SP because we may call #min
+ jit_prepare_non_leaf_call(jit, asm);
+
+ extern "C" {
+ fn rb_vm_opt_newarray_min(ec: EcPtr, num: u32, elts: *const VALUE) -> VALUE;
+ }
+
+ let values_opnd = asm.ctx.sp_opnd(-(num as i32));
+ let values_ptr = asm.lea(values_opnd);
+
+ let val_opnd = asm.ccall(
+ rb_vm_opt_newarray_min as *const u8,
+ vec![
+ EC,
+ num.into(),
+ values_ptr
+ ],
+ );
+
+ asm.stack_pop(num.as_usize());
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, val_opnd);
+
+ Some(KeepCompiling)
}
fn gen_opt_not(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- return gen_opt_send_without_block(jit, ctx, cb, ocb);
+) -> Option<CodegenStatus> {
+ return gen_opt_send_without_block(jit, asm, ocb);
}
fn gen_opt_size(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- return gen_opt_send_without_block(jit, ctx, cb, ocb);
+) -> Option<CodegenStatus> {
+ return gen_opt_send_without_block(jit, asm, ocb);
}
fn gen_opt_length(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- return gen_opt_send_without_block(jit, ctx, cb, ocb);
+) -> Option<CodegenStatus> {
+ return gen_opt_send_without_block(jit, asm, ocb);
}
fn gen_opt_regexpmatch2(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- return gen_opt_send_without_block(jit, ctx, cb, ocb);
+) -> Option<CodegenStatus> {
+ return gen_opt_send_without_block(jit, asm, ocb);
}
fn gen_opt_case_dispatch(
- _jit: &mut JITState,
- ctx: &mut Context,
- _cb: &mut CodeBlock,
- _ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
// Normally this instruction would lookup the key in a hash and jump to an
// offset based on that.
// Instead we can take the fallback case and continue with the next
@@ -3106,55 +4325,96 @@ fn gen_opt_case_dispatch(
// We'd hope that our jitted code will be sufficiently fast without the
// hash lookup, at least for small hashes, but it's worth revisiting this
// assumption in the future.
+ if !jit.at_current_insn() {
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
+ }
- ctx.stack_pop(1);
+ let case_hash = jit.get_arg(0);
+ let else_offset = jit.get_arg(1).as_u32();
- KeepCompiling // continue with the next instruction
-}
+ // Try to reorder case/else branches so that ones that are actually used come first.
+ // Supporting only Fixnum for now so that the implementation can be an equality check.
+ let key_opnd = asm.stack_opnd(0);
+ let comptime_key = jit.peek_at_stack(&asm.ctx, 0);
-fn gen_branchif_branch(
- cb: &mut CodeBlock,
- target0: CodePtr,
- target1: Option<CodePtr>,
- shape: BranchShape,
-) {
- assert!(target1 != None);
- match shape {
- BranchShape::Next0 => {
- jz_ptr(cb, target1.unwrap());
- }
- BranchShape::Next1 => {
- jnz_ptr(cb, target0);
+ // Check that all cases are fixnums to avoid having to register BOP assumptions on
+ // all the types that case hashes support. This spends compile time to save memory.
+ fn case_hash_all_fixnum_p(hash: VALUE) -> bool {
+ let mut all_fixnum = true;
+ unsafe {
+ unsafe extern "C" fn per_case(key: st_data_t, _value: st_data_t, data: st_data_t) -> c_int {
+ (if VALUE(key as usize).fixnum_p() {
+ ST_CONTINUE
+ } else {
+ (data as *mut bool).write(false);
+ ST_STOP
+ }) as c_int
+ }
+ rb_hash_stlike_foreach(hash, Some(per_case), (&mut all_fixnum) as *mut _ as st_data_t);
}
- BranchShape::Default => {
- jnz_ptr(cb, target0);
- jmp_ptr(cb, target1.unwrap());
+
+ all_fixnum
+ }
+
+ // If megamorphic, fall back to compiling branch instructions after opt_case_dispatch
+ let megamorphic = asm.ctx.get_chain_depth() >= CASE_WHEN_MAX_DEPTH;
+ if megamorphic {
+ gen_counter_incr(asm, Counter::num_opt_case_dispatch_megamorphic);
+ }
+
+ if comptime_key.fixnum_p() && comptime_key.0 <= u32::MAX.as_usize() && case_hash_all_fixnum_p(case_hash) && !megamorphic {
+ if !assume_bop_not_redefined(jit, asm, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_EQQ) {
+ return None;
}
+
+ // Check if the key is the same value
+ asm.cmp(key_opnd, comptime_key.into());
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ asm,
+ ocb,
+ CASE_WHEN_MAX_DEPTH,
+ Counter::opt_case_dispatch_megamorphic,
+ );
+ asm.stack_pop(1); // Pop key_opnd
+
+ // Get the offset for the compile-time key
+ let mut offset = 0;
+ unsafe { rb_hash_stlike_lookup(case_hash, comptime_key.0 as _, &mut offset) };
+ let jump_offset = if offset == 0 {
+ // NOTE: If we hit the else branch with various values, it could negatively impact performance.
+ else_offset
+ } else {
+ (offset as u32) >> 1 // FIX2LONG
+ };
+
+ // Jump to the offset of case or else
+ let jump_idx = jit.next_insn_idx() as u32 + jump_offset;
+ let jump_block = BlockId { iseq: jit.iseq, idx: jump_idx.try_into().unwrap() };
+ gen_direct_jump(jit, &asm.ctx.clone(), jump_block, asm);
+ Some(EndBlock)
+ } else {
+ asm.stack_pop(1); // Pop key_opnd
+ Some(KeepCompiling) // continue with === branches
}
}
fn gen_branchif(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let jump_offset = jit_get_arg(jit, 0).as_i32();
+) -> Option<CodegenStatus> {
+ let jump_offset = jit.get_arg(0).as_i32();
// Check for interrupts, but only on backward branches that may create loops
if jump_offset < 0 {
- let side_exit = get_side_exit(jit, ocb, ctx);
- gen_check_ints(cb, side_exit);
+ gen_check_ints(asm, Counter::branchif_interrupted);
}
- // Test if any bit (outside of the Qnil bit) is on
- // RUBY_Qfalse /* ...0000 0000 */
- // RUBY_Qnil /* ...0000 1000 */
- let val_opnd = ctx.stack_pop(1);
- test(cb, val_opnd, imm_opnd(!Qnil.as_i64()));
-
// Get the branch target instruction offsets
- let next_idx = jit_next_insn_idx(jit);
+ let next_idx = jit.next_insn_idx();
let jump_idx = (next_idx as i32) + jump_offset;
let next_block = BlockId {
iseq: jit.iseq,
@@ -3162,63 +4422,54 @@ fn gen_branchif(
};
let jump_block = BlockId {
iseq: jit.iseq,
- idx: jump_idx as u32,
+ idx: jump_idx.try_into().unwrap(),
};
- // Generate the branch instructions
- gen_branch(
- jit,
- ctx,
- cb,
- ocb,
- jump_block,
- ctx,
- Some(next_block),
- Some(ctx),
- gen_branchif_branch,
- );
+ // Test if any bit (outside of the Qnil bit) is on
+ // See RB_TEST()
+ let val_type = asm.ctx.get_opnd_type(StackOpnd(0));
+ let val_opnd = asm.stack_pop(1);
- EndBlock
-}
+ incr_counter!(branch_insn_count);
-fn gen_branchunless_branch(
- cb: &mut CodeBlock,
- target0: CodePtr,
- target1: Option<CodePtr>,
- shape: BranchShape,
-) {
- match shape {
- BranchShape::Next0 => jnz_ptr(cb, target1.unwrap()),
- BranchShape::Next1 => jz_ptr(cb, target0),
- BranchShape::Default => {
- jz_ptr(cb, target0);
- jmp_ptr(cb, target1.unwrap());
- }
+ if let Some(result) = val_type.known_truthy() {
+ let target = if result { jump_block } else { next_block };
+ gen_direct_jump(jit, &asm.ctx.clone(), target, asm);
+ incr_counter!(branch_known_count);
+ } else {
+ asm.test(val_opnd, Opnd::Imm(!Qnil.as_i64()));
+
+ // Generate the branch instructions
+ let ctx = asm.ctx;
+ gen_branch(
+ jit,
+ asm,
+ ocb,
+ jump_block,
+ &ctx,
+ Some(next_block),
+ Some(&ctx),
+ BranchGenFn::BranchIf(Cell::new(BranchShape::Default)),
+ );
}
+
+ Some(EndBlock)
}
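The `test(val, !Qnil)` emitted above implements the RB_TEST() truthiness check in one instruction: with `Qfalse` being all-zero and `Qnil` a single bit (`0b1000` in the comment removed above; the exact values are version-specific), masking out the nil bit leaves zero only for `nil` and `false`, and non-zero for every other VALUE. A small sketch using those constants as an assumption:

    const QFALSE: u64 = 0b0000; // assumed, per the removed comment above
    const QNIL: u64 = 0b1000;   // assumed; a single-bit immediate

    fn truthy(val: u64) -> bool {
        val & !QNIL != 0 // the same mask the test instruction uses
    }

    fn main() {
        assert!(!truthy(QFALSE));
        assert!(!truthy(QNIL));
        assert!(truthy((42 << 1) | 1)); // any fixnum (or heap pointer) sets other bits
    }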
fn gen_branchunless(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let jump_offset = jit_get_arg(jit, 0).as_i32();
+) -> Option<CodegenStatus> {
+ let jump_offset = jit.get_arg(0).as_i32();
// Check for interrupts, but only on backward branches that may create loops
if jump_offset < 0 {
- let side_exit = get_side_exit(jit, ocb, ctx);
- gen_check_ints(cb, side_exit);
+ gen_check_ints(asm, Counter::branchunless_interrupted);
}
- // Test if any bit (outside of the Qnil bit) is on
- // RUBY_Qfalse /* ...0000 0000 */
- // RUBY_Qnil /* ...0000 1000 */
- let val_opnd = ctx.stack_pop(1);
- test(cb, val_opnd, imm_opnd(!Qnil.as_i64()));
-
// Get the branch target instruction offsets
- let next_idx = jit_next_insn_idx(jit) as i32;
+ let next_idx = jit.next_insn_idx() as i32;
let jump_idx = next_idx + jump_offset;
let next_block = BlockId {
iseq: jit.iseq,
@@ -3229,59 +4480,52 @@ fn gen_branchunless(
idx: jump_idx.try_into().unwrap(),
};
- // Generate the branch instructions
- gen_branch(
- jit,
- ctx,
- cb,
- ocb,
- jump_block,
- ctx,
- Some(next_block),
- Some(ctx),
- gen_branchunless_branch,
- );
+ let val_type = asm.ctx.get_opnd_type(StackOpnd(0));
+ let val_opnd = asm.stack_pop(1);
- EndBlock
-}
+ incr_counter!(branch_insn_count);
-fn gen_branchnil_branch(
- cb: &mut CodeBlock,
- target0: CodePtr,
- target1: Option<CodePtr>,
- shape: BranchShape,
-) {
- match shape {
- BranchShape::Next0 => jne_ptr(cb, target1.unwrap()),
- BranchShape::Next1 => je_ptr(cb, target0),
- BranchShape::Default => {
- je_ptr(cb, target0);
- jmp_ptr(cb, target1.unwrap());
- }
+ if let Some(result) = val_type.known_truthy() {
+ let target = if result { next_block } else { jump_block };
+ gen_direct_jump(jit, &asm.ctx.clone(), target, asm);
+ incr_counter!(branch_known_count);
+ } else {
+ // Test if any bit (outside of the Qnil bit) is on
+ // See RB_TEST()
+ let not_qnil = !Qnil.as_i64();
+ asm.test(val_opnd, not_qnil.into());
+
+ // Generate the branch instructions
+ let ctx = asm.ctx;
+ gen_branch(
+ jit,
+ asm,
+ ocb,
+ jump_block,
+ &ctx,
+ Some(next_block),
+ Some(&ctx),
+ BranchGenFn::BranchUnless(Cell::new(BranchShape::Default)),
+ );
}
+
+ Some(EndBlock)
}
fn gen_branchnil(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let jump_offset = jit_get_arg(jit, 0).as_i32();
+) -> Option<CodegenStatus> {
+ let jump_offset = jit.get_arg(0).as_i32();
// Check for interrupts, but only on backward branches that may create loops
if jump_offset < 0 {
- let side_exit = get_side_exit(jit, ocb, ctx);
- gen_check_ints(cb, side_exit);
+ gen_check_ints(asm, Counter::branchnil_interrupted);
}
- // Test if the value is Qnil
- // RUBY_Qnil /* ...0000 1000 */
- let val_opnd = ctx.stack_pop(1);
- cmp(cb, val_opnd, uimm_opnd(Qnil.into()));
-
// Get the branch target instruction offsets
- let next_idx = jit_next_insn_idx(jit) as i32;
+ let next_idx = jit.next_insn_idx() as i32;
let jump_idx = next_idx + jump_offset;
let next_block = BlockId {
iseq: jit.iseq,
@@ -3292,47 +4536,101 @@ fn gen_branchnil(
idx: jump_idx.try_into().unwrap(),
};
- // Generate the branch instructions
- gen_branch(
- jit,
- ctx,
- cb,
- ocb,
- jump_block,
- ctx,
- Some(next_block),
- Some(ctx),
- gen_branchnil_branch,
- );
+ let val_type = asm.ctx.get_opnd_type(StackOpnd(0));
+ let val_opnd = asm.stack_pop(1);
+
+ incr_counter!(branch_insn_count);
+
+ if let Some(result) = val_type.known_nil() {
+ let target = if result { jump_block } else { next_block };
+ gen_direct_jump(jit, &asm.ctx.clone(), target, asm);
+ incr_counter!(branch_known_count);
+ } else {
+ // Test if the value is Qnil
+ asm.cmp(val_opnd, Opnd::UImm(Qnil.into()));
+ // Generate the branch instructions
+ let ctx = asm.ctx;
+ gen_branch(
+ jit,
+ asm,
+ ocb,
+ jump_block,
+ &ctx,
+ Some(next_block),
+ Some(&ctx),
+ BranchGenFn::BranchNil(Cell::new(BranchShape::Default)),
+ );
+ }
+
+ Some(EndBlock)
+}
+
+fn gen_throw(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ let throw_state = jit.get_arg(0).as_u64();
+ let throwobj = asm.stack_pop(1);
+ let throwobj = asm.load(throwobj);
+
+ // Gather some statistics about throw
+ gen_counter_incr(asm, Counter::num_throw);
+ match (throw_state & VM_THROW_STATE_MASK as u64) as u32 {
+ RUBY_TAG_BREAK => gen_counter_incr(asm, Counter::num_throw_break),
+ RUBY_TAG_RETRY => gen_counter_incr(asm, Counter::num_throw_retry),
+ RUBY_TAG_RETURN => gen_counter_incr(asm, Counter::num_throw_return),
+ _ => {},
+ }
+
+ // THROW_DATA_NEW allocates. Save SP for GC and PC for allocation tracing as
+ // well as handling the catch table. However, not using jit_prepare_call_with_gc
+ // since we don't need a patch point for this implementation.
+ jit_save_pc(jit, asm);
+ gen_save_sp(asm);
+
+ // rb_vm_throw verifies it's a valid throw, sets ec->tag->state, and returns throw
+ // data, which is throwobj or a vm_throw_data wrapping it. When ec->tag->state is
+ // set, JIT code callers will handle the throw with vm_exec_handle_exception.
+ extern "C" {
+ fn rb_vm_throw(ec: EcPtr, reg_cfp: CfpPtr, throw_state: u32, throwobj: VALUE) -> VALUE;
+ }
+ let val = asm.ccall(rb_vm_throw as *mut u8, vec![EC, CFP, throw_state.into(), throwobj]);
+
+ asm_comment!(asm, "exit from throw");
+ asm.cpop_into(SP);
+ asm.cpop_into(EC);
+ asm.cpop_into(CFP);
- EndBlock
+ asm.frame_teardown();
+
+ asm.cret(val);
+ Some(EndBlock)
}
fn gen_jump(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
- ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let jump_offset = jit_get_arg(jit, 0).as_i32();
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ let jump_offset = jit.get_arg(0).as_i32();
// Check for interrupts, but only on backward branches that may create loops
if jump_offset < 0 {
- let side_exit = get_side_exit(jit, ocb, ctx);
- gen_check_ints(cb, side_exit);
+ gen_check_ints(asm, Counter::jump_interrupted);
}
// Get the branch target instruction offsets
- let jump_idx = (jit_next_insn_idx(jit) as i32) + jump_offset;
+ let jump_idx = jit.next_insn_idx() as i32 + jump_offset;
let jump_block = BlockId {
iseq: jit.iseq,
- idx: jump_idx as u32,
+ idx: jump_idx.try_into().unwrap(),
};
// Generate the jump instruction
- gen_direct_jump(jit, ctx, jump_block, cb);
+ gen_direct_jump(jit, &asm.ctx.clone(), jump_block, asm);
- EndBlock
+ Some(EndBlock)
}
/// Guard that self or a stack operand has the same class as `known_klass`, using
@@ -3341,66 +4639,71 @@ fn gen_jump(
/// the guard generated for one will fail for the other.
///
/// Recompile as contingency if possible, or take side exit a last resort.
-
fn jit_guard_known_klass(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
known_klass: VALUE,
- insn_opnd: InsnOpnd,
+ obj_opnd: Opnd,
+ insn_opnd: YARVOpnd,
sample_instance: VALUE,
- max_chain_depth: i32,
- side_exit: CodePtr,
+ max_chain_depth: u8,
+ counter: Counter,
) {
- let val_type = ctx.get_opnd_type(insn_opnd);
+ let val_type = asm.ctx.get_opnd_type(insn_opnd);
+
+ if val_type.known_class() == Some(known_klass) {
+ // Unless frozen, Array, Hash, and String objects may change their RBASIC_CLASS
+ // when they get a singleton class. Those types need invalidations.
+ if unsafe { [rb_cArray, rb_cHash, rb_cString].contains(&known_klass) } {
+ if jit.assume_no_singleton_class(asm, ocb, known_klass) {
+ // Speculate that this object will not have a singleton class,
+ // and invalidate the block in case it does.
+ return;
+ }
+ } else {
+ // We already know from type information that this is a match
+ return;
+ }
+ }
if unsafe { known_klass == rb_cNilClass } {
assert!(!val_type.is_heap());
- if val_type != Type::Nil {
- assert!(val_type.is_unknown());
+ assert!(val_type.is_unknown());
- add_comment(cb, "guard object is nil");
- cmp(cb, REG0, imm_opnd(Qnil.into()));
- jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ asm_comment!(asm, "guard object is nil");
+ asm.cmp(obj_opnd, Qnil.into());
+ jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter);
- ctx.upgrade_opnd_type(insn_opnd, Type::Nil);
- }
+ asm.ctx.upgrade_opnd_type(insn_opnd, Type::Nil);
} else if unsafe { known_klass == rb_cTrueClass } {
assert!(!val_type.is_heap());
- if val_type != Type::True {
- assert!(val_type.is_unknown());
+ assert!(val_type.is_unknown());
- add_comment(cb, "guard object is true");
- cmp(cb, REG0, imm_opnd(Qtrue.into()));
- jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ asm_comment!(asm, "guard object is true");
+ asm.cmp(obj_opnd, Qtrue.into());
+ jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter);
- ctx.upgrade_opnd_type(insn_opnd, Type::True);
- }
+ asm.ctx.upgrade_opnd_type(insn_opnd, Type::True);
} else if unsafe { known_klass == rb_cFalseClass } {
assert!(!val_type.is_heap());
- if val_type != Type::False {
- assert!(val_type.is_unknown());
+ assert!(val_type.is_unknown());
- add_comment(cb, "guard object is false");
- assert!(Qfalse.as_i32() == 0);
- test(cb, REG0, REG0);
- jit_chain_guard(JCC_JNZ, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ asm_comment!(asm, "guard object is false");
+ assert!(Qfalse.as_i32() == 0);
+ asm.test(obj_opnd, obj_opnd);
+ jit_chain_guard(JCC_JNZ, jit, asm, ocb, max_chain_depth, counter);
- ctx.upgrade_opnd_type(insn_opnd, Type::False);
- }
+ asm.ctx.upgrade_opnd_type(insn_opnd, Type::False);
} else if unsafe { known_klass == rb_cInteger } && sample_instance.fixnum_p() {
- assert!(!val_type.is_heap());
// We will guard fixnum and bignum as though they were separate classes
// BIGNUM can be handled by the general else case below
- if val_type != Type::Fixnum || !val_type.is_imm() {
- assert!(val_type.is_unknown());
+ assert!(val_type.is_unknown());
- add_comment(cb, "guard object is fixnum");
- test(cb, REG0, imm_opnd(RUBY_FIXNUM_FLAG as i64));
- jit_chain_guard(JCC_JZ, jit, ctx, cb, ocb, max_chain_depth, side_exit);
- ctx.upgrade_opnd_type(insn_opnd, Type::Fixnum);
- }
+ asm_comment!(asm, "guard object is fixnum");
+ asm.test(obj_opnd, Opnd::Imm(RUBY_FIXNUM_FLAG as i64));
+ jit_chain_guard(JCC_JZ, jit, asm, ocb, max_chain_depth, counter);
+ asm.ctx.upgrade_opnd_type(insn_opnd, Type::Fixnum);
} else if unsafe { known_klass == rb_cSymbol } && sample_instance.static_sym_p() {
assert!(!val_type.is_heap());
// We will guard STATIC vs DYNAMIC as though they were separate classes
@@ -3408,11 +4711,11 @@ fn jit_guard_known_klass(
if val_type != Type::ImmSymbol || !val_type.is_imm() {
assert!(val_type.is_unknown());
- add_comment(cb, "guard object is static symbol");
+ asm_comment!(asm, "guard object is static symbol");
assert!(RUBY_SPECIAL_SHIFT == 8);
- cmp(cb, REG0_8, uimm_opnd(RUBY_SYMBOL_FLAG as u64));
- jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
- ctx.upgrade_opnd_type(insn_opnd, Type::ImmSymbol);
+ asm.cmp(obj_opnd.with_num_bits(8).unwrap(), Opnd::UImm(RUBY_SYMBOL_FLAG as u64));
+ jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter);
+ asm.ctx.upgrade_opnd_type(insn_opnd, Type::ImmSymbol);
}
} else if unsafe { known_klass == rb_cFloat } && sample_instance.flonum_p() {
assert!(!val_type.is_heap());
@@ -3420,16 +4723,16 @@ fn jit_guard_known_klass(
assert!(val_type.is_unknown());
// We will guard flonum vs heap float as though they were separate classes
- add_comment(cb, "guard object is flonum");
- mov(cb, REG1, REG0);
- and(cb, REG1, uimm_opnd(RUBY_FLONUM_MASK as u64));
- cmp(cb, REG1, uimm_opnd(RUBY_FLONUM_FLAG as u64));
- jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
- ctx.upgrade_opnd_type(insn_opnd, Type::Flonum);
+ asm_comment!(asm, "guard object is flonum");
+ let flag_bits = asm.and(obj_opnd, Opnd::UImm(RUBY_FLONUM_MASK as u64));
+ asm.cmp(flag_bits, Opnd::UImm(RUBY_FLONUM_FLAG as u64));
+ jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter);
+ asm.ctx.upgrade_opnd_type(insn_opnd, Type::Flonum);
}
} else if unsafe {
FL_TEST(known_klass, VALUE(RUBY_FL_SINGLETON as usize)) != VALUE(0)
- && sample_instance == rb_attr_get(known_klass, id__attached__ as ID)
+ && sample_instance == rb_class_attached_object(known_klass)
+ && !rb_obj_is_kind_of(sample_instance, rb_cIO).test()
} {
// Singleton classes are attached to one specific object, so we can
// avoid one memory access (and potentially the is_heap check) by
@@ -3441,11 +4744,11 @@ fn jit_guard_known_klass(
// that its singleton class is empty, so we can't avoid the memory
// access. As an example, `Object.new.singleton_class` is an object in
// this situation.
- add_comment(cb, "guard known object with singleton class");
- // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the object.
- jit_mov_gc_ptr(jit, cb, REG1, sample_instance);
- cmp(cb, REG0, REG1);
- jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ // Also, guarding by identity is incorrect for IO objects because
+ // IO#reopen can be used to change the class and singleton class of IO objects!
+ asm_comment!(asm, "guard known object with singleton class");
+ asm.cmp(obj_opnd, sample_instance.into());
+ jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter);
} else if val_type == Type::CString && unsafe { known_klass == rb_cString } {
// guard elided because the context says we've already checked
unsafe {
@@ -3457,27 +4760,34 @@ fn jit_guard_known_klass(
// Check that the receiver is a heap object
// Note: if we get here, the class doesn't have immediate instances.
if !val_type.is_heap() {
- add_comment(cb, "guard not immediate");
- assert!(Qfalse.as_i32() < Qnil.as_i32());
- test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK as i64));
- jit_chain_guard(JCC_JNZ, jit, ctx, cb, ocb, max_chain_depth, side_exit);
- cmp(cb, REG0, imm_opnd(Qnil.into()));
- jit_chain_guard(JCC_JBE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ asm_comment!(asm, "guard not immediate");
+ asm.test(obj_opnd, (RUBY_IMMEDIATE_MASK as u64).into());
+ jit_chain_guard(JCC_JNZ, jit, asm, ocb, max_chain_depth, counter);
+ asm.cmp(obj_opnd, Qfalse.into());
+ jit_chain_guard(JCC_JE, jit, asm, ocb, max_chain_depth, counter);
- ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap);
+ asm.ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap);
}
- let klass_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_KLASS);
+ // If obj_opnd isn't already a register, load it.
+ let obj_opnd = match obj_opnd {
+ Opnd::InsnOut { .. } => obj_opnd,
+ _ => asm.load(obj_opnd),
+ };
+ let klass_opnd = Opnd::mem(64, obj_opnd, RUBY_OFFSET_RBASIC_KLASS);
// Bail if receiver class is different from known_klass
// TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the class.
- add_comment(cb, "guard known class");
- jit_mov_gc_ptr(jit, cb, REG1, known_klass);
- cmp(cb, klass_opnd, REG1);
- jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ asm_comment!(asm, "guard known class");
+ asm.cmp(klass_opnd, known_klass.into());
+ jit_chain_guard(JCC_JNE, jit, asm, ocb, max_chain_depth, counter);
if known_klass == unsafe { rb_cString } {
- ctx.upgrade_opnd_type(insn_opnd, Type::CString);
+ asm.ctx.upgrade_opnd_type(insn_opnd, Type::CString);
+ } else if known_klass == unsafe { rb_cArray } {
+ asm.ctx.upgrade_opnd_type(insn_opnd, Type::CArray);
+ } else if known_klass == unsafe { rb_cHash } {
+ asm.ctx.upgrade_opnd_type(insn_opnd, Type::CHash);
}
}
}
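The flonum guard in `jit_guard_known_klass` masks the operand with `RUBY_FLONUM_MASK` and compares against `RUBY_FLONUM_FLAG`; assuming the usual CRuby tagging where those constants are `0b11` and `0b10`, this checks that the low two bits are `10`, which distinguishes flonums from fixnums (low bit `1`) and from aligned heap pointers (low bits `00`). A minimal sketch under that assumption:

    const RUBY_FLONUM_MASK: u64 = 0b11; // assumed values for CRuby's flonum tagging
    const RUBY_FLONUM_FLAG: u64 = 0b10;

    fn flonum_p(val: u64) -> bool {
        val & RUBY_FLONUM_MASK == RUBY_FLONUM_FLAG
    }

    fn main() {
        assert!(flonum_p(0b10));              // a flonum-tagged bit pattern
        assert!(!flonum_p((7 << 1) | 1));     // a fixnum: low bit is 1
        assert!(!flonum_p(0x7f00_0000_1000)); // an aligned heap pointer: low bits 00
    }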
@@ -3485,29 +4795,23 @@ fn jit_guard_known_klass(
// Generate ancestry guard for protected callee.
// Calls to protected callees only go through when self.is_a?(klass_that_defines_the_callee).
fn jit_protected_callee_ancestry_guard(
- jit: &mut JITState,
- cb: &mut CodeBlock,
- ocb: &mut OutlinedCb,
+ asm: &mut Assembler,
cme: *const rb_callable_method_entry_t,
- side_exit: CodePtr,
) {
// See vm_call_method().
- mov(
- cb,
- C_ARG_REGS[0],
- mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF),
- );
let def_class = unsafe { (*cme).defined_class };
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], def_class);
// Note: PC isn't written to current control frame as rb_is_kind_of() shouldn't raise.
// VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass);
- call_ptr(cb, REG0, rb_obj_is_kind_of as *mut u8);
- test(cb, RAX, RAX);
- jz_ptr(
- cb,
- counted_exit!(ocb, side_exit, send_se_protected_check_failed),
+ let val = asm.ccall(
+ rb_obj_is_kind_of as *mut u8,
+ vec![
+ Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF),
+ def_class.into(),
+ ],
);
+ asm.test(val, val);
+ asm.jz(Target::side_exit(Counter::guard_send_se_protected_check_failed))
}
// Codegen for rb_obj_not().
@@ -3515,160 +4819,951 @@ fn jit_protected_callee_ancestry_guard(
// arity guards.
fn jit_rb_obj_not(
_jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
- _block: Option<IseqPtr>,
+ _block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
- let recv_opnd = ctx.get_opnd_type(StackOpnd(0));
-
- if recv_opnd == Type::Nil || recv_opnd == Type::False {
- add_comment(cb, "rb_obj_not(nil_or_false)");
- ctx.stack_pop(1);
- let out_opnd = ctx.stack_push(Type::True);
- mov(cb, out_opnd, uimm_opnd(Qtrue.into()));
- } else if recv_opnd.is_heap() || recv_opnd.is_specific() {
- // Note: recv_opnd != Type::Nil && recv_opnd != Type::False.
- add_comment(cb, "rb_obj_not(truthy)");
- ctx.stack_pop(1);
- let out_opnd = ctx.stack_push(Type::False);
- mov(cb, out_opnd, uimm_opnd(Qfalse.into()));
- } else {
- // jit_guard_known_klass() already ran on the receiver which should
- // have deduced the type of the receiver. This case should be
- // rare if not unreachable.
- return false;
+ let recv_opnd = asm.ctx.get_opnd_type(StackOpnd(0));
+
+ match recv_opnd.known_truthy() {
+ Some(false) => {
+ asm_comment!(asm, "rb_obj_not(nil_or_false)");
+ asm.stack_pop(1);
+ let out_opnd = asm.stack_push(Type::True);
+ asm.mov(out_opnd, Qtrue.into());
+ },
+ Some(true) => {
+ // Note: recv_opnd != Type::Nil && recv_opnd != Type::False.
+ asm_comment!(asm, "rb_obj_not(truthy)");
+ asm.stack_pop(1);
+ let out_opnd = asm.stack_push(Type::False);
+ asm.mov(out_opnd, Qfalse.into());
+ },
+ _ => {
+ return false;
+ },
}
+
true
}
// Codegen for rb_true()
fn jit_rb_true(
_jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
- _block: Option<IseqPtr>,
+ _block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
- add_comment(cb, "nil? == true");
- ctx.stack_pop(1);
- let stack_ret = ctx.stack_push(Type::True);
- mov(cb, stack_ret, uimm_opnd(Qtrue.into()));
+ asm_comment!(asm, "nil? == true");
+ asm.stack_pop(1);
+ let stack_ret = asm.stack_push(Type::True);
+ asm.mov(stack_ret, Qtrue.into());
true
}
// Codegen for rb_false()
fn jit_rb_false(
_jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
- _block: Option<IseqPtr>,
+ _block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
- add_comment(cb, "nil? == false");
- ctx.stack_pop(1);
- let stack_ret = ctx.stack_push(Type::False);
- mov(cb, stack_ret, uimm_opnd(Qfalse.into()));
+ asm_comment!(asm, "nil? == false");
+ asm.stack_pop(1);
+ let stack_ret = asm.stack_push(Type::False);
+ asm.mov(stack_ret, Qfalse.into());
true
}
+/// Codegen for Kernel#is_a?
+fn jit_rb_kernel_is_a(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ argc: i32,
+ known_recv_class: Option<VALUE>,
+) -> bool {
+ if argc != 1 {
+ return false;
+ }
+
+ // If this is a super call we might not know the class
+ if known_recv_class.is_none() {
+ return false;
+ }
+
+ // Important note: The output code will simply `return true/false`.
+ // Correctness follows from:
+ // - `known_recv_class` implies there is a guard scheduled before here
+ // for a particular `CLASS_OF(lhs)`.
+ // - We guard that rhs is identical to the compile-time sample
+ // - In general, for any two Class instances A, B, `A < B` does not change at runtime.
+ // Class#superclass is stable.
+
+ let sample_rhs = jit.peek_at_stack(&asm.ctx, 0);
+ let sample_lhs = jit.peek_at_stack(&asm.ctx, 1);
+
+ // We do not allow modules here because the module hierarchy can change at runtime.
+ if !unsafe { RB_TYPE_P(sample_rhs, RUBY_T_CLASS) } {
+ return false;
+ }
+ let sample_is_a = unsafe { rb_obj_is_kind_of(sample_lhs, sample_rhs) == Qtrue };
+
+ asm_comment!(asm, "Kernel#is_a?");
+ asm.cmp(asm.stack_opnd(0), sample_rhs.into());
+ asm.jne(Target::side_exit(Counter::guard_send_is_a_class_mismatch));
+
+ asm.stack_pop(2);
+
+ if sample_is_a {
+ let stack_ret = asm.stack_push(Type::True);
+ asm.mov(stack_ret, Qtrue.into());
+ } else {
+ let stack_ret = asm.stack_push(Type::False);
+ asm.mov(stack_ret, Qfalse.into());
+ }
+ return true;
+}
+
+/// Codegen for Kernel#instance_of?
+fn jit_rb_kernel_instance_of(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ argc: i32,
+ known_recv_class: Option<VALUE>,
+) -> bool {
+ if argc != 1 {
+ return false;
+ }
+
+ // If this is a super call we might not know the class
+ if known_recv_class.is_none() {
+ return false;
+ }
+
+ // Important note: The output code will simply `return true/false`.
+ // Correctness follows from:
+ // - `known_recv_class` implies there is a guard scheduled before here
+ // for a particular `CLASS_OF(lhs)`.
+ // - We guard that rhs is identical to the compile-time sample
+ // - For a particular `CLASS_OF(lhs)`, `rb_obj_class(lhs)` does not change.
+ // (because for any singleton class `s`, `s.superclass.equal?(s.attached_object.class)`)
+
+ let sample_rhs = jit.peek_at_stack(&asm.ctx, 0);
+ let sample_lhs = jit.peek_at_stack(&asm.ctx, 1);
+
+ // Filters out cases where the C implementation raises
+ if unsafe { !(RB_TYPE_P(sample_rhs, RUBY_T_CLASS) || RB_TYPE_P(sample_rhs, RUBY_T_MODULE)) } {
+ return false;
+ }
+
+ // We need to grab the class here to deal with singleton classes.
+ // Instance of grabs the "real class" of the object rather than the
+ // singleton class.
+ let sample_lhs_real_class = unsafe { rb_obj_class(sample_lhs) };
+
+ let sample_instance_of = sample_lhs_real_class == sample_rhs;
+
+ asm_comment!(asm, "Kernel#instance_of?");
+ asm.cmp(asm.stack_opnd(0), sample_rhs.into());
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ asm,
+ ocb,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_instance_of_class_mismatch,
+ );
+
+ asm.stack_pop(2);
+
+ if sample_instance_of {
+ let stack_ret = asm.stack_push(Type::True);
+ asm.mov(stack_ret, Qtrue.into());
+ } else {
+ let stack_ret = asm.stack_push(Type::False);
+ asm.mov(stack_ret, Qfalse.into());
+ }
+ return true;
+}
+
+fn jit_rb_mod_eqq(
+ _jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ if argc != 1 {
+ return false;
+ }
+
+ asm_comment!(asm, "Module#===");
+ // By being here, we know that the receiver is a T_MODULE or a T_CLASS, because Module#=== can
+ // only live on these objects. With that, we can call rb_obj_is_kind_of() without
+ // jit_prepare_non_leaf_call() or a control frame push because it can't raise, allocate, or call
+ // Ruby methods with these inputs.
+ // Note the difference in approach from Kernel#is_a? because we don't get a free guard for the
+ // right hand side.
+ let rhs = asm.stack_pop(1);
+ let lhs = asm.stack_pop(1); // the module
+ let ret = asm.ccall(rb_obj_is_kind_of as *const u8, vec![rhs, lhs]);
+
+ // Return the result
+ let stack_ret = asm.stack_push(Type::UnknownImm);
+ asm.mov(stack_ret, ret);
+
+ return true;
+}
+
// Codegen for rb_obj_equal()
// object identity comparison
fn jit_rb_obj_equal(
_jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
- _block: Option<IseqPtr>,
+ _block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
- add_comment(cb, "equal?");
- let obj1 = ctx.stack_pop(1);
- let obj2 = ctx.stack_pop(1);
-
- mov(cb, REG0, obj1);
- cmp(cb, REG0, obj2);
- mov(cb, REG0, uimm_opnd(Qtrue.into()));
- mov(cb, REG1, uimm_opnd(Qfalse.into()));
- cmovne(cb, REG0, REG1);
-
- let stack_ret = ctx.stack_push(Type::UnknownImm);
- mov(cb, stack_ret, REG0);
+ asm_comment!(asm, "equal?");
+ let obj1 = asm.stack_pop(1);
+ let obj2 = asm.stack_pop(1);
+
+ asm.cmp(obj1, obj2);
+ let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into());
+
+ let stack_ret = asm.stack_push(Type::UnknownImm);
+ asm.mov(stack_ret, ret_opnd);
true
}
-/// If string is frozen, duplicate it to get a non-frozen string. Otherwise, return it.
-fn jit_rb_str_uplus(
+// Codegen for rb_obj_not_equal()
+// object identity comparison
+fn jit_rb_obj_not_equal(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ gen_equality_specialized(jit, asm, ocb, false) == Some(true)
+}
+
+// Codegen for rb_int_equal()
+fn jit_rb_int_equal(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ // Check that both operands are fixnums
+ guard_two_fixnums(jit, asm, ocb);
+
+ // Compare the arguments
+ asm_comment!(asm, "rb_int_equal");
+ let arg1 = asm.stack_pop(1);
+ let arg0 = asm.stack_pop(1);
+ asm.cmp(arg0, arg1);
+ let ret_opnd = asm.csel_e(Qtrue.into(), Qfalse.into());
+
+ let stack_ret = asm.stack_push(Type::UnknownImm);
+ asm.mov(stack_ret, ret_opnd);
+ true
+}
+
+fn jit_rb_int_succ(
_jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
- _block: Option<IseqPtr>,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ // Guard the receiver is fixnum
+ let recv_type = asm.ctx.get_opnd_type(StackOpnd(0));
+ let recv = asm.stack_pop(1);
+ if recv_type != Type::Fixnum {
+ asm_comment!(asm, "guard object is fixnum");
+ asm.test(recv, Opnd::Imm(RUBY_FIXNUM_FLAG as i64));
+ asm.jz(Target::side_exit(Counter::opt_succ_not_fixnum));
+ }
+
+ asm_comment!(asm, "Integer#succ");
+ let out_val = asm.add(recv, Opnd::Imm(2)); // 2 is untagged Fixnum 1
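+ // A tagged Fixnum is (n << 1) | 1, so tagged FIXNUM_MAX is the most positive signed machine
+ // word; adding 2 to it overflows, and the jo below side-exits so the interpreter can promote
+ // the result to a Bignum.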
+ asm.jo(Target::side_exit(Counter::opt_succ_overflow));
+
+ // Push the output onto the stack
+ let dst = asm.stack_push(Type::Fixnum);
+ asm.mov(dst, out_val);
+
+ true
+}
+
+fn jit_rb_int_div(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ if asm.ctx.two_fixnums_on_stack(jit) != Some(true) {
+ return false;
+ }
+ guard_two_fixnums(jit, asm, ocb);
+
+ // rb_fix_div_fix may GC-allocate for Bignum
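+ // (the only fixnum/fixnum quotient that overflows is FIXNUM_MIN / -1, which is FIXNUM_MAX + 1)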
+ jit_prepare_call_with_gc(jit, asm);
+
+ asm_comment!(asm, "Integer#/");
+ let obj = asm.stack_opnd(0);
+ let recv = asm.stack_opnd(1);
+
+ // Side-exit if the divisor (arg0) is 0, letting the interpreter raise ZeroDivisionError
+ asm.cmp(obj, VALUE::fixnum_from_usize(0).as_i64().into());
+ asm.je(Target::side_exit(Counter::opt_div_zero));
+
+ let ret = asm.ccall(rb_fix_div_fix as *const u8, vec![recv, obj]);
+ asm.stack_pop(2); // Keep them during ccall for GC
+
+ let ret_opnd = asm.stack_push(Type::Unknown);
+ asm.mov(ret_opnd, ret);
+ true
+}
+
+fn jit_rb_int_lshift(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ if asm.ctx.two_fixnums_on_stack(jit) != Some(true) {
+ return false;
+ }
+ guard_two_fixnums(jit, asm, ocb);
+
+ let comptime_shift = jit.peek_at_stack(&asm.ctx, 0);
+
+ if !comptime_shift.fixnum_p() {
+ return false;
+ }
+
+ // Untag the fixnum shift amount
+ let shift_amt = comptime_shift.as_isize() >> 1;
+ if shift_amt > 63 || shift_amt < 0 {
+ return false;
+ }
+
+ // Fall back to a C call if the shift amount varies
+ // This check is needed because the chain guard will side-exit
+ // if its max depth is reached
+ if asm.ctx.get_chain_depth() > 0 {
+ return false;
+ }
+
+ let rhs = asm.stack_pop(1);
+ let lhs = asm.stack_pop(1);
+
+ // Guard on the shift amount we speculated on
+ asm.cmp(rhs, comptime_shift.into());
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ asm,
+ ocb,
+ 1,
+ Counter::lshift_amount_changed,
+ );
+
+ fixnum_left_shift_body(asm, lhs, shift_amt as u64);
+ true
+}
+
+fn fixnum_left_shift_body(asm: &mut Assembler, lhs: Opnd, shift_amt: u64) {
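+ // A tagged Fixnum is (n << 1) | 1, so `lhs - 1` gives n << 1 with a clear tag bit. Shifting
+ // left, shifting back by the same amount, and comparing against the pre-shift value detects a
+ // lossy shift, i.e. a result that no longer fits in a Fixnum, in which case we side-exit.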
+ let in_val = asm.sub(lhs, 1.into());
+ let shift_opnd = Opnd::UImm(shift_amt);
+ let out_val = asm.lshift(in_val, shift_opnd);
+ let unshifted = asm.rshift(out_val, shift_opnd);
+
+ // Guard that we did not overflow
+ asm.cmp(unshifted, in_val);
+ asm.jne(Target::side_exit(Counter::lshift_overflow));
+
+ // Re-tag the output value
+ let out_val = asm.add(out_val, 1.into());
+
+ let ret_opnd = asm.stack_push(Type::Fixnum);
+ asm.mov(ret_opnd, out_val);
+}
+
+fn jit_rb_int_rshift(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ if asm.ctx.two_fixnums_on_stack(jit) != Some(true) {
+ return false;
+ }
+ guard_two_fixnums(jit, asm, ocb);
+
+ let comptime_shift = jit.peek_at_stack(&asm.ctx, 0);
+
+ // Untag the fixnum shift amount
+ let shift_amt = comptime_shift.as_isize() >> 1;
+ if shift_amt > 63 || shift_amt < 0 {
+ return false;
+ }
+
+ // Fall back to a C call if the shift amount varies
+ // This check is needed because the chain guard will side-exit
+ // if its max depth is reached
+ if asm.ctx.get_chain_depth() > 0 {
+ return false;
+ }
+
+ let rhs = asm.stack_pop(1);
+ let lhs = asm.stack_pop(1);
+
+ // Guard on the shift amount we speculated on
+ asm.cmp(rhs, comptime_shift.into());
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ asm,
+ ocb,
+ 1,
+ Counter::rshift_amount_changed,
+ );
+
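+ // Shift the still-tagged value and OR the Fixnum tag bit back in; this yields the correctly
+ // tagged result of the arithmetic right shift without untagging first (the tag bit itself may
+ // be shifted away).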
+ let shift_opnd = Opnd::UImm(shift_amt as u64);
+ let out_val = asm.rshift(lhs, shift_opnd);
+ let out_val = asm.or(out_val, 1.into());
+
+ let ret_opnd = asm.stack_push(Type::Fixnum);
+ asm.mov(ret_opnd, out_val);
+ true
+}
+
+fn jit_rb_int_xor(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ if asm.ctx.two_fixnums_on_stack(jit) != Some(true) {
+ return false;
+ }
+ guard_two_fixnums(jit, asm, ocb);
+
+ let rhs = asm.stack_pop(1);
+ let lhs = asm.stack_pop(1);
+
+ // XOR and then re-tag the resulting fixnum
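+ // (both operands carry the Fixnum tag bit and 1 ^ 1 == 0 clears it, so only the tag bit
+ // needs to be OR-ed back in)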
+ let out_val = asm.xor(lhs, rhs);
+ let out_val = asm.or(out_val, 1.into());
+
+ let ret_opnd = asm.stack_push(Type::Fixnum);
+ asm.mov(ret_opnd, out_val);
+ true
+}
+
+fn jit_rb_int_aref(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ if argc != 1 {
+ return false;
+ }
+ if asm.ctx.two_fixnums_on_stack(jit) != Some(true) {
+ return false;
+ }
+ guard_two_fixnums(jit, asm, ocb);
+
+ asm_comment!(asm, "Integer#[]");
+ let obj = asm.stack_pop(1);
+ let recv = asm.stack_pop(1);
+
+ let ret = asm.ccall(rb_fix_aref as *const u8, vec![recv, obj]);
+
+ let ret_opnd = asm.stack_push(Type::Fixnum);
+ asm.mov(ret_opnd, ret);
+ true
+}
+
+fn jit_rb_float_plus(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin
+ let comptime_obj = jit.peek_at_stack(&asm.ctx, 0);
+ if comptime_obj.fixnum_p() || comptime_obj.flonum_p() {
+ let obj = asm.stack_opnd(0);
+ jit_guard_known_klass(
+ jit,
+ asm,
+ ocb,
+ comptime_obj.class_of(),
+ obj,
+ obj.into(),
+ comptime_obj,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_not_fixnum_or_flonum,
+ );
+ } else {
+ return false;
+ }
+
+ // Save the PC and SP because the callee may allocate Float on heap
+ jit_prepare_call_with_gc(jit, asm);
+
+ asm_comment!(asm, "Float#+");
+ let obj = asm.stack_opnd(0);
+ let recv = asm.stack_opnd(1);
+
+ let ret = asm.ccall(rb_float_plus as *const u8, vec![recv, obj]);
+ asm.stack_pop(2); // Keep recv during ccall for GC
+
+ let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float
+ asm.mov(ret_opnd, ret);
+ true
+}
+
+fn jit_rb_float_minus(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin
+ let comptime_obj = jit.peek_at_stack(&asm.ctx, 0);
+ if comptime_obj.fixnum_p() || comptime_obj.flonum_p() {
+ let obj = asm.stack_opnd(0);
+ jit_guard_known_klass(
+ jit,
+ asm,
+ ocb,
+ comptime_obj.class_of(),
+ obj,
+ obj.into(),
+ comptime_obj,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_not_fixnum_or_flonum,
+ );
+ } else {
+ return false;
+ }
+
+ // Save the PC and SP because the callee may allocate Float on heap
+ jit_prepare_call_with_gc(jit, asm);
+
+ asm_comment!(asm, "Float#-");
+ let obj = asm.stack_opnd(0);
+ let recv = asm.stack_opnd(1);
+
+ let ret = asm.ccall(rb_float_minus as *const u8, vec![recv, obj]);
+ asm.stack_pop(2); // Keep recv during ccall for GC
+
+ let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float
+ asm.mov(ret_opnd, ret);
+ true
+}
+
+fn jit_rb_float_mul(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin
+ let comptime_obj = jit.peek_at_stack(&asm.ctx, 0);
+ if comptime_obj.fixnum_p() || comptime_obj.flonum_p() {
+ let obj = asm.stack_opnd(0);
+ jit_guard_known_klass(
+ jit,
+ asm,
+ ocb,
+ comptime_obj.class_of(),
+ obj,
+ obj.into(),
+ comptime_obj,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_not_fixnum_or_flonum,
+ );
+ } else {
+ return false;
+ }
+
+ // Save the PC and SP because the callee may allocate Float on heap
+ jit_prepare_call_with_gc(jit, asm);
+
+ asm_comment!(asm, "Float#*");
+ let obj = asm.stack_opnd(0);
+ let recv = asm.stack_opnd(1);
+
+ let ret = asm.ccall(rb_float_mul as *const u8, vec![recv, obj]);
+ asm.stack_pop(2); // Keep recv during ccall for GC
+
+ let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float
+ asm.mov(ret_opnd, ret);
+ true
+}
+
+fn jit_rb_float_div(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ // Guard obj is Fixnum or Flonum to avoid rb_funcall on rb_num_coerce_bin
+ let comptime_obj = jit.peek_at_stack(&asm.ctx, 0);
+ if comptime_obj.fixnum_p() || comptime_obj.flonum_p() {
+ let obj = asm.stack_opnd(0);
+ jit_guard_known_klass(
+ jit,
+ asm,
+ ocb,
+ comptime_obj.class_of(),
+ obj,
+ obj.into(),
+ comptime_obj,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_not_fixnum_or_flonum,
+ );
+ } else {
+ return false;
+ }
+
+ // Save the PC and SP because the callee may allocate Float on heap
+ jit_prepare_call_with_gc(jit, asm);
+
+ asm_comment!(asm, "Float#/");
+ let obj = asm.stack_opnd(0);
+ let recv = asm.stack_opnd(1);
+
+ let ret = asm.ccall(rb_float_div as *const u8, vec![recv, obj]);
+ asm.stack_pop(2); // Keep recv during ccall for GC
+
+ let ret_opnd = asm.stack_push(Type::Unknown); // Flonum or heap Float
+ asm.mov(ret_opnd, ret);
+ true
+}
+
+/// If string is frozen, duplicate it to get a non-frozen string. Otherwise, return it.
+fn jit_rb_str_uplus(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ argc: i32,
+ _known_recv_class: Option<VALUE>,
) -> bool
{
- let recv = ctx.stack_pop(1);
+ if argc != 0 {
+ return false;
+ }
+
+ // We allocate when we dup the string
+ jit_prepare_call_with_gc(jit, asm);
+ asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency.
+
+ asm_comment!(asm, "Unary plus on string");
+ let recv_opnd = asm.stack_pop(1);
+ let recv_opnd = asm.load(recv_opnd);
+ let flags_opnd = asm.load(Opnd::mem(64, recv_opnd, RUBY_OFFSET_RBASIC_FLAGS));
+ asm.test(flags_opnd, Opnd::Imm(RUBY_FL_FREEZE as i64));
+
+ let ret_label = asm.new_label("stack_ret");
+
+ // String#+@ can only exist on T_STRING
+ let stack_ret = asm.stack_push(Type::TString);
+
+ // If the string isn't frozen, we just return it.
+ asm.mov(stack_ret, recv_opnd);
+ asm.jz(ret_label);
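+ // (the receiver has already been written to the return slot above, so the frozen path below
+ // just overwrites it with the duplicate)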
+
+ // Str is frozen - duplicate it
+ asm.spill_temps(); // for ccall
+ let ret_opnd = asm.ccall(rb_str_dup as *const u8, vec![recv_opnd]);
+ asm.mov(stack_ret, ret_opnd);
- add_comment(cb, "Unary plus on string");
- mov(cb, REG0, recv);
- mov(cb, REG1, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS));
- test(cb, REG1, imm_opnd(RUBY_FL_FREEZE as i64));
+ asm.write_label(ret_label);
- let ret_label = cb.new_label("stack_ret".to_string());
- // If the string isn't frozen, we just return it. It's already in REG0.
- jz_label(cb, ret_label);
+ true
+}
- // Str is frozen - duplicate
- mov(cb, C_ARG_REGS[0], REG0);
- call_ptr(cb, REG0, rb_str_dup as *const u8);
- // Return value is in REG0, drop through and return it.
+fn jit_rb_str_length(
+ _jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ asm_comment!(asm, "String#length");
+ extern "C" {
+ fn rb_str_length(str: VALUE) -> VALUE;
+ }
- cb.write_label(ret_label);
- // We guard for an exact-class match on the receiver of rb_cString
- let stack_ret = ctx.stack_push(Type::CString);
- mov(cb, stack_ret, REG0);
+ // This function cannot allocate or raise exceptions
+ let recv = asm.stack_opnd(0);
+ let ret_opnd = asm.ccall(rb_str_length as *const u8, vec![recv]);
+ asm.stack_pop(1); // Keep recv on stack during ccall for GC
+
+ // Should be guaranteed to be a fixnum on 64-bit systems
+ let out_opnd = asm.stack_push(Type::Fixnum);
+ asm.mov(out_opnd, ret_opnd);
- cb.link_labels();
true
}
fn jit_rb_str_bytesize(
_jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
- _block: Option<IseqPtr>,
+ _block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
- add_comment(cb, "String#bytesize");
+ asm_comment!(asm, "String#bytesize");
- let recv = ctx.stack_pop(1);
- mov(cb, C_ARG_REGS[0], recv);
- call_ptr(cb, REG0, rb_str_bytesize as *const u8);
+ let recv = asm.stack_pop(1);
- let out_opnd = ctx.stack_push(Type::Fixnum);
- mov(cb, out_opnd, RAX);
+ asm_comment!(asm, "get string length");
+ let str_len_opnd = Opnd::mem(
+ std::os::raw::c_long::BITS as u8,
+ asm.load(recv),
+ RUBY_OFFSET_RSTRING_LEN as i32,
+ );
+
+ let len = asm.load(str_len_opnd);
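+ // RSTRING_LEN is an untagged C long; (len << 1) | RUBY_FIXNUM_FLAG is the same encoding
+ // LONG2FIX produces, giving the Fixnum result directly.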
+ let shifted_val = asm.lshift(len, Opnd::UImm(1));
+ let out_val = asm.or(shifted_val, Opnd::UImm(RUBY_FIXNUM_FLAG as u64));
+
+ let out_opnd = asm.stack_push(Type::Fixnum);
+
+ asm.mov(out_opnd, out_val);
+
+ true
+}
+
+fn jit_rb_str_byteslice(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ if argc != 2 {
+ return false
+ }
+
+ // rb_str_byte_substr should be leaf if indexes are fixnums
+ match (asm.ctx.get_opnd_type(StackOpnd(0)), asm.ctx.get_opnd_type(StackOpnd(1))) {
+ (Type::Fixnum, Type::Fixnum) => {},
+ // Raises when non-integers are passed in, which requires the method frame
+ // to be pushed for the backtrace
+ _ => if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(2)) {
+ return false;
+ }
+ }
+ asm_comment!(asm, "String#byteslice");
+
+ // rb_str_byte_substr allocates a substring
+ jit_prepare_call_with_gc(jit, asm);
+
+ // Get stack operands after potential SP change
+ let len = asm.stack_opnd(0);
+ let beg = asm.stack_opnd(1);
+ let recv = asm.stack_opnd(2);
+
+ let ret_opnd = asm.ccall(rb_str_byte_substr as *const u8, vec![recv, beg, len]);
+ asm.stack_pop(3);
+
+ let out_opnd = asm.stack_push(Type::Unknown);
+ asm.mov(out_opnd, ret_opnd);
+
+ true
+}
+
+fn jit_rb_str_getbyte(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ asm_comment!(asm, "String#getbyte");
+
+ // Don't pop since we may bail
+ let idx = asm.stack_opnd(0);
+ let recv = asm.stack_opnd(1);
+
+ let comptime_idx = jit.peek_at_stack(&asm.ctx, 0);
+ if comptime_idx.fixnum_p() {
+ jit_guard_known_klass(
+ jit,
+ asm,
+ ocb,
+ comptime_idx.class_of(),
+ idx,
+ idx.into(),
+ comptime_idx,
+ SEND_MAX_DEPTH,
+ Counter::getbyte_idx_not_fixnum,
+ );
+ } else {
+ return false;
+ }
+
+ // Untag the index
+ let idx = asm.rshift(idx, Opnd::UImm(1));
+
+ // If index is negative, exit
+ asm.cmp(idx, Opnd::UImm(0));
+ asm.jl(Target::side_exit(Counter::getbyte_idx_negative));
+
+ asm_comment!(asm, "get string length");
+ let recv = asm.load(recv);
+ let str_len_opnd = Opnd::mem(
+ std::os::raw::c_long::BITS as u8,
+ asm.load(recv),
+ RUBY_OFFSET_RSTRING_LEN as i32,
+ );
+
+ // Exit if the index is out of bounds
+ asm.cmp(idx, str_len_opnd);
+ asm.jge(Target::side_exit(Counter::getbyte_idx_out_of_bounds));
+
+ let str_ptr = get_string_ptr(asm, recv);
+ // FIXME: could use SIB indexing here with proper support in backend
+ let str_ptr = asm.add(str_ptr, idx);
+ let byte = asm.load(Opnd::mem(8, str_ptr, 0));
+
+ // Zero-extend the byte to 64 bits
+ let byte = byte.with_num_bits(64).unwrap();
+ let byte = asm.and(byte, 0xFF.into());
+
+ // Tag the byte
+ let byte = asm.lshift(byte, Opnd::UImm(1));
+ let byte = asm.or(byte, Opnd::UImm(1));
+
+ asm.stack_pop(2); // Keep them on stack during ccall for GC
+ let out_opnd = asm.stack_push(Type::Fixnum);
+ asm.mov(out_opnd, byte);
+
+ true
+}
+
+fn jit_rb_str_setbyte(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ // Raises when index is out of range. Lazily push a frame in that case.
+ if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(2)) {
+ return false;
+ }
+ asm_comment!(asm, "String#setbyte");
+
+ let value = asm.stack_opnd(0);
+ let index = asm.stack_opnd(1);
+ let recv = asm.stack_opnd(2);
+
+ let ret_opnd = asm.ccall(rb_str_setbyte as *const u8, vec![recv, index, value]);
+ asm.stack_pop(3); // Keep them on stack during ccall for GC
+
+ let out_opnd = asm.stack_push(Type::UnknownImm);
+ asm.mov(out_opnd, ret_opnd);
true
}
@@ -3679,17 +5774,16 @@ fn jit_rb_str_bytesize(
// this situation happens a lot in some workloads.
fn jit_rb_str_to_s(
_jit: &mut JITState,
- _ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
- _block: Option<IseqPtr>,
+ _block: Option<BlockHandler>,
_argc: i32,
- known_recv_class: *const VALUE,
+ known_recv_class: Option<VALUE>,
) -> bool {
- if !known_recv_class.is_null() && unsafe { *known_recv_class == rb_cString } {
- add_comment(cb, "to_s on plain string");
+ if unsafe { known_recv_class == Some(rb_cString) } {
+ asm_comment!(asm, "to_s on plain string");
// The method returns the receiver, which is already on the stack.
// No stack movement.
return true;
@@ -3697,129 +5791,420 @@ fn jit_rb_str_to_s(
false
}
-// Codegen for rb_str_concat()
+// Codegen for rb_str_empty_p()
+fn jit_rb_str_empty_p(
+ _jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ let recv_opnd = asm.stack_pop(1);
+
+ asm_comment!(asm, "get string length");
+ let str_len_opnd = Opnd::mem(
+ std::os::raw::c_long::BITS as u8,
+ asm.load(recv_opnd),
+ RUBY_OFFSET_RSTRING_LEN as i32,
+ );
+
+ asm.cmp(str_len_opnd, Opnd::UImm(0));
+ let string_empty = asm.csel_e(Qtrue.into(), Qfalse.into());
+ let out_opnd = asm.stack_push(Type::UnknownImm);
+ asm.mov(out_opnd, string_empty);
+
+ return true;
+}
+
+// Codegen for rb_str_concat() -- *not* String#concat
// Frequently strings are concatenated using "out_str << next_str".
// This is common in Erb and similar templating languages.
fn jit_rb_str_concat(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
- ocb: &mut OutlinedCb,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
- _block: Option<IseqPtr>,
+ _block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
- let comptime_arg = jit_peek_at_stack(jit, ctx, 0);
- let comptime_arg_type = ctx.get_opnd_type(StackOpnd(0));
-
- // String#<< can take an integer codepoint as an argument, but we don't optimise that.
- // Also, a non-string argument would have to call .to_str on itself before being treated
- // as a string, and that would require saving pc/sp, which we don't do here.
- // TODO: figure out how we should optimise a string-subtype argument here
- if comptime_arg_type != Type::CString && comptime_arg.class_of() != unsafe { rb_cString } {
+ // The << operator can accept integer codepoints for characters
+ // as the argument. We only specially optimise string arguments.
+ // If the peeked-at compile time argument is something other than
+ // a string, assume it won't be a string later either.
+ let comptime_arg = jit.peek_at_stack(&asm.ctx, 0);
+ if ! unsafe { RB_TYPE_P(comptime_arg, RUBY_T_STRING) } {
return false;
}
- // Generate a side exit
- let side_exit = get_side_exit(jit, ocb, ctx);
+ // Guard that the concat argument is a string
+ guard_object_is_string(asm, asm.stack_opnd(0), StackOpnd(0), Counter::guard_send_not_string);
- // Guard that the argument is of class String at runtime.
- let arg_opnd = ctx.stack_opnd(0);
- mov(cb, REG0, arg_opnd);
- jit_guard_known_klass(
- jit,
- ctx,
- cb,
- ocb,
- unsafe { rb_cString },
- StackOpnd(0),
- comptime_arg,
- SEND_MAX_DEPTH,
- side_exit,
- );
+ // Guard buffers from GC since rb_str_buf_append may allocate.
+ // rb_str_buf_append may raise Encoding::CompatibilityError, but we accept compromised
+ // backtraces on this method since the interpreter does the same thing on opt_ltlt.
+ jit_prepare_non_leaf_call(jit, asm);
+ asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency.
- let concat_arg = ctx.stack_pop(1);
- let recv = ctx.stack_pop(1);
+ let concat_arg = asm.stack_pop(1);
+ let recv = asm.stack_pop(1);
// Test if string encodings differ. If different, use rb_str_append. If the same,
// use rb_yjit_str_simple_append, which calls rb_str_cat.
- add_comment(cb, "<< on strings");
-
- // Both rb_str_append and rb_yjit_str_simple_append take identical args
- mov(cb, C_ARG_REGS[0], recv);
- mov(cb, C_ARG_REGS[1], concat_arg);
+ asm_comment!(asm, "<< on strings");
// Take receiver's object flags XOR arg's flags. If any
// string-encoding flags are different between the two,
// the encodings don't match.
- mov(cb, REG0, recv);
- mov(cb, REG1, concat_arg);
- mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS));
- xor(cb, REG0, mem_opnd(64, REG1, RUBY_OFFSET_RBASIC_FLAGS));
- test(cb, REG0, uimm_opnd(RUBY_ENCODING_MASK as u64));
+ let recv_reg = asm.load(recv);
+ let concat_arg_reg = asm.load(concat_arg);
+ let flags_xor = asm.xor(
+ Opnd::mem(64, recv_reg, RUBY_OFFSET_RBASIC_FLAGS),
+ Opnd::mem(64, concat_arg_reg, RUBY_OFFSET_RBASIC_FLAGS)
+ );
+ asm.test(flags_xor, Opnd::UImm(RUBY_ENCODING_MASK as u64));
- let enc_mismatch = cb.new_label("enc_mismatch".to_string());
- jne_label(cb, enc_mismatch);
+ let enc_mismatch = asm.new_label("enc_mismatch");
+ asm.jnz(enc_mismatch);
// If encodings match, call the simple append function and jump to return
- call_ptr(cb, REG0, rb_yjit_str_simple_append as *const u8);
- let ret_label: usize = cb.new_label("stack_return".to_string());
- jmp_label(cb, ret_label);
+ let ret_opnd = asm.ccall(rb_yjit_str_simple_append as *const u8, vec![recv, concat_arg]);
+ let ret_label = asm.new_label("func_return");
+ let stack_ret = asm.stack_push(Type::TString);
+ asm.mov(stack_ret, ret_opnd);
+ asm.stack_pop(1); // forget stack_ret to re-push after ccall
+ asm.jmp(ret_label);
// If encodings are different, use a slower encoding-aware concatenate
- cb.write_label(enc_mismatch);
- call_ptr(cb, REG0, rb_str_append as *const u8);
+ asm.write_label(enc_mismatch);
+ asm.spill_temps(); // Ignore the register for the other local branch
+ let ret_opnd = asm.ccall(rb_str_buf_append as *const u8, vec![recv, concat_arg]);
+ let stack_ret = asm.stack_push(Type::TString);
+ asm.mov(stack_ret, ret_opnd);
// Drop through to return
- cb.write_label(ret_label);
- let stack_ret = ctx.stack_push(Type::CString);
- mov(cb, stack_ret, RAX);
+ asm.write_label(ret_label);
+
+ true
+}
+
+// Codegen for rb_ary_empty_p()
+fn jit_rb_ary_empty_p(
+ _jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ let array_opnd = asm.stack_pop(1);
+ let array_reg = asm.load(array_opnd);
+ let len_opnd = get_array_len(asm, array_reg);
+
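+ // `test len, len` sets the zero flag exactly when the length is 0, so csel_z picks Qtrue
+ // for an empty array.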
+ asm.test(len_opnd, len_opnd);
+ let bool_val = asm.csel_z(Qtrue.into(), Qfalse.into());
+
+ let out_opnd = asm.stack_push(Type::UnknownImm);
+ asm.store(out_opnd, bool_val);
+
+ return true;
+}
+
+// Codegen for rb_ary_length()
+fn jit_rb_ary_length(
+ _jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ let array_opnd = asm.stack_pop(1);
+ let array_reg = asm.load(array_opnd);
+ let len_opnd = get_array_len(asm, array_reg);
+
+ // Convert the length to a fixnum
+ let shifted_val = asm.lshift(len_opnd, Opnd::UImm(1));
+ let out_val = asm.or(shifted_val, Opnd::UImm(RUBY_FIXNUM_FLAG as u64));
+
+ let out_opnd = asm.stack_push(Type::Fixnum);
+ asm.store(out_opnd, out_val);
+
+ return true;
+}
+
+fn jit_rb_ary_push(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ asm_comment!(asm, "Array#<<");
+
+ // rb_ary_push allocates memory for buffer extension and can raise FrozenError
+ // Not using a lazy frame here since the interpreter also has a truncated
+ // stack trace from opt_ltlt.
+ jit_prepare_non_leaf_call(jit, asm);
+
+ let item_opnd = asm.stack_opnd(0);
+ let ary_opnd = asm.stack_opnd(1);
+ let ret = asm.ccall(rb_ary_push as *const u8, vec![ary_opnd, item_opnd]);
+ asm.stack_pop(2); // Keep them on stack during ccall for GC
+
+ let ret_opnd = asm.stack_push(Type::TArray);
+ asm.mov(ret_opnd, ret);
+ true
+}
+
+// Just a leaf method, but not using `Primitive.attr! :leaf` since BOP methods can't use it.
+fn jit_rb_hash_empty_p(
+ _jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ asm_comment!(asm, "Hash#empty?");
+
+ let hash_opnd = asm.stack_pop(1);
+ let ret = asm.ccall(rb_hash_empty_p as *const u8, vec![hash_opnd]);
+
+ let ret_opnd = asm.stack_push(Type::UnknownImm);
+ asm.mov(ret_opnd, ret);
+ true
+}
+
+fn jit_obj_respond_to(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ argc: i32,
+ known_recv_class: Option<VALUE>,
+) -> bool {
+ // respond_to(:sym) or respond_to(:sym, true)
+ if argc != 1 && argc != 2 {
+ return false;
+ }
+
+ let recv_class = match known_recv_class {
+ Some(class) => class,
+ None => return false,
+ };
+
+ // Get the method_id from compile time. We will later add a guard against it.
+ let mid_sym = jit.peek_at_stack(&asm.ctx, (argc - 1) as isize);
+ if !mid_sym.static_sym_p() {
+ return false
+ }
+ let mid = unsafe { rb_sym2id(mid_sym) };
+
+ // Option<bool> representing the value of the "include_all" argument and whether it's known
+ let allow_priv = if argc == 1 {
+ // Default is false
+ Some(false)
+ } else {
+ // Get value from type information (may or may not be known)
+ asm.ctx.get_opnd_type(StackOpnd(0)).known_truthy()
+ };
+
+ let target_cme = unsafe { rb_callable_method_entry_or_negative(recv_class, mid) };
+
+ // Should never be null: when no method exists, lookup returns a "negative CME" rather than null
+ assert!(!target_cme.is_null());
+
+ let cme_def_type = unsafe { get_cme_def_type(target_cme) };
+
+ if cme_def_type == VM_METHOD_TYPE_REFINED {
+ return false;
+ }
+
+ let visibility = if cme_def_type == VM_METHOD_TYPE_UNDEF {
+ METHOD_VISI_UNDEF
+ } else {
+ unsafe { METHOD_ENTRY_VISI(target_cme) }
+ };
+
+ let result = match (visibility, allow_priv) {
+ (METHOD_VISI_UNDEF, _) => {
+ // No method, we can return false given respond_to_missing? hasn't been overridden.
+ // In the future, we might want to jit the call to respond_to_missing?
+ if !assume_method_basic_definition(jit, asm, ocb, recv_class, ID!(respond_to_missing)) {
+ return false;
+ }
+ Qfalse
+ }
+ (METHOD_VISI_PUBLIC, _) | // Public method => fine regardless of include_all
+ (_, Some(true)) => { // include_all => all visibilities are acceptable
+ // Method exists and has acceptable visibility
+ if cme_def_type == VM_METHOD_TYPE_NOTIMPLEMENTED {
+ // C method with rb_f_notimplement(). `respond_to?` returns false
+ // without consulting `respond_to_missing?`. See also: rb_add_method_cfunc()
+ Qfalse
+ } else {
+ Qtrue
+ }
+ }
+ (_, _) => return false // not public and include_all not known, can't compile
+ };
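+ // `result` is the compile-time answer; the code below only guards that the same method id is
+ // passed at runtime and then pushes this constant.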
+
+ // Invalidate this block if method lookup changes for the method being queried. This works
+ // both when the method exists and when it does not, since in the latter case we asked for
+ // a "negative CME" earlier.
+ jit.assume_method_lookup_stable(asm, ocb, target_cme);
+
+ if argc == 2 {
+ // pop include_all argument (we only use its type info)
+ asm.stack_pop(1);
+ }
+
+ let sym_opnd = asm.stack_pop(1);
+ let _recv_opnd = asm.stack_pop(1);
+
+ // This is necessary because we have no guarantee that sym_opnd is a constant
+ asm_comment!(asm, "guard known mid");
+ asm.cmp(sym_opnd, mid_sym.into());
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ asm,
+ ocb,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_respond_to_mid_mismatch,
+ );
+
+ jit_putobject(asm, result);
+
+ true
+}
+
+fn jit_rb_f_block_given_p(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ asm.stack_pop(1);
+ let out_opnd = asm.stack_push(Type::UnknownImm);
+
+ gen_block_given(jit, asm, out_opnd, Qtrue.into(), Qfalse.into());
+
+ true
+}
+
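+// Writes either true_opnd or false_opnd into out_opnd depending on whether the current frame
+// was passed a block, letting callers choose the materialized values (e.g. Qtrue/Qfalse for
+// Kernel#block_given? above).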
+fn gen_block_given(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ out_opnd: Opnd,
+ true_opnd: Opnd,
+ false_opnd: Opnd,
+) {
+ asm_comment!(asm, "block_given?");
+
+ // Same as rb_vm_frame_block_handler
+ let ep_opnd = gen_get_lep(jit, asm);
+ let block_handler = asm.load(
+ Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)
+ );
+
+ // Return `block_handler != VM_BLOCK_HANDLER_NONE`
+ asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into());
+ let block_given = asm.csel_ne(true_opnd, false_opnd);
+ asm.mov(out_opnd, block_given);
+}
+
+// Codegen for rb_class_superclass()
+fn jit_rb_class_superclass(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ _block: Option<crate::codegen::BlockHandler>,
+ _argc: i32,
+ _known_recv_class: Option<VALUE>,
+) -> bool {
+ extern "C" {
+ fn rb_class_superclass(klass: VALUE) -> VALUE;
+ }
+
+ if !jit_prepare_lazy_frame_call(jit, asm, cme, StackOpnd(0)) {
+ return false;
+ }
+
+ asm_comment!(asm, "Class#superclass");
+ let recv_opnd = asm.stack_opnd(0);
+ let ret = asm.ccall(rb_class_superclass as *const u8, vec![recv_opnd]);
+
+ asm.stack_pop(1);
+ let ret_opnd = asm.stack_push(Type::Unknown);
+ asm.mov(ret_opnd, ret);
- cb.link_labels();
true
}
fn jit_thread_s_current(
_jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
_ci: *const rb_callinfo,
_cme: *const rb_callable_method_entry_t,
- _block: Option<IseqPtr>,
+ _block: Option<BlockHandler>,
_argc: i32,
- _known_recv_class: *const VALUE,
+ _known_recv_class: Option<VALUE>,
) -> bool {
- add_comment(cb, "Thread.current");
- ctx.stack_pop(1);
+ asm_comment!(asm, "Thread.current");
+ asm.stack_pop(1);
// ec->thread_ptr
- let ec_thread_ptr = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_THREAD_PTR);
- mov(cb, REG0, ec_thread_ptr);
+ let ec_thread_opnd = asm.load(Opnd::mem(64, EC, RUBY_OFFSET_EC_THREAD_PTR));
// thread->self
- let thread_self = mem_opnd(64, REG0, RUBY_OFFSET_THREAD_SELF);
- mov(cb, REG0, thread_self);
+ let thread_self = Opnd::mem(64, ec_thread_opnd, RUBY_OFFSET_THREAD_SELF);
- let stack_ret = ctx.stack_push(Type::UnknownHeap);
- mov(cb, stack_ret, REG0);
+ let stack_ret = asm.stack_push(Type::UnknownHeap);
+ asm.mov(stack_ret, thread_self);
true
}
// Check if we know how to codegen for a particular cfunc method
fn lookup_cfunc_codegen(def: *const rb_method_definition_t) -> Option<MethodGenFn> {
let method_serial = unsafe { get_def_method_serial(def) };
+ let table = unsafe { METHOD_CODEGEN_TABLE.as_ref().unwrap() };
- CodegenGlobals::look_up_codegen_method(method_serial)
+ let option_ref = table.get(&method_serial);
+ match option_ref {
+ None => None,
+ Some(&mgf) => Some(mgf), // Deref
+ }
}
// Is anyone listening for :c_call and :c_return event currently?
fn c_method_tracing_currently_enabled(jit: &JITState) -> bool {
// Defer to C implementation in yjit.c
unsafe {
- rb_c_method_tracing_currently_enabled(jit.ec.unwrap() as *mut rb_execution_context_struct)
+ rb_c_method_tracing_currently_enabled(jit.ec)
}
}
@@ -3840,26 +6225,188 @@ unsafe extern "C" fn build_kwhash(ci: *const rb_callinfo, sp: *const VALUE) -> V
hash
}
+// SpecVal is a single value in an iseq invocation's environment on the stack,
+// at sp[-2]. Depending on the frame type, it can serve different purposes,
+// which are covered here by enum variants.
+enum SpecVal {
+ BlockHandler(Option<BlockHandler>),
+ PrevEP(*const VALUE),
+ PrevEPOpnd(Opnd),
+}
+
+// Each variant represents a branch in vm_caller_setup_arg_block.
+#[derive(Clone, Copy)]
+pub enum BlockHandler {
+ // send, invokesuper: blockiseq operand
+ BlockISeq(IseqPtr),
+ // invokesuper: GET_BLOCK_HANDLER() (GET_LEP()[VM_ENV_DATA_INDEX_SPECVAL])
+ LEPSpecVal,
+ // part of the allocate-free block forwarding scheme
+ BlockParamProxy,
+ // To avoid holding the block arg (e.g. proc and symbol) across C calls,
+ // we might need to set the block handler early in the call sequence
+ AlreadySet,
+}
+
+struct ControlFrame {
+ recv: Opnd,
+ sp: Opnd,
+ iseq: Option<IseqPtr>,
+ pc: Option<u64>,
+ frame_type: u32,
+ specval: SpecVal,
+ cme: *const rb_callable_method_entry_t,
+}
+
+// Codegen performing a similar (but not identical) function to vm_push_frame
+//
+// This will generate the code to:
+// * initialize locals to Qnil
+// * push the environment (cme, block handler, frame type)
+// * push a new CFP
+// * save the new CFP to ec->cfp
+//
+// Notes:
+// * Provided sp should point to the new frame's sp, immediately following locals and the environment
+// * At entry, CFP points to the caller (not callee) frame
+// * At exit, ec->cfp is updated to the pushed CFP
+// * SP register is updated only if frame.iseq is set
+// * Stack overflow is not checked (should be done by the caller)
+// * Interrupts are not checked (should be done by the caller)
+fn gen_push_frame(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ frame: ControlFrame,
+) {
+ let sp = frame.sp;
+
+ asm_comment!(asm, "push cme, specval, frame type");
+
+ // Write method entry at sp[-3]
+ // sp[-3] = me;
+ // Use compile time cme. It's assumed to be valid because we are notified when
+ // any cme we depend on becomes outdated. See yjit_method_lookup_change().
+ asm.store(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -3), VALUE::from(frame.cme).into());
+
+ // Write special value at sp[-2]. It's either a block handler or a pointer to
+ // the outer environment depending on the frame type.
+ // sp[-2] = specval;
+ let specval: Opnd = match frame.specval {
+ SpecVal::BlockHandler(None) => VM_BLOCK_HANDLER_NONE.into(),
+ SpecVal::BlockHandler(Some(block_handler)) => {
+ match block_handler {
+ BlockHandler::BlockISeq(block_iseq) => {
+ // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
+ // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
+ // with cfp->block_code.
+ asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(block_iseq).into());
+
+ let cfp_self = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF));
+ asm.or(cfp_self, Opnd::Imm(1))
+ }
+ BlockHandler::LEPSpecVal => {
+ let lep_opnd = gen_get_lep(jit, asm);
+ asm.load(Opnd::mem(64, lep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL))
+ }
+ BlockHandler::BlockParamProxy => {
+ let ep_opnd = gen_get_lep(jit, asm);
+ let block_handler = asm.load(
+ Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)
+ );
+ block_handler
+ }
+ BlockHandler::AlreadySet => 0.into(), // unused
+ }
+ }
+ SpecVal::PrevEP(prev_ep) => {
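+ // Tag the previous EP pointer by setting its low bit, matching what VM_GUARDED_PREV_EP()
+ // does in the interpreter.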
+ let tagged_prev_ep = (prev_ep as usize) | 1;
+ VALUE(tagged_prev_ep).into()
+ }
+ SpecVal::PrevEPOpnd(ep_opnd) => {
+ asm.or(ep_opnd, 1.into())
+ }
+ };
+ if let SpecVal::BlockHandler(Some(BlockHandler::AlreadySet)) = frame.specval {
+ asm_comment!(asm, "specval should have been set");
+ } else {
+ asm.store(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -2), specval);
+ }
+
+ // Write env flags at sp[-1]
+ // sp[-1] = frame_type;
+ asm.store(Opnd::mem(64, sp, SIZEOF_VALUE_I32 * -1), frame.frame_type.into());
+
+ // Allocate a new CFP (ec->cfp--)
+ fn cfp_opnd(offset: i32) -> Opnd {
+ Opnd::mem(64, CFP, offset - (RUBY_SIZEOF_CONTROL_FRAME as i32))
+ }
+
+ // Setup the new frame
+ // *cfp = (const struct rb_control_frame_struct) {
+ // .pc = <unset for iseq, 0 for cfunc>,
+ // .sp = sp,
+ // .iseq = <iseq for iseq, 0 for cfunc>,
+ // .self = recv,
+ // .ep = <sp - 1>,
+ // .block_code = 0,
+ // };
+ asm_comment!(asm, "push callee control frame");
+
+ // For an iseq call PC may be None, in which case we will not set PC and will allow jitted code
+ // to set it as necessary.
+ if let Some(pc) = frame.pc {
+ asm.mov(cfp_opnd(RUBY_OFFSET_CFP_PC), pc.into());
+ };
+ asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SP), sp);
+ let iseq: Opnd = if let Some(iseq) = frame.iseq {
+ VALUE::from(iseq).into()
+ } else {
+ 0.into()
+ };
+ asm.mov(cfp_opnd(RUBY_OFFSET_CFP_ISEQ), iseq);
+ asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SELF), frame.recv);
+ asm.mov(cfp_opnd(RUBY_OFFSET_CFP_BLOCK_CODE), 0.into());
+
+ if frame.iseq.is_some() {
+ // Spill stack temps to let the callee use them (must be done before changing the SP register)
+ asm.spill_temps();
+
+ // Saving SP before calculating ep avoids a dependency on a register
+ // However this must be done after referencing frame.recv, which may be SP-relative
+ asm.mov(SP, sp);
+ }
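+ // ep points at the frame-type slot written above (sp[-1]); relative to it, ep[-1] is the
+ // specval and ep[-2] is the cme, matching the VM_ENV_DATA_INDEX_* layout.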
+ let ep = asm.sub(sp, SIZEOF_VALUE.into());
+ asm.mov(cfp_opnd(RUBY_OFFSET_CFP_EP), ep);
+}
+
fn gen_send_cfunc(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
ci: *const rb_callinfo,
cme: *const rb_callable_method_entry_t,
- block: Option<IseqPtr>,
+ block: Option<BlockHandler>,
+ recv_known_class: Option<VALUE>,
+ flags: u32,
argc: i32,
- recv_known_klass: *const VALUE,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
let cfunc = unsafe { get_cme_def_body_cfunc(cme) };
let cfunc_argc = unsafe { get_mct_argc(cfunc) };
+ let mut argc = argc;
+
+ // Splat call to a C method that takes `VALUE *` and `len`
+ let variable_splat = flags & VM_CALL_ARGS_SPLAT != 0 && cfunc_argc == -1;
+ let block_arg = flags & VM_CALL_ARGS_BLOCKARG != 0;
- // If the function expects a Ruby array of arguments
- if cfunc_argc < 0 && cfunc_argc != -1 {
- gen_counter_incr!(cb, send_cfunc_ruby_array_varg);
- return CantCompile;
+ // If it's a splat and the method expects a Ruby array of arguments
+ if cfunc_argc == -2 && flags & VM_CALL_ARGS_SPLAT != 0 {
+ gen_counter_incr(asm, Counter::send_cfunc_splat_neg2);
+ return None;
}
+ exit_if_kwsplat_non_nil(asm, flags, Counter::send_cfunc_kw_splat_non_nil)?;
+ let kw_splat = flags & VM_CALL_KW_SPLAT != 0;
+
let kw_arg = unsafe { vm_ci_kwarg(ci) };
let kw_arg_num = if kw_arg.is_null() {
0
@@ -3867,326 +6414,613 @@ fn gen_send_cfunc(
unsafe { get_cikw_keyword_len(kw_arg) }
};
- // Number of args which will be passed through to the callee
- // This is adjusted by the kwargs being combined into a hash.
- let passed_argc = if kw_arg.is_null() {
- argc
- } else {
- argc - kw_arg_num + 1
- };
-
- // If the argument count doesn't match
- if cfunc_argc >= 0 && cfunc_argc != passed_argc {
- gen_counter_incr!(cb, send_cfunc_argc_mismatch);
- return CantCompile;
- }
-
- // Don't JIT functions that need C stack arguments for now
- if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_REGS.len() as i32) {
- gen_counter_incr!(cb, send_cfunc_toomany_args);
- return CantCompile;
+ if kw_arg_num != 0 && flags & VM_CALL_ARGS_SPLAT != 0 {
+ gen_counter_incr(asm, Counter::send_cfunc_splat_with_kw);
+ return None;
}
if c_method_tracing_currently_enabled(jit) {
// Don't JIT if tracing c_call or c_return
- gen_counter_incr!(cb, send_cfunc_tracing);
- return CantCompile;
+ gen_counter_incr(asm, Counter::send_cfunc_tracing);
+ return None;
}
+ // Increment total cfunc send count
+ gen_counter_incr(asm, Counter::num_send_cfunc);
+
// Delegate to codegen for C methods if we have it.
- if kw_arg.is_null() {
- let codegen_p = lookup_cfunc_codegen(unsafe { (*cme).def });
- if let Some(known_cfunc_codegen) = codegen_p {
- let start_pos = cb.get_write_ptr().raw_ptr() as usize;
- if known_cfunc_codegen(jit, ctx, cb, ocb, ci, cme, block, argc, recv_known_klass) {
- let written_bytes = cb.get_write_ptr().raw_ptr() as usize - start_pos;
- if written_bytes < JUMP_SIZE_IN_BYTES {
- add_comment(cb, "Writing NOPs to leave room for later invalidation code");
- nop(cb, (JUMP_SIZE_IN_BYTES - written_bytes) as u32);
- }
+ if kw_arg.is_null() &&
+ !kw_splat &&
+ flags & VM_CALL_OPT_SEND == 0 &&
+ flags & VM_CALL_ARGS_SPLAT == 0 &&
+ (cfunc_argc == -1 || argc == cfunc_argc) {
+ let expected_stack_after = asm.ctx.get_stack_size() as i32 - argc;
+ if let Some(known_cfunc_codegen) = lookup_cfunc_codegen(unsafe { (*cme).def }) {
+ // We don't push a frame for specialized cfunc codegen, so the generated code must be leaf.
+ // However, the interpreter doesn't push a frame on opt_* instructions either, so we allow
+ // non-sendish instructions to break this rule as an exception.
+ let cfunc_codegen = if jit.is_sendish() {
+ asm.with_leaf_ccall(|asm|
+ perf_call!("gen_send_cfunc: ", known_cfunc_codegen(jit, asm, ocb, ci, cme, block, argc, recv_known_class))
+ )
+ } else {
+ perf_call!("gen_send_cfunc: ", known_cfunc_codegen(jit, asm, ocb, ci, cme, block, argc, recv_known_class))
+ };
+
+ if cfunc_codegen {
+ assert_eq!(expected_stack_after, asm.ctx.get_stack_size() as i32);
+ gen_counter_incr(asm, Counter::num_send_cfunc_inline);
// cfunc codegen generated code. Terminate the block so
// there isn't multiple calls in the same block.
- jump_to_next_insn(jit, ctx, cb, ocb);
- return EndBlock;
+ jump_to_next_insn(jit, asm, ocb);
+ return Some(EndBlock);
}
}
}
- // Create a side-exit to fall back to the interpreter
- let side_exit = get_side_exit(jit, ocb, ctx);
-
// Check for interrupts
- gen_check_ints(cb, side_exit);
+ gen_check_ints(asm, Counter::guard_send_interrupted);
// Stack overflow check
// #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
// REG_CFP <= REG_SP + 4 * SIZEOF_VALUE + sizeof(rb_control_frame_t)
- add_comment(cb, "stack overflow check");
- lea(
- cb,
- REG0,
- ctx.sp_opnd((SIZEOF_VALUE * 4 + 2 * RUBY_SIZEOF_CONTROL_FRAME) as isize),
- );
- cmp(cb, REG_CFP, REG0);
- jle_ptr(cb, counted_exit!(ocb, side_exit, send_se_cf_overflow));
+ asm_comment!(asm, "stack overflow check");
+ const _: () = assert!(RUBY_SIZEOF_CONTROL_FRAME % SIZEOF_VALUE == 0, "sizeof(rb_control_frame_t) is a multiple of sizeof(VALUE)");
+ let stack_limit = asm.lea(asm.ctx.sp_opnd((4 + 2 * (RUBY_SIZEOF_CONTROL_FRAME / SIZEOF_VALUE)) as i32));
+ asm.cmp(CFP, stack_limit);
+ asm.jbe(Target::side_exit(Counter::guard_send_se_cf_overflow));
+
+ // Guard for variable length splat call before any modifications to the stack
+ if variable_splat {
+ let splat_array_idx = i32::from(kw_splat) + i32::from(block_arg);
+ let comptime_splat_array = jit.peek_at_stack(&asm.ctx, splat_array_idx as isize);
+ if unsafe { rb_yjit_ruby2_keywords_splat_p(comptime_splat_array) } != 0 {
+ gen_counter_incr(asm, Counter::send_cfunc_splat_varg_ruby2_keywords);
+ return None;
+ }
- // Points to the receiver operand on the stack
- let recv = ctx.stack_opnd(argc);
+ let splat_array = asm.stack_opnd(splat_array_idx);
+ guard_object_is_array(asm, splat_array, splat_array.into(), Counter::guard_send_splat_not_array);
- // Store incremented PC into current control frame in case callee raises.
- jit_save_pc(jit, cb, REG0);
+ asm_comment!(asm, "guard variable length splat call servicable");
+ let sp = asm.ctx.sp_opnd(0);
+ let proceed = asm.ccall(rb_yjit_splat_varg_checks as _, vec![sp, splat_array, CFP]);
+ asm.cmp(proceed, Qfalse.into());
+ asm.je(Target::side_exit(Counter::guard_send_cfunc_bad_splat_vargs));
+ }
- if let Some(block_iseq) = block {
- // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
- // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
- // with cfp->block_code.
- jit_mov_gc_ptr(jit, cb, REG0, VALUE(block_iseq as usize));
- let block_code_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE);
- mov(cb, block_code_opnd, REG0);
+ // Number of args which will be passed through to the callee
+ // This is adjusted by the kwargs being combined into a hash.
+ let mut passed_argc = if kw_arg.is_null() {
+ argc
+ } else {
+ argc - kw_arg_num + 1
+ };
+
+ // Exclude the kw_splat hash from arity check
+ if kw_splat {
+ passed_argc -= 1;
}
- // Increment the stack pointer by 3 (in the callee)
- // sp += 3
- lea(cb, REG0, ctx.sp_opnd((SIZEOF_VALUE as isize) * 3));
+ // If the argument count doesn't match
+ if cfunc_argc >= 0 && cfunc_argc != passed_argc && flags & VM_CALL_ARGS_SPLAT == 0 {
+ gen_counter_incr(asm, Counter::send_cfunc_argc_mismatch);
+ return None;
+ }
- // Write method entry at sp[-3]
- // sp[-3] = me;
- // Put compile time cme into REG1. It's assumed to be valid because we are notified when
- // any cme we depend on become outdated. See yjit_method_lookup_change().
- jit_mov_gc_ptr(jit, cb, REG1, VALUE(cme as usize));
- mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
-
- // Write block handler at sp[-2]
- // sp[-2] = block_handler;
- if let Some(_block_iseq) = block {
- // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
- let cfp_self = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF);
- lea(cb, REG1, cfp_self);
- or(cb, REG1, imm_opnd(1));
- mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
+ // Don't JIT functions that need C stack arguments for now
+ if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_OPNDS.len() as i32) {
+ gen_counter_incr(asm, Counter::send_cfunc_toomany_args);
+ return None;
+ }
+
+ let block_arg_type = if block_arg {
+ Some(asm.ctx.get_opnd_type(StackOpnd(0)))
} else {
- let dst_opnd = mem_opnd(64, REG0, 8 * -2);
- mov(cb, dst_opnd, uimm_opnd(VM_BLOCK_HANDLER_NONE.into()));
+ None
+ };
+
+ match block_arg_type {
+ Some(Type::Nil | Type::BlockParamProxy) => {
+ // We'll handle this later
+ }
+ None => {
+ // Nothing to do
+ }
+ _ => {
+ gen_counter_incr(asm, Counter::send_cfunc_block_arg);
+ return None;
+ }
}
- // Write env flags at sp[-1]
- // sp[-1] = frame_type;
+ match block_arg_type {
+ Some(Type::Nil) => {
+ // We have a nil block arg, so let's pop it off the args
+ asm.stack_pop(1);
+ }
+ Some(Type::BlockParamProxy) => {
+ // We don't need the actual stack value
+ asm.stack_pop(1);
+ }
+ None => {
+ // Nothing to do
+ }
+ _ => {
+ assert!(false);
+ }
+ }
+
+ // Pop the empty kw_splat hash
+ if kw_splat {
+ // Only `**nil` is supported right now. Checked in exit_if_kwsplat_non_nil()
+ assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0)));
+ asm.stack_pop(1);
+ argc -= 1;
+ }
+
+ // Splat handling when C method takes a static number of arguments.
+ // push_splat_args() does stack manipulation so we can no longer side exit
+ if flags & VM_CALL_ARGS_SPLAT != 0 && cfunc_argc >= 0 {
+ let required_args: u32 = (cfunc_argc as u32).saturating_sub(argc as u32 - 1);
+ // + 1 because we pass self
+ if required_args + 1 >= C_ARG_OPNDS.len() as u32 {
+ gen_counter_incr(asm, Counter::send_cfunc_toomany_args);
+ return None;
+ }
+
+ // We are going to assume that the splat fills
+ // all the remaining arguments. So the number of args
+ // should just equal the number of args the cfunc takes.
+ // In the generated code we test if this is true
+ // and if not side exit.
+ argc = cfunc_argc;
+ passed_argc = argc;
+ push_splat_args(required_args, asm)
+ }
+
+ // This is a .send call and we need to adjust the stack
+ if flags & VM_CALL_OPT_SEND != 0 {
+ handle_opt_send_shift_stack(asm, argc);
+ }
+
+ // Push a dynamic number of items from the splat array to the stack when calling a vargs method
+ let dynamic_splat_size = if variable_splat {
+ asm_comment!(asm, "variable length splat");
+ let stack_splat_array = asm.lea(asm.stack_opnd(0));
+ Some(asm.ccall(rb_yjit_splat_varg_cfunc as _, vec![stack_splat_array]))
+ } else {
+ None
+ };
+
+ // Points to the receiver operand on the stack
+ let recv = asm.stack_opnd(argc);
+
+ // Store incremented PC into current control frame in case callee raises.
+ jit_save_pc(jit, asm);
+
+ // Find callee's SP with space for metadata.
+ // Usually sp+3.
+ let sp = if let Some(splat_size) = dynamic_splat_size {
+ // Compute the callee's SP at runtime in case we accept a variable size for the splat array
+ const _: () = assert!(SIZEOF_VALUE == 8, "opting for a shift since mul on A64 takes no immediates");
+ let splat_size_bytes = asm.lshift(splat_size, 3usize.into());
+ // 3 items for method metadata, minus one to remove the splat array
+ let static_stack_top = asm.lea(asm.ctx.sp_opnd(2));
+ asm.add(static_stack_top, splat_size_bytes)
+ } else {
+ asm.lea(asm.ctx.sp_opnd(3))
+ };
+
+ let specval = if block_arg_type == Some(Type::BlockParamProxy) {
+ SpecVal::BlockHandler(Some(BlockHandler::BlockParamProxy))
+ } else {
+ SpecVal::BlockHandler(block)
+ };
+
let mut frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
if !kw_arg.is_null() {
frame_type |= VM_FRAME_FLAG_CFRAME_KW
}
- mov(cb, mem_opnd(64, REG0, 8 * -1), uimm_opnd(frame_type.into()));
- // Allocate a new CFP (ec->cfp--)
- let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP);
- sub(cb, ec_cfp_opnd, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64));
-
- // Setup the new frame
- // *cfp = (const struct rb_control_frame_struct) {
- // .pc = 0,
- // .sp = sp,
- // .iseq = 0,
- // .self = recv,
- // .ep = sp - 1,
- // .block_code = 0,
- // .__bp__ = sp,
- // };
+ perf_call!("gen_send_cfunc: ", gen_push_frame(jit, asm, ControlFrame {
+ frame_type,
+ specval,
+ cme,
+ recv,
+ sp,
+ pc: if cfg!(debug_assertions) {
+ Some(!0) // Poison value. Helps to fail fast.
+ } else {
+ None // Leave PC uninitialized as cfuncs shouldn't read it
+ },
+ iseq: None,
+ }));
- // Can we re-use ec_cfp_opnd from above?
- let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP);
- mov(cb, REG1, ec_cfp_opnd);
- mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_PC), imm_opnd(0));
-
- mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_SP), REG0);
- mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_ISEQ), imm_opnd(0));
- mov(
- cb,
- mem_opnd(64, REG1, RUBY_OFFSET_CFP_BLOCK_CODE),
- imm_opnd(0),
- );
- mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_BP), REG0);
- sub(cb, REG0, uimm_opnd(SIZEOF_VALUE as u64));
- mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_EP), REG0);
- mov(cb, REG0, recv);
- mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_SELF), REG0);
-
- /*
- // Verify that we are calling the right function
- if (YJIT_CHECK_MODE > 0) { // TODO: will we have a YJIT_CHECK_MODE?
- // Call check_cfunc_dispatch
- mov(cb, C_ARG_REGS[0], recv);
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], (VALUE)ci);
- mov(cb, C_ARG_REGS[2], const_ptr_opnd((void *)cfunc->func));
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], (VALUE)cme);
- call_ptr(cb, REG0, (void *)&check_cfunc_dispatch);
- }
- */
+ asm_comment!(asm, "set ec->cfp");
+ let new_cfp = asm.lea(Opnd::mem(64, CFP, -(RUBY_SIZEOF_CONTROL_FRAME as i32)));
+ asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), new_cfp);
if !kw_arg.is_null() {
// Build a hash from all kwargs passed
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], VALUE(ci as usize));
- lea(cb, C_ARG_REGS[1], ctx.sp_opnd(0));
- call_ptr(cb, REG0, build_kwhash as *const u8);
+ asm_comment!(asm, "build_kwhash");
+ let imemo_ci = VALUE(ci as usize);
+ assert_ne!(0, unsafe { rb_IMEMO_TYPE_P(imemo_ci, imemo_callinfo) },
+ "we assume all callinfos with kwargs are on the GC heap");
+ let sp = asm.lea(asm.ctx.sp_opnd(0));
+ let kwargs = asm.ccall(build_kwhash as *const u8, vec![imemo_ci.into(), sp]);
// Replace the stack location at the start of kwargs with the new hash
- let stack_opnd = ctx.stack_opnd(argc - passed_argc);
- mov(cb, stack_opnd, RAX);
+ let stack_opnd = asm.stack_opnd(argc - passed_argc);
+ asm.mov(stack_opnd, kwargs);
}
- // Copy SP into RAX because REG_SP will get overwritten
- lea(cb, RAX, ctx.sp_opnd(0));
-
- // Pop the C function arguments from the stack (in the caller)
- ctx.stack_pop((argc + 1).try_into().unwrap());
-
// Write interpreter SP into CFP.
- // Needed in case the callee yields to the block.
- gen_save_sp(cb, ctx);
+ // We don't pop arguments yet to use registers for passing them, but we
+ // have to set cfp->sp below them for full_cfunc_return() invalidation.
+ gen_save_sp_with_offset(asm, -(argc + 1) as i8);
// Non-variadic method
- if cfunc_argc >= 0 {
+ let args = if cfunc_argc >= 0 {
// Copy the arguments from the stack to the C argument registers
// self is the 0th argument and is at index argc from the stack top
- for i in 0..=passed_argc as usize {
- let stack_opnd = mem_opnd(64, RAX, -(argc + 1 - (i as i32)) * SIZEOF_VALUE_I32);
- let c_arg_reg = C_ARG_REGS[i];
- mov(cb, c_arg_reg, stack_opnd);
- }
+ (0..=passed_argc).map(|i|
+ asm.stack_opnd(argc - i)
+ ).collect()
}
-
// Variadic method
- if cfunc_argc == -1 {
+ else if cfunc_argc == -1 {
// The method gets a pointer to the first argument
// rb_f_puts(int argc, VALUE *argv, VALUE recv)
- mov(cb, C_ARG_REGS[0], imm_opnd(passed_argc.into()));
- lea(
- cb,
- C_ARG_REGS[1],
- mem_opnd(64, RAX, -(argc) * SIZEOF_VALUE_I32),
- );
- mov(
- cb,
- C_ARG_REGS[2],
- mem_opnd(64, RAX, -(argc + 1) * SIZEOF_VALUE_I32),
+
+ let passed_argc_opnd = if let Some(splat_size) = dynamic_splat_size {
+ // The final argc is the size of the splat, minus one for the splat array itself
+ asm.add(splat_size, (passed_argc - 1).into())
+ } else {
+ // Without a splat, passed_argc is static
+ Opnd::Imm(passed_argc.into())
+ };
+
+ vec![
+ passed_argc_opnd,
+ asm.lea(asm.ctx.sp_opnd(-argc)),
+ asm.stack_opnd(argc),
+ ]
+ }
+ // Variadic method taking a Ruby array
+ else if cfunc_argc == -2 {
+ // Slurp up all the arguments into an array
+ let stack_args = asm.lea(asm.ctx.sp_opnd(-argc));
+ let args_array = asm.ccall(
+ rb_ec_ary_new_from_values as _,
+ vec![EC, passed_argc.into(), stack_args]
);
- }
+
+ // Example signature:
+ // VALUE neg2_method(VALUE self, VALUE argv)
+ vec![asm.stack_opnd(argc), args_array]
+ } else {
+ panic!("unexpected cfunc_args: {}", cfunc_argc)
+ };
// Call the C function
// VALUE ret = (cfunc->func)(recv, argv[0], argv[1]);
// cfunc comes from compile-time cme->def, which we assume to be stable.
// Invalidation logic is in yjit_method_lookup_change()
- add_comment(cb, "call C function");
- call_ptr(cb, REG0, unsafe { get_mct_func(cfunc) });
+ asm_comment!(asm, "call C function");
+ let ret = asm.ccall(unsafe { get_mct_func(cfunc) }.cast(), args);
+ asm.stack_pop((argc + 1).try_into().unwrap()); // Pop arguments after ccall to use registers for passing them.
// Record code position for TracePoint patching. See full_cfunc_return().
- record_global_inval_patch(cb, CodegenGlobals::get_outline_full_cfunc_return_pos());
+ record_global_inval_patch(asm, CodegenGlobals::get_outline_full_cfunc_return_pos());
// Push the return value on the Ruby stack
- let stack_ret = ctx.stack_push(Type::Unknown);
- mov(cb, stack_ret, RAX);
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, ret);
+
+    // Log the name of the method we're calling. We intentionally don't do this for inlined cfuncs.
+ // We also do this after the C call to minimize the impact of spill_temps() on asm.ccall().
+ if get_option!(gen_stats) {
+ // Assemble the method name string
+ let mid = unsafe { vm_ci_mid(ci) };
+ let name_str = get_method_name(recv_known_class, mid);
+
+ // Get an index for this cfunc name
+ let cfunc_idx = get_cfunc_idx(&name_str);
+
+ // Increment the counter for this cfunc
+ asm.ccall(incr_cfunc_counter as *const u8, vec![cfunc_idx.into()]);
+ }
// Pop the stack frame (ec->cfp++)
- // Can we reuse ec_cfp_opnd from above?
- let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP);
- add(cb, ec_cfp_opnd, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64));
+ // Instead of recalculating, we can reuse the previous CFP, which is stored in a callee-saved
+ // register
+ let ec_cfp_opnd = Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP);
+ asm.store(ec_cfp_opnd, CFP);
// cfunc calls may corrupt types
- ctx.clear_local_types();
+ asm.clear_local_types();
// Note: the return block of gen_send_iseq() has ctx->sp_offset == 1
// which allows for sharing the same successor.
// Jump (fall through) to the call continuation block
// We do this to end the current block after the call
- jump_to_next_insn(jit, ctx, cb, ocb);
- EndBlock
+ jump_to_next_insn(jit, asm, ocb);
+ Some(EndBlock)
}
-fn gen_return_branch(
- cb: &mut CodeBlock,
- target0: CodePtr,
- _target1: Option<CodePtr>,
- shape: BranchShape,
-) {
- match shape {
- BranchShape::Next0 | BranchShape::Next1 => unreachable!(),
- BranchShape::Default => {
- mov(cb, REG0, code_ptr_opnd(target0));
- mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_JIT_RETURN), REG0);
+// Generate RARRAY_LEN. For array_opnd, use Opnd::Reg to reduce memory access,
+// and use Opnd::Mem to save registers.
+fn get_array_len(asm: &mut Assembler, array_opnd: Opnd) -> Opnd {
+ asm_comment!(asm, "get array length for embedded or heap");
+
+ // Pull out the embed flag to check if it's an embedded array.
+ let array_reg = match array_opnd {
+ Opnd::InsnOut { .. } => array_opnd,
+ _ => asm.load(array_opnd),
+ };
+ let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS);
+
+ // Get the length of the array
+ let emb_len_opnd = asm.and(flags_opnd, (RARRAY_EMBED_LEN_MASK as u64).into());
+ let emb_len_opnd = asm.rshift(emb_len_opnd, (RARRAY_EMBED_LEN_SHIFT as u64).into());
+
+ // Conditionally move the length of the heap array
+ let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS);
+ asm.test(flags_opnd, (RARRAY_EMBED_FLAG as u64).into());
+
+ let array_reg = match array_opnd {
+ Opnd::InsnOut { .. } => array_opnd,
+ _ => asm.load(array_opnd),
+ };
+ let array_len_opnd = Opnd::mem(
+ std::os::raw::c_long::BITS as u8,
+ array_reg,
+ RUBY_OFFSET_RARRAY_AS_HEAP_LEN,
+ );
+
+ // Select the array length value
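+    // Note: csel_nz keeps emb_len_opnd when the `test` above was non-zero (the embed flag
+    // is set), and otherwise falls back to the heap length loaded above.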
+ asm.csel_nz(emb_len_opnd, array_len_opnd)
+}
+
+// Generate RARRAY_CONST_PTR (part of RARRAY_AREF)
+fn get_array_ptr(asm: &mut Assembler, array_reg: Opnd) -> Opnd {
+ asm_comment!(asm, "get array pointer for embedded or heap");
+
+ let flags_opnd = Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RBASIC_FLAGS);
+ asm.test(flags_opnd, (RARRAY_EMBED_FLAG as u64).into());
+ let heap_ptr_opnd = Opnd::mem(
+ usize::BITS as u8,
+ array_reg,
+ RUBY_OFFSET_RARRAY_AS_HEAP_PTR,
+ );
+
+ // Load the address of the embedded array
+ // (struct RArray *)(obj)->as.ary
+ let ary_opnd = asm.lea(Opnd::mem(VALUE_BITS, array_reg, RUBY_OFFSET_RARRAY_AS_ARY));
+ asm.csel_nz(ary_opnd, heap_ptr_opnd)
+}
+
+// Generate RSTRING_PTR
+fn get_string_ptr(asm: &mut Assembler, string_reg: Opnd) -> Opnd {
+ asm_comment!(asm, "get string pointer for embedded or heap");
+
+ let flags_opnd = Opnd::mem(VALUE_BITS, string_reg, RUBY_OFFSET_RBASIC_FLAGS);
+ asm.test(flags_opnd, (RSTRING_NOEMBED as u64).into());
+ let heap_ptr_opnd = asm.load(Opnd::mem(
+ usize::BITS as u8,
+ string_reg,
+ RUBY_OFFSET_RSTRING_AS_HEAP_PTR,
+ ));
+
+    // Load the address of the embedded string contents
+    // (struct RString *)(obj)->as.ary
+ let ary_opnd = asm.lea(Opnd::mem(VALUE_BITS, string_reg, RUBY_OFFSET_RSTRING_AS_ARY));
+ asm.csel_nz(heap_ptr_opnd, ary_opnd)
+}
+
+/// Pushes arguments from an array to the stack. Differs from push splat because
+/// the array can have items left over. Array is assumed to be T_ARRAY without guards.
+fn copy_splat_args_for_rest_callee(array: Opnd, num_args: u32, asm: &mut Assembler) {
+ asm_comment!(asm, "copy_splat_args_for_rest_callee");
+
+ // Unused operands cause the backend to panic
+ if num_args == 0 {
+ return;
+ }
+
+ asm_comment!(asm, "Push arguments from array");
+
+ let array_reg = asm.load(array);
+ let ary_opnd = get_array_ptr(asm, array_reg);
+ for i in 0..num_args {
+ let top = asm.stack_push(Type::Unknown);
+ asm.mov(top, Opnd::mem(64, ary_opnd, i as i32 * SIZEOF_VALUE_I32));
+ }
+}
+
+/// Pushes arguments from an array to the stack that are passed with a splat (i.e. *args)
+/// It optimistically compiles to a static size that is the exact number of arguments
+/// needed for the function.
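+/// For example, if required_args is 2 and the call site is `f(*ary)`, we guard that
+/// ary.length == 2 at runtime and then push ary[0] and ary[1].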
+fn push_splat_args(required_args: u32, asm: &mut Assembler) {
+ asm_comment!(asm, "push_splat_args");
+
+ let array_opnd = asm.stack_opnd(0);
+ guard_object_is_array(
+ asm,
+ array_opnd,
+ array_opnd.into(),
+ Counter::guard_send_splat_not_array,
+ );
+
+ let array_len_opnd = get_array_len(asm, array_opnd);
+
+ asm_comment!(asm, "Guard for expected splat length");
+ asm.cmp(array_len_opnd, required_args.into());
+ asm.jne(Target::side_exit(Counter::guard_send_splatarray_length_not_equal));
+
+    asm_comment!(asm, "Check that the last argument is not a ruby2_keywords hash");
+
+ // Need to repeat this here to deal with register allocation
+ let array_reg = asm.load(asm.stack_opnd(0));
+
+ let ary_opnd = get_array_ptr(asm, array_reg);
+
+ let last_array_value = asm.load(Opnd::mem(64, ary_opnd, (required_args as i32 - 1) * (SIZEOF_VALUE as i32)));
+
+ guard_object_is_not_ruby2_keyword_hash(
+ asm,
+ last_array_value,
+ Counter::guard_send_splatarray_last_ruby2_keywords,
+ );
+
+ asm_comment!(asm, "Push arguments from array");
+ let array_opnd = asm.stack_pop(1);
+
+ if required_args > 0 {
+ let array_reg = asm.load(array_opnd);
+ let ary_opnd = get_array_ptr(asm, array_reg);
+
+ for i in 0..required_args {
+ let top = asm.stack_push(Type::Unknown);
+ asm.mov(top, Opnd::mem(64, ary_opnd, i as i32 * SIZEOF_VALUE_I32));
}
+
+ asm_comment!(asm, "end push_each");
}
}
-fn gen_send_iseq(
+fn gen_send_bmethod(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
ci: *const rb_callinfo,
cme: *const rb_callable_method_entry_t,
- block: Option<IseqPtr>,
+ block: Option<BlockHandler>,
+ flags: u32,
argc: i32,
-) -> CodegenStatus {
- let iseq = unsafe { get_def_iseq_ptr((*cme).def) };
- let mut argc = argc;
+) -> Option<CodegenStatus> {
+ let procv = unsafe { rb_get_def_bmethod_proc((*cme).def) };
- // When you have keyword arguments, there is an extra object that gets
- // placed on the stack the represents a bitmap of the keywords that were not
- // specified at the call site. We need to keep track of the fact that this
- // value is present on the stack in order to properly set up the callee's
- // stack pointer.
- let doing_kw_call = unsafe { get_iseq_flags_has_kw(iseq) };
- let supplying_kws = unsafe { vm_ci_flag(ci) & VM_CALL_KWARG } != 0;
+ let proc = unsafe { rb_yjit_get_proc_ptr(procv) };
+ let proc_block = unsafe { &(*proc).block };
- if unsafe { vm_ci_flag(ci) } & VM_CALL_TAILCALL != 0 {
- // We can't handle tailcalls
- gen_counter_incr!(cb, send_iseq_tailcall);
- return CantCompile;
+ if proc_block.type_ != block_type_iseq {
+ return None;
}
- // No support for callees with these parameters yet as they require allocation
- // or complex handling.
- if unsafe {
- get_iseq_flags_has_rest(iseq)
- || get_iseq_flags_has_post(iseq)
- || get_iseq_flags_has_kwrest(iseq)
- } {
- gen_counter_incr!(cb, send_iseq_complex_callee);
- return CantCompile;
+ let capture = unsafe { proc_block.as_.captured.as_ref() };
+ let iseq = unsafe { *capture.code.iseq.as_ref() };
+
+ // Optimize for single ractor mode and avoid runtime check for
+ // "defined with an un-shareable Proc in a different Ractor"
+ if !assume_single_ractor_mode(jit, asm, ocb) {
+ gen_counter_incr(asm, Counter::send_bmethod_ractor);
+ return None;
}
- // If we have keyword arguments being passed to a callee that only takes
- // positionals, then we need to allocate a hash. For now we're going to
- // call that too complex and bail.
- if supplying_kws && !unsafe { get_iseq_flags_has_kw(iseq) } {
- gen_counter_incr!(cb, send_iseq_complex_callee);
- return CantCompile;
+ // Passing a block to a block needs logic different from passing
+ // a block to a method and sometimes requires allocation. Bail for now.
+ if block.is_some() {
+ gen_counter_incr(asm, Counter::send_bmethod_block_arg);
+ return None;
}
- // If we have a method accepting no kwargs (**nil), exit if we have passed
- // it any kwargs.
- if supplying_kws && unsafe { get_iseq_flags_has_accepts_no_kwarg(iseq) } {
- gen_counter_incr!(cb, send_iseq_complex_callee);
- return CantCompile;
+ let frame_type = VM_FRAME_MAGIC_BLOCK | VM_FRAME_FLAG_BMETHOD | VM_FRAME_FLAG_LAMBDA;
+ perf_call! { gen_send_iseq(jit, asm, ocb, iseq, ci, frame_type, Some(capture.ep), cme, block, flags, argc, None) }
+}
+
+/// The kind of a value an ISEQ returns
+enum IseqReturn {
+ Value(VALUE),
+ LocalVariable(u32),
+ Receiver,
+}
+
+extern {
+ fn rb_simple_iseq_p(iseq: IseqPtr) -> bool;
+}
+
+/// Return the ISEQ's return value if it consists of a single simple instruction followed by `leave`.
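+/// For example, `def foo = nil` compiles to just `putnil; leave`, so a call to it can be
+/// folded into pushing `nil` at the call site.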
+fn iseq_get_return_value(iseq: IseqPtr, captured_opnd: Option<Opnd>, ci_flags: u32) -> Option<IseqReturn> {
+ // Expect only two instructions and one possible operand
+ let iseq_size = unsafe { get_iseq_encoded_size(iseq) };
+ if !(2..=3).contains(&iseq_size) {
+ return None;
}
- // For computing number of locals to set up for the callee
- let mut num_params = unsafe { get_iseq_body_param_size(iseq) };
+ // Get the first two instructions
+ let first_insn = iseq_opcode_at_idx(iseq, 0);
+ let second_insn = iseq_opcode_at_idx(iseq, insn_len(first_insn as usize));
- // Block parameter handling. This mirrors setup_parameters_complex().
- if unsafe { get_iseq_flags_has_block(iseq) } {
- if unsafe { get_iseq_body_local_iseq(iseq) == iseq } {
- num_params -= 1;
- } else {
- // In this case (param.flags.has_block && local_iseq != iseq),
- // the block argument is setup as a local variable and requires
- // materialization (allocation). Bail.
- gen_counter_incr!(cb, send_iseq_complex_callee);
- return CantCompile;
+ // Extract the return value if known
+ if second_insn != YARVINSN_leave {
+ return None;
+ }
+ match first_insn {
+ YARVINSN_getlocal_WC_0 => {
+ // Only accept simple positional only cases for both the caller and the callee.
+ // Reject block ISEQs to avoid autosplat and other block parameter complications.
+ if captured_opnd.is_none() && unsafe { rb_simple_iseq_p(iseq) } && ci_flags & VM_CALL_ARGS_SIMPLE != 0 {
+ let ep_offset = unsafe { *rb_iseq_pc_at_idx(iseq, 1) }.as_u32();
+ let local_idx = ep_offset_to_local_idx(iseq, ep_offset);
+ Some(IseqReturn::LocalVariable(local_idx))
+ } else {
+ None
+ }
}
+ YARVINSN_putnil => Some(IseqReturn::Value(Qnil)),
+ YARVINSN_putobject => Some(IseqReturn::Value(unsafe { *rb_iseq_pc_at_idx(iseq, 1) })),
+ YARVINSN_putobject_INT2FIX_0_ => Some(IseqReturn::Value(VALUE::fixnum_from_usize(0))),
+ YARVINSN_putobject_INT2FIX_1_ => Some(IseqReturn::Value(VALUE::fixnum_from_usize(1))),
+ // We don't support invokeblock for now. Such ISEQs are likely not used by blocks anyway.
+ YARVINSN_putself if captured_opnd.is_none() => Some(IseqReturn::Receiver),
+ _ => None,
}
+}
+
+fn gen_send_iseq(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ iseq: *const rb_iseq_t,
+ ci: *const rb_callinfo,
+ frame_type: u32,
+ prev_ep: Option<*const VALUE>,
+ cme: *const rb_callable_method_entry_t,
+ block: Option<BlockHandler>,
+ flags: u32,
+ argc: i32,
+ captured_opnd: Option<Opnd>,
+) -> Option<CodegenStatus> {
+ // Argument count. We will change this as we gather values from
+ // sources to satisfy the callee's parameters. To help make sense
+ // of changes, note that:
+ // - Parameters syntactically on the left have lower addresses.
+ // For example, all the lead (required) and optional parameters
+ // have lower addresses than the rest parameter array.
+ // - The larger the index one passes to Assembler::stack_opnd(),
+ // the *lower* the address.
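+    // For example, with argc == 2 for a call like `recv.foo(a, b)`, stack_opnd(2) is the
+    // receiver, stack_opnd(1) is `a`, and stack_opnd(0) is `b` at the top of the stack.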
+ let mut argc = argc;
+
+ // Iseqs with keyword parameters have a hidden, unnamed parameter local
+ // that the callee could use to know which keywords are unspecified
+ // (see the `checkkeyword` instruction and check `ruby --dump=insn -e 'def foo(k:itself)=k'`).
+ // We always need to set up this local if the call goes through.
+ let has_kwrest = unsafe { get_iseq_flags_has_kwrest(iseq) };
+ let doing_kw_call = unsafe { get_iseq_flags_has_kw(iseq) } || has_kwrest;
+ let supplying_kws = unsafe { vm_ci_flag(ci) & VM_CALL_KWARG } != 0;
+ let iseq_has_rest = unsafe { get_iseq_flags_has_rest(iseq) };
+ let iseq_has_block_param = unsafe { get_iseq_flags_has_block(iseq) };
+ let arg_setup_block = captured_opnd.is_some(); // arg_setup_type: arg_setup_block (invokeblock)
+ let kw_splat = flags & VM_CALL_KW_SPLAT != 0;
+ let splat_call = flags & VM_CALL_ARGS_SPLAT != 0;
+
+ // For computing offsets to callee locals
+ let num_params = unsafe { get_iseq_body_param_size(iseq) as i32 };
+ let num_locals = unsafe { get_iseq_body_local_table_size(iseq) as i32 };
- let mut start_pc_offset = 0;
+ let mut start_pc_offset: u16 = 0;
let required_num = unsafe { get_iseq_body_param_lead_num(iseq) };
// This struct represents the metadata about the caller-specified
@@ -4198,393 +7032,628 @@ fn gen_send_iseq(
unsafe { get_cikw_keyword_len(kw_arg) }
};
- // Arity handling and optional parameter setup
- let opts_filled = argc - required_num - kw_arg_num;
+ // Arity handling and optional parameter setup for positional arguments.
+ // Splats are handled later.
+ let mut opts_filled = argc - required_num - kw_arg_num - i32::from(kw_splat) - i32::from(splat_call);
let opt_num = unsafe { get_iseq_body_param_opt_num(iseq) };
- let opts_missing: i32 = opt_num - opts_filled;
+ // With a rest parameter or a yield to a block,
+ // callers can pass more than required + optional.
+    // So we cap opts_filled at opt_num.
+ if iseq_has_rest || arg_setup_block {
+ opts_filled = min(opts_filled, opt_num);
+ }
+ let mut opts_missing: i32 = opt_num - opts_filled;
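+    // For example, calling `def foo(a, b = 1, c = 2)` as `foo(1, 2)` without splats or
+    // keywords gives required_num == 1, opt_num == 2, opts_filled == 1, opts_missing == 1.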
+
+ let block_arg = flags & VM_CALL_ARGS_BLOCKARG != 0;
+ // Stack index of the splat array
+ let splat_pos = i32::from(block_arg) + i32::from(kw_splat) + kw_arg_num;
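+    // For example, for `foo(*args, &blk)` the block argument sits at the stack top, so the
+    // splat array is at stack_opnd(1); for a plain `foo(*args)` it is at stack_opnd(0).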
+
+ exit_if_stack_too_large(iseq)?;
+ exit_if_tail_call(asm, ci)?;
+ exit_if_has_post(asm, iseq)?;
+ exit_if_kwsplat_non_nil(asm, flags, Counter::send_iseq_kw_splat_non_nil)?;
+ exit_if_has_rest_and_captured(asm, iseq_has_rest, captured_opnd)?;
+ exit_if_has_kwrest_and_captured(asm, has_kwrest, captured_opnd)?;
+ exit_if_has_rest_and_supplying_kws(asm, iseq_has_rest, supplying_kws)?;
+ exit_if_supplying_kw_and_has_no_kw(asm, supplying_kws, doing_kw_call)?;
+ exit_if_supplying_kws_and_accept_no_kwargs(asm, supplying_kws, iseq)?;
+ exit_if_doing_kw_and_splat(asm, doing_kw_call, flags)?;
+ exit_if_wrong_number_arguments(asm, arg_setup_block, opts_filled, flags, opt_num, iseq_has_rest)?;
+ exit_if_doing_kw_and_opts_missing(asm, doing_kw_call, opts_missing)?;
+ exit_if_has_rest_and_optional_and_block(asm, iseq_has_rest, opt_num, iseq, block_arg)?;
+ let block_arg_type = exit_if_unsupported_block_arg_type(jit, asm, block_arg)?;
+
+ // Bail if we can't drop extra arguments for a yield by just popping them
+ if supplying_kws && arg_setup_block && argc > (kw_arg_num + required_num + opt_num) {
+ gen_counter_incr(asm, Counter::send_iseq_complex_discard_extras);
+ return None;
+ }
- if opts_filled < 0 || opts_filled > opt_num {
- gen_counter_incr!(cb, send_iseq_arity_error);
- return CantCompile;
+ // Block parameter handling. This mirrors setup_parameters_complex().
+ if iseq_has_block_param {
+ if unsafe { get_iseq_body_local_iseq(iseq) == iseq } {
+ // Do nothing
+ } else {
+ // In this case (param.flags.has_block && local_iseq != iseq),
+ // the block argument is setup as a local variable and requires
+ // materialization (allocation). Bail.
+ gen_counter_incr(asm, Counter::send_iseq_materialized_block);
+ return None;
+ }
}
- // If we have unfilled optional arguments and keyword arguments then we
- // would need to move adjust the arguments location to account for that.
- // For now we aren't handling this case.
- if doing_kw_call && opts_missing > 0 {
- gen_counter_incr!(cb, send_iseq_complex_callee);
- return CantCompile;
+ // Check that required keyword arguments are supplied and find any extras
+ // that should go into the keyword rest parameter (**kw_rest).
+ if doing_kw_call {
+ gen_iseq_kw_call_checks(asm, iseq, kw_arg, has_kwrest, kw_arg_num)?;
+ }
+
+ let splat_array_length = if splat_call {
+ let array = jit.peek_at_stack(&asm.ctx, splat_pos as isize);
+ let array_length = if array == Qnil {
+ 0
+ } else if unsafe { !RB_TYPE_P(array, RUBY_T_ARRAY) } {
+ gen_counter_incr(asm, Counter::send_iseq_splat_not_array);
+ return None;
+ } else {
+ unsafe { rb_yjit_array_len(array) as u32}
+ };
+
+ // Arity check accounting for size of the splat. When callee has rest parameters, we insert
+ // runtime guards later in copy_splat_args_for_rest_callee()
+ if !iseq_has_rest {
+ let supplying = argc - 1 - i32::from(kw_splat) + array_length as i32;
+            if !(required_num..=required_num + opt_num).contains(&supplying) {
+ gen_counter_incr(asm, Counter::send_iseq_splat_arity_error);
+ return None;
+ }
+ }
+
+ if iseq_has_rest && opt_num > 0 {
+            // If we have both rest and optional parameters,
+            // we are going to set the pc_offset for where
+            // to jump in the called method.
+            // If the number of args changes, that offset would need to
+            // change too, and we don't adjust it dynamically, so we side exit.
+            // On a normal splat without rest and optional args this is handled
+            // elsewhere depending on the case.
+            asm_comment!(asm, "Side exit if the length doesn't equal the compile-time length");
+ let array_len_opnd = get_array_len(asm, asm.stack_opnd(splat_pos));
+ asm.cmp(array_len_opnd, array_length.into());
+ asm.jne(Target::side_exit(Counter::guard_send_splatarray_length_not_equal));
+ }
+
+ Some(array_length)
+ } else {
+ None
+ };
+
+ // Check if we need the arg0 splat handling of vm_callee_setup_block_arg()
+ // Also known as "autosplat" inside setup_parameters_complex().
+ // Autosplat checks argc == 1 after splat and kwsplat processing, so make
+    // sure to amend this if we start supporting kw_splat.
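+    // For example, `[[1, 2]].each { |a, b| }` yields a single array argument that gets
+    // spread ("autosplat") across the two block parameters.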
+ let block_arg0_splat = arg_setup_block
+ && (argc == 1 || (argc == 2 && splat_array_length == Some(0)))
+ && !supplying_kws && !doing_kw_call
+ && unsafe {
+ (get_iseq_flags_has_lead(iseq) || opt_num > 1)
+ && !get_iseq_flags_ambiguous_param0(iseq)
+ };
+ if block_arg0_splat {
+ // If block_arg0_splat, we still need side exits after splat, but
+ // the splat modifies the stack which breaks side exits. So bail out.
+ if splat_call {
+ gen_counter_incr(asm, Counter::invokeblock_iseq_arg0_args_splat);
+ return None;
+ }
+ // The block_arg0_splat implementation cannot deal with optional parameters.
+ // This is a setup_parameters_complex() situation and interacts with the
+ // starting position of the callee.
+ if opt_num > 1 {
+ gen_counter_incr(asm, Counter::invokeblock_iseq_arg0_optional);
+ return None;
+ }
+ }
+
+ // Adjust `opts_filled` and `opts_missing` taking
+ // into account the size of the splat expansion.
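+    // For example, calling `def foo(a, b = 1)` as `foo(*[1, 2])` gives argc == 1 and
+    // len == 2, so num_args == 2, opts_filled becomes 1 and opts_missing becomes 0.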
+ if let Some(len) = splat_array_length {
+ assert_eq!(kw_arg_num, 0); // Due to exit_if_doing_kw_and_splat().
+ // Simplifies calculation below.
+ let num_args = argc - 1 - i32::from(kw_splat) + len as i32;
+
+ opts_filled = if num_args >= required_num {
+ min(num_args - required_num, opt_num)
+ } else {
+ 0
+ };
+ opts_missing = opt_num - opts_filled;
}
+ assert_eq!(opts_missing + opts_filled, opt_num);
+ assert!(opts_filled >= 0);
+
+    // ISEQs with optional parameters start at different
+    // locations depending on the number of optionals given.
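+    // Rough example: for `def foo(a = 1, b = 2)`, opt_table has opt_num + 1 entries; a call
+    // that fills one optional starts at opt_table[1], skipping the bytecode that assigns
+    // the default value to `a`.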
if opt_num > 0 {
- num_params -= opts_missing as u32;
+ assert!(opts_filled >= 0);
unsafe {
let opt_table = get_iseq_body_param_opt_table(iseq);
- start_pc_offset = (*opt_table.offset(opts_filled as isize)).as_u32();
+ start_pc_offset = opt_table.offset(opts_filled as isize).read().try_into().unwrap();
}
}
- if doing_kw_call {
- // Here we're calling a method with keyword arguments and specifying
- // keyword arguments at this call site.
-
- // This struct represents the metadata about the callee-specified
- // keyword parameters.
- let keyword = unsafe { get_iseq_body_param_keyword(iseq) };
- let keyword_num: usize = unsafe { (*keyword).num }.try_into().unwrap();
- let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap();
-
- let mut required_kwargs_filled = 0;
-
- if keyword_num > 30 {
- // We have so many keywords that (1 << num) encoded as a FIXNUM
- // (which shifts it left one more) no longer fits inside a 32-bit
- // immediate.
- gen_counter_incr!(cb, send_iseq_complex_callee);
- return CantCompile;
- }
-
- // Check that the kwargs being passed are valid
- if supplying_kws {
- // This is the list of keyword arguments that the callee specified
- // in its initial declaration.
- // SAFETY: see compile.c for sizing of this slice.
- let callee_kwargs = unsafe { slice::from_raw_parts((*keyword).table, keyword_num) };
-
- // Here we're going to build up a list of the IDs that correspond to
- // the caller-specified keyword arguments. If they're not in the
- // same order as the order specified in the callee declaration, then
- // we're going to need to generate some code to swap values around
- // on the stack.
- let kw_arg_keyword_len: usize =
- unsafe { get_cikw_keyword_len(kw_arg) }.try_into().unwrap();
- let mut caller_kwargs: Vec<ID> = vec![0; kw_arg_keyword_len];
- for kwarg_idx in 0..kw_arg_keyword_len {
- let sym = unsafe { get_cikw_keywords_idx(kw_arg, kwarg_idx.try_into().unwrap()) };
- caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) };
- }
+ // Increment total ISEQ send count
+ gen_counter_incr(asm, Counter::num_send_iseq);
- // First, we're going to be sure that the names of every
- // caller-specified keyword argument correspond to a name in the
- // list of callee-specified keyword parameters.
- for caller_kwarg in caller_kwargs {
- let search_result = callee_kwargs
- .iter()
- .enumerate() // inject element index
- .find(|(_, &kwarg)| kwarg == caller_kwarg);
-
- match search_result {
- None => {
- // If the keyword was never found, then we know we have a
- // mismatch in the names of the keyword arguments, so we need to
- // bail.
- gen_counter_incr!(cb, send_iseq_kwargs_mismatch);
- return CantCompile;
- }
- Some((callee_idx, _)) if callee_idx < keyword_required_num => {
- // Keep a count to ensure all required kwargs are specified
- required_kwargs_filled += 1;
- }
- _ => (),
+ // Shortcut for special `Primitive.attr! :leaf` builtins
+ let builtin_attrs = unsafe { rb_yjit_iseq_builtin_attrs(iseq) };
+ let builtin_func_raw = unsafe { rb_yjit_builtin_function(iseq) };
+ let builtin_func = if builtin_func_raw.is_null() { None } else { Some(builtin_func_raw) };
+ let opt_send_call = flags & VM_CALL_OPT_SEND != 0; // .send call is not currently supported for builtins
+ if let (None, Some(builtin_info), true, false, None | Some(0)) =
+ (block, builtin_func, builtin_attrs & BUILTIN_ATTR_LEAF != 0, opt_send_call, splat_array_length) {
+ let builtin_argc = unsafe { (*builtin_info).argc };
+ if builtin_argc + 1 < (C_ARG_OPNDS.len() as i32) {
+ // We pop the block arg without using it because:
+ // - the builtin is leaf, so it promises to not `yield`.
+ // - no leaf builtins have block param at the time of writing, and
+ // adding one requires interpreter changes to support.
+ if block_arg_type.is_some() {
+ if iseq_has_block_param {
+ gen_counter_incr(asm, Counter::send_iseq_leaf_builtin_block_arg_block_param);
+ return None;
}
+ asm.stack_pop(1);
}
- }
- assert!(required_kwargs_filled <= keyword_required_num);
- if required_kwargs_filled != keyword_required_num {
- gen_counter_incr!(cb, send_iseq_kwargs_mismatch);
- return CantCompile;
- }
- }
- // Number of locals that are not parameters
- let num_locals = unsafe { get_iseq_body_local_table_size(iseq) as i32 } - (num_params as i32);
+ // Pop empty kw_splat hash which passes nothing (exit_if_kwsplat_non_nil())
+ if kw_splat {
+ asm.stack_pop(1);
+ }
- // Create a side-exit to fall back to the interpreter
- let side_exit = get_side_exit(jit, ocb, ctx);
+ // Pop empty splat array which passes nothing
+ if let Some(0) = splat_array_length {
+ asm.stack_pop(1);
+ }
- // Check for interrupts
- gen_check_ints(cb, side_exit);
+ asm_comment!(asm, "inlined leaf builtin");
+ gen_counter_incr(asm, Counter::num_send_iseq_leaf);
- let leaf_builtin_raw = unsafe { rb_leaf_builtin_function(iseq) };
- let leaf_builtin: Option<*const rb_builtin_function> = if leaf_builtin_raw.is_null() {
- None
- } else {
- Some(leaf_builtin_raw)
- };
- if let (None, Some(builtin_info)) = (block, leaf_builtin) {
- let builtin_argc = unsafe { (*builtin_info).argc };
- if builtin_argc + 1 /* for self */ + 1 /* for ec */ <= (C_ARG_REGS.len() as i32) {
- add_comment(cb, "inlined leaf builtin");
+ // The callee may allocate, e.g. Integer#abs on a Bignum.
+ // Save SP for GC, save PC for allocation tracing, and prepare
+ // for global invalidation after GC's VM lock contention.
+ jit_prepare_call_with_gc(jit, asm);
// Call the builtin func (ec, recv, arg1, arg2, ...)
- mov(cb, C_ARG_REGS[0], REG_EC);
+ let mut args = vec![EC];
// Copy self and arguments
for i in 0..=builtin_argc {
- let stack_opnd = ctx.stack_opnd(builtin_argc - i);
- let idx: usize = (i + 1).try_into().unwrap();
- let c_arg_reg = C_ARG_REGS[idx];
- mov(cb, c_arg_reg, stack_opnd);
+ let stack_opnd = asm.stack_opnd(builtin_argc - i);
+ args.push(stack_opnd);
}
- ctx.stack_pop((builtin_argc + 1).try_into().unwrap());
- let builtin_func_ptr = unsafe { (*builtin_info).func_ptr as *const u8 };
- call_ptr(cb, REG0, builtin_func_ptr);
+ let val = asm.ccall(unsafe { (*builtin_info).func_ptr as *const u8 }, args);
+ asm.stack_pop((builtin_argc + 1).try_into().unwrap()); // Keep them on stack during ccall for GC
// Push the return value
- let stack_ret = ctx.stack_push(Type::Unknown);
- mov(cb, stack_ret, RAX);
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, val);
// Note: assuming that the leaf builtin doesn't change local variables here.
// Seems like a safe assumption.
- return KeepCompiling;
+ // Let guard chains share the same successor
+ jump_to_next_insn(jit, asm, ocb);
+ return Some(EndBlock);
+ }
+ }
+
+ // Inline simple ISEQs whose return value is known at compile time
+ if let (Some(value), None, false) = (iseq_get_return_value(iseq, captured_opnd, flags), block_arg_type, opt_send_call) {
+ asm_comment!(asm, "inlined simple ISEQ");
+ gen_counter_incr(asm, Counter::num_send_iseq_inline);
+
+ match value {
+ IseqReturn::LocalVariable(local_idx) => {
+ // Put the local variable at the return slot
+ let stack_local = asm.stack_opnd(argc - 1 - local_idx as i32);
+ let stack_return = asm.stack_opnd(argc);
+ asm.mov(stack_return, stack_local);
+
+ // Update the mapping for the return value
+ let mapping = asm.ctx.get_opnd_mapping(stack_local.into());
+ asm.ctx.set_opnd_mapping(stack_return.into(), mapping);
+
+ // Pop everything but the return value
+ asm.stack_pop(argc as usize);
+ }
+ IseqReturn::Value(value) => {
+ // Pop receiver and arguments
+ asm.stack_pop(argc as usize + if captured_opnd.is_some() { 0 } else { 1 });
+
+ // Push the return value
+ let stack_ret = asm.stack_push(Type::from(value));
+ asm.mov(stack_ret, value.into());
+ },
+ IseqReturn::Receiver => {
+ // Just pop arguments and leave the receiver on stack
+ asm.stack_pop(argc as usize);
+ }
}
+
+ // Let guard chains share the same successor
+ jump_to_next_insn(jit, asm, ocb);
+ return Some(EndBlock);
}
// Stack overflow check
// Note that vm_push_frame checks it against a decremented cfp, hence the multiply by 2.
// #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
- add_comment(cb, "stack overflow check");
+ asm_comment!(asm, "stack overflow check");
+ const _: () = assert!(RUBY_SIZEOF_CONTROL_FRAME % SIZEOF_VALUE == 0, "sizeof(rb_control_frame_t) is a multiple of sizeof(VALUE)");
let stack_max: i32 = unsafe { get_iseq_body_stack_max(iseq) }.try_into().unwrap();
- let locals_offs =
- (SIZEOF_VALUE as i32) * (num_locals + stack_max) + 2 * (RUBY_SIZEOF_CONTROL_FRAME as i32);
- lea(cb, REG0, ctx.sp_opnd(locals_offs as isize));
- cmp(cb, REG_CFP, REG0);
- jle_ptr(cb, counted_exit!(ocb, side_exit, send_se_cf_overflow));
+ let locals_offs = (num_locals + stack_max) + 2 * (RUBY_SIZEOF_CONTROL_FRAME / SIZEOF_VALUE) as i32;
+ let stack_limit = asm.lea(asm.ctx.sp_opnd(locals_offs));
+ asm.cmp(CFP, stack_limit);
+ asm.jbe(Target::side_exit(Counter::guard_send_se_cf_overflow));
+
+ if iseq_has_rest && splat_call {
+ // Insert length guard for a call to copy_splat_args_for_rest_callee()
+ // that will come later. We will have made changes to
+ // the stack by spilling or handling __send__ shifting
+ // by the time we get to that code, so we need the
+ // guard here where we can still side exit.
+ let non_rest_arg_count = argc - i32::from(kw_splat) - 1;
+ if non_rest_arg_count < required_num + opt_num {
+ let take_count: u32 = (required_num - non_rest_arg_count + opts_filled)
+ .try_into().unwrap();
+
+ if take_count > 0 {
+ asm_comment!(asm, "guard splat_array_length >= {take_count}");
+
+ let splat_array = asm.stack_opnd(splat_pos);
+ let array_len_opnd = get_array_len(asm, splat_array);
+ asm.cmp(array_len_opnd, take_count.into());
+ asm.jl(Target::side_exit(Counter::guard_send_iseq_has_rest_and_splat_too_few));
+ }
+ }
- if doing_kw_call {
- // Here we're calling a method with keyword arguments and specifying
- // keyword arguments at this call site.
+ // All splats need to guard for ruby2_keywords hash. Check with a function call when
+ // splatting into a rest param since the index for the last item in the array is dynamic.
+ asm_comment!(asm, "guard no ruby2_keywords hash in splat");
+ let bad_splat = asm.ccall(rb_yjit_ruby2_keywords_splat_p as _, vec![asm.stack_opnd(splat_pos)]);
+ asm.cmp(bad_splat, 0.into());
+ asm.jnz(Target::side_exit(Counter::guard_send_splatarray_last_ruby2_keywords));
+ }
- // Number of positional arguments the callee expects before the first
- // keyword argument
- let args_before_kw = required_num + opt_num;
+ match block_arg_type {
+ Some(BlockArg::Nil) => {
+ // We have a nil block arg, so let's pop it off the args
+ asm.stack_pop(1);
+ }
+ Some(BlockArg::BlockParamProxy) => {
+ // We don't need the actual stack value
+ asm.stack_pop(1);
+ }
+ Some(BlockArg::TProc) => {
+ // Place the proc as the block handler. We do this early because
+ // the block arg being at the top of the stack gets in the way of
+ // rest param handling later. Also, since there are C calls that
+ // come later, we can't hold this value in a register and place it
+ // near the end when we push a new control frame.
+ asm_comment!(asm, "guard block arg is a proc");
+ // Simple predicate, no need for jit_prepare_non_leaf_call().
+ let is_proc = asm.ccall(rb_obj_is_proc as _, vec![asm.stack_opnd(0)]);
+ asm.cmp(is_proc, Qfalse.into());
+ jit_chain_guard(
+ JCC_JE,
+ jit,
+ asm,
+ ocb,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_block_arg_type,
+ );
- // This struct represents the metadata about the caller-specified
- // keyword arguments.
- let ci_kwarg = unsafe { vm_ci_kwarg(ci) };
- let caller_keyword_len: usize = if ci_kwarg.is_null() {
- 0
- } else {
- unsafe { get_cikw_keyword_len(ci_kwarg) }
- .try_into()
- .unwrap()
- };
+ let callee_ep = -argc + num_locals + VM_ENV_DATA_SIZE as i32 - 1;
+ let callee_specval = callee_ep + VM_ENV_DATA_INDEX_SPECVAL;
+ if callee_specval < 0 {
+ // Can't write to sp[-n] since that's where the arguments are
+ gen_counter_incr(asm, Counter::send_iseq_clobbering_block_arg);
+ return None;
+ }
+ let proc = asm.stack_pop(1); // Pop first, as argc doesn't account for the block arg
+ let callee_specval = asm.ctx.sp_opnd(callee_specval);
+ asm.store(callee_specval, proc);
+ }
+ None => {
+ // Nothing to do
+ }
+ }
- // This struct represents the metadata about the callee-specified
- // keyword parameters.
- let keyword = unsafe { get_iseq_body_param_keyword(iseq) };
+ if kw_splat {
+ // Only `**nil` is supported right now. Checked in exit_if_kwsplat_non_nil()
+ assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0)));
+ asm.stack_pop(1);
+ argc -= 1;
+ }
- add_comment(cb, "keyword args");
+ // push_splat_args does stack manipulation so we can no longer side exit
+ if let Some(array_length) = splat_array_length {
+ if !iseq_has_rest {
+ // Speculate that future splats will be done with
+ // an array that has the same length. We will insert guards.
+ argc = argc - 1 + array_length as i32;
+ if argc + asm.ctx.get_stack_size() as i32 > MAX_SPLAT_LENGTH {
+ gen_counter_incr(asm, Counter::send_splat_too_long);
+ return None;
+ }
+ push_splat_args(array_length, asm);
+ }
+ }
- // This is the list of keyword arguments that the callee specified
- // in its initial declaration.
- let callee_kwargs = unsafe { (*keyword).table };
- let total_kwargs: usize = unsafe { (*keyword).num }.try_into().unwrap();
+ // This is a .send call and we need to adjust the stack
+ // TODO: This can be more efficient if we do it before
+ // extracting from the splat array above.
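+    // Roughly: for `recv.send(:foo, 1, 2)` this shifts the arguments down over the method
+    // name symbol so the stack looks like a direct `recv.foo(1, 2)` call.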
+ if flags & VM_CALL_OPT_SEND != 0 {
+ handle_opt_send_shift_stack(asm, argc);
+ }
- // Here we're going to build up a list of the IDs that correspond to
- // the caller-specified keyword arguments. If they're not in the
- // same order as the order specified in the callee declaration, then
- // we're going to need to generate some code to swap values around
- // on the stack.
- let mut caller_kwargs: Vec<ID> = vec![0; total_kwargs];
+ if iseq_has_rest {
+ // We are going to allocate so setting pc and sp.
+ jit_save_pc(jit, asm);
+ gen_save_sp(asm);
- for kwarg_idx in 0..caller_keyword_len {
- let sym = unsafe { get_cikw_keywords_idx(ci_kwarg, kwarg_idx.try_into().unwrap()) };
- caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) };
- }
- let mut kwarg_idx = caller_keyword_len;
+ let rest_param_array = if splat_call {
+ let non_rest_arg_count = argc - 1;
+ // We start by dupping the array because someone else might have
+ // a reference to it. This also normalizes to an ::Array instance.
+ let array = asm.stack_opnd(0);
+ let array = asm.ccall(
+ rb_ary_dup as *const u8,
+ vec![array],
+ );
+ asm.stack_pop(1); // Pop array after ccall to use a register for passing it.
+
+ // This is the end stack state of all `non_rest_arg_count` situations below
+ argc = required_num + opts_filled;
+
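+            // Worked example: for `def foo(a, *rest)` called as `foo(1, 2, 3, *[4, 5])`,
+            // non_rest_arg_count == 3 and required_num + opt_num == 1, so the branch below
+            // prepends the stack values 2 and 3 onto the dup of [4, 5], giving
+            // rest == [2, 3, 4, 5] while argc becomes 1 (just `a`).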
+ if non_rest_arg_count > required_num + opt_num {
+ // If we have more arguments than required, we need to prepend
+ // the items from the stack onto the array.
+ let diff: u32 = (non_rest_arg_count - (required_num + opt_num))
+ .try_into().unwrap();
+
+ // diff is >0 so no need to worry about null pointer
+ asm_comment!(asm, "load pointer to array elements");
+ let values_opnd = asm.ctx.sp_opnd(-(diff as i32));
+ let values_ptr = asm.lea(values_opnd);
+
+ asm_comment!(asm, "prepend stack values to rest array");
+ let array = asm.ccall(
+ rb_ary_unshift_m as *const u8,
+ vec![Opnd::UImm(diff as u64), values_ptr, array],
+ );
+ asm.stack_pop(diff as usize);
- let mut unspecified_bits = 0;
+ array
+ } else if non_rest_arg_count < required_num + opt_num {
+ // If we have fewer arguments than required, we need to take some
+ // from the array and move them to the stack.
+ asm_comment!(asm, "take items from splat array");
- let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap();
- for callee_idx in keyword_required_num..total_kwargs {
- let mut already_passed = false;
- let callee_kwarg = unsafe { *(callee_kwargs.offset(callee_idx.try_into().unwrap())) };
+ let take_count: u32 = (required_num - non_rest_arg_count + opts_filled)
+ .try_into().unwrap();
- for caller_idx in 0..caller_keyword_len {
- if caller_kwargs[caller_idx] == callee_kwarg {
- already_passed = true;
- break;
- }
- }
+ // Copy required arguments to the stack without modifying the array
+ copy_splat_args_for_rest_callee(array, take_count, asm);
- if !already_passed {
- // Reserve space on the stack for each default value we'll be
- // filling in (which is done in the next loop). Also increments
- // argc so that the callee's SP is recorded correctly.
- argc += 1;
- let default_arg = ctx.stack_push(Type::Unknown);
-
- // callee_idx - keyword->required_num is used in a couple of places below.
- let req_num: isize = unsafe { (*keyword).required_num }.try_into().unwrap();
- let callee_idx_isize: isize = callee_idx.try_into().unwrap();
- let extra_args = callee_idx_isize - req_num;
-
- //VALUE default_value = keyword->default_values[callee_idx - keyword->required_num];
- let mut default_value = unsafe { *((*keyword).default_values.offset(extra_args)) };
-
- if default_value == Qundef {
- // Qundef means that this value is not constant and must be
- // recalculated at runtime, so we record it in unspecified_bits
- // (Qnil is then used as a placeholder instead of Qundef).
- unspecified_bits |= 0x01 << extra_args;
- default_value = Qnil;
- }
+ // We will now slice the array to give us a new array of the correct size
+ let sliced = asm.ccall(rb_yjit_rb_ary_subseq_length as *const u8, vec![array, Opnd::UImm(take_count.into())]);
- jit_mov_gc_ptr(jit, cb, REG0, default_value);
- mov(cb, default_arg, REG0);
+ sliced
+ } else {
+ // The arguments are equal so we can just push to the stack
+ asm_comment!(asm, "same length for splat array and rest param");
+ assert!(non_rest_arg_count == required_num + opt_num);
- caller_kwargs[kwarg_idx] = callee_kwarg;
- kwarg_idx += 1;
+ array
}
- }
-
- assert!(kwarg_idx == total_kwargs);
+ } else {
+ asm_comment!(asm, "rest parameter without splat");
+
+ assert!(argc >= required_num);
+ let n = (argc - required_num - opts_filled) as u32;
+ argc = required_num + opts_filled;
+ // If n is 0, then elts is never going to be read, so we can just pass null
+ let values_ptr = if n == 0 {
+ Opnd::UImm(0)
+ } else {
+ asm_comment!(asm, "load pointer to array elements");
+ let values_opnd = asm.ctx.sp_opnd(-(n as i32));
+ asm.lea(values_opnd)
+ };
+
+ let new_ary = asm.ccall(
+ rb_ec_ary_new_from_values as *const u8,
+ vec![
+ EC,
+ Opnd::UImm(n.into()),
+ values_ptr
+ ]
+ );
+ asm.stack_pop(n.as_usize());
- // Next, we're going to loop through every keyword that was
- // specified by the caller and make sure that it's in the correct
- // place. If it's not we're going to swap it around with another one.
- for kwarg_idx in 0..total_kwargs {
- let kwarg_idx_isize: isize = kwarg_idx.try_into().unwrap();
- let callee_kwarg = unsafe { *(callee_kwargs.offset(kwarg_idx_isize)) };
+ new_ary
+ };
- // If the argument is already in the right order, then we don't
- // need to generate any code since the expected value is already
- // in the right place on the stack.
- if callee_kwarg == caller_kwargs[kwarg_idx] {
- continue;
- }
+ // Find where to put the rest parameter array
+ let rest_param = if opts_missing == 0 {
+ // All optionals are filled, the rest param goes at the top of the stack
+ argc += 1;
+ asm.stack_push(Type::TArray)
+ } else {
+ // The top of the stack will be a missing optional, but the rest
+ // parameter needs to be placed after all the missing optionals.
+ // Place it using a stack operand with a negative stack index.
+            // (Negative stack indexes with higher magnitude have higher addresses.)
+ assert!(opts_missing > 0);
+ // The argument deepest in the stack will be the 0th local in the callee.
+ let callee_locals_base = argc - 1;
+ let rest_param_stack_idx = callee_locals_base - required_num - opt_num;
+ assert!(rest_param_stack_idx < 0);
+ asm.stack_opnd(rest_param_stack_idx)
+ };
+ // Store rest param to memory to avoid register shuffle as
+ // we won't be reading it for the remainder of the block.
+ asm.ctx.dealloc_temp_reg(rest_param.stack_idx());
+ asm.store(rest_param, rest_param_array);
+ }
+
+ // Pop surplus positional arguments when yielding
+ if arg_setup_block {
+ let extras = argc - required_num - opt_num;
+ if extras > 0 {
+ // Checked earlier. If there are keyword args, then
+ // the positional arguments are not at the stack top.
+ assert_eq!(0, kw_arg_num);
+
+ asm.stack_pop(extras as usize);
+ argc = required_num + opt_num;
+ }
+ }
- // In this case the argument is not in the right place, so we
- // need to find its position where it _should_ be and swap with
- // that location.
- for swap_idx in (kwarg_idx + 1)..total_kwargs {
- if callee_kwarg == caller_kwargs[swap_idx] {
- // First we're going to generate the code that is going
- // to perform the actual swapping at runtime.
- let swap_idx_i32: i32 = swap_idx.try_into().unwrap();
- let kwarg_idx_i32: i32 = kwarg_idx.try_into().unwrap();
- let offset0: u16 = (argc - 1 - swap_idx_i32 - args_before_kw)
- .try_into()
- .unwrap();
- let offset1: u16 = (argc - 1 - kwarg_idx_i32 - args_before_kw)
- .try_into()
- .unwrap();
- stack_swap(ctx, cb, offset0, offset1, REG1, REG0);
-
- // Next we're going to do some bookkeeping on our end so
- // that we know the order that the arguments are
- // actually in now.
- caller_kwargs.swap(kwarg_idx, swap_idx);
+ // Keyword argument passing
+ if doing_kw_call {
+ argc = gen_iseq_kw_call(jit, asm, kw_arg, iseq, argc, has_kwrest);
+ }
+
+ // Same as vm_callee_setup_block_arg_arg0_check and vm_callee_setup_block_arg_arg0_splat
+ // on vm_callee_setup_block_arg for arg_setup_block. This is done after CALLER_SETUP_ARG
+ // and CALLER_REMOVE_EMPTY_KW_SPLAT, so this implementation is put here. This may need
+ // side exits, so you still need to allow side exits here if block_arg0_splat is true.
+ // Note that you can't have side exits after this arg0 splat.
+ if block_arg0_splat {
+ let arg0_opnd = asm.stack_opnd(0);
+
+ // Only handle the case that you don't need to_ary conversion
+ let not_array_counter = Counter::invokeblock_iseq_arg0_not_array;
+ guard_object_is_array(asm, arg0_opnd, arg0_opnd.into(), not_array_counter);
+
+        // Only handle the case where the array length == ISEQ's lead_num (most common)
+ let arg0_len_opnd = get_array_len(asm, arg0_opnd);
+ let lead_num = unsafe { rb_get_iseq_body_param_lead_num(iseq) };
+ asm.cmp(arg0_len_opnd, lead_num.into());
+ asm.jne(Target::side_exit(Counter::invokeblock_iseq_arg0_wrong_len));
+
+ let arg0_reg = asm.load(arg0_opnd);
+ let array_opnd = get_array_ptr(asm, arg0_reg);
+ asm_comment!(asm, "push splat arg0 onto the stack");
+ asm.stack_pop(argc.try_into().unwrap());
+ for i in 0..lead_num {
+ let stack_opnd = asm.stack_push(Type::Unknown);
+ asm.mov(stack_opnd, Opnd::mem(64, array_opnd, SIZEOF_VALUE_I32 * i));
+ }
+ argc = lead_num;
+ }
- break;
- }
- }
+ fn nil_fill(comment: &'static str, fill_range: std::ops::Range<i32>, asm: &mut Assembler) {
+ if fill_range.is_empty() {
+ return;
}
- // Keyword arguments cause a special extra local variable to be
- // pushed onto the stack that represents the parameters that weren't
- // explicitly given a value and have a non-constant default.
- let unspec_opnd = uimm_opnd(VALUE::fixnum_from_usize(unspecified_bits).as_u64());
- mov(cb, ctx.stack_opnd(-1), unspec_opnd);
+ asm_comment!(asm, "{}", comment);
+ for i in fill_range {
+ let value_slot = asm.ctx.sp_opnd(i);
+ asm.store(value_slot, Qnil.into());
+ }
}
- // Points to the receiver operand on the stack
- let recv = ctx.stack_opnd(argc);
+ // Nil-initialize missing optional parameters
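+    // For example, for a callee `def foo(a, b = 1, c = 2)` receiving one positional
+    // argument: opts_filled == 0, so the range below is 0..2 and the two missing
+    // optionals are nil-initialized at sp_opnd(0) and sp_opnd(1).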
+ nil_fill(
+ "nil-initialize missing optionals",
+ {
+ let begin = -argc + required_num + opts_filled;
+ let end = -argc + required_num + opt_num;
- // Store the updated SP on the current frame (pop arguments and receiver)
- add_comment(cb, "store caller sp");
- lea(
- cb,
- REG0,
- ctx.sp_opnd((SIZEOF_VALUE as isize) * -((argc as isize) + 1)),
+ begin..end
+ },
+ asm
);
- mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG0);
-
- // Store the next PC in the current frame
- jit_save_pc(jit, cb, REG0);
+ // Nil-initialize the block parameter. It's the last parameter local
+ if iseq_has_block_param {
+ let block_param = asm.ctx.sp_opnd(-argc + num_params - 1);
+ asm.store(block_param, Qnil.into());
+ }
+ // Nil-initialize non-parameter locals
+ nil_fill(
+ "nil-initialize locals",
+ {
+ let begin = -argc + num_params;
+ let end = -argc + num_locals;
- if let Some(block_val) = block {
- // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
- // VM_CFP_TO_CAPTURED_BLCOK does &cfp->self, rb_captured_block->code.iseq aliases
- // with cfp->block_code.
- let gc_ptr = VALUE(block_val as usize);
- jit_mov_gc_ptr(jit, cb, REG0, gc_ptr);
- mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE), REG0);
- }
+ begin..end
+ },
+ asm
+ );
- // Adjust the callee's stack pointer
- let offs =
- (SIZEOF_VALUE as isize) * (3 + (num_locals as isize) + if doing_kw_call { 1 } else { 0 });
- lea(cb, REG0, ctx.sp_opnd(offs));
+ // Points to the receiver operand on the stack unless a captured environment is used
+ let recv = match captured_opnd {
+ Some(captured_opnd) => asm.load(Opnd::mem(64, captured_opnd, 0)), // captured->self
+ _ => asm.stack_opnd(argc),
+ };
+ let captured_self = captured_opnd.is_some();
+ let sp_offset = argc + if captured_self { 0 } else { 1 };
- // Initialize local variables to Qnil
- for i in 0..num_locals {
- let offs = (SIZEOF_VALUE as i32) * (i - num_locals - 3);
- mov(cb, mem_opnd(64, REG0, offs), uimm_opnd(Qnil.into()));
- }
+ // Store the updated SP on the current frame (pop arguments and receiver)
+ asm_comment!(asm, "store caller sp");
+ let caller_sp = asm.lea(asm.ctx.sp_opnd(-sp_offset));
+ asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), caller_sp);
- add_comment(cb, "push env");
- // Put compile time cme into REG1. It's assumed to be valid because we are notified when
- // any cme we depend on become outdated. See yjit_method_lookup_change().
- jit_mov_gc_ptr(jit, cb, REG1, VALUE(cme as usize));
- // Write method entry at sp[-3]
- // sp[-3] = me;
- mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
+ // Store the next PC in the current frame
+ jit_save_pc(jit, asm);
- // Write block handler at sp[-2]
- // sp[-2] = block_handler;
- match block {
- Some(_) => {
- // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
- lea(cb, REG1, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF));
- or(cb, REG1, imm_opnd(1));
- mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
- }
- None => {
- mov(
- cb,
- mem_opnd(64, REG0, 8 * -2),
- uimm_opnd(VM_BLOCK_HANDLER_NONE.into()),
- );
- }
- }
+ // Adjust the callee's stack pointer
+ let callee_sp = asm.lea(asm.ctx.sp_opnd(-argc + num_locals + VM_ENV_DATA_SIZE as i32));
+
+ let specval = if let Some(prev_ep) = prev_ep {
+ // We've already side-exited if the callee expects a block, so we
+ // ignore any supplied block here
+ SpecVal::PrevEP(prev_ep)
+ } else if let Some(captured_opnd) = captured_opnd {
+ let ep_opnd = asm.load(Opnd::mem(64, captured_opnd, SIZEOF_VALUE_I32)); // captured->ep
+ SpecVal::PrevEPOpnd(ep_opnd)
+ } else if let Some(BlockArg::TProc) = block_arg_type {
+ SpecVal::BlockHandler(Some(BlockHandler::AlreadySet))
+ } else if let Some(BlockArg::BlockParamProxy) = block_arg_type {
+ SpecVal::BlockHandler(Some(BlockHandler::BlockParamProxy))
+ } else {
+ SpecVal::BlockHandler(block)
+ };
- // Write env flags at sp[-1]
- // sp[-1] = frame_type;
- let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL;
- mov(cb, mem_opnd(64, REG0, 8 * -1), uimm_opnd(frame_type.into()));
+ // Setup the new frame
+ perf_call!("gen_send_iseq: ", gen_push_frame(jit, asm, ControlFrame {
+ frame_type,
+ specval,
+ cme,
+ recv,
+ sp: callee_sp,
+ iseq: Some(iseq),
+ pc: None, // We are calling into jitted code, which will set the PC as necessary
+ }));
+
+    // Log the name of the method we're calling. We intentionally don't do this for inlined ISEQs.
+ // We also do this after gen_push_frame() to minimize the impact of spill_temps() on asm.ccall().
+ if get_option!(gen_stats) {
+ // Assemble the ISEQ name string
+ let name_str = get_iseq_name(iseq);
- add_comment(cb, "push callee CFP");
- // Allocate a new CFP (ec->cfp--)
- sub(cb, REG_CFP, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64));
- mov(cb, mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP), REG_CFP);
+ // Get an index for this ISEQ name
+ let iseq_idx = get_iseq_idx(&name_str);
- // Setup the new frame
- // *cfp = (const struct rb_control_frame_struct) {
- // .pc = pc,
- // .sp = sp,
- // .iseq = iseq,
- // .self = recv,
- // .ep = sp - 1,
- // .block_code = 0,
- // .__bp__ = sp,
- // };
- mov(cb, REG1, recv);
- mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), REG1);
- mov(cb, REG_SP, REG0); // Switch to the callee's REG_SP
- mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG0);
- mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BP), REG0);
- sub(cb, REG0, uimm_opnd(SIZEOF_VALUE as u64));
- mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP), REG0);
- jit_mov_gc_ptr(jit, cb, REG0, VALUE(iseq as usize));
- mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ), REG0);
- mov(
- cb,
- mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE),
- imm_opnd(0),
- );
+        // Increment the counter for this ISEQ
+ asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]);
+ }
// No need to set cfp->pc since the callee sets it whenever calling into routines
// that could look at it through jit_save_pc().
@@ -4594,49 +7663,61 @@ fn gen_send_iseq(
// Stub so we can return to JITted code
let return_block = BlockId {
iseq: jit.iseq,
- idx: jit_next_insn_idx(jit),
+ idx: jit.next_insn_idx(),
};
// Create a context for the callee
- let mut callee_ctx = Context::new(); // Was DEFAULT_CTX
+ let mut callee_ctx = Context::default();
+
+ // If the callee has :inline_block annotation and the callsite has a block ISEQ,
+ // duplicate a callee block for each block ISEQ to make its `yield` monomorphic.
+ if let (Some(BlockHandler::BlockISeq(iseq)), true) = (block, builtin_attrs & BUILTIN_ATTR_INLINE_BLOCK != 0) {
+ callee_ctx.set_inline_block(iseq);
+ }
// Set the argument types in the callee's context
for arg_idx in 0..argc {
- let stack_offs: u16 = (argc - arg_idx - 1).try_into().unwrap();
- let arg_type = ctx.get_opnd_type(StackOpnd(stack_offs));
+ let stack_offs: u8 = (argc - arg_idx - 1).try_into().unwrap();
+ let arg_type = asm.ctx.get_opnd_type(StackOpnd(stack_offs));
callee_ctx.set_local_type(arg_idx.try_into().unwrap(), arg_type);
}
- let recv_type = ctx.get_opnd_type(StackOpnd(argc.try_into().unwrap()));
+ let recv_type = if captured_self {
+ Type::Unknown // we don't track the type information of captured->self for now
+ } else {
+ asm.ctx.get_opnd_type(StackOpnd(argc.try_into().unwrap()))
+ };
callee_ctx.upgrade_opnd_type(SelfOpnd, recv_type);
// The callee might change locals through Kernel#binding and other means.
- ctx.clear_local_types();
+ asm.clear_local_types();
- // Pop arguments and receiver in return context, push the return value
- // After the return, sp_offset will be 1. The codegen for leave writes
- // the return value in case of JIT-to-JIT return.
- let mut return_ctx = *ctx;
- return_ctx.stack_pop((argc + 1).try_into().unwrap());
- return_ctx.stack_push(Type::Unknown);
- return_ctx.set_sp_offset(1);
- return_ctx.reset_chain_depth();
+ // Pop arguments and receiver in return context and
+ // mark it as a continuation of gen_leave()
+ let mut return_asm = Assembler::new();
+ return_asm.ctx = asm.ctx;
+ return_asm.stack_pop(sp_offset.try_into().unwrap());
+ return_asm.ctx.set_sp_offset(0); // We set SP on the caller's frame above
+ return_asm.ctx.reset_chain_depth_and_defer();
+ return_asm.ctx.set_as_return_landing();
// Write the JIT return address on the callee frame
gen_branch(
jit,
- ctx,
- cb,
+ asm,
ocb,
return_block,
- &return_ctx,
- Some(return_block),
- Some(&return_ctx),
- gen_return_branch,
+ &return_asm.ctx,
+ None,
+ None,
+ BranchGenFn::JITReturn,
);
- //print_str(cb, "calling Ruby func:");
- //print_str(cb, rb_id2name(vm_ci_mid(ci)));
+ // ec->cfp is updated after cfp->jit_return for rb_profile_frames() safety
+ asm_comment!(asm, "switch to new CFP");
+ let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into());
+ asm.mov(CFP, new_cfp);
+ asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP);
// Directly jump to the entry point of the callee
gen_direct_jump(
@@ -4646,24 +7727,514 @@ fn gen_send_iseq(
iseq: iseq,
idx: start_pc_offset,
},
- cb,
+ asm,
);
- EndBlock
+ Some(EndBlock)
+}
+
+// Check if we can handle a keyword call
+fn gen_iseq_kw_call_checks(
+ asm: &mut Assembler,
+ iseq: *const rb_iseq_t,
+ kw_arg: *const rb_callinfo_kwarg,
+ has_kwrest: bool,
+ caller_kw_num: i32
+) -> Option<()> {
+ // This struct represents the metadata about the callee-specified
+ // keyword parameters.
+ let keyword = unsafe { get_iseq_body_param_keyword(iseq) };
+ let keyword_num: usize = unsafe { (*keyword).num }.try_into().unwrap();
+ let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap();
+
+ let mut required_kwargs_filled = 0;
+
+ if keyword_num > 30 || caller_kw_num > 64 {
+ // We have so many keywords that (1 << num) encoded as a FIXNUM
+ // (which shifts it left one more) no longer fits inside a 32-bit
+        // immediate. Similarly, we use a u64 bit mask for the keyword rest parameter.
+ gen_counter_incr(asm, Counter::send_iseq_too_many_kwargs);
+ return None;
+ }
+
+ // Check that the kwargs being passed are valid
+ if caller_kw_num > 0 {
+ // This is the list of keyword arguments that the callee specified
+ // in its initial declaration.
+ // SAFETY: see compile.c for sizing of this slice.
+ let callee_kwargs = if keyword_num == 0 {
+ &[]
+ } else {
+ unsafe { slice::from_raw_parts((*keyword).table, keyword_num) }
+ };
+
+ // Here we're going to build up a list of the IDs that correspond to
+ // the caller-specified keyword arguments. If they're not in the
+ // same order as the order specified in the callee declaration, then
+ // we're going to need to generate some code to swap values around
+ // on the stack.
+ let kw_arg_keyword_len = caller_kw_num as usize;
+ let mut caller_kwargs: Vec<ID> = vec![0; kw_arg_keyword_len];
+ for kwarg_idx in 0..kw_arg_keyword_len {
+ let sym = unsafe { get_cikw_keywords_idx(kw_arg, kwarg_idx.try_into().unwrap()) };
+ caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) };
+ }
+
+ // First, we're going to be sure that the names of every
+ // caller-specified keyword argument correspond to a name in the
+ // list of callee-specified keyword parameters.
+ for caller_kwarg in caller_kwargs {
+ let search_result = callee_kwargs
+ .iter()
+ .enumerate() // inject element index
+ .find(|(_, &kwarg)| kwarg == caller_kwarg);
+
+ match search_result {
+ None if !has_kwrest => {
+ // If the keyword was never found, then we know we have a
+ // mismatch in the names of the keyword arguments, so we need to
+ // bail.
+ gen_counter_incr(asm, Counter::send_iseq_kwargs_mismatch);
+ return None;
+ }
+ Some((callee_idx, _)) if callee_idx < keyword_required_num => {
+ // Keep a count to ensure all required kwargs are specified
+ required_kwargs_filled += 1;
+ }
+ _ => (),
+ }
+ }
+ }
+ assert!(required_kwargs_filled <= keyword_required_num);
+ if required_kwargs_filled != keyword_required_num {
+ gen_counter_incr(asm, Counter::send_iseq_kwargs_mismatch);
+ return None;
+ }
+
+ Some(())
+}
+
+// Codegen for keyword argument handling. Essentially private to gen_send_iseq() since
+// there are a lot of preconditions to check before reaching this code.
+fn gen_iseq_kw_call(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ci_kwarg: *const rb_callinfo_kwarg,
+ iseq: *const rb_iseq_t,
+ mut argc: i32,
+ has_kwrest: bool,
+) -> i32 {
+ let caller_keyword_len_i32: i32 = if ci_kwarg.is_null() {
+ 0
+ } else {
+ unsafe { get_cikw_keyword_len(ci_kwarg) }
+ };
+ let caller_keyword_len: usize = caller_keyword_len_i32.try_into().unwrap();
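+ // True when the callee takes an anonymous `**` rest with no named keyword params;
+ // used below to pass nil instead of allocating an empty hash when delegating.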
+ let anon_kwrest = unsafe { rb_get_iseq_flags_anon_kwrest(iseq) && !get_iseq_flags_has_kw(iseq) };
+
+ // This struct represents the metadata about the callee-specified
+ // keyword parameters.
+ let keyword = unsafe { get_iseq_body_param_keyword(iseq) };
+
+ asm_comment!(asm, "keyword args");
+
+ // This is the list of keyword arguments that the callee specified
+ // in its initial declaration.
+ let callee_kwargs = unsafe { (*keyword).table };
+ let callee_kw_count_i32: i32 = unsafe { (*keyword).num };
+ let callee_kw_count: usize = callee_kw_count_i32.try_into().unwrap();
+ let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap();
+
+ // Here we're going to build up a list of the IDs that correspond to
+ // the caller-specified keyword arguments. If they're not in the
+ // same order as the order specified in the callee declaration, then
+ // we're going to need to generate some code to swap values around
+ // on the stack.
+ let mut kwargs_order: Vec<ID> = vec![0; cmp::max(caller_keyword_len, callee_kw_count)];
+ for kwarg_idx in 0..caller_keyword_len {
+ let sym = unsafe { get_cikw_keywords_idx(ci_kwarg, kwarg_idx.try_into().unwrap()) };
+ kwargs_order[kwarg_idx] = unsafe { rb_sym2id(sym) };
+ }
+
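+ // Bits get set below for optional keywords whose default is non-constant and which
+ // the caller didn't supply; the mask is pushed as a Fixnum near the end of this function.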
+ let mut unspecified_bits = 0;
+
+ // The stack_opnd() index to the 0th keyword argument.
+ let kwargs_stack_base = caller_keyword_len_i32 - 1;
+
+ // Build the keyword rest parameter hash before we make any changes to the order of
+ // the supplied keyword arguments
+ let kwrest_type = if has_kwrest {
+ c_callable! {
+ fn build_kw_rest(rest_mask: u64, stack_kwargs: *const VALUE, keywords: *const rb_callinfo_kwarg) -> VALUE {
+ if keywords.is_null() {
+ return unsafe { rb_hash_new() };
+ }
+
+ // Use the total number of supplied keywords as a size upper bound
+ let keyword_len = unsafe { (*keywords).keyword_len } as usize;
+ let hash = unsafe { rb_hash_new_with_size(keyword_len as u64) };
+
+ // Put pairs into the kwrest hash as the mask describes
+ for kwarg_idx in 0..keyword_len {
+ if (rest_mask & (1 << kwarg_idx)) != 0 {
+ unsafe {
+ let keyword_symbol = (*keywords).keywords.as_ptr().add(kwarg_idx).read();
+ let keyword_value = stack_kwargs.add(kwarg_idx).read();
+ rb_hash_aset(hash, keyword_symbol, keyword_value);
+ }
+ }
+ }
+ return hash;
+ }
+ }
+
+ asm_comment!(asm, "build kwrest hash");
+
+ // Make a bit mask describing which keywords should go into kwrest.
+ let mut rest_mask: u64 = 0;
+ // Index for one argument that will go into kwrest.
+ let mut rest_collected_idx = None;
+ for (supplied_kw_idx, &supplied_kw) in kwargs_order.iter().take(caller_keyword_len).enumerate() {
+ let mut found = false;
+ for callee_idx in 0..callee_kw_count {
+ let callee_kw = unsafe { callee_kwargs.add(callee_idx).read() };
+ if callee_kw == supplied_kw {
+ found = true;
+ break;
+ }
+ }
+ if !found {
+ rest_mask |= 1 << supplied_kw_idx;
+ if rest_collected_idx.is_none() {
+ rest_collected_idx = Some(supplied_kw_idx as i32);
+ }
+ }
+ }
+
+ let (kwrest, kwrest_type) = if rest_mask == 0 && anon_kwrest {
+ // In case the kwrest hash should be empty and is anonymous in the callee,
+ // we can pass nil instead of allocating. Anonymous kwrest can only be
+ // delegated, and nil is the same as an empty hash when delegating.
+ (Qnil.into(), Type::Nil)
+ } else {
+ // Save PC and SP before allocating
+ jit_save_pc(jit, asm);
+ gen_save_sp(asm);
+
+ // Build the kwrest hash. `struct rb_callinfo_kwarg` is malloc'd, so no GC concerns.
+ let kwargs_start = asm.lea(asm.ctx.sp_opnd(-caller_keyword_len_i32));
+ let hash = asm.ccall(
+ build_kw_rest as _,
+ vec![rest_mask.into(), kwargs_start, Opnd::const_ptr(ci_kwarg.cast())]
+ );
+ (hash, Type::THash)
+ };
+
+ // The kwrest parameter sits after `unspecified_bits` if the callee specifies any
+ // keywords.
+ let stack_kwrest_idx = kwargs_stack_base - callee_kw_count_i32 - i32::from(callee_kw_count > 0);
+ let stack_kwrest = asm.stack_opnd(stack_kwrest_idx);
+ // If `stack_kwrest` already has another argument there, we need to stow it elsewhere
+ // first before putting kwrest there. Use `rest_collected_idx` because that value went
+ // into kwrest so the slot is now free.
+ let kwrest_idx = callee_kw_count + usize::from(callee_kw_count > 0);
+ if let (Some(rest_collected_idx), true) = (rest_collected_idx, kwrest_idx < caller_keyword_len) {
+ let rest_collected = asm.stack_opnd(kwargs_stack_base - rest_collected_idx);
+ let mapping = asm.ctx.get_opnd_mapping(stack_kwrest.into());
+ asm.mov(rest_collected, stack_kwrest);
+ asm.ctx.set_opnd_mapping(rest_collected.into(), mapping);
+ // Update our bookkeeping to inform the reordering step later.
+ kwargs_order[rest_collected_idx as usize] = kwargs_order[kwrest_idx];
+ kwargs_order[kwrest_idx] = 0;
+ }
+ // Put kwrest straight into memory, since we might pop it later
+ asm.ctx.dealloc_temp_reg(stack_kwrest.stack_idx());
+ asm.mov(stack_kwrest, kwrest);
+ if stack_kwrest_idx >= 0 {
+ asm.ctx.set_opnd_mapping(stack_kwrest.into(), TempMapping::map_to_stack(kwrest_type));
+ }
+
+ Some(kwrest_type)
+ } else {
+ None
+ };
+
+ // Ensure the stack is large enough for the callee
+ for _ in caller_keyword_len..callee_kw_count {
+ argc += 1;
+ asm.stack_push(Type::Unknown);
+ }
+ // Now this is the stack_opnd() index to the 0th keyword argument.
+ let kwargs_stack_base = kwargs_order.len() as i32 - 1;
+
+ // Next, we're going to loop through every keyword that was
+ // specified by the caller and make sure that it's in the correct
+ // place. If it's not, we're going to swap it around with another one.
+ for kwarg_idx in 0..callee_kw_count {
+ let callee_kwarg = unsafe { callee_kwargs.add(kwarg_idx).read() };
+
+ // If the argument is already in the right order, then we don't
+ // need to generate any code since the expected value is already
+ // in the right place on the stack.
+ if callee_kwarg == kwargs_order[kwarg_idx] {
+ continue;
+ }
+
+ // In this case the argument is not in the right place, so we
+ // need to find its position where it _should_ be and swap with
+ // that location.
+ for swap_idx in 0..kwargs_order.len() {
+ if callee_kwarg == kwargs_order[swap_idx] {
+ // First we're going to generate the code that is going
+ // to perform the actual swapping at runtime.
+ let swap_idx_i32: i32 = swap_idx.try_into().unwrap();
+ let kwarg_idx_i32: i32 = kwarg_idx.try_into().unwrap();
+ let offset0 = kwargs_stack_base - swap_idx_i32;
+ let offset1 = kwargs_stack_base - kwarg_idx_i32;
+ stack_swap(asm, offset0, offset1);
+
+ // Next we're going to do some bookkeeping on our end so
+ // that we know the order that the arguments are
+ // actually in now.
+ kwargs_order.swap(kwarg_idx, swap_idx);
+
+ break;
+ }
+ }
+ }
+
+ // Now that every caller-specified kwarg is in the right place, filling
+ // in unspecified default parameters won't overwrite anything.
+ for kwarg_idx in keyword_required_num..callee_kw_count {
+ if kwargs_order[kwarg_idx] != unsafe { callee_kwargs.add(kwarg_idx).read() } {
+ let default_param_idx = kwarg_idx - keyword_required_num;
+ let mut default_value = unsafe { (*keyword).default_values.add(default_param_idx).read() };
+
+ if default_value == Qundef {
+ // Qundef means that this value is not constant and must be
+ // recalculated at runtime, so we record it in unspecified_bits
+ // (Qnil is then used as a placeholder instead of Qundef).
+ unspecified_bits |= 0x01 << default_param_idx;
+ default_value = Qnil;
+ }
+
+ let default_param = asm.stack_opnd(kwargs_stack_base - kwarg_idx as i32);
+ let param_type = Type::from(default_value);
+ asm.mov(default_param, default_value.into());
+ asm.ctx.set_opnd_mapping(default_param.into(), TempMapping::map_to_stack(param_type));
+ }
+ }
+
+ // Pop extra arguments that went into kwrest now that they're at stack top
+ if has_kwrest && caller_keyword_len > callee_kw_count {
+ let extra_kwarg_count = caller_keyword_len - callee_kw_count;
+ asm.stack_pop(extra_kwarg_count);
+ argc = argc - extra_kwarg_count as i32;
+ }
+
+ // Keyword arguments cause a special extra local variable to be
+ // pushed onto the stack that represents the parameters that weren't
+ // explicitly given a value and have a non-constant default.
+ if callee_kw_count > 0 {
+ let unspec_opnd = VALUE::fixnum_from_usize(unspecified_bits).as_u64();
+ let top = asm.stack_push(Type::Fixnum);
+ asm.mov(top, unspec_opnd.into());
+ argc += 1;
+ }
+
+ // The kwrest parameter sits after `unspecified_bits`
+ if let Some(kwrest_type) = kwrest_type {
+ let kwrest = asm.stack_push(kwrest_type);
+ // We put the kwrest parameter in memory earlier
+ asm.ctx.dealloc_temp_reg(kwrest.stack_idx());
+ argc += 1;
+ }
+
+ argc
+}
+
+/// This is a helper function to allow us to exit early
+/// during code generation if a predicate is true.
+/// We return Option<()> here because we will be able to
+/// short-circuit using the ? operator if we return None.
+/// It would be great if Rust let you implement ? for your
+/// own types, but as of right now it doesn't.
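+///
+/// For example, a caller that itself returns an Option can write
+/// `exit_if(asm, argc != 0, Counter::send_getter_arity)?;` to bump the counter and
+/// bail out of compilation whenever the predicate holds.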
+fn exit_if(asm: &mut Assembler, pred: bool, counter: Counter) -> Option<()> {
+ if pred {
+ gen_counter_incr(asm, counter);
+ return None
+ }
+ Some(())
+}
+
+#[must_use]
+fn exit_if_tail_call(asm: &mut Assembler, ci: *const rb_callinfo) -> Option<()> {
+ exit_if(asm, unsafe { vm_ci_flag(ci) } & VM_CALL_TAILCALL != 0, Counter::send_iseq_tailcall)
+}
+
+#[must_use]
+fn exit_if_has_post(asm: &mut Assembler, iseq: *const rb_iseq_t) -> Option<()> {
+ exit_if(asm, unsafe { get_iseq_flags_has_post(iseq) }, Counter::send_iseq_has_post)
+}
+
+#[must_use]
+fn exit_if_kwsplat_non_nil(asm: &mut Assembler, flags: u32, counter: Counter) -> Option<()> {
+ let kw_splat = flags & VM_CALL_KW_SPLAT != 0;
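+ // The kw splat hash is at stack top, or just below the block arg when one is passed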
+ let kw_splat_stack = StackOpnd((flags & VM_CALL_ARGS_BLOCKARG != 0).into());
+ exit_if(asm, kw_splat && asm.ctx.get_opnd_type(kw_splat_stack) != Type::Nil, counter)
+}
+
+#[must_use]
+fn exit_if_has_rest_and_captured(asm: &mut Assembler, iseq_has_rest: bool, captured_opnd: Option<Opnd>) -> Option<()> {
+ exit_if(asm, iseq_has_rest && captured_opnd.is_some(), Counter::send_iseq_has_rest_and_captured)
+}
+
+#[must_use]
+fn exit_if_has_kwrest_and_captured(asm: &mut Assembler, iseq_has_kwrest: bool, captured_opnd: Option<Opnd>) -> Option<()> {
+ // We need to call a C function to allocate the kwrest hash, but also need to hold the captured
+ // block across the call, which we can't do.
+ exit_if(asm, iseq_has_kwrest && captured_opnd.is_some(), Counter::send_iseq_has_kwrest_and_captured)
+}
+
+#[must_use]
+fn exit_if_has_rest_and_supplying_kws(asm: &mut Assembler, iseq_has_rest: bool, supplying_kws: bool) -> Option<()> {
+ // There can be a gap between the rest parameter array and the supplied keywords, or
+ // no space to put the rest array (e.g. in `def foo(*arr, k:) = arr; foo(k: 1)`, the 1 is
+ // sitting where the rest array should be).
+ exit_if(
+ asm,
+ iseq_has_rest && supplying_kws,
+ Counter::send_iseq_has_rest_and_kw_supplied,
+ )
+}
+
+#[must_use]
+fn exit_if_supplying_kw_and_has_no_kw(asm: &mut Assembler, supplying_kws: bool, callee_kws: bool) -> Option<()> {
+ // Passing keyword arguments to a callee means allocating a hash and treating
+ // that as a positional argument. Bail for now.
+ exit_if(
+ asm,
+ supplying_kws && !callee_kws,
+ Counter::send_iseq_has_no_kw,
+ )
+}
+
+#[must_use]
+fn exit_if_supplying_kws_and_accept_no_kwargs(asm: &mut Assembler, supplying_kws: bool, iseq: *const rb_iseq_t) -> Option<()> {
+ // If we have a method accepting no kwargs (**nil), exit if we have passed
+ // it any kwargs.
+ exit_if(
+ asm,
+ supplying_kws && unsafe { get_iseq_flags_accepts_no_kwarg(iseq) },
+ Counter::send_iseq_accepts_no_kwarg
+ )
+}
+
+#[must_use]
+fn exit_if_doing_kw_and_splat(asm: &mut Assembler, doing_kw_call: bool, flags: u32) -> Option<()> {
+ exit_if(asm, doing_kw_call && flags & VM_CALL_ARGS_SPLAT != 0, Counter::send_iseq_splat_with_kw)
+}
+
+#[must_use]
+fn exit_if_wrong_number_arguments(
+ asm: &mut Assembler,
+ args_setup_block: bool,
+ opts_filled: i32,
+ flags: u32,
+ opt_num: i32,
+ iseq_has_rest: bool,
+) -> Option<()> {
+ // Too few arguments and no splat to make up for it
+ let too_few = opts_filled < 0 && flags & VM_CALL_ARGS_SPLAT == 0;
+ // Too many arguments and no sink that takes them
+ let too_many = opts_filled > opt_num && !(iseq_has_rest || args_setup_block);
+
+ exit_if(asm, too_few || too_many, Counter::send_iseq_arity_error)
+}
+
+#[must_use]
+fn exit_if_doing_kw_and_opts_missing(asm: &mut Assembler, doing_kw_call: bool, opts_missing: i32) -> Option<()> {
+ // If we have unfilled optional arguments and keyword arguments then we
+ // would need to adjust the arguments location to account for that.
+ // For now we aren't handling this case.
+ exit_if(asm, doing_kw_call && opts_missing > 0, Counter::send_iseq_missing_optional_kw)
+}
+
+#[must_use]
+fn exit_if_has_rest_and_optional_and_block(asm: &mut Assembler, iseq_has_rest: bool, opt_num: i32, iseq: *const rb_iseq_t, block_arg: bool) -> Option<()> {
+ exit_if(
+ asm,
+ iseq_has_rest && opt_num != 0 && (unsafe { get_iseq_flags_has_block(iseq) } || block_arg),
+ Counter::send_iseq_has_rest_opt_and_block
+ )
+}
+
+#[derive(Clone, Copy)]
+enum BlockArg {
+ Nil,
+ /// A special sentinel value indicating the block parameter should be read from
+ /// the current surrounding cfp
+ BlockParamProxy,
+ /// A proc object. Could be an instance of a subclass of ::rb_cProc
+ TProc,
+}
+
+#[must_use]
+fn exit_if_unsupported_block_arg_type(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ supplying_block_arg: bool
+) -> Option<Option<BlockArg>> {
+ let block_arg_type = if supplying_block_arg {
+ asm.ctx.get_opnd_type(StackOpnd(0))
+ } else {
+ // Passing no block argument
+ return Some(None);
+ };
+
+ match block_arg_type {
+ // We'll handle Nil and BlockParamProxy later
+ Type::Nil => Some(Some(BlockArg::Nil)),
+ Type::BlockParamProxy => Some(Some(BlockArg::BlockParamProxy)),
+ _ if {
+ let sample_block_arg = jit.peek_at_stack(&asm.ctx, 0);
+ unsafe { rb_obj_is_proc(sample_block_arg) }.test()
+ } => {
+ // Speculate that we'll have a proc as the block arg
+ Some(Some(BlockArg::TProc))
+ }
+ _ => {
+ gen_counter_incr(asm, Counter::send_iseq_block_arg_type);
+ None
+ }
+ }
+}
+
+#[must_use]
+fn exit_if_stack_too_large(iseq: *const rb_iseq_t) -> Option<()> {
+ let stack_max = unsafe { rb_get_iseq_body_stack_max(iseq) };
+ // Reject ISEQs with very large temp stacks;
+ // this allows us to use u8/i8 values to track stack_size and sp_offset
+ if stack_max >= i8::MAX as u32 {
+ incr_counter!(iseq_stack_too_large);
+ return None;
+ }
+ Some(())
}
fn gen_struct_aref(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
ci: *const rb_callinfo,
cme: *const rb_callable_method_entry_t,
comptime_recv: VALUE,
- _comptime_recv_klass: VALUE,
-) -> CodegenStatus {
+ flags: u32,
+ argc: i32,
+) -> Option<CodegenStatus> {
+
if unsafe { vm_ci_argc(ci) } != 0 {
- return CantCompile;
+ return None;
}
let off: i32 = unsafe { get_cme_def_body_optimized_index(cme) }
@@ -4679,50 +8250,57 @@ fn gen_struct_aref(
{
let native_off = (off as i64) * (SIZEOF_VALUE as i64);
if native_off > (i32::MAX as i64) {
- return CantCompile;
+ return None;
}
}
+ // This is a .send call and we need to adjust the stack
+ if flags & VM_CALL_OPT_SEND != 0 {
+ handle_opt_send_shift_stack(asm, argc);
+ }
+
// All structs from the same Struct class should have the same
// length. So if our comptime_recv is embedded all runtime
// structs of the same class should be as well, and the same is
// true of the converse.
let embedded = unsafe { FL_TEST_RAW(comptime_recv, VALUE(RSTRUCT_EMBED_LEN_MASK)) };
- add_comment(cb, "struct aref");
-
- let recv = ctx.stack_pop(1);
+ asm_comment!(asm, "struct aref");
- mov(cb, REG0, recv);
+ let recv = asm.stack_pop(1);
+ let recv = asm.load(recv);
- if embedded != VALUE(0) {
- let ary_elt = mem_opnd(64, REG0, RUBY_OFFSET_RSTRUCT_AS_ARY + (8 * off));
- mov(cb, REG0, ary_elt);
+ let val = if embedded != VALUE(0) {
+ Opnd::mem(64, recv, RUBY_OFFSET_RSTRUCT_AS_ARY + (SIZEOF_VALUE_I32 * off))
} else {
- let rstruct_ptr = mem_opnd(64, REG0, RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR);
- mov(cb, REG0, rstruct_ptr);
- mov(cb, REG0, mem_opnd(64, REG0, (SIZEOF_VALUE as i32) * off));
- }
+ let rstruct_ptr = asm.load(Opnd::mem(64, recv, RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR));
+ Opnd::mem(64, rstruct_ptr, SIZEOF_VALUE_I32 * off)
+ };
- let ret = ctx.stack_push(Type::Unknown);
- mov(cb, ret, REG0);
+ let ret = asm.stack_push(Type::Unknown);
+ asm.mov(ret, val);
- jump_to_next_insn(jit, ctx, cb, ocb);
- EndBlock
+ jump_to_next_insn(jit, asm, ocb);
+ Some(EndBlock)
}
fn gen_struct_aset(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
ci: *const rb_callinfo,
cme: *const rb_callable_method_entry_t,
comptime_recv: VALUE,
- _comptime_recv_klass: VALUE,
-) -> CodegenStatus {
+ flags: u32,
+ argc: i32,
+) -> Option<CodegenStatus> {
if unsafe { vm_ci_argc(ci) } != 1 {
- return CantCompile;
+ return None;
+ }
+
+ // This is a .send call and we need to adjust the stack
+ if flags & VM_CALL_OPT_SEND != 0 {
+ handle_opt_send_shift_stack(asm, argc);
}
let off: i32 = unsafe { get_cme_def_body_optimized_index(cme) }
@@ -4733,31 +8311,77 @@ fn gen_struct_aset(
assert!(unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRUCT) });
assert!((off as i64) < unsafe { RSTRUCT_LEN(comptime_recv) });
- add_comment(cb, "struct aset");
+ asm_comment!(asm, "struct aset");
- let val = ctx.stack_pop(1);
- let recv = ctx.stack_pop(1);
+ let val = asm.stack_pop(1);
+ let recv = asm.stack_pop(1);
- mov(cb, C_ARG_REGS[0], recv);
- mov(cb, C_ARG_REGS[1], imm_opnd(off as i64));
- mov(cb, C_ARG_REGS[2], val);
- call_ptr(cb, REG0, RSTRUCT_SET as *const u8);
+ let val = asm.ccall(RSTRUCT_SET as *const u8, vec![recv, (off as i64).into(), val]);
- let ret = ctx.stack_push(Type::Unknown);
- mov(cb, ret, RAX);
+ let ret = asm.stack_push(Type::Unknown);
+ asm.mov(ret, val);
- jump_to_next_insn(jit, ctx, cb, ocb);
- EndBlock
+ jump_to_next_insn(jit, asm, ocb);
+ Some(EndBlock)
+}
+
+// Generate code that calls a method with dynamic dispatch
+fn gen_send_dynamic<F: Fn(&mut Assembler) -> Opnd>(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ cd: *const rb_call_data,
+ sp_pops: usize,
+ vm_sendish: F,
+) -> Option<CodegenStatus> {
+ // Our frame handling is not compatible with tailcall
+ if unsafe { vm_ci_flag((*cd).ci) } & VM_CALL_TAILCALL != 0 {
+ return None;
+ }
+ jit_perf_symbol_push!(jit, asm, "gen_send_dynamic", PerfMap::Codegen);
+
+ // Rewind stack_size using ctx.with_stack_size to allow stack_size changes
+ // before you return None.
+ asm.ctx = asm.ctx.with_stack_size(jit.stack_size_for_pc);
+
+ // Save PC and SP to prepare for dynamic dispatch
+ jit_prepare_non_leaf_call(jit, asm);
+
+ // Squash stack canary that might be left over from elsewhere
+ assert_eq!(false, asm.get_leaf_ccall());
+ if cfg!(debug_assertions) {
+ asm.store(asm.ctx.sp_opnd(0), 0.into());
+ }
+
+ // Dispatch a method
+ let ret = vm_sendish(asm);
+
+ // Pop arguments and a receiver
+ asm.stack_pop(sp_pops);
+
+ // Push the return value
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, ret);
+
+ // Fix the interpreter SP, which vm_sendish may have changed
+ asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), SP);
+
+ gen_counter_incr(asm, Counter::num_send_dynamic);
+
+ jit_perf_symbol_pop!(jit, asm, PerfMap::Codegen);
+
+ // End the current block for invalidation and to share the same successor
+ jump_to_next_insn(jit, asm, ocb);
+ Some(EndBlock)
}
fn gen_send_general(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
cd: *const rb_call_data,
- block: Option<IseqPtr>,
-) -> CodegenStatus {
+ block: Option<BlockHandler>,
+) -> Option<CodegenStatus> {
// Relevant definitions:
// rb_execution_context_t : vm_core.h
// invoker, cfunc logic : method.h, vm_method.c
@@ -4769,62 +8393,78 @@ fn gen_send_general(
// see vm_call_method().
let ci = unsafe { get_call_data_ci(cd) }; // info about the call site
- let argc = unsafe { vm_ci_argc(ci) };
- let mid = unsafe { vm_ci_mid(ci) };
- let flags = unsafe { vm_ci_flag(ci) };
+ let mut argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap();
+ let mut mid = unsafe { vm_ci_mid(ci) };
+ let mut flags = unsafe { vm_ci_flag(ci) };
- // Don't JIT calls with keyword splat
- if flags & VM_CALL_KW_SPLAT != 0 {
- gen_counter_incr!(cb, send_kw_splat);
- return CantCompile;
+ // Defer compilation so we can specialize on class of receiver
+ if !jit.at_current_insn() {
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
}
- // Don't JIT calls that aren't simple
- // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
- if flags & VM_CALL_ARGS_SPLAT != 0 {
- gen_counter_incr!(cb, send_args_splat);
- return CantCompile;
- }
- if flags & VM_CALL_ARGS_BLOCKARG != 0 {
- gen_counter_incr!(cb, send_block_arg);
- return CantCompile;
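+ // The receiver sits below the positional arguments, plus the block arg if one was passed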
+ let recv_idx = argc + if flags & VM_CALL_ARGS_BLOCKARG != 0 { 1 } else { 0 };
+ let comptime_recv = jit.peek_at_stack(&asm.ctx, recv_idx as isize);
+ let comptime_recv_klass = comptime_recv.class_of();
+ assert_eq!(RUBY_T_CLASS, comptime_recv_klass.builtin_type(),
+ "objects visible to ruby code should have a T_CLASS in their klass field");
+
+ // Don't compile calls through singleton classes to avoid retaining the receiver.
+ // Make an exception for class methods since classes tend to be retained anyway.
+ // Also compile calls on top_self to help tests.
+ if VALUE(0) != unsafe { FL_TEST(comptime_recv_klass, VALUE(RUBY_FL_SINGLETON as usize)) }
+ && comptime_recv != unsafe { rb_vm_top_self() }
+ && !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_CLASS) }
+ && !unsafe { RB_TYPE_P(comptime_recv, RUBY_T_MODULE) } {
+ gen_counter_incr(asm, Counter::send_singleton_class);
+ return None;
}
- // Defer compilation so we can specialize on class of receiver
- if !jit_at_current_insn(jit) {
- defer_compilation(jit, ctx, cb, ocb);
- return EndBlock;
- }
+ // Points to the receiver operand on the stack
+ let recv = asm.stack_opnd(recv_idx);
+ let recv_opnd: YARVOpnd = recv.into();
- let comptime_recv = jit_peek_at_stack(jit, ctx, argc as isize);
- let comptime_recv_klass = comptime_recv.class_of();
+ // Log the name of the method we're calling
+ #[cfg(feature = "disasm")]
+ asm_comment!(asm, "call to {}", get_method_name(Some(comptime_recv_klass), mid));
- // Guard that the receiver has the same class as the one from compile time
- let side_exit = get_side_exit(jit, ocb, ctx);
+ // Gather some statistics about sends
+ gen_counter_incr(asm, Counter::num_send);
+ if let Some(_known_klass) = asm.ctx.get_opnd_type(recv_opnd).known_class() {
+ gen_counter_incr(asm, Counter::num_send_known_class);
+ }
+ if asm.ctx.get_chain_depth() > 1 {
+ gen_counter_incr(asm, Counter::num_send_polymorphic);
+ }
+ // If megamorphic, let the caller fall back to dynamic dispatch
+ if asm.ctx.get_chain_depth() >= SEND_MAX_DEPTH {
+ gen_counter_incr(asm, Counter::send_megamorphic);
+ return None;
+ }
- // Points to the receiver operand on the stack
- let recv = ctx.stack_opnd(argc);
- let recv_opnd = StackOpnd(argc.try_into().unwrap());
- mov(cb, REG0, recv);
- jit_guard_known_klass(
+ perf_call!("gen_send_general: ", jit_guard_known_klass(
jit,
- ctx,
- cb,
+ asm,
ocb,
comptime_recv_klass,
+ recv,
recv_opnd,
comptime_recv,
SEND_MAX_DEPTH,
- side_exit,
- );
+ Counter::guard_send_klass_megamorphic,
+ ));
// Do method lookup
let mut cme = unsafe { rb_callable_method_entry(comptime_recv_klass, mid) };
if cme.is_null() {
- // TODO: counter
- return CantCompile;
+ gen_counter_incr(asm, Counter::send_cme_not_found);
+ return None;
}
+ // Load an overloaded cme if applicable. See vm_search_cc().
+ // It allows you to use a faster ISEQ if possible.
+ cme = unsafe { rb_check_overloaded_cme(cme, ci) };
+
let visi = unsafe { METHOD_ENTRY_VISI(cme) };
match visi {
METHOD_VISI_PUBLIC => {
@@ -4834,15 +8474,16 @@ fn gen_send_general(
if flags & VM_CALL_FCALL == 0 {
// Can only call private methods with FCALL callsites.
// (at the moment they are callsites without a receiver or an explicit `self` receiver)
- return CantCompile;
+ gen_counter_incr(asm, Counter::send_private_not_fcall);
+ return None;
}
}
METHOD_VISI_PROTECTED => {
// If the method call is an FCALL, it is always valid
if flags & VM_CALL_FCALL == 0 {
- // otherwise we need an ancestry check to ensure the receiver is vaild to be called
+ // otherwise we need an ancestry check to ensure the receiver is valid to be called
// as protected
- jit_protected_callee_ancestry_guard(jit, cb, ocb, cme, side_exit);
+ jit_protected_callee_ancestry_guard(asm, cme);
}
}
_ => {
@@ -4852,33 +8493,69 @@ fn gen_send_general(
// Register block for invalidation
//assert!(cme->called_id == mid);
- assume_method_lookup_stable(jit, ocb, comptime_recv_klass, cme);
+ jit.assume_method_lookup_stable(asm, ocb, cme);
// To handle the aliased method case (VM_METHOD_TYPE_ALIAS)
loop {
let def_type = unsafe { get_cme_def_type(cme) };
+
match def_type {
VM_METHOD_TYPE_ISEQ => {
- return gen_send_iseq(jit, ctx, cb, ocb, ci, cme, block, argc);
+ let iseq = unsafe { get_def_iseq_ptr((*cme).def) };
+ let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL;
+ return perf_call! { gen_send_iseq(jit, asm, ocb, iseq, ci, frame_type, None, cme, block, flags, argc, None) };
}
VM_METHOD_TYPE_CFUNC => {
- return gen_send_cfunc(
+ return perf_call! { gen_send_cfunc(
jit,
- ctx,
- cb,
+ asm,
ocb,
ci,
cme,
block,
+ Some(comptime_recv_klass),
+ flags,
argc,
- &comptime_recv_klass,
- );
+ ) };
}
VM_METHOD_TYPE_IVAR => {
+ // This is a .send call, which is not supported right now for attr_reader
+ if flags & VM_CALL_OPT_SEND != 0 {
+ gen_counter_incr(asm, Counter::send_send_attr_reader);
+ return None;
+ }
+
+ if flags & VM_CALL_ARGS_BLOCKARG != 0 {
+ match asm.ctx.get_opnd_type(StackOpnd(0)) {
+ Type::Nil | Type::BlockParamProxy => {
+ // Getters ignore the block arg, and these types of block args can be
+ // passed without side-effect (never any `to_proc` call).
+ asm.stack_pop(1);
+ }
+ _ => {
+ gen_counter_incr(asm, Counter::send_getter_block_arg);
+ return None;
+ }
+ }
+ }
+
if argc != 0 {
- // Argument count mismatch. Getters take no arguments.
- gen_counter_incr!(cb, send_getter_arity);
- return CantCompile;
+ // Guard for simple splat of empty array
+ if VM_CALL_ARGS_SPLAT == flags & (VM_CALL_ARGS_SPLAT | VM_CALL_KWARG | VM_CALL_KW_SPLAT)
+ && argc == 1 {
+ // Not using chain guards since on failure these likely end up just raising
+ // ArgumentError
+ let splat = asm.stack_opnd(0);
+ guard_object_is_array(asm, splat, splat.into(), Counter::guard_send_getter_splat_non_empty);
+ let splat_len = get_array_len(asm, splat);
+ asm.cmp(splat_len, 0.into());
+ asm.jne(Target::side_exit(Counter::guard_send_getter_splat_non_empty));
+ asm.stack_pop(1);
+ } else {
+ // Argument count mismatch. Getters take no arguments.
+ gen_counter_incr(asm, Counter::send_getter_arity);
+ return None;
+ }
}
if c_method_tracing_currently_enabled(jit) {
@@ -4891,102 +8568,231 @@ fn gen_send_general(
// attr_accessor is invalidated and we exit at the closest
// instruction boundary which is always outside of the body of
// the attr_accessor code.
- gen_counter_incr!(cb, send_cfunc_tracing);
- return CantCompile;
+ gen_counter_incr(asm, Counter::send_cfunc_tracing);
+ return None;
}
- mov(cb, REG0, recv);
+ let recv = asm.stack_opnd(0); // the receiver should now be the stack top
let ivar_name = unsafe { get_cme_def_body_attr_id(cme) };
return gen_get_ivar(
jit,
- ctx,
- cb,
+ asm,
ocb,
SEND_MAX_DEPTH,
comptime_recv,
ivar_name,
- recv_opnd,
- side_exit,
+ recv,
+ recv.into(),
);
}
VM_METHOD_TYPE_ATTRSET => {
+ // This is a .send call, which is not supported right now for attr_writer
+ if flags & VM_CALL_OPT_SEND != 0 {
+ gen_counter_incr(asm, Counter::send_send_attr_writer);
+ return None;
+ }
+ if flags & VM_CALL_ARGS_SPLAT != 0 {
+ gen_counter_incr(asm, Counter::send_args_splat_attrset);
+ return None;
+ }
if flags & VM_CALL_KWARG != 0 {
- gen_counter_incr!(cb, send_attrset_kwargs);
- return CantCompile;
+ gen_counter_incr(asm, Counter::send_attrset_kwargs);
+ return None;
} else if argc != 1 || unsafe { !RB_TYPE_P(comptime_recv, RUBY_T_OBJECT) } {
- gen_counter_incr!(cb, send_ivar_set_method);
- return CantCompile;
+ gen_counter_incr(asm, Counter::send_ivar_set_method);
+ return None;
} else if c_method_tracing_currently_enabled(jit) {
// Can't generate code for firing c_call and c_return events
// See :attr-tracing:
- gen_counter_incr!(cb, send_cfunc_tracing);
- return CantCompile;
+ gen_counter_incr(asm, Counter::send_cfunc_tracing);
+ return None;
+ } else if flags & VM_CALL_ARGS_BLOCKARG != 0 {
+ gen_counter_incr(asm, Counter::send_attrset_block_arg);
+ return None;
} else {
let ivar_name = unsafe { get_cme_def_body_attr_id(cme) };
- return gen_set_ivar(jit, ctx, cb, comptime_recv, ivar_name);
+ return gen_set_ivar(jit, asm, ocb, comptime_recv, ivar_name, StackOpnd(1), None);
}
}
// Block method, e.g. define_method(:foo) { :my_block }
VM_METHOD_TYPE_BMETHOD => {
- gen_counter_incr!(cb, send_bmethod);
- return CantCompile;
- }
- VM_METHOD_TYPE_ZSUPER => {
- gen_counter_incr!(cb, send_zsuper_method);
- return CantCompile;
+ if flags & VM_CALL_ARGS_SPLAT != 0 {
+ gen_counter_incr(asm, Counter::send_args_splat_bmethod);
+ return None;
+ }
+ return gen_send_bmethod(jit, asm, ocb, ci, cme, block, flags, argc);
}
VM_METHOD_TYPE_ALIAS => {
// Retrieve the aliased method and re-enter the switch
cme = unsafe { rb_aliased_callable_method_entry(cme) };
continue;
}
- VM_METHOD_TYPE_UNDEF => {
- gen_counter_incr!(cb, send_undef_method);
- return CantCompile;
- }
- VM_METHOD_TYPE_NOTIMPLEMENTED => {
- gen_counter_incr!(cb, send_not_implemented_method);
- return CantCompile;
- }
// Send family of methods, e.g. call/apply
VM_METHOD_TYPE_OPTIMIZED => {
+ if flags & VM_CALL_ARGS_BLOCKARG != 0 {
+ gen_counter_incr(asm, Counter::send_optimized_block_arg);
+ return None;
+ }
+
let opt_type = unsafe { get_cme_def_body_optimized_type(cme) };
match opt_type {
OPTIMIZED_METHOD_TYPE_SEND => {
- gen_counter_incr!(cb, send_optimized_method_send);
- return CantCompile;
+ // This is for method calls like `foo.send(:bar)`
+ // The `send` method does not get its own stack frame.
+ // Instead, we look up the method and call it,
+ // doing some stack shifting based on the VM_CALL_OPT_SEND flag.
+
+ // Reject nested cases such as `send(:send, :alias_for_send, :foo)`.
+ // We would need to do some stack manipulation here or keep track of how
+ // many levels deep we need to stack manipulate. Because of how exits
+ // currently work, we can't do stack manipulation until we know we will no longer
+ // side exit.
+ if flags & VM_CALL_OPT_SEND != 0 {
+ gen_counter_incr(asm, Counter::send_send_nested);
+ return None;
+ }
+
+ if argc == 0 {
+ gen_counter_incr(asm, Counter::send_send_wrong_args);
+ return None;
+ }
+
+ argc -= 1;
+
+ let compile_time_name = jit.peek_at_stack(&asm.ctx, argc as isize);
+
+ mid = unsafe { rb_get_symbol_id(compile_time_name) };
+ if mid == 0 {
+ // This also rejects method names that need conversion
+ gen_counter_incr(asm, Counter::send_send_null_mid);
+ return None;
+ }
+
+ cme = unsafe { rb_callable_method_entry(comptime_recv_klass, mid) };
+ if cme.is_null() {
+ gen_counter_incr(asm, Counter::send_send_null_cme);
+ return None;
+ }
+
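+ // `send` ignores method visibility, so mark the call as an FCALL; OPT_SEND tells
+ // later code to shift the method name off the stack before the call.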
+ flags |= VM_CALL_FCALL | VM_CALL_OPT_SEND;
+
+ jit.assume_method_lookup_stable(asm, ocb, cme);
+
+ asm_comment!(
+ asm,
+ "guard sending method name \'{}\'",
+ unsafe { cstr_to_rust_string(rb_id2name(mid)) }.unwrap_or_else(|| "<unknown>".to_owned()),
+ );
+
+ let name_opnd = asm.stack_opnd(argc);
+ let symbol_id_opnd = asm.ccall(rb_get_symbol_id as *const u8, vec![name_opnd]);
+
+ asm.cmp(symbol_id_opnd, mid.into());
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ asm,
+ ocb,
+ SEND_MAX_DEPTH,
+ Counter::guard_send_send_name_chain,
+ );
+
+ // We have changed the argc, flags, mid, and cme, so we need to re-enter the match
+ // and compile whatever method we found from send.
+ continue;
+
}
OPTIMIZED_METHOD_TYPE_CALL => {
- gen_counter_incr!(cb, send_optimized_method_call);
- return CantCompile;
+
+ if block.is_some() {
+ gen_counter_incr(asm, Counter::send_call_block);
+ return None;
+ }
+
+ if flags & VM_CALL_KWARG != 0 {
+ gen_counter_incr(asm, Counter::send_call_kwarg);
+ return None;
+ }
+
+ if flags & VM_CALL_ARGS_SPLAT != 0 {
+ gen_counter_incr(asm, Counter::send_args_splat_opt_call);
+ return None;
+ }
+
+ // Optimize for single ractor mode and avoid runtime check for
+ // "defined with an un-shareable Proc in a different Ractor"
+ if !assume_single_ractor_mode(jit, asm, ocb) {
+ gen_counter_incr(asm, Counter::send_call_multi_ractor);
+ return None;
+ }
+
+ // If this is a .send call we need to adjust the stack
+ if flags & VM_CALL_OPT_SEND != 0 {
+ handle_opt_send_shift_stack(asm, argc);
+ }
+
+ // About to reset the SP, need to load this here
+ let recv_load = asm.load(recv);
+
+ let sp = asm.lea(asm.ctx.sp_opnd(0));
+
+ // Save the PC and SP because the callee can make Ruby calls
+ jit_prepare_non_leaf_call(jit, asm);
+
+ let kw_splat = flags & VM_CALL_KW_SPLAT;
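+ // Compute a pointer to the first positional argument on the VM stack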
+ let stack_argument_pointer = asm.lea(Opnd::mem(64, sp, -(argc) * SIZEOF_VALUE_I32));
+
+ let ret = asm.ccall(rb_optimized_call as *const u8, vec![
+ recv_load,
+ EC,
+ argc.into(),
+ stack_argument_pointer,
+ kw_splat.into(),
+ VM_BLOCK_HANDLER_NONE.into(),
+ ]);
+
+ asm.stack_pop(argc as usize + 1);
+
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, ret);
+ return Some(KeepCompiling);
+
}
OPTIMIZED_METHOD_TYPE_BLOCK_CALL => {
- gen_counter_incr!(cb, send_optimized_method_block_call);
- return CantCompile;
+ gen_counter_incr(asm, Counter::send_optimized_method_block_call);
+ return None;
}
OPTIMIZED_METHOD_TYPE_STRUCT_AREF => {
+ if flags & VM_CALL_ARGS_SPLAT != 0 {
+ gen_counter_incr(asm, Counter::send_args_splat_aref);
+ return None;
+ }
return gen_struct_aref(
jit,
- ctx,
- cb,
+ asm,
ocb,
ci,
cme,
comptime_recv,
- comptime_recv_klass,
+ flags,
+ argc,
);
}
OPTIMIZED_METHOD_TYPE_STRUCT_ASET => {
+ if flags & VM_CALL_ARGS_SPLAT != 0 {
+ gen_counter_incr(asm, Counter::send_args_splat_aset);
+ return None;
+ }
return gen_struct_aset(
jit,
- ctx,
- cb,
+ asm,
ocb,
ci,
cme,
comptime_recv,
- comptime_recv_klass,
+ flags,
+ argc,
);
}
_ => {
@@ -4994,13 +8800,25 @@ fn gen_send_general(
}
}
}
+ VM_METHOD_TYPE_ZSUPER => {
+ gen_counter_incr(asm, Counter::send_zsuper_method);
+ return None;
+ }
+ VM_METHOD_TYPE_UNDEF => {
+ gen_counter_incr(asm, Counter::send_undef_method);
+ return None;
+ }
+ VM_METHOD_TYPE_NOTIMPLEMENTED => {
+ gen_counter_incr(asm, Counter::send_not_implemented_method);
+ return None;
+ }
VM_METHOD_TYPE_MISSING => {
- gen_counter_incr!(cb, send_missing_method);
- return CantCompile;
+ gen_counter_incr(asm, Counter::send_missing_method);
+ return None;
}
VM_METHOD_TYPE_REFINED => {
- gen_counter_incr!(cb, send_refined_method);
- return CantCompile;
+ gen_counter_incr(asm, Counter::send_refined_method);
+ return None;
}
_ => {
unreachable!();
@@ -5009,91 +8827,345 @@ fn gen_send_general(
}
}
+/// Assemble "{class_name}#{method_name}" from a class pointer and a method ID
+fn get_method_name(class: Option<VALUE>, mid: u64) -> String {
+ let class_name = class.and_then(|class| unsafe {
+ cstr_to_rust_string(rb_class2name(class))
+ }).unwrap_or_else(|| "Unknown".to_string());
+ let method_name = if mid != 0 {
+ unsafe { cstr_to_rust_string(rb_id2name(mid)) }
+ } else {
+ None
+ }.unwrap_or_else(|| "Unknown".to_string());
+ format!("{}#{}", class_name, method_name)
+}
+
+/// Assemble "{label}@{iseq_path}:{lineno}" (iseq_inspect() format) from an ISEQ
+fn get_iseq_name(iseq: IseqPtr) -> String {
+ let c_string = unsafe { rb_yjit_iseq_inspect(iseq) };
+ let string = unsafe { CStr::from_ptr(c_string) }.to_str()
+ .unwrap_or_else(|_| "not UTF-8").to_string();
+ unsafe { ruby_xfree(c_string as *mut c_void); }
+ string
+}
+
+/// Shifts the stack for send in order to remove the name of the method
+/// The comment below is borrowed from vm_call_opt_send in vm_insnhelper.c.
+/// E.g. when argc == 2
+/// | | | | TOPN
+/// +------+ | |
+/// | arg1 | ---+ | | 0
+/// +------+ | +------+
+/// | arg0 | -+ +-> | arg1 | 1
+/// +------+ | +------+
+/// | sym | +---> | arg0 | 2
+/// +------+ +------+
+/// | recv | | recv | 3
+///--+------+--------+------+------
+///
+/// We do this for our compile-time context and the actual stack.
+fn handle_opt_send_shift_stack(asm: &mut Assembler, argc: i32) {
+ asm_comment!(asm, "shift_stack");
+ for j in (0..argc).rev() {
+ let opnd = asm.stack_opnd(j);
+ let opnd2 = asm.stack_opnd(j + 1);
+ asm.mov(opnd2, opnd);
+ }
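+ // The movs above handled the actual stack; shift_stack updates the compile-time context to match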
+ asm.shift_stack(argc as usize);
+}
+
fn gen_opt_send_without_block(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let cd = jit_get_arg(jit, 0).as_ptr();
+) -> Option<CodegenStatus> {
+ // Generate specialized code if possible
+ let cd = jit.get_arg(0).as_ptr();
+ if let Some(status) = perf_call! { gen_send_general(jit, asm, ocb, cd, None) } {
+ return Some(status);
+ }
- gen_send_general(jit, ctx, cb, ocb, cd, None)
+ // Otherwise, fall back to dynamic dispatch using the interpreter's implementation of send
+ gen_send_dynamic(jit, asm, ocb, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| {
+ extern "C" {
+ fn rb_vm_opt_send_without_block(ec: EcPtr, cfp: CfpPtr, cd: VALUE) -> VALUE;
+ }
+ asm.ccall(
+ rb_vm_opt_send_without_block as *const u8,
+ vec![EC, CFP, (cd as usize).into()],
+ )
+ })
}
fn gen_send(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+ // Generate specialized code if possible
+ let cd = jit.get_arg(0).as_ptr();
+ let block = jit.get_arg(1).as_optional_ptr().map(|iseq| BlockHandler::BlockISeq(iseq));
+ if let Some(status) = perf_call! { gen_send_general(jit, asm, ocb, cd, block) } {
+ return Some(status);
+ }
+
+ // Otherwise, fall back to dynamic dispatch using the interpreter's implementation of send
+ let blockiseq = jit.get_arg(1).as_iseq();
+ gen_send_dynamic(jit, asm, ocb, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| {
+ extern "C" {
+ fn rb_vm_send(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE;
+ }
+ asm.ccall(
+ rb_vm_send as *const u8,
+ vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()],
+ )
+ })
+}
+
+fn gen_invokeblock(
+ jit: &mut JITState,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let cd = jit_get_arg(jit, 0).as_ptr();
- let block = jit_get_arg(jit, 1).as_optional_ptr();
- return gen_send_general(jit, ctx, cb, ocb, cd, block);
+) -> Option<CodegenStatus> {
+ // Generate specialized code if possible
+ let cd = jit.get_arg(0).as_ptr();
+ if let Some(status) = gen_invokeblock_specialized(jit, asm, ocb, cd) {
+ return Some(status);
+ }
+
+ // Otherwise, fall back to dynamic dispatch using the interpreter's implementation of send
+ gen_send_dynamic(jit, asm, ocb, cd, unsafe { rb_yjit_invokeblock_sp_pops((*cd).ci) }, |asm| {
+ extern "C" {
+ fn rb_vm_invokeblock(ec: EcPtr, cfp: CfpPtr, cd: VALUE) -> VALUE;
+ }
+ asm.ccall(
+ rb_vm_invokeblock as *const u8,
+ vec![EC, CFP, (cd as usize).into()],
+ )
+ })
+}
+
+fn gen_invokeblock_specialized(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ cd: *const rb_call_data,
+) -> Option<CodegenStatus> {
+ if !jit.at_current_insn() {
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
+ }
+
+ // Fall back to dynamic dispatch if this callsite is megamorphic
+ if asm.ctx.get_chain_depth() >= SEND_MAX_DEPTH {
+ gen_counter_incr(asm, Counter::invokeblock_megamorphic);
+ return None;
+ }
+
+ // Get call info
+ let ci = unsafe { get_call_data_ci(cd) };
+ let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap();
+ let flags = unsafe { vm_ci_flag(ci) };
+
+ // Get block_handler
+ let cfp = jit.get_cfp();
+ let lep = unsafe { rb_vm_ep_local_ep(get_cfp_ep(cfp)) };
+ let comptime_handler = unsafe { *lep.offset(VM_ENV_DATA_INDEX_SPECVAL.try_into().unwrap()) };
+
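+ // The low two bits of the block handler tag its type: 0x1 for an ISEQ block,
+ // 0x3 for an ifunc; symbols and procs are ordinary VALUEs.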
+ // Handle each block_handler type
+ if comptime_handler.0 == VM_BLOCK_HANDLER_NONE as usize { // no block given
+ gen_counter_incr(asm, Counter::invokeblock_none);
+ None
+ } else if comptime_handler.0 & 0x3 == 0x1 { // VM_BH_ISEQ_BLOCK_P
+ asm_comment!(asm, "get local EP");
+ let ep_opnd = gen_get_lep(jit, asm);
+ let block_handler_opnd = asm.load(
+ Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)
+ );
+
+ asm_comment!(asm, "guard block_handler type");
+ let tag_opnd = asm.and(block_handler_opnd, 0x3.into()); // block_handler is a tagged pointer
+ asm.cmp(tag_opnd, 0x1.into()); // VM_BH_ISEQ_BLOCK_P
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ asm,
+ ocb,
+ SEND_MAX_DEPTH,
+ Counter::guard_invokeblock_tag_changed,
+ );
+
+ // If the current ISEQ is annotated to be inlined but it's not being inlined here,
+ // generate a dynamic dispatch to avoid making this yield megamorphic.
+ if unsafe { rb_yjit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() {
+ gen_counter_incr(asm, Counter::invokeblock_iseq_not_inlined);
+ return None;
+ }
+
+ let comptime_captured = unsafe { ((comptime_handler.0 & !0x3) as *const rb_captured_block).as_ref().unwrap() };
+ let comptime_iseq = unsafe { *comptime_captured.code.iseq.as_ref() };
+
+ asm_comment!(asm, "guard known ISEQ");
+ let captured_opnd = asm.and(block_handler_opnd, Opnd::Imm(!0x3));
+ let iseq_opnd = asm.load(Opnd::mem(64, captured_opnd, SIZEOF_VALUE_I32 * 2));
+ asm.cmp(iseq_opnd, VALUE::from(comptime_iseq).into());
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ asm,
+ ocb,
+ SEND_MAX_DEPTH,
+ Counter::guard_invokeblock_iseq_block_changed,
+ );
+
+ perf_call! { gen_send_iseq(jit, asm, ocb, comptime_iseq, ci, VM_FRAME_MAGIC_BLOCK, None, 0 as _, None, flags, argc, Some(captured_opnd)) }
+ } else if comptime_handler.0 & 0x3 == 0x3 { // VM_BH_IFUNC_P
+ // We aren't handling CALLER_SETUP_ARG and CALLER_REMOVE_EMPTY_KW_SPLAT yet.
+ if flags & VM_CALL_ARGS_SPLAT != 0 {
+ gen_counter_incr(asm, Counter::invokeblock_ifunc_args_splat);
+ return None;
+ }
+ if flags & VM_CALL_KW_SPLAT != 0 {
+ gen_counter_incr(asm, Counter::invokeblock_ifunc_kw_splat);
+ return None;
+ }
+
+ asm_comment!(asm, "get local EP");
+ let ep_opnd = gen_get_lep(jit, asm);
+ let block_handler_opnd = asm.load(
+ Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)
+ );
+
+ asm_comment!(asm, "guard block_handler type");
+ let tag_opnd = asm.and(block_handler_opnd, 0x3.into()); // block_handler is a tagged pointer
+ asm.cmp(tag_opnd, 0x3.into()); // VM_BH_IFUNC_P
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ asm,
+ ocb,
+ SEND_MAX_DEPTH,
+ Counter::guard_invokeblock_tag_changed,
+ );
+
+ // The cfunc may not be leaf
+ jit_prepare_non_leaf_call(jit, asm);
+
+ extern "C" {
+ fn rb_vm_yield_with_cfunc(ec: EcPtr, captured: *const rb_captured_block, argc: c_int, argv: *const VALUE) -> VALUE;
+ }
+ asm_comment!(asm, "call ifunc");
+ let captured_opnd = asm.and(block_handler_opnd, Opnd::Imm(!0x3));
+ let argv = asm.lea(asm.ctx.sp_opnd(-argc));
+ let ret = asm.ccall(
+ rb_vm_yield_with_cfunc as *const u8,
+ vec![EC, captured_opnd, argc.into(), argv],
+ );
+
+ asm.stack_pop(argc.try_into().unwrap());
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, ret);
+
+ // cfunc calls may corrupt types
+ asm.clear_local_types();
+
+ // Share the successor with other chains
+ jump_to_next_insn(jit, asm, ocb);
+ Some(EndBlock)
+ } else if comptime_handler.symbol_p() {
+ gen_counter_incr(asm, Counter::invokeblock_symbol);
+ None
+ } else { // Proc
+ gen_counter_incr(asm, Counter::invokeblock_proc);
+ None
+ }
}
fn gen_invokesuper(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let cd: *const rb_call_data = jit_get_arg(jit, 0).as_ptr();
- let block: Option<IseqPtr> = jit_get_arg(jit, 1).as_optional_ptr();
+) -> Option<CodegenStatus> {
+ // Generate specialized code if possible
+ let cd = jit.get_arg(0).as_ptr();
+ if let Some(status) = gen_invokesuper_specialized(jit, asm, ocb, cd) {
+ return Some(status);
+ }
+
+ // Otherwise, fall back to dynamic dispatch using the interpreter's implementation of send
+ let blockiseq = jit.get_arg(1).as_iseq();
+ gen_send_dynamic(jit, asm, ocb, cd, unsafe { rb_yjit_sendish_sp_pops((*cd).ci) }, |asm| {
+ extern "C" {
+ fn rb_vm_invokesuper(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE;
+ }
+ asm.ccall(
+ rb_vm_invokesuper as *const u8,
+ vec![EC, CFP, (cd as usize).into(), VALUE(blockiseq as usize).into()],
+ )
+ })
+}
+fn gen_invokesuper_specialized(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ cd: *const rb_call_data,
+) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on class of receiver
- if !jit_at_current_insn(jit) {
- defer_compilation(jit, ctx, cb, ocb);
- return EndBlock;
+ if !jit.at_current_insn() {
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
}
- let me = unsafe { rb_vm_frame_method_entry(get_ec_cfp(jit.ec.unwrap())) };
+ // Handle the last two branches of vm_caller_setup_arg_block
+ let block = if let Some(iseq) = jit.get_arg(1).as_optional_ptr() {
+ BlockHandler::BlockISeq(iseq)
+ } else {
+ BlockHandler::LEPSpecVal
+ };
+
+ // Fall back to dynamic dispatch if this callsite is megamorphic
+ if asm.ctx.get_chain_depth() >= SEND_MAX_DEPTH {
+ gen_counter_incr(asm, Counter::invokesuper_megamorphic);
+ return None;
+ }
+
+ let me = unsafe { rb_vm_frame_method_entry(jit.get_cfp()) };
if me.is_null() {
- return CantCompile;
+ gen_counter_incr(asm, Counter::invokesuper_no_me);
+ return None;
}
// FIXME: We should track and invalidate this block when this cme is invalidated
let current_defined_class = unsafe { (*me).defined_class };
let mid = unsafe { get_def_original_id((*me).def) };
- if me != unsafe { rb_callable_method_entry(current_defined_class, (*me).called_id) } {
- // Though we likely could generate this call, as we are only concerned
- // with the method entry remaining valid, assume_method_lookup_stable
- // below requires that the method lookup matches as well
- return CantCompile;
- }
-
// vm_search_normal_superclass
let rbasic_ptr: *const RBasic = current_defined_class.as_ptr();
if current_defined_class.builtin_type() == RUBY_T_ICLASS
&& unsafe { RB_TYPE_P((*rbasic_ptr).klass, RUBY_T_MODULE) && FL_TEST_RAW((*rbasic_ptr).klass, VALUE(RMODULE_IS_REFINEMENT.as_usize())) != VALUE(0) }
{
- return CantCompile;
+ gen_counter_incr(asm, Counter::invokesuper_refinement);
+ return None;
}
let comptime_superclass =
unsafe { rb_class_get_superclass(RCLASS_ORIGIN(current_defined_class)) };
let ci = unsafe { get_call_data_ci(cd) };
- let argc = unsafe { vm_ci_argc(ci) };
+ let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap();
let ci_flags = unsafe { vm_ci_flag(ci) };
// Don't JIT calls that aren't simple
// Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
- if ci_flags & VM_CALL_ARGS_SPLAT != 0 {
- gen_counter_incr!(cb, send_args_splat);
- return CantCompile;
- }
+
if ci_flags & VM_CALL_KWARG != 0 {
- gen_counter_incr!(cb, send_keywords);
- return CantCompile;
+ gen_counter_incr(asm, Counter::invokesuper_kwarg);
+ return None;
}
if ci_flags & VM_CALL_KW_SPLAT != 0 {
- gen_counter_incr!(cb, send_kw_splat);
- return CantCompile;
- }
- if ci_flags & VM_CALL_ARGS_BLOCKARG != 0 {
- gen_counter_incr!(cb, send_block_arg);
- return CantCompile;
+ gen_counter_incr(asm, Counter::invokesuper_kw_splat);
+ return None;
}
// Ensure we haven't rebound this method onto an incompatible class.
@@ -5101,487 +9173,523 @@ fn gen_invokesuper(
// cheaper calculations first, but since we specialize on the method entry
// and so only have to do this once at compile time this is fine to always
// check and side exit.
- let comptime_recv = jit_peek_at_stack(jit, ctx, argc as isize);
+ let comptime_recv = jit.peek_at_stack(&asm.ctx, argc as isize);
if unsafe { rb_obj_is_kind_of(comptime_recv, current_defined_class) } == VALUE(0) {
- return CantCompile;
+ gen_counter_incr(asm, Counter::invokesuper_defined_class_mismatch);
+ return None;
+ }
+
+ // Don't compile `super` on objects with singleton class to avoid retaining the receiver.
+ if VALUE(0) != unsafe { FL_TEST(comptime_recv.class_of(), VALUE(RUBY_FL_SINGLETON as usize)) } {
+ gen_counter_incr(asm, Counter::invokesuper_singleton_class);
+ return None;
}
// Do method lookup
let cme = unsafe { rb_callable_method_entry(comptime_superclass, mid) };
-
if cme.is_null() {
- return CantCompile;
+ gen_counter_incr(asm, Counter::invokesuper_no_cme);
+ return None;
}
// Check that we'll be able to write this method dispatch before generating checks
let cme_def_type = unsafe { get_cme_def_type(cme) };
if cme_def_type != VM_METHOD_TYPE_ISEQ && cme_def_type != VM_METHOD_TYPE_CFUNC {
// others unimplemented
- return CantCompile;
- }
-
- // Guard that the receiver has the same class as the one from compile time
- let side_exit = get_side_exit(jit, ocb, ctx);
-
- let cfp = unsafe { get_ec_cfp(jit.ec.unwrap()) };
- let ep = unsafe { get_cfp_ep(cfp) };
- let cref_me = unsafe { *ep.offset(VM_ENV_DATA_INDEX_ME_CREF.try_into().unwrap()) };
- let me_as_value = VALUE(me as usize);
- if cref_me != me_as_value {
- // This will be the case for super within a block
- return CantCompile;
+ gen_counter_incr(asm, Counter::invokesuper_not_iseq_or_cfunc);
+ return None;
}
- add_comment(cb, "guard known me");
- mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP));
- let ep_me_opnd = mem_opnd(
+ asm_comment!(asm, "guard known me");
+ let lep_opnd = gen_get_lep(jit, asm);
+ let ep_me_opnd = Opnd::mem(
64,
- REG0,
- (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_ME_CREF as i32),
+ lep_opnd,
+ SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF,
);
- jit_mov_gc_ptr(jit, cb, REG1, me_as_value);
- cmp(cb, ep_me_opnd, REG1);
- jne_ptr(cb, counted_exit!(ocb, side_exit, invokesuper_me_changed));
-
- if block.is_none() {
- // Guard no block passed
- // rb_vm_frame_block_handler(GET_EC()->cfp) == VM_BLOCK_HANDLER_NONE
- // note, we assume VM_ASSERT(VM_ENV_LOCAL_P(ep))
- //
- // TODO: this could properly forward the current block handler, but
- // would require changes to gen_send_*
- add_comment(cb, "guard no block given");
- // EP is in REG0 from above
- let ep_specval_opnd = mem_opnd(
- 64,
- REG0,
- (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32),
- );
- cmp(cb, ep_specval_opnd, uimm_opnd(VM_BLOCK_HANDLER_NONE.into()));
- jne_ptr(cb, counted_exit!(ocb, side_exit, invokesuper_block));
- }
- // Points to the receiver operand on the stack
- let recv = ctx.stack_opnd(argc);
- mov(cb, REG0, recv);
+ let me_as_value = VALUE(me as usize);
+ asm.cmp(ep_me_opnd, me_as_value.into());
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ asm,
+ ocb,
+ SEND_MAX_DEPTH,
+ Counter::guard_invokesuper_me_changed,
+ );
// We need to assume that both our current method entry and the super
// method entry we invoke remain stable
- assume_method_lookup_stable(jit, ocb, current_defined_class, me);
- assume_method_lookup_stable(jit, ocb, comptime_superclass, cme);
+ jit.assume_method_lookup_stable(asm, ocb, me);
+ jit.assume_method_lookup_stable(asm, ocb, cme);
// Method calls may corrupt types
- ctx.clear_local_types();
+ asm.clear_local_types();
match cme_def_type {
- VM_METHOD_TYPE_ISEQ => gen_send_iseq(jit, ctx, cb, ocb, ci, cme, block, argc),
+ VM_METHOD_TYPE_ISEQ => {
+ let iseq = unsafe { get_def_iseq_ptr((*cme).def) };
+ let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL;
+ perf_call! { gen_send_iseq(jit, asm, ocb, iseq, ci, frame_type, None, cme, Some(block), ci_flags, argc, None) }
+ }
VM_METHOD_TYPE_CFUNC => {
- gen_send_cfunc(jit, ctx, cb, ocb, ci, cme, block, argc, ptr::null())
+ perf_call! { gen_send_cfunc(jit, asm, ocb, ci, cme, Some(block), None, ci_flags, argc) }
}
_ => unreachable!(),
}
}
fn gen_leave(
- jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
- ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+ _jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
// Only the return value should be on the stack
- assert!(ctx.get_stack_size() == 1);
-
- // Create a side-exit to fall back to the interpreter
- let side_exit = get_side_exit(jit, ocb, ctx);
-
- // Load environment pointer EP from CFP
- mov(cb, REG1, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP));
+ assert_eq!(1, asm.ctx.get_stack_size(), "leave instruction expects stack size 1, but was: {}", asm.ctx.get_stack_size());
// Check for interrupts
- add_comment(cb, "check for interrupts");
- gen_check_ints(cb, counted_exit!(ocb, side_exit, leave_se_interrupt));
-
- // Load the return value
- mov(cb, REG0, ctx.stack_pop(1));
+ gen_check_ints(asm, Counter::leave_se_interrupt);
// Pop the current frame (ec->cfp++)
// Note: the return PC is already in the previous CFP
- add_comment(cb, "pop stack frame");
- add(cb, REG_CFP, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64));
- mov(cb, mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP), REG_CFP);
+ asm_comment!(asm, "pop stack frame");
+ let incr_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into());
+ asm.mov(CFP, incr_cfp);
+ asm.mov(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP);
+
+ // Load the return value
+ let retval_opnd = asm.stack_pop(1);
- // Reload REG_SP for the caller and write the return value.
- // Top of the stack is REG_SP[0] since the caller has sp_offset=1.
- mov(cb, REG_SP, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP));
- mov(cb, mem_opnd(64, REG_SP, 0), REG0);
+ // Move the return value into the C return register
+ asm.mov(C_RET_OPND, retval_opnd);
- // Jump to the JIT return address on the frame that was just popped
+ // Jump to the JIT return address on the frame that was just popped.
+ // There are a few possible jump targets:
+ // - gen_leave_exit() and gen_leave_exception(), for C callers
+ // - Return context set up by gen_send_iseq()
+ // We don't write the return value to stack memory like the interpreter here.
+ // Each jump target does it as necessary.
let offset_to_jit_return =
- -(RUBY_SIZEOF_CONTROL_FRAME as i32) + (RUBY_OFFSET_CFP_JIT_RETURN as i32);
- jmp_rm(cb, mem_opnd(64, REG_CFP, offset_to_jit_return));
+ -(RUBY_SIZEOF_CONTROL_FRAME as i32) + RUBY_OFFSET_CFP_JIT_RETURN;
+ asm.jmp_opnd(Opnd::mem(64, CFP, offset_to_jit_return));
- EndBlock
+ Some(EndBlock)
}
fn gen_getglobal(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let gid = jit_get_arg(jit, 0);
+) -> Option<CodegenStatus> {
+ let gid = jit.get_arg(0).as_usize();
// Save the PC and SP because we might make a Ruby call for warning
- jit_prepare_routine_call(jit, ctx, cb, REG0);
-
- mov(cb, C_ARG_REGS[0], imm_opnd(gid.as_i64()));
+ jit_prepare_non_leaf_call(jit, asm);
- call_ptr(cb, REG0, rb_gvar_get as *const u8);
+ let val_opnd = asm.ccall(
+ rb_gvar_get as *const u8,
+ vec![ gid.into() ]
+ );
- let top = ctx.stack_push(Type::Unknown);
- mov(cb, top, RAX);
+ let top = asm.stack_push(Type::Unknown);
+ asm.mov(top, val_opnd);
- KeepCompiling
+ Some(KeepCompiling)
}
fn gen_setglobal(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let gid = jit_get_arg(jit, 0);
+) -> Option<CodegenStatus> {
+ let gid = jit.get_arg(0).as_usize();
// Save the PC and SP because we might make a Ruby call for
// Kernel#set_trace_var
- jit_prepare_routine_call(jit, ctx, cb, REG0);
-
- mov(cb, C_ARG_REGS[0], imm_opnd(gid.as_i64()));
-
- let val = ctx.stack_pop(1);
-
- mov(cb, C_ARG_REGS[1], val);
-
- call_ptr(cb, REG0, rb_gvar_set as *const u8);
+ jit_prepare_non_leaf_call(jit, asm);
+
+ let val = asm.stack_opnd(0);
+ asm.ccall(
+ rb_gvar_set as *const u8,
+ vec![
+ gid.into(),
+ val,
+ ],
+ );
+ asm.stack_pop(1); // Keep it during ccall for GC
- KeepCompiling
+ Some(KeepCompiling)
}
fn gen_anytostring(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
// Save the PC and SP since we might call #to_s
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_non_leaf_call(jit, asm);
- let str = ctx.stack_pop(1);
- let val = ctx.stack_pop(1);
+ let str = asm.stack_opnd(0);
+ let val = asm.stack_opnd(1);
- mov(cb, C_ARG_REGS[0], str);
- mov(cb, C_ARG_REGS[1], val);
-
- call_ptr(cb, REG0, rb_obj_as_string_result as *const u8);
+ let val = asm.ccall(rb_obj_as_string_result as *const u8, vec![str, val]);
+ asm.stack_pop(2); // Keep them during ccall for GC
// Push the return value
- let stack_ret = ctx.stack_push(Type::TString);
- mov(cb, stack_ret, RAX);
+ let stack_ret = asm.stack_push(Type::TString);
+ asm.mov(stack_ret, val);
- KeepCompiling
+ Some(KeepCompiling)
}
fn gen_objtostring(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- if !jit_at_current_insn(jit) {
- defer_compilation(jit, ctx, cb, ocb);
- return EndBlock;
+) -> Option<CodegenStatus> {
+ if !jit.at_current_insn() {
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
}
- let recv = ctx.stack_opnd(0);
- let comptime_recv = jit_peek_at_stack(jit, ctx, 0);
+ let recv = asm.stack_opnd(0);
+ let comptime_recv = jit.peek_at_stack(&asm.ctx, 0);
if unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRING) } {
- let side_exit = get_side_exit(jit, ocb, ctx);
-
- mov(cb, REG0, recv);
jit_guard_known_klass(
jit,
- ctx,
- cb,
+ asm,
ocb,
comptime_recv.class_of(),
- StackOpnd(0),
+ recv,
+ recv.into(),
comptime_recv,
SEND_MAX_DEPTH,
- side_exit,
+ Counter::objtostring_not_string,
);
+
// No work needed. The string value is already on the top of the stack.
- KeepCompiling
+ Some(KeepCompiling)
} else {
- let cd = jit_get_arg(jit, 0).as_ptr();
- gen_send_general(jit, ctx, cb, ocb, cd, None)
+ let cd = jit.get_arg(0).as_ptr();
+ perf_call! { gen_send_general(jit, asm, ocb, cd, None) }
}
}
fn gen_intern(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
// Save the PC and SP because we might allocate
- jit_prepare_routine_call(jit, ctx, cb, REG0);
-
- let str = ctx.stack_pop(1);
-
- mov(cb, C_ARG_REGS[0], str);
+ jit_prepare_call_with_gc(jit, asm);
- call_ptr(cb, REG0, rb_str_intern as *const u8);
+ let str = asm.stack_opnd(0);
+ let sym = asm.ccall(rb_str_intern as *const u8, vec![str]);
+ asm.stack_pop(1); // Keep it during ccall for GC
// Push the return value
- let stack_ret = ctx.stack_push(Type::Unknown);
- mov(cb, stack_ret, RAX);
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, sym);
- KeepCompiling
+ Some(KeepCompiling)
}
fn gen_toregexp(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let opt = jit_get_arg(jit, 0).as_i64();
- let cnt = jit_get_arg(jit, 1).as_usize();
+) -> Option<CodegenStatus> {
+ let opt = jit.get_arg(0).as_i64();
+ let cnt = jit.get_arg(1).as_usize();
// Save the PC and SP because this allocates an object and could
// raise an exception.
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_non_leaf_call(jit, asm);
- let values_ptr = ctx.sp_opnd(-((SIZEOF_VALUE as isize) * (cnt as isize)));
- ctx.stack_pop(cnt);
+ let values_ptr = asm.lea(asm.ctx.sp_opnd(-(cnt as i32)));
- mov(cb, C_ARG_REGS[0], imm_opnd(0));
- mov(cb, C_ARG_REGS[1], imm_opnd(cnt.try_into().unwrap()));
- lea(cb, C_ARG_REGS[2], values_ptr);
- call_ptr(cb, REG0, rb_ary_tmp_new_from_values as *const u8);
+ let ary = asm.ccall(
+ rb_ary_tmp_new_from_values as *const u8,
+ vec![
+ Opnd::Imm(0),
+ cnt.into(),
+ values_ptr,
+ ]
+ );
+ asm.stack_pop(cnt); // Let ccall spill them
// Save the array so we can clear it later
- push(cb, RAX);
- push(cb, RAX); // Alignment
- mov(cb, C_ARG_REGS[0], RAX);
- mov(cb, C_ARG_REGS[1], imm_opnd(opt));
- call_ptr(cb, REG0, rb_reg_new_ary as *const u8);
+ asm.cpush(ary);
+ asm.cpush(ary); // Alignment
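+    // Pushing the array twice keeps the C stack aligned for the calls below
+    // (hence the "Alignment" note); both slots are popped again afterwards.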
+
+ let val = asm.ccall(
+ rb_reg_new_ary as *const u8,
+ vec![
+ ary,
+ Opnd::Imm(opt),
+ ]
+ );
// The actual regex is in RAX now. Pop the temp array from
// rb_ary_tmp_new_from_values into C arg regs so we can clear it
- pop(cb, REG1); // Alignment
- pop(cb, C_ARG_REGS[0]);
+ let ary = asm.cpop(); // Alignment
+ asm.cpop_into(ary);
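+    // Both saved slots hold the same array pointer, so the two pops restore the
+    // stack and leave the array in a register for rb_ary_clear() below.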
// The value we want to push on the stack is in RAX right now
- let stack_ret = ctx.stack_push(Type::Unknown);
- mov(cb, stack_ret, RAX);
+ let stack_ret = asm.stack_push(Type::UnknownHeap);
+ asm.mov(stack_ret, val);
// Clear the temp array.
- call_ptr(cb, REG0, rb_ary_clear as *const u8);
+ asm.ccall(rb_ary_clear as *const u8, vec![ary]);
- KeepCompiling
+ Some(KeepCompiling)
}
fn gen_getspecial(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
// This takes two arguments, key and type
// key is only used when type == 0
// A non-zero type determines which type of backref to fetch
- //rb_num_t key = jit_get_arg(jit, 0);
- let rtype = jit_get_arg(jit, 1).as_u64();
+    //rb_num_t key = jit.get_arg(0);
+ let rtype = jit.get_arg(1).as_u64();
if rtype == 0 {
// not yet implemented
- return CantCompile;
+ return None;
} else if rtype & 0x01 != 0 {
// Fetch a "special" backref based on a char encoded by shifting by 1
// Can raise if matchdata uninitialized
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_non_leaf_call(jit, asm);
// call rb_backref_get()
- add_comment(cb, "rb_backref_get");
- call_ptr(cb, REG0, rb_backref_get as *const u8);
- mov(cb, C_ARG_REGS[0], RAX);
+ asm_comment!(asm, "rb_backref_get");
+ let backref = asm.ccall(rb_backref_get as *const u8, vec![]);
let rt_u8: u8 = (rtype >> 1).try_into().unwrap();
- match rt_u8.into() {
+ let val = match rt_u8.into() {
'&' => {
- add_comment(cb, "rb_reg_last_match");
- call_ptr(cb, REG0, rb_reg_last_match as *const u8);
+ asm_comment!(asm, "rb_reg_last_match");
+ asm.ccall(rb_reg_last_match as *const u8, vec![backref])
}
'`' => {
- add_comment(cb, "rb_reg_match_pre");
- call_ptr(cb, REG0, rb_reg_match_pre as *const u8);
+ asm_comment!(asm, "rb_reg_match_pre");
+ asm.ccall(rb_reg_match_pre as *const u8, vec![backref])
}
'\'' => {
- add_comment(cb, "rb_reg_match_post");
- call_ptr(cb, REG0, rb_reg_match_post as *const u8);
+ asm_comment!(asm, "rb_reg_match_post");
+ asm.ccall(rb_reg_match_post as *const u8, vec![backref])
}
'+' => {
- add_comment(cb, "rb_reg_match_last");
- call_ptr(cb, REG0, rb_reg_match_last as *const u8);
+ asm_comment!(asm, "rb_reg_match_last");
+ asm.ccall(rb_reg_match_last as *const u8, vec![backref])
}
_ => panic!("invalid back-ref"),
- }
+ };
- let stack_ret = ctx.stack_push(Type::Unknown);
- mov(cb, stack_ret, RAX);
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, val);
- KeepCompiling
+ Some(KeepCompiling)
} else {
// Fetch the N-th match from the last backref based on type shifted by 1
// Can raise if matchdata uninitialized
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_non_leaf_call(jit, asm);
// call rb_backref_get()
- add_comment(cb, "rb_backref_get");
- call_ptr(cb, REG0, rb_backref_get as *const u8);
+ asm_comment!(asm, "rb_backref_get");
+ let backref = asm.ccall(rb_backref_get as *const u8, vec![]);
// rb_reg_nth_match((int)(type >> 1), backref);
- add_comment(cb, "rb_reg_nth_match");
- mov(
- cb,
- C_ARG_REGS[0],
- imm_opnd((rtype >> 1).try_into().unwrap()),
+ asm_comment!(asm, "rb_reg_nth_match");
+ let val = asm.ccall(
+ rb_reg_nth_match as *const u8,
+ vec![
+ Opnd::Imm((rtype >> 1).try_into().unwrap()),
+ backref,
+ ]
);
- mov(cb, C_ARG_REGS[1], RAX);
- call_ptr(cb, REG0, rb_reg_nth_match as *const u8);
- let stack_ret = ctx.stack_push(Type::Unknown);
- mov(cb, stack_ret, RAX);
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, val);
- KeepCompiling
+ Some(KeepCompiling)
}
}
fn gen_getclassvariable(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
// rb_vm_getclassvariable can raise exceptions.
- jit_prepare_routine_call(jit, ctx, cb, REG0);
-
- let cfp_iseq_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ);
- mov(cb, C_ARG_REGS[0], cfp_iseq_opnd);
- mov(cb, C_ARG_REGS[1], REG_CFP);
- mov(cb, C_ARG_REGS[2], uimm_opnd(jit_get_arg(jit, 0).as_u64()));
- mov(cb, C_ARG_REGS[3], uimm_opnd(jit_get_arg(jit, 1).as_u64()));
-
- call_ptr(cb, REG0, rb_vm_getclassvariable as *const u8);
+ jit_prepare_non_leaf_call(jit, asm);
+
+ let val_opnd = asm.ccall(
+ rb_vm_getclassvariable as *const u8,
+ vec![
+ Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ),
+ CFP,
+ Opnd::UImm(jit.get_arg(0).as_u64()),
+ Opnd::UImm(jit.get_arg(1).as_u64()),
+ ],
+ );
- let stack_top = ctx.stack_push(Type::Unknown);
- mov(cb, stack_top, RAX);
+ let top = asm.stack_push(Type::Unknown);
+ asm.mov(top, val_opnd);
- KeepCompiling
+ Some(KeepCompiling)
}
fn gen_setclassvariable(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+) -> Option<CodegenStatus> {
// rb_vm_setclassvariable can raise exceptions.
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_non_leaf_call(jit, asm);
+
+ let val = asm.stack_opnd(0);
+ asm.ccall(
+ rb_vm_setclassvariable as *const u8,
+ vec![
+ Opnd::mem(64, CFP, RUBY_OFFSET_CFP_ISEQ),
+ CFP,
+ Opnd::UImm(jit.get_arg(0).as_u64()),
+ val,
+ Opnd::UImm(jit.get_arg(1).as_u64()),
+ ],
+ );
+ asm.stack_pop(1); // Keep it during ccall for GC
+
+ Some(KeepCompiling)
+}
+
+fn gen_getconstant(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
+
+ let id = jit.get_arg(0).as_usize();
- let cfp_iseq_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ);
- mov(cb, C_ARG_REGS[0], cfp_iseq_opnd);
- mov(cb, C_ARG_REGS[1], REG_CFP);
- mov(cb, C_ARG_REGS[2], uimm_opnd(jit_get_arg(jit, 0).as_u64()));
- mov(cb, C_ARG_REGS[3], ctx.stack_pop(1));
- mov(cb, C_ARG_REGS[4], uimm_opnd(jit_get_arg(jit, 1).as_u64()));
+ // vm_get_ev_const can raise exceptions.
+ jit_prepare_non_leaf_call(jit, asm);
- call_ptr(cb, REG0, rb_vm_setclassvariable as *const u8);
+ let allow_nil_opnd = asm.stack_opnd(0);
+ let klass_opnd = asm.stack_opnd(1);
- KeepCompiling
+ extern "C" {
+ fn rb_vm_get_ev_const(ec: EcPtr, klass: VALUE, id: ID, allow_nil: VALUE) -> VALUE;
+ }
+
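+    // The extern declaration above mirrors the C prototype, so the arguments to
+    // ccall below line up as (ec, klass, id, allow_nil).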
+ let val_opnd = asm.ccall(
+ rb_vm_get_ev_const as *const u8,
+ vec![
+ EC,
+ klass_opnd,
+ id.into(),
+ allow_nil_opnd
+ ],
+ );
+ asm.stack_pop(2); // Keep them during ccall for GC
+
+ let top = asm.stack_push(Type::Unknown);
+ asm.mov(top, val_opnd);
+
+ Some(KeepCompiling)
}
-fn gen_opt_getinlinecache(
+fn gen_opt_getconstant_path(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let jump_offset = jit_get_arg(jit, 0);
- let const_cache_as_value = jit_get_arg(jit, 1);
+) -> Option<CodegenStatus> {
+ let const_cache_as_value = jit.get_arg(0);
let ic: *const iseq_inline_constant_cache = const_cache_as_value.as_ptr();
+ let idlist: *const ID = unsafe { (*ic).segments };
+
+ // Make sure there is an exit for this block as the interpreter might want
+ // to invalidate this block from yjit_constant_ic_update().
+ jit_ensure_block_entry_exit(jit, asm, ocb)?;
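+    // The `?` bails out of compiling this instruction (returning None) if the
+    // entry exit cannot be generated.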
// See vm_ic_hit_p(). The same conditions are checked in yjit_constant_ic_update().
+    // If a cache is not filled, fall back to the general C call.
let ice = unsafe { (*ic).entry };
if ice.is_null() {
- // In this case, leave a block that unconditionally side exits
- // for the interpreter to invalidate.
- return CantCompile;
- }
+ // Prepare for const_missing
+ jit_prepare_non_leaf_call(jit, asm);
- // Make sure there is an exit for this block as the interpreter might want
- // to invalidate this block from yjit_constant_ic_update().
- jit_ensure_block_entry_exit(jit, ocb);
+ // If this does not trigger const_missing, vm_ic_update will invalidate this block.
+ extern "C" {
+ fn rb_vm_opt_getconstant_path(ec: EcPtr, cfp: CfpPtr, ic: *const u8) -> VALUE;
+ }
+ let val = asm.ccall(
+ rb_vm_opt_getconstant_path as *const u8,
+ vec![EC, CFP, Opnd::const_ptr(ic as *const u8)],
+ );
+
+ let stack_top = asm.stack_push(Type::Unknown);
+ asm.store(stack_top, val);
+
+ jump_to_next_insn(jit, asm, ocb);
+ return Some(EndBlock);
+ }
if !unsafe { (*ice).ic_cref }.is_null() {
// Cache is keyed on a certain lexical scope. Use the interpreter's cache.
- let side_exit = get_side_exit(jit, ocb, ctx);
+ let inline_cache = asm.load(Opnd::const_ptr(ic as *const u8));
// Call function to verify the cache. It doesn't allocate or call methods.
- mov(cb, C_ARG_REGS[0], const_ptr_opnd(ic as *const u8));
- mov(cb, C_ARG_REGS[1], mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP));
- call_ptr(cb, REG0, rb_vm_ic_hit_p as *const u8);
+ let ret_val = asm.ccall(
+ rb_vm_ic_hit_p as *const u8,
+ vec![inline_cache, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP)]
+ );
+
+ // Check the result. SysV only specifies one byte for _Bool return values,
+ // so it's important we only check one bit to ignore the higher bits in the register.
+ asm.test(ret_val, 1.into());
+ asm.jz(Target::side_exit(Counter::opt_getconstant_path_ic_miss));
- // Check the result. _Bool is one byte in SysV.
- test(cb, AL, AL);
- jz_ptr(cb, counted_exit!(ocb, side_exit, opt_getinlinecache_miss));
+ let inline_cache = asm.load(Opnd::const_ptr(ic as *const u8));
+
+ let ic_entry = asm.load(Opnd::mem(
+ 64,
+ inline_cache,
+ RUBY_OFFSET_IC_ENTRY
+ ));
+
+ let ic_entry_val = asm.load(Opnd::mem(
+ 64,
+ ic_entry,
+ RUBY_OFFSET_ICE_VALUE
+ ));
// Push ic->entry->value
- mov(cb, REG0, const_ptr_opnd(ic as *mut u8));
- mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_IC_ENTRY));
- let stack_top = ctx.stack_push(Type::Unknown);
- mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_ICE_VALUE));
- mov(cb, stack_top, REG0);
+ let stack_top = asm.stack_push(Type::Unknown);
+ asm.store(stack_top, ic_entry_val);
} else {
// Optimize for single ractor mode.
- // FIXME: This leaks when st_insert raises NoMemoryError
- if !assume_single_ractor_mode(jit, ocb) {
- return CantCompile;
+ if !assume_single_ractor_mode(jit, asm, ocb) {
+ gen_counter_incr(asm, Counter::opt_getconstant_path_multi_ractor);
+ return None;
}
// Invalidate output code on any constant writes associated with
// constants referenced within the current block.
- assume_stable_constant_names(jit, ocb);
+ jit.assume_stable_constant_names(asm, ocb, idlist);
- jit_putobject(jit, ctx, cb, unsafe { (*ice).value });
+ jit_putobject(asm, unsafe { (*ice).value });
}
- // Jump over the code for filling the cache
- let jump_idx = jit_next_insn_idx(jit) + jump_offset.as_u32();
- gen_direct_jump(
- jit,
- ctx,
- BlockId {
- iseq: jit.iseq,
- idx: jump_idx,
- },
- cb,
- );
- EndBlock
+ jump_to_next_insn(jit, asm, ocb);
+ Some(EndBlock)
}
// Push the explicit block parameter onto the temporary stack. Part of the
@@ -5589,200 +9697,249 @@ fn gen_opt_getinlinecache(
// explicit block parameters.
fn gen_getblockparamproxy(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- // A mirror of the interpreter code. Checking for the case
- // where it's pushing rb_block_param_proxy.
- let side_exit = get_side_exit(jit, ocb, ctx);
+) -> Option<CodegenStatus> {
+ if !jit.at_current_insn() {
+ defer_compilation(jit, asm, ocb);
+ return Some(EndBlock);
+ }
// EP level
- let level = jit_get_arg(jit, 1).as_u32();
+ let level = jit.get_arg(1).as_u32();
+
+ // Peek at the block handler so we can check whether it's nil
+ let comptime_handler = jit.peek_at_block_handler(level);
+
+ // Filter for the 4 cases we currently handle
+ if !(comptime_handler.as_u64() == 0 || // no block given
+ comptime_handler.as_u64() & 0x3 == 0x1 || // iseq block (no associated GC managed object)
+ comptime_handler.as_u64() & 0x3 == 0x3 || // ifunc block (no associated GC managed object)
+ unsafe { rb_obj_is_proc(comptime_handler) }.test() // block is a Proc
+ ) {
+ // Missing the symbol case, where we basically need to call Symbol#to_proc at runtime
+ gen_counter_incr(asm, Counter::gbpp_unsupported_type);
+ return None;
+ }
// Load environment pointer EP from CFP
- gen_get_ep(cb, REG0, level);
+ let ep_opnd = gen_get_ep(asm, level);
// Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero
- let flag_check = mem_opnd(
+ let flag_check = Opnd::mem(
64,
- REG0,
- (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_FLAGS as i32),
- );
- test(
- cb,
- flag_check,
- uimm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()),
+ ep_opnd,
+ SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32),
);
- jnz_ptr(cb, counted_exit!(ocb, side_exit, gbpp_block_param_modified));
+ asm.test(flag_check, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into());
+ asm.jnz(Target::side_exit(Counter::gbpp_block_param_modified));
// Load the block handler for the current frame
// note, VM_ASSERT(VM_ENV_LOCAL_P(ep))
- mov(
- cb,
- REG0,
- mem_opnd(
- 64,
- REG0,
- (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32),
- ),
+ let block_handler = asm.load(
+ Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)
);
- // Block handler is a tagged pointer. Look at the tag. 0x03 is from VM_BH_ISEQ_BLOCK_P().
- and(cb, REG0_8, imm_opnd(0x3));
+ // Use block handler sample to guide specialization...
+ // NOTE: we use jit_chain_guard() in this decision tree, and since
+    // there are only a few cases, it should never reach the depth limit and use
+ // the exit counter we pass to it.
+ //
+ // No block given
+ if comptime_handler.as_u64() == 0 {
+ // Bail if there is a block handler
+ asm.cmp(block_handler, Opnd::UImm(0));
- // Bail unless VM_BH_ISEQ_BLOCK_P(bh). This also checks for null.
- cmp(cb, REG0_8, imm_opnd(0x1));
- jnz_ptr(
- cb,
- counted_exit!(ocb, side_exit, gbpp_block_handler_not_iseq),
- );
+ jit_chain_guard(
+ JCC_JNZ,
+ jit,
+ asm,
+ ocb,
+ SEND_MAX_DEPTH,
+ Counter::gbpp_block_handler_not_none,
+ );
- // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr.
- mov(
- cb,
- REG0,
- const_ptr_opnd(unsafe { rb_block_param_proxy }.as_ptr()),
- );
- assert!(!unsafe { rb_block_param_proxy }.special_const_p());
- let top = ctx.stack_push(Type::UnknownHeap);
- mov(cb, top, REG0);
+ jit_putobject(asm, Qnil);
+ } else if comptime_handler.as_u64() & 0x1 == 0x1 {
+ // This handles two cases which are nearly identical
+ // Block handler is a tagged pointer. Look at the tag.
+ // VM_BH_ISEQ_BLOCK_P(): block_handler & 0x03 == 0x01
+ // VM_BH_IFUNC_P(): block_handler & 0x03 == 0x03
+ // So to check for either of those cases we can use: val & 0x1 == 0x1
+ const _: () = assert!(RUBY_SYMBOL_FLAG & 1 == 0, "guard below rejects symbol block handlers");
+ // Procs are aligned heap pointers so testing the bit rejects them too.
+
+ asm.test(block_handler, 0x1.into());
+ jit_chain_guard(
+ JCC_JZ,
+ jit,
+ asm,
+ ocb,
+ SEND_MAX_DEPTH,
+ Counter::gbpp_block_handler_not_iseq,
+ );
+
+ // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr.
+ assert!(!unsafe { rb_block_param_proxy }.special_const_p());
+
+ let top = asm.stack_push(Type::BlockParamProxy);
+ asm.mov(top, Opnd::const_ptr(unsafe { rb_block_param_proxy }.as_ptr()));
+ } else if unsafe { rb_obj_is_proc(comptime_handler) }.test() {
+ // The block parameter is a Proc
+ c_callable! {
+ // We can't hold values across C calls due to a backend limitation,
+ // so we'll use this thin wrapper around rb_obj_is_proc().
+ fn is_proc(object: VALUE) -> VALUE {
+ if unsafe { rb_obj_is_proc(object) }.test() {
+                    // VM_BH_TO_PROC() is the identity function.
+ object
+ } else {
+ Qfalse
+ }
+ }
+ }
- KeepCompiling
+ // Simple predicate, no need to jit_prepare_non_leaf_call()
+ let proc_or_false = asm.ccall(is_proc as _, vec![block_handler]);
+
+ // Guard for proc
+ asm.cmp(proc_or_false, Qfalse.into());
+ jit_chain_guard(
+ JCC_JE,
+ jit,
+ asm,
+ ocb,
+ SEND_MAX_DEPTH,
+ Counter::gbpp_block_handler_not_proc,
+ );
+
+ let top = asm.stack_push(Type::Unknown);
+ asm.mov(top, proc_or_false);
+ } else {
+ unreachable!("absurd given initial filtering");
+ }
+
+ jump_to_next_insn(jit, asm, ocb);
+
+ Some(EndBlock)
}
fn gen_getblockparam(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
- ocb: &mut OutlinedCb,
-) -> CodegenStatus {
+ asm: &mut Assembler,
+ _ocb: &mut OutlinedCb,
+) -> Option<CodegenStatus> {
// EP level
- let level = jit_get_arg(jit, 1).as_u32();
+ let level = jit.get_arg(1).as_u32();
// Save the PC and SP because we might allocate
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_call_with_gc(jit, asm);
+ asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency.
// A mirror of the interpreter code. Checking for the case
// where it's pushing rb_block_param_proxy.
- let side_exit = get_side_exit(jit, ocb, ctx);
// Load environment pointer EP from CFP
- gen_get_ep(cb, REG1, level);
+ let ep_opnd = gen_get_ep(asm, level);
// Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero
- let flag_check = mem_opnd(
- 64,
- REG1,
- (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_FLAGS as i32),
- );
+ let flag_check = Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32));
// FIXME: This is testing bits in the same place that the WB check is testing.
// We should combine these at some point
- test(
- cb,
- flag_check,
- uimm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()),
- );
+ asm.test(flag_check, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into());
// If the frame flag has been modified, then the actual proc value is
// already in the EP and we should just use the value.
- let frame_flag_modified = cb.new_label("frame_flag_modified".to_string());
- jnz_label(cb, frame_flag_modified);
+ let frame_flag_modified = asm.new_label("frame_flag_modified");
+ asm.jnz(frame_flag_modified);
// This instruction writes the block handler to the EP. If we need to
// fire a write barrier for the write, then exit (we'll let the
// interpreter handle it so it can fire the write barrier).
// flags & VM_ENV_FLAG_WB_REQUIRED
- let flags_opnd = mem_opnd(
+ let flags_opnd = Opnd::mem(
64,
- REG1,
- SIZEOF_VALUE as i32 * VM_ENV_DATA_INDEX_FLAGS as i32,
+ ep_opnd,
+ SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_FLAGS as i32,
);
- test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED.into()));
+ asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into());
// if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
- jnz_ptr(cb, side_exit);
-
- // Load the block handler for the current frame
- // note, VM_ASSERT(VM_ENV_LOCAL_P(ep))
- mov(
- cb,
- C_ARG_REGS[1],
- mem_opnd(
- 64,
- REG1,
- (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32),
- ),
- );
+ asm.jnz(Target::side_exit(Counter::gbp_wb_required));
// Convert the block handler in to a proc
// call rb_vm_bh_to_procval(const rb_execution_context_t *ec, VALUE block_handler)
- mov(cb, C_ARG_REGS[0], REG_EC);
- call_ptr(cb, REG0, rb_vm_bh_to_procval as *const u8);
+ let proc = asm.ccall(
+ rb_vm_bh_to_procval as *const u8,
+ vec![
+ EC,
+ // The block handler for the current frame
+ // note, VM_ASSERT(VM_ENV_LOCAL_P(ep))
+ Opnd::mem(
+ 64,
+ ep_opnd,
+ SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL,
+ ),
+ ]
+ );
// Load environment pointer EP from CFP (again)
- gen_get_ep(cb, REG1, level);
-
- // Set the frame modified flag
- or(cb, flag_check, uimm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()));
+ let ep_opnd = gen_get_ep(asm, level);
// Write the value at the environment pointer
- let idx = jit_get_arg(jit, 0).as_i32();
- let offs = -(SIZEOF_VALUE as i32 * idx);
- mov(cb, mem_opnd(64, REG1, offs), RAX);
+ let idx = jit.get_arg(0).as_i32();
+ let offs = -(SIZEOF_VALUE_I32 * idx);
+ asm.mov(Opnd::mem(64, ep_opnd, offs), proc);
+
+ // Set the frame modified flag
+ let flag_check = Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32));
+ let modified_flag = asm.or(flag_check, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into());
+ asm.store(flag_check, modified_flag);
- cb.write_label(frame_flag_modified);
+ asm.write_label(frame_flag_modified);
// Push the proc on the stack
- let stack_ret = ctx.stack_push(Type::Unknown);
- mov(cb, RAX, mem_opnd(64, REG1, offs));
- mov(cb, stack_ret, RAX);
-
- cb.link_labels();
+ let stack_ret = asm.stack_push(Type::Unknown);
+ let ep_opnd = gen_get_ep(asm, level);
+ asm.mov(stack_ret, Opnd::mem(64, ep_opnd, offs));
- KeepCompiling
+ Some(KeepCompiling)
}
fn gen_invokebuiltin(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let bf: *const rb_builtin_function = jit_get_arg(jit, 0).as_ptr();
+) -> Option<CodegenStatus> {
+ let bf: *const rb_builtin_function = jit.get_arg(0).as_ptr();
let bf_argc: usize = unsafe { (*bf).argc }.try_into().expect("non negative argc");
// ec, self, and arguments
- if bf_argc + 2 > C_ARG_REGS.len() {
- return CantCompile;
+ if bf_argc + 2 > C_ARG_OPNDS.len() {
+ incr_counter!(invokebuiltin_too_many_args);
+ return None;
}
// If the calls don't allocate, do they need up to date PC, SP?
- jit_prepare_routine_call(jit, ctx, cb, REG0);
+ jit_prepare_non_leaf_call(jit, asm);
// Call the builtin func (ec, recv, arg1, arg2, ...)
- mov(cb, C_ARG_REGS[0], REG_EC);
- mov(
- cb,
- C_ARG_REGS[1],
- mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF),
- );
+ let mut args = vec![EC, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)];
// Copy arguments from the stack
for i in 0..bf_argc {
- let stack_opnd = ctx.stack_opnd((bf_argc - i - 1) as i32);
- let c_arg_reg = C_ARG_REGS[2 + i];
- mov(cb, c_arg_reg, stack_opnd);
+ let stack_opnd = asm.stack_opnd((bf_argc - i - 1) as i32);
+ args.push(stack_opnd);
}
- call_ptr(cb, REG0, unsafe { (*bf).func_ptr } as *const u8);
+ let val = asm.ccall(unsafe { (*bf).func_ptr } as *const u8, args);
// Push the return value
- ctx.stack_pop(bf_argc);
- let stack_ret = ctx.stack_push(Type::Unknown);
- mov(cb, stack_ret, RAX);
+ asm.stack_pop(bf_argc);
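+    // Popping only after the ccall matches the "Keep them during ccall for GC"
+    // pattern used at the other call sites in this file.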
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, val);
- KeepCompiling
+ Some(KeepCompiling)
}
// opt_invokebuiltin_delegate calls a builtin function, like
@@ -5790,51 +9947,44 @@ fn gen_invokebuiltin(
// stack uses the argument locals (and self) from the current method.
fn gen_opt_invokebuiltin_delegate(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
_ocb: &mut OutlinedCb,
-) -> CodegenStatus {
- let bf: *const rb_builtin_function = jit_get_arg(jit, 0).as_ptr();
+) -> Option<CodegenStatus> {
+ let bf: *const rb_builtin_function = jit.get_arg(0).as_ptr();
let bf_argc = unsafe { (*bf).argc };
- let start_index = jit_get_arg(jit, 1).as_i32();
+ let start_index = jit.get_arg(1).as_i32();
// ec, self, and arguments
- if bf_argc + 2 > (C_ARG_REGS.len() as i32) {
- return CantCompile;
+ if bf_argc + 2 > (C_ARG_OPNDS.len() as i32) {
+ incr_counter!(invokebuiltin_too_many_args);
+ return None;
}
// If the calls don't allocate, do they need up to date PC, SP?
- jit_prepare_routine_call(jit, ctx, cb, REG0);
-
- if bf_argc > 0 {
- // Load environment pointer EP from CFP
- mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP));
- }
+ jit_prepare_non_leaf_call(jit, asm);
// Call the builtin func (ec, recv, arg1, arg2, ...)
- mov(cb, C_ARG_REGS[0], REG_EC);
- mov(
- cb,
- C_ARG_REGS[1],
- mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF),
- );
+ let mut args = vec![EC, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)];
// Copy arguments from locals
- for i in 0..bf_argc {
- let table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) };
- let offs: i32 = -(table_size as i32) - (VM_ENV_DATA_SIZE as i32) + 1 + start_index + i;
- let local_opnd = mem_opnd(64, REG0, offs * (SIZEOF_VALUE as i32));
- let offs: usize = (i + 2) as usize;
- let c_arg_reg = C_ARG_REGS[offs];
- mov(cb, c_arg_reg, local_opnd);
+ if bf_argc > 0 {
+ // Load environment pointer EP from CFP
+ let ep = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_EP));
+
+ for i in 0..bf_argc {
+ let table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) };
+ let offs: i32 = -(table_size as i32) - (VM_ENV_DATA_SIZE as i32) + 1 + start_index + i;
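+            // Locals are addressed at negative offsets from EP: the VM_ENV_DATA_SIZE
+            // environment slots sit at the top (ending at EP) with the method's locals
+            // below them; start_index picks the first builtin argument.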
+ let local_opnd = Opnd::mem(64, ep, offs * SIZEOF_VALUE_I32);
+ args.push(local_opnd);
+ }
}
- call_ptr(cb, REG0, unsafe { (*bf).func_ptr } as *const u8);
+ let val = asm.ccall(unsafe { (*bf).func_ptr } as *const u8, args);
// Push the return value
- let stack_ret = ctx.stack_push(Type::Unknown);
- mov(cb, stack_ret, RAX);
+ let stack_ret = asm.stack_push(Type::Unknown);
+ asm.mov(stack_ret, val);
- KeepCompiling
+ Some(KeepCompiling)
}
/// Maps a YARV opcode to a code generation function (if supported)
@@ -5858,6 +10008,7 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
YARVINSN_setn => Some(gen_setn),
YARVINSN_topn => Some(gen_topn),
YARVINSN_adjuststack => Some(gen_adjuststack),
+
YARVINSN_getlocal => Some(gen_getlocal),
YARVINSN_getlocal_WC_0 => Some(gen_getlocal_wc0),
YARVINSN_getlocal_WC_1 => Some(gen_getlocal_wc1),
@@ -5880,11 +10031,19 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
YARVINSN_opt_mod => Some(gen_opt_mod),
YARVINSN_opt_str_freeze => Some(gen_opt_str_freeze),
YARVINSN_opt_str_uminus => Some(gen_opt_str_uminus),
+ YARVINSN_opt_newarray_send => Some(gen_opt_newarray_send),
YARVINSN_splatarray => Some(gen_splatarray),
+ YARVINSN_splatkw => Some(gen_splatkw),
+ YARVINSN_concatarray => Some(gen_concatarray),
+ YARVINSN_concattoarray => Some(gen_concattoarray),
+ YARVINSN_pushtoarray => Some(gen_pushtoarray),
YARVINSN_newrange => Some(gen_newrange),
YARVINSN_putstring => Some(gen_putstring),
+ YARVINSN_putchilledstring => Some(gen_putchilledstring),
YARVINSN_expandarray => Some(gen_expandarray),
YARVINSN_defined => Some(gen_defined),
+ YARVINSN_definedivar => Some(gen_definedivar),
+ YARVINSN_checkmatch => Some(gen_checkmatch),
YARVINSN_checkkeyword => Some(gen_checkkeyword),
YARVINSN_concatstrings => Some(gen_concatstrings),
YARVINSN_getinstancevariable => Some(gen_getinstancevariable),
@@ -5894,6 +10053,7 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
YARVINSN_opt_neq => Some(gen_opt_neq),
YARVINSN_opt_aref => Some(gen_opt_aref),
YARVINSN_opt_aset => Some(gen_opt_aset),
+ YARVINSN_opt_aref_with => Some(gen_opt_aref_with),
YARVINSN_opt_mult => Some(gen_opt_mult),
YARVINSN_opt_div => Some(gen_opt_div),
YARVINSN_opt_ltlt => Some(gen_opt_ltlt),
@@ -5904,7 +10064,8 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
YARVINSN_opt_size => Some(gen_opt_size),
YARVINSN_opt_length => Some(gen_opt_length),
YARVINSN_opt_regexpmatch2 => Some(gen_opt_regexpmatch2),
- YARVINSN_opt_getinlinecache => Some(gen_opt_getinlinecache),
+ YARVINSN_getconstant => Some(gen_getconstant),
+ YARVINSN_opt_getconstant_path => Some(gen_opt_getconstant_path),
YARVINSN_invokebuiltin => Some(gen_invokebuiltin),
YARVINSN_opt_invokebuiltin_delegate => Some(gen_opt_invokebuiltin_delegate),
YARVINSN_opt_invokebuiltin_delegate_leave => Some(gen_opt_invokebuiltin_delegate),
@@ -5912,12 +10073,14 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
YARVINSN_branchif => Some(gen_branchif),
YARVINSN_branchunless => Some(gen_branchunless),
YARVINSN_branchnil => Some(gen_branchnil),
+ YARVINSN_throw => Some(gen_throw),
YARVINSN_jump => Some(gen_jump),
YARVINSN_getblockparamproxy => Some(gen_getblockparamproxy),
YARVINSN_getblockparam => Some(gen_getblockparam),
YARVINSN_opt_send_without_block => Some(gen_opt_send_without_block),
YARVINSN_send => Some(gen_send),
+ YARVINSN_invokeblock => Some(gen_invokeblock),
YARVINSN_invokesuper => Some(gen_invokesuper),
YARVINSN_leave => Some(gen_leave),
@@ -5937,20 +10100,114 @@ fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
}
// Return true when the codegen function generates code.
-// known_recv_klass is non-NULL when the caller has used jit_guard_known_klass().
+// known_recv_class has Some value when the caller has used jit_guard_known_klass().
// See yjit_reg_method().
type MethodGenFn = fn(
jit: &mut JITState,
- ctx: &mut Context,
- cb: &mut CodeBlock,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
ci: *const rb_callinfo,
cme: *const rb_callable_method_entry_t,
- block: Option<IseqPtr>,
+ block: Option<BlockHandler>,
argc: i32,
- known_recv_class: *const VALUE,
+ known_recv_class: Option<VALUE>,
) -> bool;
+/// Methods for generating code for hardcoded (usually C) methods
+static mut METHOD_CODEGEN_TABLE: Option<HashMap<usize, MethodGenFn>> = None;
+
+/// Register codegen functions for some Ruby core methods
+pub fn yjit_reg_method_codegen_fns() {
+ unsafe {
+ assert!(METHOD_CODEGEN_TABLE.is_none());
+ METHOD_CODEGEN_TABLE = Some(HashMap::default());
+
+ // Specialization for C methods. See yjit_reg_method() for details.
+ yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not);
+
+ yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true);
+ yjit_reg_method(rb_mKernel, "nil?", jit_rb_false);
+ yjit_reg_method(rb_mKernel, "is_a?", jit_rb_kernel_is_a);
+ yjit_reg_method(rb_mKernel, "kind_of?", jit_rb_kernel_is_a);
+ yjit_reg_method(rb_mKernel, "instance_of?", jit_rb_kernel_instance_of);
+
+ yjit_reg_method(rb_cBasicObject, "==", jit_rb_obj_equal);
+ yjit_reg_method(rb_cBasicObject, "equal?", jit_rb_obj_equal);
+ yjit_reg_method(rb_cBasicObject, "!=", jit_rb_obj_not_equal);
+ yjit_reg_method(rb_mKernel, "eql?", jit_rb_obj_equal);
+ yjit_reg_method(rb_cModule, "==", jit_rb_obj_equal);
+ yjit_reg_method(rb_cModule, "===", jit_rb_mod_eqq);
+ yjit_reg_method(rb_cSymbol, "==", jit_rb_obj_equal);
+ yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal);
+ yjit_reg_method(rb_cInteger, "==", jit_rb_int_equal);
+ yjit_reg_method(rb_cInteger, "===", jit_rb_int_equal);
+
+ yjit_reg_method(rb_cInteger, "succ", jit_rb_int_succ);
+ yjit_reg_method(rb_cInteger, "/", jit_rb_int_div);
+ yjit_reg_method(rb_cInteger, "<<", jit_rb_int_lshift);
+ yjit_reg_method(rb_cInteger, ">>", jit_rb_int_rshift);
+ yjit_reg_method(rb_cInteger, "^", jit_rb_int_xor);
+ yjit_reg_method(rb_cInteger, "[]", jit_rb_int_aref);
+
+ yjit_reg_method(rb_cFloat, "+", jit_rb_float_plus);
+ yjit_reg_method(rb_cFloat, "-", jit_rb_float_minus);
+ yjit_reg_method(rb_cFloat, "*", jit_rb_float_mul);
+ yjit_reg_method(rb_cFloat, "/", jit_rb_float_div);
+
+ yjit_reg_method(rb_cString, "empty?", jit_rb_str_empty_p);
+ yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s);
+ yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s);
+ yjit_reg_method(rb_cString, "length", jit_rb_str_length);
+ yjit_reg_method(rb_cString, "size", jit_rb_str_length);
+ yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize);
+ yjit_reg_method(rb_cString, "getbyte", jit_rb_str_getbyte);
+ yjit_reg_method(rb_cString, "setbyte", jit_rb_str_setbyte);
+ yjit_reg_method(rb_cString, "byteslice", jit_rb_str_byteslice);
+ yjit_reg_method(rb_cString, "<<", jit_rb_str_concat);
+ yjit_reg_method(rb_cString, "+@", jit_rb_str_uplus);
+
+ yjit_reg_method(rb_cArray, "empty?", jit_rb_ary_empty_p);
+ yjit_reg_method(rb_cArray, "length", jit_rb_ary_length);
+ yjit_reg_method(rb_cArray, "size", jit_rb_ary_length);
+ yjit_reg_method(rb_cArray, "<<", jit_rb_ary_push);
+
+ yjit_reg_method(rb_cHash, "empty?", jit_rb_hash_empty_p);
+
+ yjit_reg_method(rb_mKernel, "respond_to?", jit_obj_respond_to);
+ yjit_reg_method(rb_mKernel, "block_given?", jit_rb_f_block_given_p);
+
+ yjit_reg_method(rb_cClass, "superclass", jit_rb_class_superclass);
+
+ yjit_reg_method(rb_singleton_class(rb_cThread), "current", jit_thread_s_current);
+ }
+}
+
+// Register a specialized codegen function for a particular method. Note that
+// if the function returns true, the code it generates runs without a
+// control frame and without interrupt checks. To avoid creating observable
+// behavior changes, the codegen function should only target simple code paths
+// that do not allocate and do not make method calls.
+fn yjit_reg_method(klass: VALUE, mid_str: &str, gen_fn: MethodGenFn) {
+ let id_string = std::ffi::CString::new(mid_str).expect("couldn't convert to CString!");
+ let mid = unsafe { rb_intern(id_string.as_ptr()) };
+ let me = unsafe { rb_method_entry_at(klass, mid) };
+
+ if me.is_null() {
+ panic!("undefined optimized method!: {mid_str}");
+ }
+
+ // For now, only cfuncs are supported
+ //RUBY_ASSERT(me && me->def);
+ //RUBY_ASSERT(me->def->type == VM_METHOD_TYPE_CFUNC);
+
+ let method_serial = unsafe {
+ let def = (*me).def;
+ get_def_method_serial(def)
+ };
+
+ unsafe { METHOD_CODEGEN_TABLE.as_mut().unwrap().insert(method_serial, gen_fn); }
+}
+
/// Global state needed for code generation
pub struct CodegenGlobals {
/// Inline code block (fast path)
@@ -5962,24 +10219,31 @@ pub struct CodegenGlobals {
/// Code for exiting back to the interpreter from the leave instruction
leave_exit_code: CodePtr,
+ /// Code for exiting back to the interpreter after handling an exception
+ leave_exception_code: CodePtr,
+
// For exiting from YJIT frame from branch_stub_hit().
- // Filled by gen_code_for_exit_from_stub().
+ // Filled by gen_stub_exit().
stub_exit_code: CodePtr,
+ // For servicing branch stubs
+ branch_stub_hit_trampoline: CodePtr,
+
+ // For servicing entry stubs
+ entry_stub_hit_trampoline: CodePtr,
+
// Code for full logic of returning from C method and exiting to the interpreter
outline_full_cfunc_return_pos: CodePtr,
/// For implementing global code invalidation
global_inval_patches: Vec<CodepagePatch>,
- /// For implementing global code invalidation. The number of bytes counting from the beginning
- /// of the inline code block that should not be changed. After patching for global invalidation,
- /// no one should make changes to the invalidated code region anymore. This is used to
- /// break out of invalidation race when there are multiple ractors.
- inline_frozen_bytes: usize,
+    /// Page indexes for outlined code that are not associated with any ISEQ.
+ ocb_pages: Vec<usize>,
- // Methods for generating code for hardcoded (usually C) methods
- method_codegen_table: HashMap<u64, MethodGenFn>,
+ /// Map of cfunc YARV PCs to CMEs and receiver indexes, used to lazily push
+ /// a frame when rb_yjit_lazy_push_frame() is called with a PC in this HashMap.
+ pc_to_cfunc: HashMap<*mut VALUE, (*const rb_callable_method_entry_t, u8)>,
}
/// For implementing global code invalidation. A position in the inline
@@ -5996,29 +10260,12 @@ static mut CODEGEN_GLOBALS: Option<CodegenGlobals> = None;
impl CodegenGlobals {
/// Initialize the codegen globals
pub fn init() {
- // Executable memory size in MiB
- let mem_size = get_option!(exec_mem_size) * 1024 * 1024;
+ // Executable memory and code page size in bytes
+ let mem_size = get_option!(exec_mem_size);
#[cfg(not(test))]
let (mut cb, mut ocb) = {
- // TODO(alan): we can error more gracefully when the user gives
- // --yjit-exec-mem=absurdly-large-number
- //
- // 2 GiB. It's likely a bug if we generate this much code.
- const MAX_BUFFER_SIZE: usize = 2 * 1024 * 1024 * 1024;
- assert!(mem_size <= MAX_BUFFER_SIZE);
- let mem_size_u32 = mem_size as u32;
- let half_size = mem_size / 2;
-
- let page_size = unsafe { rb_yjit_get_page_size() };
- let assert_page_aligned = |ptr| assert_eq!(
- 0,
- ptr as usize % page_size.as_usize(),
- "Start of virtual address block should be page-aligned",
- );
-
- let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size_u32) };
- let second_half = virt_block.wrapping_add(half_size);
+ let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size as u32) };
// Memory protection syscalls need page-aligned addresses, so check it here. Assuming
// `virt_block` is page-aligned, `second_half` should be page-aligned as long as the
@@ -6027,26 +10274,26 @@ impl CodegenGlobals {
//
// Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB
// (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though.
- assert_page_aligned(virt_block);
- assert_page_aligned(second_half);
+ let page_size = unsafe { rb_yjit_get_page_size() };
+ assert_eq!(
+ virt_block as usize % page_size.as_usize(), 0,
+ "Start of virtual address block should be page-aligned",
+ );
use crate::virtualmem::*;
+ use std::ptr::NonNull;
- let first_half = VirtualMem::new(
- SystemAllocator {},
- page_size,
- virt_block,
- half_size
- );
- let second_half = VirtualMem::new(
+ let mem_block = VirtualMem::new(
SystemAllocator {},
page_size,
- second_half,
- half_size
+ NonNull::new(virt_block).unwrap(),
+ mem_size,
);
+ let mem_block = Rc::new(RefCell::new(mem_block));
- let cb = CodeBlock::new(first_half);
- let ocb = OutlinedCb::wrap(CodeBlock::new(second_half));
+ let freed_pages = Rc::new(None);
+ let cb = CodeBlock::new(mem_block.clone(), false, freed_pages.clone());
+ let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, true, freed_pages));
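+        // The inline and outlined blocks now share a single reserved region and a
+        // common freed-page list instead of splitting the reservation in half.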
(cb, ocb)
};
@@ -6058,100 +10305,54 @@ impl CodegenGlobals {
#[cfg(test)]
let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(mem_size / 2));
- let leave_exit_code = gen_leave_exit(&mut ocb);
+ let ocb_start_addr = ocb.unwrap().get_write_ptr();
+ let leave_exit_code = gen_leave_exit(&mut ocb).unwrap();
+ let leave_exception_code = gen_leave_exception(&mut ocb).unwrap();
- let stub_exit_code = gen_code_for_exit_from_stub(&mut ocb);
+ let stub_exit_code = gen_stub_exit(&mut ocb).unwrap();
+
+ let branch_stub_hit_trampoline = gen_branch_stub_hit_trampoline(&mut ocb).unwrap();
+ let entry_stub_hit_trampoline = gen_entry_stub_hit_trampoline(&mut ocb).unwrap();
// Generate full exit code for C func
- let cfunc_exit_code = gen_full_cfunc_return(&mut ocb);
+ let cfunc_exit_code = gen_full_cfunc_return(&mut ocb).unwrap();
+
+ let ocb_end_addr = ocb.unwrap().get_write_ptr();
+ let ocb_pages = ocb.unwrap().addrs_to_pages(ocb_start_addr, ocb_end_addr);
// Mark all code memory as executable
cb.mark_all_executable();
ocb.unwrap().mark_all_executable();
- let mut codegen_globals = CodegenGlobals {
+ let codegen_globals = CodegenGlobals {
inline_cb: cb,
outlined_cb: ocb,
- leave_exit_code: leave_exit_code,
- stub_exit_code: stub_exit_code,
+ leave_exit_code,
+ leave_exception_code,
+ stub_exit_code,
outline_full_cfunc_return_pos: cfunc_exit_code,
+ branch_stub_hit_trampoline,
+ entry_stub_hit_trampoline,
global_inval_patches: Vec::new(),
- inline_frozen_bytes: 0,
- method_codegen_table: HashMap::new(),
+ ocb_pages,
+ pc_to_cfunc: HashMap::new(),
};
- // Register the method codegen functions
- codegen_globals.reg_method_codegen_fns();
-
// Initialize the codegen globals instance
unsafe {
CODEGEN_GLOBALS = Some(codegen_globals);
}
}
- // Register a specialized codegen function for a particular method. Note that
- // the if the function returns true, the code it generates runs without a
- // control frame and without interrupt checks. To avoid creating observable
- // behavior changes, the codegen function should only target simple code paths
- // that do not allocate and do not make method calls.
- fn yjit_reg_method(&mut self, klass: VALUE, mid_str: &str, gen_fn: MethodGenFn) {
- let id_string = std::ffi::CString::new(mid_str).expect("couldn't convert to CString!");
- let mid = unsafe { rb_intern(id_string.as_ptr()) };
- let me = unsafe { rb_method_entry_at(klass, mid) };
-
- if me.is_null() {
- panic!("undefined optimized method!");
- }
-
- // For now, only cfuncs are supported
- //RUBY_ASSERT(me && me->def);
- //RUBY_ASSERT(me->def->type == VM_METHOD_TYPE_CFUNC);
-
- let method_serial = unsafe {
- let def = (*me).def;
- get_def_method_serial(def)
- };
-
- self.method_codegen_table.insert(method_serial, gen_fn);
- }
-
- /// Register codegen functions for some Ruby core methods
- fn reg_method_codegen_fns(&mut self) {
- unsafe {
- // Specialization for C methods. See yjit_reg_method() for details.
- self.yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not);
-
- self.yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true);
- self.yjit_reg_method(rb_mKernel, "nil?", jit_rb_false);
-
- self.yjit_reg_method(rb_cBasicObject, "==", jit_rb_obj_equal);
- self.yjit_reg_method(rb_cBasicObject, "equal?", jit_rb_obj_equal);
- self.yjit_reg_method(rb_mKernel, "eql?", jit_rb_obj_equal);
- self.yjit_reg_method(rb_cModule, "==", jit_rb_obj_equal);
- self.yjit_reg_method(rb_cSymbol, "==", jit_rb_obj_equal);
- self.yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal);
-
- // rb_str_to_s() methods in string.c
- self.yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s);
- self.yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s);
- self.yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize);
- self.yjit_reg_method(rb_cString, "<<", jit_rb_str_concat);
- self.yjit_reg_method(rb_cString, "+@", jit_rb_str_uplus);
-
- // Thread.current
- self.yjit_reg_method(
- rb_singleton_class(rb_cThread),
- "current",
- jit_thread_s_current,
- );
- }
- }
-
/// Get a mutable reference to the codegen globals instance
pub fn get_instance() -> &'static mut CodegenGlobals {
unsafe { CODEGEN_GLOBALS.as_mut().unwrap() }
}
+ pub fn has_instance() -> bool {
+ unsafe { CODEGEN_GLOBALS.as_mut().is_some() }
+ }
+
/// Get a mutable reference to the inline code block
pub fn get_inline_cb() -> &'static mut CodeBlock {
&mut CodegenGlobals::get_instance().inline_cb
@@ -6166,14 +10367,26 @@ impl CodegenGlobals {
CodegenGlobals::get_instance().leave_exit_code
}
+ pub fn get_leave_exception_code() -> CodePtr {
+ CodegenGlobals::get_instance().leave_exception_code
+ }
+
pub fn get_stub_exit_code() -> CodePtr {
CodegenGlobals::get_instance().stub_exit_code
}
- pub fn push_global_inval_patch(i_pos: CodePtr, o_pos: CodePtr) {
+ pub fn push_global_inval_patch(inline_pos: CodePtr, outlined_pos: CodePtr, cb: &CodeBlock) {
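+        // Each inline patch position is later overwritten with a jump (hence the
+        // cb.jmp_ptr_bytes() check below), so a new patch must not begin inside
+        // the previous one.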
+ if let Some(last_patch) = CodegenGlobals::get_instance().global_inval_patches.last() {
+ let patch_offset = inline_pos.as_offset() - last_patch.inline_patch_pos.as_offset();
+ assert!(
+ patch_offset < 0 || cb.jmp_ptr_bytes() as i64 <= patch_offset,
+ "patches should not overlap (patch_offset: {patch_offset})",
+ );
+ }
+
let patch = CodepagePatch {
- inline_patch_pos: i_pos,
- outlined_target_pos: o_pos,
+ inline_patch_pos: inline_pos,
+ outlined_target_pos: outlined_pos,
};
CodegenGlobals::get_instance()
.global_inval_patches
@@ -6186,26 +10399,24 @@ impl CodegenGlobals {
mem::take(&mut globals.global_inval_patches)
}
- pub fn get_inline_frozen_bytes() -> usize {
- CodegenGlobals::get_instance().inline_frozen_bytes
+ pub fn get_outline_full_cfunc_return_pos() -> CodePtr {
+ CodegenGlobals::get_instance().outline_full_cfunc_return_pos
}
- pub fn set_inline_frozen_bytes(frozen_bytes: usize) {
- CodegenGlobals::get_instance().inline_frozen_bytes = frozen_bytes;
+ pub fn get_branch_stub_hit_trampoline() -> CodePtr {
+ CodegenGlobals::get_instance().branch_stub_hit_trampoline
}
- pub fn get_outline_full_cfunc_return_pos() -> CodePtr {
- CodegenGlobals::get_instance().outline_full_cfunc_return_pos
+ pub fn get_entry_stub_hit_trampoline() -> CodePtr {
+ CodegenGlobals::get_instance().entry_stub_hit_trampoline
}
- pub fn look_up_codegen_method(method_serial: u64) -> Option<MethodGenFn> {
- let table = &CodegenGlobals::get_instance().method_codegen_table;
+ pub fn get_ocb_pages() -> &'static Vec<usize> {
+ &CodegenGlobals::get_instance().ocb_pages
+ }
- let option_ref = table.get(&method_serial);
- match option_ref {
- None => None,
- Some(&mgf) => Some(mgf), // Deref
- }
+ pub fn get_pc_to_cfunc() -> &'static mut HashMap<*mut VALUE, (*const rb_callable_method_entry_t, u8)> {
+ &mut CodegenGlobals::get_instance().pc_to_cfunc
}
}
@@ -6213,17 +10424,19 @@ impl CodegenGlobals {
mod tests {
use super::*;
- fn setup_codegen() -> (JITState, Context, CodeBlock, OutlinedCb) {
- let blockid = BlockId {
- iseq: ptr::null(),
- idx: 0,
- };
- let block = Block::new(blockid, &Context::default());
+ fn setup_codegen() -> (JITState, Context, Assembler, CodeBlock, OutlinedCb) {
+ let cb = CodeBlock::new_dummy(256 * 1024);
return (
- JITState::new(&block),
- Context::new(),
- CodeBlock::new_dummy(256 * 1024),
+ JITState::new(
+ BlockId { iseq: std::ptr::null(), idx: 0 },
+ Context::default(),
+ cb.get_write_ptr(),
+ ptr::null(), // No execution context in tests. No peeking!
+ ),
+ Context::default(),
+ Assembler::new(),
+ cb,
OutlinedCb::wrap(CodeBlock::new_dummy(256 * 1024)),
);
}
@@ -6237,237 +10450,203 @@ mod tests {
#[test]
fn test_gen_exit() {
- let (_, ctx, mut cb, _) = setup_codegen();
- gen_exit(0 as *mut VALUE, &ctx, &mut cb);
+ let (_, _ctx, mut asm, mut cb, _) = setup_codegen();
+ gen_exit(0 as *mut VALUE, &mut asm);
+ asm.compile(&mut cb, None).unwrap();
assert!(cb.get_write_pos() > 0);
}
#[test]
fn test_get_side_exit() {
- let (mut jit, ctx, _, mut ocb) = setup_codegen();
- get_side_exit(&mut jit, &mut ocb, &ctx);
+ let (_jit, ctx, mut asm, _, mut ocb) = setup_codegen();
+ let side_exit_context = SideExitContext::new(0 as _, ctx);
+ asm.get_side_exit(&side_exit_context, None, &mut ocb);
assert!(ocb.unwrap().get_write_pos() > 0);
}
#[test]
fn test_gen_check_ints() {
- let (_, _ctx, mut cb, mut ocb) = setup_codegen();
- let side_exit = ocb.unwrap().get_write_ptr();
- gen_check_ints(&mut cb, side_exit);
+ let (_jit, _ctx, mut asm, _cb, _ocb) = setup_codegen();
+ asm.set_side_exit_context(0 as _, 0);
+ gen_check_ints(&mut asm, Counter::guard_send_interrupted);
}
#[test]
fn test_gen_nop() {
- let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
- let status = gen_nop(&mut jit, &mut context, &mut cb, &mut ocb);
+ let (mut jit, context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let status = gen_nop(&mut jit, &mut asm, &mut ocb);
+ asm.compile(&mut cb, None).unwrap();
- assert_eq!(status, KeepCompiling);
- assert_eq!(context.diff(&Context::new()), 0);
+ assert_eq!(status, Some(KeepCompiling));
+ assert_eq!(context.diff(&Context::default()), TypeDiff::Compatible(0));
assert_eq!(cb.get_write_pos(), 0);
}
#[test]
fn test_gen_pop() {
- let (mut jit, _, mut cb, mut ocb) = setup_codegen();
- let mut context = Context::new_with_stack_size(1);
- let status = gen_pop(&mut jit, &mut context, &mut cb, &mut ocb);
+ let (mut jit, _, mut asm, _cb, mut ocb) = setup_codegen();
+ let context = Context::default();
+ asm.stack_push(Type::Fixnum);
+ let status = gen_pop(&mut jit, &mut asm, &mut ocb);
- assert_eq!(status, KeepCompiling);
- assert_eq!(context.diff(&Context::new()), 0);
+ assert_eq!(status, Some(KeepCompiling));
+ let mut default = Context::default();
+ default.set_reg_temps(context.get_reg_temps());
+ assert_eq!(context.diff(&default), TypeDiff::Compatible(0));
}
#[test]
fn test_gen_dup() {
- let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
- context.stack_push(Type::Fixnum);
- let status = gen_dup(&mut jit, &mut context, &mut cb, &mut ocb);
+ let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen();
+ asm.stack_push(Type::Fixnum);
+ let status = gen_dup(&mut jit, &mut asm, &mut ocb);
- assert_eq!(status, KeepCompiling);
+ assert_eq!(status, Some(KeepCompiling));
// Did we duplicate the type information for the Fixnum type?
- assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(0)));
- assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(1)));
+ assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(0)));
+ assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(1)));
+ asm.compile(&mut cb, None).unwrap();
assert!(cb.get_write_pos() > 0); // Write some movs
}
#[test]
fn test_gen_dupn() {
- let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
- context.stack_push(Type::Fixnum);
- context.stack_push(Type::Flonum);
+ let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen();
+ asm.stack_push(Type::Fixnum);
+ asm.stack_push(Type::Flonum);
let mut value_array: [u64; 2] = [0, 2]; // We only compile for n == 2
let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
jit.pc = pc;
- let status = gen_dupn(&mut jit, &mut context, &mut cb, &mut ocb);
+ let status = gen_dupn(&mut jit, &mut asm, &mut ocb);
- assert_eq!(status, KeepCompiling);
+ assert_eq!(status, Some(KeepCompiling));
- assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(3)));
- assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(2)));
- assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(1)));
- assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0)));
+ assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(3)));
+ assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(2)));
+ assert_eq!(Type::Fixnum, asm.ctx.get_opnd_type(StackOpnd(1)));
+ assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(0)));
+ // TODO: this is writing zero bytes on x86. Why?
+ asm.compile(&mut cb, None).unwrap();
assert!(cb.get_write_pos() > 0); // Write some movs
}
#[test]
fn test_gen_swap() {
- let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
- context.stack_push(Type::Fixnum);
- context.stack_push(Type::Flonum);
+ let (mut jit, _context, mut asm, _cb, mut ocb) = setup_codegen();
+ asm.stack_push(Type::Fixnum);
+ asm.stack_push(Type::Flonum);
- let status = gen_swap(&mut jit, &mut context, &mut cb, &mut ocb);
+ let status = gen_swap(&mut jit, &mut asm, &mut ocb);
- let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0));
- let (_, tmp_type_next) = context.get_opnd_mapping(StackOpnd(1));
+ let tmp_type_top = asm.ctx.get_opnd_type(StackOpnd(0));
+ let tmp_type_next = asm.ctx.get_opnd_type(StackOpnd(1));
- assert_eq!(status, KeepCompiling);
+ assert_eq!(status, Some(KeepCompiling));
assert_eq!(tmp_type_top, Type::Fixnum);
assert_eq!(tmp_type_next, Type::Flonum);
}
#[test]
fn test_putnil() {
- let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
- let status = gen_putnil(&mut jit, &mut context, &mut cb, &mut ocb);
+ let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let status = gen_putnil(&mut jit, &mut asm, &mut ocb);
- let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0));
+ let tmp_type_top = asm.ctx.get_opnd_type(StackOpnd(0));
- assert_eq!(status, KeepCompiling);
+ assert_eq!(status, Some(KeepCompiling));
assert_eq!(tmp_type_top, Type::Nil);
+ asm.compile(&mut cb, None).unwrap();
assert!(cb.get_write_pos() > 0);
}
- #[test]
- fn test_putobject_qtrue() {
- // Test gen_putobject with Qtrue
- let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
-
- let mut value_array: [u64; 2] = [0, Qtrue.into()];
- let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
- jit.pc = pc;
-
- let status = gen_putobject(&mut jit, &mut context, &mut cb, &mut ocb);
-
- let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0));
-
- assert_eq!(status, KeepCompiling);
- assert_eq!(tmp_type_top, Type::True);
- assert!(cb.get_write_pos() > 0);
- }
-
- #[test]
- fn test_putobject_fixnum() {
- // Test gen_putobject with a Fixnum to test another conditional branch
- let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
-
- // The Fixnum 7 is encoded as 7 * 2 + 1, or 15
- let mut value_array: [u64; 2] = [0, 15];
- let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
- jit.pc = pc;
-
- let status = gen_putobject(&mut jit, &mut context, &mut cb, &mut ocb);
-
- let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0));
-
- assert_eq!(status, KeepCompiling);
- assert_eq!(tmp_type_top, Type::Fixnum);
- assert!(cb.get_write_pos() > 0);
- }
-
- #[test]
- fn test_int2fix() {
- let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
- jit.opcode = YARVINSN_putobject_INT2FIX_0_.as_usize();
- let status = gen_putobject_int2fix(&mut jit, &mut context, &mut cb, &mut ocb);
-
- let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0));
-
- // Right now we're not testing the generated machine code to make sure a literal 1 or 0 was pushed. I've checked locally.
- assert_eq!(status, KeepCompiling);
- assert_eq!(tmp_type_top, Type::Fixnum);
- }
#[test]
fn test_putself() {
- let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
- let status = gen_putself(&mut jit, &mut context, &mut cb, &mut ocb);
+ let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen();
+ let status = gen_putself(&mut jit, &mut asm, &mut ocb);
- assert_eq!(status, KeepCompiling);
+ assert_eq!(status, Some(KeepCompiling));
+ asm.compile(&mut cb, None).unwrap();
assert!(cb.get_write_pos() > 0);
}
#[test]
fn test_gen_setn() {
- let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
- context.stack_push(Type::Fixnum);
- context.stack_push(Type::Flonum);
- context.stack_push(Type::CString);
+ let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen();
+ asm.stack_push(Type::Fixnum);
+ asm.stack_push(Type::Flonum);
+ asm.stack_push(Type::CString);
let mut value_array: [u64; 2] = [0, 2];
let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
jit.pc = pc;
- let status = gen_setn(&mut jit, &mut context, &mut cb, &mut ocb);
+ let status = gen_setn(&mut jit, &mut asm, &mut ocb);
- assert_eq!(status, KeepCompiling);
+ assert_eq!(status, Some(KeepCompiling));
- assert_eq!(Type::CString, context.get_opnd_type(StackOpnd(2)));
- assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(1)));
- assert_eq!(Type::CString, context.get_opnd_type(StackOpnd(0)));
+ assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(2)));
+ assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(1)));
+ assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(0)));
+ asm.compile(&mut cb, None).unwrap();
assert!(cb.get_write_pos() > 0);
}
#[test]
fn test_gen_topn() {
- let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
- context.stack_push(Type::Flonum);
- context.stack_push(Type::CString);
+ let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen();
+ asm.stack_push(Type::Flonum);
+ asm.stack_push(Type::CString);
let mut value_array: [u64; 2] = [0, 1];
let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
jit.pc = pc;
- let status = gen_topn(&mut jit, &mut context, &mut cb, &mut ocb);
+ let status = gen_topn(&mut jit, &mut asm, &mut ocb);
- assert_eq!(status, KeepCompiling);
+ assert_eq!(status, Some(KeepCompiling));
- assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(2)));
- assert_eq!(Type::CString, context.get_opnd_type(StackOpnd(1)));
- assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0)));
+ assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(2)));
+ assert_eq!(Type::CString, asm.ctx.get_opnd_type(StackOpnd(1)));
+ assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(0)));
+ asm.compile(&mut cb, None).unwrap();
assert!(cb.get_write_pos() > 0); // Write some movs
}
#[test]
fn test_gen_adjuststack() {
- let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
- context.stack_push(Type::Flonum);
- context.stack_push(Type::CString);
- context.stack_push(Type::Fixnum);
+ let (mut jit, _context, mut asm, mut cb, mut ocb) = setup_codegen();
+ asm.stack_push(Type::Flonum);
+ asm.stack_push(Type::CString);
+ asm.stack_push(Type::Fixnum);
let mut value_array: [u64; 3] = [0, 2, 0];
let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
jit.pc = pc;
- let status = gen_adjuststack(&mut jit, &mut context, &mut cb, &mut ocb);
+ let status = gen_adjuststack(&mut jit, &mut asm, &mut ocb);
- assert_eq!(status, KeepCompiling);
+ assert_eq!(status, Some(KeepCompiling));
- assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0)));
+ assert_eq!(Type::Flonum, asm.ctx.get_opnd_type(StackOpnd(0)));
+ asm.compile(&mut cb, None).unwrap();
assert!(cb.get_write_pos() == 0); // No instructions written
}
#[test]
fn test_gen_leave() {
- let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ let (mut jit, _context, mut asm, _cb, mut ocb) = setup_codegen();
// Push return value
- context.stack_push(Type::Fixnum);
- gen_leave(&mut jit, &mut context, &mut cb, &mut ocb);
+ asm.stack_push(Type::Fixnum);
+ asm.set_side_exit_context(0 as _, 0);
+ gen_leave(&mut jit, &mut asm, &mut ocb);
}
}
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
index 8242c9477e..cd6e649aa0 100644
--- a/yjit/src/core.rs
+++ b/yjit/src/core.rs
@@ -1,18 +1,32 @@
-use crate::asm::x86_64::*;
+//! Code versioning, retained live control flow graph mutations, type tracking, etc.
+
+// So we can comment on individual uses of `unsafe` in `unsafe` functions
+#![warn(unsafe_op_in_unsafe_fn)]
+
use crate::asm::*;
+use crate::backend::ir::*;
use crate::codegen::*;
use crate::virtualmem::CodePtr;
use crate::cruby::*;
use crate::options::*;
use crate::stats::*;
use crate::utils::*;
+#[cfg(feature="disasm")]
+use crate::disasm::*;
use core::ffi::c_void;
use std::cell::*;
-use std::hash::{Hash, Hasher};
+use std::collections::HashSet;
+use std::fmt;
use std::mem;
-use std::rc::{Rc};
-use InsnOpnd::*;
-use TempMapping::*;
+use std::mem::transmute;
+use std::ops::Range;
+use std::rc::Rc;
+use mem::MaybeUninit;
+use std::ptr;
+use ptr::NonNull;
+use YARVOpnd::*;
+use TempMappingKind::*;
+use crate::invariants::*;
// Maximum number of temp value types we keep track of
pub const MAX_TEMP_TYPES: usize = 8;
@@ -20,10 +34,15 @@ pub const MAX_TEMP_TYPES: usize = 8;
// Maximum number of local variable types we keep track of
const MAX_LOCAL_TYPES: usize = 8;
+/// An index into `ISEQ_BODY(iseq)->iseq_encoded`. Points
+/// to a YARV instruction or an instruction operand.
+pub type IseqIdx = u16;
+
// Represent the type of a value (local/stack/self) in YJIT
-#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+#[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)]
+#[repr(u8)]
pub enum Type {
- Unknown,
+ Unknown = 0,
UnknownImm,
UnknownHeap,
Nil,
@@ -31,15 +50,20 @@ pub enum Type {
False,
Fixnum,
Flonum,
- Array,
- Hash,
ImmSymbol,
- #[allow(unused)]
- HeapSymbol,
-
TString, // An object with the T_STRING flag set, possibly an rb_cString
- CString, // An un-subclassed string of type rb_cString (can have instance vars in some cases)
+ CString, // An object that at one point had its class field equal to rb_cString (creating a singleton class changes it)
+ TArray, // An object with the T_ARRAY flag set, possibly an rb_cArray
+ CArray, // An object that at one point had its class field equal to rb_cArray (creating a singleton class changes it)
+ THash, // An object with the T_HASH flag set, possibly an rb_cHash
+ CHash, // An object that at one point had its class field equal to rb_cHash (creating a singleton class changes it)
+
+ BlockParamProxy, // A special sentinel value indicating the block parameter should be read from
+ // the current surrounding cfp
+
+ // The context currently relies on types taking at most 4 bits (max value 15)
+ // to encode, so if we add any more, we will need to refactor the context.
}
// Default initialization
@@ -66,18 +90,27 @@ impl Type {
} else if val.flonum_p() {
Type::Flonum
} else {
- unreachable!()
+ unreachable!("Illegal value: {:?}", val)
}
} else {
// Core.rs can't reference rb_cString because it's linked by Rust-only tests.
// But CString vs TString is only an optimisation and shouldn't affect correctness.
#[cfg(not(test))]
- if val.class_of() == unsafe { rb_cString } {
- return Type::CString;
+ match val.class_of() {
+ class if class == unsafe { rb_cArray } => return Type::CArray,
+ class if class == unsafe { rb_cHash } => return Type::CHash,
+ class if class == unsafe { rb_cString } => return Type::CString,
+ _ => {}
+ }
+ // We likewise can't reference rb_block_param_proxy, but it's again an optimisation;
+ // we can just treat it as a normal Object.
+ #[cfg(not(test))]
+ if val == unsafe { rb_block_param_proxy } {
+ return Type::BlockParamProxy;
}
match val.builtin_type() {
- RUBY_T_ARRAY => Type::Array,
- RUBY_T_HASH => Type::Hash,
+ RUBY_T_ARRAY => Type::TArray,
+ RUBY_T_HASH => Type::THash,
RUBY_T_STRING => Type::TString,
_ => Type::UnknownHeap,
}
@@ -117,121 +150,364 @@ impl Type {
pub fn is_heap(&self) -> bool {
match self {
Type::UnknownHeap => true,
- Type::Array => true,
- Type::Hash => true,
- Type::HeapSymbol => true,
+ Type::TArray => true,
+ Type::CArray => true,
+ Type::THash => true,
+ Type::CHash => true,
Type::TString => true,
Type::CString => true,
+ Type::BlockParamProxy => true,
_ => false,
}
}
+ /// Check if it's a T_ARRAY object (both TArray and CArray are T_ARRAY)
+ pub fn is_array(&self) -> bool {
+ matches!(self, Type::TArray | Type::CArray)
+ }
+
+ /// Check if it's a T_HASH object (both THash and CHash are T_HASH)
+ pub fn is_hash(&self) -> bool {
+ matches!(self, Type::THash | Type::CHash)
+ }
+
+ /// Check if it's a T_STRING object (both TString and CString are T_STRING)
+ pub fn is_string(&self) -> bool {
+ matches!(self, Type::TString | Type::CString)
+ }
+
+ /// Returns an Option with the T_ value type if it is known, otherwise None
+ pub fn known_value_type(&self) -> Option<ruby_value_type> {
+ match self {
+ Type::Nil => Some(RUBY_T_NIL),
+ Type::True => Some(RUBY_T_TRUE),
+ Type::False => Some(RUBY_T_FALSE),
+ Type::Fixnum => Some(RUBY_T_FIXNUM),
+ Type::Flonum => Some(RUBY_T_FLOAT),
+ Type::TArray | Type::CArray => Some(RUBY_T_ARRAY),
+ Type::THash | Type::CHash => Some(RUBY_T_HASH),
+ Type::ImmSymbol => Some(RUBY_T_SYMBOL),
+ Type::TString | Type::CString => Some(RUBY_T_STRING),
+ Type::Unknown | Type::UnknownImm | Type::UnknownHeap => None,
+ Type::BlockParamProxy => None,
+ }
+ }
+
+ /// Returns an Option with the class if it is known, otherwise None
+ pub fn known_class(&self) -> Option<VALUE> {
+ unsafe {
+ match self {
+ Type::Nil => Some(rb_cNilClass),
+ Type::True => Some(rb_cTrueClass),
+ Type::False => Some(rb_cFalseClass),
+ Type::Fixnum => Some(rb_cInteger),
+ Type::Flonum => Some(rb_cFloat),
+ Type::ImmSymbol => Some(rb_cSymbol),
+ Type::CArray => Some(rb_cArray),
+ Type::CHash => Some(rb_cHash),
+ Type::CString => Some(rb_cString),
+ _ => None,
+ }
+ }
+ }
+
+ /// Returns an Option with the exact value if it is known, otherwise None
+ #[allow(unused)] // not yet used
+ pub fn known_exact_value(&self) -> Option<VALUE> {
+ match self {
+ Type::Nil => Some(Qnil),
+ Type::True => Some(Qtrue),
+ Type::False => Some(Qfalse),
+ _ => None,
+ }
+ }
+
+ /// Returns an Option boolean representing whether the value is truthy if known, otherwise None
+ pub fn known_truthy(&self) -> Option<bool> {
+ match self {
+ Type::Nil => Some(false),
+ Type::False => Some(false),
+ Type::UnknownHeap => Some(true),
+ Type::Unknown | Type::UnknownImm => None,
+ _ => Some(true)
+ }
+ }
+
+ /// Returns an Option boolean representing whether the value is equal to nil if known, otherwise None
+ pub fn known_nil(&self) -> Option<bool> {
+ match (self, self.known_truthy()) {
+ (Type::Nil, _) => Some(true),
+ (Type::False, _) => Some(false), // Qfalse is not nil
+ (_, Some(true)) => Some(false), // if truthy, can't be nil
+ (_, _) => None // otherwise unknown
+ }
+ }
+
/// Compute a difference between two value types
- /// Returns 0 if the two are the same
- /// Returns > 0 if different but compatible
- /// Returns usize::MAX if incompatible
- pub fn diff(self, dst: Self) -> usize {
+ pub fn diff(self, dst: Self) -> TypeDiff {
// Perfect match, difference is zero
if self == dst {
- return 0;
+ return TypeDiff::Compatible(0);
}
// Any type can flow into an unknown type
if dst == Type::Unknown {
- return 1;
+ return TypeDiff::Compatible(1);
+ }
+
+ // A CArray is also a TArray.
+ if self == Type::CArray && dst == Type::TArray {
+ return TypeDiff::Compatible(1);
+ }
+
+ // A CHash is also a THash.
+ if self == Type::CHash && dst == Type::THash {
+ return TypeDiff::Compatible(1);
}
// A CString is also a TString.
if self == Type::CString && dst == Type::TString {
- return 1;
+ return TypeDiff::Compatible(1);
}
// Specific heap type into unknown heap type is imperfect but valid
if self.is_heap() && dst == Type::UnknownHeap {
- return 1;
+ return TypeDiff::Compatible(1);
}
// Specific immediate type into unknown immediate type is imperfect but valid
if self.is_imm() && dst == Type::UnknownImm {
- return 1;
+ return TypeDiff::Compatible(1);
}
// Incompatible types
- return usize::MAX;
+ return TypeDiff::Incompatible;
}
/// Upgrade this type into a more specific compatible type
/// The new type must be compatible and at least as specific as the previously known type.
- fn upgrade(&mut self, src: Self) {
- // Here we're checking that src is more specific than self
- assert!(src.diff(*self) != usize::MAX);
- *self = src;
+ fn upgrade(&mut self, new_type: Self) {
+ // We can only upgrade to a type that is more specific
+ assert!(new_type.diff(*self) != TypeDiff::Incompatible);
+ *self = new_type;
}
}
+#[derive(Debug, Eq, PartialEq)]
+pub enum TypeDiff {
+ // usize == 0: Same type
+ // usize >= 1: Different but compatible. The smaller, the more compatible.
+ Compatible(usize),
+ Incompatible,
+}
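For a concrete sense of these variants, here is a small illustrative sketch (a hypothetical unit test, relying only on the rules spelled out in `Type::diff` above):

    #[test]
    fn type_diff_examples() {
        // Exact match costs nothing; CString narrows into TString cheaply;
        // unrelated concrete types do not unify.
        assert_eq!(Type::Fixnum.diff(Type::Fixnum), TypeDiff::Compatible(0));
        assert_eq!(Type::CString.diff(Type::TString), TypeDiff::Compatible(1));
        assert_eq!(Type::Fixnum.diff(Type::Flonum), TypeDiff::Incompatible);
    }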
+
+#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)]
+#[repr(u8)]
+pub enum TempMappingKind
+{
+ MapToStack = 0,
+ MapToSelf = 1,
+ MapToLocal = 2,
+}
+
// Potential mapping of a value on the temporary stack to
// self, a local variable or constant so that we can track its type
-#[derive(Copy, Clone, Eq, PartialEq, Debug)]
-pub enum TempMapping {
- MapToStack, // Normal stack value
- MapToSelf, // Temp maps to the self operand
- MapToLocal(u8), // Temp maps to a local variable with index
- //ConstMapping, // Small constant (0, 1, 2, Qnil, Qfalse, Qtrue)
+//
+// The highest two bits represent TempMappingKind, and the rest of
+// the bits are used differently across different kinds.
+// * MapToStack: The lowest 5 bits are used for mapping Type.
+// * MapToSelf: The remaining bits are not used; the type is stored in self_type.
+// * MapToLocal: The lowest 3 bits store the index of a local variable.
+#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)]
+pub struct TempMapping(u8);
+
+impl TempMapping {
+ pub fn map_to_stack(t: Type) -> TempMapping
+ {
+ let kind_bits = TempMappingKind::MapToStack as u8;
+ let type_bits = t as u8;
+ assert!(type_bits <= 0b11111);
+ let bits = (kind_bits << 6) | (type_bits & 0b11111);
+ TempMapping(bits)
+ }
+
+ pub fn map_to_self() -> TempMapping
+ {
+ let kind_bits = TempMappingKind::MapToSelf as u8;
+ let bits = kind_bits << 6;
+ TempMapping(bits)
+ }
+
+ pub fn map_to_local(local_idx: u8) -> TempMapping
+ {
+ let kind_bits = TempMappingKind::MapToLocal as u8;
+ assert!(local_idx <= 0b111);
+ let bits = (kind_bits << 6) | (local_idx & 0b111);
+ TempMapping(bits)
+ }
+
+ pub fn without_type(&self) -> TempMapping
+ {
+ if self.get_kind() != TempMappingKind::MapToStack {
+ return *self;
+ }
+
+ TempMapping::map_to_stack(Type::Unknown)
+ }
+
+ pub fn get_kind(&self) -> TempMappingKind
+ {
+ // Take the two highest bits
+ let TempMapping(bits) = self;
+ let kind_bits = bits >> 6;
+ assert!(kind_bits <= 2);
+ unsafe { transmute::<u8, TempMappingKind>(kind_bits) }
+ }
+
+ pub fn get_type(&self) -> Type
+ {
+ assert!(self.get_kind() == TempMappingKind::MapToStack);
+
+ // Take the 5 lowest bits
+ let TempMapping(bits) = self;
+ let type_bits = bits & 0b11111;
+ unsafe { transmute::<u8, Type>(type_bits) }
+ }
+
+ pub fn get_local_idx(&self) -> u8
+ {
+ assert!(self.get_kind() == TempMappingKind::MapToLocal);
+
+ // Take the 3 lowest bits
+ let TempMapping(bits) = self;
+ bits & 0b111
+ }
}
impl Default for TempMapping {
fn default() -> Self {
- MapToStack
+ TempMapping::map_to_stack(Type::Unknown)
}
}
-// Operand to a bytecode instruction
+// Operand to a YARV bytecode instruction
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
-pub enum InsnOpnd {
+pub enum YARVOpnd {
// The value is self
SelfOpnd,
// Temporary stack operand with stack index
- StackOpnd(u16),
+ StackOpnd(u8),
+}
+
+impl From<Opnd> for YARVOpnd {
+ fn from(value: Opnd) -> Self {
+ match value {
+ Opnd::Stack { idx, .. } => StackOpnd(idx.try_into().unwrap()),
+ _ => unreachable!("{:?} cannot be converted to YARVOpnd", value)
+ }
+ }
+}
+
+/// Maximum index of stack temps that could be in a register
+pub const MAX_REG_TEMPS: u8 = 8;
+
+/// Bitmap of which stack temps are in a register
+#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)]
+pub struct RegTemps(u8);
+
+impl RegTemps {
+ pub fn get(&self, index: u8) -> bool {
+ assert!(index < MAX_REG_TEMPS);
+ (self.0 >> index) & 1 == 1
+ }
+
+ pub fn set(&mut self, index: u8, value: bool) {
+ assert!(index < MAX_REG_TEMPS);
+ if value {
+ self.0 = self.0 | (1 << index);
+ } else {
+ self.0 = self.0 & !(1 << index);
+ }
+ }
+
+ pub fn as_u8(&self) -> u8 {
+ self.0
+ }
+
+ /// Return true if there's a register that conflicts with a given stack_idx.
+ pub fn conflicts_with(&self, stack_idx: u8) -> bool {
+ let mut other_idx = stack_idx as usize % get_option!(num_temp_regs);
+ while other_idx < MAX_REG_TEMPS as usize {
+ if stack_idx as usize != other_idx && self.get(other_idx as u8) {
+ return true;
+ }
+ other_idx += get_option!(num_temp_regs);
+ }
+ false
+ }
}
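As a rough usage sketch of the bitmap (a hypothetical test; `conflicts_with` also reads the `num_temp_regs` option at runtime, so it is only described in the comment):

    #[test]
    fn reg_temps_bitmap() {
        let mut reg_temps = RegTemps::default();
        assert!(!reg_temps.get(0));

        // Mark stack temps 0 and 5 as living in registers.
        reg_temps.set(0, true);
        reg_temps.set(5, true);
        assert_eq!(reg_temps.as_u8(), 0b0010_0001);

        // With num_temp_regs == 5, temps 0 and 5 map to the same register,
        // so conflicts_with(0) would report a conflict while temp 5 is set.
    }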
+/// Bits for chain_depth_and_flags
+const RETURN_LANDING_BIT: u8 = 0b10000000;
+const DEFER_BIT: u8 = 0b01000000;
+const CHAIN_DEPTH_MASK: u8 = 0b00111111; // 63
+
/// Code generation context
/// Contains information we can use to specialize/optimize code
/// There are a lot of context objects so we try to keep the size small.
-#[derive(Copy, Clone, Default, Debug)]
+#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)]
+#[repr(packed)]
pub struct Context {
// Number of values currently on the temporary stack
- stack_size: u16,
+ stack_size: u8,
// Offset of the JIT SP relative to the interpreter SP
// This represents how far the JIT's SP is from the "real" SP
- sp_offset: i16,
-
- // Depth of this block in the sidechain (eg: inline-cache chain)
- chain_depth: u8,
+ sp_offset: i8,
- // Local variable types we keep track of
- local_types: [Type; MAX_LOCAL_TYPES],
+ /// Bitmap of which stack temps are in a register
+ reg_temps: RegTemps,
- // Temporary variable types we keep track of
- temp_types: [Type; MAX_TEMP_TYPES],
+ /// Fields packed into u8
+ /// - 1st bit from the left: Whether this code is the target of a JIT-to-JIT Ruby return ([Self::is_return_landing])
+ /// - 2nd bit from the left: Whether the compilation of this code has been deferred ([Self::is_deferred])
+ /// - Last 6 bits (max: 63): Depth of this block in the sidechain (eg: inline-cache chain)
+ chain_depth_and_flags: u8,
// Type we track for self
self_type: Type,
- // Mapping of temp stack entries to types we track
- temp_mapping: [TempMapping; MAX_TEMP_TYPES],
+ // Local variable types we keep track of
+ // We store 8 local types, requiring 4 bits each, for a total of 32 bits
+ local_types: u32,
+
+ // Temp mapping kinds we track
+ // 8 temp mappings * 2 bits, total 16 bits
+ temp_mapping_kind: u16,
+
+ // Stack slot type/local_idx we track
+ // 8 temp types * 4 bits, total 32 bits
+ temp_payload: u32,
+
+ /// A pointer to a block ISEQ supplied by the caller. 0 if not inlined.
+ /// Not using IseqPtr to satisfy Default trait, and not using Option for #[repr(packed)]
+ /// TODO: This could be u16 if we have a global or per-ISEQ HashMap to convert IseqPtr
+ /// to serial indexes. We're thinking of overhauling Context structure in Ruby 3.4 which
+ /// could allow this to consume no bytes, so we're leaving this as is.
+ inline_block: u64,
}
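The accessors that unpack `chain_depth_and_flags` are defined outside this hunk; given the bit constants above, they presumably look roughly like the following (a sketch, not the actual definitions):

    impl Context {
        // Low six bits: sidechain depth.
        pub fn get_chain_depth(&self) -> u8 {
            self.chain_depth_and_flags & CHAIN_DEPTH_MASK
        }

        // Top bit: JIT-to-JIT return landing.
        pub fn is_return_landing(&self) -> bool {
            self.chain_depth_and_flags & RETURN_LANDING_BIT != 0
        }

        // Second bit: deferred compilation.
        pub fn is_deferred(&self) -> bool {
            self.chain_depth_and_flags & DEFER_BIT != 0
        }
    }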
/// Tuple of (iseq, idx) used to identify basic blocks
/// There are a lot of blockid objects so we try to keep the size small.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+#[repr(packed)]
pub struct BlockId {
/// Instruction sequence
pub iseq: IseqPtr,
/// Index in the iseq where the block starts
- pub idx: u32,
+ pub idx: u16,
}
/// Branch code shape enumeration
@@ -242,116 +518,457 @@ pub enum BranchShape {
Default, // Neither target is next
}
-// Branch code generation function signature
-type BranchGenFn =
- fn(cb: &mut CodeBlock, target0: CodePtr, target1: Option<CodePtr>, shape: BranchShape) -> ();
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum BranchGenFn {
+ BranchIf(Cell<BranchShape>),
+ BranchNil(Cell<BranchShape>),
+ BranchUnless(Cell<BranchShape>),
+ JumpToTarget0(Cell<BranchShape>),
+ JNZToTarget0,
+ JZToTarget0,
+ JBEToTarget0,
+ JBToTarget0,
+ JOMulToTarget0,
+ JITReturn,
+}
+
+impl BranchGenFn {
+ pub fn call(&self, asm: &mut Assembler, target0: Target, target1: Option<Target>) {
+ match self {
+ BranchGenFn::BranchIf(shape) => {
+ match shape.get() {
+ BranchShape::Next0 => asm.jz(target1.unwrap()),
+ BranchShape::Next1 => asm.jnz(target0),
+ BranchShape::Default => {
+ asm.jnz(target0);
+ asm.jmp(target1.unwrap());
+ }
+ }
+ }
+ BranchGenFn::BranchNil(shape) => {
+ match shape.get() {
+ BranchShape::Next0 => asm.jne(target1.unwrap()),
+ BranchShape::Next1 => asm.je(target0),
+ BranchShape::Default => {
+ asm.je(target0);
+ asm.jmp(target1.unwrap());
+ }
+ }
+ }
+ BranchGenFn::BranchUnless(shape) => {
+ match shape.get() {
+ BranchShape::Next0 => asm.jnz(target1.unwrap()),
+ BranchShape::Next1 => asm.jz(target0),
+ BranchShape::Default => {
+ asm.jz(target0);
+ asm.jmp(target1.unwrap());
+ }
+ }
+ }
+ BranchGenFn::JumpToTarget0(shape) => {
+ if shape.get() == BranchShape::Next1 {
+ panic!("Branch shape Next1 not allowed in JumpToTarget0!");
+ }
+ if shape.get() == BranchShape::Default {
+ asm.jmp(target0);
+ }
+ }
+ BranchGenFn::JNZToTarget0 => {
+ asm.jnz(target0)
+ }
+ BranchGenFn::JZToTarget0 => {
+ asm.jz(target0)
+ }
+ BranchGenFn::JBEToTarget0 => {
+ asm.jbe(target0)
+ }
+ BranchGenFn::JBToTarget0 => {
+ asm.jb(target0)
+ }
+ BranchGenFn::JOMulToTarget0 => {
+ asm.jo_mul(target0)
+ }
+ BranchGenFn::JITReturn => {
+ asm_comment!(asm, "update cfp->jit_return");
+ let jit_return = RUBY_OFFSET_CFP_JIT_RETURN - RUBY_SIZEOF_CONTROL_FRAME as i32;
+ let raw_ptr = asm.lea_jump_target(target0);
+ asm.mov(Opnd::mem(64, CFP, jit_return), raw_ptr);
+ }
+ }
+ }
+
+ pub fn get_shape(&self) -> BranchShape {
+ match self {
+ BranchGenFn::BranchIf(shape) |
+ BranchGenFn::BranchNil(shape) |
+ BranchGenFn::BranchUnless(shape) |
+ BranchGenFn::JumpToTarget0(shape) => shape.get(),
+ BranchGenFn::JNZToTarget0 |
+ BranchGenFn::JZToTarget0 |
+ BranchGenFn::JBEToTarget0 |
+ BranchGenFn::JBToTarget0 |
+ BranchGenFn::JOMulToTarget0 |
+ BranchGenFn::JITReturn => BranchShape::Default,
+ }
+ }
+
+ pub fn set_shape(&self, new_shape: BranchShape) {
+ match self {
+ BranchGenFn::BranchIf(shape) |
+ BranchGenFn::BranchNil(shape) |
+ BranchGenFn::BranchUnless(shape) => {
+ shape.set(new_shape);
+ }
+ BranchGenFn::JumpToTarget0(shape) => {
+ if new_shape == BranchShape::Next1 {
+ panic!("Branch shape Next1 not allowed in JumpToTarget0!");
+ }
+ shape.set(new_shape);
+ }
+ BranchGenFn::JNZToTarget0 |
+ BranchGenFn::JZToTarget0 |
+ BranchGenFn::JBEToTarget0 |
+ BranchGenFn::JBToTarget0 |
+ BranchGenFn::JOMulToTarget0 |
+ BranchGenFn::JITReturn => {
+ assert_eq!(new_shape, BranchShape::Default);
+ }
+ }
+ }
+}
+
+/// A place that a branch could jump to
+#[derive(Debug, Clone)]
+enum BranchTarget {
+ Stub(Box<BranchStub>), // Not compiled yet
+ Block(BlockRef), // Already compiled
+}
+
+impl BranchTarget {
+ fn get_address(&self) -> Option<CodePtr> {
+ match self {
+ BranchTarget::Stub(stub) => stub.address,
+ BranchTarget::Block(blockref) => Some(unsafe { blockref.as_ref() }.start_addr),
+ }
+ }
+
+ fn get_blockid(&self) -> BlockId {
+ match self {
+ BranchTarget::Stub(stub) => BlockId { iseq: stub.iseq.get(), idx: stub.iseq_idx },
+ BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.get_blockid(),
+ }
+ }
+
+ fn get_ctx(&self) -> Context {
+ match self {
+ BranchTarget::Stub(stub) => stub.ctx,
+ BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.ctx,
+ }
+ }
+
+ fn get_block(&self) -> Option<BlockRef> {
+ match self {
+ BranchTarget::Stub(_) => None,
+ BranchTarget::Block(blockref) => Some(*blockref),
+ }
+ }
+
+ fn set_iseq(&self, iseq: IseqPtr) {
+ match self {
+ BranchTarget::Stub(stub) => stub.iseq.set(iseq),
+ BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.iseq.set(iseq),
+ }
+ }
+}
+
+#[derive(Debug, Clone)]
+struct BranchStub {
+ address: Option<CodePtr>,
+ iseq: Cell<IseqPtr>,
+ iseq_idx: IseqIdx,
+ ctx: Context,
+}
/// Store info about an outgoing branch in a code segment
/// Note: care must be taken to minimize the size of branch objects
-struct Branch {
+pub struct Branch {
// Block this is attached to
block: BlockRef,
// Positions where the generated code starts and ends
- start_addr: Option<CodePtr>,
- end_addr: Option<CodePtr>,
-
- // Context right after the branch instruction
- #[allow(unused)] // set but not read at the moment
- src_ctx: Context,
+ start_addr: CodePtr,
+ end_addr: Cell<CodePtr>, // exclusive
// Branch target blocks and their contexts
- targets: [Option<BlockId>; 2],
- target_ctxs: [Context; 2],
- blocks: [Option<BlockRef>; 2],
-
- // Jump target addresses
- dst_addrs: [Option<CodePtr>; 2],
+ targets: [Cell<Option<Box<BranchTarget>>>; 2],
// Branch code generation function
gen_fn: BranchGenFn,
+}
+
+/// A [Branch] for a [Block] that is under construction.
+/// Fields correspond, but may be `None` during construction.
+pub struct PendingBranch {
+ /// Allocation holder for the address of the constructed branch;
+ /// in error paths, the Box deallocates it.
+ uninit_branch: Box<MaybeUninit<Branch>>,
+
+ /// Branch code generation function
+ gen_fn: BranchGenFn,
+
+ /// Positions where the generated code starts and ends
+ start_addr: Cell<Option<CodePtr>>,
+ end_addr: Cell<Option<CodePtr>>, // exclusive
+
+ /// Branch target blocks and their contexts
+ targets: [Cell<Option<Box<BranchTarget>>>; 2],
+}
+
+impl Branch {
+ // Compute the size of the branch code
+ fn code_size(&self) -> usize {
+ (self.end_addr.get().as_offset() - self.start_addr.as_offset()) as usize
+ }
+
+ /// Get the address of one of the branch destinations
+ fn get_target_address(&self, target_idx: usize) -> Option<CodePtr> {
+ unsafe {
+ self.targets[target_idx]
+ .ref_unchecked()
+ .as_ref()
+ .and_then(|target| target.get_address())
+ }
+ }
- // Shape of the branch
- shape: BranchShape,
+ fn get_stub_count(&self) -> usize {
+ let mut count = 0;
+ for target in self.targets.iter() {
+ if unsafe {
+ // SAFETY: no mutation
+ matches!(
+ target.ref_unchecked().as_ref().map(Box::as_ref),
+ Some(BranchTarget::Stub(_))
+ )
+ } {
+ count += 1;
+ }
+ }
+ count
+ }
+
+ fn assert_layout(&self) {
+ let shape = self.gen_fn.get_shape();
+ assert!(
+ !(shape == BranchShape::Default && 0 == self.code_size()),
+ "zero-size branches are incorrect when code for neither targets are adjacent"
+ // One needs to issue some instruction to steer to the branch target
+ // when falling through isn't an option.
+ );
+ }
}
impl std::fmt::Debug for Branch {
+ // Can't derive this because `targets: !Copy` due to Cell.
fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- // TODO: expand this if needed. #[derive(Debug)] on Branch gave a
- // strange error related to BranchGenFn
+ let targets = unsafe {
+ // SAFETY:
+ // While the references are live for the result of this function,
+ // no mutation happens because we are only calling derived fmt::Debug functions.
+ [self.targets[0].as_ptr().as_ref().unwrap(), self.targets[1].as_ptr().as_ref().unwrap()]
+ };
+
formatter
.debug_struct("Branch")
+ .field("block", &self.block)
.field("start", &self.start_addr)
.field("end", &self.end_addr)
- .field("targets", &self.targets)
+ .field("targets", &targets)
+ .field("gen_fn", &self.gen_fn)
.finish()
}
}
-impl Branch {
- // Compute the size of the branch code
- fn code_size(&self) -> usize {
- (self.end_addr.unwrap().raw_ptr() as usize) - (self.start_addr.unwrap().raw_ptr() as usize)
+impl PendingBranch {
+ /// Set up a branch target at `target_idx`. Find an existing block to branch to
+ /// or generate a stub for one.
+ fn set_target(
+ &self,
+ target_idx: u32,
+ target: BlockId,
+ ctx: &Context,
+ ocb: &mut OutlinedCb,
+ ) -> Option<CodePtr> {
+ // If the block already exists
+ if let Some(blockref) = find_block_version(target, ctx) {
+ let block = unsafe { blockref.as_ref() };
+
+ // Fill out the target with this block
+ self.targets[target_idx.as_usize()]
+ .set(Some(Box::new(BranchTarget::Block(blockref))));
+ return Some(block.start_addr);
+ }
+
+ // The branch struct is uninitialized right now but has a stable address.
+ // We make sure the stub runs after the branch is initialized.
+ let branch_struct_addr = self.uninit_branch.as_ptr() as usize;
+ let stub_addr = gen_branch_stub(ctx, ocb, branch_struct_addr, target_idx);
+
+ if let Some(stub_addr) = stub_addr {
+ // Fill the branch target with a stub
+ self.targets[target_idx.as_usize()].set(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub {
+ address: Some(stub_addr),
+ iseq: Cell::new(target.iseq),
+ iseq_idx: target.idx,
+ ctx: *ctx,
+ })))));
+ }
+
+ stub_addr
+ }
+
+ // Construct the branch and wire it up in the graph
+ fn into_branch(mut self, uninit_block: BlockRef) -> BranchRef {
+ // Make the branch
+ let branch = Branch {
+ block: uninit_block,
+ start_addr: self.start_addr.get().unwrap(),
+ end_addr: Cell::new(self.end_addr.get().unwrap()),
+ targets: self.targets,
+ gen_fn: self.gen_fn,
+ };
+ // Move it to the designated place on
+ // the heap and unwrap MaybeUninit.
+ self.uninit_branch.write(branch);
+ let raw_branch: *mut MaybeUninit<Branch> = Box::into_raw(self.uninit_branch);
+ let branchref = NonNull::new(raw_branch as *mut Branch).expect("no null from Box");
+
+ // SAFETY: just allocated it
+ let branch = unsafe { branchref.as_ref() };
+ // For block branch targets, put the new branch in the
+ // appropriate incoming list.
+ for target in branch.targets.iter() {
+ // SAFETY: no mutation
+ let out_block: Option<BlockRef> = unsafe {
+ target.ref_unchecked().as_ref().and_then(|target| target.get_block())
+ };
+
+ if let Some(out_block) = out_block {
+ // SAFETY: These blockrefs come from set_target() which only puts blocks from
+ // ISeqs, which are all initialized. Note that uninit_block isn't in any ISeq
+ // payload yet.
+ unsafe { out_block.as_ref() }.incoming.push(branchref);
+ }
+ }
+
+ branch.assert_layout();
+
+ branchref
}
}
-// In case this block is invalidated, these two pieces of info
-// help to remove all pointers to this block in the system.
-#[derive(Debug)]
-pub struct CmeDependency {
- pub receiver_klass: VALUE,
- pub callee_cme: *const rb_callable_method_entry_t,
+// Store info about code used on YJIT entry
+pub struct Entry {
+ // Positions where the generated code starts and ends
+ start_addr: CodePtr,
+ end_addr: CodePtr, // exclusive
}
+/// A [Branch] for a [Block] that is under construction.
+pub struct PendingEntry {
+ pub uninit_entry: Box<MaybeUninit<Entry>>,
+ start_addr: Cell<Option<CodePtr>>,
+ end_addr: Cell<Option<CodePtr>>, // exclusive
+}
+
+impl PendingEntry {
+ // Construct the entry in the heap
+ pub fn into_entry(mut self) -> EntryRef {
+ // Make the entry
+ let entry = Entry {
+ start_addr: self.start_addr.get().unwrap(),
+ end_addr: self.end_addr.get().unwrap(),
+ };
+ // Move it to the designated place on the heap and unwrap MaybeUninit.
+ self.uninit_entry.write(entry);
+ let raw_entry: *mut MaybeUninit<Entry> = Box::into_raw(self.uninit_entry);
+ NonNull::new(raw_entry as *mut Entry).expect("no null from Box")
+ }
+}
+
+// In case a block is invalidated, this helps to remove all pointers to the block.
+pub type CmePtr = *const rb_callable_method_entry_t;
+
/// Basic block version
/// Represents a portion of an iseq compiled with a given context
/// Note: care must be taken to minimize the size of block_t objects
#[derive(Debug)]
pub struct Block {
- // Bytecode sequence (iseq, idx) this is a version of
- blockid: BlockId,
+ // The byte code instruction sequence this is a version of.
+ // Can change due to moving GC.
+ iseq: Cell<IseqPtr>,
- // Index one past the last instruction for this block in the iseq
- end_idx: u32,
+ // Index range covered by this version in `ISEQ_BODY(iseq)->iseq_encoded`.
+ iseq_range: Range<IseqIdx>,
// Context at the start of the block
// This should never be mutated
ctx: Context,
// Positions where the generated code starts and ends
- start_addr: Option<CodePtr>,
- end_addr: Option<CodePtr>,
+ start_addr: CodePtr,
+ end_addr: Cell<CodePtr>,
// List of incoming branches (from predecessors)
// These are reference counted (ownership shared between predecessor and successors)
- incoming: Vec<BranchRef>,
+ incoming: MutableBranchList,
// NOTE: we might actually be able to store the branches here without refcounting
// however, using a RefCell makes it easy to get a pointer to Branch objects
//
// List of outgoing branches (to successors)
- outgoing: Vec<BranchRef>,
+ outgoing: Box<[BranchRef]>,
// FIXME: should these be code pointers instead?
// Offsets for GC managed objects in the mainline code block
- gc_object_offsets: Vec<u32>,
+ gc_obj_offsets: Box<[u32]>,
// CME dependencies of this block, to help to remove all pointers to this
// block in the system.
- cme_dependencies: Vec<CmeDependency>,
+ cme_dependencies: Box<[Cell<CmePtr>]>,
// Code address of an exit for `ctx` and `blockid`.
// Used for block invalidation.
- pub entry_exit: Option<CodePtr>,
+ entry_exit: Option<CodePtr>,
}
-/// Reference-counted pointer to a block that can be borrowed mutably.
-/// Wrapped so we could implement [Hash] and [Eq] for use with stdlib collections.
-#[derive(Debug)]
-pub struct BlockRef(Rc<RefCell<Block>>);
-
-/// Reference-counted pointer to a branch that can be borrowed mutably
-type BranchRef = Rc<RefCell<Branch>>;
+/// Pointer to a [Block].
+///
+/// # Safety
+///
+/// _Never_ derive a `&mut Block` from this and always use
+/// [std::ptr::NonNull::as_ref] to get a `&Block`. `&'a mut`
+/// in Rust asserts that there are no other references live
+/// over the lifetime `'a`. This uniqueness assertion does
+/// not hold in many situations for us, even when you ignore
+/// the fact that our control flow graph can have cycles.
+/// Here are just two examples where we have overlapping references:
+/// - Yielding to a different OS thread within the same
+/// ractor during compilation
+/// - The GC calling [rb_yjit_iseq_mark] during compilation
+///
+/// Technically, for soundness, we also need to ensure that
+/// we have the VM lock while the result of `as_ref()`
+/// is live, so that no deallocation happens while the
+/// shared reference is live. The vast majority of our code runs while
+/// holding the VM lock, though.
+pub type BlockRef = NonNull<Block>;
+
+/// Pointer to a [Branch]. See [BlockRef] for notes about
+/// proper usage.
+pub type BranchRef = NonNull<Branch>;
+
+/// Pointer to an entry that is already added to an ISEQ
+pub type EntryRef = NonNull<Entry>;
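In practice that means readers of a `BlockRef` only ever derive a shared reference, along these lines (hypothetical helper, shown purely to illustrate the access pattern prescribed above):

    fn dump_block(blockref: BlockRef) {
        // SAFETY: caller holds the VM lock and the block is fully initialized,
        // so a shared borrow is sound; never go through NonNull::as_mut().
        let block: &Block = unsafe { blockref.as_ref() };
        println!("{:?} with ctx {:?}", block.get_blockid(), block.ctx);
    }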
/// List of block versions for a given blockid
type VersionList = Vec<BlockRef>;
@@ -360,55 +977,51 @@ type VersionList = Vec<BlockRef>;
/// An instance of this is stored on each iseq
type VersionMap = Vec<VersionList>;
-impl BlockRef {
- /// Constructor
- pub fn new(rc: Rc<RefCell<Block>>) -> Self {
- Self(rc)
- }
-
- /// Borrow the block through [RefCell].
- pub fn borrow(&self) -> Ref<'_, Block> {
- self.0.borrow()
- }
-
- /// Borrow the block for mutation through [RefCell].
- pub fn borrow_mut(&self) -> RefMut<'_, Block> {
- self.0.borrow_mut()
- }
-}
-
-impl Clone for BlockRef {
- /// Clone the [Rc]
- fn clone(&self) -> Self {
- Self(self.0.clone())
+/// [Interior mutability][1] wrapper for a list of branches.
+/// O(n) insertion, but space efficient. We generally expect
+/// blocks to have only a few branches.
+///
+/// [1]: https://doc.rust-lang.org/std/cell/struct.UnsafeCell.html
+#[repr(transparent)]
+struct MutableBranchList(Cell<Box<[BranchRef]>>);
+
+impl MutableBranchList {
+ fn push(&self, branch: BranchRef) {
+ // Temporary move the boxed slice out of self.
+ // oom=abort is load bearing here...
+ let mut current_list = self.0.take().into_vec();
+ current_list.push(branch);
+ self.0.set(current_list.into_boxed_slice());
}
}
-impl Hash for BlockRef {
- /// Hash the reference by hashing the pointer
- fn hash<H: Hasher>(&self, state: &mut H) {
- let rc_ptr = Rc::as_ptr(&self.0);
- rc_ptr.hash(state);
- }
-}
+impl fmt::Debug for MutableBranchList {
+ fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // SAFETY: the derived Clone for boxed slices does not mutate this Cell
+ let branches = unsafe { self.0.ref_unchecked().clone() };
-impl PartialEq for BlockRef {
- /// Equality defined by allocation identity
- fn eq(&self, other: &Self) -> bool {
- Rc::ptr_eq(&self.0, &other.0)
+ formatter.debug_list().entries(branches.into_iter()).finish()
}
}
-/// It's comparison by identity so all the requirements are statisfied
-impl Eq for BlockRef {}
-
/// This is all the data YJIT stores on an iseq
/// This will be dynamically allocated by C code
/// C code should pass an &mut IseqPayload to us
/// when calling into YJIT
#[derive(Default)]
pub struct IseqPayload {
- version_map: VersionMap,
+ // Basic block versions
+ pub version_map: VersionMap,
+
+ // Indexes of code pages used by this ISEQ
+ pub pages: HashSet<usize>,
+
+ // List of ISEQ entry codes
+ pub entries: Vec<EntryRef>,
+
+ // Blocks that are invalidated but are not yet deallocated.
+ // The code GC will free them later.
+ pub dead_blocks: Vec<BlockRef>,
}
impl IseqPayload {
@@ -424,14 +1037,14 @@ impl IseqPayload {
/// Get the payload for an iseq. For safety it's up to the caller to ensure the returned `&mut`
/// upholds aliasing rules and that the argument is a valid iseq.
-pub unsafe fn load_iseq_payload(iseq: IseqPtr) -> Option<&'static mut IseqPayload> {
- let payload = rb_iseq_get_yjit_payload(iseq);
+pub fn get_iseq_payload(iseq: IseqPtr) -> Option<&'static mut IseqPayload> {
+ let payload = unsafe { rb_iseq_get_yjit_payload(iseq) };
let payload: *mut IseqPayload = payload.cast();
- payload.as_mut()
+ unsafe { payload.as_mut() }
}
/// Get the payload object associated with an iseq. Create one if none exists.
-fn get_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload {
+pub fn get_or_create_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload {
type VoidPtr = *mut c_void;
let payload_non_null = unsafe {
@@ -444,7 +1057,8 @@ fn get_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload {
// We drop the payload with Box::from_raw when the GC frees the iseq and calls us.
// NOTE(alan): Sometimes we read from an iseq without ever writing to it.
// We allocate in those cases anyways.
- let new_payload = Box::into_raw(Box::new(IseqPayload::default()));
+ let new_payload = IseqPayload::default();
+ let new_payload = Box::into_raw(Box::new(new_payload));
rb_iseq_set_yjit_payload(iseq, new_payload as VoidPtr);
new_payload
@@ -460,10 +1074,76 @@ fn get_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload {
unsafe { payload_non_null.as_mut() }.unwrap()
}
+/// Iterate over all existing ISEQs
+pub fn for_each_iseq<F: FnMut(IseqPtr)>(mut callback: F) {
+ unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) {
+ // SAFETY: points to the local below
+ let callback: &mut &mut dyn FnMut(IseqPtr) -> bool = unsafe { std::mem::transmute(&mut *data) };
+ callback(iseq);
+ }
+ let mut data: &mut dyn FnMut(IseqPtr) = &mut callback;
+ unsafe { rb_yjit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) };
+}
+
+/// Iterate over all ISEQ payloads
+pub fn for_each_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) {
+ for_each_iseq(|iseq| {
+ if let Some(iseq_payload) = get_iseq_payload(iseq) {
+ callback(iseq_payload);
+ }
+ });
+}
+
+/// Iterate over all on-stack ISEQs
+pub fn for_each_on_stack_iseq<F: FnMut(IseqPtr)>(mut callback: F) {
+ unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) {
+ // SAFETY: points to the local below
+ let callback: &mut &mut dyn FnMut(IseqPtr) -> bool = unsafe { std::mem::transmute(&mut *data) };
+ callback(iseq);
+ }
+ let mut data: &mut dyn FnMut(IseqPtr) = &mut callback;
+ unsafe { rb_jit_cont_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) };
+}
+
+/// Iterate over all on-stack ISEQ payloads
+pub fn for_each_on_stack_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) {
+ for_each_on_stack_iseq(|iseq| {
+ if let Some(iseq_payload) = get_iseq_payload(iseq) {
+ callback(iseq_payload);
+ }
+ });
+}
+
+/// Iterate over all NOT on-stack ISEQ payloads
+pub fn for_each_off_stack_iseq_payload<F: FnMut(&mut IseqPayload)>(mut callback: F) {
+ // Get all ISEQs on the heap. Note that rb_objspace_each_objects() runs GC first,
+ // which could move ISEQ pointers when GC.auto_compact = true.
+ // So for_each_on_stack_iseq() must be called after this, which doesn't run GC.
+ let mut iseqs: Vec<IseqPtr> = vec![];
+ for_each_iseq(|iseq| iseqs.push(iseq));
+
+ // Get all ISEQs that are on a CFP of existing ECs.
+ let mut on_stack_iseqs: HashSet<IseqPtr> = HashSet::new();
+ for_each_on_stack_iseq(|iseq| { on_stack_iseqs.insert(iseq); });
+
+ // Invoke the callback for iseqs - on_stack_iseqs
+ for iseq in iseqs {
+ if !on_stack_iseqs.contains(&iseq) {
+ if let Some(iseq_payload) = get_iseq_payload(iseq) {
+ callback(iseq_payload);
+ }
+ }
+ }
+}
+
/// Free the per-iseq payload
#[no_mangle]
-pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) {
+pub extern "C" fn rb_yjit_iseq_free(iseq: IseqPtr) {
+ // Free invariants for the ISEQ
+ iseq_free_invariants(iseq);
+
let payload = {
+ let payload = unsafe { rb_iseq_get_yjit_payload(iseq) };
if payload.is_null() {
// Nothing to free.
return;
@@ -472,31 +1152,50 @@ pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) {
}
};
- use crate::invariants;
-
// Take ownership of the payload with Box::from_raw().
// It drops right before this function returns.
// SAFETY: We got the pointer from Box::into_raw().
let payload = unsafe { Box::from_raw(payload) };
- // Remove all blocks in the payload from global invariants table.
+ // Free all blocks in version_map. The GC doesn't free running iseqs.
for versions in &payload.version_map {
for block in versions {
- invariants::block_assumptions_free(&block);
+ // SAFETY: blocks in the version_map are always well connected
+ unsafe { free_block(*block, true) };
}
}
+
+ // Free dead blocks
+ for block in payload.dead_blocks {
+ unsafe { free_block(block, false) };
+ }
+
+ // Free all entries
+ for entryref in payload.entries.iter() {
+ let entry = unsafe { Box::from_raw(entryref.as_ptr()) };
+ mem::drop(entry);
+ }
+
+ // Increment the freed iseq count
+ incr_counter!(freed_iseq_count);
}
-/// GC callback for marking GC objects in the the per-iseq payload.
+/// GC callback for marking GC objects in the per-iseq payload.
#[no_mangle]
pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) {
let payload = if payload.is_null() {
// Nothing to mark.
return;
} else {
- // SAFETY: It looks like the GC takes the VM lock while marking
- // so we should be satisfying aliasing rules here.
- unsafe { &*(payload as *const IseqPayload) }
+ // SAFETY: The GC takes the VM lock while marking, which
+ // we assert, so we should be synchronized and data race free.
+ //
+ // For aliasing, having the VM lock hopefully also implies that no one
+ // else has an overlapping &mut IseqPayload.
+ unsafe {
+ rb_yjit_assert_holding_vm_lock();
+ &*(payload as *const IseqPayload)
+ }
};
// For marking VALUEs written into the inline code block.
@@ -505,29 +1204,56 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) {
for versions in &payload.version_map {
for block in versions {
- let block = block.borrow();
+ // SAFETY: all blocks inside version_map are initialized.
+ let block = unsafe { block.as_ref() };
+ mark_block(block, cb, false);
+ }
+ }
+ // Mark dead blocks, since there could be stubs pointing at them
+ for blockref in &payload.dead_blocks {
+ // SAFETY: dead blocks come from version_map, which only have initialized blocks
+ let block = unsafe { blockref.as_ref() };
+ mark_block(block, cb, true);
+ }
- unsafe { rb_gc_mark_movable(block.blockid.iseq.into()) };
+ return;
- // Mark method entry dependencies
- for cme_dep in &block.cme_dependencies {
- unsafe { rb_gc_mark_movable(cme_dep.receiver_klass) };
- unsafe { rb_gc_mark_movable(cme_dep.callee_cme.into()) };
- }
+ fn mark_block(block: &Block, cb: &CodeBlock, dead: bool) {
+ unsafe { rb_gc_mark_movable(block.iseq.get().into()) };
- // Mark outgoing branch entries
- for branch in &block.outgoing {
- let branch = branch.borrow();
- for target in &branch.targets {
- if let Some(target) = target {
- unsafe { rb_gc_mark_movable(target.iseq.into()) };
- }
+ // Mark method entry dependencies
+ for cme_dep in block.cme_dependencies.iter() {
+ unsafe { rb_gc_mark_movable(cme_dep.get().into()) };
+ }
+
+ // Mark outgoing branch entries
+ for branch in block.outgoing.iter() {
+ let branch = unsafe { branch.as_ref() };
+ for target in branch.targets.iter() {
+ // SAFETY: no mutation inside unsafe
+ let target_iseq = unsafe {
+ target.ref_unchecked().as_ref().and_then(|target| {
+ // Avoid get_blockid() on blockref. Can be dangling on dead blocks,
+ // and the iseq housing the block already naturally handles it.
+ if target.get_block().is_some() {
+ None
+ } else {
+ Some(target.get_blockid().iseq)
+ }
+ })
+ };
+
+ if let Some(target_iseq) = target_iseq {
+ unsafe { rb_gc_mark_movable(target_iseq.into()) };
}
}
+ }
- // Walk over references to objects in generated code.
- for offset in &block.gc_object_offsets {
- let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr();
+ // Mark references to objects in generated code.
+ // Skip for dead blocks since they shouldn't run.
+ if !dead {
+ for offset in block.gc_obj_offsets.iter() {
+ let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(cb);
// Creating an unaligned pointer is well defined unlike in C.
let value_address = value_address as *const VALUE;
@@ -541,17 +1267,24 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) {
}
}
-/// GC callback for updating GC objects in the the per-iseq payload.
+/// GC callback for updating GC objects in the per-iseq payload.
/// This is a mirror of [rb_yjit_iseq_mark].
#[no_mangle]
-pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
+pub extern "C" fn rb_yjit_iseq_update_references(iseq: IseqPtr) {
+ let payload = unsafe { rb_iseq_get_yjit_payload(iseq) };
let payload = if payload.is_null() {
// Nothing to update.
return;
} else {
- // SAFETY: It looks like the GC takes the VM lock while updating references
- // so we should be satisfying aliasing rules here.
- unsafe { &*(payload as *const IseqPayload) }
+ // SAFETY: The GC takes the VM lock while marking, which
+ // we assert, so we should be synchronized and data race free.
+ //
+ // For aliasing, having the VM lock hopefully also implies that no one
+ // else has an overlapping &mut IseqPayload.
+ unsafe {
+ rb_yjit_assert_holding_vm_lock();
+ &*(payload as *const IseqPayload)
+ }
};
// Evict other threads from generated code since we are about to patch them.
@@ -562,32 +1295,73 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
let cb = CodegenGlobals::get_inline_cb();
for versions in &payload.version_map {
- for block in versions {
- let mut block = block.borrow_mut();
+ for version in versions {
+ // SAFETY: all blocks inside version_map are initialized
+ let block = unsafe { version.as_ref() };
+ block_update_references(block, cb, false);
+ }
+ }
+ // Update dead blocks, since there could be stubs pointing at them
+ for blockref in &payload.dead_blocks {
+ // SAFETY: dead blocks come from version_map, which only have initialized blocks
+ let block = unsafe { blockref.as_ref() };
+ block_update_references(block, cb, true);
+ }
- block.blockid.iseq = unsafe { rb_gc_location(block.blockid.iseq.into()) }.as_iseq();
+ // Note that we would have returned already if YJIT is off.
+ cb.mark_all_executable();
- // Update method entry dependencies
- for cme_dep in &mut block.cme_dependencies {
- cme_dep.receiver_klass = unsafe { rb_gc_location(cme_dep.receiver_klass) };
- cme_dep.callee_cme = unsafe { rb_gc_location(cme_dep.callee_cme.into()) }.as_cme();
- }
+ CodegenGlobals::get_outlined_cb()
+ .unwrap()
+ .mark_all_executable();
- // Update outgoing branch entries
- for branch in &block.outgoing {
- let mut branch = branch.borrow_mut();
- for target in &mut branch.targets {
- if let Some(target) = target {
- target.iseq = unsafe { rb_gc_location(target.iseq.into()) }.as_iseq();
- }
+ return;
+
+ fn block_update_references(block: &Block, cb: &mut CodeBlock, dead: bool) {
+ block.iseq.set(unsafe { rb_gc_location(block.iseq.get().into()) }.as_iseq());
+
+ // Update method entry dependencies
+ for cme_dep in block.cme_dependencies.iter() {
+ let cur_cme: VALUE = cme_dep.get().into();
+ let new_cme = unsafe { rb_gc_location(cur_cme) }.as_cme();
+ cme_dep.set(new_cme);
+ }
+
+ // Update outgoing branch entries
+ for branch in block.outgoing.iter() {
+ let branch = unsafe { branch.as_ref() };
+ for target in branch.targets.iter() {
+ // SAFETY: no mutation inside unsafe
+ let current_iseq = unsafe {
+ target.ref_unchecked().as_ref().and_then(|target| {
+ // Avoid get_blockid() on blockref. Can be dangling on dead blocks,
+ // and the iseq housing the block already naturally handles it.
+ if target.get_block().is_some() {
+ None
+ } else {
+ Some(target.get_blockid().iseq)
+ }
+ })
+ };
+
+ if let Some(current_iseq) = current_iseq {
+ let updated_iseq = unsafe { rb_gc_location(current_iseq.into()) }
+ .as_iseq();
+ // SAFETY: the Cell::set is not on the reference given out
+ // by ref_unchecked.
+ unsafe { target.ref_unchecked().as_ref().unwrap().set_iseq(updated_iseq) };
}
}
+ }
- // Walk over references to objects in generated code.
- for offset in &block.gc_object_offsets {
+ // Update references to objects in generated code.
+ // Skip for dead blocks since they shouldn't run, and
+ // so there is no risk of writing over invalidation jumps
+ if !dead {
+ for offset in block.gc_obj_offsets.iter() {
let offset_to_value = offset.as_usize();
let value_code_ptr = cb.get_ptr(offset_to_value);
- let value_ptr: *const u8 = value_code_ptr.raw_ptr();
+ let value_ptr: *const u8 = value_code_ptr.raw_ptr(cb);
// Creating an unaligned pointer is well defined unlike in C.
let value_ptr = value_ptr as *mut VALUE;
@@ -599,25 +1373,30 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
if new_addr != object {
for (byte_idx, &byte) in new_addr.as_u64().to_le_bytes().iter().enumerate() {
let byte_code_ptr = value_code_ptr.add_bytes(byte_idx);
- cb.get_mem().write_byte(byte_code_ptr, byte)
+ cb.write_mem(byte_code_ptr, byte)
.expect("patching existing code should be within bounds");
}
}
}
}
- }
-
- // Note that we would have returned already if YJIT is off.
- cb.mark_all_executable();
- CodegenGlobals::get_outlined_cb()
- .unwrap()
- .mark_all_executable();
+ }
}
/// Get all blocks for a particular place in an iseq.
-fn get_version_list(blockid: BlockId) -> &'static mut VersionList {
- let payload = get_iseq_payload(blockid.iseq);
+fn get_version_list(blockid: BlockId) -> Option<&'static mut VersionList> {
+ let insn_idx = blockid.idx.as_usize();
+ match get_iseq_payload(blockid.iseq) {
+ Some(payload) if insn_idx < payload.version_map.len() => {
+ Some(payload.version_map.get_mut(insn_idx).unwrap())
+ },
+ _ => None
+ }
+}
+
+/// Get or create all blocks for a particular place in an iseq.
+fn get_or_create_version_list(blockid: BlockId) -> &'static mut VersionList {
+ let payload = get_or_create_iseq_payload(blockid.iseq);
let insn_idx = blockid.idx.as_usize();
// Expand the version map as necessary
@@ -632,32 +1411,39 @@ fn get_version_list(blockid: BlockId) -> &'static mut VersionList {
/// Take all of the blocks for a particular place in an iseq
pub fn take_version_list(blockid: BlockId) -> VersionList {
- let payload = get_iseq_payload(blockid.iseq);
let insn_idx = blockid.idx.as_usize();
-
- if insn_idx >= payload.version_map.len() {
- VersionList::default()
- } else {
- mem::take(&mut payload.version_map[insn_idx])
+ match get_iseq_payload(blockid.iseq) {
+ Some(payload) if insn_idx < payload.version_map.len() => {
+ mem::take(&mut payload.version_map[insn_idx])
+ },
+ _ => VersionList::default(),
}
}
/// Count the number of block versions matching a given blockid
-fn get_num_versions(blockid: BlockId) -> usize {
+/// `inlined: true` counts inlined versions, and `inlined: false` counts other versions.
+fn get_num_versions(blockid: BlockId, inlined: bool) -> usize {
let insn_idx = blockid.idx.as_usize();
- let payload = get_iseq_payload(blockid.iseq);
-
- payload
- .version_map
- .get(insn_idx)
- .map(|versions| versions.len())
- .unwrap_or(0)
+ match get_iseq_payload(blockid.iseq) {
+ Some(payload) => {
+ payload
+ .version_map
+ .get(insn_idx)
+ .map(|versions| {
+ versions.iter().filter(|&&version|
+ unsafe { version.as_ref() }.ctx.inline() == inlined
+ ).count()
+ })
+ .unwrap_or(0)
+ }
+ None => 0,
+ }
}
-/// Get a list of block versions generated for an iseq
+/// Get or create a list of block versions generated for an iseq
/// This is used for disassembly (see disasm.rs)
-pub fn get_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> {
- let payload = get_iseq_payload(iseq);
+pub fn get_or_create_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> {
+ let payload = get_or_create_iseq_payload(iseq);
let mut blocks = Vec::<BlockRef>::new();
@@ -668,7 +1454,7 @@ pub fn get_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> {
// For each version at this instruction index
for version in version_list {
// Clone the block ref and add it to the list
- blocks.push(version.clone());
+ blocks.push(*version);
}
}
@@ -678,82 +1464,125 @@ pub fn get_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> {
/// Retrieve a basic block version for an (iseq, idx) tuple
/// This will return None if no version is found
fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
- let versions = get_version_list(blockid);
+ let versions = match get_version_list(blockid) {
+ Some(versions) => versions,
+ None => return None,
+ };
// Best match found
let mut best_version: Option<BlockRef> = None;
let mut best_diff = usize::MAX;
// For each version matching the blockid
- for blockref in versions.iter_mut() {
- let block = blockref.borrow();
- let diff = ctx.diff(&block.ctx);
+ for blockref in versions.iter() {
+ let block = unsafe { blockref.as_ref() };
// Note that we always prefer the first matching
// version found because of inline-cache chains
- if diff < best_diff {
- best_version = Some(blockref.clone());
- best_diff = diff;
- }
- }
-
- // If greedy versioning is enabled
- if get_option!(greedy_versioning) {
- // If we're below the version limit, don't settle for an imperfect match
- if versions.len() + 1 < get_option!(max_versions) && best_diff > 0 {
- return None;
+ match ctx.diff(&block.ctx) {
+ TypeDiff::Compatible(diff) if diff < best_diff => {
+ best_version = Some(*blockref);
+ best_diff = diff;
+ }
+ _ => {}
}
}
return best_version;
}
+/// Allow inlining a Block up to MAX_INLINE_VERSIONS times.
+const MAX_INLINE_VERSIONS: usize = 1000;
+
/// Produce a generic context when the block version limit is hit for a blockid
pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
// Guard chains implement limits separately, do nothing
- if ctx.chain_depth > 0 {
+ if ctx.get_chain_depth() > 0 {
return *ctx;
}
+ let next_versions = get_num_versions(blockid, ctx.inline()) + 1;
+ let max_versions = if ctx.inline() {
+ MAX_INLINE_VERSIONS
+ } else {
+ get_option!(max_versions)
+ };
+
// If this block version we're about to add will hit the version limit
- if get_num_versions(blockid) + 1 >= get_option!(max_versions) {
+ if next_versions >= max_versions {
// Produce a generic context that stores no type information,
// but still respects the stack_size and sp_offset constraints.
// This new context will then match all future requests.
- let mut generic_ctx = Context::default();
- generic_ctx.stack_size = ctx.stack_size;
- generic_ctx.sp_offset = ctx.sp_offset;
+ let generic_ctx = ctx.get_generic_ctx();
+
+ if cfg!(debug_assertions) {
+ let mut ctx = ctx.clone();
+ if ctx.inline() {
+ // Suppress TypeDiff::Incompatible from ctx.diff(). We return TypeDiff::Incompatible
+ // to keep inlining blocks until we hit the limit, but it's safe to give up inlining.
+ ctx.inline_block = 0;
+ assert!(generic_ctx.inline_block == 0);
+ }
+
+ assert_ne!(
+ TypeDiff::Incompatible,
+ ctx.diff(&generic_ctx),
+ "should substitute a compatible context",
+ );
+ }
- // Mutate the incoming context
return generic_ctx;
}
+ incr_counter_to!(max_inline_versions, next_versions);
return *ctx;
}
-/// Keep track of a block version. Block should be fully constructed.
-/// Uses `cb` for running write barriers.
-fn add_block_version(blockref: &BlockRef, cb: &CodeBlock) {
- let block = blockref.borrow();
+/// Install a block version into its [IseqPayload], letting the GC track its
+/// lifetime, and allowing it to be considered for use for other
+/// blocks we might generate. Uses `cb` for running write barriers.
+///
+/// # Safety
+///
+/// The block must be fully initialized. Its incoming and outgoing edges,
+/// if there are any, must point to initialized blocks, too.
+///
+/// Note that the block might gain edges after this function returns,
+/// as can happen during [gen_block_series]. Initialized here doesn't mean
+/// ready to be consumed or that the machine code tracked by the block is
+/// ready to be run.
+///
+/// Due to this transient state where a block is tracked by the GC by
+/// being inside an [IseqPayload] but not ready to be executed, it's
+/// generally unsound to call any Ruby methods during codegen. That has
+/// the potential to run blocks which are not ready.
+unsafe fn add_block_version(blockref: BlockRef, cb: &CodeBlock) {
+ // SAFETY: caller ensures initialization
+ let block = unsafe { blockref.as_ref() };
// Function entry blocks must have stack size 0
- assert!(!(block.blockid.idx == 0 && block.ctx.stack_size > 0));
+ assert!(!(block.iseq_range.start == 0 && block.ctx.stack_size > 0));
+
+ let version_list = get_or_create_version_list(block.get_blockid());
- let version_list = get_version_list(block.blockid);
+ // If this is the first block being compiled with this block id
+ if version_list.len() == 0 {
+ incr_counter!(compiled_blockid_count);
+ }
- version_list.push(blockref.clone());
+ version_list.push(blockref);
+ version_list.shrink_to_fit();
// By writing the new block to the iseq, the iseq now
// contains new references to Ruby objects. Run write barriers.
- let iseq: VALUE = block.blockid.iseq.into();
+ let iseq: VALUE = block.iseq.get().into();
for dep in block.iter_cme_deps() {
- obj_written!(iseq, dep.receiver_klass);
- obj_written!(iseq, dep.callee_cme.into());
+ obj_written!(iseq, dep.into());
}
// Run write barriers for all objects in generated code.
- for offset in &block.gc_object_offsets {
- let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr();
+ for offset in block.gc_obj_offsets.iter() {
+ let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(cb);
// Creating an unaligned pointer is well defined, unlike in C.
let value_address: *const VALUE = value_address.cast();
@@ -762,251 +1591,244 @@ fn add_block_version(blockref: &BlockRef, cb: &CodeBlock) {
}
incr_counter!(compiled_block_count);
+
+ // Mark code pages for code GC
+ let iseq_payload = get_iseq_payload(block.iseq.get()).unwrap();
+ for page in cb.addrs_to_pages(block.start_addr, block.end_addr.get()) {
+ iseq_payload.pages.insert(page);
+ }
}
/// Remove a block version from the version map of its parent ISEQ
fn remove_block_version(blockref: &BlockRef) {
- let block = blockref.borrow();
- let version_list = get_version_list(block.blockid);
+ let block = unsafe { blockref.as_ref() };
+ let version_list = match get_version_list(block.get_blockid()) {
+ Some(version_list) => version_list,
+ None => return,
+ };
// Retain the versions that are not this one
version_list.retain(|other| blockref != other);
}
-//===========================================================================
-// I put the implementation of traits for core.rs types below
-// We can move these closer to the above structs later if we want.
-//===========================================================================
+impl JITState {
+ // Finish compiling and turn a JIT state into a block.
+ // Note that the block is not yet installed in its ISEQ's version map.
+ pub fn into_block(self, end_insn_idx: IseqIdx, start_addr: CodePtr, end_addr: CodePtr, gc_obj_offsets: Vec<u32>) -> BlockRef {
+ // Allocate the block and get its pointer
+ let blockref: *mut MaybeUninit<Block> = Box::into_raw(Box::new(MaybeUninit::uninit()));
+
+ incr_counter_by!(num_gc_obj_refs, gc_obj_offsets.len());
+
+ // Make the new block
+ let block = MaybeUninit::new(Block {
+ start_addr,
+ iseq: Cell::new(self.get_iseq()),
+ iseq_range: self.get_starting_insn_idx()..end_insn_idx,
+ ctx: self.get_starting_ctx(),
+ end_addr: Cell::new(end_addr),
+ incoming: MutableBranchList(Cell::default()),
+ gc_obj_offsets: gc_obj_offsets.into_boxed_slice(),
+ entry_exit: self.get_block_entry_exit(),
+ cme_dependencies: self.method_lookup_assumptions.into_iter().map(Cell::new).collect(),
+ // Pending branches => actual branches
+ outgoing: self.pending_outgoing.into_iter().map(|pending_out| {
+ let pending_out = Rc::try_unwrap(pending_out)
+ .ok().expect("all PendingBranchRefs should be unique when ready to construct a Block");
+ pending_out.into_branch(NonNull::new(blockref as *mut Block).expect("no null from Box"))
+ }).collect()
+ });
+ // Initialize it on the heap
+ // SAFETY: allocated with Box above
+ unsafe { ptr::write(blockref, block) };
-impl Block {
- pub fn new(blockid: BlockId, ctx: &Context) -> BlockRef {
- let block = Block {
- blockid,
- end_idx: 0,
- ctx: *ctx,
- start_addr: None,
- end_addr: None,
- incoming: Vec::new(),
- outgoing: Vec::new(),
- gc_object_offsets: Vec::new(),
- cme_dependencies: Vec::new(),
- entry_exit: None,
- };
+ // Block is initialized now. Note that MaybeUninit<T> has the same layout as T.
+ let blockref = NonNull::new(blockref as *mut Block).expect("no null from Box");
+
+ // Track all the assumptions the block makes as invariants
+ if self.block_assumes_single_ractor {
+ track_single_ractor_assumption(blockref);
+ }
+ for bop in self.bop_assumptions {
+ track_bop_assumption(blockref, bop);
+ }
+ // SAFETY: just allocated it above
+ for cme in unsafe { blockref.as_ref() }.cme_dependencies.iter() {
+ track_method_lookup_stability_assumption(blockref, cme.get());
+ }
+ if let Some(idlist) = self.stable_constant_names_assumption {
+ track_stable_constant_names_assumption(blockref, idlist);
+ }
+ for klass in self.no_singleton_class_assumptions {
+ track_no_singleton_class_assumption(blockref, klass);
+ }
+ if self.no_ep_escape {
+ track_no_ep_escape_assumption(blockref, self.iseq);
+ }
- // Wrap the block in a reference counted refcell
- // so that the block ownership can be shared
- BlockRef::new(Rc::new(RefCell::new(block)))
+ blockref
}
+}
+impl Block {
pub fn get_blockid(&self) -> BlockId {
- self.blockid
+ BlockId { iseq: self.iseq.get(), idx: self.iseq_range.start }
}
- pub fn get_end_idx(&self) -> u32 {
- self.end_idx
+ pub fn get_end_idx(&self) -> IseqIdx {
+ self.iseq_range.end
}
- pub fn get_ctx(&self) -> Context {
- self.ctx
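+ /// Count the contexts this block encodes: its own context plus one per
+ /// remaining stub among its outgoing branches.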
+ pub fn get_ctx_count(&self) -> usize {
+ let mut count = 1; // block.ctx
+ for branch in self.outgoing.iter() {
+ // SAFETY: &self implies it's initialized
+ count += unsafe { branch.as_ref() }.get_stub_count();
+ }
+ count
}
#[allow(unused)]
- pub fn get_start_addr(&self) -> Option<CodePtr> {
+ pub fn get_start_addr(&self) -> CodePtr {
self.start_addr
}
#[allow(unused)]
- pub fn get_end_addr(&self) -> Option<CodePtr> {
- self.end_addr
+ pub fn get_end_addr(&self) -> CodePtr {
+ self.end_addr.get()
}
/// Get an immutable iterator over cme dependencies
- pub fn iter_cme_deps(&self) -> std::slice::Iter<'_, CmeDependency> {
- self.cme_dependencies.iter()
- }
-
- /// Set the starting address in the generated code for the block
- /// This can be done only once for a block
- pub fn set_start_addr(&mut self, addr: CodePtr) {
- assert!(self.start_addr.is_none());
- self.start_addr = Some(addr);
- }
-
- /// Set the end address in the generated for the block
- /// This can be done only once for a block
- pub fn set_end_addr(&mut self, addr: CodePtr) {
- // The end address can only be set after the start address is set
- assert!(self.start_addr.is_some());
-
- // TODO: assert constraint that blocks can shrink but not grow in length
- self.end_addr = Some(addr);
- }
-
- /// Set the index of the last instruction in the block
- /// This can be done only once for a block
- pub fn set_end_idx(&mut self, end_idx: u32) {
- assert!(self.end_idx == 0);
- self.end_idx = end_idx;
- }
-
- pub fn add_gc_object_offset(self: &mut Block, ptr_offset: u32) {
- self.gc_object_offsets.push(ptr_offset);
+ pub fn iter_cme_deps(&self) -> impl Iterator<Item = CmePtr> + '_ {
+ self.cme_dependencies.iter().map(Cell::get)
}
- /// Instantiate a new CmeDependency struct and add it to the list of
- /// dependencies for this block.
- pub fn add_cme_dependency(
- &mut self,
- receiver_klass: VALUE,
- callee_cme: *const rb_callable_method_entry_t,
- ) {
- self.cme_dependencies.push(CmeDependency {
- receiver_klass,
- callee_cme,
- });
+ // Push an incoming branch ref and shrink the vector
+ fn push_incoming(&self, branch: BranchRef) {
+ self.incoming.push(branch);
}
// Compute the size of the block code
pub fn code_size(&self) -> usize {
- (self.end_addr.unwrap().raw_ptr() as usize) - (self.start_addr.unwrap().raw_ptr() as usize)
+ (self.end_addr.get().as_offset() - self.start_addr.as_offset()).try_into().unwrap()
}
}
impl Context {
- pub fn new_with_stack_size(size: i16) -> Self {
- return Context {
- stack_size: size as u16,
- sp_offset: size,
- chain_depth: 0,
- local_types: [Type::Unknown; MAX_LOCAL_TYPES],
- temp_types: [Type::Unknown; MAX_TEMP_TYPES],
- self_type: Type::Unknown,
- temp_mapping: [MapToStack; MAX_TEMP_TYPES],
- };
+ pub fn get_stack_size(&self) -> u8 {
+ self.stack_size
}
- pub fn new() -> Self {
- return Self::new_with_stack_size(0);
+ pub fn set_stack_size(&mut self, stack_size: u8) {
+ self.stack_size = stack_size;
}
- pub fn get_stack_size(&self) -> u16 {
- self.stack_size
+ /// Create a new Context that is compatible with self but doesn't have type information.
+ pub fn get_generic_ctx(&self) -> Context {
+ let mut generic_ctx = Context::default();
+ generic_ctx.stack_size = self.stack_size;
+ generic_ctx.sp_offset = self.sp_offset;
+ generic_ctx.reg_temps = self.reg_temps;
+ if self.is_return_landing() {
+ generic_ctx.set_as_return_landing();
+ }
+ if self.is_deferred() {
+ generic_ctx.mark_as_deferred();
+ }
+ generic_ctx
+ }
+
+ /// Create a new Context instance with a given stack_size and sp_offset adjusted
+ /// accordingly. This is useful when you want to virtually rewind the stack_size to
+ /// generate a side exit while accounting for past sp_offset changes made by gen_save_sp.
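+ /// For example, shrinking stack_size from 3 to 1 also subtracts 2 from sp_offset.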
+ pub fn with_stack_size(&self, stack_size: u8) -> Context {
+ let mut ctx = *self;
+ ctx.sp_offset -= (ctx.get_stack_size() as isize - stack_size as isize) as i8;
+ ctx.stack_size = stack_size;
+ ctx
}
- pub fn get_sp_offset(&self) -> i16 {
+ pub fn get_sp_offset(&self) -> i8 {
self.sp_offset
}
- pub fn set_sp_offset(&mut self, offset: i16) {
+ pub fn set_sp_offset(&mut self, offset: i8) {
self.sp_offset = offset;
}
- pub fn get_chain_depth(&self) -> u8 {
- self.chain_depth
+ pub fn get_reg_temps(&self) -> RegTemps {
+ self.reg_temps
}
- pub fn reset_chain_depth(&mut self) {
- self.chain_depth = 0;
+ pub fn set_reg_temps(&mut self, reg_temps: RegTemps) {
+ self.reg_temps = reg_temps;
}
- pub fn increment_chain_depth(&mut self) {
- self.chain_depth += 1;
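+ /// The chain depth lives in the low bits of chain_depth_and_flags (CHAIN_DEPTH_MASK),
+ /// packed together with the RETURN_LANDING_BIT and DEFER_BIT flags.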
+ pub fn get_chain_depth(&self) -> u8 {
+ self.chain_depth_and_flags & CHAIN_DEPTH_MASK
}
- /// Get an operand for the adjusted stack pointer address
- pub fn sp_opnd(&self, offset_bytes: isize) -> X86Opnd {
- let offset = ((self.sp_offset as isize) * (SIZEOF_VALUE as isize)) + offset_bytes;
- let offset = offset as i32;
- return mem_opnd(64, REG_SP, offset);
+ pub fn reset_chain_depth_and_defer(&mut self) {
+ self.chain_depth_and_flags &= !CHAIN_DEPTH_MASK;
+ self.chain_depth_and_flags &= !DEFER_BIT;
}
- /// Push one new value on the temp stack with an explicit mapping
- /// Return a pointer to the new stack top
- pub fn stack_push_mapping(&mut self, (mapping, temp_type): (TempMapping, Type)) -> X86Opnd {
- // If type propagation is disabled, store no types
- if get_option!(no_type_prop) {
- return self.stack_push_mapping((mapping, Type::Unknown));
- }
-
- let stack_size: usize = self.stack_size.into();
-
- // Keep track of the type and mapping of the value
- if stack_size < MAX_TEMP_TYPES {
- self.temp_mapping[stack_size] = mapping;
- self.temp_types[stack_size] = temp_type;
-
- if let MapToLocal(idx) = mapping {
- assert!((idx as usize) < MAX_LOCAL_TYPES);
- }
+ pub fn increment_chain_depth(&mut self) {
+ if self.get_chain_depth() == CHAIN_DEPTH_MASK {
+ panic!("max block version chain depth reached!");
}
-
- self.stack_size += 1;
- self.sp_offset += 1;
-
- // SP points just above the topmost value
- let offset = ((self.sp_offset as i32) - 1) * (SIZEOF_VALUE as i32);
- return mem_opnd(64, REG_SP, offset);
+ self.chain_depth_and_flags += 1;
}
- /// Push one new value on the temp stack
- /// Return a pointer to the new stack top
- pub fn stack_push(&mut self, val_type: Type) -> X86Opnd {
- return self.stack_push_mapping((MapToStack, val_type));
+ pub fn set_as_return_landing(&mut self) {
+ self.chain_depth_and_flags |= RETURN_LANDING_BIT;
}
- /// Push the self value on the stack
- pub fn stack_push_self(&mut self) -> X86Opnd {
- return self.stack_push_mapping((MapToSelf, Type::Unknown));
+ pub fn clear_return_landing(&mut self) {
+ self.chain_depth_and_flags &= !RETURN_LANDING_BIT;
}
- /// Push a local variable on the stack
- pub fn stack_push_local(&mut self, local_idx: usize) -> X86Opnd {
- if local_idx >= MAX_LOCAL_TYPES {
- return self.stack_push(Type::Unknown);
- }
-
- return self.stack_push_mapping((MapToLocal(local_idx as u8), Type::Unknown));
+ pub fn is_return_landing(&self) -> bool {
+ self.chain_depth_and_flags & RETURN_LANDING_BIT != 0
}
- // Pop N values off the stack
- // Return a pointer to the stack top before the pop operation
- pub fn stack_pop(&mut self, n: usize) -> X86Opnd {
- assert!(n <= self.stack_size.into());
-
- // SP points just above the topmost value
- let offset = ((self.sp_offset as i32) - 1) * (SIZEOF_VALUE as i32);
- let top = mem_opnd(64, REG_SP, offset);
-
- // Clear the types of the popped values
- for i in 0..n {
- let idx: usize = (self.stack_size as usize) - i - 1;
-
- if idx < MAX_TEMP_TYPES {
- self.temp_types[idx] = Type::Unknown;
- self.temp_mapping[idx] = MapToStack;
- }
- }
-
- self.stack_size -= n as u16;
- self.sp_offset -= n as i16;
+ pub fn mark_as_deferred(&mut self) {
+ self.chain_depth_and_flags |= DEFER_BIT;
+ }
- return top;
+ pub fn is_deferred(&self) -> bool {
+ self.chain_depth_and_flags & DEFER_BIT != 0
}
- /// Get an operand pointing to a slot on the temp stack
- pub fn stack_opnd(&self, idx: i32) -> X86Opnd {
- // SP points just above the topmost value
- let offset = ((self.sp_offset as i32) - 1 - idx) * (SIZEOF_VALUE as i32);
- let opnd = mem_opnd(64, REG_SP, offset);
- return opnd;
+ /// Get an operand for the adjusted stack pointer address
+ pub fn sp_opnd(&self, offset: i32) -> Opnd {
+ let offset = (self.sp_offset as i32 + offset) * SIZEOF_VALUE_I32;
+ return Opnd::mem(64, SP, offset);
+ }
+
+ /// Get an operand for the adjusted environment pointer address using the SP register.
+ /// This is valid only when a Binding object hasn't been created for the frame.
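+ /// For example, with stack_size == 2 and offset == 0 this resolves to sp_opnd(-3).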
+ pub fn ep_opnd(&self, offset: i32) -> Opnd {
+ let ep_offset = self.get_stack_size() as i32 + 1;
+ self.sp_opnd(-ep_offset + offset)
+ }
+
+ /// Stop using a register for a given stack temp.
+ /// This allows us to reuse the register for a value that we know is dead
+ /// and will no longer be used (e.g. popped stack temp).
+ pub fn dealloc_temp_reg(&mut self, stack_idx: u8) {
+ if stack_idx < MAX_REG_TEMPS {
+ let mut reg_temps = self.get_reg_temps();
+ reg_temps.set(stack_idx, false);
+ self.set_reg_temps(reg_temps);
+ }
}
/// Get the type of an instruction operand
- pub fn get_opnd_type(&self, opnd: InsnOpnd) -> Type {
+ pub fn get_opnd_type(&self, opnd: YARVOpnd) -> Type {
match opnd {
SelfOpnd => self.self_type,
StackOpnd(idx) => {
- let idx = idx as u16;
assert!(idx < self.stack_size);
let stack_idx: usize = (self.stack_size - 1 - idx).into();
@@ -1015,14 +1837,15 @@ impl Context {
return Type::Unknown;
}
- let mapping = self.temp_mapping[stack_idx];
+ let mapping = self.get_temp_mapping(stack_idx);
- match mapping {
+ match mapping.get_kind() {
MapToSelf => self.self_type,
- MapToStack => self.temp_types[(self.stack_size - 1 - idx) as usize],
- MapToLocal(idx) => {
+ MapToStack => mapping.get_type(),
+ MapToLocal => {
+ let idx = mapping.get_local_idx();
assert!((idx as usize) < MAX_LOCAL_TYPES);
- return self.local_types[idx as usize];
+ return self.get_local_type(idx.into());
}
}
}
@@ -1030,15 +1853,90 @@ impl Context {
}
/// Get the currently tracked type for a local variable
- pub fn get_local_type(&self, idx: usize) -> Type {
- *self.local_types.get(idx).unwrap_or(&Type::Unknown)
+ pub fn get_local_type(&self, local_idx: usize) -> Type {
+ if local_idx >= MAX_LOCAL_TYPES {
+ return Type::Unknown
+ } else {
+ // Each type is stored in 4 bits
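+ // e.g. local_types == 0b0010_0001 decodes local 0 as the Type with discriminant 1
+ // and local 1 as discriminant 2 (illustrative values)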
+ let type_bits = (self.local_types >> (4 * local_idx)) & 0b1111;
+ unsafe { transmute::<u8, Type>(type_bits as u8) }
+ }
+ }
+
+ /// Get the current temp mapping for a given stack slot
+ fn get_temp_mapping(&self, temp_idx: usize) -> TempMapping {
+ assert!(temp_idx < MAX_TEMP_TYPES);
+
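+ // Each temp uses 2 bits of temp_mapping_kind for its kind and 4 bits of
+ // temp_payload for either the temp type or the local index.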
+ // Extract the temp mapping kind
+ let kind_bits = (self.temp_mapping_kind >> (2 * temp_idx)) & 0b11;
+ let temp_kind = unsafe { transmute::<u8, TempMappingKind>(kind_bits as u8) };
+
+ // Extract the payload bits (temp type or local idx)
+ let payload_bits = (self.temp_payload >> (4 * temp_idx)) & 0b1111;
+
+ match temp_kind {
+ MapToSelf => TempMapping::map_to_self(),
+
+ MapToStack => {
+ TempMapping::map_to_stack(
+ unsafe { transmute::<u8, Type>(payload_bits as u8) }
+ )
+ }
+
+ MapToLocal => {
+ TempMapping::map_to_local(
+ payload_bits as u8
+ )
+ }
+ }
+ }
+
+ /// Set the current temp mapping for a given stack slot
+ fn set_temp_mapping(&mut self, temp_idx: usize, mapping: TempMapping) {
+ assert!(temp_idx < MAX_TEMP_TYPES);
+
+ // Extract the kind bits
+ let mapping_kind = mapping.get_kind();
+ let kind_bits = unsafe { transmute::<TempMappingKind, u8>(mapping_kind) };
+ assert!(kind_bits <= 0b11);
+
+ // Extract the payload bits
+ let payload_bits = match mapping_kind {
+ MapToSelf => 0,
+
+ MapToStack => {
+ let t = mapping.get_type();
+ unsafe { transmute::<Type, u8>(t) }
+ }
+
+ MapToLocal => {
+ mapping.get_local_idx()
+ }
+ };
+ assert!(payload_bits <= 0b1111);
+
+ // Update the kind bits
+ {
+ let mask_bits = 0b11_u16 << (2 * temp_idx);
+ let shifted_bits = (kind_bits as u16) << (2 * temp_idx);
+ let all_kind_bits = self.temp_mapping_kind as u16;
+ self.temp_mapping_kind = (all_kind_bits & !mask_bits) | shifted_bits;
+ }
+
+ // Update the payload bits
+ {
+ let mask_bits = 0b1111_u32 << (4 * temp_idx);
+ let shifted_bits = (payload_bits as u32) << (4 * temp_idx);
+ let all_payload_bits = self.temp_payload as u32;
+ self.temp_payload = (all_payload_bits & !mask_bits) | shifted_bits;
+ }
}
/// Upgrade (or "learn") the type of an instruction operand
/// This value must be compatible and at least as specific as the previously known type.
/// If this value originated from self, or an lvar, the learned type will be
/// propagated back to its source.
- pub fn upgrade_opnd_type(&mut self, opnd: InsnOpnd, opnd_type: Type) {
+ pub fn upgrade_opnd_type(&mut self, opnd: YARVOpnd, opnd_type: Type) {
// If type propagation is disabled, store no types
if get_option!(no_type_prop) {
return;
@@ -1047,7 +1945,6 @@ impl Context {
match opnd {
SelfOpnd => self.self_type.upgrade(opnd_type),
StackOpnd(idx) => {
- let idx = idx as u16;
assert!(idx < self.stack_size);
let stack_idx = (self.stack_size - 1 - idx) as usize;
@@ -1056,15 +1953,24 @@ impl Context {
return;
}
- let mapping = self.temp_mapping[stack_idx];
+ let mapping = self.get_temp_mapping(stack_idx);
- match mapping {
+ match mapping.get_kind() {
MapToSelf => self.self_type.upgrade(opnd_type),
- MapToStack => self.temp_types[stack_idx].upgrade(opnd_type),
- MapToLocal(idx) => {
- let idx = idx as usize;
+ MapToStack => {
+ let mut temp_type = mapping.get_type();
+ temp_type.upgrade(opnd_type);
+ self.set_temp_mapping(stack_idx, TempMapping::map_to_stack(temp_type));
+ }
+ MapToLocal => {
+ let idx = mapping.get_local_idx() as usize;
assert!(idx < MAX_LOCAL_TYPES);
- self.local_types[idx].upgrade(opnd_type);
+ let mut new_type = self.get_local_type(idx);
+ new_type.upgrade(opnd_type);
+ self.set_local_type(idx, new_type);
+ // Re-attach MapToLocal for this StackOpnd(idx). set_local_type() detaches
+ // all MapToLocal mappings, including the one we're upgrading here.
+ self.set_opnd_mapping(opnd, mapping);
}
}
}
@@ -1076,30 +1982,29 @@ impl Context {
This can be used with stack_push_mapping or set_opnd_mapping to copy
a stack value's type while maintaining the mapping.
*/
- pub fn get_opnd_mapping(&self, opnd: InsnOpnd) -> (TempMapping, Type) {
+ pub fn get_opnd_mapping(&self, opnd: YARVOpnd) -> TempMapping {
let opnd_type = self.get_opnd_type(opnd);
match opnd {
- SelfOpnd => (MapToSelf, opnd_type),
+ SelfOpnd => TempMapping::map_to_self(),
StackOpnd(idx) => {
- let idx = idx as u16;
assert!(idx < self.stack_size);
let stack_idx = (self.stack_size - 1 - idx) as usize;
if stack_idx < MAX_TEMP_TYPES {
- (self.temp_mapping[stack_idx], opnd_type)
+ self.get_temp_mapping(stack_idx)
} else {
// We can't know the source of this stack operand, so we assume it is
// a stack-only temporary. The type will be Type::Unknown.
assert!(opnd_type == Type::Unknown);
- (MapToStack, opnd_type)
+ TempMapping::map_to_stack(opnd_type)
}
}
}
}
/// Overwrite both the type and mapping of a stack operand.
- pub fn set_opnd_mapping(&mut self, opnd: InsnOpnd, (mapping, opnd_type): (TempMapping, Type)) {
+ pub fn set_opnd_mapping(&mut self, opnd: YARVOpnd, mapping: TempMapping) {
match opnd {
SelfOpnd => unreachable!("self always maps to self"),
StackOpnd(idx) => {
@@ -1116,44 +2021,47 @@ impl Context {
return;
}
- self.temp_mapping[stack_idx] = mapping;
-
- // Only used when mapping == MAP_STACK
- self.temp_types[stack_idx] = opnd_type;
+ self.set_temp_mapping(stack_idx, mapping);
}
}
}
/// Set the type of a local variable
pub fn set_local_type(&mut self, local_idx: usize, local_type: Type) {
- let ctx = self;
-
// If type propagation is disabled, store no types
if get_option!(no_type_prop) {
return;
}
if local_idx >= MAX_LOCAL_TYPES {
- return;
+ return
}
// If any values on the stack map to this local we must detach them
- for (i, mapping) in ctx.temp_mapping.iter_mut().enumerate() {
- *mapping = match *mapping {
- MapToStack => MapToStack,
- MapToSelf => MapToSelf,
- MapToLocal(idx) => {
+ for mapping_idx in 0..MAX_TEMP_TYPES {
+ let mapping = self.get_temp_mapping(mapping_idx);
+ let tm = match mapping.get_kind() {
+ MapToStack => mapping,
+ MapToSelf => mapping,
+ MapToLocal => {
+ let idx = mapping.get_local_idx();
if idx as usize == local_idx {
- ctx.temp_types[i] = ctx.local_types[idx as usize];
- MapToStack
+ let local_type = self.get_local_type(local_idx);
+ TempMapping::map_to_stack(local_type)
} else {
- MapToLocal(idx)
+ TempMapping::map_to_local(idx)
}
}
- }
+ };
+ self.set_temp_mapping(mapping_idx, tm);
}
- ctx.local_types[local_idx] = local_type;
+ // Update the type bits
+ let type_bits = local_type as u32;
+ assert!(type_bits <= 0b1111);
+ let mask_bits = 0b1111_u32 << (4 * local_idx);
+ let shifted_bits = type_bits << (4 * local_idx);
+ self.local_types = (self.local_types & !mask_bits) | shifted_bits;
}
/// Erase local variable type information
@@ -1161,99 +2069,238 @@ impl Context {
pub fn clear_local_types(&mut self) {
// When clearing local types we must detach any stack mappings to those
// locals. Even if local values may have changed, stack values will not.
- for (i, mapping) in self.temp_mapping.iter_mut().enumerate() {
- *mapping = match *mapping {
- MapToStack => MapToStack,
- MapToSelf => MapToSelf,
- MapToLocal(idx) => {
- self.temp_types[i] = self.local_types[idx as usize];
- MapToStack
- }
+
+ for mapping_idx in 0..MAX_TEMP_TYPES {
+ let mapping = self.get_temp_mapping(mapping_idx);
+ if mapping.get_kind() == MapToLocal {
+ let local_idx = mapping.get_local_idx() as usize;
+ self.set_temp_mapping(mapping_idx, TempMapping::map_to_stack(self.get_local_type(local_idx)));
}
}
// Clear the local types
- self.local_types = [Type::default(); MAX_LOCAL_TYPES];
+ self.local_types = 0;
+ }
+
+ /// Return true if the code is inlined by the caller
+ pub fn inline(&self) -> bool {
+ self.inline_block != 0
+ }
+
+ /// Set the ISEQ of the block given to the Block of this Context
+ pub fn set_inline_block(&mut self, iseq: IseqPtr) {
+ self.inline_block = iseq as u64
}
/// Compute a difference score for two context objects
- /// Returns 0 if the two contexts are the same
- /// Returns > 0 if different but compatible
- /// Returns usize::MAX if incompatible
- pub fn diff(&self, dst: &Context) -> usize {
+ pub fn diff(&self, dst: &Context) -> TypeDiff {
// Self is the source context (at the end of the predecessor)
let src = self;
// Can only lookup the first version in the chain
- if dst.chain_depth != 0 {
- return usize::MAX;
+ if dst.get_chain_depth() != 0 {
+ return TypeDiff::Incompatible;
}
// Blocks with depth > 0 always produce new versions
// Sidechains cannot overlap
- if src.chain_depth != 0 {
- return usize::MAX;
+ if src.get_chain_depth() != 0 {
+ return TypeDiff::Incompatible;
+ }
+
+ if src.is_return_landing() != dst.is_return_landing() {
+ return TypeDiff::Incompatible;
+ }
+
+ if src.is_deferred() != dst.is_deferred() {
+ return TypeDiff::Incompatible;
}
if dst.stack_size != src.stack_size {
- return usize::MAX;
+ return TypeDiff::Incompatible;
}
if dst.sp_offset != src.sp_offset {
- return usize::MAX;
+ return TypeDiff::Incompatible;
+ }
+
+ if dst.reg_temps != src.reg_temps {
+ return TypeDiff::Incompatible;
}
// Difference sum
let mut diff = 0;
// Check the type of self
- let self_diff = src.self_type.diff(dst.self_type);
+ diff += match src.self_type.diff(dst.self_type) {
+ TypeDiff::Compatible(diff) => diff,
+ TypeDiff::Incompatible => return TypeDiff::Incompatible,
+ };
- if self_diff == usize::MAX {
- return usize::MAX;
+ // Check the block to inline
+ if src.inline_block != dst.inline_block {
+ // find_block_version should not find existing blocks with different
+ // inline_block so that their yield will not be megamorphic.
+ return TypeDiff::Incompatible;
}
- diff += self_diff;
-
// For each local type we track
- for i in 0..src.local_types.len() {
- let t_src = src.local_types[i];
- let t_dst = dst.local_types[i];
- let temp_diff = t_src.diff(t_dst);
-
- if temp_diff == usize::MAX {
- return usize::MAX;
- }
-
- diff += temp_diff;
+ for i in 0..MAX_LOCAL_TYPES {
+ let t_src = src.get_local_type(i);
+ let t_dst = dst.get_local_type(i);
+ diff += match t_src.diff(t_dst) {
+ TypeDiff::Compatible(diff) => diff,
+ TypeDiff::Incompatible => return TypeDiff::Incompatible,
+ };
}
// For each value on the temp stack
for i in 0..src.stack_size {
- let (src_mapping, src_type) = src.get_opnd_mapping(StackOpnd(i));
- let (dst_mapping, dst_type) = dst.get_opnd_mapping(StackOpnd(i));
+ let src_mapping = src.get_opnd_mapping(StackOpnd(i));
+ let dst_mapping = dst.get_opnd_mapping(StackOpnd(i));
// If the two mappings aren't the same
if src_mapping != dst_mapping {
- if dst_mapping == MapToStack {
+ if dst_mapping.get_kind() == MapToStack {
// We can safely drop information about the source of the temp
// stack operand.
diff += 1;
} else {
- return usize::MAX;
+ return TypeDiff::Incompatible;
}
}
- let temp_diff = src_type.diff(dst_type);
+ let src_type = src.get_opnd_type(StackOpnd(i));
+ let dst_type = dst.get_opnd_type(StackOpnd(i));
- if temp_diff == usize::MAX {
- return usize::MAX;
+ diff += match src_type.diff(dst_type) {
+ TypeDiff::Compatible(diff) => diff,
+ TypeDiff::Incompatible => return TypeDiff::Incompatible,
+ };
+ }
+
+ return TypeDiff::Compatible(diff);
+ }
+
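+ /// Guess whether the top two stack values are both Fixnums: peek at the runtime
+ /// values when possible, otherwise fall back to the tracked types. None means unknown.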
+ pub fn two_fixnums_on_stack(&self, jit: &mut JITState) -> Option<bool> {
+ if jit.at_current_insn() {
+ let comptime_recv = jit.peek_at_stack(self, 1);
+ let comptime_arg = jit.peek_at_stack(self, 0);
+ return Some(comptime_recv.fixnum_p() && comptime_arg.fixnum_p());
+ }
+
+ let recv_type = self.get_opnd_type(StackOpnd(1));
+ let arg_type = self.get_opnd_type(StackOpnd(0));
+ match (recv_type, arg_type) {
+ (Type::Fixnum, Type::Fixnum) => Some(true),
+ (Type::Unknown | Type::UnknownImm, Type::Unknown | Type::UnknownImm) => None,
+ _ => Some(false),
+ }
+ }
+}
+
+impl Assembler {
+ /// Push one new value on the temp stack with an explicit mapping
+ /// Return a pointer to the new stack top
+ pub fn stack_push_mapping(&mut self, mapping: TempMapping) -> Opnd {
+ // If type propagation is disabled, store no types
+ if get_option!(no_type_prop) {
+ return self.stack_push_mapping(mapping.without_type());
+ }
+
+ let stack_size: usize = self.ctx.stack_size.into();
+
+ // Keep track of the type and mapping of the value
+ if stack_size < MAX_TEMP_TYPES {
+ self.ctx.set_temp_mapping(stack_size, mapping);
+
+ if mapping.get_kind() == MapToLocal {
+ let idx = mapping.get_local_idx();
+ assert!((idx as usize) < MAX_LOCAL_TYPES);
}
+ }
+
+ // Allocate a register to the stack operand
+ if self.ctx.stack_size < MAX_REG_TEMPS {
+ self.alloc_temp_reg(self.ctx.stack_size);
+ }
+
+ self.ctx.stack_size += 1;
+ self.ctx.sp_offset += 1;
+
+ return self.stack_opnd(0);
+ }
- diff += temp_diff;
+ /// Push one new value on the temp stack
+ /// Return a pointer to the new stack top
+ pub fn stack_push(&mut self, val_type: Type) -> Opnd {
+ return self.stack_push_mapping(TempMapping::map_to_stack(val_type));
+ }
+
+ /// Push the self value on the stack
+ pub fn stack_push_self(&mut self) -> Opnd {
+ return self.stack_push_mapping(TempMapping::map_to_self());
+ }
+
+ /// Push a local variable on the stack
+ pub fn stack_push_local(&mut self, local_idx: usize) -> Opnd {
+ if local_idx >= MAX_LOCAL_TYPES {
+ return self.stack_push(Type::Unknown);
}
- return diff;
+ return self.stack_push_mapping(TempMapping::map_to_local(local_idx as u8));
+ }
+
+ // Pop N values off the stack
+ // Return a pointer to the stack top before the pop operation
+ pub fn stack_pop(&mut self, n: usize) -> Opnd {
+ assert!(n <= self.ctx.stack_size.into());
+
+ let top = self.stack_opnd(0);
+
+ // Clear the types of the popped values
+ for i in 0..n {
+ let idx: usize = (self.ctx.stack_size as usize) - i - 1;
+
+ if idx < MAX_TEMP_TYPES {
+ self.ctx.set_temp_mapping(idx, TempMapping::map_to_stack(Type::Unknown));
+ }
+ }
+
+ self.ctx.stack_size -= n as u8;
+ self.ctx.sp_offset -= n as i8;
+
+ return top;
+ }
+
+ /// Shift stack temps to remove a Symbol for #send.
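+ /// For example, with [recv, :sym, arg] on the stack and argc == 1, arg's mapping is
+ /// copied over the :sym slot and one value is popped, leaving [recv, arg].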
+ pub fn shift_stack(&mut self, argc: usize) {
+ assert!(argc < self.ctx.stack_size.into());
+
+ let method_name_index = (self.ctx.stack_size as usize) - argc - 1;
+
+ for i in method_name_index..(self.ctx.stack_size - 1) as usize {
+ if i < MAX_TEMP_TYPES {
+ let next_arg_mapping = if i + 1 < MAX_TEMP_TYPES {
+ self.ctx.get_temp_mapping(i + 1)
+ } else {
+ TempMapping::map_to_stack(Type::Unknown)
+ };
+ self.ctx.set_temp_mapping(i, next_arg_mapping);
+ }
+ }
+ self.stack_pop(1);
+ }
+
+ /// Get an operand pointing to a slot on the temp stack
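+ /// idx is relative to the top of the stack: stack_opnd(0) is the topmost value.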
+ pub fn stack_opnd(&self, idx: i32) -> Opnd {
+ Opnd::Stack {
+ idx,
+ num_bits: 64,
+ stack_size: self.ctx.stack_size,
+ sp_offset: self.ctx.sp_offset,
+ reg_temps: None, // push_insn will set this
+ }
}
}
@@ -1262,7 +2309,7 @@ impl BlockId {
#[cfg(debug_assertions)]
#[allow(dead_code)]
pub fn dump_src_loc(&self) {
- unsafe { rb_yjit_dump_iseq_loc(self.iseq, self.idx) }
+ unsafe { rb_yjit_dump_iseq_loc(self.iseq, self.idx as u32) }
}
}
@@ -1297,52 +2344,54 @@ fn gen_block_series_body(
// Generate code for the first block
let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb).ok()?;
- batch.push(first_block.clone()); // Keep track of this block version
+ batch.push(first_block); // Keep track of this block version
// Add the block version to the VersionMap for this ISEQ
- add_block_version(&first_block, cb);
+ unsafe { add_block_version(first_block, cb) };
// Loop variable
- let mut last_blockref = first_block.clone();
+ let mut last_blockref = first_block;
loop {
// Get the last outgoing branch from the previous block.
let last_branchref = {
- let last_block = last_blockref.borrow();
+ let last_block = unsafe { last_blockref.as_ref() };
match last_block.outgoing.last() {
- Some(branch) => branch.clone(),
+ Some(branch) => *branch,
None => {
break;
} // If last block has no branches, stop.
}
};
- let mut last_branch = last_branchref.borrow_mut();
+ let last_branch = unsafe { last_branchref.as_ref() };
+
+ incr_counter!(block_next_count);
// gen_direct_jump() can request a block to be placed immediately after by
- // leaving `None`s in the `dst_addrs` array.
- match &last_branch.dst_addrs {
- [None, None] => (),
- _ => {
- break;
- } // If there is no next block to compile, stop
+ // leaving a single target that has a `None` address.
+ // SAFETY: no mutation inside the unsafe block
+ let (requested_blockid, requested_ctx) = unsafe {
+ match (last_branch.targets[0].ref_unchecked(), last_branch.targets[1].ref_unchecked()) {
+ (Some(last_target), None) if last_target.get_address().is_none() => {
+ (last_target.get_blockid(), last_target.get_ctx())
+ }
+ _ => {
+ // We're done when no fallthrough block is requested
+ break;
+ }
+ }
};
- // Get id and context for the new block
- let requested_id = last_branch.targets[0].expect("block id must be filled");
- let requested_ctx = &last_branch.target_ctxs[0];
-
// Generate new block using context from the last branch.
- let result = gen_single_block(requested_id, requested_ctx, ec, cb, ocb);
+ let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb);
// If the block failed to compile
if result.is_err() {
// Remove previously compiled block
// versions from the version map
- for blockref in &batch {
- // FIXME: should be deallocating resources here too
- // e.g. invariants, etc.
- //free_block(blockref)
-
- remove_block_version(blockref);
+ for blockref in batch {
+ remove_block_version(&blockref);
+ // SAFETY: block was well connected because it was in a version_map
+ unsafe { free_block(blockref, false) };
}
// Stop compiling
@@ -1352,37 +2401,50 @@ fn gen_block_series_body(
let new_blockref = result.unwrap();
// Add the block version to the VersionMap for this ISEQ
- add_block_version(&new_blockref, cb);
+ unsafe { add_block_version(new_blockref, cb) };
// Connect the last branch and the new block
- last_branch.blocks[0] = Some(new_blockref.clone());
- last_branch.dst_addrs[0] = new_blockref.borrow().start_addr;
- new_blockref
- .borrow_mut()
- .incoming
- .push(last_branchref.clone());
-
- // This block should immediately follow the last branch
- assert!(new_blockref.borrow().start_addr == last_branch.end_addr);
+ last_branch.targets[0].set(Some(Box::new(BranchTarget::Block(new_blockref))));
+ unsafe { new_blockref.as_ref().incoming.push(last_branchref) };
// Track the block
- batch.push(new_blockref.clone());
+ batch.push(new_blockref);
// Repeat with newest block
last_blockref = new_blockref;
}
+ #[cfg(feature = "disasm")]
+ {
+ // If dump_iseq_disasm is active, see if this iseq's location matches the given substring.
+ // If so, we print the new blocks to the console.
+ if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() {
+ let iseq_location = iseq_get_location(blockid.iseq, blockid.idx);
+ if iseq_location.contains(substr) {
+ let last_block = unsafe { last_blockref.as_ref() };
+ let iseq_range = &last_block.iseq_range;
+ println!("Compiling {} block(s) for {}, ISEQ offsets [{}, {})", batch.len(), iseq_location, iseq_range.start, iseq_range.end);
+ print!("{}", disasm_iseq_insn_range(blockid.iseq, iseq_range.start, iseq_range.end));
+ }
+ }
+ }
+
Some(first_block)
}
/// Generate a block version that is an entry point inserted into an iseq
/// NOTE: this function assumes that the VM lock has been taken
-pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> {
+/// If jit_exception is true, compile JIT code for handling exceptions.
+/// See [jit_compile_exception] for details.
+pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option<*const u8> {
// Compute the current instruction index based on the current PC
- let insn_idx: u32 = unsafe {
- let pc_zero = rb_iseq_pc_at_idx(iseq, 0);
- let ec_pc = get_cfp_pc(get_ec_cfp(ec));
- ec_pc.offset_from(pc_zero).try_into().ok()?
+ let cfp = unsafe { get_ec_cfp(ec) };
+ let insn_idx: u16 = unsafe {
+ let ec_pc = get_cfp_pc(cfp);
+ iseq_pc_to_insn_idx(iseq, ec_pc)?
+ };
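+ // Compute the frame's stack size in VALUE slots from the distance between BP and SP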
+ let stack_size: u8 = unsafe {
+ u8::try_from(get_cfp_sp(cfp).offset_from(get_cfp_bp(cfp))).ok()?
};
// The entry context makes no assumptions about types
@@ -1396,61 +2458,226 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> {
let ocb = CodegenGlobals::get_outlined_cb();
// Write the interpreter entry prologue. Might be NULL when out of memory.
- let code_ptr = gen_entry_prologue(cb, iseq, insn_idx);
+ let code_ptr = gen_entry_prologue(cb, ocb, iseq, insn_idx, jit_exception);
// Try to generate code for the entry block
- let block = gen_block_series(blockid, &Context::default(), ec, cb, ocb);
+ let mut ctx = Context::default();
+ ctx.stack_size = stack_size;
+ let block = gen_block_series(blockid, &ctx, ec, cb, ocb);
cb.mark_all_executable();
ocb.unwrap().mark_all_executable();
match block {
// Compilation failed
- None => return None,
+ None => {
+ // Trigger code GC. This entry point will be recompiled later.
+ if get_option!(code_gc) {
+ cb.code_gc(ocb);
+ }
+ return None;
+ }
// If the block contains no Ruby instructions
Some(block) => {
- let block = block.borrow();
- if block.end_idx == insn_idx {
+ let block = unsafe { block.as_ref() };
+ if block.iseq_range.is_empty() {
return None;
}
}
}
+ // Count the number of entry points we compile
+ incr_counter!(compiled_iseq_entry);
+
// Compilation successful and block not empty
- return code_ptr;
+ code_ptr.map(|ptr| ptr.raw_ptr(cb))
}
-/// Generate code for a branch, possibly rewriting and changing the size of it
-fn regenerate_branch(cb: &mut CodeBlock, branch: &mut Branch) {
- // FIXME
- /*
- if (branch->start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
- // Generating this branch would modify frozen bytes. Do nothing.
- return;
- }
- */
+// Change the entry's jump target from an entry stub to a next entry
+pub fn regenerate_entry(cb: &mut CodeBlock, entryref: &EntryRef, next_entry: CodePtr) {
+ let mut asm = Assembler::new();
+ asm_comment!(asm, "regenerate_entry");
+
+ // gen_entry_guard generates cmp + jne. We're rewriting only jne.
+ asm.jne(next_entry.into());
+ // Move write_pos to rewrite the entry
let old_write_pos = cb.get_write_pos();
+ let old_dropped_bytes = cb.has_dropped_bytes();
+ cb.set_write_ptr(unsafe { entryref.as_ref() }.start_addr);
+ cb.set_dropped_bytes(false);
+ asm.compile(cb, None).expect("can rewrite existing code");
+
+ // Rewind write_pos to the original one
+ assert_eq!(cb.get_write_ptr(), unsafe { entryref.as_ref() }.end_addr);
+ cb.set_pos(old_write_pos);
+ cb.set_dropped_bytes(old_dropped_bytes);
+}
- let mut block = branch.block.borrow_mut();
- let branch_terminates_block = branch.end_addr == block.end_addr;
+pub type PendingEntryRef = Rc<PendingEntry>;
- // Rewrite the branch
- assert!(branch.dst_addrs[0].is_some());
- cb.set_write_ptr(branch.start_addr.unwrap());
- (branch.gen_fn)(
- cb,
- branch.dst_addrs[0].unwrap(),
- branch.dst_addrs[1],
- branch.shape,
+/// Create a new entry reference for an ISEQ
+pub fn new_pending_entry() -> PendingEntryRef {
+ let entry = PendingEntry {
+ uninit_entry: Box::new(MaybeUninit::uninit()),
+ start_addr: Cell::new(None),
+ end_addr: Cell::new(None),
+ };
+ return Rc::new(entry);
+}
+
+c_callable! {
+ /// Generated code calls this function with the SysV calling convention.
+ /// See [gen_call_entry_stub_hit].
+ fn entry_stub_hit(entry_ptr: *const c_void, ec: EcPtr) -> *const u8 {
+ with_compile_time(|| {
+ with_vm_lock(src_loc!(), || {
+ let cb = CodegenGlobals::get_inline_cb();
+ let ocb = CodegenGlobals::get_outlined_cb();
+
+ let addr = entry_stub_hit_body(entry_ptr, ec, cb, ocb)
+ .unwrap_or_else(|| {
+ // Trigger code GC (e.g. no space).
+ // This entry point will be recompiled later.
+ if get_option!(code_gc) {
+ cb.code_gc(ocb);
+ }
+ CodegenGlobals::get_stub_exit_code().raw_ptr(cb)
+ });
+
+ cb.mark_all_executable();
+ ocb.unwrap().mark_all_executable();
+
+ addr
+ })
+ })
+ }
+}
+
+/// Called by the generated code when an entry stub is executed
+fn entry_stub_hit_body(
+ entry_ptr: *const c_void,
+ ec: EcPtr,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb
+) -> Option<*const u8> {
+ // Get ISEQ and insn_idx from the current ec->cfp
+ let cfp = unsafe { get_ec_cfp(ec) };
+ let iseq = unsafe { get_cfp_iseq(cfp) };
+ let insn_idx = iseq_pc_to_insn_idx(iseq, unsafe { get_cfp_pc(cfp) })?;
+ let stack_size: u8 = unsafe {
+ u8::try_from(get_cfp_sp(cfp).offset_from(get_cfp_bp(cfp))).ok()?
+ };
+
+ // Compile a new entry guard as a next entry
+ let next_entry = cb.get_write_ptr();
+ let mut asm = Assembler::new();
+ let pending_entry = gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?;
+ asm.compile(cb, Some(ocb))?;
+
+ // Find or compile a block version
+ let blockid = BlockId { iseq, idx: insn_idx };
+ let mut ctx = Context::default();
+ ctx.stack_size = stack_size;
+ let blockref = match find_block_version(blockid, &ctx) {
+ // If an existing block is found, generate a jump to the block.
+ Some(blockref) => {
+ let mut asm = Assembler::new();
+ asm.jmp(unsafe { blockref.as_ref() }.start_addr.into());
+ asm.compile(cb, Some(ocb))?;
+ Some(blockref)
+ }
+ // If this block hasn't yet been compiled, generate blocks after the entry guard.
+ None => gen_block_series(blockid, &ctx, ec, cb, ocb),
+ };
+
+ // Commit or retry the entry
+ if blockref.is_some() {
+ // Regenerate the previous entry
+ let entryref = NonNull::<Entry>::new(entry_ptr as *mut Entry).expect("Entry should not be null");
+ regenerate_entry(cb, &entryref, next_entry);
+
+ // Write an entry to the heap and push it to the ISEQ
+ let pending_entry = Rc::try_unwrap(pending_entry).ok().expect("PendingEntry should be unique");
+ get_or_create_iseq_payload(iseq).entries.push(pending_entry.into_entry());
+ }
+
+ // Let the stub jump to the block
+ blockref.map(|block| unsafe { block.as_ref() }.start_addr.raw_ptr(cb))
+}
+
+/// Generate a stub that calls entry_stub_hit
+pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option<CodePtr> {
+ let ocb = ocb.unwrap();
+
+ let mut asm = Assembler::new();
+ asm_comment!(asm, "entry stub hit");
+
+ asm.mov(C_ARG_OPNDS[0], entry_address.into());
+
+ // Jump to trampoline to call entry_stub_hit()
+ // Not really a side exit, just don't need a padded jump here.
+ asm.jmp(CodegenGlobals::get_entry_stub_hit_trampoline().as_side_exit());
+
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
+}
+
+/// A trampoline used by gen_entry_stub. entry_stub_hit may issue Code GC, so
+/// calling entry_stub_hit through this globally shared code lets Code GC free entry stubs safely.
+pub fn gen_entry_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> {
+ let ocb = ocb.unwrap();
+ let mut asm = Assembler::new();
+
+ // See gen_entry_guard for how it's used.
+ asm_comment!(asm, "entry_stub_hit() trampoline");
+ let jump_addr = asm.ccall(entry_stub_hit as *mut u8, vec![C_ARG_OPNDS[0], EC]);
+
+ // Jump to the address returned by the entry_stub_hit() call
+ asm.jmp_opnd(jump_addr);
+
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
+}
+
+/// Generate code for a branch, possibly rewriting and changing the size of it
+fn regenerate_branch(cb: &mut CodeBlock, branch: &Branch) {
+ // Remove old comments
+ cb.remove_comments(branch.start_addr, branch.end_addr.get());
+
+ // SAFETY: having a &Branch implies branch.block is initialized.
+ let block = unsafe { branch.block.as_ref() };
+
+ let branch_terminates_block = branch.end_addr.get() == block.get_end_addr();
+
+ // Generate the branch
+ let mut asm = Assembler::new();
+ asm_comment!(asm, "regenerate_branch");
+ branch.gen_fn.call(
+ &mut asm,
+ Target::CodePtr(branch.get_target_address(0).unwrap()),
+ branch.get_target_address(1).map(|addr| Target::CodePtr(addr)),
);
- branch.end_addr = Some(cb.get_write_ptr());
+
+ // If the entire block is the branch and the block could be invalidated,
+ // we need to pad to ensure there is room for invalidation patching.
+ if branch.start_addr == block.start_addr && branch_terminates_block && block.entry_exit.is_some() {
+ asm.pad_inval_patch();
+ }
+
+ // Rewrite the branch
+ let old_write_pos = cb.get_write_pos();
+ let old_dropped_bytes = cb.has_dropped_bytes();
+ cb.set_write_ptr(branch.start_addr);
+ cb.set_dropped_bytes(false);
+ asm.compile(cb, None).expect("can rewrite existing code");
+ let new_end_addr = cb.get_write_ptr();
+
+ branch.end_addr.set(new_end_addr);
// The block may have shrunk after the branch is rewritten
if branch_terminates_block {
// Adjust block size
- block.end_addr = branch.end_addr;
+ block.end_addr.set(new_end_addr);
}
// cb.write_pos is both a write cursor and a marker for the end of
@@ -1463,79 +2690,66 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &mut Branch) {
if old_write_pos > cb.get_write_pos() {
// We rewound cb->write_pos to generate the branch, now restore it.
cb.set_pos(old_write_pos);
+ cb.set_dropped_bytes(old_dropped_bytes);
} else {
// The branch sits at the end of cb and consumed some memory.
// Keep cb.write_pos.
}
-}
-
-/// Create a new outgoing branch entry for a block
-fn make_branch_entry(block: BlockRef, src_ctx: &Context, gen_fn: BranchGenFn) -> BranchRef {
- let branch = Branch {
- // Block this is attached to
- block: block.clone(),
-
- // Positions where the generated code starts and ends
- start_addr: None,
- end_addr: None,
-
- // Context right after the branch instruction
- src_ctx: *src_ctx,
- // Branch target blocks and their contexts
- targets: [None, None],
- target_ctxs: [Context::default(), Context::default()],
- blocks: [None, None],
+ branch.assert_layout();
+}
- // Jump target addresses
- dst_addrs: [None, None],
+pub type PendingBranchRef = Rc<PendingBranch>;
- // Branch code generation function
- gen_fn: gen_fn,
+/// Create a new outgoing branch entry for a block
+fn new_pending_branch(jit: &mut JITState, gen_fn: BranchGenFn) -> PendingBranchRef {
+ let branch = Rc::new(PendingBranch {
+ uninit_branch: Box::new(MaybeUninit::uninit()),
+ gen_fn,
+ start_addr: Cell::new(None),
+ end_addr: Cell::new(None),
+ targets: [Cell::new(None), Cell::new(None)],
+ });
- // Shape of the branch
- shape: BranchShape::Default,
- };
+ incr_counter!(compiled_branch_count); // TODO not true. count at finalize time
// Add to the list of outgoing branches for the block
- let branchref = Rc::new(RefCell::new(branch));
- block.borrow_mut().outgoing.push(branchref.clone());
+ jit.queue_outgoing_branch(branch.clone());
- return branchref;
+ branch
}
-/// Generated code calls this function with the SysV calling convention.
-/// See [get_branch_target].
-extern "sysv64" fn branch_stub_hit(
- branch_ptr: *const c_void,
- target_idx: u32,
- ec: EcPtr,
-) -> *const u8 {
- with_vm_lock(src_loc!(), || {
- branch_stub_hit_body(branch_ptr, target_idx, ec)
- })
+c_callable! {
+ /// Generated code calls this function with the SysV calling convention.
+ /// See [gen_branch_stub].
+ fn branch_stub_hit(
+ branch_ptr: *const c_void,
+ target_idx: u32,
+ ec: EcPtr,
+ ) -> *const u8 {
+ with_vm_lock(src_loc!(), || {
+ with_compile_time(|| { branch_stub_hit_body(branch_ptr, target_idx, ec) })
+ })
+ }
}
/// Called by the generated code when a branch stub is executed
/// Triggers compilation of branches and code patching
fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -> *const u8 {
- assert!(!branch_ptr.is_null());
-
- //branch_ptr is actually:
- //branch_ptr: *const RefCell<Branch>
- let branch_rc = unsafe { BranchRef::from_raw(branch_ptr as *const RefCell<Branch>) };
+ if get_option!(dump_insns) {
+ println!("branch_stub_hit");
+ }
- // We increment the strong count because we want to keep the reference owned
- // by the branch stub alive. Return branch stubs can be hit multiple times.
- unsafe { Rc::increment_strong_count(branch_ptr) };
+ let branch_ref = NonNull::<Branch>::new(branch_ptr as *mut Branch)
+ .expect("Branches should not be null");
- let mut branch = branch_rc.borrow_mut();
+ // SAFETY: We have the VM lock, and the branch is initialized by the time generated
+ // code calls this function.
+ let branch = unsafe { branch_ref.as_ref() };
let branch_size_on_entry = branch.code_size();
+ let housing_block = unsafe { branch.block.as_ref() };
let target_idx: usize = target_idx.as_usize();
- let target = branch.targets[target_idx].unwrap();
- let target_ctx = branch.target_ctxs[target_idx];
-
let target_branch_shape = match target_idx {
0 => BranchShape::Next0,
1 => BranchShape::Next1,
@@ -1545,18 +2759,31 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
let cb = CodegenGlobals::get_inline_cb();
let ocb = CodegenGlobals::get_outlined_cb();
- // If this branch has already been patched, return the dst address
- // Note: ractors can cause the same stub to be hit multiple times
- if let Some(_) = branch.blocks[target_idx] {
- return branch.dst_addrs[target_idx].unwrap().raw_ptr();
- }
+ let (target_blockid, target_ctx): (BlockId, Context) = unsafe {
+ // SAFETY: no mutation of the target's Cell. Just reading out data.
+ let target = branch.targets[target_idx].ref_unchecked().as_ref().unwrap();
+
+ // If this branch has already been patched, return the dst address
+ // Note: recursion can cause the same stub to be hit multiple times
+ if let BranchTarget::Block(_) = target.as_ref() {
+ return target.get_address().unwrap().raw_ptr(cb);
+ }
+
+ (target.get_blockid(), target.get_ctx())
+ };
let (cfp, original_interp_sp) = unsafe {
let cfp = get_ec_cfp(ec);
let original_interp_sp = get_cfp_sp(cfp);
- let reconned_pc = rb_iseq_pc_at_idx(rb_cfp_get_iseq(cfp), target.idx);
+ let running_iseq = get_cfp_iseq(cfp);
+ assert_eq!(running_iseq, target_blockid.iseq as _, "each stub expects a particular iseq");
+
+ let reconned_pc = rb_iseq_pc_at_idx(running_iseq, target_blockid.idx.into());
let reconned_sp = original_interp_sp.offset(target_ctx.sp_offset.into());
+ // Unlike in the interpreter, our `leave` doesn't write to the caller's
+ // SP -- we do it in the returned-to code. Account for this difference.
+ let reconned_sp = reconned_sp.add(target_ctx.is_return_landing().into());
// Update the PC in the current CFP, because it may be out of sync in JITted code
rb_set_cfp_pc(cfp, reconned_pc);
@@ -1569,76 +2796,88 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
// So we do it here instead.
rb_set_cfp_sp(cfp, reconned_sp);
+ // Bail if code GC is disabled and we've already run out of spaces.
+ if !get_option!(code_gc) && (cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes()) {
+ return CodegenGlobals::get_stub_exit_code().raw_ptr(cb);
+ }
+
+ // Bail if we're about to run out of native stack space.
+ // We've just reconstructed interpreter state.
+ if rb_ec_stack_check(ec as _) != 0 {
+ return CodegenGlobals::get_stub_exit_code().raw_ptr(cb);
+ }
+
(cfp, original_interp_sp)
};
// Try to find an existing compiled version of this block
- let mut block = find_block_version(target, &target_ctx);
-
+ let mut block = find_block_version(target_blockid, &target_ctx);
+ let mut branch_modified = false;
// If this block hasn't yet been compiled
if block.is_none() {
- let branch_old_shape = branch.shape;
- let mut branch_modified = false;
+ let branch_old_shape = branch.gen_fn.get_shape();
// If the new block can be generated right after the branch (at cb->write_pos)
- if Some(cb.get_write_ptr()) == branch.end_addr {
+ if cb.get_write_ptr() == branch.end_addr.get() {
// This branch should be terminating its block
- assert!(branch.end_addr == branch.block.borrow().end_addr);
+ assert!(branch.end_addr == housing_block.end_addr);
// Change the branch shape to indicate the target block will be placed next
- branch.shape = target_branch_shape;
+ branch.gen_fn.set_shape(target_branch_shape);
// Rewrite the branch with the new, potentially more compact shape
- regenerate_branch(cb, &mut branch);
+ regenerate_branch(cb, branch);
branch_modified = true;
// Ensure that the branch terminates the codeblock just like
// before entering this if block. This drops bytes off the end
// in case we shrank the branch when regenerating.
- cb.set_write_ptr(branch.end_addr.unwrap());
+ cb.set_write_ptr(branch.end_addr.get());
}
// Compile the new block version
- drop(branch); // Stop mutable RefCell borrow since GC might borrow branch for marking
- block = gen_block_series(target, &target_ctx, ec, cb, ocb);
- branch = branch_rc.borrow_mut();
+ block = gen_block_series(target_blockid, &target_ctx, ec, cb, ocb);
if block.is_none() && branch_modified {
// We couldn't generate a new block for the branch, but we modified the branch.
// Restore the branch by regenerating it.
- branch.shape = branch_old_shape;
- regenerate_branch(cb, &mut branch);
+ branch.gen_fn.set_shape(branch_old_shape);
+ regenerate_branch(cb, branch);
}
}
// Finish building the new block
let dst_addr = match block {
- Some(block_rc) => {
- let mut block: RefMut<_> = block_rc.borrow_mut();
+ Some(new_block) => {
+ let new_block = unsafe { new_block.as_ref() };
// Branch shape should reflect layout
- assert!(!(branch.shape == target_branch_shape && block.start_addr != branch.end_addr));
+ assert!(!(branch.gen_fn.get_shape() == target_branch_shape && new_block.start_addr != branch.end_addr.get()));
// Add this branch to the list of incoming branches for the target
- block.incoming.push(branch_rc.clone());
+ new_block.push_incoming(branch_ref);
// Update the branch target address
- let dst_addr = block.start_addr;
- branch.dst_addrs[target_idx] = dst_addr;
-
- // Mark this branch target as patched (no longer a stub)
- branch.blocks[target_idx] = Some(block_rc.clone());
+ branch.targets[target_idx].set(Some(Box::new(BranchTarget::Block(new_block.into()))));
// Rewrite the branch with the new jump target address
- mem::drop(block); // end mut borrow
- regenerate_branch(cb, &mut branch);
+ regenerate_branch(cb, branch);
// Restore interpreter sp, since the code hitting the stub expects the original.
unsafe { rb_set_cfp_sp(cfp, original_interp_sp) };
- block_rc.borrow().start_addr.unwrap()
+ new_block.start_addr
}
None => {
+ // Trigger code GC. The whole ISEQ will be recompiled later.
+ // We shouldn't trigger it in the middle of compilation in branch_stub_hit
+ // because incomplete code could be used when cb.dropped_bytes is flipped
+ // by code GC. So this place, after all compilation, is the safest place
+ // to hook code GC on branch_stub_hit.
+ if get_option!(code_gc) {
+ cb.code_gc(ocb);
+ }
+
// Failed to service the stub by generating a new block so now we
// need to exit to the interpreter at the stubbed location. We are
// intentionally *not* restoring original_interp_sp. At the time of
@@ -1656,67 +2895,166 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
let new_branch_size = branch.code_size();
assert!(
new_branch_size <= branch_size_on_entry,
- "branch stubs should never enlarge branches"
+ "branch stubs should never enlarge branches (start_addr: {:?}, old_size: {}, new_size: {})",
+ branch.start_addr.raw_ptr(cb), branch_size_on_entry, new_branch_size,
);
// Return a pointer to the compiled block version
- dst_addr.raw_ptr()
+ dst_addr.raw_ptr(cb)
}
-/// Get a block version or stub corresponding to a branch target
-fn get_branch_target(
- target: BlockId,
+/// Generate a "stub", a piece of code that calls the compiler back when run.
+/// A piece of code that redeems for more code; a thunk for code.
+fn gen_branch_stub(
ctx: &Context,
- branchref: &BranchRef,
- target_idx: u32,
ocb: &mut OutlinedCb,
+ branch_struct_address: usize,
+ target_idx: u32,
) -> Option<CodePtr> {
- let maybe_block = find_block_version(target, ctx);
+ let ocb = ocb.unwrap();
- // If the block already exists
- if let Some(blockref) = maybe_block {
- let mut block = blockref.borrow_mut();
+ let mut asm = Assembler::new();
+ asm.ctx = *ctx;
+ asm.set_reg_temps(ctx.reg_temps);
+ asm_comment!(asm, "branch stub hit");
- // Add an incoming branch into this block
- block.incoming.push(branchref.clone());
- let mut branch = branchref.borrow_mut();
- branch.blocks[target_idx.as_usize()] = Some(blockref.clone());
+ if asm.ctx.is_return_landing() {
+ asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP));
+ let top = asm.stack_push(Type::Unknown);
+ asm.mov(top, C_RET_OPND);
+ }
- // Return a pointer to the compiled code for the block
- return block.start_addr;
+ // Save caller-saved registers before C_ARG_OPNDS get clobbered.
+ // Spill all registers for consistency with the trampoline.
+ for &reg in caller_saved_temp_regs() {
+ asm.cpush(Opnd::Reg(reg));
}
+ // Spill temps to the VM stack as well for jit.peek_at_stack()
+ asm.spill_temps();
+
+ // Set up the arguments unique to this stub for:
+ //
+ // branch_stub_hit(branch_ptr, target_idx, ec)
+ //
+ // Bake pointer to Branch into output code.
+ // We make sure the block housing the branch is still alive when branch_stub_hit() is running.
+ asm.mov(C_ARG_OPNDS[0], branch_struct_address.into());
+ asm.mov(C_ARG_OPNDS[1], target_idx.into());
+
+ // Jump to trampoline to call branch_stub_hit()
+ // Not really a side exit, just don't need a padded jump here.
+ asm.jmp(CodegenGlobals::get_branch_stub_hit_trampoline().as_side_exit());
+
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
+}
+
+pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
+ let mut asm = Assembler::new();
+
+ // For `branch_stub_hit(branch_ptr, target_idx, ec)`,
+ // `branch_ptr` and `target_idx` are different for each stub,
+ // but the call and what follows it are the same. This trampoline
+ // is the unchanging part.
+ // Since this trampoline is static, it allows code GC inside
+ // branch_stub_hit() to free stubs without problems.
+ asm_comment!(asm, "branch_stub_hit() trampoline");
+ let stub_hit_ret = asm.ccall(
+ branch_stub_hit as *mut u8,
+ vec![
+ C_ARG_OPNDS[0],
+ C_ARG_OPNDS[1],
+ EC,
+ ]
+ );
+ let jump_addr = asm.load(stub_hit_ret);
- // Generate an outlined stub that will call branch_stub_hit()
- let stub_addr = ocb.get_write_ptr();
+ // Restore caller-saved registers for stack temps
+ for &reg in caller_saved_temp_regs().rev() {
+ asm.cpop_into(Opnd::Reg(reg));
+ }
- // Get a raw pointer to the branch while keeping the reference count alive
- // Here clone increments the strong count by 1
- // This means the branch stub owns its own reference to the branch
- let branch_ptr: *const RefCell<Branch> = BranchRef::into_raw(branchref.clone());
+ // Jump to the address returned by the branch_stub_hit() call
+ asm.jmp_opnd(jump_addr);
- // Call branch_stub_hit(branch_idx, target_idx, ec)
- mov(ocb, C_ARG_REGS[2], REG_EC);
- mov(ocb, C_ARG_REGS[1], uimm_opnd(target_idx as u64));
- mov(ocb, C_ARG_REGS[0], const_ptr_opnd(branch_ptr as *const u8));
- call_ptr(ocb, REG0, branch_stub_hit as *mut u8);
+ // HACK: popping into C_RET_REG clobbers the return value of branch_stub_hit() that we need to
+ // jump to, so we need a scratch register to preserve it. This extends the live range of the C
+ // return register so we get something else for the return value.
+ let _ = asm.live_reg_opnd(stub_hit_ret);
- // Jump to the address returned by the
- // branch_stub_hit call
- jmp_rm(ocb, RAX);
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
+}
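+
+// Shape of the mechanism above, for orientation: each stub bakes in its own
+// (branch_ptr, target_idx) pair and tail-jumps into this single shared trampoline,
+// which makes the actual C call and then jumps wherever branch_stub_hit() says:
+//
+//   stub A --\
+//   stub B ---> trampoline -> branch_stub_hit(branch_ptr, target_idx, ec) -> jmp ret
+//   stub C --/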
- if ocb.has_dropped_bytes() {
- None // No space
+/// Return registers to be pushed and popped on branch_stub_hit.
+pub fn caller_saved_temp_regs() -> impl Iterator<Item = &'static Reg> + DoubleEndedIterator {
+ let temp_regs = Assembler::get_temp_regs().iter();
+ let len = temp_regs.len();
+ // The return value gen_leave() leaves in C_RET_REG
+ // needs to survive the branch_stub_hit() call.
+ let regs = temp_regs.chain(std::iter::once(&C_RET_REG));
+
+ // On x86_64, maintain 16-byte stack alignment
+ if cfg!(target_arch = "x86_64") && len % 2 == 0 {
+ static ONE_MORE: [Reg; 1] = [C_RET_REG];
+ regs.chain(ONE_MORE.iter())
} else {
- Some(stub_addr)
+ regs.chain(&[])
+ }
+}
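+
+// Worked example for the alignment rule above: with the default 5 temp registers,
+// temp_regs + C_RET_REG is 6 pushes of 8 bytes (48 bytes), already a multiple of 16.
+// With an even temp count (say 4) we would otherwise push 5 registers (40 bytes),
+// so chaining one extra C_RET_REG rounds the count back up to an even 6.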
+
+impl Assembler
+{
+ /// Mark the start position of a patchable entry point in the machine code
+ pub fn mark_entry_start(&mut self, entryref: &PendingEntryRef) {
+ // We need to create our own entry rc object
+ // so that we can move the closure below
+ let entryref = entryref.clone();
+
+ self.pos_marker(move |code_ptr, _| {
+ entryref.start_addr.set(Some(code_ptr));
+ });
+ }
+
+ /// Mark the end position of a patchable entry point in the machine code
+ pub fn mark_entry_end(&mut self, entryref: &PendingEntryRef) {
+ // We need to create our own entry rc object
+ // so that we can move the closure below
+ let entryref = entryref.clone();
+
+ self.pos_marker(move |code_ptr, _| {
+ entryref.end_addr.set(Some(code_ptr));
+ });
+ }
+
+ // Mark the start position of a patchable branch in the machine code
+ fn mark_branch_start(&mut self, branchref: &PendingBranchRef)
+ {
+ // We need to create our own branch rc object
+ // so that we can move the closure below
+ let branchref = branchref.clone();
+
+ self.pos_marker(move |code_ptr, _| {
+ branchref.start_addr.set(Some(code_ptr));
+ });
+ }
+
+ // Mark the end position of a patchable branch in the machine code
+ fn mark_branch_end(&mut self, branchref: &PendingBranchRef)
+ {
+ // We need to create our own branch rc object
+ // so that we can move the closure below
+ let branchref = branchref.clone();
+
+ self.pos_marker(move |code_ptr, _| {
+ branchref.end_addr.set(Some(code_ptr));
+ });
}
}
pub fn gen_branch(
- jit: &JITState,
- src_ctx: &Context,
- cb: &mut CodeBlock,
+ jit: &mut JITState,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
target0: BlockId,
ctx0: &Context,
@@ -1724,168 +3062,213 @@ pub fn gen_branch(
ctx1: Option<&Context>,
gen_fn: BranchGenFn,
) {
- let branchref = make_branch_entry(jit.get_block(), src_ctx, gen_fn);
+ let branch = new_pending_branch(jit, gen_fn);
// Get the branch targets or stubs
- let dst_addr0 = get_branch_target(target0, ctx0, &branchref, 0, ocb);
- let dst_addr1 = if let Some(ctx) = ctx1 {
- get_branch_target(target1.unwrap(), ctx, &branchref, 1, ocb)
- } else {
- None
- };
-
- let mut branch = branchref.borrow_mut();
-
- // Set the branch target adresses
- branch.dst_addrs[0] = dst_addr0;
- branch.dst_addrs[1] = dst_addr1;
+ let target0_addr = branch.set_target(0, target0, ctx0, ocb);
+ let target1_addr = if let Some(ctx) = ctx1 {
+ let addr = branch.set_target(1, target1.unwrap(), ctx, ocb);
+ if addr.is_none() {
+ // target1 requested but we're out of memory.
+ // Avoid unwrap() in gen_fn()
+ return;
+ }
- branch.targets[0] = Some(target0);
- branch.targets[1] = target1;
- branch.target_ctxs[0] = *ctx0;
- branch.target_ctxs[1] = if let Some(&ctx) = ctx1 {
- ctx
- } else {
- Context::default()
- };
+ addr
+ } else { None };
// Call the branch generation function
- branch.start_addr = Some(cb.get_write_ptr());
- regenerate_branch(cb, &mut branch);
-}
-
-fn gen_jump_branch(
- cb: &mut CodeBlock,
- target0: CodePtr,
- _target1: Option<CodePtr>,
- shape: BranchShape,
-) {
- if shape == BranchShape::Next1 {
- panic!("Branch shape Next1 not allowed in gen_jump_branch!");
- }
-
- if shape == BranchShape::Default {
- jmp_ptr(cb, target0);
+ asm.mark_branch_start(&branch);
+ if let Some(dst_addr) = target0_addr {
+ branch.gen_fn.call(asm, Target::CodePtr(dst_addr), target1_addr.map(|addr| Target::CodePtr(addr)));
}
+ asm.mark_branch_end(&branch);
}
-pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, cb: &mut CodeBlock) {
- let branchref = make_branch_entry(jit.get_block(), ctx, gen_jump_branch);
- let mut branch = branchref.borrow_mut();
-
- branch.targets[0] = Some(target0);
- branch.target_ctxs[0] = *ctx;
-
+pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm: &mut Assembler) {
+ let branch = new_pending_branch(jit, BranchGenFn::JumpToTarget0(Cell::new(BranchShape::Default)));
let maybe_block = find_block_version(target0, ctx);
// If the block already exists
- if let Some(blockref) = maybe_block {
- let mut block = blockref.borrow_mut();
-
- block.incoming.push(branchref.clone());
-
- branch.dst_addrs[0] = block.start_addr;
- branch.blocks[0] = Some(blockref.clone());
- branch.shape = BranchShape::Default;
+ let new_target = if let Some(blockref) = maybe_block {
+ let block = unsafe { blockref.as_ref() };
+ let block_addr = block.start_addr;
// Call the branch generation function
- branch.start_addr = Some(cb.get_write_ptr());
- gen_jump_branch(cb, branch.dst_addrs[0].unwrap(), None, BranchShape::Default);
- branch.end_addr = Some(cb.get_write_ptr());
+ asm_comment!(asm, "gen_direct_jmp: existing block");
+ asm.mark_branch_start(&branch);
+ branch.gen_fn.call(asm, Target::CodePtr(block_addr), None);
+ asm.mark_branch_end(&branch);
+
+ BranchTarget::Block(blockref)
} else {
- // This None target address signals gen_block_series() to compile the
- // target block right after this one (fallthrough).
- branch.dst_addrs[0] = None;
- branch.shape = BranchShape::Next0;
- branch.start_addr = Some(cb.get_write_ptr());
- branch.end_addr = Some(cb.get_write_ptr());
- }
+ // The branch is effectively empty (a noop)
+ asm_comment!(asm, "gen_direct_jmp: fallthrough");
+ asm.mark_branch_start(&branch);
+ asm.mark_branch_end(&branch);
+ branch.gen_fn.set_shape(BranchShape::Next0);
+
+ // `None` in new_target.address signals gen_block_series() to
+ // compile the target block right after this one (fallthrough).
+ BranchTarget::Stub(Box::new(BranchStub {
+ address: None,
+ ctx: *ctx,
+ iseq: Cell::new(target0.iseq),
+ iseq_idx: target0.idx,
+ }))
+ };
+
+ branch.targets[0].set(Some(Box::new(new_target)));
}
/// Create a stub to force the code up to this point to be executed
pub fn defer_compilation(
- jit: &JITState,
- cur_ctx: &Context,
- cb: &mut CodeBlock,
+ jit: &mut JITState,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
) {
- if cur_ctx.chain_depth != 0 {
+ if asm.ctx.is_deferred() {
panic!("Double defer!");
}
- let mut next_ctx = *cur_ctx;
+ let mut next_ctx = asm.ctx;
- if next_ctx.chain_depth == u8::MAX {
- panic!("max block version chain depth reached!");
- }
- next_ctx.chain_depth += 1;
+ next_ctx.mark_as_deferred();
- let block_rc = jit.get_block();
- let branch_rc = make_branch_entry(jit.get_block(), cur_ctx, gen_jump_branch);
- let mut branch = branch_rc.borrow_mut();
- let block = block_rc.borrow();
+ let branch = new_pending_branch(jit, BranchGenFn::JumpToTarget0(Cell::new(BranchShape::Default)));
let blockid = BlockId {
- iseq: block.blockid.iseq,
+ iseq: jit.get_iseq(),
idx: jit.get_insn_idx(),
};
- branch.target_ctxs[0] = next_ctx;
- branch.targets[0] = Some(blockid);
- branch.dst_addrs[0] = get_branch_target(blockid, &next_ctx, &branch_rc, 0, ocb);
+
+ // Likely a stub due to the increased chain depth
+ let target0_address = branch.set_target(0, blockid, &next_ctx, ocb);
+
+ // Pad the block if it has the potential to be invalidated. This must be
+ // done before gen_fn() in case the jump is overwritten by a fallthrough.
+ if jit.block_entry_exit.is_some() {
+ asm.pad_inval_patch();
+ }
// Call the branch generation function
- branch.start_addr = Some(cb.get_write_ptr());
- gen_jump_branch(cb, branch.dst_addrs[0].unwrap(), None, BranchShape::Default);
- branch.end_addr = Some(cb.get_write_ptr());
-}
+ asm_comment!(asm, "defer_compilation");
+ asm.mark_branch_start(&branch);
+ if let Some(dst_addr) = target0_address {
+ branch.gen_fn.call(asm, Target::CodePtr(dst_addr), None);
+ }
+ asm.mark_branch_end(&branch);
-// Remove all references to a block then free it.
-fn free_block(blockref: &BlockRef) {
- use crate::invariants::*;
+ // If the block we're deferring from is empty
+ if jit.get_starting_insn_idx() == jit.get_insn_idx() {
+ incr_counter!(defer_empty_count);
+ }
- block_assumptions_free(blockref);
+ incr_counter!(defer_count);
+}
- let block = blockref.borrow();
+/// Remove a block from the live control flow graph.
+/// Block must be initialized and incoming/outgoing edges
+/// must also point to initialized blocks.
+unsafe fn remove_from_graph(blockref: BlockRef) {
+ let block = unsafe { blockref.as_ref() };
// Remove this block from the predecessor's targets
- for pred_branchref in &block.incoming {
+ for pred_branchref in block.incoming.0.take().iter() {
// Branch from the predecessor to us
- let mut pred_branch = pred_branchref.borrow_mut();
+ let pred_branch = unsafe { pred_branchref.as_ref() };
// If this is us, nullify the target block
- for pred_succ_ref in &mut pred_branch.blocks {
- if let Some(pred_succ) = pred_succ_ref {
- if pred_succ == blockref {
- *pred_succ_ref = None;
- }
+ for target_idx in 0..pred_branch.targets.len() {
+ // SAFETY: no mutation inside unsafe
+ let target_is_us = unsafe {
+ pred_branch.targets[target_idx]
+ .ref_unchecked()
+ .as_ref()
+ .and_then(|target| target.get_block())
+ .and_then(|target_block| (target_block == blockref).then(|| ()))
+ .is_some()
+ };
+
+ if target_is_us {
+ pred_branch.targets[target_idx].set(None);
}
}
}
// For each outgoing branch
- for out_branchref in &block.outgoing {
- let out_branch = out_branchref.borrow();
-
+ for out_branchref in block.outgoing.iter() {
+ let out_branch = unsafe { out_branchref.as_ref() };
// For each successor block
- for succ in &out_branch.blocks {
- if let Some(succ) = succ {
+ for out_target in out_branch.targets.iter() {
+ // SAFETY: copying out an Option<BlockRef>. No mutation.
+ let succ_block: Option<BlockRef> = unsafe {
+ out_target.ref_unchecked().as_ref().and_then(|target| target.get_block())
+ };
+
+ if let Some(succ_block) = succ_block {
// Remove outgoing branch from the successor's incoming list
- let mut succ_block = succ.borrow_mut();
- succ_block
- .incoming
- .retain(|succ_incoming| !Rc::ptr_eq(succ_incoming, out_branchref));
+ // SAFETY: caller promises the block has valid outgoing edges.
+ let succ_block = unsafe { succ_block.as_ref() };
+ // Temporarily move out of succ_block.incoming.
+ let succ_incoming = succ_block.incoming.0.take();
+ let mut succ_incoming = succ_incoming.into_vec();
+ succ_incoming.retain(|branch| branch != out_branchref);
+ succ_block.incoming.0.set(succ_incoming.into_boxed_slice()); // allocs. Rely on oom=abort
}
}
}
+}
+
+/// Tear down a block and deallocate it.
+/// Caller has to ensure that the code tracked by the block is not
+/// running, as running code may hit [branch_stub_hit], which expects
+/// [Branch] to be live.
+///
+/// We currently ensure this through the `jit_cont` system in cont.c
+/// and sometimes through the GC calling [rb_yjit_iseq_free]. The GC
+/// has proven that an ISeq is not running if it calls us to free it.
+///
+/// For delayed deallocation, since dead blocks don't keep the
+/// blocks they refer to alive, by the time we get here their outgoing
+/// edges may be dangling. Pass `graph_intact=false` in such cases.
+pub unsafe fn free_block(blockref: BlockRef, graph_intact: bool) {
+ // Careful with order here.
+ // First, remove all pointers to the referent block
+ unsafe {
+ block_assumptions_free(blockref);
+
+ if graph_intact {
+ remove_from_graph(blockref);
+ }
+ }
+
+ // SAFETY: we should now have a unique pointer to the block
+ unsafe { dealloc_block(blockref) }
+}
+
+/// Deallocate a block and its outgoing branches. Blocks own their outgoing branches.
+/// Caller must ensure that we have unique ownership for the referent block
+unsafe fn dealloc_block(blockref: BlockRef) {
+ unsafe {
+ for outgoing in blockref.as_ref().outgoing.iter() {
+ // this Box::from_raw matches the Box::into_raw from PendingBranch::into_branch
+ mem::drop(Box::from_raw(outgoing.as_ptr()));
+ }
+ }
- // No explicit deallocation here as blocks are ref-counted.
+ // Deallocate the referent Block
+ unsafe {
+ // this Box::from_raw matches the Box::into_raw from JITState::into_block
+ mem::drop(Box::from_raw(blockref.as_ptr()));
+ }
}
// Some runtime checks for integrity of a program location
pub fn verify_blockid(blockid: BlockId) {
unsafe {
assert!(rb_IMEMO_TYPE_P(blockid.iseq.into(), imemo_iseq) != 0);
- assert!(blockid.idx < get_iseq_encoded_size(blockid.iseq));
+ assert!(u32::from(blockid.idx) < get_iseq_encoded_size(blockid.iseq));
}
}
@@ -1896,114 +3279,130 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
// TODO: want to assert that all other ractors are stopped here. Can't patch
// machine code that some other thread is running.
- let block = blockref.borrow();
- let cb = CodegenGlobals::get_inline_cb();
+ let block = unsafe { (*blockref).as_ref() };
+ let id_being_invalidated = block.get_blockid();
+ let mut cb = CodegenGlobals::get_inline_cb();
let ocb = CodegenGlobals::get_outlined_cb();
- verify_blockid(block.blockid);
+ verify_blockid(id_being_invalidated);
+
+ #[cfg(feature = "disasm")]
+ {
+ // If dump_iseq_disasm is specified, print to console that blocks for matching ISEQ names were invalidated.
+ if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() {
+ let iseq_range = &block.iseq_range;
+ let iseq_location = iseq_get_location(block.iseq.get(), iseq_range.start);
+ if iseq_location.contains(substr) {
+ println!("Invalidating block from {}, ISEQ offsets [{}, {})", iseq_location, iseq_range.start, iseq_range.end);
+ }
+ }
+ }
// Remove this block from the version array
remove_block_version(blockref);
// Get a pointer to the generated code for this block
- let code_ptr = block.start_addr;
+ let block_start = block.start_addr;
- // Make the the start of the block do an exit. This handles OOM situations
+ // Make the start of the block do an exit. This handles OOM situations
// and some cases where we can't efficiently patch incoming branches.
// Do this first, since in case there is a fallthrough branch into this
// block, the patching loop below can overwrite the start of the block.
// In those situations, there are hopefully no jumps to the start of the block
// after patching as the start of the block would be in the middle of something
// generated by branch_t::gen_fn.
+ let block_entry_exit = block
+ .entry_exit
+ .expect("invalidation needs the entry_exit field");
{
- let block_start = block
- .start_addr
- .expect("invalidation needs constructed block");
- let block_end = block
- .end_addr
- .expect("invalidation needs constructed block");
- let block_entry_exit = block
- .entry_exit
- .expect("invalidation needs the entry_exit field");
+ let block_end = block.get_end_addr();
if block_start == block_entry_exit {
// Some blocks exit on entry. Patching a jump to the entry at the
// entry makes an infinite loop.
} else {
- // TODO(alan)
- // if (block.start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) // Don't patch frozen code region
-
// Patch in a jump to block.entry_exit.
+
let cur_pos = cb.get_write_ptr();
+ let cur_dropped_bytes = cb.has_dropped_bytes();
cb.set_write_ptr(block_start);
- jmp_ptr(cb, block_entry_exit);
+
+ let mut asm = Assembler::new();
+ asm.jmp(block_entry_exit.as_side_exit());
+ cb.set_dropped_bytes(false);
+ asm.compile(&mut cb, Some(ocb)).expect("can rewrite existing code");
+
assert!(
- cb.get_write_ptr() < block_end,
- "invalidation wrote past end of block"
+ cb.get_write_ptr() <= block_end,
+ "invalidation wrote past end of block (code_size: {:?}, new_size: {}, start_addr: {:?})",
+ block.code_size(),
+ cb.get_write_ptr().as_offset() - block_start.as_offset(),
+ block.start_addr.raw_ptr(cb),
);
cb.set_write_ptr(cur_pos);
+ cb.set_dropped_bytes(cur_dropped_bytes);
}
}
// For each incoming branch
- for branchref in &block.incoming {
- let mut branch = branchref.borrow_mut();
- let target_idx = if branch.dst_addrs[0] == code_ptr {
+ for branchref in block.incoming.0.take().iter() {
+ let branch = unsafe { branchref.as_ref() };
+ let target_idx = if branch.get_target_address(0) == Some(block_start) {
0
} else {
1
};
- assert_eq!(branch.dst_addrs[target_idx], code_ptr);
- assert_eq!(blockref, branch.blocks[target_idx].as_ref().unwrap());
-
- // Mark this target as being a stub
- branch.blocks[target_idx] = None;
- // TODO(alan):
- // Don't patch frozen code region
- // if (branch.start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
- // continue;
- // }
-
- // Create a stub for this branch target
- mem::drop(branch); // end RefCell borrow as get_branch_target() can borrow the branch.
- let mut branch_target =
- get_branch_target(block.blockid, &block.ctx, branchref, target_idx as u32, ocb);
-
- if branch_target.is_none() {
- // We were unable to generate a stub (e.g. OOM). Use the block's
- // exit instead of a stub for the block. It's important that we
- // still patch the branch in this situation so stubs are unique
- // to branches. Think about what could go wrong if we run out of
- // memory in the middle of this loop.
- branch_target = block.entry_exit;
+ // Assert that the incoming branch indeed points to the block being invalidated
+ // SAFETY: no mutation.
+ unsafe {
+ let incoming_target = branch.targets[target_idx].ref_unchecked().as_ref().unwrap();
+ assert_eq!(Some(block_start), incoming_target.get_address());
+ if let Some(incoming_block) = &incoming_target.get_block() {
+ assert_eq!(blockref, incoming_block);
+ }
}
- branch = branchref.borrow_mut();
- branch.dst_addrs[target_idx] = branch_target;
+ // Create a stub for this branch target
+ let stub_addr = gen_branch_stub(&block.ctx, ocb, branchref.as_ptr() as usize, target_idx as u32);
+
+ // If we were unable to generate a stub (e.g. OOM), use the block's
+ // exit instead of a stub for the block. It's important that we
+ // still patch the branch in this situation so stubs are unique
+ // to branches. Think about what could go wrong if we run out of
+ // memory in the middle of this loop.
+ let stub_addr = stub_addr.unwrap_or(block_entry_exit);
+
+ // Fill the branch target with a stub
+ branch.targets[target_idx].set(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub {
+ address: Some(stub_addr),
+ iseq: block.iseq.clone(),
+ iseq_idx: block.iseq_range.start,
+ ctx: block.ctx,
+ })))));
// Check if the invalidated block immediately follows
- let target_next = block.start_addr == branch.end_addr;
+ let target_next = block.start_addr == branch.end_addr.get();
if target_next {
// The new block will no longer be adjacent.
// Note that we could be enlarging the branch and writing into the
// start of the block being invalidated.
- branch.shape = BranchShape::Default;
+ branch.gen_fn.set_shape(BranchShape::Default);
}
// Rewrite the branch with the new jump target address
- regenerate_branch(cb, &mut branch);
+ let old_branch_size = branch.code_size();
+ regenerate_branch(cb, branch);
if target_next && branch.end_addr > block.end_addr {
- dbg!(
- branch.block.borrow().blockid.idx,
- block.blockid.idx,
- branch.end_addr,
- block.end_addr,
- block.code_size()
+ panic!("yjit invalidate rewrote branch past end of invalidated block: {:?} (code_size: {})", branch, block.code_size());
+ }
+ if !target_next && branch.code_size() > old_branch_size {
+ panic!(
+ "invalidated branch grew in size (start_addr: {:?}, old_size: {}, new_size: {})",
+ branch.start_addr.raw_ptr(cb), old_branch_size, branch.code_size()
);
- panic!("yjit invalidate rewrote branch past end of invalidated block");
}
}
@@ -2015,18 +3414,21 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
// points will always have an instruction index of 0. We'll need to
// change this in the future when we support optional parameters because
// they enter the function with a non-zero PC
- if block.blockid.idx == 0 {
- unsafe { rb_iseq_reset_jit_func(block.blockid.iseq) };
+ if block.iseq_range.start == 0 {
+ // TODO:
+ // We could reset the exec counter to zero in rb_iseq_reset_jit_func()
+ // so that we eventually compile a new entry point when useful
+ unsafe { rb_iseq_reset_jit_func(block.iseq.get()) };
}
- // TODO:
- // May want to recompile a new entry point (for interpreter entry blocks)
- // This isn't necessary for correctness
-
// FIXME:
// Call continuation addresses on the stack can also be atomically replaced by jumps going to the stub.
- free_block(blockref);
+ // SAFETY: This block was in a version_map earlier
+ // in this function before we removed it, so it's well connected.
+ unsafe { remove_from_graph(*blockref) };
+
+ delayed_deallocation(*blockref);
ocb.unwrap().mark_all_executable();
cb.mark_all_executable();
@@ -2034,36 +3436,332 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
incr_counter!(invalidation_count);
}
+// We cannot deallocate blocks immediately after invalidation since there
+// could be stubs waiting to access branch pointers. Return stubs can do
+// this since patching the code for setting up return addresses does not
+// affect old return addresses that are already set up to use potentially
+// invalidated branch pointers. Example:
+// def foo(n)
+// if n == 2
+// # 1.times.each to create a cfunc frame to preserve the JIT frame
+// # which will return to a stub housed in an invalidated block
+// return 1.times.each { Object.define_method(:foo) {} }
+// end
+//
+// foo(n + 1)
+// end
+// p foo(1)
+pub fn delayed_deallocation(blockref: BlockRef) {
+ block_assumptions_free(blockref);
+
+ let payload = get_iseq_payload(unsafe { blockref.as_ref() }.iseq.get()).unwrap();
+ payload.dead_blocks.push(blockref);
+}
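+
+// Blocks parked on `dead_blocks` are deallocated later via [free_block] with
+// `graph_intact=false` (see its doc comment above), once no stub can still be
+// waiting on their branch pointers.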
+
+trait RefUnchecked {
+ type Contained;
+ unsafe fn ref_unchecked(&self) -> &Self::Contained;
+}
+
+impl<T> RefUnchecked for Cell<T> {
+ type Contained = T;
+
+ /// Gives a reference to the contents of a [Cell].
+ /// Dangerous; please include a SAFETY note.
+ ///
+ /// An easy way to use this without triggering Undefined Behavior is to
+ /// 1. ensure there is transitively no Cell/UnsafeCell mutation in the `unsafe` block
+ /// 2. ensure the `unsafe` block does not return any references, so our
+ /// analysis is lexically confined. This is trivially true if the block
+ /// returns a `bool`, for example. Aggregates that store references have
+ /// explicit lifetime parameters that look like `<'a>`.
+ ///
+ /// There are other subtler situations that don't follow these rules yet
+ /// are still sound.
+ /// See `test_miri_ref_unchecked()` for examples. You can play with it
+ /// with `cargo +nightly miri test miri`.
+ unsafe fn ref_unchecked(&self) -> &Self::Contained {
+ // SAFETY: pointer is dereferenceable because it's from a &Cell.
+ // It's up to the caller to follow aliasing rules with the output
+ // reference.
+ unsafe { self.as_ptr().as_ref().unwrap() }
+ }
+}
+
#[cfg(test)]
mod tests {
use crate::core::*;
#[test]
+ fn type_size() {
+ // Check that we can store types in 4 bits,
+ // and all local types in 32 bits
+ assert_eq!(mem::size_of::<Type>(), 1);
+ assert!(Type::BlockParamProxy as usize <= 0b1111);
+ assert!(MAX_LOCAL_TYPES * 4 <= 32);
+ }
+
+ #[test]
+ fn tempmapping_size() {
+ assert_eq!(mem::size_of::<TempMapping>(), 1);
+ }
+
+ #[test]
+ fn local_types() {
+ let mut ctx = Context::default();
+
+ for i in 0..MAX_LOCAL_TYPES {
+ ctx.set_local_type(i, Type::Fixnum);
+ assert_eq!(ctx.get_local_type(i), Type::Fixnum);
+ ctx.set_local_type(i, Type::BlockParamProxy);
+ assert_eq!(ctx.get_local_type(i), Type::BlockParamProxy);
+ }
+
+ ctx.set_local_type(0, Type::Fixnum);
+ ctx.clear_local_types();
+ assert!(ctx.get_local_type(0) == Type::Unknown);
+
+ // Make sure we don't accidentally set bits incorrectly
+ let mut ctx = Context::default();
+ ctx.set_local_type(0, Type::Fixnum);
+ assert_eq!(ctx.get_local_type(0), Type::Fixnum);
+ ctx.set_local_type(2, Type::Fixnum);
+ ctx.set_local_type(1, Type::BlockParamProxy);
+ assert_eq!(ctx.get_local_type(0), Type::Fixnum);
+ assert_eq!(ctx.get_local_type(2), Type::Fixnum);
+ }
+
+ #[test]
+ fn tempmapping() {
+ let t = TempMapping::map_to_stack(Type::Unknown);
+ assert_eq!(t.get_kind(), MapToStack);
+ assert_eq!(t.get_type(), Type::Unknown);
+
+ let t = TempMapping::map_to_stack(Type::TString);
+ assert_eq!(t.get_kind(), MapToStack);
+ assert_eq!(t.get_type(), Type::TString);
+
+ let t = TempMapping::map_to_local(7);
+ assert_eq!(t.get_kind(), MapToLocal);
+ assert_eq!(t.get_local_idx(), 7);
+ }
+
+ #[test]
+ fn context_size() {
+ assert_eq!(mem::size_of::<Context>(), 23);
+ }
+
+ #[test]
fn types() {
// Valid src => dst
- assert_eq!(Type::Unknown.diff(Type::Unknown), 0);
- assert_eq!(Type::UnknownImm.diff(Type::UnknownImm), 0);
- assert_ne!(Type::UnknownImm.diff(Type::Unknown), usize::MAX);
- assert_ne!(Type::Fixnum.diff(Type::Unknown), usize::MAX);
- assert_ne!(Type::Fixnum.diff(Type::UnknownImm), usize::MAX);
+ assert_eq!(Type::Unknown.diff(Type::Unknown), TypeDiff::Compatible(0));
+ assert_eq!(Type::UnknownImm.diff(Type::UnknownImm), TypeDiff::Compatible(0));
+ assert_ne!(Type::UnknownImm.diff(Type::Unknown), TypeDiff::Incompatible);
+ assert_ne!(Type::Fixnum.diff(Type::Unknown), TypeDiff::Incompatible);
+ assert_ne!(Type::Fixnum.diff(Type::UnknownImm), TypeDiff::Incompatible);
// Invalid src => dst
- assert_eq!(Type::Unknown.diff(Type::UnknownImm), usize::MAX);
- assert_eq!(Type::Unknown.diff(Type::Fixnum), usize::MAX);
- assert_eq!(Type::Fixnum.diff(Type::UnknownHeap), usize::MAX);
+ assert_eq!(Type::Unknown.diff(Type::UnknownImm), TypeDiff::Incompatible);
+ assert_eq!(Type::Unknown.diff(Type::Fixnum), TypeDiff::Incompatible);
+ assert_eq!(Type::Fixnum.diff(Type::UnknownHeap), TypeDiff::Incompatible);
+ }
+
+ #[test]
+ fn reg_temps() {
+ let mut reg_temps = RegTemps(0);
+
+ // 0 means every slot is not spilled
+ for stack_idx in 0..MAX_REG_TEMPS {
+ assert_eq!(reg_temps.get(stack_idx), false);
+ }
+
+ // Set 0, 2, 7 (RegTemps: 10100001)
+ reg_temps.set(0, true);
+ reg_temps.set(2, true);
+ reg_temps.set(3, true);
+ reg_temps.set(3, false);
+ reg_temps.set(7, true);
+
+ // Get 0..8
+ assert_eq!(reg_temps.get(0), true);
+ assert_eq!(reg_temps.get(1), false);
+ assert_eq!(reg_temps.get(2), true);
+ assert_eq!(reg_temps.get(3), false);
+ assert_eq!(reg_temps.get(4), false);
+ assert_eq!(reg_temps.get(5), false);
+ assert_eq!(reg_temps.get(6), false);
+ assert_eq!(reg_temps.get(7), true);
+
+ // Test conflicts
+ assert_eq!(5, get_option!(num_temp_regs));
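+ // Stack slots map to registers modulo the register count, so with 5 temp regs
+ // slot 2 shares a register with slot 7, and slot 0 with slot 5; that is what
+ // the asserts below exercise.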
+ assert_eq!(reg_temps.conflicts_with(0), false); // already set, but no conflict
+ assert_eq!(reg_temps.conflicts_with(1), false);
+ assert_eq!(reg_temps.conflicts_with(2), true); // already set, and conflicts with 7
+ assert_eq!(reg_temps.conflicts_with(3), false);
+ assert_eq!(reg_temps.conflicts_with(4), false);
+ assert_eq!(reg_temps.conflicts_with(5), true); // not set, and will conflict with 0
+ assert_eq!(reg_temps.conflicts_with(6), false);
+ assert_eq!(reg_temps.conflicts_with(7), true); // already set, and conflicts with 2
}
#[test]
fn context() {
// Valid src => dst
- assert_eq!(Context::default().diff(&Context::default()), 0);
+ assert_eq!(Context::default().diff(&Context::default()), TypeDiff::Compatible(0));
// Try pushing an operand and getting its type
- let mut ctx = Context::default();
- ctx.stack_push(Type::Fixnum);
- let top_type = ctx.get_opnd_type(StackOpnd(0));
+ let mut asm = Assembler::new();
+ asm.stack_push(Type::Fixnum);
+ let top_type = asm.ctx.get_opnd_type(StackOpnd(0));
assert!(top_type == Type::Fixnum);
// TODO: write more tests for Context type diff
}
+
+ #[test]
+ fn context_upgrade_local() {
+ let mut asm = Assembler::new();
+ asm.stack_push_local(0);
+ asm.ctx.upgrade_opnd_type(StackOpnd(0), Type::Nil);
+ assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0)));
+ }
+
+ #[test]
+ fn context_chain_depth() {
+ let mut ctx = Context::default();
+ assert_eq!(ctx.get_chain_depth(), 0);
+ assert_eq!(ctx.is_return_landing(), false);
+ assert_eq!(ctx.is_deferred(), false);
+
+ for _ in 0..5 {
+ ctx.increment_chain_depth();
+ }
+ assert_eq!(ctx.get_chain_depth(), 5);
+
+ ctx.set_as_return_landing();
+ assert_eq!(ctx.is_return_landing(), true);
+
+ ctx.clear_return_landing();
+ assert_eq!(ctx.is_return_landing(), false);
+
+ ctx.mark_as_deferred();
+ assert_eq!(ctx.is_deferred(), true);
+
+ ctx.reset_chain_depth_and_defer();
+ assert_eq!(ctx.get_chain_depth(), 0);
+ assert_eq!(ctx.is_deferred(), false);
+ }
+
+ #[test]
+ fn shift_stack_for_send() {
+ let mut asm = Assembler::new();
+
+ // Push values to simulate send(:name, arg) with 6 items already on-stack
+ for _ in 0..6 {
+ asm.stack_push(Type::Fixnum);
+ }
+ asm.stack_push(Type::Unknown);
+ asm.stack_push(Type::ImmSymbol);
+ asm.stack_push(Type::Unknown);
+
+ // This method takes argc of the sendee, not argc of send
+ asm.shift_stack(1);
+
+ // The symbol should be gone
+ assert_eq!(Type::Unknown, asm.ctx.get_opnd_type(StackOpnd(0)));
+ assert_eq!(Type::Unknown, asm.ctx.get_opnd_type(StackOpnd(1)));
+ }
+
+ #[test]
+ fn test_miri_ref_unchecked() {
+ let blockid = BlockId {
+ iseq: ptr::null(),
+ idx: 0,
+ };
+ let cb = CodeBlock::new_dummy(1024);
+ let dumm_addr = cb.get_write_ptr();
+ let block = JITState::new(blockid, Context::default(), dumm_addr, ptr::null())
+ .into_block(0, dumm_addr, dumm_addr, vec![]);
+ let _dropper = BlockDropper(block);
+
+ // Outside of brief moments during construction,
+ // we're always working with &Branch (a shared reference to a Branch).
+ let branch: &Branch = &Branch {
+ gen_fn: BranchGenFn::JZToTarget0,
+ block,
+ start_addr: dumm_addr,
+ end_addr: Cell::new(dumm_addr),
+ targets: [Cell::new(None), Cell::new(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub {
+ iseq: Cell::new(ptr::null()),
+ iseq_idx: 0,
+ address: None,
+ ctx: Context::default(),
+ })))))]
+ };
+ // For easier soundness reasoning, make sure the reference returned does not outlive the
+ // `unsafe` block! It's tempting to do so, but it leads to non-local issues.
+ // Here is an example where it goes wrong:
+ if false {
+ for target in branch.targets.iter().as_ref() {
+ if let Some(btarget) = unsafe { target.ref_unchecked() } {
+ // btarget is derived from the unsafe block!
+ target.set(None); // This drops the contents of the cell...
+ assert!(btarget.get_address().is_none()); // but `btarget` is still live! UB.
+ }
+ }
+ }
+
+ // Do something like this instead. It's not pretty, but it's easier to vet for UB this way.
+ for target in branch.targets.iter().as_ref() {
+ // SAFETY: no mutation within unsafe
+ if unsafe { target.ref_unchecked().is_none() } {
+ continue;
+ }
+ // SAFETY: no mutation within unsafe
+ assert!(unsafe { target.ref_unchecked().as_ref().unwrap().get_address().is_none() });
+ target.set(None);
+ }
+
+ // A more subtle situation where we do Cell/UnsafeCell mutation over the
+ // lifetime of the reference released by ref_unchecked().
+ branch.targets[0].set(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub {
+ iseq: Cell::new(ptr::null()),
+ iseq_idx: 0,
+ address: None,
+ ctx: Context::default(),
+ })))));
+ // Invalid ISeq; we never dereference it.
+ let secret_iseq = NonNull::<rb_iseq_t>::dangling().as_ptr();
+ unsafe {
+ if let Some(branch_target) = branch.targets[0].ref_unchecked().as_ref() {
+ if let BranchTarget::Stub(stub) = branch_target.as_ref() {
+ // SAFETY:
+ // This is a Cell mutation, but it mutates the contents
+ // of a Cell<IseqPtr>, which is a different type
+ // from the type of Cell found in `Branch::targets`, so
+ // there is no chance of mutating the Cell that we called
+ // ref_unchecked() on above.
+ Cell::set(&stub.iseq, secret_iseq);
+ }
+ }
+ };
+ // Check that we indeed changed the iseq of the stub
+ // Cell::take moves out of the cell.
+ assert_eq!(
+ secret_iseq as usize,
+ branch.targets[0].take().unwrap().get_blockid().iseq as usize
+ );
+
+ struct BlockDropper(BlockRef);
+ impl Drop for BlockDropper {
+ fn drop(&mut self) {
+ // SAFETY: we have ownership because the test doesn't stash
+ // the block away in any global structure.
+ // Note that the test being self-contained is also why we
+ // use dealloc_block() over free_block(), as free_block() touches
+ // the global invariants tables unavailable in tests.
+ unsafe { dealloc_block(self.0) };
+ }
+ }
+ }
}
diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs
index 51ba9c1531..d07262ad4f 100644
--- a/yjit/src/cruby.rs
+++ b/yjit/src/cruby.rs
@@ -84,7 +84,7 @@
use std::convert::From;
use std::ffi::CString;
-use std::os::raw::{c_char, c_int, c_long, c_uint};
+use std::os::raw::{c_char, c_int, c_uint};
use std::panic::{catch_unwind, UnwindSafe};
// We check that we can do this with the configure script and a couple of
@@ -96,6 +96,7 @@ pub type size_t = u64;
pub type RedefinitionFlag = u32;
#[allow(dead_code)]
+#[allow(clippy::all)]
mod autogened {
use super::*;
// Textually include output from rust-bindgen as suggested by its user guide.
@@ -106,157 +107,20 @@ pub use autogened::*;
// TODO: For #defines that affect memory layout, we need to check for them
// on build and fail if they're wrong. e.g. USE_FLONUM *must* be true.
-// TODO:
-// Temporary, these external bindings will likely be auto-generated
-// and textually included in this file
+// These are functions we expose from C files, not in any header.
+// Parsing it would result in a lot of duplicate definitions.
+// Use bindgen for functions that are defined in headers or in yjit.c.
#[cfg_attr(test, allow(unused))] // We don't link against C code when testing
extern "C" {
- #[link_name = "rb_insn_name"]
- pub fn raw_insn_name(insn: VALUE) -> *const c_char;
-
- #[link_name = "rb_insn_len"]
- pub fn raw_insn_len(v: VALUE) -> c_int;
-
- #[link_name = "rb_yarv_class_of"]
- pub fn CLASS_OF(v: VALUE) -> VALUE;
-
- #[link_name = "rb_get_ec_cfp"]
- pub fn get_ec_cfp(ec: EcPtr) -> CfpPtr;
-
- #[link_name = "rb_get_cfp_pc"]
- pub fn get_cfp_pc(cfp: CfpPtr) -> *mut VALUE;
-
- #[link_name = "rb_get_cfp_sp"]
- pub fn get_cfp_sp(cfp: CfpPtr) -> *mut VALUE;
-
- #[link_name = "rb_get_cfp_self"]
- pub fn get_cfp_self(cfp: CfpPtr) -> VALUE;
-
- #[link_name = "rb_get_cfp_ep"]
- pub fn get_cfp_ep(cfp: CfpPtr) -> *mut VALUE;
-
- #[link_name = "rb_get_cme_def_type"]
- pub fn get_cme_def_type(cme: *const rb_callable_method_entry_t) -> rb_method_type_t;
-
- #[link_name = "rb_get_cme_def_body_attr_id"]
- pub fn get_cme_def_body_attr_id(cme: *const rb_callable_method_entry_t) -> ID;
-
- #[link_name = "rb_get_cme_def_body_optimized_type"]
- pub fn get_cme_def_body_optimized_type(
- cme: *const rb_callable_method_entry_t,
- ) -> method_optimized_type;
-
- #[link_name = "rb_get_cme_def_body_optimized_index"]
- pub fn get_cme_def_body_optimized_index(cme: *const rb_callable_method_entry_t) -> c_uint;
-
- #[link_name = "rb_get_cme_def_body_cfunc"]
- pub fn get_cme_def_body_cfunc(cme: *const rb_callable_method_entry_t)
- -> *mut rb_method_cfunc_t;
-
- #[link_name = "rb_get_def_method_serial"]
- /// While this returns a uintptr_t in C, we always use it as a Rust u64
- pub fn get_def_method_serial(def: *const rb_method_definition_t) -> u64;
-
- #[link_name = "rb_get_def_original_id"]
- pub fn get_def_original_id(def: *const rb_method_definition_t) -> ID;
-
- #[link_name = "rb_get_mct_argc"]
- pub fn get_mct_argc(mct: *const rb_method_cfunc_t) -> c_int;
-
- #[link_name = "rb_get_mct_func"]
- pub fn get_mct_func(mct: *const rb_method_cfunc_t) -> *const u8;
-
- #[link_name = "rb_get_def_iseq_ptr"]
- pub fn get_def_iseq_ptr(def: *const rb_method_definition_t) -> IseqPtr;
-
- #[link_name = "rb_iseq_encoded_size"]
- pub fn get_iseq_encoded_size(iseq: IseqPtr) -> c_uint;
-
- #[link_name = "rb_get_iseq_body_local_iseq"]
- pub fn get_iseq_body_local_iseq(iseq: IseqPtr) -> IseqPtr;
-
- #[link_name = "rb_get_iseq_body_iseq_encoded"]
- pub fn get_iseq_body_iseq_encoded(iseq: IseqPtr) -> *mut VALUE;
-
- #[link_name = "rb_get_iseq_body_stack_max"]
- pub fn get_iseq_body_stack_max(iseq: IseqPtr) -> c_uint;
-
- #[link_name = "rb_get_iseq_flags_has_opt"]
- pub fn get_iseq_flags_has_opt(iseq: IseqPtr) -> bool;
-
- #[link_name = "rb_get_iseq_flags_has_kw"]
- pub fn get_iseq_flags_has_kw(iseq: IseqPtr) -> bool;
-
- #[link_name = "rb_get_iseq_flags_has_rest"]
- pub fn get_iseq_flags_has_rest(iseq: IseqPtr) -> bool;
-
- #[link_name = "rb_get_iseq_flags_has_post"]
- pub fn get_iseq_flags_has_post(iseq: IseqPtr) -> bool;
-
- #[link_name = "rb_get_iseq_flags_has_kwrest"]
- pub fn get_iseq_flags_has_kwrest(iseq: IseqPtr) -> bool;
-
- #[link_name = "rb_get_iseq_flags_has_block"]
- pub fn get_iseq_flags_has_block(iseq: IseqPtr) -> bool;
-
- #[link_name = "rb_get_iseq_flags_has_accepts_no_kwarg"]
- pub fn get_iseq_flags_has_accepts_no_kwarg(iseq: IseqPtr) -> bool;
-
- #[link_name = "rb_get_iseq_body_local_table_size"]
- pub fn get_iseq_body_local_table_size(iseq: IseqPtr) -> c_uint;
-
- #[link_name = "rb_get_iseq_body_param_keyword"]
- pub fn get_iseq_body_param_keyword(iseq: IseqPtr) -> *const rb_seq_param_keyword_struct;
-
- #[link_name = "rb_get_iseq_body_param_size"]
- pub fn get_iseq_body_param_size(iseq: IseqPtr) -> c_uint;
-
- #[link_name = "rb_get_iseq_body_param_lead_num"]
- pub fn get_iseq_body_param_lead_num(iseq: IseqPtr) -> c_int;
-
- #[link_name = "rb_get_iseq_body_param_opt_num"]
- pub fn get_iseq_body_param_opt_num(iseq: IseqPtr) -> c_int;
-
- #[link_name = "rb_get_iseq_body_param_opt_table"]
- pub fn get_iseq_body_param_opt_table(iseq: IseqPtr) -> *const VALUE;
-
- #[link_name = "rb_get_cikw_keyword_len"]
- pub fn get_cikw_keyword_len(cikw: *const rb_callinfo_kwarg) -> c_int;
-
- #[link_name = "rb_get_cikw_keywords_idx"]
- pub fn get_cikw_keywords_idx(cikw: *const rb_callinfo_kwarg, idx: c_int) -> VALUE;
-
- #[link_name = "rb_get_call_data_ci"]
- pub fn get_call_data_ci(cd: *const rb_call_data) -> *const rb_callinfo;
-
- #[link_name = "rb_yarv_str_eql_internal"]
- pub fn rb_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE;
-
- #[link_name = "rb_yarv_ary_entry_internal"]
- pub fn rb_ary_entry_internal(ary: VALUE, offset: c_long) -> VALUE;
-
- #[link_name = "rb_FL_TEST"]
- pub fn FL_TEST(obj: VALUE, flags: VALUE) -> VALUE;
-
- #[link_name = "rb_FL_TEST_RAW"]
- pub fn FL_TEST_RAW(obj: VALUE, flags: VALUE) -> VALUE;
-
- #[link_name = "rb_RB_TYPE_P"]
- pub fn RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool;
-
- #[link_name = "rb_BASIC_OP_UNREDEFINED_P"]
- pub fn BASIC_OP_UNREDEFINED_P(bop: ruby_basic_operators, klass: RedefinitionFlag) -> bool;
-
- #[link_name = "rb_RSTRUCT_LEN"]
- pub fn RSTRUCT_LEN(st: VALUE) -> c_long;
-
- #[link_name = "rb_RSTRUCT_SET"]
- pub fn RSTRUCT_SET(st: VALUE, k: c_int, v: VALUE);
-
- // Ruby only defines these in vm_insnhelper.c, not in any header.
- // Parsing it would result in a lot of duplicate definitions.
- pub fn rb_vm_opt_mod(recv: VALUE, obj: VALUE) -> VALUE;
+ pub fn rb_check_overloaded_cme(
+ me: *const rb_callable_method_entry_t,
+ ci: *const rb_callinfo,
+ ) -> *const rb_callable_method_entry_t;
+ pub fn rb_hash_empty_p(hash: VALUE) -> VALUE;
+ pub fn rb_str_setbyte(str: VALUE, index: VALUE, value: VALUE) -> VALUE;
pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE;
+ pub fn rb_vm_concat_array(ary1: VALUE, ary2st: VALUE) -> VALUE;
+ pub fn rb_vm_concat_to_array(ary1: VALUE, ary2st: VALUE) -> VALUE;
pub fn rb_vm_defined(
ec: EcPtr,
reg_cfp: CfpPtr,
@@ -264,7 +128,7 @@ extern "C" {
obj: VALUE,
v: VALUE,
) -> bool;
- pub fn rb_vm_set_ivar_idx(obj: VALUE, idx: u32, val: VALUE) -> VALUE;
+ pub fn rb_vm_set_ivar_id(obj: VALUE, idx: u32, val: VALUE) -> VALUE;
pub fn rb_vm_setinstancevariable(iseq: IseqPtr, obj: VALUE, id: ID, val: VALUE, ic: IVC);
pub fn rb_aliased_callable_method_entry(
me: *const rb_callable_method_entry_t,
@@ -278,28 +142,70 @@ extern "C" {
ic: ICVARC,
) -> VALUE;
pub fn rb_vm_ic_hit_p(ic: IC, reg_ep: *const VALUE) -> bool;
-
- #[link_name = "rb_vm_ci_argc"]
- pub fn vm_ci_argc(ci: *const rb_callinfo) -> c_int;
-
- #[link_name = "rb_vm_ci_mid"]
- pub fn vm_ci_mid(ci: *const rb_callinfo) -> ID;
-
- #[link_name = "rb_vm_ci_flag"]
- pub fn vm_ci_flag(ci: *const rb_callinfo) -> c_uint;
-
- #[link_name = "rb_vm_ci_kwarg"]
- pub fn vm_ci_kwarg(ci: *const rb_callinfo) -> *const rb_callinfo_kwarg;
-
- #[link_name = "rb_METHOD_ENTRY_VISI"]
- pub fn METHOD_ENTRY_VISI(me: *const rb_callable_method_entry_t) -> rb_method_visibility_t;
-
- pub fn rb_str_bytesize(str: VALUE) -> VALUE;
-
- #[link_name = "rb_RCLASS_ORIGIN"]
- pub fn RCLASS_ORIGIN(v: VALUE) -> VALUE;
+ pub fn rb_vm_stack_canary() -> VALUE;
+ pub fn rb_vm_push_cfunc_frame(cme: *const rb_callable_method_entry_t, recv_idx: c_int);
}
+// Renames
+pub use rb_insn_name as raw_insn_name;
+pub use rb_get_ec_cfp as get_ec_cfp;
+pub use rb_get_cfp_iseq as get_cfp_iseq;
+pub use rb_get_cfp_pc as get_cfp_pc;
+pub use rb_get_cfp_sp as get_cfp_sp;
+pub use rb_get_cfp_self as get_cfp_self;
+pub use rb_get_cfp_ep as get_cfp_ep;
+pub use rb_get_cfp_ep_level as get_cfp_ep_level;
+pub use rb_vm_base_ptr as get_cfp_bp;
+pub use rb_get_cme_def_type as get_cme_def_type;
+pub use rb_get_cme_def_body_attr_id as get_cme_def_body_attr_id;
+pub use rb_get_cme_def_body_optimized_type as get_cme_def_body_optimized_type;
+pub use rb_get_cme_def_body_optimized_index as get_cme_def_body_optimized_index;
+pub use rb_get_cme_def_body_cfunc as get_cme_def_body_cfunc;
+pub use rb_get_def_method_serial as get_def_method_serial;
+pub use rb_get_def_original_id as get_def_original_id;
+pub use rb_get_mct_argc as get_mct_argc;
+pub use rb_get_mct_func as get_mct_func;
+pub use rb_get_def_iseq_ptr as get_def_iseq_ptr;
+pub use rb_iseq_encoded_size as get_iseq_encoded_size;
+pub use rb_get_iseq_body_local_iseq as get_iseq_body_local_iseq;
+pub use rb_get_iseq_body_iseq_encoded as get_iseq_body_iseq_encoded;
+pub use rb_get_iseq_body_stack_max as get_iseq_body_stack_max;
+pub use rb_get_iseq_body_type as get_iseq_body_type;
+pub use rb_get_iseq_flags_has_lead as get_iseq_flags_has_lead;
+pub use rb_get_iseq_flags_has_opt as get_iseq_flags_has_opt;
+pub use rb_get_iseq_flags_has_kw as get_iseq_flags_has_kw;
+pub use rb_get_iseq_flags_has_rest as get_iseq_flags_has_rest;
+pub use rb_get_iseq_flags_has_post as get_iseq_flags_has_post;
+pub use rb_get_iseq_flags_has_kwrest as get_iseq_flags_has_kwrest;
+pub use rb_get_iseq_flags_has_block as get_iseq_flags_has_block;
+pub use rb_get_iseq_flags_ambiguous_param0 as get_iseq_flags_ambiguous_param0;
+pub use rb_get_iseq_flags_accepts_no_kwarg as get_iseq_flags_accepts_no_kwarg;
+pub use rb_get_iseq_body_local_table_size as get_iseq_body_local_table_size;
+pub use rb_get_iseq_body_param_keyword as get_iseq_body_param_keyword;
+pub use rb_get_iseq_body_param_size as get_iseq_body_param_size;
+pub use rb_get_iseq_body_param_lead_num as get_iseq_body_param_lead_num;
+pub use rb_get_iseq_body_param_opt_num as get_iseq_body_param_opt_num;
+pub use rb_get_iseq_body_param_opt_table as get_iseq_body_param_opt_table;
+pub use rb_get_cikw_keyword_len as get_cikw_keyword_len;
+pub use rb_get_cikw_keywords_idx as get_cikw_keywords_idx;
+pub use rb_get_call_data_ci as get_call_data_ci;
+pub use rb_yarv_str_eql_internal as rb_str_eql_internal;
+pub use rb_yarv_ary_entry_internal as rb_ary_entry_internal;
+pub use rb_yjit_fix_div_fix as rb_fix_div_fix;
+pub use rb_yjit_fix_mod_fix as rb_fix_mod_fix;
+pub use rb_FL_TEST as FL_TEST;
+pub use rb_FL_TEST_RAW as FL_TEST_RAW;
+pub use rb_RB_TYPE_P as RB_TYPE_P;
+pub use rb_BASIC_OP_UNREDEFINED_P as BASIC_OP_UNREDEFINED_P;
+pub use rb_RSTRUCT_LEN as RSTRUCT_LEN;
+pub use rb_RSTRUCT_SET as RSTRUCT_SET;
+pub use rb_vm_ci_argc as vm_ci_argc;
+pub use rb_vm_ci_mid as vm_ci_mid;
+pub use rb_vm_ci_flag as vm_ci_flag;
+pub use rb_vm_ci_kwarg as vm_ci_kwarg;
+pub use rb_METHOD_ENTRY_VISI as METHOD_ENTRY_VISI;
+pub use rb_RCLASS_ORIGIN as RCLASS_ORIGIN;
+
/// Helper so we can get a Rust string for insn_name()
pub fn insn_name(opcode: usize) -> String {
use std::ffi::CStr;
@@ -323,7 +229,7 @@ pub fn insn_len(opcode: usize) -> u32 {
#[cfg(not(test))]
unsafe {
- raw_insn_len(VALUE(opcode)).try_into().unwrap()
+ rb_insn_len(VALUE(opcode)).try_into().unwrap()
}
}
@@ -346,6 +252,30 @@ pub struct VALUE(pub usize);
/// Pointer to an ISEQ
pub type IseqPtr = *const rb_iseq_t;
+// Given an ISEQ pointer, convert PC to insn_idx
+pub fn iseq_pc_to_insn_idx(iseq: IseqPtr, pc: *mut VALUE) -> Option<u16> {
+ let pc_zero = unsafe { rb_iseq_pc_at_idx(iseq, 0) };
+ unsafe { pc.offset_from(pc_zero) }.try_into().ok()
+}
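+// For example, a PC pointing at the third VALUE of the encoded body yields Some(2),
+// while a PC before the body (negative offset) or past u16::MAX fails the conversion
+// and yields None.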
+
+/// Given an ISEQ pointer and an instruction index, return an opcode.
+pub fn iseq_opcode_at_idx(iseq: IseqPtr, insn_idx: u32) -> u32 {
+ let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) };
+ unsafe { rb_iseq_opcode_at_pc(iseq, pc) as u32 }
+}
+
+/// Return a poison value to be set above the stack top to verify leafness.
+#[cfg(not(test))]
+pub fn vm_stack_canary() -> u64 {
+ unsafe { rb_vm_stack_canary() }.as_u64()
+}
+
+/// Avoid linking the C function in `cargo test`
+#[cfg(test)]
+pub fn vm_stack_canary() -> u64 {
+ 0
+}
+
/// Opaque execution-context type from vm_core.h
#[repr(C)]
pub struct rb_execution_context_struct {
@@ -373,13 +303,6 @@ pub struct rb_method_cfunc_t {
_marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
}
-/// Opaque FILE type from the C standard library
-#[repr(C)]
-pub struct FILE {
- _data: [u8; 0],
- _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
-}
-
/// Opaque call-cache type from vm_callinfo.h
#[repr(C)]
pub struct rb_callcache {
@@ -387,13 +310,6 @@ pub struct rb_callcache {
_marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
}
-/// Opaque call-info type from vm_callinfo.h
-#[repr(C)]
-pub struct rb_callinfo_kwarg {
- _data: [u8; 0],
- _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
-}
-
/// Opaque control_frame (CFP) struct from vm_core.h
#[repr(C)]
pub struct rb_control_frame_struct {
@@ -427,7 +343,8 @@ impl VALUE {
/// Return true if the number is an immediate integer, flonum or static symbol
fn immediate_p(self) -> bool {
let VALUE(cval) = self;
- (cval & 7) != 0
+ let mask = RUBY_IMMEDIATE_MASK as usize;
+ (cval & mask) != 0
}
/// Return true if the value is a Ruby immediate integer, flonum, static symbol, nil or false
@@ -435,22 +352,50 @@ impl VALUE {
self.immediate_p() || !self.test()
}
+ /// Return true if the value is a heap object
+ pub fn heap_object_p(self) -> bool {
+ !self.special_const_p()
+ }
+
/// Return true if the value is a Ruby Fixnum (immediate-size integer)
pub fn fixnum_p(self) -> bool {
let VALUE(cval) = self;
- (cval & 1) == 1
+ let flag = RUBY_FIXNUM_FLAG as usize;
+ (cval & flag) == flag
}
/// Return true if the value is an immediate Ruby floating-point number (flonum)
pub fn flonum_p(self) -> bool {
let VALUE(cval) = self;
- (cval & 3) == 2
+ let mask = RUBY_FLONUM_MASK as usize;
+ let flag = RUBY_FLONUM_FLAG as usize;
+ (cval & mask) == flag
+ }
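+ // For reference, the immediate tagging checked above: a Fixnum stores the integer
+ // shifted left one bit with the low bit set (Ruby's 3 is 0b111), and a flonum has
+ // its low two bits equal to 0b10; aligned heap pointers leave all of these clear.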
+
+ /// Return true if the value is a Ruby symbol (RB_SYMBOL_P)
+ pub fn symbol_p(self) -> bool {
+ self.static_sym_p() || self.dynamic_sym_p()
}
- /// Return true for a static (non-heap) Ruby symbol
+ /// Return true for a static (non-heap) Ruby symbol (RB_STATIC_SYM_P)
pub fn static_sym_p(self) -> bool {
let VALUE(cval) = self;
- (cval & 0xff) == RUBY_SYMBOL_FLAG
+ let flag = RUBY_SYMBOL_FLAG as usize;
+ (cval & 0xff) == flag
+ }
+
+ /// Return true for a dynamic Ruby symbol (RB_DYNAMIC_SYM_P)
+ fn dynamic_sym_p(self) -> bool {
+ return if self.special_const_p() {
+ false
+ } else {
+ self.builtin_type() == RUBY_T_SYMBOL
+ }
+ }
+
+ /// Returns true if the value is T_HASH
+ pub fn hash_p(self) -> bool {
+ !self.special_const_p() && self.builtin_type() == RUBY_T_HASH
}
/// Returns true or false depending on whether the value is nil
@@ -458,18 +403,62 @@ impl VALUE {
self == Qnil
}
+ pub fn string_p(self) -> bool {
+ self.class_of() == unsafe { rb_cString }
+ }
+
/// Read the flags bits from the RBasic object, then return a Ruby type enum (e.g. RUBY_T_ARRAY)
pub fn builtin_type(self) -> ruby_value_type {
+ (self.builtin_flags() & (RUBY_T_MASK as usize)) as ruby_value_type
+ }
+
+ pub fn builtin_flags(self) -> usize {
assert!(!self.special_const_p());
let VALUE(cval) = self;
let rbasic_ptr = cval as *const RBasic;
let flags_bits: usize = unsafe { (*rbasic_ptr).flags }.as_usize();
- (flags_bits & (RUBY_T_MASK as usize)) as ruby_value_type
+ return flags_bits;
}
pub fn class_of(self) -> VALUE {
- unsafe { CLASS_OF(self) }
+ if !self.special_const_p() {
+ let builtin_type = self.builtin_type();
+ assert_ne!(builtin_type, RUBY_T_NONE, "YJIT should only see live objects");
+ assert_ne!(builtin_type, RUBY_T_MOVED, "YJIT should only see live objects");
+ }
+
+ unsafe { rb_yarv_class_of(self) }
+ }
+
+ pub fn is_frozen(self) -> bool {
+ unsafe { rb_obj_frozen_p(self) != VALUE(0) }
+ }
+
+ pub fn shape_too_complex(self) -> bool {
+ unsafe { rb_shape_obj_too_complex(self) }
+ }
+
+ pub fn shape_id_of(self) -> u32 {
+ unsafe { rb_shape_get_shape_id(self) }
+ }
+
+ pub fn shape_of(self) -> *mut rb_shape {
+ unsafe {
+ let shape = rb_shape_get_shape_by_id(self.shape_id_of());
+
+ if shape.is_null() {
+ panic!("Shape should not be null");
+ } else {
+ shape
+ }
+ }
+ }
+
+ pub fn embedded_p(self) -> bool {
+ unsafe {
+ FL_TEST_RAW(self, VALUE(ROBJECT_EMBED as usize)) != VALUE(0)
+ }
}
pub fn as_isize(self) -> isize {
@@ -498,7 +487,7 @@ impl VALUE {
pub fn as_usize(self) -> usize {
let VALUE(us) = self;
- us as usize
+ us
}
pub fn as_ptr<T>(self) -> *const T {
@@ -511,7 +500,7 @@ impl VALUE {
us as *mut T
}
- /// For working with opague pointers and encoding null check.
+ /// For working with opaque pointers and encoding null check.
/// Similar to [std::ptr::NonNull], but for `*const T`. `NonNull<T>`
/// is for `*mut T` while our C functions are setup to use `*const T`.
/// Casting from `NonNull<T>` to `*const T` is too noisy.
@@ -591,24 +580,43 @@ impl From<VALUE> for i32 {
fn from(value: VALUE) -> Self {
let VALUE(uimm) = value;
assert!(uimm <= (i32::MAX as usize));
- uimm as i32
+ uimm.try_into().unwrap()
+ }
+}
+
+impl From<VALUE> for u16 {
+ fn from(value: VALUE) -> Self {
+ let VALUE(uimm) = value;
+ uimm.try_into().unwrap()
}
}
/// Produce a Ruby string from a Rust string slice
-#[cfg(feature = "asm_comments")]
+#[cfg(feature = "disasm")]
pub fn rust_str_to_ruby(str: &str) -> VALUE {
- unsafe { rb_utf8_str_new(str.as_ptr() as *const i8, str.len() as i64) }
+ unsafe { rb_utf8_str_new(str.as_ptr() as *const _, str.len() as i64) }
}
/// Produce a Ruby symbol from a Rust string slice
pub fn rust_str_to_sym(str: &str) -> VALUE {
let c_str = CString::new(str).unwrap();
let c_ptr: *const c_char = c_str.as_ptr();
-
unsafe { rb_id2sym(rb_intern(c_ptr)) }
}
+/// Produce an owned Rust String from a C char pointer
+pub fn cstr_to_rust_string(c_char_ptr: *const c_char) -> Option<String> {
+ assert!(c_char_ptr != std::ptr::null());
+
+ use std::ffi::CStr;
+ let c_str: &CStr = unsafe { CStr::from_ptr(c_char_ptr) };
+
+ match c_str.to_str() {
+ Ok(rust_str) => Some(rust_str.to_string()),
+ Err(_) => None
+ }
+}
+
/// A location in Rust code for integrating with debugging facilities defined in C.
/// Use the [src_loc!] macro to create an instance.
pub struct SourceLocation {
@@ -681,13 +689,13 @@ where
// Non-idiomatic capitalization for consistency with CRuby code
#[allow(non_upper_case_globals)]
-pub const Qfalse: VALUE = VALUE(0);
+pub const Qfalse: VALUE = VALUE(RUBY_Qfalse as usize);
#[allow(non_upper_case_globals)]
-pub const Qnil: VALUE = VALUE(8);
+pub const Qnil: VALUE = VALUE(RUBY_Qnil as usize);
#[allow(non_upper_case_globals)]
-pub const Qtrue: VALUE = VALUE(20);
+pub const Qtrue: VALUE = VALUE(RUBY_Qtrue as usize);
#[allow(non_upper_case_globals)]
-pub const Qundef: VALUE = VALUE(52);
+pub const Qundef: VALUE = VALUE(RUBY_Qundef as usize);
#[allow(unused)]
mod manual_defs {
@@ -695,33 +703,27 @@ mod manual_defs {
pub const SIZEOF_VALUE: usize = 8;
pub const SIZEOF_VALUE_I32: i32 = SIZEOF_VALUE as i32;
+ pub const VALUE_BITS: u8 = 8 * SIZEOF_VALUE as u8;
pub const RUBY_LONG_MIN: isize = std::os::raw::c_long::MIN as isize;
pub const RUBY_LONG_MAX: isize = std::os::raw::c_long::MAX as isize;
pub const RUBY_FIXNUM_MIN: isize = RUBY_LONG_MIN / 2;
pub const RUBY_FIXNUM_MAX: isize = RUBY_LONG_MAX / 2;
- pub const RUBY_FIXNUM_FLAG: usize = 0x1;
-
- // All these are defined in include/ruby/internal/special_consts.h,
- // in the same enum as RUBY_Qfalse, etc.
- // Do we want to switch to using Ruby's definition of Qnil, Qfalse, etc?
- pub const RUBY_SYMBOL_FLAG: usize = 0x0c;
- pub const RUBY_FLONUM_FLAG: usize = 0x2;
- pub const RUBY_FLONUM_MASK: usize = 0x3;
- pub const RUBY_SPECIAL_SHIFT: usize = 8;
- pub const RUBY_IMMEDIATE_MASK: usize = 0x7;
// From vm_callinfo.h - uses calculation that seems to confuse bindgen
+ pub const VM_CALL_ARGS_SIMPLE: u32 = 1 << VM_CALL_ARGS_SIMPLE_bit;
pub const VM_CALL_ARGS_SPLAT: u32 = 1 << VM_CALL_ARGS_SPLAT_bit;
pub const VM_CALL_ARGS_BLOCKARG: u32 = 1 << VM_CALL_ARGS_BLOCKARG_bit;
pub const VM_CALL_FCALL: u32 = 1 << VM_CALL_FCALL_bit;
pub const VM_CALL_KWARG: u32 = 1 << VM_CALL_KWARG_bit;
pub const VM_CALL_KW_SPLAT: u32 = 1 << VM_CALL_KW_SPLAT_bit;
pub const VM_CALL_TAILCALL: u32 = 1 << VM_CALL_TAILCALL_bit;
+ pub const VM_CALL_ZSUPER : u32 = 1 << VM_CALL_ZSUPER_bit;
+ pub const VM_CALL_OPT_SEND : u32 = 1 << VM_CALL_OPT_SEND_bit;
// From internal/struct.h - in anonymous enum, so we can't easily import it
- pub const RSTRUCT_EMBED_LEN_MASK: usize = (RUBY_FL_USER2 | RUBY_FL_USER1) as usize;
+ pub const RSTRUCT_EMBED_LEN_MASK: usize = (RUBY_FL_USER7 | RUBY_FL_USER6 | RUBY_FL_USER5 | RUBY_FL_USER4 | RUBY_FL_USER3 | RUBY_FL_USER2 | RUBY_FL_USER1) as usize;
// From iseq.h - via a different constant, which seems to confuse bindgen
pub const ISEQ_TRANSLATED: usize = RUBY_FL_USER7 as usize;
@@ -737,9 +739,8 @@ mod manual_defs {
pub const RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR: i32 = 24; // struct RStruct, subfield "as.heap.ptr"
pub const RUBY_OFFSET_RSTRUCT_AS_ARY: i32 = 16; // struct RStruct, subfield "as.ary"
- pub const RUBY_OFFSET_ROBJECT_AS_ARY: i32 = 16; // struct RObject, subfield "as.ary"
- pub const RUBY_OFFSET_ROBJECT_AS_HEAP_NUMIV: i32 = 16; // struct RObject, subfield "as.heap.numiv"
- pub const RUBY_OFFSET_ROBJECT_AS_HEAP_IVPTR: i32 = 24; // struct RObject, subfield "as.heap.ivptr"
+ pub const RUBY_OFFSET_RSTRING_AS_HEAP_PTR: i32 = 24; // struct RString, subfield "as.heap.ptr"
+ pub const RUBY_OFFSET_RSTRING_AS_ARY: i32 = 24; // struct RString, subfield "as.embed.ary"
// Constants from rb_control_frame_t vm_core.h
pub const RUBY_OFFSET_CFP_PC: i32 = 0;
@@ -748,9 +749,8 @@ mod manual_defs {
pub const RUBY_OFFSET_CFP_SELF: i32 = 24;
pub const RUBY_OFFSET_CFP_EP: i32 = 32;
pub const RUBY_OFFSET_CFP_BLOCK_CODE: i32 = 40;
- pub const RUBY_OFFSET_CFP_BP: i32 = 48; // field __bp__
- pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 56;
- pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 64;
+ pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 48;
+ pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 56;
// Constants from rb_execution_context_t vm_core.h
pub const RUBY_OFFSET_EC_CFP: i32 = 16;
@@ -766,3 +766,52 @@ mod manual_defs {
pub const RUBY_OFFSET_ICE_VALUE: i32 = 8;
}
pub use manual_defs::*;
+
+/// Interned ID values for Ruby symbols and method names.
+/// See [crate::cruby::ID] and usages outside of YJIT.
+pub(crate) mod ids {
+ use std::sync::atomic::AtomicU64;
+    /// Globals to cache IDs on boot. They are atomics used with relaxed
+    /// ordering so reads can happen without `unsafe`. Initialization is done
+    /// single-threaded, and release-acquire ordering on [crate::yjit::YJIT_ENABLED]
+    /// ensures the cached values are only read after initialization is done.
+ macro_rules! def_ids {
+ ($(name: $ident:ident content: $str:literal)*) => {
+ $(
+ #[doc = concat!("[crate::cruby::ID] for `", stringify!($str), "`")]
+ pub static $ident: AtomicU64 = AtomicU64::new(0);
+ )*
+
+ pub(crate) fn init() {
+ $(
+ let content = &$str;
+ let ptr: *const u8 = content.as_ptr();
+
+ // Lookup and cache each ID
+ $ident.store(
+ unsafe { $crate::cruby::rb_intern2(ptr.cast(), content.len() as _) },
+ std::sync::atomic::Ordering::Relaxed
+ );
+ )*
+
+ }
+ }
+ }
+
+ def_ids! {
+ name: NULL content: b""
+ name: min content: b"min"
+ name: max content: b"max"
+ name: hash content: b"hash"
+ name: respond_to_missing content: b"respond_to_missing?"
+ name: to_ary content: b"to_ary"
+ }
+}
+
+/// Get a CRuby `ID` for an interned string, e.g. a particular method name.
+macro_rules! ID {
+ ($id_name:ident) => {
+ $crate::cruby::ids::$id_name.load(std::sync::atomic::Ordering::Relaxed)
+ }
+}
+pub(crate) use ID;
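The hunk above only adds the cache and the `ID!` accessor, so here is a minimal, self-contained sketch of the same pattern: a static atomic filled once during single-threaded boot and then read with relaxed loads. `fake_intern` and `MIN_ID` are hypothetical stand-ins for this example; the real lookup goes through `rb_intern2`, which requires a live Ruby VM.

```rust
use std::sync::atomic::{AtomicU64, Ordering};

/// Cached ID for `min`, mirroring the statics generated by `def_ids!` above.
static MIN_ID: AtomicU64 = AtomicU64::new(0);

/// Hypothetical stand-in for `rb_intern2`: derives a stable number from a name.
fn fake_intern(name: &[u8]) -> u64 {
    name.iter().fold(5381u64, |h, b| h.wrapping_mul(33) ^ u64::from(*b))
}

/// Single-threaded boot step: intern once, publish with a relaxed store.
fn init_ids() {
    MIN_ID.store(fake_intern(b"min"), Ordering::Relaxed);
}

fn main() {
    init_ids();
    // Equivalent to `ID!(min)` in the hunk above: a relaxed load of the cached value.
    let id = MIN_ID.load(Ordering::Relaxed);
    assert_eq!(id, fake_intern(b"min"));
    println!("cached ID for `min`: {id}");
}
```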
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
index 44f87a8482..70578ec7e9 100644
--- a/yjit/src/cruby_bindings.inc.rs
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -1,5 +1,158 @@
-/* automatically generated by rust-bindgen 0.59.2 */
+/* automatically generated by rust-bindgen 0.63.0 */
+#[repr(C)]
+#[derive(Copy, Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct __BindgenBitfieldUnit<Storage> {
+ storage: Storage,
+}
+impl<Storage> __BindgenBitfieldUnit<Storage> {
+ #[inline]
+ pub const fn new(storage: Storage) -> Self {
+ Self { storage }
+ }
+}
+impl<Storage> __BindgenBitfieldUnit<Storage>
+where
+ Storage: AsRef<[u8]> + AsMut<[u8]>,
+{
+ #[inline]
+ pub fn get_bit(&self, index: usize) -> bool {
+ debug_assert!(index / 8 < self.storage.as_ref().len());
+ let byte_index = index / 8;
+ let byte = self.storage.as_ref()[byte_index];
+ let bit_index = if cfg!(target_endian = "big") {
+ 7 - (index % 8)
+ } else {
+ index % 8
+ };
+ let mask = 1 << bit_index;
+ byte & mask == mask
+ }
+ #[inline]
+ pub fn set_bit(&mut self, index: usize, val: bool) {
+ debug_assert!(index / 8 < self.storage.as_ref().len());
+ let byte_index = index / 8;
+ let byte = &mut self.storage.as_mut()[byte_index];
+ let bit_index = if cfg!(target_endian = "big") {
+ 7 - (index % 8)
+ } else {
+ index % 8
+ };
+ let mask = 1 << bit_index;
+ if val {
+ *byte |= mask;
+ } else {
+ *byte &= !mask;
+ }
+ }
+ #[inline]
+ pub fn get(&self, bit_offset: usize, bit_width: u8) -> u64 {
+ debug_assert!(bit_width <= 64);
+ debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
+ debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len());
+ let mut val = 0;
+ for i in 0..(bit_width as usize) {
+ if self.get_bit(i + bit_offset) {
+ let index = if cfg!(target_endian = "big") {
+ bit_width as usize - 1 - i
+ } else {
+ i
+ };
+ val |= 1 << index;
+ }
+ }
+ val
+ }
+ #[inline]
+ pub fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) {
+ debug_assert!(bit_width <= 64);
+ debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
+ debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len());
+ for i in 0..(bit_width as usize) {
+ let mask = 1 << i;
+ let val_bit_is_set = val & mask == mask;
+ let index = if cfg!(target_endian = "big") {
+ bit_width as usize - 1 - i
+ } else {
+ i
+ };
+ self.set_bit(index + bit_offset, val_bit_is_set);
+ }
+ }
+}
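For readers skimming the generated accessors above: on the little-endian targets YJIT supports (x86_64 and arm64), `get`/`set` read and write a contiguous run of bits inside the backing byte array. The helpers below are a simplified, standalone illustration of that packing, not the bindgen code itself.

```rust
/// Read `bit_width` bits starting at `bit_offset` (little-endian bit order).
fn get(storage: &[u8], bit_offset: usize, bit_width: u8) -> u64 {
    let mut val = 0u64;
    for i in 0..bit_width as usize {
        let bit = bit_offset + i;
        if (storage[bit / 8] >> (bit % 8)) & 1 == 1 {
            val |= 1 << i;
        }
    }
    val
}

/// Write `bit_width` bits of `val` starting at `bit_offset`.
fn set(storage: &mut [u8], bit_offset: usize, bit_width: u8, val: u64) {
    for i in 0..bit_width as usize {
        let bit = bit_offset + i;
        let mask = 1u8 << (bit % 8);
        if (val >> i) & 1 == 1 {
            storage[bit / 8] |= mask;
        } else {
            storage[bit / 8] &= !mask;
        }
    }
}

fn main() {
    let mut storage = [0u8; 2];
    set(&mut storage, 3, 5, 0b10110);         // write a 5-bit field at bit offset 3
    assert_eq!(get(&storage, 3, 5), 0b10110); // read it back unchanged
    println!("{:08b} {:08b}", storage[1], storage[0]);
}
```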
+#[repr(C)]
+#[derive(Default)]
+pub struct __IncompleteArrayField<T>(::std::marker::PhantomData<T>, [T; 0]);
+impl<T> __IncompleteArrayField<T> {
+ #[inline]
+ pub const fn new() -> Self {
+ __IncompleteArrayField(::std::marker::PhantomData, [])
+ }
+ #[inline]
+ pub fn as_ptr(&self) -> *const T {
+ self as *const _ as *const T
+ }
+ #[inline]
+ pub fn as_mut_ptr(&mut self) -> *mut T {
+ self as *mut _ as *mut T
+ }
+ #[inline]
+ pub unsafe fn as_slice(&self, len: usize) -> &[T] {
+ ::std::slice::from_raw_parts(self.as_ptr(), len)
+ }
+ #[inline]
+ pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] {
+ ::std::slice::from_raw_parts_mut(self.as_mut_ptr(), len)
+ }
+}
+impl<T> ::std::fmt::Debug for __IncompleteArrayField<T> {
+ fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+ fmt.write_str("__IncompleteArrayField")
+ }
+}
+#[repr(C)]
+pub struct __BindgenUnionField<T>(::std::marker::PhantomData<T>);
+impl<T> __BindgenUnionField<T> {
+ #[inline]
+ pub const fn new() -> Self {
+ __BindgenUnionField(::std::marker::PhantomData)
+ }
+ #[inline]
+ pub unsafe fn as_ref(&self) -> &T {
+ ::std::mem::transmute(self)
+ }
+ #[inline]
+ pub unsafe fn as_mut(&mut self) -> &mut T {
+ ::std::mem::transmute(self)
+ }
+}
+impl<T> ::std::default::Default for __BindgenUnionField<T> {
+ #[inline]
+ fn default() -> Self {
+ Self::new()
+ }
+}
+impl<T> ::std::clone::Clone for __BindgenUnionField<T> {
+ #[inline]
+ fn clone(&self) -> Self {
+ Self::new()
+ }
+}
+impl<T> ::std::marker::Copy for __BindgenUnionField<T> {}
+impl<T> ::std::fmt::Debug for __BindgenUnionField<T> {
+ fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+ fmt.write_str("__BindgenUnionField")
+ }
+}
+impl<T> ::std::hash::Hash for __BindgenUnionField<T> {
+ fn hash<H: ::std::hash::Hasher>(&self, _state: &mut H) {}
+}
+impl<T> ::std::cmp::PartialEq for __BindgenUnionField<T> {
+ fn eq(&self, _other: &__BindgenUnionField<T>) -> bool {
+ true
+ }
+}
+impl<T> ::std::cmp::Eq for __BindgenUnionField<T> {}
pub const INTEGER_REDEFINED_OP_FLAG: u32 = 1;
pub const FLOAT_REDEFINED_OP_FLAG: u32 = 2;
pub const STRING_REDEFINED_OP_FLAG: u32 = 4;
@@ -17,14 +170,21 @@ pub const VM_ENV_DATA_INDEX_ME_CREF: i32 = -2;
pub const VM_ENV_DATA_INDEX_SPECVAL: i32 = -1;
pub const VM_ENV_DATA_INDEX_FLAGS: u32 = 0;
pub const VM_BLOCK_HANDLER_NONE: u32 = 0;
+pub const SHAPE_ID_NUM_BITS: u32 = 32;
+pub const OBJ_TOO_COMPLEX_SHAPE_ID: u32 = 2;
pub type ID = ::std::os::raw::c_ulong;
-extern "C" {
- pub fn rb_singleton_class(obj: VALUE) -> VALUE;
-}
pub type rb_alloc_func_t = ::std::option::Option<unsafe extern "C" fn(klass: VALUE) -> VALUE>;
-extern "C" {
- pub fn rb_get_alloc_func(klass: VALUE) -> rb_alloc_func_t;
-}
+pub const RUBY_Qfalse: ruby_special_consts = 0;
+pub const RUBY_Qnil: ruby_special_consts = 4;
+pub const RUBY_Qtrue: ruby_special_consts = 20;
+pub const RUBY_Qundef: ruby_special_consts = 36;
+pub const RUBY_IMMEDIATE_MASK: ruby_special_consts = 7;
+pub const RUBY_FIXNUM_FLAG: ruby_special_consts = 1;
+pub const RUBY_FLONUM_MASK: ruby_special_consts = 3;
+pub const RUBY_FLONUM_FLAG: ruby_special_consts = 2;
+pub const RUBY_SYMBOL_FLAG: ruby_special_consts = 12;
+pub const RUBY_SPECIAL_SHIFT: ruby_special_consts = 8;
+pub type ruby_special_consts = u32;
#[repr(C)]
pub struct RBasic {
pub flags: VALUE,
@@ -62,13 +222,12 @@ pub type ruby_value_type = u32;
pub const RUBY_FL_USHIFT: ruby_fl_ushift = 12;
pub type ruby_fl_ushift = u32;
pub const RUBY_FL_WB_PROTECTED: ruby_fl_type = 32;
-pub const RUBY_FL_PROMOTED0: ruby_fl_type = 32;
-pub const RUBY_FL_PROMOTED1: ruby_fl_type = 64;
-pub const RUBY_FL_PROMOTED: ruby_fl_type = 96;
+pub const RUBY_FL_PROMOTED: ruby_fl_type = 32;
+pub const RUBY_FL_UNUSED6: ruby_fl_type = 64;
pub const RUBY_FL_FINALIZE: ruby_fl_type = 128;
-pub const RUBY_FL_TAINT: ruby_fl_type = 256;
+pub const RUBY_FL_TAINT: ruby_fl_type = 0;
pub const RUBY_FL_SHAREABLE: ruby_fl_type = 256;
-pub const RUBY_FL_UNTRUSTED: ruby_fl_type = 256;
+pub const RUBY_FL_UNTRUSTED: ruby_fl_type = 0;
pub const RUBY_FL_SEEN_OBJ_ID: ruby_fl_type = 512;
pub const RUBY_FL_EXIVAR: ruby_fl_type = 1024;
pub const RUBY_FL_FREEZE: ruby_fl_type = 2048;
@@ -93,392 +252,99 @@ pub const RUBY_FL_USER17: ruby_fl_type = 536870912;
pub const RUBY_FL_USER18: ruby_fl_type = 1073741824;
pub const RUBY_FL_USER19: ruby_fl_type = -2147483648;
pub const RUBY_ELTS_SHARED: ruby_fl_type = 16384;
-pub const RUBY_FL_SINGLETON: ruby_fl_type = 4096;
+pub const RUBY_FL_SINGLETON: ruby_fl_type = 8192;
pub type ruby_fl_type = i32;
+pub const RSTRING_NOEMBED: ruby_rstring_flags = 8192;
+pub const RSTRING_FSTR: ruby_rstring_flags = 536870912;
+pub type ruby_rstring_flags = u32;
pub type st_data_t = ::std::os::raw::c_ulong;
pub type st_index_t = st_data_t;
+pub const ST_CONTINUE: st_retval = 0;
+pub const ST_STOP: st_retval = 1;
+pub const ST_DELETE: st_retval = 2;
+pub const ST_CHECK: st_retval = 3;
+pub const ST_REPLACE: st_retval = 4;
+pub type st_retval = u32;
+pub type st_foreach_callback_func = ::std::option::Option<
+ unsafe extern "C" fn(
+ arg1: st_data_t,
+ arg2: st_data_t,
+ arg3: st_data_t,
+ ) -> ::std::os::raw::c_int,
+>;
pub const RARRAY_EMBED_FLAG: ruby_rarray_flags = 8192;
pub const RARRAY_EMBED_LEN_MASK: ruby_rarray_flags = 4161536;
-pub const RARRAY_TRANSIENT_FLAG: ruby_rarray_flags = 33554432;
pub type ruby_rarray_flags = u32;
pub const RARRAY_EMBED_LEN_SHIFT: ruby_rarray_consts = 15;
pub type ruby_rarray_consts = u32;
pub const RMODULE_IS_REFINEMENT: ruby_rmodule_flags = 32768;
pub type ruby_rmodule_flags = u32;
-extern "C" {
- pub fn rb_class_get_superclass(klass: VALUE) -> VALUE;
-}
pub const ROBJECT_EMBED: ruby_robject_flags = 8192;
pub type ruby_robject_flags = u32;
-pub const ROBJECT_EMBED_LEN_MAX: ruby_robject_consts = 3;
-pub type ruby_robject_consts = u32;
-extern "C" {
- pub static mut rb_mKernel: VALUE;
-}
-extern "C" {
- pub static mut rb_cBasicObject: VALUE;
-}
-extern "C" {
- pub static mut rb_cArray: VALUE;
-}
-extern "C" {
- pub static mut rb_cFalseClass: VALUE;
-}
-extern "C" {
- pub static mut rb_cFloat: VALUE;
-}
-extern "C" {
- pub static mut rb_cHash: VALUE;
-}
-extern "C" {
- pub static mut rb_cInteger: VALUE;
-}
-extern "C" {
- pub static mut rb_cModule: VALUE;
-}
-extern "C" {
- pub static mut rb_cNilClass: VALUE;
-}
-extern "C" {
- pub static mut rb_cString: VALUE;
-}
-extern "C" {
- pub static mut rb_cSymbol: VALUE;
-}
-extern "C" {
- pub static mut rb_cThread: VALUE;
-}
-extern "C" {
- pub static mut rb_cTrueClass: VALUE;
-}
-extern "C" {
- pub fn rb_ary_new_capa(capa: ::std::os::raw::c_long) -> VALUE;
-}
-extern "C" {
- pub fn rb_ary_store(ary: VALUE, key: ::std::os::raw::c_long, val: VALUE);
-}
-extern "C" {
- pub fn rb_ary_resurrect(ary: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_ary_clear(ary: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_hash_new() -> VALUE;
-}
-extern "C" {
- pub fn rb_hash_aref(hash: VALUE, key: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_hash_aset(hash: VALUE, key: VALUE, val: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_hash_bulk_insert(argc: ::std::os::raw::c_long, argv: *const VALUE, hash: VALUE);
-}
-extern "C" {
- pub fn rb_sym2id(obj: VALUE) -> ID;
-}
-extern "C" {
- pub fn rb_id2sym(id: ID) -> VALUE;
-}
-extern "C" {
- pub fn rb_intern(name: *const ::std::os::raw::c_char) -> ID;
-}
-extern "C" {
- pub fn rb_gc_mark(obj: VALUE);
-}
-extern "C" {
- pub fn rb_gc_mark_movable(obj: VALUE);
-}
-extern "C" {
- pub fn rb_gc_location(obj: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_obj_is_kind_of(obj: VALUE, klass: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_backref_get() -> VALUE;
-}
-extern "C" {
- pub fn rb_range_new(beg: VALUE, end: VALUE, excl: ::std::os::raw::c_int) -> VALUE;
-}
-extern "C" {
- pub fn rb_reg_nth_match(n: ::std::os::raw::c_int, md: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_reg_last_match(md: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_reg_match_pre(md: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_reg_match_post(md: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_reg_match_last(md: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_utf8_str_new(
- ptr: *const ::std::os::raw::c_char,
- len: ::std::os::raw::c_long,
- ) -> VALUE;
-}
-extern "C" {
- pub fn rb_str_dup(str_: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_str_append(dst: VALUE, src: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_str_intern(str_: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_ivar_get(obj: VALUE, name: ID) -> VALUE;
-}
-extern "C" {
- pub fn rb_attr_get(obj: VALUE, name: ID) -> VALUE;
-}
+pub type rb_block_call_func = ::std::option::Option<
+ unsafe extern "C" fn(
+ yielded_arg: VALUE,
+ callback_arg: VALUE,
+ argc: ::std::os::raw::c_int,
+ argv: *const VALUE,
+ blockarg: VALUE,
+ ) -> VALUE,
+>;
+pub type rb_block_call_func_t = rb_block_call_func;
pub const RUBY_ENCODING_INLINE_MAX: ruby_encoding_consts = 127;
pub const RUBY_ENCODING_SHIFT: ruby_encoding_consts = 22;
pub const RUBY_ENCODING_MASK: ruby_encoding_consts = 532676608;
pub const RUBY_ENCODING_MAXNAMELEN: ruby_encoding_consts = 42;
pub type ruby_encoding_consts = u32;
-extern "C" {
- pub fn rb_obj_info_dump(obj: VALUE);
-}
-extern "C" {
- pub fn rb_reg_new_ary(ary: VALUE, options: ::std::os::raw::c_int) -> VALUE;
-}
-pub const idDot2: ruby_method_ids = 128;
-pub const idDot3: ruby_method_ids = 129;
-pub const idUPlus: ruby_method_ids = 132;
-pub const idUMinus: ruby_method_ids = 133;
-pub const idPow: ruby_method_ids = 134;
-pub const idCmp: ruby_method_ids = 135;
-pub const idPLUS: ruby_method_ids = 43;
-pub const idMINUS: ruby_method_ids = 45;
-pub const idMULT: ruby_method_ids = 42;
-pub const idDIV: ruby_method_ids = 47;
-pub const idMOD: ruby_method_ids = 37;
-pub const idLTLT: ruby_method_ids = 136;
-pub const idGTGT: ruby_method_ids = 137;
-pub const idLT: ruby_method_ids = 60;
-pub const idLE: ruby_method_ids = 138;
-pub const idGT: ruby_method_ids = 62;
-pub const idGE: ruby_method_ids = 139;
-pub const idEq: ruby_method_ids = 140;
-pub const idEqq: ruby_method_ids = 141;
-pub const idNeq: ruby_method_ids = 142;
-pub const idNot: ruby_method_ids = 33;
-pub const idAnd: ruby_method_ids = 38;
-pub const idOr: ruby_method_ids = 124;
-pub const idBackquote: ruby_method_ids = 96;
-pub const idEqTilde: ruby_method_ids = 143;
-pub const idNeqTilde: ruby_method_ids = 144;
-pub const idAREF: ruby_method_ids = 145;
-pub const idASET: ruby_method_ids = 146;
-pub const idCOLON2: ruby_method_ids = 147;
-pub const idANDOP: ruby_method_ids = 148;
-pub const idOROP: ruby_method_ids = 149;
-pub const idANDDOT: ruby_method_ids = 150;
-pub const tPRESERVED_ID_BEGIN: ruby_method_ids = 150;
-pub const idNilP: ruby_method_ids = 151;
-pub const idNULL: ruby_method_ids = 152;
-pub const idEmptyP: ruby_method_ids = 153;
-pub const idEqlP: ruby_method_ids = 154;
-pub const idRespond_to: ruby_method_ids = 155;
-pub const idRespond_to_missing: ruby_method_ids = 156;
-pub const idIFUNC: ruby_method_ids = 157;
-pub const idCFUNC: ruby_method_ids = 158;
-pub const id_core_set_method_alias: ruby_method_ids = 159;
-pub const id_core_set_variable_alias: ruby_method_ids = 160;
-pub const id_core_undef_method: ruby_method_ids = 161;
-pub const id_core_define_method: ruby_method_ids = 162;
-pub const id_core_define_singleton_method: ruby_method_ids = 163;
-pub const id_core_set_postexe: ruby_method_ids = 164;
-pub const id_core_hash_merge_ptr: ruby_method_ids = 165;
-pub const id_core_hash_merge_kwd: ruby_method_ids = 166;
-pub const id_core_raise: ruby_method_ids = 167;
-pub const id_core_sprintf: ruby_method_ids = 168;
-pub const id_debug_created_info: ruby_method_ids = 169;
-pub const tPRESERVED_ID_END: ruby_method_ids = 170;
-pub const tTOKEN_LOCAL_BEGIN: ruby_method_ids = 169;
-pub const tMax: ruby_method_ids = 170;
-pub const tMin: ruby_method_ids = 171;
-pub const tFreeze: ruby_method_ids = 172;
-pub const tInspect: ruby_method_ids = 173;
-pub const tIntern: ruby_method_ids = 174;
-pub const tObject_id: ruby_method_ids = 175;
-pub const tConst_added: ruby_method_ids = 176;
-pub const tConst_missing: ruby_method_ids = 177;
-pub const tMethodMissing: ruby_method_ids = 178;
-pub const tMethod_added: ruby_method_ids = 179;
-pub const tSingleton_method_added: ruby_method_ids = 180;
-pub const tMethod_removed: ruby_method_ids = 181;
-pub const tSingleton_method_removed: ruby_method_ids = 182;
-pub const tMethod_undefined: ruby_method_ids = 183;
-pub const tSingleton_method_undefined: ruby_method_ids = 184;
-pub const tLength: ruby_method_ids = 185;
-pub const tSize: ruby_method_ids = 186;
-pub const tGets: ruby_method_ids = 187;
-pub const tSucc: ruby_method_ids = 188;
-pub const tEach: ruby_method_ids = 189;
-pub const tProc: ruby_method_ids = 190;
-pub const tLambda: ruby_method_ids = 191;
-pub const tSend: ruby_method_ids = 192;
-pub const t__send__: ruby_method_ids = 193;
-pub const t__attached__: ruby_method_ids = 194;
-pub const t__recursive_key__: ruby_method_ids = 195;
-pub const tInitialize: ruby_method_ids = 196;
-pub const tInitialize_copy: ruby_method_ids = 197;
-pub const tInitialize_clone: ruby_method_ids = 198;
-pub const tInitialize_dup: ruby_method_ids = 199;
-pub const tTo_int: ruby_method_ids = 200;
-pub const tTo_ary: ruby_method_ids = 201;
-pub const tTo_str: ruby_method_ids = 202;
-pub const tTo_sym: ruby_method_ids = 203;
-pub const tTo_hash: ruby_method_ids = 204;
-pub const tTo_proc: ruby_method_ids = 205;
-pub const tTo_io: ruby_method_ids = 206;
-pub const tTo_a: ruby_method_ids = 207;
-pub const tTo_s: ruby_method_ids = 208;
-pub const tTo_i: ruby_method_ids = 209;
-pub const tTo_f: ruby_method_ids = 210;
-pub const tTo_r: ruby_method_ids = 211;
-pub const tBt: ruby_method_ids = 212;
-pub const tBt_locations: ruby_method_ids = 213;
-pub const tCall: ruby_method_ids = 214;
-pub const tMesg: ruby_method_ids = 215;
-pub const tException: ruby_method_ids = 216;
-pub const tLocals: ruby_method_ids = 217;
-pub const tNOT: ruby_method_ids = 218;
-pub const tAND: ruby_method_ids = 219;
-pub const tOR: ruby_method_ids = 220;
-pub const tDiv: ruby_method_ids = 221;
-pub const tDivmod: ruby_method_ids = 222;
-pub const tFdiv: ruby_method_ids = 223;
-pub const tQuo: ruby_method_ids = 224;
-pub const tName: ruby_method_ids = 225;
-pub const tNil: ruby_method_ids = 226;
-pub const tUScore: ruby_method_ids = 227;
-pub const tNUMPARAM_1: ruby_method_ids = 228;
-pub const tNUMPARAM_2: ruby_method_ids = 229;
-pub const tNUMPARAM_3: ruby_method_ids = 230;
-pub const tNUMPARAM_4: ruby_method_ids = 231;
-pub const tNUMPARAM_5: ruby_method_ids = 232;
-pub const tNUMPARAM_6: ruby_method_ids = 233;
-pub const tNUMPARAM_7: ruby_method_ids = 234;
-pub const tNUMPARAM_8: ruby_method_ids = 235;
-pub const tNUMPARAM_9: ruby_method_ids = 236;
-pub const tTOKEN_LOCAL_END: ruby_method_ids = 237;
-pub const tTOKEN_INSTANCE_BEGIN: ruby_method_ids = 236;
-pub const tTOKEN_INSTANCE_END: ruby_method_ids = 237;
-pub const tTOKEN_GLOBAL_BEGIN: ruby_method_ids = 236;
-pub const tLASTLINE: ruby_method_ids = 237;
-pub const tBACKREF: ruby_method_ids = 238;
-pub const tERROR_INFO: ruby_method_ids = 239;
-pub const tTOKEN_GLOBAL_END: ruby_method_ids = 240;
-pub const tTOKEN_CONST_BEGIN: ruby_method_ids = 239;
-pub const tTOKEN_CONST_END: ruby_method_ids = 240;
-pub const tTOKEN_CLASS_BEGIN: ruby_method_ids = 239;
-pub const tTOKEN_CLASS_END: ruby_method_ids = 240;
-pub const tTOKEN_ATTRSET_BEGIN: ruby_method_ids = 239;
-pub const tTOKEN_ATTRSET_END: ruby_method_ids = 240;
-pub const tNEXT_ID: ruby_method_ids = 240;
-pub const idMax: ruby_method_ids = 2721;
-pub const idMin: ruby_method_ids = 2737;
-pub const idFreeze: ruby_method_ids = 2753;
-pub const idInspect: ruby_method_ids = 2769;
-pub const idIntern: ruby_method_ids = 2785;
-pub const idObject_id: ruby_method_ids = 2801;
-pub const idConst_added: ruby_method_ids = 2817;
-pub const idConst_missing: ruby_method_ids = 2833;
-pub const idMethodMissing: ruby_method_ids = 2849;
-pub const idMethod_added: ruby_method_ids = 2865;
-pub const idSingleton_method_added: ruby_method_ids = 2881;
-pub const idMethod_removed: ruby_method_ids = 2897;
-pub const idSingleton_method_removed: ruby_method_ids = 2913;
-pub const idMethod_undefined: ruby_method_ids = 2929;
-pub const idSingleton_method_undefined: ruby_method_ids = 2945;
-pub const idLength: ruby_method_ids = 2961;
-pub const idSize: ruby_method_ids = 2977;
-pub const idGets: ruby_method_ids = 2993;
-pub const idSucc: ruby_method_ids = 3009;
-pub const idEach: ruby_method_ids = 3025;
-pub const idProc: ruby_method_ids = 3041;
-pub const idLambda: ruby_method_ids = 3057;
-pub const idSend: ruby_method_ids = 3073;
-pub const id__send__: ruby_method_ids = 3089;
-pub const id__attached__: ruby_method_ids = 3105;
-pub const id__recursive_key__: ruby_method_ids = 3121;
-pub const idInitialize: ruby_method_ids = 3137;
-pub const idInitialize_copy: ruby_method_ids = 3153;
-pub const idInitialize_clone: ruby_method_ids = 3169;
-pub const idInitialize_dup: ruby_method_ids = 3185;
-pub const idTo_int: ruby_method_ids = 3201;
-pub const idTo_ary: ruby_method_ids = 3217;
-pub const idTo_str: ruby_method_ids = 3233;
-pub const idTo_sym: ruby_method_ids = 3249;
-pub const idTo_hash: ruby_method_ids = 3265;
-pub const idTo_proc: ruby_method_ids = 3281;
-pub const idTo_io: ruby_method_ids = 3297;
-pub const idTo_a: ruby_method_ids = 3313;
-pub const idTo_s: ruby_method_ids = 3329;
-pub const idTo_i: ruby_method_ids = 3345;
-pub const idTo_f: ruby_method_ids = 3361;
-pub const idTo_r: ruby_method_ids = 3377;
-pub const idBt: ruby_method_ids = 3393;
-pub const idBt_locations: ruby_method_ids = 3409;
-pub const idCall: ruby_method_ids = 3425;
-pub const idMesg: ruby_method_ids = 3441;
-pub const idException: ruby_method_ids = 3457;
-pub const idLocals: ruby_method_ids = 3473;
-pub const idNOT: ruby_method_ids = 3489;
-pub const idAND: ruby_method_ids = 3505;
-pub const idOR: ruby_method_ids = 3521;
-pub const idDiv: ruby_method_ids = 3537;
-pub const idDivmod: ruby_method_ids = 3553;
-pub const idFdiv: ruby_method_ids = 3569;
-pub const idQuo: ruby_method_ids = 3585;
-pub const idName: ruby_method_ids = 3601;
-pub const idNil: ruby_method_ids = 3617;
-pub const idUScore: ruby_method_ids = 3633;
-pub const idNUMPARAM_1: ruby_method_ids = 3649;
-pub const idNUMPARAM_2: ruby_method_ids = 3665;
-pub const idNUMPARAM_3: ruby_method_ids = 3681;
-pub const idNUMPARAM_4: ruby_method_ids = 3697;
-pub const idNUMPARAM_5: ruby_method_ids = 3713;
-pub const idNUMPARAM_6: ruby_method_ids = 3729;
-pub const idNUMPARAM_7: ruby_method_ids = 3745;
-pub const idNUMPARAM_8: ruby_method_ids = 3761;
-pub const idNUMPARAM_9: ruby_method_ids = 3777;
-pub const idLASTLINE: ruby_method_ids = 3799;
-pub const idBACKREF: ruby_method_ids = 3815;
-pub const idERROR_INFO: ruby_method_ids = 3831;
-pub const tLAST_OP_ID: ruby_method_ids = 169;
-pub const idLAST_OP_ID: ruby_method_ids = 10;
-pub type ruby_method_ids = u32;
-extern "C" {
- pub fn rb_ary_tmp_new_from_values(
- arg1: VALUE,
- arg2: ::std::os::raw::c_long,
- arg3: *const VALUE,
- ) -> VALUE;
-}
-extern "C" {
- pub fn rb_ec_ary_new_from_values(
- ec: *mut rb_execution_context_struct,
- n: ::std::os::raw::c_long,
- elts: *const VALUE,
- ) -> VALUE;
-}
+pub const RUBY_ENCINDEX_ASCII_8BIT: ruby_preserved_encindex = 0;
+pub const RUBY_ENCINDEX_UTF_8: ruby_preserved_encindex = 1;
+pub const RUBY_ENCINDEX_US_ASCII: ruby_preserved_encindex = 2;
+pub const RUBY_ENCINDEX_UTF_16BE: ruby_preserved_encindex = 3;
+pub const RUBY_ENCINDEX_UTF_16LE: ruby_preserved_encindex = 4;
+pub const RUBY_ENCINDEX_UTF_32BE: ruby_preserved_encindex = 5;
+pub const RUBY_ENCINDEX_UTF_32LE: ruby_preserved_encindex = 6;
+pub const RUBY_ENCINDEX_UTF_16: ruby_preserved_encindex = 7;
+pub const RUBY_ENCINDEX_UTF_32: ruby_preserved_encindex = 8;
+pub const RUBY_ENCINDEX_UTF8_MAC: ruby_preserved_encindex = 9;
+pub const RUBY_ENCINDEX_EUC_JP: ruby_preserved_encindex = 10;
+pub const RUBY_ENCINDEX_Windows_31J: ruby_preserved_encindex = 11;
+pub const RUBY_ENCINDEX_BUILTIN_MAX: ruby_preserved_encindex = 12;
+pub type ruby_preserved_encindex = u32;
+pub const BOP_PLUS: ruby_basic_operators = 0;
+pub const BOP_MINUS: ruby_basic_operators = 1;
+pub const BOP_MULT: ruby_basic_operators = 2;
+pub const BOP_DIV: ruby_basic_operators = 3;
+pub const BOP_MOD: ruby_basic_operators = 4;
+pub const BOP_EQ: ruby_basic_operators = 5;
+pub const BOP_EQQ: ruby_basic_operators = 6;
+pub const BOP_LT: ruby_basic_operators = 7;
+pub const BOP_LE: ruby_basic_operators = 8;
+pub const BOP_LTLT: ruby_basic_operators = 9;
+pub const BOP_AREF: ruby_basic_operators = 10;
+pub const BOP_ASET: ruby_basic_operators = 11;
+pub const BOP_LENGTH: ruby_basic_operators = 12;
+pub const BOP_SIZE: ruby_basic_operators = 13;
+pub const BOP_EMPTY_P: ruby_basic_operators = 14;
+pub const BOP_NIL_P: ruby_basic_operators = 15;
+pub const BOP_SUCC: ruby_basic_operators = 16;
+pub const BOP_GT: ruby_basic_operators = 17;
+pub const BOP_GE: ruby_basic_operators = 18;
+pub const BOP_NOT: ruby_basic_operators = 19;
+pub const BOP_NEQ: ruby_basic_operators = 20;
+pub const BOP_MATCH: ruby_basic_operators = 21;
+pub const BOP_FREEZE: ruby_basic_operators = 22;
+pub const BOP_UMINUS: ruby_basic_operators = 23;
+pub const BOP_MAX: ruby_basic_operators = 24;
+pub const BOP_MIN: ruby_basic_operators = 25;
+pub const BOP_HASH: ruby_basic_operators = 26;
+pub const BOP_CALL: ruby_basic_operators = 27;
+pub const BOP_AND: ruby_basic_operators = 28;
+pub const BOP_OR: ruby_basic_operators = 29;
+pub const BOP_CMP: ruby_basic_operators = 30;
+pub const BOP_DEFAULT: ruby_basic_operators = 31;
+pub const BOP_LAST_: ruby_basic_operators = 32;
+pub type ruby_basic_operators = u32;
pub type rb_serial_t = ::std::os::raw::c_ulonglong;
-extern "C" {
- pub fn rb_class_allocate_instance(klass: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_obj_info(obj: VALUE) -> *const ::std::os::raw::c_char;
-}
pub const imemo_env: imemo_type = 0;
pub const imemo_cref: imemo_type = 1;
pub const imemo_svar: imemo_type = 2;
@@ -494,6 +360,20 @@ pub const imemo_callinfo: imemo_type = 11;
pub const imemo_callcache: imemo_type = 12;
pub const imemo_constcache: imemo_type = 13;
pub type imemo_type = u32;
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct vm_ifunc_argc {
+ pub min: ::std::os::raw::c_int,
+ pub max: ::std::os::raw::c_int,
+}
+#[repr(C)]
+pub struct vm_ifunc {
+ pub flags: VALUE,
+ pub svar_lep: *mut VALUE,
+ pub func: rb_block_call_func_t,
+ pub data: *const ::std::os::raw::c_void,
+ pub argc: vm_ifunc_argc,
+}
pub const METHOD_VISI_UNDEF: rb_method_visibility_t = 0;
pub const METHOD_VISI_PUBLIC: rb_method_visibility_t = 1;
pub const METHOD_VISI_PRIVATE: rb_method_visibility_t = 2;
@@ -531,6 +411,21 @@ pub const VM_METHOD_TYPE_OPTIMIZED: rb_method_type_t = 9;
pub const VM_METHOD_TYPE_MISSING: rb_method_type_t = 10;
pub const VM_METHOD_TYPE_REFINED: rb_method_type_t = 11;
pub type rb_method_type_t = u32;
+pub type rb_cfunc_t = ::std::option::Option<unsafe extern "C" fn() -> VALUE>;
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct rb_method_cfunc_struct {
+ pub func: rb_cfunc_t,
+ pub invoker: ::std::option::Option<
+ unsafe extern "C" fn(
+ recv: VALUE,
+ argc: ::std::os::raw::c_int,
+ argv: *const VALUE,
+ func: ::std::option::Option<unsafe extern "C" fn() -> VALUE>,
+ ) -> VALUE,
+ >,
+ pub argc: ::std::os::raw::c_int,
+}
pub const OPTIMIZED_METHOD_TYPE_SEND: method_optimized_type = 0;
pub const OPTIMIZED_METHOD_TYPE_CALL: method_optimized_type = 1;
pub const OPTIMIZED_METHOD_TYPE_BLOCK_CALL: method_optimized_type = 2;
@@ -538,13 +433,26 @@ pub const OPTIMIZED_METHOD_TYPE_STRUCT_AREF: method_optimized_type = 3;
pub const OPTIMIZED_METHOD_TYPE_STRUCT_ASET: method_optimized_type = 4;
pub const OPTIMIZED_METHOD_TYPE__MAX: method_optimized_type = 5;
pub type method_optimized_type = u32;
-extern "C" {
- pub fn rb_method_entry_at(obj: VALUE, id: ID) -> *const rb_method_entry_t;
-}
-extern "C" {
- pub fn rb_callable_method_entry(klass: VALUE, id: ID) -> *const rb_callable_method_entry_t;
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct rb_id_table {
+ _unused: [u8; 0],
}
pub type rb_num_t = ::std::os::raw::c_ulong;
+pub const RUBY_TAG_NONE: ruby_tag_type = 0;
+pub const RUBY_TAG_RETURN: ruby_tag_type = 1;
+pub const RUBY_TAG_BREAK: ruby_tag_type = 2;
+pub const RUBY_TAG_NEXT: ruby_tag_type = 3;
+pub const RUBY_TAG_RETRY: ruby_tag_type = 4;
+pub const RUBY_TAG_REDO: ruby_tag_type = 5;
+pub const RUBY_TAG_RAISE: ruby_tag_type = 6;
+pub const RUBY_TAG_THROW: ruby_tag_type = 7;
+pub const RUBY_TAG_FATAL: ruby_tag_type = 8;
+pub const RUBY_TAG_MASK: ruby_tag_type = 15;
+pub type ruby_tag_type = u32;
+pub const VM_THROW_NO_ESCAPE_FLAG: ruby_vm_throw_flags = 32768;
+pub const VM_THROW_STATE_MASK: ruby_vm_throw_flags = 255;
+pub type ruby_vm_throw_flags = u32;
#[repr(C)]
pub struct iseq_inline_constant_cache_entry {
pub flags: VALUE,
@@ -557,18 +465,33 @@ pub struct iseq_inline_constant_cache_entry {
#[derive(Debug, Copy, Clone)]
pub struct iseq_inline_constant_cache {
pub entry: *mut iseq_inline_constant_cache_entry,
- pub get_insn_idx: ::std::os::raw::c_uint,
+ pub segments: *const ID,
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct iseq_inline_iv_cache_entry {
- pub entry: *mut rb_iv_index_tbl_entry,
+ pub value: usize,
+ pub iv_set_name: ID,
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct iseq_inline_cvar_cache_entry {
pub entry: *mut rb_cvar_class_tbl_entry,
}
+pub const ISEQ_TYPE_TOP: rb_iseq_type = 0;
+pub const ISEQ_TYPE_METHOD: rb_iseq_type = 1;
+pub const ISEQ_TYPE_BLOCK: rb_iseq_type = 2;
+pub const ISEQ_TYPE_CLASS: rb_iseq_type = 3;
+pub const ISEQ_TYPE_RESCUE: rb_iseq_type = 4;
+pub const ISEQ_TYPE_ENSURE: rb_iseq_type = 5;
+pub const ISEQ_TYPE_EVAL: rb_iseq_type = 6;
+pub const ISEQ_TYPE_MAIN: rb_iseq_type = 7;
+pub const ISEQ_TYPE_PLAIN: rb_iseq_type = 8;
+pub type rb_iseq_type = u32;
+pub const BUILTIN_ATTR_LEAF: rb_builtin_attr = 1;
+pub const BUILTIN_ATTR_SINGLE_NOARG_LEAF: rb_builtin_attr = 2;
+pub const BUILTIN_ATTR_INLINE_BLOCK: rb_builtin_attr = 4;
+pub type rb_builtin_attr = u32;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword {
@@ -579,44 +502,104 @@ pub struct rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword {
pub table: *const ID,
pub default_values: *mut VALUE,
}
-pub const BOP_PLUS: ruby_basic_operators = 0;
-pub const BOP_MINUS: ruby_basic_operators = 1;
-pub const BOP_MULT: ruby_basic_operators = 2;
-pub const BOP_DIV: ruby_basic_operators = 3;
-pub const BOP_MOD: ruby_basic_operators = 4;
-pub const BOP_EQ: ruby_basic_operators = 5;
-pub const BOP_EQQ: ruby_basic_operators = 6;
-pub const BOP_LT: ruby_basic_operators = 7;
-pub const BOP_LE: ruby_basic_operators = 8;
-pub const BOP_LTLT: ruby_basic_operators = 9;
-pub const BOP_AREF: ruby_basic_operators = 10;
-pub const BOP_ASET: ruby_basic_operators = 11;
-pub const BOP_LENGTH: ruby_basic_operators = 12;
-pub const BOP_SIZE: ruby_basic_operators = 13;
-pub const BOP_EMPTY_P: ruby_basic_operators = 14;
-pub const BOP_NIL_P: ruby_basic_operators = 15;
-pub const BOP_SUCC: ruby_basic_operators = 16;
-pub const BOP_GT: ruby_basic_operators = 17;
-pub const BOP_GE: ruby_basic_operators = 18;
-pub const BOP_NOT: ruby_basic_operators = 19;
-pub const BOP_NEQ: ruby_basic_operators = 20;
-pub const BOP_MATCH: ruby_basic_operators = 21;
-pub const BOP_FREEZE: ruby_basic_operators = 22;
-pub const BOP_UMINUS: ruby_basic_operators = 23;
-pub const BOP_MAX: ruby_basic_operators = 24;
-pub const BOP_MIN: ruby_basic_operators = 25;
-pub const BOP_CALL: ruby_basic_operators = 26;
-pub const BOP_AND: ruby_basic_operators = 27;
-pub const BOP_OR: ruby_basic_operators = 28;
-pub const BOP_LAST_: ruby_basic_operators = 29;
-pub type ruby_basic_operators = u32;
-pub type rb_control_frame_t = rb_control_frame_struct;
-extern "C" {
- pub static mut rb_mRubyVMFrozenCore: VALUE;
+#[repr(C)]
+pub struct rb_captured_block {
+ pub self_: VALUE,
+ pub ep: *const VALUE,
+ pub code: rb_captured_block__bindgen_ty_1,
}
-extern "C" {
- pub static mut rb_block_param_proxy: VALUE;
+#[repr(C)]
+pub struct rb_captured_block__bindgen_ty_1 {
+ pub iseq: __BindgenUnionField<*const rb_iseq_t>,
+ pub ifunc: __BindgenUnionField<*const vm_ifunc>,
+ pub val: __BindgenUnionField<VALUE>,
+ pub bindgen_union_field: u64,
+}
+pub const block_type_iseq: rb_block_type = 0;
+pub const block_type_ifunc: rb_block_type = 1;
+pub const block_type_symbol: rb_block_type = 2;
+pub const block_type_proc: rb_block_type = 3;
+pub type rb_block_type = u32;
+#[repr(C)]
+pub struct rb_block {
+ pub as_: rb_block__bindgen_ty_1,
+ pub type_: rb_block_type,
+}
+#[repr(C)]
+pub struct rb_block__bindgen_ty_1 {
+ pub captured: __BindgenUnionField<rb_captured_block>,
+ pub symbol: __BindgenUnionField<VALUE>,
+ pub proc_: __BindgenUnionField<VALUE>,
+ pub bindgen_union_field: [u64; 3usize],
}
+pub type rb_control_frame_t = rb_control_frame_struct;
+#[repr(C)]
+pub struct rb_proc_t {
+ pub block: rb_block,
+ pub _bitfield_align_1: [u8; 0],
+ pub _bitfield_1: __BindgenBitfieldUnit<[u8; 1usize]>,
+ pub __bindgen_padding_0: [u8; 7usize],
+}
+impl rb_proc_t {
+ #[inline]
+ pub fn is_from_method(&self) -> ::std::os::raw::c_uint {
+ unsafe { ::std::mem::transmute(self._bitfield_1.get(0usize, 1u8) as u32) }
+ }
+ #[inline]
+ pub fn set_is_from_method(&mut self, val: ::std::os::raw::c_uint) {
+ unsafe {
+ let val: u32 = ::std::mem::transmute(val);
+ self._bitfield_1.set(0usize, 1u8, val as u64)
+ }
+ }
+ #[inline]
+ pub fn is_lambda(&self) -> ::std::os::raw::c_uint {
+ unsafe { ::std::mem::transmute(self._bitfield_1.get(1usize, 1u8) as u32) }
+ }
+ #[inline]
+ pub fn set_is_lambda(&mut self, val: ::std::os::raw::c_uint) {
+ unsafe {
+ let val: u32 = ::std::mem::transmute(val);
+ self._bitfield_1.set(1usize, 1u8, val as u64)
+ }
+ }
+ #[inline]
+ pub fn is_isolated(&self) -> ::std::os::raw::c_uint {
+ unsafe { ::std::mem::transmute(self._bitfield_1.get(2usize, 1u8) as u32) }
+ }
+ #[inline]
+ pub fn set_is_isolated(&mut self, val: ::std::os::raw::c_uint) {
+ unsafe {
+ let val: u32 = ::std::mem::transmute(val);
+ self._bitfield_1.set(2usize, 1u8, val as u64)
+ }
+ }
+ #[inline]
+ pub fn new_bitfield_1(
+ is_from_method: ::std::os::raw::c_uint,
+ is_lambda: ::std::os::raw::c_uint,
+ is_isolated: ::std::os::raw::c_uint,
+ ) -> __BindgenBitfieldUnit<[u8; 1usize]> {
+ let mut __bindgen_bitfield_unit: __BindgenBitfieldUnit<[u8; 1usize]> = Default::default();
+ __bindgen_bitfield_unit.set(0usize, 1u8, {
+ let is_from_method: u32 = unsafe { ::std::mem::transmute(is_from_method) };
+ is_from_method as u64
+ });
+ __bindgen_bitfield_unit.set(1usize, 1u8, {
+ let is_lambda: u32 = unsafe { ::std::mem::transmute(is_lambda) };
+ is_lambda as u64
+ });
+ __bindgen_bitfield_unit.set(2usize, 1u8, {
+ let is_isolated: u32 = unsafe { ::std::mem::transmute(is_isolated) };
+ is_isolated as u64
+ });
+ __bindgen_bitfield_unit
+ }
+}
+pub const VM_CHECKMATCH_TYPE_WHEN: vm_check_match_type = 1;
+pub const VM_CHECKMATCH_TYPE_CASE: vm_check_match_type = 2;
+pub const VM_CHECKMATCH_TYPE_RESCUE: vm_check_match_type = 3;
+pub type vm_check_match_type = u32;
pub const VM_SPECIAL_OBJECT_VMCORE: vm_special_object_type = 1;
pub const VM_SPECIAL_OBJECT_CBASE: vm_special_object_type = 2;
pub const VM_SPECIAL_OBJECT_CONST_BASE: vm_special_object_type = 3;
@@ -646,30 +629,60 @@ pub const VM_ENV_FLAG_ESCAPED: vm_frame_env_flags = 4;
pub const VM_ENV_FLAG_WB_REQUIRED: vm_frame_env_flags = 8;
pub const VM_ENV_FLAG_ISOLATED: vm_frame_env_flags = 16;
pub type vm_frame_env_flags = u32;
-extern "C" {
- pub fn rb_vm_bh_to_procval(ec: *const rb_execution_context_t, block_handler: VALUE) -> VALUE;
+pub type attr_index_t = u32;
+pub type shape_id_t = u32;
+pub type redblack_id_t = u32;
+pub type redblack_node_t = redblack_node;
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct rb_shape {
+ pub edges: *mut rb_id_table,
+ pub edge_name: ID,
+ pub next_iv_index: attr_index_t,
+ pub capacity: u32,
+ pub type_: u8,
+ pub size_pool_index: u8,
+ pub parent_id: shape_id_t,
+ pub ancestor_index: *mut redblack_node_t,
+}
+pub type rb_shape_t = rb_shape;
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct redblack_node {
+ pub key: ID,
+ pub value: *mut rb_shape_t,
+ pub l: redblack_id_t,
+ pub r: redblack_id_t,
}
-extern "C" {
- pub fn rb_vm_frame_method_entry(
- cfp: *const rb_control_frame_t,
- ) -> *const rb_callable_method_entry_t;
+#[repr(C)]
+pub struct rb_cvar_class_tbl_entry {
+ pub index: u32,
+ pub global_cvar_state: rb_serial_t,
+ pub cref: *const rb_cref_t,
+ pub class_value: VALUE,
}
pub const VM_CALL_ARGS_SPLAT_bit: vm_call_flag_bits = 0;
pub const VM_CALL_ARGS_BLOCKARG_bit: vm_call_flag_bits = 1;
pub const VM_CALL_FCALL_bit: vm_call_flag_bits = 2;
pub const VM_CALL_VCALL_bit: vm_call_flag_bits = 3;
pub const VM_CALL_ARGS_SIMPLE_bit: vm_call_flag_bits = 4;
-pub const VM_CALL_BLOCKISEQ_bit: vm_call_flag_bits = 5;
-pub const VM_CALL_KWARG_bit: vm_call_flag_bits = 6;
-pub const VM_CALL_KW_SPLAT_bit: vm_call_flag_bits = 7;
-pub const VM_CALL_TAILCALL_bit: vm_call_flag_bits = 8;
-pub const VM_CALL_SUPER_bit: vm_call_flag_bits = 9;
-pub const VM_CALL_ZSUPER_bit: vm_call_flag_bits = 10;
-pub const VM_CALL_OPT_SEND_bit: vm_call_flag_bits = 11;
-pub const VM_CALL_KW_SPLAT_MUT_bit: vm_call_flag_bits = 12;
+pub const VM_CALL_KWARG_bit: vm_call_flag_bits = 5;
+pub const VM_CALL_KW_SPLAT_bit: vm_call_flag_bits = 6;
+pub const VM_CALL_TAILCALL_bit: vm_call_flag_bits = 7;
+pub const VM_CALL_SUPER_bit: vm_call_flag_bits = 8;
+pub const VM_CALL_ZSUPER_bit: vm_call_flag_bits = 9;
+pub const VM_CALL_OPT_SEND_bit: vm_call_flag_bits = 10;
+pub const VM_CALL_KW_SPLAT_MUT_bit: vm_call_flag_bits = 11;
+pub const VM_CALL_ARGS_SPLAT_MUT_bit: vm_call_flag_bits = 12;
pub const VM_CALL__END: vm_call_flag_bits = 13;
pub type vm_call_flag_bits = u32;
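As a quick illustration of how the `_bit` indices above become the flag masks defined in `manual_defs` (e.g. `VM_CALL_ARGS_SPLAT = 1 << VM_CALL_ARGS_SPLAT_bit`) and how a call site's flag word might be tested, here is a small sketch. The upper-cased names and the `ci_flags` value are local stand-ins for this example only.

```rust
// Local stand-ins mirroring VM_CALL_ARGS_SPLAT_bit / VM_CALL_FCALL_bit above.
const VM_CALL_ARGS_SPLAT_BIT: u32 = 0;
const VM_CALL_FCALL_BIT: u32 = 2;

// Masks derived exactly like the manual_defs constants: 1 << bit index.
const VM_CALL_ARGS_SPLAT: u32 = 1 << VM_CALL_ARGS_SPLAT_BIT;
const VM_CALL_FCALL: u32 = 1 << VM_CALL_FCALL_BIT;

fn main() {
    // Hypothetical call-site flag word with FCALL set and ARGS_SPLAT clear.
    let ci_flags: u32 = VM_CALL_FCALL;
    assert_ne!(ci_flags & VM_CALL_FCALL, 0);
    assert_eq!(ci_flags & VM_CALL_ARGS_SPLAT, 0);
    println!("splat? {}", (ci_flags & VM_CALL_ARGS_SPLAT) != 0);
}
```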
#[repr(C)]
+pub struct rb_callinfo_kwarg {
+ pub keyword_len: ::std::os::raw::c_int,
+ pub references: ::std::os::raw::c_int,
+ pub keywords: __IncompleteArrayField<VALUE>,
+}
+#[repr(C)]
pub struct rb_callinfo {
pub flags: VALUE,
pub kwarg: *const rb_callinfo_kwarg,
@@ -683,45 +696,16 @@ pub struct rb_call_data {
pub ci: *const rb_callinfo,
pub cc: *const rb_callcache,
}
-extern "C" {
- pub fn rb_obj_as_string_result(str_: VALUE, obj: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_str_concat_literals(num: size_t, strary: *const VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_ec_str_resurrect(ec: *mut rb_execution_context_struct, str_: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_hash_new_with_size(size: st_index_t) -> VALUE;
-}
-extern "C" {
- pub fn rb_hash_resurrect(hash: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_obj_ensure_iv_index_mapping(obj: VALUE, id: ID) -> u32;
-}
-extern "C" {
- pub fn rb_gvar_get(arg1: ID) -> VALUE;
-}
-extern "C" {
- pub fn rb_gvar_set(arg1: ID, arg2: VALUE) -> VALUE;
-}
-extern "C" {
- pub fn rb_vm_insn_decode(encoded: VALUE) -> ::std::os::raw::c_int;
-}
-#[repr(C)]
-pub struct rb_iv_index_tbl_entry {
- pub index: u32,
- pub class_serial: rb_serial_t,
- pub class_value: VALUE,
-}
-#[repr(C)]
-pub struct rb_cvar_class_tbl_entry {
- pub index: u32,
- pub global_cvar_state: rb_serial_t,
- pub class_value: VALUE,
-}
+pub const RHASH_PASS_AS_KEYWORDS: ruby_rhash_flags = 8192;
+pub const RHASH_PROC_DEFAULT: ruby_rhash_flags = 16384;
+pub const RHASH_ST_TABLE_FLAG: ruby_rhash_flags = 32768;
+pub const RHASH_AR_TABLE_SIZE_MASK: ruby_rhash_flags = 983040;
+pub const RHASH_AR_TABLE_SIZE_SHIFT: ruby_rhash_flags = 16;
+pub const RHASH_AR_TABLE_BOUND_MASK: ruby_rhash_flags = 15728640;
+pub const RHASH_AR_TABLE_BOUND_SHIFT: ruby_rhash_flags = 20;
+pub const RHASH_LEV_SHIFT: ruby_rhash_flags = 25;
+pub const RHASH_LEV_MAX: ruby_rhash_flags = 127;
+pub type ruby_rhash_flags = u32;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct rb_builtin_function {
@@ -729,14 +713,6 @@ pub struct rb_builtin_function {
pub argc: ::std::os::raw::c_int,
pub index: ::std::os::raw::c_int,
pub name: *const ::std::os::raw::c_char,
- pub compiler: ::std::option::Option<
- unsafe extern "C" fn(
- arg1: *mut FILE,
- arg2: ::std::os::raw::c_long,
- arg3: ::std::os::raw::c_uint,
- arg4: bool,
- ),
- >,
}
pub const YARVINSN_nop: ruby_vminsn_type = 0;
pub const YARVINSN_getlocal: ruby_vminsn_type = 1;
@@ -750,331 +726,537 @@ pub const YARVINSN_getinstancevariable: ruby_vminsn_type = 8;
pub const YARVINSN_setinstancevariable: ruby_vminsn_type = 9;
pub const YARVINSN_getclassvariable: ruby_vminsn_type = 10;
pub const YARVINSN_setclassvariable: ruby_vminsn_type = 11;
-pub const YARVINSN_getconstant: ruby_vminsn_type = 12;
-pub const YARVINSN_setconstant: ruby_vminsn_type = 13;
-pub const YARVINSN_getglobal: ruby_vminsn_type = 14;
-pub const YARVINSN_setglobal: ruby_vminsn_type = 15;
-pub const YARVINSN_putnil: ruby_vminsn_type = 16;
-pub const YARVINSN_putself: ruby_vminsn_type = 17;
-pub const YARVINSN_putobject: ruby_vminsn_type = 18;
-pub const YARVINSN_putspecialobject: ruby_vminsn_type = 19;
-pub const YARVINSN_putstring: ruby_vminsn_type = 20;
-pub const YARVINSN_concatstrings: ruby_vminsn_type = 21;
-pub const YARVINSN_anytostring: ruby_vminsn_type = 22;
-pub const YARVINSN_toregexp: ruby_vminsn_type = 23;
-pub const YARVINSN_intern: ruby_vminsn_type = 24;
-pub const YARVINSN_newarray: ruby_vminsn_type = 25;
-pub const YARVINSN_newarraykwsplat: ruby_vminsn_type = 26;
-pub const YARVINSN_duparray: ruby_vminsn_type = 27;
-pub const YARVINSN_duphash: ruby_vminsn_type = 28;
-pub const YARVINSN_expandarray: ruby_vminsn_type = 29;
-pub const YARVINSN_concatarray: ruby_vminsn_type = 30;
-pub const YARVINSN_splatarray: ruby_vminsn_type = 31;
-pub const YARVINSN_newhash: ruby_vminsn_type = 32;
-pub const YARVINSN_newrange: ruby_vminsn_type = 33;
-pub const YARVINSN_pop: ruby_vminsn_type = 34;
-pub const YARVINSN_dup: ruby_vminsn_type = 35;
-pub const YARVINSN_dupn: ruby_vminsn_type = 36;
-pub const YARVINSN_swap: ruby_vminsn_type = 37;
-pub const YARVINSN_topn: ruby_vminsn_type = 38;
-pub const YARVINSN_setn: ruby_vminsn_type = 39;
-pub const YARVINSN_adjuststack: ruby_vminsn_type = 40;
-pub const YARVINSN_defined: ruby_vminsn_type = 41;
-pub const YARVINSN_checkmatch: ruby_vminsn_type = 42;
-pub const YARVINSN_checkkeyword: ruby_vminsn_type = 43;
-pub const YARVINSN_checktype: ruby_vminsn_type = 44;
-pub const YARVINSN_defineclass: ruby_vminsn_type = 45;
-pub const YARVINSN_definemethod: ruby_vminsn_type = 46;
-pub const YARVINSN_definesmethod: ruby_vminsn_type = 47;
-pub const YARVINSN_send: ruby_vminsn_type = 48;
-pub const YARVINSN_opt_send_without_block: ruby_vminsn_type = 49;
-pub const YARVINSN_objtostring: ruby_vminsn_type = 50;
-pub const YARVINSN_opt_str_freeze: ruby_vminsn_type = 51;
-pub const YARVINSN_opt_nil_p: ruby_vminsn_type = 52;
-pub const YARVINSN_opt_str_uminus: ruby_vminsn_type = 53;
-pub const YARVINSN_opt_newarray_max: ruby_vminsn_type = 54;
-pub const YARVINSN_opt_newarray_min: ruby_vminsn_type = 55;
-pub const YARVINSN_invokesuper: ruby_vminsn_type = 56;
-pub const YARVINSN_invokeblock: ruby_vminsn_type = 57;
-pub const YARVINSN_leave: ruby_vminsn_type = 58;
-pub const YARVINSN_throw: ruby_vminsn_type = 59;
-pub const YARVINSN_jump: ruby_vminsn_type = 60;
-pub const YARVINSN_branchif: ruby_vminsn_type = 61;
-pub const YARVINSN_branchunless: ruby_vminsn_type = 62;
-pub const YARVINSN_branchnil: ruby_vminsn_type = 63;
-pub const YARVINSN_opt_getinlinecache: ruby_vminsn_type = 64;
-pub const YARVINSN_opt_setinlinecache: ruby_vminsn_type = 65;
-pub const YARVINSN_once: ruby_vminsn_type = 66;
-pub const YARVINSN_opt_case_dispatch: ruby_vminsn_type = 67;
-pub const YARVINSN_opt_plus: ruby_vminsn_type = 68;
-pub const YARVINSN_opt_minus: ruby_vminsn_type = 69;
-pub const YARVINSN_opt_mult: ruby_vminsn_type = 70;
-pub const YARVINSN_opt_div: ruby_vminsn_type = 71;
-pub const YARVINSN_opt_mod: ruby_vminsn_type = 72;
-pub const YARVINSN_opt_eq: ruby_vminsn_type = 73;
-pub const YARVINSN_opt_neq: ruby_vminsn_type = 74;
-pub const YARVINSN_opt_lt: ruby_vminsn_type = 75;
-pub const YARVINSN_opt_le: ruby_vminsn_type = 76;
-pub const YARVINSN_opt_gt: ruby_vminsn_type = 77;
-pub const YARVINSN_opt_ge: ruby_vminsn_type = 78;
-pub const YARVINSN_opt_ltlt: ruby_vminsn_type = 79;
-pub const YARVINSN_opt_and: ruby_vminsn_type = 80;
-pub const YARVINSN_opt_or: ruby_vminsn_type = 81;
-pub const YARVINSN_opt_aref: ruby_vminsn_type = 82;
-pub const YARVINSN_opt_aset: ruby_vminsn_type = 83;
-pub const YARVINSN_opt_aset_with: ruby_vminsn_type = 84;
-pub const YARVINSN_opt_aref_with: ruby_vminsn_type = 85;
-pub const YARVINSN_opt_length: ruby_vminsn_type = 86;
-pub const YARVINSN_opt_size: ruby_vminsn_type = 87;
-pub const YARVINSN_opt_empty_p: ruby_vminsn_type = 88;
-pub const YARVINSN_opt_succ: ruby_vminsn_type = 89;
-pub const YARVINSN_opt_not: ruby_vminsn_type = 90;
-pub const YARVINSN_opt_regexpmatch2: ruby_vminsn_type = 91;
-pub const YARVINSN_invokebuiltin: ruby_vminsn_type = 92;
-pub const YARVINSN_opt_invokebuiltin_delegate: ruby_vminsn_type = 93;
-pub const YARVINSN_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 94;
-pub const YARVINSN_getlocal_WC_0: ruby_vminsn_type = 95;
-pub const YARVINSN_getlocal_WC_1: ruby_vminsn_type = 96;
-pub const YARVINSN_setlocal_WC_0: ruby_vminsn_type = 97;
-pub const YARVINSN_setlocal_WC_1: ruby_vminsn_type = 98;
-pub const YARVINSN_putobject_INT2FIX_0_: ruby_vminsn_type = 99;
-pub const YARVINSN_putobject_INT2FIX_1_: ruby_vminsn_type = 100;
-pub const YARVINSN_trace_nop: ruby_vminsn_type = 101;
-pub const YARVINSN_trace_getlocal: ruby_vminsn_type = 102;
-pub const YARVINSN_trace_setlocal: ruby_vminsn_type = 103;
-pub const YARVINSN_trace_getblockparam: ruby_vminsn_type = 104;
-pub const YARVINSN_trace_setblockparam: ruby_vminsn_type = 105;
-pub const YARVINSN_trace_getblockparamproxy: ruby_vminsn_type = 106;
-pub const YARVINSN_trace_getspecial: ruby_vminsn_type = 107;
-pub const YARVINSN_trace_setspecial: ruby_vminsn_type = 108;
-pub const YARVINSN_trace_getinstancevariable: ruby_vminsn_type = 109;
-pub const YARVINSN_trace_setinstancevariable: ruby_vminsn_type = 110;
-pub const YARVINSN_trace_getclassvariable: ruby_vminsn_type = 111;
-pub const YARVINSN_trace_setclassvariable: ruby_vminsn_type = 112;
-pub const YARVINSN_trace_getconstant: ruby_vminsn_type = 113;
-pub const YARVINSN_trace_setconstant: ruby_vminsn_type = 114;
-pub const YARVINSN_trace_getglobal: ruby_vminsn_type = 115;
-pub const YARVINSN_trace_setglobal: ruby_vminsn_type = 116;
-pub const YARVINSN_trace_putnil: ruby_vminsn_type = 117;
-pub const YARVINSN_trace_putself: ruby_vminsn_type = 118;
-pub const YARVINSN_trace_putobject: ruby_vminsn_type = 119;
-pub const YARVINSN_trace_putspecialobject: ruby_vminsn_type = 120;
-pub const YARVINSN_trace_putstring: ruby_vminsn_type = 121;
-pub const YARVINSN_trace_concatstrings: ruby_vminsn_type = 122;
-pub const YARVINSN_trace_anytostring: ruby_vminsn_type = 123;
-pub const YARVINSN_trace_toregexp: ruby_vminsn_type = 124;
-pub const YARVINSN_trace_intern: ruby_vminsn_type = 125;
-pub const YARVINSN_trace_newarray: ruby_vminsn_type = 126;
-pub const YARVINSN_trace_newarraykwsplat: ruby_vminsn_type = 127;
-pub const YARVINSN_trace_duparray: ruby_vminsn_type = 128;
-pub const YARVINSN_trace_duphash: ruby_vminsn_type = 129;
-pub const YARVINSN_trace_expandarray: ruby_vminsn_type = 130;
-pub const YARVINSN_trace_concatarray: ruby_vminsn_type = 131;
-pub const YARVINSN_trace_splatarray: ruby_vminsn_type = 132;
-pub const YARVINSN_trace_newhash: ruby_vminsn_type = 133;
-pub const YARVINSN_trace_newrange: ruby_vminsn_type = 134;
-pub const YARVINSN_trace_pop: ruby_vminsn_type = 135;
-pub const YARVINSN_trace_dup: ruby_vminsn_type = 136;
-pub const YARVINSN_trace_dupn: ruby_vminsn_type = 137;
-pub const YARVINSN_trace_swap: ruby_vminsn_type = 138;
-pub const YARVINSN_trace_topn: ruby_vminsn_type = 139;
-pub const YARVINSN_trace_setn: ruby_vminsn_type = 140;
-pub const YARVINSN_trace_adjuststack: ruby_vminsn_type = 141;
-pub const YARVINSN_trace_defined: ruby_vminsn_type = 142;
-pub const YARVINSN_trace_checkmatch: ruby_vminsn_type = 143;
-pub const YARVINSN_trace_checkkeyword: ruby_vminsn_type = 144;
-pub const YARVINSN_trace_checktype: ruby_vminsn_type = 145;
-pub const YARVINSN_trace_defineclass: ruby_vminsn_type = 146;
-pub const YARVINSN_trace_definemethod: ruby_vminsn_type = 147;
-pub const YARVINSN_trace_definesmethod: ruby_vminsn_type = 148;
-pub const YARVINSN_trace_send: ruby_vminsn_type = 149;
-pub const YARVINSN_trace_opt_send_without_block: ruby_vminsn_type = 150;
-pub const YARVINSN_trace_objtostring: ruby_vminsn_type = 151;
-pub const YARVINSN_trace_opt_str_freeze: ruby_vminsn_type = 152;
-pub const YARVINSN_trace_opt_nil_p: ruby_vminsn_type = 153;
-pub const YARVINSN_trace_opt_str_uminus: ruby_vminsn_type = 154;
-pub const YARVINSN_trace_opt_newarray_max: ruby_vminsn_type = 155;
-pub const YARVINSN_trace_opt_newarray_min: ruby_vminsn_type = 156;
-pub const YARVINSN_trace_invokesuper: ruby_vminsn_type = 157;
-pub const YARVINSN_trace_invokeblock: ruby_vminsn_type = 158;
-pub const YARVINSN_trace_leave: ruby_vminsn_type = 159;
-pub const YARVINSN_trace_throw: ruby_vminsn_type = 160;
-pub const YARVINSN_trace_jump: ruby_vminsn_type = 161;
-pub const YARVINSN_trace_branchif: ruby_vminsn_type = 162;
-pub const YARVINSN_trace_branchunless: ruby_vminsn_type = 163;
-pub const YARVINSN_trace_branchnil: ruby_vminsn_type = 164;
-pub const YARVINSN_trace_opt_getinlinecache: ruby_vminsn_type = 165;
-pub const YARVINSN_trace_opt_setinlinecache: ruby_vminsn_type = 166;
-pub const YARVINSN_trace_once: ruby_vminsn_type = 167;
-pub const YARVINSN_trace_opt_case_dispatch: ruby_vminsn_type = 168;
-pub const YARVINSN_trace_opt_plus: ruby_vminsn_type = 169;
-pub const YARVINSN_trace_opt_minus: ruby_vminsn_type = 170;
-pub const YARVINSN_trace_opt_mult: ruby_vminsn_type = 171;
-pub const YARVINSN_trace_opt_div: ruby_vminsn_type = 172;
-pub const YARVINSN_trace_opt_mod: ruby_vminsn_type = 173;
-pub const YARVINSN_trace_opt_eq: ruby_vminsn_type = 174;
-pub const YARVINSN_trace_opt_neq: ruby_vminsn_type = 175;
-pub const YARVINSN_trace_opt_lt: ruby_vminsn_type = 176;
-pub const YARVINSN_trace_opt_le: ruby_vminsn_type = 177;
-pub const YARVINSN_trace_opt_gt: ruby_vminsn_type = 178;
-pub const YARVINSN_trace_opt_ge: ruby_vminsn_type = 179;
-pub const YARVINSN_trace_opt_ltlt: ruby_vminsn_type = 180;
-pub const YARVINSN_trace_opt_and: ruby_vminsn_type = 181;
-pub const YARVINSN_trace_opt_or: ruby_vminsn_type = 182;
-pub const YARVINSN_trace_opt_aref: ruby_vminsn_type = 183;
-pub const YARVINSN_trace_opt_aset: ruby_vminsn_type = 184;
-pub const YARVINSN_trace_opt_aset_with: ruby_vminsn_type = 185;
-pub const YARVINSN_trace_opt_aref_with: ruby_vminsn_type = 186;
-pub const YARVINSN_trace_opt_length: ruby_vminsn_type = 187;
-pub const YARVINSN_trace_opt_size: ruby_vminsn_type = 188;
-pub const YARVINSN_trace_opt_empty_p: ruby_vminsn_type = 189;
-pub const YARVINSN_trace_opt_succ: ruby_vminsn_type = 190;
-pub const YARVINSN_trace_opt_not: ruby_vminsn_type = 191;
-pub const YARVINSN_trace_opt_regexpmatch2: ruby_vminsn_type = 192;
-pub const YARVINSN_trace_invokebuiltin: ruby_vminsn_type = 193;
-pub const YARVINSN_trace_opt_invokebuiltin_delegate: ruby_vminsn_type = 194;
-pub const YARVINSN_trace_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 195;
-pub const YARVINSN_trace_getlocal_WC_0: ruby_vminsn_type = 196;
-pub const YARVINSN_trace_getlocal_WC_1: ruby_vminsn_type = 197;
-pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 198;
-pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 199;
-pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 200;
-pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 201;
-pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 202;
+pub const YARVINSN_opt_getconstant_path: ruby_vminsn_type = 12;
+pub const YARVINSN_getconstant: ruby_vminsn_type = 13;
+pub const YARVINSN_setconstant: ruby_vminsn_type = 14;
+pub const YARVINSN_getglobal: ruby_vminsn_type = 15;
+pub const YARVINSN_setglobal: ruby_vminsn_type = 16;
+pub const YARVINSN_putnil: ruby_vminsn_type = 17;
+pub const YARVINSN_putself: ruby_vminsn_type = 18;
+pub const YARVINSN_putobject: ruby_vminsn_type = 19;
+pub const YARVINSN_putspecialobject: ruby_vminsn_type = 20;
+pub const YARVINSN_putstring: ruby_vminsn_type = 21;
+pub const YARVINSN_putchilledstring: ruby_vminsn_type = 22;
+pub const YARVINSN_concatstrings: ruby_vminsn_type = 23;
+pub const YARVINSN_anytostring: ruby_vminsn_type = 24;
+pub const YARVINSN_toregexp: ruby_vminsn_type = 25;
+pub const YARVINSN_intern: ruby_vminsn_type = 26;
+pub const YARVINSN_newarray: ruby_vminsn_type = 27;
+pub const YARVINSN_newarraykwsplat: ruby_vminsn_type = 28;
+pub const YARVINSN_pushtoarraykwsplat: ruby_vminsn_type = 29;
+pub const YARVINSN_duparray: ruby_vminsn_type = 30;
+pub const YARVINSN_duphash: ruby_vminsn_type = 31;
+pub const YARVINSN_expandarray: ruby_vminsn_type = 32;
+pub const YARVINSN_concatarray: ruby_vminsn_type = 33;
+pub const YARVINSN_concattoarray: ruby_vminsn_type = 34;
+pub const YARVINSN_pushtoarray: ruby_vminsn_type = 35;
+pub const YARVINSN_splatarray: ruby_vminsn_type = 36;
+pub const YARVINSN_splatkw: ruby_vminsn_type = 37;
+pub const YARVINSN_newhash: ruby_vminsn_type = 38;
+pub const YARVINSN_newrange: ruby_vminsn_type = 39;
+pub const YARVINSN_pop: ruby_vminsn_type = 40;
+pub const YARVINSN_dup: ruby_vminsn_type = 41;
+pub const YARVINSN_dupn: ruby_vminsn_type = 42;
+pub const YARVINSN_swap: ruby_vminsn_type = 43;
+pub const YARVINSN_opt_reverse: ruby_vminsn_type = 44;
+pub const YARVINSN_topn: ruby_vminsn_type = 45;
+pub const YARVINSN_setn: ruby_vminsn_type = 46;
+pub const YARVINSN_adjuststack: ruby_vminsn_type = 47;
+pub const YARVINSN_defined: ruby_vminsn_type = 48;
+pub const YARVINSN_definedivar: ruby_vminsn_type = 49;
+pub const YARVINSN_checkmatch: ruby_vminsn_type = 50;
+pub const YARVINSN_checkkeyword: ruby_vminsn_type = 51;
+pub const YARVINSN_checktype: ruby_vminsn_type = 52;
+pub const YARVINSN_defineclass: ruby_vminsn_type = 53;
+pub const YARVINSN_definemethod: ruby_vminsn_type = 54;
+pub const YARVINSN_definesmethod: ruby_vminsn_type = 55;
+pub const YARVINSN_send: ruby_vminsn_type = 56;
+pub const YARVINSN_opt_send_without_block: ruby_vminsn_type = 57;
+pub const YARVINSN_objtostring: ruby_vminsn_type = 58;
+pub const YARVINSN_opt_str_freeze: ruby_vminsn_type = 59;
+pub const YARVINSN_opt_nil_p: ruby_vminsn_type = 60;
+pub const YARVINSN_opt_str_uminus: ruby_vminsn_type = 61;
+pub const YARVINSN_opt_newarray_send: ruby_vminsn_type = 62;
+pub const YARVINSN_invokesuper: ruby_vminsn_type = 63;
+pub const YARVINSN_invokeblock: ruby_vminsn_type = 64;
+pub const YARVINSN_leave: ruby_vminsn_type = 65;
+pub const YARVINSN_throw: ruby_vminsn_type = 66;
+pub const YARVINSN_jump: ruby_vminsn_type = 67;
+pub const YARVINSN_branchif: ruby_vminsn_type = 68;
+pub const YARVINSN_branchunless: ruby_vminsn_type = 69;
+pub const YARVINSN_branchnil: ruby_vminsn_type = 70;
+pub const YARVINSN_once: ruby_vminsn_type = 71;
+pub const YARVINSN_opt_case_dispatch: ruby_vminsn_type = 72;
+pub const YARVINSN_opt_plus: ruby_vminsn_type = 73;
+pub const YARVINSN_opt_minus: ruby_vminsn_type = 74;
+pub const YARVINSN_opt_mult: ruby_vminsn_type = 75;
+pub const YARVINSN_opt_div: ruby_vminsn_type = 76;
+pub const YARVINSN_opt_mod: ruby_vminsn_type = 77;
+pub const YARVINSN_opt_eq: ruby_vminsn_type = 78;
+pub const YARVINSN_opt_neq: ruby_vminsn_type = 79;
+pub const YARVINSN_opt_lt: ruby_vminsn_type = 80;
+pub const YARVINSN_opt_le: ruby_vminsn_type = 81;
+pub const YARVINSN_opt_gt: ruby_vminsn_type = 82;
+pub const YARVINSN_opt_ge: ruby_vminsn_type = 83;
+pub const YARVINSN_opt_ltlt: ruby_vminsn_type = 84;
+pub const YARVINSN_opt_and: ruby_vminsn_type = 85;
+pub const YARVINSN_opt_or: ruby_vminsn_type = 86;
+pub const YARVINSN_opt_aref: ruby_vminsn_type = 87;
+pub const YARVINSN_opt_aset: ruby_vminsn_type = 88;
+pub const YARVINSN_opt_aset_with: ruby_vminsn_type = 89;
+pub const YARVINSN_opt_aref_with: ruby_vminsn_type = 90;
+pub const YARVINSN_opt_length: ruby_vminsn_type = 91;
+pub const YARVINSN_opt_size: ruby_vminsn_type = 92;
+pub const YARVINSN_opt_empty_p: ruby_vminsn_type = 93;
+pub const YARVINSN_opt_succ: ruby_vminsn_type = 94;
+pub const YARVINSN_opt_not: ruby_vminsn_type = 95;
+pub const YARVINSN_opt_regexpmatch2: ruby_vminsn_type = 96;
+pub const YARVINSN_invokebuiltin: ruby_vminsn_type = 97;
+pub const YARVINSN_opt_invokebuiltin_delegate: ruby_vminsn_type = 98;
+pub const YARVINSN_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 99;
+pub const YARVINSN_getlocal_WC_0: ruby_vminsn_type = 100;
+pub const YARVINSN_getlocal_WC_1: ruby_vminsn_type = 101;
+pub const YARVINSN_setlocal_WC_0: ruby_vminsn_type = 102;
+pub const YARVINSN_setlocal_WC_1: ruby_vminsn_type = 103;
+pub const YARVINSN_putobject_INT2FIX_0_: ruby_vminsn_type = 104;
+pub const YARVINSN_putobject_INT2FIX_1_: ruby_vminsn_type = 105;
+pub const YARVINSN_trace_nop: ruby_vminsn_type = 106;
+pub const YARVINSN_trace_getlocal: ruby_vminsn_type = 107;
+pub const YARVINSN_trace_setlocal: ruby_vminsn_type = 108;
+pub const YARVINSN_trace_getblockparam: ruby_vminsn_type = 109;
+pub const YARVINSN_trace_setblockparam: ruby_vminsn_type = 110;
+pub const YARVINSN_trace_getblockparamproxy: ruby_vminsn_type = 111;
+pub const YARVINSN_trace_getspecial: ruby_vminsn_type = 112;
+pub const YARVINSN_trace_setspecial: ruby_vminsn_type = 113;
+pub const YARVINSN_trace_getinstancevariable: ruby_vminsn_type = 114;
+pub const YARVINSN_trace_setinstancevariable: ruby_vminsn_type = 115;
+pub const YARVINSN_trace_getclassvariable: ruby_vminsn_type = 116;
+pub const YARVINSN_trace_setclassvariable: ruby_vminsn_type = 117;
+pub const YARVINSN_trace_opt_getconstant_path: ruby_vminsn_type = 118;
+pub const YARVINSN_trace_getconstant: ruby_vminsn_type = 119;
+pub const YARVINSN_trace_setconstant: ruby_vminsn_type = 120;
+pub const YARVINSN_trace_getglobal: ruby_vminsn_type = 121;
+pub const YARVINSN_trace_setglobal: ruby_vminsn_type = 122;
+pub const YARVINSN_trace_putnil: ruby_vminsn_type = 123;
+pub const YARVINSN_trace_putself: ruby_vminsn_type = 124;
+pub const YARVINSN_trace_putobject: ruby_vminsn_type = 125;
+pub const YARVINSN_trace_putspecialobject: ruby_vminsn_type = 126;
+pub const YARVINSN_trace_putstring: ruby_vminsn_type = 127;
+pub const YARVINSN_trace_putchilledstring: ruby_vminsn_type = 128;
+pub const YARVINSN_trace_concatstrings: ruby_vminsn_type = 129;
+pub const YARVINSN_trace_anytostring: ruby_vminsn_type = 130;
+pub const YARVINSN_trace_toregexp: ruby_vminsn_type = 131;
+pub const YARVINSN_trace_intern: ruby_vminsn_type = 132;
+pub const YARVINSN_trace_newarray: ruby_vminsn_type = 133;
+pub const YARVINSN_trace_newarraykwsplat: ruby_vminsn_type = 134;
+pub const YARVINSN_trace_pushtoarraykwsplat: ruby_vminsn_type = 135;
+pub const YARVINSN_trace_duparray: ruby_vminsn_type = 136;
+pub const YARVINSN_trace_duphash: ruby_vminsn_type = 137;
+pub const YARVINSN_trace_expandarray: ruby_vminsn_type = 138;
+pub const YARVINSN_trace_concatarray: ruby_vminsn_type = 139;
+pub const YARVINSN_trace_concattoarray: ruby_vminsn_type = 140;
+pub const YARVINSN_trace_pushtoarray: ruby_vminsn_type = 141;
+pub const YARVINSN_trace_splatarray: ruby_vminsn_type = 142;
+pub const YARVINSN_trace_splatkw: ruby_vminsn_type = 143;
+pub const YARVINSN_trace_newhash: ruby_vminsn_type = 144;
+pub const YARVINSN_trace_newrange: ruby_vminsn_type = 145;
+pub const YARVINSN_trace_pop: ruby_vminsn_type = 146;
+pub const YARVINSN_trace_dup: ruby_vminsn_type = 147;
+pub const YARVINSN_trace_dupn: ruby_vminsn_type = 148;
+pub const YARVINSN_trace_swap: ruby_vminsn_type = 149;
+pub const YARVINSN_trace_opt_reverse: ruby_vminsn_type = 150;
+pub const YARVINSN_trace_topn: ruby_vminsn_type = 151;
+pub const YARVINSN_trace_setn: ruby_vminsn_type = 152;
+pub const YARVINSN_trace_adjuststack: ruby_vminsn_type = 153;
+pub const YARVINSN_trace_defined: ruby_vminsn_type = 154;
+pub const YARVINSN_trace_definedivar: ruby_vminsn_type = 155;
+pub const YARVINSN_trace_checkmatch: ruby_vminsn_type = 156;
+pub const YARVINSN_trace_checkkeyword: ruby_vminsn_type = 157;
+pub const YARVINSN_trace_checktype: ruby_vminsn_type = 158;
+pub const YARVINSN_trace_defineclass: ruby_vminsn_type = 159;
+pub const YARVINSN_trace_definemethod: ruby_vminsn_type = 160;
+pub const YARVINSN_trace_definesmethod: ruby_vminsn_type = 161;
+pub const YARVINSN_trace_send: ruby_vminsn_type = 162;
+pub const YARVINSN_trace_opt_send_without_block: ruby_vminsn_type = 163;
+pub const YARVINSN_trace_objtostring: ruby_vminsn_type = 164;
+pub const YARVINSN_trace_opt_str_freeze: ruby_vminsn_type = 165;
+pub const YARVINSN_trace_opt_nil_p: ruby_vminsn_type = 166;
+pub const YARVINSN_trace_opt_str_uminus: ruby_vminsn_type = 167;
+pub const YARVINSN_trace_opt_newarray_send: ruby_vminsn_type = 168;
+pub const YARVINSN_trace_invokesuper: ruby_vminsn_type = 169;
+pub const YARVINSN_trace_invokeblock: ruby_vminsn_type = 170;
+pub const YARVINSN_trace_leave: ruby_vminsn_type = 171;
+pub const YARVINSN_trace_throw: ruby_vminsn_type = 172;
+pub const YARVINSN_trace_jump: ruby_vminsn_type = 173;
+pub const YARVINSN_trace_branchif: ruby_vminsn_type = 174;
+pub const YARVINSN_trace_branchunless: ruby_vminsn_type = 175;
+pub const YARVINSN_trace_branchnil: ruby_vminsn_type = 176;
+pub const YARVINSN_trace_once: ruby_vminsn_type = 177;
+pub const YARVINSN_trace_opt_case_dispatch: ruby_vminsn_type = 178;
+pub const YARVINSN_trace_opt_plus: ruby_vminsn_type = 179;
+pub const YARVINSN_trace_opt_minus: ruby_vminsn_type = 180;
+pub const YARVINSN_trace_opt_mult: ruby_vminsn_type = 181;
+pub const YARVINSN_trace_opt_div: ruby_vminsn_type = 182;
+pub const YARVINSN_trace_opt_mod: ruby_vminsn_type = 183;
+pub const YARVINSN_trace_opt_eq: ruby_vminsn_type = 184;
+pub const YARVINSN_trace_opt_neq: ruby_vminsn_type = 185;
+pub const YARVINSN_trace_opt_lt: ruby_vminsn_type = 186;
+pub const YARVINSN_trace_opt_le: ruby_vminsn_type = 187;
+pub const YARVINSN_trace_opt_gt: ruby_vminsn_type = 188;
+pub const YARVINSN_trace_opt_ge: ruby_vminsn_type = 189;
+pub const YARVINSN_trace_opt_ltlt: ruby_vminsn_type = 190;
+pub const YARVINSN_trace_opt_and: ruby_vminsn_type = 191;
+pub const YARVINSN_trace_opt_or: ruby_vminsn_type = 192;
+pub const YARVINSN_trace_opt_aref: ruby_vminsn_type = 193;
+pub const YARVINSN_trace_opt_aset: ruby_vminsn_type = 194;
+pub const YARVINSN_trace_opt_aset_with: ruby_vminsn_type = 195;
+pub const YARVINSN_trace_opt_aref_with: ruby_vminsn_type = 196;
+pub const YARVINSN_trace_opt_length: ruby_vminsn_type = 197;
+pub const YARVINSN_trace_opt_size: ruby_vminsn_type = 198;
+pub const YARVINSN_trace_opt_empty_p: ruby_vminsn_type = 199;
+pub const YARVINSN_trace_opt_succ: ruby_vminsn_type = 200;
+pub const YARVINSN_trace_opt_not: ruby_vminsn_type = 201;
+pub const YARVINSN_trace_opt_regexpmatch2: ruby_vminsn_type = 202;
+pub const YARVINSN_trace_invokebuiltin: ruby_vminsn_type = 203;
+pub const YARVINSN_trace_opt_invokebuiltin_delegate: ruby_vminsn_type = 204;
+pub const YARVINSN_trace_opt_invokebuiltin_delegate_leave: ruby_vminsn_type = 205;
+pub const YARVINSN_trace_getlocal_WC_0: ruby_vminsn_type = 206;
+pub const YARVINSN_trace_getlocal_WC_1: ruby_vminsn_type = 207;
+pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 208;
+pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 209;
+pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 210;
+pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 211;
+pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 212;
pub type ruby_vminsn_type = u32;
-extern "C" {
- pub fn rb_vm_insn_addr2opcode(addr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int;
-}
-pub type rb_iseq_each_i = ::std::option::Option<
- unsafe extern "C" fn(
- code: *mut VALUE,
- insn: VALUE,
- index: size_t,
- data: *mut ::std::os::raw::c_void,
- ) -> bool,
+pub type rb_iseq_callback = ::std::option::Option<
+ unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void),
>;
+pub const DEFINED_NOT_DEFINED: defined_type = 0;
+pub const DEFINED_NIL: defined_type = 1;
+pub const DEFINED_IVAR: defined_type = 2;
+pub const DEFINED_LVAR: defined_type = 3;
+pub const DEFINED_GVAR: defined_type = 4;
+pub const DEFINED_CVAR: defined_type = 5;
+pub const DEFINED_CONST: defined_type = 6;
+pub const DEFINED_METHOD: defined_type = 7;
+pub const DEFINED_YIELD: defined_type = 8;
+pub const DEFINED_ZSUPER: defined_type = 9;
+pub const DEFINED_SELF: defined_type = 10;
+pub const DEFINED_TRUE: defined_type = 11;
+pub const DEFINED_FALSE: defined_type = 12;
+pub const DEFINED_ASGN: defined_type = 13;
+pub const DEFINED_EXPR: defined_type = 14;
+pub const DEFINED_REF: defined_type = 15;
+pub const DEFINED_FUNC: defined_type = 16;
+pub const DEFINED_CONST_FROM: defined_type = 17;
+pub type defined_type = u32;
+pub const ROBJECT_OFFSET_AS_HEAP_IVPTR: robject_offsets = 16;
+pub const ROBJECT_OFFSET_AS_HEAP_IV_INDEX_TBL: robject_offsets = 24;
+pub const ROBJECT_OFFSET_AS_ARY: robject_offsets = 16;
+pub type robject_offsets = u32;
+pub const RUBY_OFFSET_RSTRING_LEN: rstring_offsets = 16;
+pub type rstring_offsets = u32;
+pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
extern "C" {
- pub fn rb_iseq_each(
- iseq: *const rb_iseq_t,
- start_index: size_t,
- iterator: rb_iseq_each_i,
- data: *mut ::std::os::raw::c_void,
- );
-}
-extern "C" {
- pub fn rb_iseqw_to_iseq(iseqw: VALUE) -> *const rb_iseq_t;
-}
-extern "C" {
+ pub fn ruby_xfree(ptr: *mut ::std::os::raw::c_void);
+ pub fn rb_class_attached_object(klass: VALUE) -> VALUE;
+ pub fn rb_singleton_class(obj: VALUE) -> VALUE;
+ pub fn rb_get_alloc_func(klass: VALUE) -> rb_alloc_func_t;
+ pub fn rb_method_basic_definition_p(klass: VALUE, mid: ID) -> ::std::os::raw::c_int;
+ pub fn rb_bug(fmt: *const ::std::os::raw::c_char, ...) -> !;
+ pub fn rb_gc_mark(obj: VALUE);
+ pub fn rb_gc_mark_movable(obj: VALUE);
+ pub fn rb_gc_location(obj: VALUE) -> VALUE;
+ pub fn rb_gc_writebarrier(old: VALUE, young: VALUE);
+ pub fn rb_class_get_superclass(klass: VALUE) -> VALUE;
+ pub static mut rb_mKernel: VALUE;
+ pub static mut rb_cBasicObject: VALUE;
+ pub static mut rb_cArray: VALUE;
+ pub static mut rb_cClass: VALUE;
+ pub static mut rb_cFalseClass: VALUE;
+ pub static mut rb_cFloat: VALUE;
+ pub static mut rb_cHash: VALUE;
+ pub static mut rb_cIO: VALUE;
+ pub static mut rb_cInteger: VALUE;
+ pub static mut rb_cModule: VALUE;
+ pub static mut rb_cNilClass: VALUE;
+ pub static mut rb_cString: VALUE;
+ pub static mut rb_cSymbol: VALUE;
+ pub static mut rb_cThread: VALUE;
+ pub static mut rb_cTrueClass: VALUE;
+ pub fn rb_obj_class(obj: VALUE) -> VALUE;
+ pub fn rb_ary_new_capa(capa: ::std::os::raw::c_long) -> VALUE;
+ pub fn rb_ary_store(ary: VALUE, key: ::std::os::raw::c_long, val: VALUE);
+ pub fn rb_ary_dup(ary: VALUE) -> VALUE;
+ pub fn rb_ary_resurrect(ary: VALUE) -> VALUE;
+ pub fn rb_ary_cat(ary: VALUE, train: *const VALUE, len: ::std::os::raw::c_long) -> VALUE;
+ pub fn rb_ary_push(ary: VALUE, elem: VALUE) -> VALUE;
+ pub fn rb_ary_clear(ary: VALUE) -> VALUE;
+ pub fn rb_hash_new() -> VALUE;
+ pub fn rb_hash_aref(hash: VALUE, key: VALUE) -> VALUE;
+ pub fn rb_hash_aset(hash: VALUE, key: VALUE, val: VALUE) -> VALUE;
+ pub fn rb_hash_bulk_insert(argc: ::std::os::raw::c_long, argv: *const VALUE, hash: VALUE);
+ pub fn rb_obj_is_proc(recv: VALUE) -> VALUE;
+ pub fn rb_sym2id(obj: VALUE) -> ID;
+ pub fn rb_id2sym(id: ID) -> VALUE;
+ pub fn rb_intern(name: *const ::std::os::raw::c_char) -> ID;
+ pub fn rb_intern2(name: *const ::std::os::raw::c_char, len: ::std::os::raw::c_long) -> ID;
+ pub fn rb_id2name(id: ID) -> *const ::std::os::raw::c_char;
+ pub fn rb_class2name(klass: VALUE) -> *const ::std::os::raw::c_char;
+ pub fn rb_obj_is_kind_of(obj: VALUE, klass: VALUE) -> VALUE;
+ pub fn rb_obj_frozen_p(obj: VALUE) -> VALUE;
+ pub fn rb_backref_get() -> VALUE;
+ pub fn rb_range_new(beg: VALUE, end: VALUE, excl: ::std::os::raw::c_int) -> VALUE;
+ pub fn rb_reg_nth_match(n: ::std::os::raw::c_int, md: VALUE) -> VALUE;
+ pub fn rb_reg_last_match(md: VALUE) -> VALUE;
+ pub fn rb_reg_match_pre(md: VALUE) -> VALUE;
+ pub fn rb_reg_match_post(md: VALUE) -> VALUE;
+ pub fn rb_reg_match_last(md: VALUE) -> VALUE;
+ pub fn rb_utf8_str_new(
+ ptr: *const ::std::os::raw::c_char,
+ len: ::std::os::raw::c_long,
+ ) -> VALUE;
+ pub fn rb_str_buf_append(dst: VALUE, src: VALUE) -> VALUE;
+ pub fn rb_str_dup(str_: VALUE) -> VALUE;
+ pub fn rb_str_intern(str_: VALUE) -> VALUE;
+ pub fn rb_ivar_get(obj: VALUE, name: ID) -> VALUE;
+ pub fn rb_ivar_defined(obj: VALUE, name: ID) -> VALUE;
+ pub fn rb_attr_get(obj: VALUE, name: ID) -> VALUE;
+ pub fn rb_obj_info_dump(obj: VALUE);
+ pub fn rb_class_allocate_instance(klass: VALUE) -> VALUE;
+ pub fn rb_reg_new_ary(ary: VALUE, options: ::std::os::raw::c_int) -> VALUE;
+ pub fn rb_ary_tmp_new_from_values(
+ arg1: VALUE,
+ arg2: ::std::os::raw::c_long,
+ arg3: *const VALUE,
+ ) -> VALUE;
+ pub fn rb_ec_ary_new_from_values(
+ ec: *mut rb_execution_context_struct,
+ n: ::std::os::raw::c_long,
+ elts: *const VALUE,
+ ) -> VALUE;
+ pub fn rb_vm_top_self() -> VALUE;
+ pub static mut rb_vm_insns_count: u64;
+ pub fn rb_method_entry_at(obj: VALUE, id: ID) -> *const rb_method_entry_t;
+ pub fn rb_callable_method_entry(klass: VALUE, id: ID) -> *const rb_callable_method_entry_t;
+ pub fn rb_callable_method_entry_or_negative(
+ klass: VALUE,
+ id: ID,
+ ) -> *const rb_callable_method_entry_t;
+ pub static mut rb_mRubyVMFrozenCore: VALUE;
+ pub static mut rb_block_param_proxy: VALUE;
+ pub fn rb_vm_ep_local_ep(ep: *const VALUE) -> *const VALUE;
+ pub fn rb_iseq_path(iseq: *const rb_iseq_t) -> VALUE;
+ pub fn rb_vm_env_write(ep: *const VALUE, index: ::std::os::raw::c_int, v: VALUE);
+ pub fn rb_vm_bh_to_procval(ec: *const rb_execution_context_t, block_handler: VALUE) -> VALUE;
+ pub fn rb_vm_frame_method_entry(
+ cfp: *const rb_control_frame_t,
+ ) -> *const rb_callable_method_entry_t;
+ pub fn rb_obj_info(obj: VALUE) -> *const ::std::os::raw::c_char;
+ pub fn rb_ec_stack_check(ec: *mut rb_execution_context_struct) -> ::std::os::raw::c_int;
+ pub fn rb_shape_id_offset() -> i32;
+ pub fn rb_shape_get_shape_by_id(shape_id: shape_id_t) -> *mut rb_shape_t;
+ pub fn rb_shape_get_shape_id(obj: VALUE) -> shape_id_t;
+ pub fn rb_shape_get_iv_index(shape: *mut rb_shape_t, id: ID, value: *mut attr_index_t) -> bool;
+ pub fn rb_shape_obj_too_complex(obj: VALUE) -> bool;
+ pub fn rb_shape_get_next(shape: *mut rb_shape_t, obj: VALUE, id: ID) -> *mut rb_shape_t;
+ pub fn rb_shape_id(shape: *mut rb_shape_t) -> shape_id_t;
+ pub fn rb_gvar_get(arg1: ID) -> VALUE;
+ pub fn rb_gvar_set(arg1: ID, arg2: VALUE) -> VALUE;
+ pub fn rb_ensure_iv_list_size(obj: VALUE, len: u32, newsize: u32);
pub fn rb_vm_barrier();
-}
-extern "C" {
+ pub fn rb_str_byte_substr(str_: VALUE, beg: VALUE, len: VALUE) -> VALUE;
+ pub fn rb_obj_as_string_result(str_: VALUE, obj: VALUE) -> VALUE;
+ pub fn rb_str_concat_literals(num: usize, strary: *const VALUE) -> VALUE;
+ pub fn rb_ec_str_resurrect(
+ ec: *mut rb_execution_context_struct,
+ str_: VALUE,
+ chilled: bool,
+ ) -> VALUE;
+ pub fn rb_to_hash_type(obj: VALUE) -> VALUE;
+ pub fn rb_hash_stlike_foreach(
+ hash: VALUE,
+ func: st_foreach_callback_func,
+ arg: st_data_t,
+ ) -> ::std::os::raw::c_int;
+ pub fn rb_hash_new_with_size(size: st_index_t) -> VALUE;
+ pub fn rb_hash_resurrect(hash: VALUE) -> VALUE;
+ pub fn rb_hash_stlike_lookup(
+ hash: VALUE,
+ key: st_data_t,
+ pval: *mut st_data_t,
+ ) -> ::std::os::raw::c_int;
+ pub fn rb_insn_len(insn: VALUE) -> ::std::os::raw::c_int;
+ pub fn rb_vm_insn_decode(encoded: VALUE) -> ::std::os::raw::c_int;
+ pub fn rb_float_plus(x: VALUE, y: VALUE) -> VALUE;
+ pub fn rb_float_minus(x: VALUE, y: VALUE) -> VALUE;
+ pub fn rb_float_mul(x: VALUE, y: VALUE) -> VALUE;
+ pub fn rb_float_div(x: VALUE, y: VALUE) -> VALUE;
+ pub fn rb_fix_aref(fix: VALUE, idx: VALUE) -> VALUE;
+ pub fn rb_vm_insn_addr2opcode(addr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int;
+ pub fn rb_iseq_line_no(iseq: *const rb_iseq_t, pos: usize) -> ::std::os::raw::c_uint;
+ pub fn rb_iseqw_to_iseq(iseqw: VALUE) -> *const rb_iseq_t;
+ pub fn rb_iseq_label(iseq: *const rb_iseq_t) -> VALUE;
pub fn rb_profile_frames(
start: ::std::os::raw::c_int,
limit: ::std::os::raw::c_int,
buff: *mut VALUE,
lines: *mut ::std::os::raw::c_int,
) -> ::std::os::raw::c_int;
-}
-extern "C" {
+ pub fn rb_jit_cont_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void);
pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
-}
-extern "C" {
pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
-}
-extern "C" {
+ pub fn rb_yjit_mark_unused(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
+ pub fn rb_yjit_array_len(a: VALUE) -> ::std::os::raw::c_long;
+ pub fn rb_yjit_icache_invalidate(
+ start: *mut ::std::os::raw::c_void,
+ end: *mut ::std::os::raw::c_void,
+ );
pub fn rb_yjit_exit_locations_dict(
yjit_raw_samples: *mut VALUE,
yjit_line_samples: *mut ::std::os::raw::c_int,
samples_len: ::std::os::raw::c_int,
) -> VALUE;
-}
-extern "C" {
pub fn rb_yjit_get_page_size() -> u32;
-}
-extern "C" {
pub fn rb_yjit_reserve_addr_space(mem_size: u32) -> *mut u8;
-}
-extern "C" {
- pub fn rb_c_method_tracing_currently_enabled(ec: *mut rb_execution_context_t) -> bool;
-}
-extern "C" {
+ pub fn rb_c_method_tracing_currently_enabled(ec: *const rb_execution_context_t) -> bool;
pub fn rb_full_cfunc_return(ec: *mut rb_execution_context_t, return_value: VALUE);
-}
-extern "C" {
+ pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
pub fn rb_iseq_get_yjit_payload(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_void;
-}
-extern "C" {
pub fn rb_iseq_set_yjit_payload(iseq: *const rb_iseq_t, payload: *mut ::std::os::raw::c_void);
-}
-extern "C" {
pub fn rb_iseq_reset_jit_func(iseq: *const rb_iseq_t);
-}
-extern "C" {
pub fn rb_iseq_pc_at_idx(iseq: *const rb_iseq_t, insn_idx: u32) -> *mut VALUE;
-}
-extern "C" {
pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int;
-}
-pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
-extern "C" {
- pub fn rb_leaf_invokebuiltin_iseq_p(iseq: *const rb_iseq_t) -> bool;
-}
-extern "C" {
- pub fn rb_leaf_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function;
-}
-extern "C" {
+ pub fn rb_RSTRING_LEN(str_: VALUE) -> ::std::os::raw::c_ulong;
+ pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char;
+ pub fn rb_yjit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t;
+ pub fn rb_insn_name(insn: VALUE) -> *const ::std::os::raw::c_char;
+ pub fn rb_vm_ci_argc(ci: *const rb_callinfo) -> ::std::os::raw::c_uint;
+ pub fn rb_vm_ci_mid(ci: *const rb_callinfo) -> ID;
+ pub fn rb_vm_ci_flag(ci: *const rb_callinfo) -> ::std::os::raw::c_uint;
+ pub fn rb_vm_ci_kwarg(ci: *const rb_callinfo) -> *const rb_callinfo_kwarg;
+ pub fn rb_get_cikw_keyword_len(cikw: *const rb_callinfo_kwarg) -> ::std::os::raw::c_int;
+ pub fn rb_get_cikw_keywords_idx(
+ cikw: *const rb_callinfo_kwarg,
+ idx: ::std::os::raw::c_int,
+ ) -> VALUE;
+ pub fn rb_METHOD_ENTRY_VISI(me: *const rb_callable_method_entry_t) -> rb_method_visibility_t;
+ pub fn rb_get_cme_def_type(cme: *const rb_callable_method_entry_t) -> rb_method_type_t;
+ pub fn rb_get_cme_def_body_attr_id(cme: *const rb_callable_method_entry_t) -> ID;
+ pub fn rb_get_symbol_id(namep: VALUE) -> ID;
+ pub fn rb_get_cme_def_body_optimized_type(
+ cme: *const rb_callable_method_entry_t,
+ ) -> method_optimized_type;
+ pub fn rb_get_cme_def_body_optimized_index(
+ cme: *const rb_callable_method_entry_t,
+ ) -> ::std::os::raw::c_uint;
+ pub fn rb_get_cme_def_body_cfunc(
+ cme: *const rb_callable_method_entry_t,
+ ) -> *mut rb_method_cfunc_t;
+ pub fn rb_get_def_method_serial(def: *const rb_method_definition_t) -> usize;
+ pub fn rb_get_def_original_id(def: *const rb_method_definition_t) -> ID;
+ pub fn rb_get_mct_argc(mct: *const rb_method_cfunc_t) -> ::std::os::raw::c_int;
+ pub fn rb_get_mct_func(mct: *const rb_method_cfunc_t) -> *mut ::std::os::raw::c_void;
+ pub fn rb_get_def_iseq_ptr(def: *mut rb_method_definition_t) -> *const rb_iseq_t;
+ pub fn rb_get_def_bmethod_proc(def: *mut rb_method_definition_t) -> VALUE;
+ pub fn rb_get_iseq_body_local_iseq(iseq: *const rb_iseq_t) -> *const rb_iseq_t;
+ pub fn rb_get_iseq_body_parent_iseq(iseq: *const rb_iseq_t) -> *const rb_iseq_t;
+ pub fn rb_get_iseq_body_local_table_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
+ pub fn rb_get_iseq_body_iseq_encoded(iseq: *const rb_iseq_t) -> *mut VALUE;
+ pub fn rb_get_iseq_body_stack_max(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
+ pub fn rb_get_iseq_body_type(iseq: *const rb_iseq_t) -> rb_iseq_type;
+ pub fn rb_get_iseq_flags_has_lead(iseq: *const rb_iseq_t) -> bool;
+ pub fn rb_get_iseq_flags_has_opt(iseq: *const rb_iseq_t) -> bool;
+ pub fn rb_get_iseq_flags_has_kw(iseq: *const rb_iseq_t) -> bool;
+ pub fn rb_get_iseq_flags_has_post(iseq: *const rb_iseq_t) -> bool;
+ pub fn rb_get_iseq_flags_has_kwrest(iseq: *const rb_iseq_t) -> bool;
+ pub fn rb_get_iseq_flags_anon_kwrest(iseq: *const rb_iseq_t) -> bool;
+ pub fn rb_get_iseq_flags_has_rest(iseq: *const rb_iseq_t) -> bool;
+ pub fn rb_get_iseq_flags_ruby2_keywords(iseq: *const rb_iseq_t) -> bool;
+ pub fn rb_get_iseq_flags_has_block(iseq: *const rb_iseq_t) -> bool;
+ pub fn rb_get_iseq_flags_ambiguous_param0(iseq: *const rb_iseq_t) -> bool;
+ pub fn rb_get_iseq_flags_accepts_no_kwarg(iseq: *const rb_iseq_t) -> bool;
+ pub fn rb_get_iseq_body_param_keyword(
+ iseq: *const rb_iseq_t,
+ ) -> *const rb_seq_param_keyword_struct;
+ pub fn rb_get_iseq_body_param_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
+ pub fn rb_get_iseq_body_param_lead_num(iseq: *const rb_iseq_t) -> ::std::os::raw::c_int;
+ pub fn rb_get_iseq_body_param_opt_num(iseq: *const rb_iseq_t) -> ::std::os::raw::c_int;
+ pub fn rb_get_iseq_body_param_opt_table(iseq: *const rb_iseq_t) -> *const VALUE;
+ pub fn rb_optimized_call(
+ recv: *mut VALUE,
+ ec: *mut rb_execution_context_t,
+ argc: ::std::os::raw::c_int,
+ argv: *mut VALUE,
+ kw_splat: ::std::os::raw::c_int,
+ block_handler: VALUE,
+ ) -> VALUE;
+ pub fn rb_yjit_iseq_builtin_attrs(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
+ pub fn rb_yjit_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function;
pub fn rb_yjit_str_simple_append(str1: VALUE, str2: VALUE) -> VALUE;
-}
-extern "C" {
+ pub fn rb_get_ec_cfp(ec: *const rb_execution_context_t) -> *mut rb_control_frame_struct;
+ pub fn rb_get_cfp_iseq(cfp: *mut rb_control_frame_struct) -> *const rb_iseq_t;
+ pub fn rb_get_cfp_pc(cfp: *mut rb_control_frame_struct) -> *mut VALUE;
+ pub fn rb_get_cfp_sp(cfp: *mut rb_control_frame_struct) -> *mut VALUE;
pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE);
-}
-extern "C" {
pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE);
-}
-extern "C" {
- pub fn rb_cfp_get_iseq(cfp: *mut rb_control_frame_struct) -> *mut rb_iseq_t;
-}
-extern "C" {
+ pub fn rb_get_cfp_self(cfp: *mut rb_control_frame_struct) -> VALUE;
+ pub fn rb_get_cfp_ep(cfp: *mut rb_control_frame_struct) -> *mut VALUE;
+ pub fn rb_get_cfp_ep_level(cfp: *mut rb_control_frame_struct, lv: u32) -> *const VALUE;
+ pub fn rb_vm_base_ptr(cfp: *mut rb_control_frame_struct) -> *mut VALUE;
+ pub fn rb_yarv_class_of(obj: VALUE) -> VALUE;
+ pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE;
+ pub fn rb_str_neq_internal(str1: VALUE, str2: VALUE) -> VALUE;
+ pub fn rb_yarv_ary_entry_internal(ary: VALUE, offset: ::std::os::raw::c_long) -> VALUE;
+ pub fn rb_ary_unshift_m(argc: ::std::os::raw::c_int, argv: *mut VALUE, ary: VALUE) -> VALUE;
+ pub fn rb_yjit_rb_ary_subseq_length(ary: VALUE, beg: ::std::os::raw::c_long) -> VALUE;
+ pub fn rb_yjit_fix_div_fix(recv: VALUE, obj: VALUE) -> VALUE;
+ pub fn rb_yjit_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE;
+ pub fn rb_yjit_ruby2_keywords_splat_p(obj: VALUE) -> usize;
+ pub fn rb_yjit_splat_varg_checks(
+ sp: *mut VALUE,
+ splat_array: VALUE,
+ cfp: *mut rb_control_frame_t,
+ ) -> VALUE;
+ pub fn rb_yjit_splat_varg_cfunc(stack_splat_array: *mut VALUE) -> ::std::os::raw::c_int;
pub fn rb_yjit_dump_iseq_loc(iseq: *const rb_iseq_t, insn_idx: u32);
-}
-extern "C" {
+ pub fn rb_yjit_iseq_inspect(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_char;
+ pub fn rb_FL_TEST(obj: VALUE, flags: VALUE) -> VALUE;
+ pub fn rb_FL_TEST_RAW(obj: VALUE, flags: VALUE) -> VALUE;
+ pub fn rb_RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool;
+ pub fn rb_RSTRUCT_LEN(st: VALUE) -> ::std::os::raw::c_long;
+ pub fn rb_RSTRUCT_SET(st: VALUE, k: ::std::os::raw::c_int, v: VALUE);
+ pub fn rb_get_call_data_ci(cd: *const rb_call_data) -> *const rb_callinfo;
+ pub fn rb_BASIC_OP_UNREDEFINED_P(bop: ruby_basic_operators, klass: u32) -> bool;
+ pub fn rb_RCLASS_ORIGIN(c: VALUE) -> VALUE;
pub fn rb_ENCODING_GET(obj: VALUE) -> ::std::os::raw::c_int;
-}
-extern "C" {
pub fn rb_yjit_multi_ractor_p() -> bool;
-}
-extern "C" {
pub fn rb_assert_iseq_handle(handle: VALUE);
-}
-extern "C" {
pub fn rb_IMEMO_TYPE_P(imemo: VALUE, imemo_type: imemo_type) -> ::std::os::raw::c_int;
-}
-extern "C" {
pub fn rb_assert_cme_handle(handle: VALUE);
-}
-pub type iseq_callback = ::std::option::Option<unsafe extern "C" fn(arg1: *const rb_iseq_t)>;
-extern "C" {
- pub fn rb_yjit_for_each_iseq(callback: iseq_callback);
-}
-extern "C" {
+ pub fn rb_yjit_for_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void);
pub fn rb_yjit_obj_written(
old: VALUE,
young: VALUE,
file: *const ::std::os::raw::c_char,
line: ::std::os::raw::c_int,
);
-}
-extern "C" {
pub fn rb_yjit_vm_lock_then_barrier(
recursive_lock_level: *mut ::std::os::raw::c_uint,
file: *const ::std::os::raw::c_char,
line: ::std::os::raw::c_int,
);
-}
-extern "C" {
pub fn rb_yjit_vm_unlock(
recursive_lock_level: *mut ::std::os::raw::c_uint,
file: *const ::std::os::raw::c_char,
line: ::std::os::raw::c_int,
);
+ pub fn rb_yjit_assert_holding_vm_lock();
+ pub fn rb_yjit_sendish_sp_pops(ci: *const rb_callinfo) -> usize;
+ pub fn rb_yjit_invokeblock_sp_pops(ci: *const rb_callinfo) -> usize;
+ pub fn rb_yjit_set_exception_return(
+ cfp: *mut rb_control_frame_t,
+ leave_exit: *mut ::std::os::raw::c_void,
+ leave_exception: *mut ::std::os::raw::c_void,
+ );
}
diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs
index 2082648c4a..7875276815 100644
--- a/yjit/src/disasm.rs
+++ b/yjit/src/disasm.rs
@@ -1,6 +1,15 @@
use crate::core::*;
use crate::cruby::*;
use crate::yjit::yjit_enabled_p;
+#[cfg(feature = "disasm")]
+use crate::asm::CodeBlock;
+#[cfg(feature = "disasm")]
+use crate::codegen::CodePtr;
+#[cfg(feature = "disasm")]
+use crate::options::DumpDisasm;
+
+#[cfg(feature = "disasm")]
+use std::fmt::Write;
/// Primitive called in yjit.rb
/// Produce a string representing the disassembly for an ISEQ
@@ -26,110 +35,226 @@ pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALU
// Get the iseq pointer from the wrapper
let iseq = unsafe { rb_iseqw_to_iseq(iseqw) };
- let out_string = disasm_iseq(iseq);
+ // This will truncate disassembly of methods with 10k+ bytecodes.
+ // That's a good thing - this prints to console.
+ let out_string = with_vm_lock(src_loc!(), || disasm_iseq_insn_range(iseq, 0, 9999));
return rust_str_to_ruby(&out_string);
}
}
+/// Only call while holding the VM lock.
#[cfg(feature = "disasm")]
-fn disasm_iseq(iseq: IseqPtr) -> String {
+pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u16, end_idx: u16) -> String {
let mut out = String::from("");
// Get a list of block versions generated for this iseq
- let mut block_list = get_iseq_block_list(iseq);
+ let block_list = get_or_create_iseq_block_list(iseq);
+ let mut block_list: Vec<&Block> = block_list.into_iter().map(|blockref| {
+ // SAFETY: We have the VM lock here and all the blocks on iseqs are valid.
+ unsafe { blockref.as_ref() }
+ }).collect();
// Get a list of codeblocks relevant to this iseq
let global_cb = crate::codegen::CodegenGlobals::get_inline_cb();
// Sort the blocks by increasing start addresses
- block_list.sort_by(|a, b| {
- use std::cmp::Ordering;
-
- // Get the start addresses for each block
- let addr_a = a.borrow().get_start_addr().unwrap().raw_ptr();
- let addr_b = b.borrow().get_start_addr().unwrap().raw_ptr();
-
- if addr_a < addr_b {
- Ordering::Less
- } else if addr_a == addr_b {
- Ordering::Equal
- } else {
- Ordering::Greater
- }
- });
+ block_list.sort_by_key(|block| block.get_start_addr().as_offset());
// Compute total code size in bytes for all blocks in the function
let mut total_code_size = 0;
for blockref in &block_list {
- total_code_size += blockref.borrow().code_size();
+ total_code_size += blockref.code_size();
+ }
+
+ writeln!(out, "NUM BLOCK VERSIONS: {}", block_list.len()).unwrap();
+ writeln!(out, "TOTAL INLINE CODE SIZE: {} bytes", total_code_size).unwrap();
+
+ // For each block, sorted by increasing start address
+ for (block_idx, block) in block_list.iter().enumerate() {
+ let blockid = block.get_blockid();
+ if blockid.idx >= start_idx && blockid.idx < end_idx {
+ let end_idx = block.get_end_idx();
+ let start_addr = block.get_start_addr();
+ let end_addr = block.get_end_addr();
+ let code_size = block.code_size();
+
+ // Write some info about the current block
+ let blockid_idx = blockid.idx;
+ let block_ident = format!(
+ "BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ",
+ block_idx + 1,
+ block_list.len(),
+ blockid_idx,
+ end_idx,
+ code_size
+ );
+ writeln!(out, "== {:=<60}", block_ident).unwrap();
+
+ // Disassemble the instructions
+ for (start_addr, end_addr) in global_cb.writable_addrs(start_addr, end_addr) {
+ out.push_str(&disasm_addr_range(global_cb, start_addr, end_addr));
+ writeln!(out).unwrap();
+ }
+
+ // If this is not the last block
+ if block_idx < block_list.len() - 1 {
+ // Compute the size of the gap between this block and the next
+ let next_block = block_list[block_idx + 1];
+ let next_start_addr = next_block.get_start_addr();
+ let gap_size = next_start_addr.as_offset() - end_addr.as_offset();
+
+ // Log the size of the gap between the blocks if nonzero
+ if gap_size > 0 {
+ writeln!(out, "... {} byte gap ...", gap_size).unwrap();
+ }
+ }
+ }
}
+ return out;
+}
+
+#[cfg(feature = "disasm")]
+pub fn dump_disasm_addr_range(cb: &CodeBlock, start_addr: CodePtr, end_addr: CodePtr, dump_disasm: &DumpDisasm) {
+ use std::fs::File;
+ use std::io::Write;
+
+ for (start_addr, end_addr) in cb.writable_addrs(start_addr, end_addr) {
+ let disasm = disasm_addr_range(cb, start_addr, end_addr);
+ if disasm.len() > 0 {
+ match dump_disasm {
+ DumpDisasm::Stdout => println!("{disasm}"),
+ DumpDisasm::File(path) => {
+ let mut f = File::options().create(true).append(true).open(path).unwrap();
+ f.write_all(disasm.as_bytes()).unwrap();
+ }
+ };
+ }
+ }
+}
+
+#[cfg(feature = "disasm")]
+pub fn disasm_addr_range(cb: &CodeBlock, start_addr: usize, end_addr: usize) -> String {
+ let mut out = String::from("");
+
// Initialize capstone
use capstone::prelude::*;
- let cs = Capstone::new()
+
+ #[cfg(target_arch = "x86_64")]
+ let mut cs = Capstone::new()
.x86()
.mode(arch::x86::ArchMode::Mode64)
.syntax(arch::x86::ArchSyntax::Intel)
.build()
.unwrap();
- out.push_str(&format!("NUM BLOCK VERSIONS: {}\n", block_list.len()));
- out.push_str(&format!(
- "TOTAL INLINE CODE SIZE: {} bytes\n",
- total_code_size
- ));
-
- // For each block, sorted by increasing start address
- for block_idx in 0..block_list.len() {
- let block = block_list[block_idx].borrow();
- let blockid = block.get_blockid();
- let end_idx = block.get_end_idx();
- let start_addr = block.get_start_addr().unwrap().raw_ptr();
- let end_addr = block.get_end_addr().unwrap().raw_ptr();
- let code_size = block.code_size();
-
- // Write some info about the current block
- let block_ident = format!(
- "BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ",
- block_idx + 1,
- block_list.len(),
- blockid.idx,
- end_idx,
- code_size
- );
- out.push_str(&format!("== {:=<60}\n", block_ident));
-
- // Disassemble the instructions
- let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) };
- let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap();
-
- // For each instruction in this block
- for insn in insns.as_ref() {
- // Comments for this block
- if let Some(comment_list) = global_cb.comments_at(insn.address() as usize) {
- for comment in comment_list {
- out.push_str(&format!(" \x1b[1m# {}\x1b[0m\n", comment));
+ #[cfg(target_arch = "aarch64")]
+ let mut cs = Capstone::new()
+ .arm64()
+ .mode(arch::arm64::ArchMode::Arm)
+ .detail(true)
+ .build()
+ .unwrap();
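+    // With skipdata enabled, Capstone emits `.byte` pseudo-instructions for bytes it
+    // cannot decode and keeps going instead of aborting the disassembly there.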
+ cs.set_skipdata(true).unwrap();
+
+ // Disassemble the instructions
+ let code_size = end_addr - start_addr;
+ let code_slice = unsafe { std::slice::from_raw_parts(start_addr as _, code_size) };
+ // Stabilize output for cargo test
+ #[cfg(test)]
+ let start_addr = 0;
+ let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap();
+
+ // For each instruction in this block
+ for insn in insns.as_ref() {
+ // Comments for this block
+ if let Some(comment_list) = cb.comments_at(insn.address() as usize) {
+ for comment in comment_list {
+ if cb.outlined {
+ write!(&mut out, "\x1b[34m").unwrap(); // Make outlined code blue
}
+ writeln!(&mut out, " \x1b[1m# {comment}\x1b[22m").unwrap(); // Make comments bold
}
- out.push_str(&format!(" {}\n", insn));
}
+ if cb.outlined {
+ write!(&mut out, "\x1b[34m").unwrap(); // Make outlined code blue
+ }
+ writeln!(&mut out, " {insn}").unwrap();
+ if cb.outlined {
+ write!(&mut out, "\x1b[0m").unwrap(); // Disable blue
+ }
+ }
- // If this is not the last block
- if block_idx < block_list.len() - 1 {
- // Compute the size of the gap between this block and the next
- let next_block = block_list[block_idx + 1].borrow();
- let next_start_addr = next_block.get_start_addr().unwrap().raw_ptr();
- let gap_size = (next_start_addr as usize) - (end_addr as usize);
+ return out;
+}
- // Log the size of the gap between the blocks if nonzero
- if gap_size > 0 {
- out.push_str(&format!("... {} byte gap ...\n", gap_size));
+/// Assert that CodeBlock has the code specified with hex. In addition, if tested with
+/// `cargo test --all-features`, it also checks it generates the specified disasm.
+#[cfg(test)]
+macro_rules! assert_disasm {
+ ($cb:expr, $hex:expr, $disasm:expr) => {
+ #[cfg(feature = "disasm")]
+ {
+ let disasm = disasm_addr_range(
+ &$cb,
+ $cb.get_ptr(0).raw_addr(&$cb),
+ $cb.get_write_ptr().raw_addr(&$cb),
+ );
+ assert_eq!(unindent(&disasm, false), unindent(&$disasm, true));
+ }
+ assert_eq!(format!("{:x}", $cb), $hex);
+ };
+}
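+// Illustrative usage (the hex and disassembly strings below are hypothetical, not
+// taken from the actual test suite): a test emits code into `cb`, then asserts on
+// the raw bytes and, under `--all-features`, on the disassembly text as well:
+//
+//     assert_disasm!(cb, "4889c0", "
+//         0x0: mov rax, rax
+//     ");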
+#[cfg(test)]
+pub(crate) use assert_disasm;
+
+/// Remove the minimum indent from every line, dropping the first and last lines if `trim_lines`.
+#[cfg(all(feature = "disasm", test))]
+pub fn unindent(string: &str, trim_lines: bool) -> String {
+ fn split_lines(string: &str) -> Vec<String> {
+ let mut result: Vec<String> = vec![];
+ let mut buf: Vec<u8> = vec![];
+ for byte in string.as_bytes().iter() {
+ buf.push(*byte);
+ if *byte == b'\n' {
+ result.push(String::from_utf8(buf).unwrap());
+ buf = vec![];
}
}
+ if !buf.is_empty() {
+ result.push(String::from_utf8(buf).unwrap());
+ }
+ result
}
- return out;
+ // Break up a string into multiple lines
+ let mut lines = split_lines(string);
+ if trim_lines { // raw string literals come with extra lines
+ lines.remove(0);
+ lines.remove(lines.len() - 1);
+ }
+
+ // Count the minimum number of spaces
+ let spaces = lines.iter().filter_map(|line| {
+ for (i, ch) in line.as_bytes().iter().enumerate() {
+ if *ch != b' ' {
+ return Some(i);
+ }
+ }
+ None
+ }).min().unwrap_or(0);
+
+ // Join lines, removing spaces
+ let mut unindented: Vec<u8> = vec![];
+ for line in lines.iter() {
+ if line.len() > spaces {
+ unindented.extend_from_slice(&line.as_bytes()[spaces..]);
+ } else {
+ unindented.extend_from_slice(&line.as_bytes());
+ }
+ }
+ String::from_utf8(unindented).unwrap()
}
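+// For example, unindent("    a\n      b\n", false) returns "a\n  b\n": four spaces
+// is the minimum indent, so four spaces are stripped from every line.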
/// Primitive called in yjit.rb
@@ -176,24 +301,26 @@ pub extern "C" fn rb_yjit_insns_compiled(_ec: EcPtr, _ruby_self: VALUE, iseqw: V
}
}
-fn insns_compiled(iseq: IseqPtr) -> Vec<(String, u32)> {
+fn insns_compiled(iseq: IseqPtr) -> Vec<(String, u16)> {
let mut insn_vec = Vec::new();
// Get a list of block versions generated for this iseq
- let block_list = get_iseq_block_list(iseq);
+ let block_list = get_or_create_iseq_block_list(iseq);
// For each block associated with this iseq
for blockref in &block_list {
- let block = blockref.borrow();
+ // SAFETY: Called as part of a Ruby method, which ensures the graph is
+ // well connected for the given iseq.
+ let block = unsafe { blockref.as_ref() };
let start_idx = block.get_blockid().idx;
let end_idx = block.get_end_idx();
- assert!(end_idx <= unsafe { get_iseq_encoded_size(iseq) });
+ assert!(u32::from(end_idx) <= unsafe { get_iseq_encoded_size(iseq) });
// For each YARV instruction in the block
let mut insn_idx = start_idx;
while insn_idx < end_idx {
// Get the current pc and opcode
- let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) };
+ let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx.into()) };
// try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes.
let opcode: usize = unsafe { rb_iseq_opcode_at_pc(iseq, pc) }
.try_into()
@@ -206,7 +333,7 @@ fn insns_compiled(iseq: IseqPtr) -> Vec<(String, u32)> {
insn_vec.push((op_name, insn_idx));
// Move to the next instruction
- insn_idx += insn_len(opcode);
+ insn_idx += insn_len(opcode) as u16;
}
}
diff --git a/yjit/src/invariants.rs b/yjit/src/invariants.rs
index 6329c70f87..f32d51131d 100644
--- a/yjit/src/invariants.rs
+++ b/yjit/src/invariants.rs
@@ -2,23 +2,23 @@
//! generated code if and when these assumptions are invalidated.
use crate::asm::OutlinedCb;
+use crate::backend::ir::Assembler;
use crate::codegen::*;
use crate::core::*;
use crate::cruby::*;
-use crate::options::*;
use crate::stats::*;
use crate::utils::IntoUsize;
use crate::yjit::yjit_enabled_p;
use std::collections::{HashMap, HashSet};
-use std::mem;
use std::os::raw::c_void;
+use std::mem;
// Invariants to track:
// assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS)
// assume_method_lookup_stable(comptime_recv_klass, cme, jit);
-// assume_single_ractor_mode(jit)
-// assume_stable_global_constant_state(jit);
+// assume_single_ractor_mode()
+// track_stable_constant_names_assumption()
/// Used to track all of the various block references that contain assumptions
/// about the state of the virtual machine.
@@ -26,11 +26,6 @@ pub struct Invariants {
/// Tracks block assumptions about callable method entry validity.
cme_validity: HashMap<*const rb_callable_method_entry_t, HashSet<BlockRef>>,
- /// Tracks block assumptions about method lookup. Maps a class to a table of
- /// method ID points to a set of blocks. While a block `b` is in the table,
- /// b->callee_cme == rb_callable_method_entry(klass, mid).
- method_lookup: HashMap<VALUE, HashMap<ID, HashSet<BlockRef>>>,
-
/// A map from a class and its associated basic operator to a set of blocks
/// that are assuming that that operator is not redefined. This is used for
/// quick access to all of the blocks that are making this assumption when
@@ -58,6 +53,17 @@ pub struct Invariants {
/// A map from a block to a set of IDs that it is assuming have not been
/// redefined.
block_constant_states: HashMap<BlockRef, HashSet<ID>>,
+
+ /// A map from a class to a set of blocks that assume objects of the class
+ /// will have no singleton class. When the set is empty, it means that
+    /// a singleton class has been created for the class since boot, so you cannot
+ /// assume no singleton class going forward.
+ no_singleton_classes: HashMap<VALUE, HashSet<BlockRef>>,
+
+    /// A map from an ISEQ to a set of blocks that assume the base pointer is equal
+    /// to the environment pointer. When the set is empty, it means that the EP has
+    /// escaped in the ISEQ.
+ no_ep_escape_iseqs: HashMap<IseqPtr, HashSet<BlockRef>>,
}
/// Private singleton instance of the invariants global struct.
@@ -69,12 +75,13 @@ impl Invariants {
unsafe {
INVARIANTS = Some(Invariants {
cme_validity: HashMap::new(),
- method_lookup: HashMap::new(),
basic_operator_blocks: HashMap::new(),
block_basic_operators: HashMap::new(),
single_ractor: HashSet::new(),
constant_state_blocks: HashMap::new(),
block_constant_states: HashMap::new(),
+ no_singleton_classes: HashMap::new(),
+ no_ep_escape_iseqs: HashMap::new(),
});
}
}
@@ -85,29 +92,21 @@ impl Invariants {
}
}
-/// A public function that can be called from within the code generation
-/// functions to ensure that the block being generated is invalidated when the
-/// basic operator is redefined.
+/// Mark the pending block as assuming that certain basic operators (e.g. Integer#==)
+/// have not been redefined.
+#[must_use]
pub fn assume_bop_not_redefined(
jit: &mut JITState,
+ asm: &mut Assembler,
ocb: &mut OutlinedCb,
klass: RedefinitionFlag,
bop: ruby_basic_operators,
) -> bool {
if unsafe { BASIC_OP_UNREDEFINED_P(bop, klass) } {
- jit_ensure_block_entry_exit(jit, ocb);
-
- let invariants = Invariants::get_instance();
- invariants
- .basic_operator_blocks
- .entry((klass, bop))
- .or_default()
- .insert(jit.get_block());
- invariants
- .block_basic_operators
- .entry(jit.get_block())
- .or_default()
- .insert((klass, bop));
+ if jit_ensure_block_entry_exit(jit, asm, ocb).is_none() {
+ return false;
+ }
+ jit.bop_assumptions.push((klass, bop));
return true;
} else {
@@ -115,115 +114,151 @@ pub fn assume_bop_not_redefined(
}
}
-// Remember that a block assumes that
-// `rb_callable_method_entry(receiver_klass, cme->called_id) == cme` and that
-// `cme` is valid.
-// When either of these assumptions becomes invalid, rb_yjit_method_lookup_change() or
-// rb_yjit_cme_invalidate() invalidates the block.
-//
-// @raise NoMemoryError
-pub fn assume_method_lookup_stable(
- jit: &mut JITState,
- ocb: &mut OutlinedCb,
- receiver_klass: VALUE,
+/// Track that a block is only valid when a certain basic operator has not been redefined
+/// since the block's inception.
+pub fn track_bop_assumption(uninit_block: BlockRef, bop: (RedefinitionFlag, ruby_basic_operators)) {
+ let invariants = Invariants::get_instance();
+ invariants
+ .basic_operator_blocks
+ .entry(bop)
+ .or_default()
+ .insert(uninit_block);
+ invariants
+ .block_basic_operators
+ .entry(uninit_block)
+ .or_default()
+ .insert(bop);
+}
+
+/// Track that a block will assume that `cme` is valid (false == METHOD_ENTRY_INVALIDATED(cme)).
+/// [rb_yjit_cme_invalidate] invalidates the block when `cme` is invalidated.
+pub fn track_method_lookup_stability_assumption(
+ uninit_block: BlockRef,
callee_cme: *const rb_callable_method_entry_t,
) {
- // RUBY_ASSERT(rb_callable_method_entry(receiver_klass, cme->called_id) == cme);
- // RUBY_ASSERT_ALWAYS(RB_TYPE_P(receiver_klass, T_CLASS) || RB_TYPE_P(receiver_klass, T_ICLASS));
- // RUBY_ASSERT_ALWAYS(!rb_objspace_garbage_object_p(receiver_klass));
-
- jit_ensure_block_entry_exit(jit, ocb);
-
- let block = jit.get_block();
- block
- .borrow_mut()
- .add_cme_dependency(receiver_klass, callee_cme);
-
Invariants::get_instance()
.cme_validity
.entry(callee_cme)
.or_default()
- .insert(block.clone());
+ .insert(uninit_block);
+}
- let mid = unsafe { (*callee_cme).called_id };
+/// Track that a block will assume that `klass` objects will have no singleton class.
+pub fn track_no_singleton_class_assumption(uninit_block: BlockRef, klass: VALUE) {
Invariants::get_instance()
- .method_lookup
- .entry(receiver_klass)
+ .no_singleton_classes
+ .entry(klass)
.or_default()
- .entry(mid)
+ .insert(uninit_block);
+}
+
+/// Returns true if we've seen a singleton class of a given class since boot.
+pub fn has_singleton_class_of(klass: VALUE) -> bool {
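+    // An entry whose block set is empty means a singleton class has been seen for
+    // this class since boot, so the "no singleton class" assumption no longer holds.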
+ Invariants::get_instance()
+ .no_singleton_classes
+ .get(&klass)
+ .map_or(false, |blocks| blocks.is_empty())
+}
+
+/// Track that a block will assume that the base pointer is equal to the environment pointer.
+pub fn track_no_ep_escape_assumption(uninit_block: BlockRef, iseq: IseqPtr) {
+ Invariants::get_instance()
+ .no_ep_escape_iseqs
+ .entry(iseq)
.or_default()
- .insert(block);
+ .insert(uninit_block);
+}
+
+/// Returns true if a given ISEQ has previously escaped an environment.
+pub fn iseq_escapes_ep(iseq: IseqPtr) -> bool {
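+    // An entry whose block set is empty means an EP escape has already been seen
+    // for this ISEQ, so blocks can no longer assume that the EP equals the BP.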
+ Invariants::get_instance()
+ .no_ep_escape_iseqs
+ .get(&iseq)
+ .map_or(false, |blocks| blocks.is_empty())
+}
+
+/// Forget an ISEQ remembered in invariants
+pub fn iseq_free_invariants(iseq: IseqPtr) {
+ if unsafe { INVARIANTS.is_none() } {
+ return;
+ }
+ Invariants::get_instance().no_ep_escape_iseqs.remove(&iseq);
+}
+
+// Checks rb_method_basic_definition_p and registers the current block for invalidation if method
+// lookup changes.
+// A "basic method" is one defined during VM boot, so we can use this to check assumptions based on
+// default behavior.
+pub fn assume_method_basic_definition(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ ocb: &mut OutlinedCb,
+ klass: VALUE,
+ mid: ID
+) -> bool {
+ if unsafe { rb_method_basic_definition_p(klass, mid) } != 0 {
+ let cme = unsafe { rb_callable_method_entry(klass, mid) };
+ jit.assume_method_lookup_stable(asm, ocb, cme);
+ true
+ } else {
+ false
+ }
}
/// Tracks that a block is assuming it is operating in single-ractor mode.
#[must_use]
-pub fn assume_single_ractor_mode(jit: &mut JITState, ocb: &mut OutlinedCb) -> bool {
+pub fn assume_single_ractor_mode(jit: &mut JITState, asm: &mut Assembler, ocb: &mut OutlinedCb) -> bool {
if unsafe { rb_yjit_multi_ractor_p() } {
false
} else {
- jit_ensure_block_entry_exit(jit, ocb);
- Invariants::get_instance()
- .single_ractor
- .insert(jit.get_block());
+ if jit_ensure_block_entry_exit(jit, asm, ocb).is_none() {
+ return false;
+ }
+ jit.block_assumes_single_ractor = true;
+
true
}
}
-/// Walk through the ISEQ to go from the current opt_getinlinecache to the
-/// subsequent opt_setinlinecache and find all of the name components that are
-/// associated with this constant (which correspond to the getconstant
-/// arguments).
-pub fn assume_stable_constant_names(jit: &mut JITState, ocb: &mut OutlinedCb) {
- /// Tracks that a block is assuming that the name component of a constant
- /// has not changed since the last call to this function.
- unsafe extern "C" fn assume_stable_constant_name(
- code: *mut VALUE,
- insn: VALUE,
- index: u64,
- data: *mut c_void,
- ) -> bool {
- if insn.as_u32() == YARVINSN_opt_setinlinecache {
- return false;
- }
+/// Track that the block will assume single ractor mode.
+pub fn track_single_ractor_assumption(uninit_block: BlockRef) {
+ Invariants::get_instance()
+ .single_ractor
+ .insert(uninit_block);
+}
- if insn.as_u32() == YARVINSN_getconstant {
- let jit = &mut *(data as *mut JITState);
-
- // The first operand to GETCONSTANT is always the ID associated with
- // the constant lookup. We are grabbing this out in order to
- // associate this block with the stability of this constant name.
- let id = code.add(index.as_usize() + 1).read().as_u64() as ID;
-
- let invariants = Invariants::get_instance();
- invariants
- .constant_state_blocks
- .entry(id)
- .or_default()
- .insert(jit.get_block());
- invariants
- .block_constant_states
- .entry(jit.get_block())
- .or_default()
- .insert(id);
+/// Track that a block will assume that the name components of a constant path expression
+/// have not changed since the block's full initialization.
+pub fn track_stable_constant_names_assumption(uninit_block: BlockRef, idlist: *const ID) {
+ fn assume_stable_constant_name(
+ uninit_block: BlockRef,
+ id: ID,
+ ) {
+ if id == ID!(NULL) {
+ // Used for :: prefix
+ return;
}
- true
+ let invariants = Invariants::get_instance();
+ invariants
+ .constant_state_blocks
+ .entry(id)
+ .or_default()
+ .insert(uninit_block);
+ invariants
+ .block_constant_states
+ .entry(uninit_block)
+ .or_default()
+ .insert(id);
}
- jit_ensure_block_entry_exit(jit, ocb);
-
- unsafe {
- let iseq = jit.get_iseq();
- let encoded = get_iseq_body_iseq_encoded(iseq);
- let start_index = jit.get_pc().offset_from(encoded);
- rb_iseq_each(
- iseq,
- start_index.try_into().unwrap(),
- Some(assume_stable_constant_name),
- jit as *mut _ as *mut c_void,
- );
- };
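+    // Walk the NULL-terminated ID array, registering one assumption per name
+    // component of the constant path.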
+ for i in 0.. {
+ match unsafe { *idlist.offset(i) } {
+ 0 => break, // End of NULL terminated list
+ id => assume_stable_constant_name(uninit_block, id),
+ }
+ }
}
/// Called when a basic operator is redefined. Note that all the blocks assuming
@@ -270,31 +305,6 @@ pub extern "C" fn rb_yjit_cme_invalidate(callee_cme: *const rb_callable_method_e
});
}
-/// Callback for when rb_callable_method_entry(klass, mid) is going to change.
-/// Invalidate blocks that assume stable method lookup of `mid` in `klass` when this happens.
-/// This needs to be wrapped on the C side with RB_VM_LOCK_ENTER().
-#[no_mangle]
-pub extern "C" fn rb_yjit_method_lookup_change(klass: VALUE, mid: ID) {
- // If YJIT isn't enabled, do nothing
- if !yjit_enabled_p() {
- return;
- }
-
- with_vm_lock(src_loc!(), || {
- Invariants::get_instance()
- .method_lookup
- .entry(klass)
- .and_modify(|deps| {
- if let Some(deps) = deps.remove(&mid) {
- for block in &deps {
- invalidate_block_version(block);
- incr_counter!(invalidate_method_lookup);
- }
- }
- });
- });
-}
-
/// Callback for when Ruby is about to spawn a ractor. In that case we need to
/// invalidate every block that is assuming single ractor mode.
#[no_mangle]
@@ -325,32 +335,11 @@ pub extern "C" fn rb_yjit_constant_state_changed(id: ID) {
}
with_vm_lock(src_loc!(), || {
- if get_option!(global_constant_state) {
- // If the global-constant-state option is set, then we're going to
- // invalidate every block that depends on any constant.
-
- Invariants::get_instance()
- .constant_state_blocks
- .keys()
- .for_each(|id| {
- if let Some(blocks) =
- Invariants::get_instance().constant_state_blocks.remove(&id)
- {
- for block in &blocks {
- invalidate_block_version(block);
- incr_counter!(invalidate_constant_state_bump);
- }
- }
- });
- } else {
- // If the global-constant-state option is not set, then we're only going
- // to invalidate the blocks that are associated with the given ID.
-
- if let Some(blocks) = Invariants::get_instance().constant_state_blocks.remove(&id) {
- for block in &blocks {
- invalidate_block_version(block);
- incr_counter!(invalidate_constant_state_bump);
- }
+ // Invalidate the blocks that are associated with the given ID.
+ if let Some(blocks) = Invariants::get_instance().constant_state_blocks.remove(&id) {
+ for block in &blocks {
+ invalidate_block_version(block);
+ incr_counter!(invalidate_constant_state_bump);
}
}
});
@@ -359,7 +348,7 @@ pub extern "C" fn rb_yjit_constant_state_changed(id: ID) {
/// Callback for marking GC objects inside [Invariants].
/// See `struct yjit_root_struct` in C.
#[no_mangle]
-pub extern "C" fn rb_yjit_root_mark() {
+pub extern "C" fn rb_yjit_root_mark(_: *mut c_void) {
// Call rb_gc_mark on exit location's raw_samples to
// wrap frames in a GC allocated object. This needs to be called
// at the same time as root mark.
@@ -370,7 +359,7 @@ pub extern "C" fn rb_yjit_root_mark() {
// Why not let the GC move the cme keys in this table?
// Because this is basically a compare_by_identity Hash.
// If a key moves, we would need to reinsert it into the table so it is rehashed.
- // That is tricky to do, espcially as it could trigger allocation which could
+ // That is tricky to do, especially as it could trigger allocation which could
// trigger GC. Not sure if it is okay to trigger GC while the GC is updating
// references.
//
@@ -385,41 +374,49 @@ pub extern "C" fn rb_yjit_root_mark() {
unsafe { rb_gc_mark(cme) };
}
+}
- // Mark class and iclass objects
- for klass in invariants.method_lookup.keys() {
- // TODO: This is a leak. Unused blocks linger in the table forever, preventing the
- // callee class they speculate on from being collected.
- // We could do a bespoke weak reference scheme on classes similar to
- // the interpreter's call cache. See finalizer for T_CLASS and cc_table_free().
+#[no_mangle]
+pub extern "C" fn rb_yjit_root_update_references(_: *mut c_void) {
+ if unsafe { INVARIANTS.is_none() } {
+ return;
+ }
+ let no_ep_escape_iseqs = &mut Invariants::get_instance().no_ep_escape_iseqs;
- unsafe { rb_gc_mark(*klass) };
+ // Make a copy of the table with updated ISEQ keys
+ let mut updated_copy = HashMap::with_capacity(no_ep_escape_iseqs.len());
+ for (iseq, blocks) in mem::take(no_ep_escape_iseqs) {
+ let new_iseq = unsafe { rb_gc_location(iseq.into()) }.as_iseq();
+ updated_copy.insert(new_iseq, blocks);
}
+
+ *no_ep_escape_iseqs = updated_copy;
}
/// Remove all invariant assumptions made by the block by removing the block
/// as a key in all of the relevant tables.
-pub fn block_assumptions_free(blockref: &BlockRef) {
+/// For safety, the block has to be initialized and the vm lock must be held.
+/// However, outgoing/incoming references to the block do _not_ need to be valid.
+pub fn block_assumptions_free(blockref: BlockRef) {
let invariants = Invariants::get_instance();
{
- let block = blockref.borrow();
+ // SAFETY: caller ensures that this reference is valid
+ let block = unsafe { blockref.as_ref() };
// For each method lookup dependency
for dep in block.iter_cme_deps() {
// Remove tracking for cme validity
- if let Some(blockset) = invariants.cme_validity.get_mut(&dep.callee_cme) {
- blockset.remove(blockref);
- }
-
- // Remove tracking for lookup stability
- if let Some(id_to_block_set) = invariants.method_lookup.get_mut(&dep.receiver_klass) {
- let mid = unsafe { (*dep.callee_cme).called_id };
- if let Some(block_set) = id_to_block_set.get_mut(&mid) {
- block_set.remove(&blockref);
+ if let Some(blockset) = invariants.cme_validity.get_mut(&dep) {
+ blockset.remove(&blockref);
+ if blockset.is_empty() {
+ invariants.cme_validity.remove(&dep);
}
}
}
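+        // Once a table becomes empty, shrink it so it releases its backing storage.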
+ if invariants.cme_validity.is_empty() {
+ invariants.cme_validity.shrink_to_fit();
+ }
}
// Remove tracking for basic operators that the given block assumes have
@@ -430,32 +427,72 @@ pub fn block_assumptions_free(blockref: &BlockRef) {
for key in &bops {
if let Some(blocks) = invariants.basic_operator_blocks.get_mut(key) {
blocks.remove(&blockref);
+ if blocks.is_empty() {
+ invariants.basic_operator_blocks.remove(key);
+ }
}
}
}
+ if invariants.block_basic_operators.is_empty() {
+ invariants.block_basic_operators.shrink_to_fit();
+ }
+ if invariants.basic_operator_blocks.is_empty() {
+ invariants.basic_operator_blocks.shrink_to_fit();
+ }
+ // Remove tracking for blocks assuming single ractor mode
invariants.single_ractor.remove(&blockref);
+ if invariants.single_ractor.is_empty() {
+ invariants.single_ractor.shrink_to_fit();
+ }
// Remove tracking for constant state for a given ID.
if let Some(ids) = invariants.block_constant_states.remove(&blockref) {
for id in ids {
if let Some(blocks) = invariants.constant_state_blocks.get_mut(&id) {
blocks.remove(&blockref);
+ if blocks.is_empty() {
+ invariants.constant_state_blocks.remove(&id);
+ }
}
}
}
+ if invariants.block_constant_states.is_empty() {
+ invariants.block_constant_states.shrink_to_fit();
+ }
+ if invariants.constant_state_blocks.is_empty() {
+ invariants.constant_state_blocks.shrink_to_fit();
+ }
+
+ // Remove tracking for blocks assuming no singleton class
+ for (_, blocks) in invariants.no_singleton_classes.iter_mut() {
+ blocks.remove(&blockref);
+ }
+ // Remove tracking for blocks assuming EP doesn't escape
+ for (_, blocks) in invariants.no_ep_escape_iseqs.iter_mut() {
+ blocks.remove(&blockref);
+ }
}
/// Callback from the opt_setinlinecache instruction in the interpreter.
/// Invalidate the block for the matching opt_getinlinecache so it could regenerate code
/// using the new value in the constant cache.
#[no_mangle]
-pub extern "C" fn rb_yjit_constant_ic_update(iseq: *const rb_iseq_t, ic: IC) {
+pub extern "C" fn rb_yjit_constant_ic_update(iseq: *const rb_iseq_t, ic: IC, insn_idx: std::os::raw::c_uint) {
// If YJIT isn't enabled, do nothing
if !yjit_enabled_p() {
return;
}
+ // Try to downcast the iseq index
+ let insn_idx: IseqIdx = if let Ok(idx) = insn_idx.try_into() {
+ idx
+ } else {
+ // The index is too large, YJIT can't possibly have code for it,
+ // so there is nothing to invalidate.
+ return;
+ };
+
if !unsafe { (*(*ic).entry).ic_cref }.is_null() || unsafe { rb_yjit_multi_ractor_p() } {
// We can't generate code in these situations, so no need to invalidate.
// See gen_opt_getinlinecache.
@@ -464,34 +501,33 @@ pub extern "C" fn rb_yjit_constant_ic_update(iseq: *const rb_iseq_t, ic: IC) {
with_vm_lock(src_loc!(), || {
let code = unsafe { get_iseq_body_iseq_encoded(iseq) };
- let get_insn_idx = unsafe { (*ic).get_insn_idx };
// This should come from a running iseq, so direct threading translation
// should have been done
- assert!(unsafe { FL_TEST(iseq.into(), VALUE(ISEQ_TRANSLATED as usize)) } != VALUE(0));
- assert!(get_insn_idx < unsafe { get_iseq_encoded_size(iseq) });
+ assert!(unsafe { FL_TEST(iseq.into(), VALUE(ISEQ_TRANSLATED)) } != VALUE(0));
+ assert!(u32::from(insn_idx) < unsafe { get_iseq_encoded_size(iseq) });
- // Ensure that the instruction the get_insn_idx is pointing to is in
- // fact a opt_getinlinecache instruction.
+ // Ensure that the instruction insn_idx points to is in
+ // fact an opt_getconstant_path instruction.
assert_eq!(
unsafe {
- let opcode_pc = code.add(get_insn_idx.as_usize());
+ let opcode_pc = code.add(insn_idx.as_usize());
let translated_opcode: VALUE = opcode_pc.read();
rb_vm_insn_decode(translated_opcode)
},
- YARVINSN_opt_getinlinecache.try_into().unwrap()
+ YARVINSN_opt_getconstant_path.try_into().unwrap()
);
// Find the matching opt_getinlinecache and invalidate all the blocks there
// RUBY_ASSERT(insn_op_type(BIN(opt_getinlinecache), 1) == TS_IC);
- let ic_pc = unsafe { code.add(get_insn_idx.as_usize() + 2) };
+ let ic_pc = unsafe { code.add(insn_idx.as_usize() + 1) };
let ic_operand: IC = unsafe { ic_pc.read() }.as_mut_ptr();
if ic == ic_operand {
for block in take_version_list(BlockId {
iseq,
- idx: get_insn_idx,
+ idx: insn_idx,
}) {
invalidate_block_version(&block);
incr_counter!(invalidate_constant_ic_fill);
@@ -502,6 +538,63 @@ pub extern "C" fn rb_yjit_constant_ic_update(iseq: *const rb_iseq_t, ic: IC) {
});
}
+/// Invalidate blocks that assume objects of a given class will have no singleton class.
+#[no_mangle]
+pub extern "C" fn rb_yjit_invalidate_no_singleton_class(klass: VALUE) {
+ // Skip tracking singleton classes during boot. Such objects already have a singleton class
+ // before entering JIT code, so they get rejected when they're checked for the first time.
+ if unsafe { INVARIANTS.is_none() } {
+ return;
+ }
+
+ // We apply this optimization only to Array, Hash, and String for now.
+ if unsafe { [rb_cArray, rb_cHash, rb_cString].contains(&klass) } {
+ let no_singleton_classes = &mut Invariants::get_instance().no_singleton_classes;
+ match no_singleton_classes.get_mut(&klass) {
+ Some(blocks) => {
+ // Invalidate existing blocks and let has_singleton_class_of()
+ // return true when they are compiled again
+ for block in mem::take(blocks) {
+ invalidate_block_version(&block);
+ incr_counter!(invalidate_no_singleton_class);
+ }
+ }
+ None => {
+ // Let has_singleton_class_of() return true for this class
+ no_singleton_classes.insert(klass, HashSet::new());
+ }
+ }
+ }
+}
+
+/// Invalidate blocks for a given ISEQ that assume the environment pointer is
+/// equal to the base pointer.
+#[no_mangle]
+pub extern "C" fn rb_yjit_invalidate_ep_is_bp(iseq: IseqPtr) {
+ // Skip tracking EP escapes on boot. We don't need to invalidate anything during boot.
+ if unsafe { INVARIANTS.is_none() } {
+ return;
+ }
+
+ // If an EP escape for this ISEQ is detected for the first time, invalidate all blocks
+ // associated with the ISEQ.
+ let no_ep_escape_iseqs = &mut Invariants::get_instance().no_ep_escape_iseqs;
+ match no_ep_escape_iseqs.get_mut(&iseq) {
+ Some(blocks) => {
+ // Invalidate existing blocks and let jit.ep_is_bp()
+ // return true when they are compiled again
+ for block in mem::take(blocks) {
+ invalidate_block_version(&block);
+ incr_counter!(invalidate_no_singleton_class);
+ }
+ }
+ None => {
+ // Let jit.ep_is_bp() return false for this ISEQ
+ no_ep_escape_iseqs.insert(iseq, HashSet::new());
+ }
+ }
+}
+
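Both invalidation hooks above follow the same lazy-registration pattern: the first event for a class or ISEQ only registers an empty set so later compilations know the assumption has been broken at least once, and a repeat event drains and invalidates whatever blocks were compiled in the meantime. The sketch below is a minimal, self-contained illustration of that pattern; `Key`, `Block`, and `invalidate_block` are stand-ins, not the real YJIT types or APIs.

```rust
use std::collections::{HashMap, HashSet};
use std::mem;

// Stand-ins: YJIT keys these maps by VALUE or IseqPtr and stores BlockRefs.
type Key = u64;
type Block = u32;

// Placeholder for invalidate_block_version().
fn invalidate_block(block: Block) {
    println!("invalidating block {block}");
}

// First event for a key only registers it; a later event drains and
// invalidates every block that was compiled while the assumption held.
fn on_assumption_broken(tracked: &mut HashMap<Key, HashSet<Block>>, key: Key) {
    match tracked.get_mut(&key) {
        Some(blocks) => {
            for block in mem::take(blocks) {
                invalidate_block(block);
            }
        }
        None => {
            // Start tracking so future compilations can tell the event happened.
            tracked.insert(key, HashSet::new());
        }
    }
}

fn main() {
    let mut tracked: HashMap<Key, HashSet<Block>> = HashMap::new();
    on_assumption_broken(&mut tracked, 1); // first event: only registers key 1
    tracked.get_mut(&1).unwrap().insert(42); // a block compiled afterwards
    on_assumption_broken(&mut tracked, 1); // second event: invalidates block 42
}
```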
// Invalidate all generated code and patch C method return code to contain
// logic for firing the c_return TracePoint event. Once rb_vm_barrier()
// returns, all other ractors are pausing inside RB_VM_LOCK_ENTER(), which
@@ -528,59 +621,78 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() {
return;
}
- use crate::asm::x86_64::jmp_ptr;
-
// Stop other ractors since we are going to patch machine code.
with_vm_lock(src_loc!(), || {
// Make it so all live block versions are no longer valid branch targets
- unsafe { rb_yjit_for_each_iseq(Some(invalidate_all_blocks_for_tracing)) };
-
- extern "C" fn invalidate_all_blocks_for_tracing(iseq: IseqPtr) {
- if let Some(payload) = unsafe { load_iseq_payload(iseq) } {
- // C comment:
- // Leaking the blocks for now since we might have situations where
- // a different ractor is waiting for the VM lock in branch_stub_hit().
- // If we free the block that ractor can wake up with a dangling block.
- //
- // Deviation: since we ref count the the blocks now, we might be deallocating and
- // not leak the block.
- //
- // Empty all blocks on the iseq so we don't compile new blocks that jump to the
- // invalidated region.
+ let mut on_stack_iseqs = HashSet::new();
+ for_each_on_stack_iseq(|iseq| {
+ on_stack_iseqs.insert(iseq);
+ });
+ for_each_iseq(|iseq| {
+ if let Some(payload) = get_iseq_payload(iseq) {
let blocks = payload.take_all_blocks();
- for blockref in blocks {
- block_assumptions_free(&blockref);
+
+ if on_stack_iseqs.contains(&iseq) {
+ // This ISEQ is running, so we can't free blocks immediately
+ for block in blocks {
+ delayed_deallocation(block);
+ }
+ payload.dead_blocks.shrink_to_fit();
+ } else {
+ // Safe to free dead blocks since the ISEQ isn't running
+ // Since we're freeing _all_ blocks, we don't need to keep the graph well formed
+ for block in blocks {
+ unsafe { free_block(block, false) };
+ }
+ mem::take(&mut payload.dead_blocks)
+ .into_iter()
+ .for_each(|block| unsafe { free_block(block, false) });
}
}
// Reset output code entry point
unsafe { rb_iseq_reset_jit_func(iseq) };
- }
+ });
let cb = CodegenGlobals::get_inline_cb();
+ // Prevent on-stack frames from jumping to the caller on jit_exec_exception
+ extern "C" {
+ fn rb_yjit_cancel_jit_return(leave_exit: *mut c_void, leave_exception: *mut c_void) -> VALUE;
+ }
+ unsafe {
+ rb_yjit_cancel_jit_return(
+ CodegenGlobals::get_leave_exit_code().raw_ptr(cb) as _,
+ CodegenGlobals::get_leave_exception_code().raw_ptr(cb) as _,
+ );
+ }
+
// Apply patches
let old_pos = cb.get_write_pos();
- let patches = CodegenGlobals::take_global_inval_patches();
+ let old_dropped_bytes = cb.has_dropped_bytes();
+ let mut patches = CodegenGlobals::take_global_inval_patches();
+ patches.sort_by_cached_key(|patch| patch.inline_patch_pos.raw_ptr(cb));
+ let mut last_patch_end = std::ptr::null();
for patch in &patches {
- cb.set_write_ptr(patch.inline_patch_pos);
- jmp_ptr(cb, patch.outlined_target_pos);
+ let patch_pos = patch.inline_patch_pos.raw_ptr(cb);
+ assert!(
+ last_patch_end <= patch_pos,
+ "patches should not overlap (last_patch_end: {last_patch_end:?}, patch_pos: {patch_pos:?})",
+ );
- // FIXME: Can't easily check we actually wrote out the JMP at the moment.
- // assert!(!cb.has_dropped_bytes(), "patches should have space and jump offsets should fit in JMP rel32");
+ cb.set_write_ptr(patch.inline_patch_pos);
+ cb.set_dropped_bytes(false);
+ cb.without_page_end_reserve(|cb| {
+ let mut asm = crate::backend::ir::Assembler::new();
+ asm.jmp(patch.outlined_target_pos.as_side_exit());
+ if asm.compile(cb, None).is_none() {
+ panic!("Failed to apply patch at {:?}", patch.inline_patch_pos);
+ }
+ });
+ last_patch_end = cb.get_write_ptr().raw_ptr(cb);
}
cb.set_pos(old_pos);
-
- // Freeze invalidated part of the codepage. We only want to wait for
- // running instances of the code to exit from now on, so we shouldn't
- // change the code. There could be other ractors sleeping in
- // branch_stub_hit(), for example. We could harden this by changing memory
- // protection on the frozen range.
- assert!(
- CodegenGlobals::get_inline_frozen_bytes() <= old_pos,
- "frozen bytes should increase monotonically"
- );
- CodegenGlobals::set_inline_frozen_bytes(old_pos);
+ cb.set_dropped_bytes(old_dropped_bytes);
CodegenGlobals::get_outlined_cb()
.unwrap()
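The invalidate-all path above makes a per-ISEQ decision: blocks of ISEQs that are currently on a Ruby stack are parked as dead blocks (a paused frame might still return into them), while everything else is freed on the spot. The following is a hedged, self-contained sketch of just that decision; `Iseq`, `Block`, `free_now`, and `retire_blocks` are illustrative names standing in for the real `free_block()` / `delayed_deallocation()` machinery.

```rust
use std::collections::HashSet;

// Illustrative stand-ins for IseqPtr and BlockRef.
type Iseq = u64;
type Block = u32;

fn free_now(block: Block) {
    println!("freeing block {block}");
}

// Blocks of on-stack ISEQs are parked for later; all others are freed now.
fn retire_blocks(
    iseq: Iseq,
    blocks: Vec<Block>,
    on_stack_iseqs: &HashSet<Iseq>,
    dead_blocks: &mut Vec<Block>,
) {
    if on_stack_iseqs.contains(&iseq) {
        dead_blocks.extend(blocks);
    } else {
        for block in blocks {
            free_now(block);
        }
    }
}

fn main() {
    let on_stack: HashSet<Iseq> = [1].into_iter().collect();
    let mut dead_blocks = Vec::new();
    retire_blocks(1, vec![10, 11], &on_stack, &mut dead_blocks); // parked
    retire_blocks(2, vec![20], &on_stack, &mut dead_blocks); // freed immediately
    assert_eq!(dead_blocks, vec![10, 11]);
}
```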
diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs
index 6772f551a8..3f3d24be4b 100644
--- a/yjit/src/lib.rs
+++ b/yjit/src/lib.rs
@@ -3,7 +3,8 @@
#![allow(clippy::too_many_arguments)] // :shrug:
#![allow(clippy::identity_op)] // Sometimes we do it for style
-mod asm;
+pub mod asm;
+mod backend;
mod codegen;
mod core;
mod cruby;
diff --git a/yjit/src/options.rs b/yjit/src/options.rs
index 704c709bae..59ec864bf5 100644
--- a/yjit/src/options.rs
+++ b/yjit/src/options.rs
@@ -1,19 +1,36 @@
-use std::ffi::CStr;
+use std::{ffi::{CStr, CString}, ptr::null, fs::File};
+use crate::{backend::current::TEMP_REGS, stats::Counter};
+use std::os::raw::{c_char, c_int, c_uint};
+
+// Call threshold for small deployments and command-line apps
+pub static SMALL_CALL_THRESHOLD: u64 = 30;
+
+// Call threshold for larger deployments and production-sized applications
+pub static LARGE_CALL_THRESHOLD: u64 = 120;
+
+// Number of live ISEQs after which we consider an app to be large
+pub static LARGE_ISEQ_COUNT: u64 = 40_000;
+
+// This option is exposed to the C side in a global variable for performance, see vm.c
+// Number of method calls after which to start generating code
+// Threshold==1 means compile on first execution
+#[no_mangle]
+pub static mut rb_yjit_call_threshold: u64 = SMALL_CALL_THRESHOLD;
+
+// This option is exposed to the C side in a global variable for performance, see vm.c
+// Number of execution requests after which a method is no longer
+// considered hot. Raising this results in more generated code.
+#[no_mangle]
+pub static mut rb_yjit_cold_threshold: u64 = 200_000;
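The statics above only define the candidate values; the code that actually switches between the small and large thresholds lives outside this file and consults the live-ISEQ count. The sketch below is a hypothetical illustration of such a heuristic under those assumptions; `pick_call_threshold` and its parameters are not the real API.

```rust
const SMALL_CALL_THRESHOLD: u64 = 30;
const LARGE_CALL_THRESHOLD: u64 = 120;
const LARGE_ISEQ_COUNT: u64 = 40_000;

// Hypothetical helper: an explicit --yjit-call-threshold always wins; otherwise
// apps with many live ISEQs are treated as "large" and wait longer before
// compiling, so rarely-run code doesn't get JITted.
fn pick_call_threshold(live_iseq_count: u64, user_override: Option<u64>) -> u64 {
    if let Some(n) = user_override {
        return n;
    }
    if live_iseq_count > LARGE_ISEQ_COUNT {
        LARGE_CALL_THRESHOLD
    } else {
        SMALL_CALL_THRESHOLD
    }
}

fn main() {
    assert_eq!(pick_call_threshold(1_000, None), 30);
    assert_eq!(pick_call_threshold(100_000, None), 120);
    assert_eq!(pick_call_threshold(100_000, Some(1)), 1);
}
```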
// Command-line options
-#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+#[derive(Clone, PartialEq, Eq, Debug)]
#[repr(C)]
pub struct Options {
- // Size of the executable memory block to allocate in MiB
+ // Size of the executable memory block to allocate in bytes
+ // Note that the command line argument is expressed in MiB and not bytes
pub exec_mem_size: usize,
- // Number of method calls after which to start generating code
- // Threshold==1 means compile on first execution
- pub call_threshold: usize,
-
- // Generate versions greedily until the limit is hit
- pub greedy_versioning: bool,
-
// Disable the propagation of type information
pub no_type_prop: bool,
@@ -21,49 +38,130 @@ pub struct Options {
// 1 means always create generic versions
pub max_versions: usize,
- // Capture and print out stats
+ // The number of registers allocated for stack temps
+ pub num_temp_regs: usize,
+
+ // Capture stats
pub gen_stats: bool,
+ // Print stats on exit (when gen_stats is also true)
+ pub print_stats: bool,
+
// Trace locations of exits
- pub gen_trace_exits: bool,
+ pub trace_exits: Option<TraceExits>,
+
+ // How often to sample exit trace data
+ pub trace_exits_sample_rate: usize,
+
+ // Whether to disable YJIT at boot. When set, other YJIT tuning options
+ // do not enable YJIT at boot; it can still be enabled later with RubyVM::YJIT.enable.
+ pub disable: bool,
/// Dump compiled and executed instructions for debugging
pub dump_insns: bool,
+ /// Dump all compiled instructions of target cbs.
+ pub dump_disasm: Option<DumpDisasm>,
+
+ /// Print when specific ISEQ items are compiled or invalidated
+ pub dump_iseq_disasm: Option<String>,
+
/// Verify context objects (debug mode only)
pub verify_ctx: bool,
- /// Whether or not to assume a global constant state (and therefore
- /// invalidating code whenever any constant changes) versus assuming
- /// constant name components (and therefore invalidating code whenever a
- /// matching name component changes)
- pub global_constant_state: bool,
+ /// Enable generating frame pointers (for x86; arm64 always does this)
+ pub frame_pointer: bool,
+
+ /// Run code GC when exec_mem_size is reached.
+ pub code_gc: bool,
+
+ /// Enable writing /tmp/perf-{pid}.map for Linux perf
+ pub perf_map: Option<PerfMap>,
}
// Initialize the options to default values
pub static mut OPTIONS: Options = Options {
- exec_mem_size: 256,
- call_threshold: 10,
- greedy_versioning: false,
+ exec_mem_size: 48 * 1024 * 1024,
no_type_prop: false,
max_versions: 4,
+ num_temp_regs: 5,
gen_stats: false,
- gen_trace_exits: false,
+ trace_exits: None,
+ print_stats: true,
+ trace_exits_sample_rate: 0,
+ disable: false,
dump_insns: false,
+ dump_disasm: None,
verify_ctx: false,
- global_constant_state: false,
+ dump_iseq_disasm: None,
+ frame_pointer: false,
+ code_gc: false,
+ perf_map: None,
};
+/// YJIT option descriptions for `ruby --help`.
+static YJIT_OPTIONS: [(&str, &str); 9] = [
+ ("--yjit-exec-mem-size=num", "Size of executable memory block in MiB (default: 48)."),
+ ("--yjit-call-threshold=num", "Number of calls to trigger JIT."),
+ ("--yjit-cold-threshold=num", "Global calls after which ISEQs not compiled (default: 200K)."),
+ ("--yjit-stats", "Enable collecting YJIT statistics."),
+ ("--yjit-disable", "Disable YJIT for lazily enabling it with RubyVM::YJIT.enable."),
+ ("--yjit-code-gc", "Run code GC when the code size reaches the limit."),
+ ("--yjit-perf", "Enable frame pointers and perf profiling."),
+ ("--yjit-trace-exits", "Record Ruby source location when exiting from generated code."),
+ ("--yjit-trace-exits-sample-rate=num", "Trace exit locations only every Nth occurrence."),
+];
+
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum TraceExits {
+ // Trace all exits
+ All,
+ // Trace a specific counted exit
+ CountedExit(Counter),
+}
+
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub enum DumpDisasm {
+ // Dump to stdout
+ Stdout,
+ // Dump to "yjit_{pid}.log" file under the specified directory
+ File(String),
+}
+
+/// Type of symbols to dump into /tmp/perf-{pid}.map
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum PerfMap {
+ // Dump ISEQ symbols
+ ISEQ,
+ // Dump YJIT codegen symbols
+ Codegen,
+}
+
/// Macro to get an option value by name
macro_rules! get_option {
// Unsafe is ok here because options are initialized
// once before any Ruby code executes
($option_name:ident) => {
- unsafe { OPTIONS.$option_name }
+ {
+ // Make this a statement since attributes on expressions are experimental
+ #[allow(unused_unsafe)]
+ let ret = unsafe { OPTIONS.$option_name };
+ ret
+ }
};
}
pub(crate) use get_option;
+/// Macro to reference an option value by name; we assume it's a cloneable type like String or an Option of same.
+macro_rules! get_option_ref {
+ // Unsafe is ok here because options are initialized
+ // once before any Ruby code executes
+ ($option_name:ident) => {
+ unsafe { &($crate::options::OPTIONS.$option_name) }
+ };
+}
+pub(crate) use get_option_ref;
+
/// Expected to receive what comes after the third dash in "--yjit-*".
/// Empty string means user passed only "--yjit". C code rejects when
/// they pass exact "--yjit-".
@@ -84,15 +182,29 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
match (opt_name, opt_val) {
("", "") => (), // Simply --yjit
- ("exec-mem-size", _) => match opt_val.parse() {
- Ok(n) => unsafe { OPTIONS.exec_mem_size = n },
+ ("exec-mem-size", _) => match opt_val.parse::<usize>() {
+ Ok(n) => {
+ if n == 0 || n > 2 * 1024 * 1024 {
+ return None
+ }
+
+ // Convert from MiB to bytes internally for convenience
+ unsafe { OPTIONS.exec_mem_size = n * 1024 * 1024 }
+ }
Err(_) => {
return None;
}
},
("call-threshold", _) => match opt_val.parse() {
- Ok(n) => unsafe { OPTIONS.call_threshold = n },
+ Ok(n) => unsafe { rb_yjit_call_threshold = n },
+ Err(_) => {
+ return None;
+ }
+ },
+
+ ("cold-threshold", _) => match opt_val.parse() {
+ Ok(n) => unsafe { rb_yjit_cold_threshold = n },
Err(_) => {
return None;
}
@@ -105,13 +217,94 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
}
},
- ("greedy-versioning", "") => unsafe { OPTIONS.greedy_versioning = true },
+ ("disable", "") => unsafe {
+ OPTIONS.disable = true;
+ },
+
+ ("temp-regs", _) => match opt_val.parse() {
+ Ok(n) => {
+ assert!(n <= TEMP_REGS.len(), "--yjit-temp-regs must be <= {}", TEMP_REGS.len());
+ unsafe { OPTIONS.num_temp_regs = n }
+ }
+ Err(_) => {
+ return None;
+ }
+ },
+
+ ("code-gc", _) => unsafe {
+ OPTIONS.code_gc = true;
+ },
+
+ ("perf", _) => match opt_val {
+ "" => unsafe {
+ OPTIONS.frame_pointer = true;
+ OPTIONS.perf_map = Some(PerfMap::ISEQ);
+ },
+ "fp" => unsafe { OPTIONS.frame_pointer = true },
+ "iseq" => unsafe { OPTIONS.perf_map = Some(PerfMap::ISEQ) },
+ // Accept --yjit-perf=map for backward compatibility
+ "codegen" | "map" => unsafe { OPTIONS.perf_map = Some(PerfMap::Codegen) },
+ _ => return None,
+ },
+
+ ("dump-disasm", _) => {
+ if !cfg!(feature = "disasm") {
+ eprintln!("WARNING: the {} option is only available when YJIT is built in dev mode, i.e. ./configure --enable-yjit=dev", opt_name);
+ }
+
+ match opt_val {
+ "" => unsafe { OPTIONS.dump_disasm = Some(DumpDisasm::Stdout) },
+ directory => {
+ let path = format!("{directory}/yjit_{}.log", std::process::id());
+ match File::options().create(true).append(true).open(&path) {
+ Ok(_) => {
+ eprintln!("YJIT disasm dump: {path}");
+ unsafe { OPTIONS.dump_disasm = Some(DumpDisasm::File(path)) }
+ }
+ Err(err) => eprintln!("Failed to create {path}: {err}"),
+ }
+ }
+ }
+ },
+
+ ("dump-iseq-disasm", _) => unsafe {
+ if !cfg!(feature = "disasm") {
+ eprintln!("WARNING: the {} option is only available when YJIT is built in dev mode, i.e. ./configure --enable-yjit=dev", opt_name);
+ }
+
+ OPTIONS.dump_iseq_disasm = Some(opt_val.to_string());
+ },
+
("no-type-prop", "") => unsafe { OPTIONS.no_type_prop = true },
- ("stats", "") => unsafe { OPTIONS.gen_stats = true },
- ("trace-exits", "") => unsafe { OPTIONS.gen_trace_exits = true; OPTIONS.gen_stats = true },
+ ("stats", _) => match opt_val {
+ "" => unsafe { OPTIONS.gen_stats = true },
+ "quiet" => unsafe {
+ OPTIONS.gen_stats = true;
+ OPTIONS.print_stats = false;
+ },
+ _ => {
+ return None;
+ }
+ },
+ ("trace-exits", _) => unsafe {
+ OPTIONS.gen_stats = true;
+ OPTIONS.trace_exits = match opt_val {
+ "" => Some(TraceExits::All),
+ name => match Counter::get(name) {
+ Some(counter) => Some(TraceExits::CountedExit(counter)),
+ None => return None,
+ },
+ };
+ },
+ ("trace-exits-sample-rate", sample_rate) => unsafe {
+ OPTIONS.gen_stats = true;
+ if OPTIONS.trace_exits.is_none() {
+ OPTIONS.trace_exits = Some(TraceExits::All);
+ }
+ OPTIONS.trace_exits_sample_rate = sample_rate.parse().unwrap();
+ },
("dump-insns", "") => unsafe { OPTIONS.dump_insns = true },
("verify-ctx", "") => unsafe { OPTIONS.verify_ctx = true },
- ("global-constant-state", "") => unsafe { OPTIONS.global_constant_state = true },
// Option name not recognized
_ => {
@@ -119,8 +312,36 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
}
}
+ // Before we continue, warn if the sample rate is not 0, 1, or a prime number
+ let trace_sample_rate = unsafe { OPTIONS.trace_exits_sample_rate };
+ if trace_sample_rate > 1 {
+ let mut i = 2;
+ while i*i <= trace_sample_rate {
+ if trace_sample_rate % i == 0 {
+ println!("Warning: using a non-prime number as your sampling rate can result in less accurate sampling data");
+ return Some(());
+ }
+ i += 1;
+ }
+ }
+
// dbg!(unsafe {OPTIONS});
// Option successfully parsed
return Some(());
}
+
+/// Print YJIT options for `ruby --help`. `width` is the width of the option parts, and
+/// `columns` is the indent width of the descriptions.
+#[no_mangle]
+pub extern "C" fn rb_yjit_show_usage(help: c_int, highlight: c_int, width: c_uint, columns: c_int) {
+ for &(name, description) in YJIT_OPTIONS.iter() {
+ extern "C" {
+ fn ruby_show_usage_line(name: *const c_char, secondary: *const c_char, description: *const c_char,
+ help: c_int, highlight: c_int, width: c_uint, columns: c_int);
+ }
+ let name = CString::new(name).unwrap();
+ let description = CString::new(description).unwrap();
+ unsafe { ruby_show_usage_line(name.as_ptr(), null(), description.as_ptr(), help, highlight, width, columns) }
+ }
+}
diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs
index 6bad8db7e7..0a63fab8b0 100644
--- a/yjit/src/stats.rs
+++ b/yjit/src/stats.rs
@@ -3,13 +3,125 @@
#![allow(dead_code)] // Counters are only used with the stats features
+use std::alloc::{GlobalAlloc, Layout, System};
+use std::ptr::addr_of_mut;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::time::Instant;
+use std::collections::HashMap;
+
use crate::codegen::CodegenGlobals;
+use crate::core::Context;
+use crate::core::for_each_iseq_payload;
use crate::cruby::*;
use crate::options::*;
use crate::yjit::yjit_enabled_p;
+/// Running total of how many ISeqs are in the system.
+#[no_mangle]
+pub static mut rb_yjit_live_iseq_count: u64 = 0;
+
+/// Monotonically increasing total of how many ISEQs were allocated
+#[no_mangle]
+pub static mut rb_yjit_iseq_alloc_count: u64 = 0;
+
+/// A middleware to count Rust-allocated bytes as yjit_alloc_size.
+#[global_allocator]
+static GLOBAL_ALLOCATOR: StatsAlloc = StatsAlloc { alloc_size: AtomicUsize::new(0) };
+
+pub struct StatsAlloc {
+ alloc_size: AtomicUsize,
+}
+
+unsafe impl GlobalAlloc for StatsAlloc {
+ unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
+ self.alloc_size.fetch_add(layout.size(), Ordering::SeqCst);
+ System.alloc(layout)
+ }
+
+ unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
+ self.alloc_size.fetch_sub(layout.size(), Ordering::SeqCst);
+ System.dealloc(ptr, layout)
+ }
+
+ unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
+ self.alloc_size.fetch_add(layout.size(), Ordering::SeqCst);
+ System.alloc_zeroed(layout)
+ }
+
+ unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
+ if new_size > layout.size() {
+ self.alloc_size.fetch_add(new_size - layout.size(), Ordering::SeqCst);
+ } else if new_size < layout.size() {
+ self.alloc_size.fetch_sub(layout.size() - new_size, Ordering::SeqCst);
+ }
+ System.realloc(ptr, layout, new_size)
+ }
+}
+
+/// Mapping of C function / ISEQ name to integer indices
+/// This is accessed at compilation time only (protected by a lock)
+static mut CFUNC_NAME_TO_IDX: Option<HashMap<String, usize>> = None;
+static mut ISEQ_NAME_TO_IDX: Option<HashMap<String, usize>> = None;
+
+/// Vector of call counts for each C function / ISEQ index
+/// This is modified (but not resized) by JITted code
+static mut CFUNC_CALL_COUNT: Option<Vec<u64>> = None;
+static mut ISEQ_CALL_COUNT: Option<Vec<u64>> = None;
+
+/// Assign an index to a given cfunc name string
+pub fn get_cfunc_idx(name: &str) -> usize {
+ // SAFETY: We acquire a VM lock and don't create multiple &mut references to these static mut variables.
+ unsafe { get_method_idx(name, &mut *addr_of_mut!(CFUNC_NAME_TO_IDX), &mut *addr_of_mut!(CFUNC_CALL_COUNT)) }
+}
+
+/// Assign an index to a given ISEQ name string
+pub fn get_iseq_idx(name: &str) -> usize {
+ // SAFETY: We acquire a VM lock and don't create multiple &mut references to these static mut variables.
+ unsafe { get_method_idx(name, &mut *addr_of_mut!(ISEQ_NAME_TO_IDX), &mut *addr_of_mut!(ISEQ_CALL_COUNT)) }
+}
+
+fn get_method_idx(
+ name: &str,
+ method_name_to_idx: &mut Option<HashMap<String, usize>>,
+ method_call_count: &mut Option<Vec<u64>>,
+) -> usize {
+ //println!("{}", name);
+
+ let name_to_idx = method_name_to_idx.get_or_insert_with(HashMap::default);
+ let call_count = method_call_count.get_or_insert_with(Vec::default);
+
+ match name_to_idx.get(name) {
+ Some(idx) => *idx,
+ None => {
+ let idx = name_to_idx.len();
+ name_to_idx.insert(name.to_string(), idx);
+
+ // Resize the call count vector
+ if idx >= call_count.len() {
+ call_count.resize(idx + 1, 0);
+ }
+
+ idx
+ }
+ }
+}
+
+// Increment the counter for a C function
+pub extern "C" fn incr_cfunc_counter(idx: usize) {
+ let cfunc_call_count = unsafe { CFUNC_CALL_COUNT.as_mut().unwrap() };
+ assert!(idx < cfunc_call_count.len());
+ cfunc_call_count[idx] += 1;
+}
+
+// Increment the counter for an ISEQ
+pub extern "C" fn incr_iseq_counter(idx: usize) {
+ let iseq_call_count = unsafe { ISEQ_CALL_COUNT.as_mut().unwrap() };
+ assert!(idx < iseq_call_count.len());
+ iseq_call_count[idx] += 1;
+}
+
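The split above keeps the JIT-time hot path to a plain array increment: the name-to-index map is consulted only while compiling (under the VM lock), and the count vector is sized there too, so generated code only ever bumps an existing slot. Below is a self-contained sketch of that two-phase flow, with free functions (`intern`, `bump`) standing in for the static maps and counter entry points.

```rust
use std::collections::HashMap;

// Compile-time side: intern a name into a stable index and make sure the
// count vector already has a slot for it.
fn intern(name: &str, name_to_idx: &mut HashMap<String, usize>, counts: &mut Vec<u64>) -> usize {
    if let Some(&idx) = name_to_idx.get(name) {
        return idx;
    }
    let idx = name_to_idx.len();
    name_to_idx.insert(name.to_string(), idx);
    if idx >= counts.len() {
        counts.resize(idx + 1, 0);
    }
    idx
}

// Run-time side: generated code only needs the index and never resizes.
fn bump(counts: &mut [u64], idx: usize) {
    counts[idx] += 1;
}

fn main() {
    let mut names = HashMap::new();
    let mut counts = Vec::new();
    let idx = intern("Integer#+", &mut names, &mut counts); // while compiling
    bump(&mut counts, idx); // from JIT-ted code
    bump(&mut counts, idx);
    assert_eq!(counts[idx], 2);
}
```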
// YJIT exit counts for each instruction type
-const VM_INSTRUCTION_SIZE_USIZE:usize = VM_INSTRUCTION_SIZE as usize;
+const VM_INSTRUCTION_SIZE_USIZE: usize = VM_INSTRUCTION_SIZE as usize;
static mut EXIT_OP_COUNT: [u64; VM_INSTRUCTION_SIZE_USIZE] = [0; VM_INSTRUCTION_SIZE_USIZE];
/// Global state needed for collecting backtraces of exits
@@ -19,7 +131,9 @@ pub struct YjitExitLocations {
raw_samples: Vec<VALUE>,
/// Vec to hold line_samples which represent line numbers of
/// the iseq caller.
- line_samples: Vec<i32>
+ line_samples: Vec<i32>,
+ /// Number of samples skipped when sampling
+ skipped_samples: usize
}
/// Private singleton instance of yjit exit locations
@@ -28,19 +142,15 @@ static mut YJIT_EXIT_LOCATIONS: Option<YjitExitLocations> = None;
impl YjitExitLocations {
/// Initialize the yjit exit locations
pub fn init() {
- // Return if the stats feature is disabled
- if !cfg!(feature = "stats") {
- return;
- }
-
// Return if --yjit-trace-exits isn't enabled
- if !get_option!(gen_trace_exits) {
+ if get_option!(trace_exits).is_none() {
return;
}
let yjit_exit_locations = YjitExitLocations {
raw_samples: Vec::new(),
- line_samples: Vec::new()
+ line_samples: Vec::new(),
+ skipped_samples: 0
};
// Initialize the yjit exit locations instance
@@ -64,6 +174,11 @@ impl YjitExitLocations {
&mut YjitExitLocations::get_instance().line_samples
}
+ /// Get the number of samples skipped
+ pub fn get_skipped_samples() -> &'static mut usize {
+ &mut YjitExitLocations::get_instance().skipped_samples
+ }
+
/// Mark the data stored in YjitExitLocations::get_raw_samples that needs to be used by
/// rb_yjit_add_frame. YjitExitLocations::get_raw_samples are an array of
/// VALUE pointers, exit instruction, and number of times we've seen this stack row
@@ -76,13 +191,8 @@ impl YjitExitLocations {
return;
}
- // Return if the stats feature is disabled
- if !cfg!(feature = "stats") {
- return;
- }
-
// Return if --yjit-trace-exits isn't enabled
- if !get_option!(gen_trace_exits) {
+ if get_option!(trace_exits).is_none() {
return;
}
@@ -104,7 +214,7 @@ impl YjitExitLocations {
// Increase index for exit instruction.
idx += 1;
- // Increase index for bookeeping value (number of times we've seen this
+ // Increase index for bookkeeping value (number of times we've seen this
// row in a stack).
idx += 1;
}
@@ -118,6 +228,28 @@ macro_rules! make_counters {
#[derive(Default, Debug)]
pub struct Counters { $(pub $counter_name: u64),+ }
+ /// Enum to represent a counter
+ #[allow(non_camel_case_types)]
+ #[derive(Clone, Copy, PartialEq, Eq, Debug)]
+ pub enum Counter { $($counter_name),+ }
+
+ impl Counter {
+ /// Map a counter name string to a counter enum
+ pub fn get(name: &str) -> Option<Counter> {
+ match name {
+ $( stringify!($counter_name) => { Some(Counter::$counter_name) } ),+
+ _ => None,
+ }
+ }
+
+ /// Get a counter name string
+ pub fn get_name(&self) -> String {
+ match self {
+ $( Counter::$counter_name => stringify!($counter_name).to_string() ),+
+ }
+ }
+ }
+
/// Global counters instance, initialized to zero
pub static mut COUNTERS: Counters = Counters { $($counter_name: 0),+ };
@@ -125,7 +257,7 @@ macro_rules! make_counters {
const COUNTER_NAMES: &'static [&'static str] = &[ $(stringify!($counter_name)),+ ];
/// Map a counter name string to a counter pointer
- fn get_counter_ptr(name: &str) -> *mut u64 {
+ pub fn get_counter_ptr(name: &str) -> *mut u64 {
match name {
$( stringify!($counter_name) => { ptr_to_counter!($counter_name) } ),+
_ => panic!()
@@ -134,6 +266,59 @@ macro_rules! make_counters {
}
}
+/// The list of counters that are available without --yjit-stats.
+/// They are incremented only by `incr_counter!` and don't use `gen_counter_incr`.
+pub const DEFAULT_COUNTERS: [Counter; 16] = [
+ Counter::code_gc_count,
+ Counter::compiled_iseq_entry,
+ Counter::cold_iseq_entry,
+ Counter::compiled_iseq_count,
+ Counter::compiled_blockid_count,
+ Counter::compiled_block_count,
+ Counter::compiled_branch_count,
+ Counter::compile_time_ns,
+ Counter::max_inline_versions,
+
+ Counter::invalidation_count,
+ Counter::invalidate_method_lookup,
+ Counter::invalidate_bop_redefined,
+ Counter::invalidate_ractor_spawn,
+ Counter::invalidate_constant_state_bump,
+ Counter::invalidate_constant_ic_fill,
+ Counter::invalidate_no_singleton_class,
+];
+
+/// Macro to increase a counter by name and count
+macro_rules! incr_counter_by {
+ // Unsafe is ok here because options are initialized
+ // once before any Ruby code executes
+ ($counter_name:ident, $count:expr) => {
+ #[allow(unused_unsafe)]
+ {
+ unsafe { $crate::stats::COUNTERS.$counter_name += $count as u64 }
+ }
+ };
+}
+pub(crate) use incr_counter_by;
+
+/// Macro to increase a counter if the given value is larger
+macro_rules! incr_counter_to {
+ // Unsafe is ok here because options are initialized
+ // once before any Ruby code executes
+ ($counter_name:ident, $count:expr) => {
+ #[allow(unused_unsafe)]
+ {
+ unsafe {
+ $crate::stats::COUNTERS.$counter_name = u64::max(
+ $crate::stats::COUNTERS.$counter_name,
+ $count as u64,
+ )
+ }
+ }
+ };
+}
+pub(crate) use incr_counter_to;
+
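The two macros above differ only in their combining operation: `incr_counter_by!` accumulates, while `incr_counter_to!` keeps a high-water mark. The plain-function sketch below mirrors that semantics for illustration; the counter names mentioned in the comments are just examples drawn from the list further down.

```rust
fn incr_by(counter: &mut u64, count: u64) {
    *counter += count; // accumulate, e.g. nanoseconds into compile_time_ns
}

fn incr_to(counter: &mut u64, count: u64) {
    *counter = u64::max(*counter, count); // high-water mark, e.g. max_inline_versions
}

fn main() {
    let mut total = 0u64;
    incr_by(&mut total, 1_500);
    incr_by(&mut total, 500);
    assert_eq!(total, 2_000);

    let mut max = 0u64;
    incr_to(&mut max, 3);
    incr_to(&mut max, 2); // smaller value is ignored
    assert_eq!(max, 3);
}
```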
/// Macro to increment a counter by name
macro_rules! incr_counter {
// Unsafe is ok here because options are initialized
@@ -160,71 +345,219 @@ pub(crate) use ptr_to_counter;
// Declare all the counters we track
make_counters! {
- exec_instruction,
+ yjit_insns_count,
- send_keywords,
- send_kw_splat,
- send_args_splat,
- send_block_arg,
+ // Method calls that fallback to dynamic dispatch
+ send_singleton_class,
send_ivar_set_method,
send_zsuper_method,
send_undef_method,
- send_optimized_method,
- send_optimized_method_send,
- send_optimized_method_call,
send_optimized_method_block_call,
+ send_call_block,
+ send_call_kwarg,
+ send_call_multi_ractor,
+ send_cme_not_found,
+ send_megamorphic,
send_missing_method,
- send_bmethod,
send_refined_method,
- send_cfunc_ruby_array_varg,
+ send_private_not_fcall,
+ send_cfunc_kw_splat_non_nil,
+ send_cfunc_splat_neg2,
send_cfunc_argc_mismatch,
+ send_cfunc_block_arg,
send_cfunc_toomany_args,
send_cfunc_tracing,
- send_cfunc_kwargs,
+ send_cfunc_splat_with_kw,
+ send_cfunc_splat_varg_ruby2_keywords,
send_attrset_kwargs,
+ send_attrset_block_arg,
send_iseq_tailcall,
send_iseq_arity_error,
- send_iseq_only_keywords,
- send_iseq_kwargs_req_and_opt_missing,
+ send_iseq_block_arg_type,
+ send_iseq_clobbering_block_arg,
+ send_iseq_complex_discard_extras,
+ send_iseq_leaf_builtin_block_arg_block_param,
+ send_iseq_kw_splat_non_nil,
send_iseq_kwargs_mismatch,
- send_iseq_complex_callee,
+ send_iseq_has_post,
+ send_iseq_has_no_kw,
+ send_iseq_accepts_no_kwarg,
+ send_iseq_materialized_block,
+ send_iseq_splat_not_array,
+ send_iseq_splat_with_kw,
+ send_iseq_missing_optional_kw,
+ send_iseq_too_many_kwargs,
send_not_implemented_method,
send_getter_arity,
- send_se_cf_overflow,
- send_se_protected_check_failed,
+ send_getter_block_arg,
+ send_args_splat_attrset,
+ send_args_splat_bmethod,
+ send_args_splat_aref,
+ send_args_splat_aset,
+ send_args_splat_opt_call,
+ send_iseq_splat_arity_error,
+ send_splat_too_long,
+ send_send_wrong_args,
+ send_send_null_mid,
+ send_send_null_cme,
+ send_send_nested,
+ send_send_attr_reader,
+ send_send_attr_writer,
+ send_iseq_has_rest_and_captured,
+ send_iseq_has_kwrest_and_captured,
+ send_iseq_has_rest_and_kw_supplied,
+ send_iseq_has_rest_opt_and_block,
+ send_bmethod_ractor,
+ send_bmethod_block_arg,
+ send_optimized_block_arg,
+
+ invokesuper_defined_class_mismatch,
+ invokesuper_kw_splat,
+ invokesuper_kwarg,
+ invokesuper_megamorphic,
+ invokesuper_no_cme,
+ invokesuper_no_me,
+ invokesuper_not_iseq_or_cfunc,
+ invokesuper_refinement,
+ invokesuper_singleton_class,
+
+ invokeblock_megamorphic,
+ invokeblock_none,
+ invokeblock_iseq_arg0_optional,
+ invokeblock_iseq_arg0_args_splat,
+ invokeblock_iseq_arg0_not_array,
+ invokeblock_iseq_arg0_wrong_len,
+ invokeblock_iseq_not_inlined,
+ invokeblock_ifunc_args_splat,
+ invokeblock_ifunc_kw_splat,
+ invokeblock_proc,
+ invokeblock_symbol,
+
+ // Method calls that exit to the interpreter
+ guard_send_block_arg_type,
+ guard_send_getter_splat_non_empty,
+ guard_send_klass_megamorphic,
+ guard_send_se_cf_overflow,
+ guard_send_se_protected_check_failed,
+ guard_send_splatarray_length_not_equal,
+ guard_send_splatarray_last_ruby2_keywords,
+ guard_send_splat_not_array,
+ guard_send_send_name_chain,
+ guard_send_iseq_has_rest_and_splat_too_few,
+ guard_send_is_a_class_mismatch,
+ guard_send_instance_of_class_mismatch,
+ guard_send_interrupted,
+ guard_send_not_fixnums,
+ guard_send_not_fixnum_or_flonum,
+ guard_send_not_string,
+ guard_send_respond_to_mid_mismatch,
+
+ guard_send_cfunc_bad_splat_vargs,
+
+ guard_invokesuper_me_changed,
+
+ guard_invokeblock_tag_changed,
+ guard_invokeblock_iseq_block_changed,
traced_cfunc_return,
- invokesuper_me_changed,
- invokesuper_block,
-
leave_se_interrupt,
leave_interp_return,
- leave_start_pc_non_zero,
- getivar_se_self_not_heap,
- getivar_idx_out_of_range,
getivar_megamorphic,
+ getivar_not_heap,
- setivar_se_self_not_heap,
- setivar_idx_out_of_range,
- setivar_val_heapobject,
- setivar_name_not_mapped,
- setivar_not_object,
+ setivar_not_heap,
setivar_frozen,
+ setivar_megamorphic,
+
+ definedivar_not_heap,
+ definedivar_megamorphic,
+
+ setlocal_wb_required,
+
+ invokebuiltin_too_many_args,
+
+ opt_plus_overflow,
+ opt_minus_overflow,
+ opt_mult_overflow,
+
+ opt_succ_not_fixnum,
+ opt_succ_overflow,
+
+ opt_mod_zero,
+ opt_div_zero,
+
+ lshift_amount_changed,
+ lshift_overflow,
+
+ rshift_amount_changed,
+
+ opt_aref_argc_not_one,
+ opt_aref_arg_not_fixnum,
+ opt_aref_not_array,
+ opt_aref_not_hash,
+
+ opt_aset_not_array,
+ opt_aset_not_fixnum,
+ opt_aset_not_hash,
+
+ opt_aref_with_qundef,
+
+ opt_case_dispatch_megamorphic,
- oaref_argc_not_one,
- oaref_arg_not_fixnum,
+ opt_getconstant_path_ic_miss,
+ opt_getconstant_path_multi_ractor,
- opt_getinlinecache_miss,
+ expandarray_splat,
+ expandarray_postarg,
+ expandarray_not_array,
+ expandarray_to_ary,
+ expandarray_chain_max_depth,
+
+ // getblockparam
+ gbp_wb_required,
+
+ // getblockparamproxy
+ gbpp_unsupported_type,
+ gbpp_block_param_modified,
+ gbpp_block_handler_not_none,
+ gbpp_block_handler_not_iseq,
+ gbpp_block_handler_not_proc,
+
+ branchif_interrupted,
+ branchunless_interrupted,
+ branchnil_interrupted,
+ jump_interrupted,
+
+ objtostring_not_string,
+
+ getbyte_idx_not_fixnum,
+ getbyte_idx_negative,
+ getbyte_idx_out_of_bounds,
+
+ splatkw_not_hash,
+ splatkw_not_nil,
binding_allocations,
binding_set,
- vm_insns_count,
+ compiled_iseq_entry,
+ cold_iseq_entry,
compiled_iseq_count,
+ compiled_blockid_count,
compiled_block_count,
+ compiled_branch_count,
+ compile_time_ns,
compilation_failure,
+ block_next_count,
+ defer_count,
+ defer_empty_count,
+ branch_insn_count,
+ branch_known_count,
+ max_inline_versions,
+
+ freed_iseq_count,
exit_from_branch_stub,
@@ -234,20 +567,48 @@ make_counters! {
invalidate_ractor_spawn,
invalidate_constant_state_bump,
invalidate_constant_ic_fill,
-
- constant_state_bumps,
-
- expandarray_splat,
- expandarray_postarg,
- expandarray_not_array,
- expandarray_rhs_too_small,
-
- gbpp_block_param_modified,
- gbpp_block_handler_not_iseq,
+ invalidate_no_singleton_class,
// Currently, it's out of the ordinary (might be impossible) for YJIT to leave gaps in
// executable memory, so this should be 0.
exec_mem_non_bump_alloc,
+
+ code_gc_count,
+
+ num_gc_obj_refs,
+
+ num_send,
+ num_send_known_class,
+ num_send_polymorphic,
+ num_send_x86_rel32,
+ num_send_x86_reg,
+ num_send_dynamic,
+ num_send_cfunc,
+ num_send_cfunc_inline,
+ num_send_iseq,
+ num_send_iseq_leaf,
+ num_send_iseq_inline,
+
+ num_getivar_megamorphic,
+ num_setivar_megamorphic,
+ num_opt_case_dispatch_megamorphic,
+
+ num_throw,
+ num_throw_break,
+ num_throw_retry,
+ num_throw_return,
+
+ num_lazy_frame_check,
+ num_lazy_frame_push,
+ lazy_frame_count,
+ lazy_frame_failure,
+
+ iseq_stack_too_large,
+ iseq_too_long,
+
+ temp_reg_opnd,
+ temp_mem_opnd,
+ temp_spill,
}
//===========================================================================
@@ -256,19 +617,30 @@ make_counters! {
/// Check if stats generation is enabled
#[no_mangle]
pub extern "C" fn rb_yjit_stats_enabled_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
- #[cfg(feature = "stats")]
+
if get_option!(gen_stats) {
return Qtrue;
+ } else {
+ return Qfalse;
}
+}
- return Qfalse;
+/// Primitive called in yjit.rb
+/// Check if stats generation should print at exit
+#[no_mangle]
+pub extern "C" fn rb_yjit_print_stats_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
+ if yjit_enabled_p() && get_option!(print_stats) {
+ return Qtrue;
+ } else {
+ return Qfalse;
+ }
}
/// Primitive called in yjit.rb.
/// Export all YJIT statistics as a Ruby hash.
#[no_mangle]
-pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
- with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict())
+pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE, context: VALUE) -> VALUE {
+ with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict(context == Qtrue))
}
/// Primitive called in yjit.rb
@@ -277,8 +649,7 @@ pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
/// to be enabled.
#[no_mangle]
pub extern "C" fn rb_yjit_trace_exit_locations_enabled_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
- #[cfg(feature = "stats")]
- if get_option!(gen_trace_exits) {
+ if get_option!(trace_exits).is_some() {
return Qtrue;
}
@@ -294,13 +665,8 @@ pub extern "C" fn rb_yjit_get_exit_locations(_ec: EcPtr, _ruby_self: VALUE) -> V
return Qnil;
}
- // Return if the stats feature is disabled
- if !cfg!(feature = "stats") {
- return Qnil;
- }
-
// Return if --yjit-trace-exits isn't enabled
- if !get_option!(gen_trace_exits) {
+ if get_option!(trace_exits).is_none() {
return Qnil;
}
@@ -322,39 +688,92 @@ pub extern "C" fn rb_yjit_get_exit_locations(_ec: EcPtr, _ruby_self: VALUE) -> V
}
}
+/// Increment a counter by name from the CRuby side
+/// Warning: this is not fast because it requires a hash lookup, so don't use in tight loops
+#[no_mangle]
+pub extern "C" fn rb_yjit_incr_counter(counter_name: *const std::os::raw::c_char) {
+ use std::ffi::CStr;
+ let counter_name = unsafe { CStr::from_ptr(counter_name).to_str().unwrap() };
+ let counter_ptr = get_counter_ptr(counter_name);
+ unsafe { *counter_ptr += 1 };
+}
+
/// Export all YJIT statistics as a Ruby hash.
-fn rb_yjit_gen_stats_dict() -> VALUE {
+fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
// If YJIT is not enabled, return Qnil
if !yjit_enabled_p() {
return Qnil;
}
+ macro_rules! hash_aset_usize {
+ ($hash:ident, $counter_name:expr, $value:expr) => {
+ let key = rust_str_to_sym($counter_name);
+ let value = VALUE::fixnum_from_usize($value);
+ rb_hash_aset($hash, key, value);
+ }
+ }
+
let hash = unsafe { rb_hash_new() };
- // Inline and outlined code size
unsafe {
// Get the inline and outlined code blocks
let cb = CodegenGlobals::get_inline_cb();
let ocb = CodegenGlobals::get_outlined_cb();
// Inline code size
- let key = rust_str_to_sym("inline_code_size");
- let value = VALUE::fixnum_from_usize(cb.get_write_pos());
- rb_hash_aset(hash, key, value);
+ hash_aset_usize!(hash, "inline_code_size", cb.code_size());
// Outlined code size
- let key = rust_str_to_sym("outlined_code_size");
- let value = VALUE::fixnum_from_usize(ocb.unwrap().get_write_pos());
- rb_hash_aset(hash, key, value);
+ hash_aset_usize!(hash, "outlined_code_size", ocb.unwrap().code_size());
+
+ // GCed pages
+ let freed_page_count = cb.num_freed_pages();
+ hash_aset_usize!(hash, "freed_page_count", freed_page_count);
+
+ // GCed code size
+ hash_aset_usize!(hash, "freed_code_size", freed_page_count * cb.page_size());
+
+ // Live pages
+ hash_aset_usize!(hash, "live_page_count", cb.num_mapped_pages() - freed_page_count);
+
+ // Size of memory region allocated for JIT code
+ hash_aset_usize!(hash, "code_region_size", cb.mapped_region_size());
+
+ // Rust global allocations in bytes
+ hash_aset_usize!(hash, "yjit_alloc_size", GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst));
+
+ // `context` is true at RubyVM::YJIT._print_stats for --yjit-stats. It's false by default
+ // for RubyVM::YJIT.runtime_stats because counting all Contexts could be expensive.
+ if context {
+ let live_context_count = get_live_context_count();
+ let context_size = std::mem::size_of::<Context>();
+ hash_aset_usize!(hash, "live_context_count", live_context_count);
+ hash_aset_usize!(hash, "live_context_size", live_context_count * context_size);
+ }
+
+ // VM instructions count
+ hash_aset_usize!(hash, "vm_insns_count", rb_vm_insns_count as usize);
+
+ hash_aset_usize!(hash, "live_iseq_count", rb_yjit_live_iseq_count as usize);
+ hash_aset_usize!(hash, "iseq_alloc_count", rb_yjit_iseq_alloc_count as usize);
}
- // If we're not generating stats, the hash is done
+ // If we're not generating stats, put only default counters
if !get_option!(gen_stats) {
+ for counter in DEFAULT_COUNTERS {
+ // Get the counter value
+ let counter_ptr = get_counter_ptr(&counter.get_name());
+ let counter_val = unsafe { *counter_ptr };
+
+ // Put counter into hash
+ let key = rust_str_to_sym(&counter.get_name());
+ let value = VALUE::fixnum_from_usize(counter_val as usize);
+ unsafe { rb_hash_aset(hash, key, value); }
+ }
+
return hash;
}
- // If the stats feature is enabled
- #[cfg(feature = "stats")]
unsafe {
// Indicate that the complete set of stats is available
rb_hash_aset(hash, rust_str_to_sym("all_stats"), Qtrue);
@@ -380,11 +799,51 @@ fn rb_yjit_gen_stats_dict() -> VALUE {
let value = VALUE::fixnum_from_usize(EXIT_OP_COUNT[op_idx] as usize);
rb_hash_aset(hash, key, value);
}
+
+ fn set_call_counts(
+ calls_hash: VALUE,
+ method_name_to_idx: &mut Option<HashMap<String, usize>>,
+ method_call_count: &mut Option<Vec<u64>>,
+ ) {
+ if let (Some(name_to_idx), Some(call_counts)) = (method_name_to_idx, method_call_count) {
+ for (name, idx) in name_to_idx {
+ let count = call_counts[*idx];
+ let key = rust_str_to_sym(name);
+ let value = VALUE::fixnum_from_usize(count as usize);
+ unsafe { rb_hash_aset(calls_hash, key, value); }
+ }
+ }
+ }
+
+ // Create a hash for the cfunc call counts
+ let cfunc_calls = rb_hash_new();
+ rb_hash_aset(hash, rust_str_to_sym("cfunc_calls"), cfunc_calls);
+ set_call_counts(cfunc_calls, &mut *addr_of_mut!(CFUNC_NAME_TO_IDX), &mut *addr_of_mut!(CFUNC_CALL_COUNT));
+
+ // Create a hash for the ISEQ call counts
+ let iseq_calls = rb_hash_new();
+ rb_hash_aset(hash, rust_str_to_sym("iseq_calls"), iseq_calls);
+ set_call_counts(iseq_calls, &mut *addr_of_mut!(ISEQ_NAME_TO_IDX), &mut *addr_of_mut!(ISEQ_CALL_COUNT));
}
hash
}
+fn get_live_context_count() -> usize {
+ let mut count = 0;
+ for_each_iseq_payload(|iseq_payload| {
+ for blocks in iseq_payload.version_map.iter() {
+ for block in blocks.iter() {
+ count += unsafe { block.as_ref() }.get_ctx_count();
+ }
+ }
+ for block in iseq_payload.dead_blocks.iter() {
+ count += unsafe { block.as_ref() }.get_ctx_count();
+ }
+ });
+ count
+}
+
/// Record the backtrace when a YJIT exit occurs. This functionality requires
/// that the stats feature is enabled as well as the --yjit-trace-exits option.
///
@@ -392,35 +851,40 @@ fn rb_yjit_gen_stats_dict() -> VALUE {
/// and line samples. Their length should be the same, however the data stored in
/// them is different.
#[no_mangle]
-pub extern "C" fn rb_yjit_record_exit_stack(exit_pc: *const VALUE)
+pub extern "C" fn rb_yjit_record_exit_stack(_exit_pc: *const VALUE)
{
// Return if YJIT is not enabled
if !yjit_enabled_p() {
return;
}
- // Return if the stats feature is disabled
- if !cfg!(feature = "stats") {
+ // Return if --yjit-trace-exits isn't enabled
+ if get_option!(trace_exits).is_none() {
return;
}
- // Return if --yjit-trace-exits isn't enabled
- if !get_option!(gen_trace_exits) {
- return;
+ if get_option!(trace_exits_sample_rate) > 0 {
+ if get_option!(trace_exits_sample_rate) <= *YjitExitLocations::get_skipped_samples() {
+ YjitExitLocations::get_instance().skipped_samples = 0;
+ } else {
+ YjitExitLocations::get_instance().skipped_samples += 1;
+ return;
+ }
}
// rb_vm_insn_addr2opcode won't work in cargo test --all-features
// because it's a C function. Without insn call, this function is useless
// so wrap the whole thing in a not test check.
- if cfg!(not(test)) {
+ #[cfg(not(test))]
+ {
// Get the opcode from the encoded insn handler at this PC
- let insn = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) };
+ let insn = unsafe { rb_vm_insn_addr2opcode((*_exit_pc).as_ptr()) };
// Use the same buffer size as Stackprof.
const BUFF_LEN: usize = 2048;
// Create 2 array buffers to be used to collect frames and lines.
- let mut frames_buffer = [VALUE(0 as usize); BUFF_LEN];
+ let mut frames_buffer = [VALUE(0_usize); BUFF_LEN];
let mut lines_buffer = [0; BUFF_LEN];
// Records call frame and line information for each method entry into two
@@ -430,34 +894,78 @@ pub extern "C" fn rb_yjit_record_exit_stack(exit_pc: *const VALUE)
// Call frame info is stored in the frames_buffer, line number information
// in the lines_buffer. The first argument is the start point and the second
// argument is the buffer limit, set at 2048.
- let num = unsafe { rb_profile_frames(0, BUFF_LEN as i32, frames_buffer.as_mut_ptr(), lines_buffer.as_mut_ptr()) };
+ let stack_length = unsafe { rb_profile_frames(0, BUFF_LEN as i32, frames_buffer.as_mut_ptr(), lines_buffer.as_mut_ptr()) };
+ let samples_length = (stack_length as usize) + 3;
- let mut i = num - 1;
let yjit_raw_samples = YjitExitLocations::get_raw_samples();
let yjit_line_samples = YjitExitLocations::get_line_samples();
- yjit_raw_samples.push(VALUE(num as usize));
- yjit_line_samples.push(num);
+ // If yjit_raw_samples already holds at least one sample of this length,
+ // we might have seen this stack trace previously.
+ if yjit_raw_samples.len() >= samples_length {
+ let prev_stack_len_index = yjit_raw_samples.len() - samples_length;
+ let prev_stack_len = i64::from(yjit_raw_samples[prev_stack_len_index]);
+ let mut idx = stack_length - 1;
+ let mut prev_frame_idx = 0;
+ let mut seen_already = true;
+
+ // If the previous stack length and current stack length are equal,
+ // loop and compare the current frame to the previous frame. If they are
+ // not equal, set seen_already to false and break out of the loop.
+ if prev_stack_len == stack_length as i64 {
+ while idx >= 0 {
+ let current_frame = frames_buffer[idx as usize];
+ let prev_frame = yjit_raw_samples[prev_stack_len_index + prev_frame_idx + 1];
+
+ // If the current frame and previous frame are not equal, set
+ // seen_already to false and break out of the loop.
+ if current_frame != prev_frame {
+ seen_already = false;
+ break;
+ }
+
+ idx -= 1;
+ prev_frame_idx += 1;
+ }
+
+ // If we know we've seen this stack before, increment the counter by 1.
+ if seen_already {
+ let prev_idx = yjit_raw_samples.len() - 1;
+ let prev_count = i64::from(yjit_raw_samples[prev_idx]);
+ let new_count = prev_count + 1;
+
+ yjit_raw_samples[prev_idx] = VALUE(new_count as usize);
+ yjit_line_samples[prev_idx] = new_count as i32;
+
+ return;
+ }
+ }
+ }
+
+ yjit_raw_samples.push(VALUE(stack_length as usize));
+ yjit_line_samples.push(stack_length);
- while i >= 0 {
- let frame = frames_buffer[i as usize];
- let line = lines_buffer[i as usize];
+ let mut idx = stack_length - 1;
+
+ while idx >= 0 {
+ let frame = frames_buffer[idx as usize];
+ let line = lines_buffer[idx as usize];
yjit_raw_samples.push(frame);
yjit_line_samples.push(line);
- i -= 1;
+ idx -= 1;
}
// Push the insn value into the yjit_raw_samples Vec.
yjit_raw_samples.push(VALUE(insn as usize));
- // Push the current line onto the yjit_line_samples Vec. This
- // points to the line in insns.def.
- let line = yjit_line_samples.len() - 1;
- yjit_line_samples.push(line as i32);
+ // We don't know the line
+ yjit_line_samples.push(0);
- yjit_raw_samples.push(VALUE(1 as usize));
+ // Push number of times seen onto the stack, which is 1
+ // because it's the first time we've seen it.
+ yjit_raw_samples.push(VALUE(1_usize));
yjit_line_samples.push(1);
}
}
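As the comments above describe, raw_samples is a flat buffer of rows shaped as [stack length, frames..., exit instruction, seen-count], and an exit whose row matches the previous one only bumps the trailing count. The sketch below is a self-contained illustration of that layout over plain u64s; the real buffer stores VALUEs, pushes frames in reverse order, and maintains a parallel line-number vector.

```rust
// Append one exit sample, or bump the count of the previous row if identical.
// Row layout: [stack_length, frame_0 .. frame_{n-1}, exit_insn, seen_count]
fn record_row(raw: &mut Vec<u64>, frames: &[u64], exit_insn: u64) {
    let row_len = frames.len() + 3;
    if raw.len() >= row_len {
        let prev = &raw[raw.len() - row_len..];
        let same = prev[0] == frames.len() as u64
            && &prev[1..1 + frames.len()] == frames
            && prev[1 + frames.len()] == exit_insn;
        if same {
            *raw.last_mut().unwrap() += 1; // seen this exact stack before
            return;
        }
    }
    raw.push(frames.len() as u64);
    raw.extend_from_slice(frames);
    raw.push(exit_insn);
    raw.push(1); // first time this row was seen
}

fn main() {
    let mut raw = Vec::new();
    record_row(&mut raw, &[0xa, 0xb], 42);
    record_row(&mut raw, &[0xa, 0xb], 42); // identical: count becomes 2
    assert_eq!(raw, vec![2, 0xa, 0xb, 42, 2]);
}
```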
@@ -473,12 +981,6 @@ pub extern "C" fn rb_yjit_reset_stats_bang(_ec: EcPtr, _ruby_self: VALUE) -> VAL
return Qnil;
}
-/// Increment the number of instructions executed by the interpreter
-#[no_mangle]
-pub extern "C" fn rb_yjit_collect_vm_usage_insn() {
- incr_counter!(vm_insns_count);
-}
-
#[no_mangle]
pub extern "C" fn rb_yjit_collect_binding_alloc() {
incr_counter!(binding_allocations);
@@ -503,3 +1005,12 @@ pub extern "C" fn rb_yjit_count_side_exit_op(exit_pc: *const VALUE) -> *const VA
// This function must return exit_pc!
return exit_pc;
}
+
+/// Measure the time taken by func() and add that to yjit_compile_time.
+pub fn with_compile_time<F, R>(func: F) -> R where F: FnOnce() -> R {
+ let start = Instant::now();
+ let ret = func();
+ let nanos = Instant::now().duration_since(start).as_nanos();
+ incr_counter_by!(compile_time_ns, nanos);
+ ret
+}
diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs
index 02fbce47d8..6bc66ee33e 100644
--- a/yjit/src/utils.rs
+++ b/yjit/src/utils.rs
@@ -1,12 +1,11 @@
#![allow(dead_code)] // Some functions for print debugging in here
-use crate::asm::x86_64::*;
-use crate::asm::*;
+use crate::backend::ir::*;
use crate::cruby::*;
use std::slice;
/// Trait for casting to [usize] that allows you to say `.as_usize()`.
-/// Implementation conditional on the the cast preserving the numeric value on
+/// Implementation conditional on the cast preserving the numeric value on
/// all inputs and being inexpensive.
///
/// [usize] is only guaranteed to be more than 16-bit wide, so we can't use
@@ -52,6 +51,20 @@ impl IntoUsize for u8 {
}
}
+/// The [Into<u64>] Rust does not provide.
+/// Convert to u64 with assurance that the value is preserved.
+/// Currently, `usize::BITS == 64` holds for all platforms we support.
+pub(crate) trait IntoU64 {
+ fn as_u64(self) -> u64;
+}
+
+#[cfg(target_pointer_width = "64")]
+impl IntoU64 for usize {
+ fn as_u64(self) -> u64 {
+ self as u64
+ }
+}
+
/// Compute an offset in bytes of a given struct field
#[allow(unused)]
macro_rules! offset_of {
@@ -71,34 +84,41 @@ macro_rules! offset_of {
#[allow(unused)]
pub(crate) use offset_of;
-#[cfg(test)]
-mod tests {
- #[test]
- fn min_max_preserved_after_cast_to_usize() {
- use crate::utils::IntoUsize;
-
- let min: usize = u64::MIN.as_usize();
- assert_eq!(min, u64::MIN.try_into().unwrap());
- let max: usize = u64::MAX.as_usize();
- assert_eq!(max, u64::MAX.try_into().unwrap());
-
- let min: usize = u32::MIN.as_usize();
- assert_eq!(min, u32::MIN.try_into().unwrap());
- let max: usize = u32::MAX.as_usize();
- assert_eq!(max, u32::MAX.try_into().unwrap());
+// Convert a CRuby UTF-8-encoded RSTRING into a Rust string.
+// This should work fine on ASCII strings and anything else
+// that is considered legal UTF-8, including embedded nulls.
+pub fn ruby_str_to_rust(v: VALUE) -> String {
+ let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8;
+ let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap();
+ let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) };
+ match String::from_utf8(str_slice.to_vec()) {
+ Ok(utf8) => utf8,
+ Err(_) => String::new(),
}
+}
- #[test]
- fn test_offset_of() {
- #[repr(C)]
- struct Foo {
- a: u8,
- b: u64,
- }
-
- assert_eq!(0, offset_of!(Foo, a), "C99 6.7.2.1p13 says no padding at the front");
- assert_eq!(8, offset_of!(Foo, b), "ABI dependent, but should hold");
+// Location is the file defining the method, colon, method name.
+// Filenames are sometimes internal strings supplied to eval,
+// so be careful with them.
+pub fn iseq_get_location(iseq: IseqPtr, pos: u16) -> String {
+ let iseq_label = unsafe { rb_iseq_label(iseq) };
+ let iseq_path = unsafe { rb_iseq_path(iseq) };
+ let iseq_lineno = unsafe { rb_iseq_line_no(iseq, pos as usize) };
+
+ let mut s = if iseq_label == Qnil {
+ "None".to_string()
+ } else {
+ ruby_str_to_rust(iseq_label)
+ };
+ s.push_str("@");
+ if iseq_path == Qnil {
+ s.push_str("None");
+ } else {
+ s.push_str(&ruby_str_to_rust(iseq_path));
}
+ s.push_str(":");
+ s.push_str(&iseq_lineno.to_string());
+ s
}
// TODO: we may want to move this function into yjit.c, maybe add a convenient Rust-side wrapper
@@ -116,123 +136,156 @@ yjit_print_iseq(const rb_iseq_t *iseq)
}
*/
-// Save caller-save registers on the stack before a C call
-fn push_regs(cb: &mut CodeBlock) {
- push(cb, RAX);
- push(cb, RCX);
- push(cb, RDX);
- push(cb, RSI);
- push(cb, RDI);
- push(cb, R8);
- push(cb, R9);
- push(cb, R10);
- push(cb, R11);
- pushfq(cb);
-}
-
-// Restore caller-save registers from the after a C call
-fn pop_regs(cb: &mut CodeBlock) {
- popfq(cb);
- pop(cb, R11);
- pop(cb, R10);
- pop(cb, R9);
- pop(cb, R8);
- pop(cb, RDI);
- pop(cb, RSI);
- pop(cb, RDX);
- pop(cb, RCX);
- pop(cb, RAX);
-}
-
-pub fn print_int(cb: &mut CodeBlock, opnd: X86Opnd) {
- extern "sysv64" fn print_int_fn(val: i64) {
- println!("{}", val);
+#[cfg(target_arch = "aarch64")]
+macro_rules! c_callable {
+ ($(#[$outer:meta])*
+ fn $f:ident $args:tt $(-> $ret:ty)? $body:block) => {
+ $(#[$outer])*
+ extern "C" fn $f $args $(-> $ret)? $body
+ };
+}
+
+#[cfg(target_arch = "x86_64")]
+macro_rules! c_callable {
+ ($(#[$outer:meta])*
+ fn $f:ident $args:tt $(-> $ret:ty)? $body:block) => {
+ $(#[$outer])*
+ extern "sysv64" fn $f $args $(-> $ret)? $body
+ };
+}
+pub(crate) use c_callable;
+
+pub fn print_int(asm: &mut Assembler, opnd: Opnd) {
+ c_callable!{
+ fn print_int_fn(val: i64) {
+ println!("{}", val);
+ }
}
- push_regs(cb);
+ asm.cpush_all();
- match opnd {
- X86Opnd::Mem(_) | X86Opnd::Reg(_) => {
+ let argument = match opnd {
+ Opnd::Mem(_) | Opnd::Reg(_) | Opnd::InsnOut { .. } => {
// Sign-extend the value if necessary
- if opnd.num_bits() < 64 {
- movsx(cb, C_ARG_REGS[0], opnd);
+ if opnd.rm_num_bits() < 64 {
+ asm.load_sext(opnd)
} else {
- mov(cb, C_ARG_REGS[0], opnd);
+ opnd
}
- }
- X86Opnd::Imm(_) | X86Opnd::UImm(_) => {
- mov(cb, C_ARG_REGS[0], opnd);
- }
+ },
+ Opnd::Imm(_) | Opnd::UImm(_) => opnd,
_ => unreachable!(),
- }
+ };
- mov(cb, RAX, const_ptr_opnd(print_int_fn as *const u8));
- call(cb, RAX);
- pop_regs(cb);
+ asm.ccall(print_int_fn as *const u8, vec![argument]);
+ asm.cpop_all();
}
/// Generate code to print a pointer
-pub fn print_ptr(cb: &mut CodeBlock, opnd: X86Opnd) {
- extern "sysv64" fn print_ptr_fn(ptr: *const u8) {
- println!("{:p}", ptr);
+pub fn print_ptr(asm: &mut Assembler, opnd: Opnd) {
+ c_callable!{
+ fn print_ptr_fn(ptr: *const u8) {
+ println!("{:p}", ptr);
+ }
}
- assert!(opnd.num_bits() == 64);
+ assert!(opnd.rm_num_bits() == 64);
- push_regs(cb);
- mov(cb, C_ARG_REGS[0], opnd);
- mov(cb, RAX, const_ptr_opnd(print_ptr_fn as *const u8));
- call(cb, RAX);
- pop_regs(cb);
+ asm.cpush_all();
+ asm.ccall(print_ptr_fn as *const u8, vec![opnd]);
+ asm.cpop_all();
}
/// Generate code to print a value
-pub fn print_value(cb: &mut CodeBlock, opnd: X86Opnd) {
- extern "sysv64" fn print_value_fn(val: VALUE) {
- unsafe { rb_obj_info_dump(val) }
+pub fn print_value(asm: &mut Assembler, opnd: Opnd) {
+ c_callable!{
+ fn print_value_fn(val: VALUE) {
+ unsafe { rb_obj_info_dump(val) }
+ }
}
- assert!(opnd.num_bits() == 64);
-
- push_regs(cb);
-
- mov(cb, RDI, opnd);
- mov(cb, RAX, const_ptr_opnd(print_value_fn as *const u8));
- call(cb, RAX);
+ assert!(matches!(opnd, Opnd::Value(_)));
- pop_regs(cb);
+ asm.cpush_all();
+ asm.ccall(print_value_fn as *const u8, vec![opnd]);
+ asm.cpop_all();
}
/// Generate code to print constant string to stdout
-pub fn print_str(cb: &mut CodeBlock, str: &str) {
- extern "sysv64" fn print_str_cfun(ptr: *const u8, num_bytes: usize) {
- unsafe {
- let slice = slice::from_raw_parts(ptr, num_bytes);
- let str = std::str::from_utf8(slice).unwrap();
- println!("{}", str);
+pub fn print_str(asm: &mut Assembler, str: &str) {
+ c_callable!{
+ fn print_str_cfun(ptr: *const u8, num_bytes: usize) {
+ unsafe {
+ let slice = slice::from_raw_parts(ptr, num_bytes);
+ let str = std::str::from_utf8(slice).unwrap();
+ println!("{}", str);
+ }
}
}
- let bytes = str.as_ptr();
- let num_bytes = str.len();
+ asm.cpush_all();
+
+ let string_data = asm.new_label("string_data");
+ let after_string = asm.new_label("after_string");
+
+ asm.jmp(after_string);
+ asm.write_label(string_data);
+ asm.bake_string(str);
+ asm.write_label(after_string);
+
+ let opnd = asm.lea_jump_target(string_data);
+ asm.ccall(print_str_cfun as *const u8, vec![opnd, Opnd::UImm(str.len() as u64)]);
+
+ asm.cpop_all();
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::asm::CodeBlock;
+
+ #[test]
+ fn min_max_preserved_after_cast_to_usize() {
+ use crate::utils::IntoUsize;
+
+ let min: usize = u64::MIN.as_usize();
+ assert_eq!(min, u64::MIN.try_into().unwrap());
+ let max: usize = u64::MAX.as_usize();
+ assert_eq!(max, u64::MAX.try_into().unwrap());
- push_regs(cb);
+ let min: usize = u32::MIN.as_usize();
+ assert_eq!(min, u32::MIN.try_into().unwrap());
+ let max: usize = u32::MAX.as_usize();
+ assert_eq!(max, u32::MAX.try_into().unwrap());
+ }
- // Load the string address and jump over the string data
- lea(cb, C_ARG_REGS[0], mem_opnd(8, RIP, 5));
- jmp32(cb, num_bytes as i32);
+ #[test]
+ fn test_offset_of() {
+ #[repr(C)]
+ struct Foo {
+ a: u8,
+ b: u64,
+ }
- // Write the string chars and a null terminator
- for i in 0..num_bytes {
- cb.write_byte(unsafe { *bytes.add(i) });
+ assert_eq!(0, offset_of!(Foo, a), "C99 6.7.2.1p13 says no padding at the front");
+ assert_eq!(8, offset_of!(Foo, b), "ABI dependent, but should hold");
}
- // Pass the string length as an argument
- mov(cb, C_ARG_REGS[1], uimm_opnd(num_bytes as u64));
+ #[test]
+ fn test_print_int() {
+ let mut asm = Assembler::new();
+ let mut cb = CodeBlock::new_dummy(1024);
- // Call the print function
- mov(cb, RAX, const_ptr_opnd(print_str_cfun as *const u8));
- call(cb, RAX);
+ print_int(&mut asm, Opnd::Imm(42));
+ asm.compile(&mut cb, None).unwrap();
+ }
- pop_regs(cb);
+ #[test]
+ fn test_print_str() {
+ let mut asm = Assembler::new();
+ let mut cb = CodeBlock::new_dummy(1024);
+
+ print_str(&mut asm, "Hello, world!");
+ asm.compile(&mut cb, None).unwrap();
+ }
}
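The callbacks above are wrapped in `c_callable!` rather than declared `extern "sysv64"` directly, which is what lets the same helpers target both x86_64 and arm64. The macro's definition is outside this hunk, so the sketch below is only an assumed, simplified shape (the name `c_callable_sketch` and its pattern are hypothetical), not the real definition in yjit/src/utils.rs.

```rust
// Assumed, simplified sketch of a c_callable!-style wrapper; the real macro in
// yjit/src/utils.rs may differ. It pins the C calling convention per target so
// each callback doesn't have to repeat `extern "sysv64"` / `extern "C"`.
#[cfg(target_arch = "x86_64")]
macro_rules! c_callable_sketch {
    (fn $name:ident($($arg:ident: $ty:ty),*) $body:block) => {
        extern "sysv64" fn $name($($arg: $ty),*) $body
    };
}

#[cfg(not(target_arch = "x86_64"))]
macro_rules! c_callable_sketch {
    (fn $name:ident($($arg:ident: $ty:ty),*) $body:block) => {
        extern "C" fn $name($($arg: $ty),*) $body
    };
}

// Usage, mirroring print_ptr_fn from the hunk above:
c_callable_sketch! {
    fn example_print_ptr(ptr: *const u8) {
        println!("{:p}", ptr);
    }
}
```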
diff --git a/yjit/src/virtualmem.rs b/yjit/src/virtualmem.rs
index 6a8e27447e..f3c0ceefff 100644
--- a/yjit/src/virtualmem.rs
+++ b/yjit/src/virtualmem.rs
@@ -3,7 +3,9 @@
// usize->pointer casts is viable. It seems like a lot of work for us to participate for not much
// benefit.
-use crate::utils::IntoUsize;
+use std::ptr::NonNull;
+
+use crate::{utils::IntoUsize, backend::ir::Target};
#[cfg(not(test))]
pub type VirtualMem = VirtualMemory<sys::SystemAllocator>;
@@ -22,7 +24,7 @@ pub type VirtualMem = VirtualMemory<tests::TestingAllocator>;
/// the code in the region executable.
pub struct VirtualMemory<A: Allocator> {
/// Location of the virtual memory region.
- region_start: *mut u8,
+ region_start: NonNull<u8>,
/// Size of the region in bytes.
region_size_bytes: usize,
@@ -51,14 +53,47 @@ pub trait Allocator {
fn mark_writable(&mut self, ptr: *const u8, size: u32) -> bool;
fn mark_executable(&mut self, ptr: *const u8, size: u32);
+
+ fn mark_unused(&mut self, ptr: *const u8, size: u32) -> bool;
}
-/// Pointer into a [VirtualMemory].
-/// We may later change this to wrap an u32.
-/// Note: there is no NULL constant for CodePtr. You should use Option<CodePtr> instead.
+/// Pointer into a [VirtualMemory] represented as an offset from the base.
+/// Note: there is no NULL constant for [CodePtr]. You should use `Option<CodePtr>` instead.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)]
-#[repr(C)]
-pub struct CodePtr(*const u8);
+#[repr(C, packed)]
+pub struct CodePtr(u32);
+
+impl CodePtr {
+ /// Advance the CodePtr. Can return a dangling pointer.
+ pub fn add_bytes(self, bytes: usize) -> Self {
+ let CodePtr(raw) = self;
+ let bytes: u32 = bytes.try_into().unwrap();
+ CodePtr(raw + bytes)
+ }
+
+ /// Note that the raw pointer might be dangling if there haven't
+ /// been any writes to it through the [VirtualMemory] yet.
+ pub fn raw_ptr(self, base: &impl CodePtrBase) -> *const u8 {
+ let CodePtr(offset) = self;
+ return base.base_ptr().as_ptr().wrapping_add(offset.as_usize())
+ }
+
+ /// Get the address of the code pointer.
+ pub fn raw_addr(self, base: &impl CodePtrBase) -> usize {
+ self.raw_ptr(base) as usize
+ }
+
+ /// Get the offset component for the code pointer. Useful for finding the distance between two
+ /// code pointers that share the same [VirtualMem].
+ pub fn as_offset(self) -> i64 {
+ let CodePtr(offset) = self;
+ offset.into()
+ }
+
+ pub fn as_side_exit(self) -> Target {
+ Target::SideExitPtr(self)
+ }
+}
/// Errors that can happen when writing to [VirtualMemory]
#[derive(Debug, PartialEq)]
@@ -71,7 +106,7 @@ use WriteError::*;
impl<A: Allocator> VirtualMemory<A> {
/// Bring a part of the address space under management.
- pub fn new(allocator: A, page_size: u32, virt_region_start: *mut u8, size_bytes: usize) -> Self {
+ pub fn new(allocator: A, page_size: u32, virt_region_start: NonNull<u8>, size_bytes: usize) -> Self {
assert_ne!(0, page_size);
let page_size_bytes = page_size.as_usize();
@@ -88,7 +123,20 @@ impl<A: Allocator> VirtualMemory<A> {
/// Return the start of the region as a raw pointer. Note that it could be a dangling
/// pointer so be careful dereferencing it.
pub fn start_ptr(&self) -> CodePtr {
- CodePtr(self.region_start)
+ CodePtr(0)
+ }
+
+ pub fn mapped_end_ptr(&self) -> CodePtr {
+ self.start_ptr().add_bytes(self.mapped_region_bytes)
+ }
+
+ pub fn virtual_end_ptr(&self) -> CodePtr {
+ self.start_ptr().add_bytes(self.region_size_bytes)
+ }
+
+ /// Size of the region in bytes that we have allocated physical memory for.
+ pub fn mapped_region_size(&self) -> usize {
+ self.mapped_region_bytes
}
/// Size of the region in bytes where writes could be attempted.
@@ -96,17 +144,23 @@ impl<A: Allocator> VirtualMemory<A> {
self.region_size_bytes
}
+ /// The granularity at which we can control memory permission.
+ /// On Linux, this is the page size that mmap(2) talks about.
+ pub fn system_page_size(&self) -> usize {
+ self.page_size_bytes
+ }
+
/// Write a single byte. The first write to a page makes it readable.
pub fn write_byte(&mut self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> {
let page_size = self.page_size_bytes;
- let raw: *mut u8 = write_ptr.raw_ptr() as *mut u8;
+ let raw: *mut u8 = write_ptr.raw_ptr(self) as *mut u8;
let page_addr = (raw as usize / page_size) * page_size;
if self.current_write_page == Some(page_addr) {
// Writing within the last written to page, nothing to do
} else {
// Switching to a different and potentially new page
- let start = self.region_start;
+ let start = self.region_start.as_ptr();
let mapped_region_end = start.wrapping_add(self.mapped_region_bytes);
let whole_region_end = start.wrapping_add(self.region_size_bytes);
let alloc = &mut self.allocator;
@@ -141,10 +195,16 @@ impl<A: Allocator> VirtualMemory<A> {
if !alloc.mark_writable(mapped_region_end.cast(), alloc_size_u32) {
return Err(FailedPageMapping);
}
- // Fill new memory with PUSH DS (0x1E) so that executing uninitialized memory
- // will fault with #UD in 64-bit mode. On Linux it becomes SIGILL and use the
- // usual Ruby crash reporter.
- std::slice::from_raw_parts_mut(mapped_region_end, alloc_size).fill(0x1E);
+ if cfg!(target_arch = "x86_64") {
+ // Fill new memory with PUSH DS (0x1E) so that executing uninitialized memory
+ // will fault with #UD in 64-bit mode. On Linux it becomes SIGILL and uses the
+ // usual Ruby crash reporter.
+ std::slice::from_raw_parts_mut(mapped_region_end, alloc_size).fill(0x1E);
+ } else if cfg!(target_arch = "aarch64") {
+ // On aarch64, an all-zeros word encodes UDF, so it's already what we want.
+ } else {
+ unreachable!("unknown arch");
+ }
}
self.mapped_region_bytes = self.mapped_region_bytes + alloc_size;
@@ -169,39 +229,35 @@ impl<A: Allocator> VirtualMemory<A> {
let mapped_region_bytes: u32 = self.mapped_region_bytes.try_into().unwrap();
// Make mapped region executable
- self.allocator.mark_executable(region_start, mapped_region_bytes);
- }
-}
-
-impl CodePtr {
- /// Note that the raw pointer might be dangling if there hasn't
- /// been any writes to it through the [VirtualMemory] yet.
- pub fn raw_ptr(self) -> *const u8 {
- let CodePtr(ptr) = self;
- return ptr;
- }
-
- /// Advance the CodePtr. Can return a dangling pointer.
- pub fn add_bytes(self, bytes: usize) -> Self {
- let CodePtr(raw) = self;
- CodePtr(raw.wrapping_add(bytes))
+ self.allocator.mark_executable(region_start.as_ptr(), mapped_region_bytes);
}
- pub fn into_i64(self) -> i64 {
- let CodePtr(ptr) = self;
- ptr as i64
+ /// Free a range of bytes. start_ptr must be memory page-aligned.
+ pub fn free_bytes(&mut self, start_ptr: CodePtr, size: u32) {
+ assert_eq!(start_ptr.raw_ptr(self) as usize % self.page_size_bytes, 0);
+
+ // Bounds check the request. We should only free memory we manage.
+ let mapped_region = self.start_ptr().raw_ptr(self)..self.mapped_end_ptr().raw_ptr(self);
+ let virtual_region = self.start_ptr().raw_ptr(self)..self.virtual_end_ptr().raw_ptr(self);
+ let last_byte_to_free = start_ptr.add_bytes(size.saturating_sub(1).as_usize()).raw_ptr(self);
+ assert!(mapped_region.contains(&start_ptr.raw_ptr(self)));
+ // On platforms where code page size != memory page size (e.g. Linux), we often need
+ // to free code pages that contain unmapped memory pages. When it happens on the last
+ // code page, it's more appropriate to check the last byte against the virtual region.
+ assert!(virtual_region.contains(&last_byte_to_free));
+
+ self.allocator.mark_unused(start_ptr.raw_ptr(self), size);
}
+}
- pub fn into_usize(self) -> usize {
- let CodePtr(ptr) = self;
- ptr as usize
- }
+/// Something that could provide a base pointer to compute a raw pointer from a [CodePtr].
+pub trait CodePtrBase {
+ fn base_ptr(&self) -> NonNull<u8>;
}
-impl From<*mut u8> for CodePtr {
- fn from(value: *mut u8) -> Self {
- assert!(value as usize != 0);
- return CodePtr(value);
+impl<A: Allocator> CodePtrBase for VirtualMemory<A> {
+ fn base_ptr(&self) -> NonNull<u8> {
+ self.region_start
}
}
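Because a CodePtr is now just a u32 offset, converting it back to an address always goes through something implementing CodePtrBase. Below is a minimal illustrative sketch of that relationship, assuming module-internal access to the private offset field (as the tests later in this file use) and a hypothetical `DummyBase` standing in for `VirtualMemory`.

```rust
use std::ptr::NonNull;

// Hypothetical base for illustration only; real callers pass a &VirtualMemory,
// which implements CodePtrBase by returning region_start.
struct DummyBase(NonNull<u8>);

impl CodePtrBase for DummyBase {
    fn base_ptr(&self) -> NonNull<u8> {
        self.0
    }
}

fn code_ptr_sketch(base: &DummyBase) {
    // A CodePtr is only an offset from the start of the region...
    let start = CodePtr(0);
    let later = start.add_bytes(8192);

    // ...so distances between pointers into the same region are plain arithmetic,
    assert_eq!(later.as_offset() - start.as_offset(), 8192);

    // ...and a raw pointer only exists relative to a base (and may be dangling
    // until the page has actually been mapped by a write).
    let raw: *const u8 = later.raw_ptr(base);
    assert_eq!(raw as usize, base.base_ptr().as_ptr() as usize + 8192);
}
```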
@@ -223,6 +279,10 @@ mod sys {
fn mark_executable(&mut self, ptr: *const u8, size: u32) {
unsafe { rb_yjit_mark_executable(ptr as VoidPtr, size) }
}
+
+ fn mark_unused(&mut self, ptr: *const u8, size: u32) -> bool {
+ unsafe { rb_yjit_mark_unused(ptr as VoidPtr, size) }
+ }
}
}
@@ -246,6 +306,7 @@ pub mod tests {
enum AllocRequest {
MarkWritable{ start_idx: usize, length: usize },
MarkExecutable{ start_idx: usize, length: usize },
+ MarkUnused,
}
use AllocRequest::*;
@@ -286,6 +347,13 @@ pub mod tests {
// We don't try to execute generated code in cfg(test)
// so no need to actually request executable memory.
}
+
+ fn mark_unused(&mut self, ptr: *const u8, length: u32) -> bool {
+ self.bounds_check_request(ptr, length);
+ self.requests.push(MarkUnused);
+
+ true
+ }
}
// Fictional architecture where each page is 4 bytes long
@@ -298,12 +366,13 @@ pub mod tests {
VirtualMemory::new(
alloc,
PAGE_SIZE.try_into().unwrap(),
- mem_start as *mut u8,
+ NonNull::new(mem_start as *mut u8).unwrap(),
mem_size,
)
}
#[test]
+ #[cfg(target_arch = "x86_64")]
fn new_memory_is_initialized() {
let mut virt = new_dummy_virt_mem();
@@ -340,16 +409,12 @@ pub mod tests {
#[test]
fn bounds_checking() {
use super::WriteError::*;
- use std::ptr;
let mut virt = new_dummy_virt_mem();
- let null = CodePtr(ptr::null());
- assert_eq!(Err(OutOfBounds), virt.write_byte(null, 0));
-
let one_past_end = virt.start_ptr().add_bytes(virt.virtual_region_size());
assert_eq!(Err(OutOfBounds), virt.write_byte(one_past_end, 0));
- let end_of_addr_space = CodePtr(usize::MAX as _);
+ let end_of_addr_space = CodePtr(u32::MAX);
assert_eq!(Err(OutOfBounds), virt.write_byte(end_of_addr_space, 0));
}
diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs
index 192e9753d9..cc2c8fe066 100644
--- a/yjit/src/yjit.rs
+++ b/yjit/src/yjit.rs
@@ -4,15 +4,16 @@ use crate::cruby::*;
use crate::invariants::*;
use crate::options::*;
use crate::stats::YjitExitLocations;
+use crate::stats::incr_counter;
+use crate::stats::with_compile_time;
use std::os::raw;
-use std::sync::atomic::{AtomicBool, Ordering};
-/// For tracking whether the user enabled YJIT through command line arguments or environment
-/// variables. AtomicBool to avoid `unsafe`. On x86 it compiles to simple movs.
-/// See <https://doc.rust-lang.org/std/sync/atomic/enum.Ordering.html>
-/// See [rb_yjit_enabled_p]
-static YJIT_ENABLED: AtomicBool = AtomicBool::new(false);
+/// Is YJIT on? The interpreter uses this variable to decide whether to trigger
+/// compilation. See jit_exec() and jit_compile().
+#[allow(non_upper_case_globals)]
+#[no_mangle]
+pub static mut rb_yjit_enabled_p: bool = false;
/// Parse one command-line option.
/// This is called from ruby.c
@@ -21,64 +22,173 @@ pub extern "C" fn rb_yjit_parse_option(str_ptr: *const raw::c_char) -> bool {
return parse_option(str_ptr).is_some();
}
-/// Is YJIT on? The interpreter uses this function to decide whether to increment
-/// ISEQ call counters. See mjit_exec().
-/// This is used frequently since it's used on every method call in the interpreter.
-#[no_mangle]
-pub extern "C" fn rb_yjit_enabled_p() -> raw::c_int {
- // Note that we might want to call this function from signal handlers so
- // might need to ensure signal-safety(7).
- YJIT_ENABLED.load(Ordering::Acquire).into()
-}
-
/// Like rb_yjit_enabled_p, but for Rust code.
pub fn yjit_enabled_p() -> bool {
- YJIT_ENABLED.load(Ordering::Acquire)
+ unsafe { rb_yjit_enabled_p }
}
-/// After how many calls YJIT starts compiling a method
+/// This function is called from C code
#[no_mangle]
-pub extern "C" fn rb_yjit_call_threshold() -> raw::c_uint {
- get_option!(call_threshold) as raw::c_uint
+pub extern "C" fn rb_yjit_init(yjit_enabled: bool) {
+ // Register the method codegen functions. This must be done at boot.
+ yjit_reg_method_codegen_fns();
+
+ // If --yjit-disable, yjit_init() will not be called until RubyVM::YJIT.enable.
+ if yjit_enabled && !get_option!(disable) {
+ yjit_init();
+ }
}
-/// This function is called from C code
-#[no_mangle]
-pub extern "C" fn rb_yjit_init_rust() {
+/// Initialize and enable YJIT. You should call this at boot or while holding the GVL.
+fn yjit_init() {
// TODO: need to make sure that command-line options have been
// initialized by CRuby
// Catch panics to avoid UB for unwinding into C frames.
// See https://doc.rust-lang.org/nomicon/exception-safety.html
- // TODO: set a panic handler so the we don't print a message
- // everytime we panic.
let result = std::panic::catch_unwind(|| {
Invariants::init();
CodegenGlobals::init();
YjitExitLocations::init();
+ ids::init();
+
+ rb_bug_panic_hook();
// YJIT enabled and initialized successfully
- YJIT_ENABLED.store(true, Ordering::Release);
+ assert!(unsafe{ !rb_yjit_enabled_p });
+ unsafe { rb_yjit_enabled_p = true; }
});
if let Err(_) = result {
- println!("YJIT: rb_yjit_init_rust() panicked. Aborting.");
+ println!("YJIT: yjit_init() panicked. Aborting.");
std::process::abort();
}
+
+ // Make sure --yjit-perf doesn't append symbols to an old file
+ if get_option!(perf_map).is_some() {
+ let perf_map = format!("/tmp/perf-{}.map", std::process::id());
+ let _ = std::fs::remove_file(&perf_map);
+ println!("YJIT perf map: {perf_map}");
+ }
+
+ // Initialize the GC hooks. Do this last, as some code depends on Rust initialization.
+ extern "C" {
+ fn rb_yjit_init_gc_hooks();
+ }
+ unsafe { rb_yjit_init_gc_hooks() }
+}
+
+/// At the moment, we abort whenever we panic.
+/// To aid with getting diagnostics in the wild without requiring
+/// people to set RUST_BACKTRACE=1, register a panic hook that crashes using rb_bug().
+/// rb_bug() might not be as good at printing a call trace as Rust's stdlib, but
+/// it dumps some other info that might be relevant.
+///
+/// In case we want to start doing fancier exception handling with panic=unwind,
+/// we can revisit this later. For now, this helps us get good bug reports.
+fn rb_bug_panic_hook() {
+ use std::env;
+ use std::panic;
+ use std::io::{stderr, Write};
+
+ // Probably the default hook. We do this very early during process boot.
+ let previous_hook = panic::take_hook();
+
+ panic::set_hook(Box::new(move |panic_info| {
+ // Not using `eprintln` to avoid double panic.
+ let _ = stderr().write_all(b"ruby: YJIT has panicked. More info to follow...\n");
+
+ // Always show a Rust backtrace.
+ env::set_var("RUST_BACKTRACE", "1");
+ previous_hook(panic_info);
+
+ unsafe { rb_bug(b"YJIT panicked\0".as_ref().as_ptr() as *const raw::c_char); }
+ }));
}
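For readers less familiar with Rust's panic machinery, the chaining pattern used by rb_bug_panic_hook can be shown in isolation. This is a minimal sketch, not YJIT code: `std::process::abort()` stands in for rb_bug(), which is a CRuby symbol that isn't available outside the VM, and the banner text is made up.

```rust
use std::io::{stderr, Write};
use std::panic;

// Minimal sketch of the hook-chaining pattern used above.
fn install_chained_panic_hook() {
    // Take whatever hook is currently installed (usually the default one).
    let previous_hook = panic::take_hook();

    panic::set_hook(Box::new(move |panic_info| {
        // Write directly to stderr rather than via eprintln!, mirroring the
        // original's care to avoid a second panic inside the hook.
        let _ = stderr().write_all(b"example: a panic hook ran\n");

        // Delegate to the previous hook so the normal panic message (and, with
        // RUST_BACKTRACE=1, a backtrace) still gets printed.
        previous_hook(panic_info);

        // Then crash hard, as the YJIT hook does via rb_bug().
        std::process::abort();
    }));
}
```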
/// Called from C code to begin compiling a function
/// NOTE: this should be wrapped in RB_VM_LOCK_ENTER(), rb_vm_barrier() on the C side
+/// If jit_exception is true, compile JIT code for handling exceptions.
+/// See [jit_compile_exception] for details.
#[no_mangle]
-pub extern "C" fn rb_yjit_iseq_gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> *const u8 {
- let maybe_code_ptr = gen_entry_point(iseq, ec);
+pub extern "C" fn rb_yjit_iseq_gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> *const u8 {
+ // Don't compile when there is insufficient native stack space
+ if unsafe { rb_ec_stack_check(ec as _) } != 0 {
+ return std::ptr::null();
+ }
+
+ // Reject ISEQs with very large temp stacks;
+ // this lets us use u8/i8 values to track stack_size and sp_offset
+ let stack_max = unsafe { rb_get_iseq_body_stack_max(iseq) };
+ if stack_max >= i8::MAX as u32 {
+ incr_counter!(iseq_stack_too_large);
+ return std::ptr::null();
+ }
+
+ // Reject ISEQs that are too long; this lets us use u16 for instruction indices
+ // if we want to. Very long ISEQs are also much more likely to be initialization code.
+ let iseq_size = unsafe { get_iseq_encoded_size(iseq) };
+ if iseq_size >= u16::MAX as u32 {
+ incr_counter!(iseq_too_long);
+ return std::ptr::null();
+ }
+
+ // If a custom call threshold was not specified on the command line and
+ // this is a large application (one with very many ISEQs), switch to
+ // the call threshold for large applications after this entry point
+ use crate::stats::rb_yjit_live_iseq_count;
+ if unsafe { rb_yjit_call_threshold } == SMALL_CALL_THRESHOLD && unsafe { rb_yjit_live_iseq_count } > LARGE_ISEQ_COUNT {
+ unsafe { rb_yjit_call_threshold = LARGE_CALL_THRESHOLD; };
+ }
+
+ let maybe_code_ptr = with_compile_time(|| { gen_entry_point(iseq, ec, jit_exception) });
match maybe_code_ptr {
- Some(ptr) => ptr.raw_ptr(),
+ Some(ptr) => ptr,
None => std::ptr::null(),
}
}
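The threshold-switching heuristic above can be read as a pure function; the sketch below restates it under the assumption that the constants shown are placeholders rather than the values actually defined by YJIT's options.

```rust
// Hedged restatement of the heuristic above; the constants are placeholders,
// not YJIT's real defaults.
fn effective_call_threshold(current_threshold: u64, live_iseq_count: u64) -> u64 {
    const SMALL_CALL_THRESHOLD: u64 = 30; // placeholder
    const LARGE_CALL_THRESHOLD: u64 = 120; // placeholder
    const LARGE_ISEQ_COUNT: u64 = 40_000; // placeholder

    // If the threshold still equals the small-application default and the
    // process already contains very many ISEQs, switch to the higher
    // threshold; otherwise keep whatever value is currently in effect.
    if current_threshold == SMALL_CALL_THRESHOLD && live_iseq_count > LARGE_ISEQ_COUNT {
        LARGE_CALL_THRESHOLD
    } else {
        current_threshold
    }
}
```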
+/// Free and recompile all existing JIT code
+#[no_mangle]
+pub extern "C" fn rb_yjit_code_gc(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
+ if !yjit_enabled_p() {
+ return Qnil;
+ }
+
+ with_vm_lock(src_loc!(), || {
+ let cb = CodegenGlobals::get_inline_cb();
+ let ocb = CodegenGlobals::get_outlined_cb();
+ cb.code_gc(ocb);
+ });
+
+ Qnil
+}
+
+/// Enable YJIT compilation, returning true if YJIT was previously disabled
+#[no_mangle]
+pub extern "C" fn rb_yjit_enable(_ec: EcPtr, _ruby_self: VALUE, gen_stats: VALUE, print_stats: VALUE) -> VALUE {
+ with_vm_lock(src_loc!(), || {
+ // Initialize and enable YJIT
+ if gen_stats.test() {
+ unsafe {
+ OPTIONS.gen_stats = gen_stats.test();
+ OPTIONS.print_stats = print_stats.test();
+ }
+ }
+ yjit_init();
+
+ // Add "+YJIT" to RUBY_DESCRIPTION
+ extern "C" {
+ fn ruby_set_yjit_description();
+ }
+ unsafe { ruby_set_yjit_description(); }
+
+ Qtrue
+ })
+}
+
/// Simulate a situation where we are out of executable memory
#[no_mangle]
pub extern "C" fn rb_yjit_simulate_oom_bang(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
@@ -91,9 +201,25 @@ pub extern "C" fn rb_yjit_simulate_oom_bang(_ec: EcPtr, _ruby_self: VALUE) -> VA
if cfg!(debug_assertions) {
let cb = CodegenGlobals::get_inline_cb();
let ocb = CodegenGlobals::get_outlined_cb().unwrap();
- cb.set_pos(cb.get_mem_size() - 1);
- ocb.set_pos(ocb.get_mem_size() - 1);
+ cb.set_pos(cb.get_mem_size());
+ ocb.set_pos(ocb.get_mem_size());
}
return Qnil;
}
+
+/// Push a C method frame if the given PC is supposed to lazily push one.
+/// This is called from rb_raise() (at rb_exc_new_str()) and other functions
+/// that may make a method call (e.g. rb_to_int()).
+#[no_mangle]
+pub extern "C" fn rb_yjit_lazy_push_frame(pc: *mut VALUE) {
+ if !yjit_enabled_p() {
+ return;
+ }
+
+ incr_counter!(num_lazy_frame_check);
+ if let Some(&(cme, recv_idx)) = CodegenGlobals::get_pc_to_cfunc().get(&pc) {
+ incr_counter!(num_lazy_frame_push);
+ unsafe { rb_vm_push_cfunc_frame(cme, recv_idx as i32) }
+ }
+}