Diffstat (limited to 'yjit')
-rw-r--r--  yjit/src/asm/arm64/arg/inst_offset.rs   |  47
-rw-r--r--  yjit/src/asm/arm64/arg/mod.rs            |   2
-rw-r--r--  yjit/src/asm/arm64/inst/branch_cond.rs   |  18
-rw-r--r--  yjit/src/asm/arm64/inst/call.rs          |  28
-rw-r--r--  yjit/src/asm/arm64/inst/load_literal.rs  |  14
-rw-r--r--  yjit/src/asm/arm64/mod.rs                |  63
-rw-r--r--  yjit/src/backend/arm64/mod.rs            |  52
7 files changed, 133 insertions(+), 91 deletions(-)
diff --git a/yjit/src/asm/arm64/arg/inst_offset.rs b/yjit/src/asm/arm64/arg/inst_offset.rs
new file mode 100644
index 0000000000..f4a6bc73a0
--- /dev/null
+++ b/yjit/src/asm/arm64/arg/inst_offset.rs
@@ -0,0 +1,47 @@
+/// There are a lot of instructions in the AArch64 architecture that take an
+/// offset in terms of number of instructions. Usually they are jump
+/// instructions or instructions that load a value relative to the current PC.
+///
+/// This struct is used to mark those locations instead of a generic operand in
+/// order to give better clarity to the developer when reading the AArch64
+/// backend code. It also helps to clarify that everything is in terms of a
+/// number of instructions and not a number of bytes (i.e., the offset is the
+/// number of bytes divided by 4).
+#[derive(Copy, Clone)]
+pub struct InstructionOffset(i32);
+
+impl InstructionOffset {
+ /// Create a new instruction offset.
+ pub fn from_insns(insns: i32) -> Self {
+ InstructionOffset(insns)
+ }
+
+ /// Create a new instruction offset from a number of bytes.
+ pub fn from_bytes(bytes: i32) -> Self {
+ assert_eq!(bytes % 4, 0, "Byte offset must be a multiple of 4");
+ InstructionOffset(bytes / 4)
+ }
+}
+
+impl From<i32> for InstructionOffset {
+ /// Convert an i32 into an instruction offset.
+ fn from(value: i32) -> Self {
+ InstructionOffset(value)
+ }
+}
+
+impl From<InstructionOffset> for i32 {
+ /// Convert an instruction offset into a number of instructions as an i32.
+ fn from(offset: InstructionOffset) -> Self {
+ offset.0
+ }
+}
+
+impl From<InstructionOffset> for i64 {
+ /// Convert an instruction offset into a number of instructions as an i64.
+ /// This is useful when checking how many bits this offset fits
+ /// into.
+ fn from(offset: InstructionOffset) -> Self {
+ offset.0.into()
+ }
+}
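
A quick standalone sketch of how the new type is meant to be used. The struct is condensed inline here so the snippet runs outside the yjit crate; everything else mirrors the file above. The point of the change is that call sites now pass instruction counts, and byte offsets are converted explicitly.

    // Condensed copy of InstructionOffset, for illustration only.
    #[derive(Copy, Clone)]
    struct InstructionOffset(i32);

    impl InstructionOffset {
        fn from_insns(insns: i32) -> Self { InstructionOffset(insns) }
        fn from_bytes(bytes: i32) -> Self {
            assert_eq!(bytes % 4, 0, "Byte offset must be a multiple of 4");
            InstructionOffset(bytes / 4)
        }
    }

    fn main() {
        // A branch target 16 bytes ahead is 4 instructions ahead on AArch64.
        assert_eq!(InstructionOffset::from_bytes(16).0, 4);
        // from_insns takes the count directly, with no division.
        assert_eq!(InstructionOffset::from_insns(4).0, 4);
    }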
diff --git a/yjit/src/asm/arm64/arg/mod.rs b/yjit/src/asm/arm64/arg/mod.rs
index 9bf4a8ea13..7eb37834f9 100644
--- a/yjit/src/asm/arm64/arg/mod.rs
+++ b/yjit/src/asm/arm64/arg/mod.rs
@@ -3,6 +3,7 @@
mod bitmask_imm;
mod condition;
+mod inst_offset;
mod sf;
mod shifted_imm;
mod sys_reg;
@@ -10,6 +11,7 @@ mod truncate;
pub use bitmask_imm::BitmaskImmediate;
pub use condition::Condition;
+pub use inst_offset::InstructionOffset;
pub use sf::Sf;
pub use shifted_imm::ShiftedImmediate;
pub use sys_reg::SystemRegister;
diff --git a/yjit/src/asm/arm64/inst/branch_cond.rs b/yjit/src/asm/arm64/inst/branch_cond.rs
index c489bacef0..4338cf0f4f 100644
--- a/yjit/src/asm/arm64/inst/branch_cond.rs
+++ b/yjit/src/asm/arm64/inst/branch_cond.rs
@@ -1,4 +1,4 @@
-use super::super::arg::{Condition, truncate_imm};
+use super::super::arg::{Condition, InstructionOffset, truncate_imm};
/// The struct that represents an A64 conditional branch instruction that can be
/// encoded.
@@ -14,14 +14,14 @@ pub struct BranchCond {
cond: u8,
/// The instruction offset from this instruction to branch to.
- imm19: i32
+ offset: InstructionOffset
}
impl BranchCond {
/// B.cond
/// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally-
- pub fn bcond(cond: u8, imm19: i32) -> Self {
- Self { cond, imm19 }
+ pub fn bcond(cond: u8, offset: InstructionOffset) -> Self {
+ Self { cond, offset }
}
}
@@ -34,7 +34,7 @@ impl From<BranchCond> for u32 {
0
| (1 << 30)
| (FAMILY << 26)
- | (truncate_imm::<_, 19>(inst.imm19) << 5)
+ | (truncate_imm::<_, 19>(inst.offset) << 5)
| (inst.cond as u32)
}
}
@@ -53,25 +53,25 @@ mod tests {
#[test]
fn test_b_eq() {
- let result: u32 = BranchCond::bcond(Condition::EQ, 32).into();
+ let result: u32 = BranchCond::bcond(Condition::EQ, 32.into()).into();
assert_eq!(0x54000400, result);
}
#[test]
fn test_b_vs() {
- let result: u32 = BranchCond::bcond(Condition::VS, 32).into();
+ let result: u32 = BranchCond::bcond(Condition::VS, 32.into()).into();
assert_eq!(0x54000406, result);
}
#[test]
fn test_b_eq_max() {
- let result: u32 = BranchCond::bcond(Condition::EQ, (1 << 18) - 1).into();
+ let result: u32 = BranchCond::bcond(Condition::EQ, ((1 << 18) - 1).into()).into();
assert_eq!(0x547fffe0, result);
}
#[test]
fn test_b_eq_min() {
- let result: u32 = BranchCond::bcond(Condition::EQ, -(1 << 18)).into();
+ let result: u32 = BranchCond::bcond(Condition::EQ, (-(1 << 18)).into()).into();
assert_eq!(0x54800000, result);
}
}
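
Reading the encoding expression together with test_b_eq above, the bit layout can be checked by hand. A hedged sketch follows; FAMILY == 0b101 and Condition::EQ == 0 are assumptions taken from elsewhere in the yjit sources, not from this diff.

    // B.cond: 19-bit signed instruction offset in bits 23..5, condition in bits 3..0.
    fn encode_bcond(cond: u8, offset_insns: i32) -> u32 {
        let imm19 = (offset_insns as u32) & ((1 << 19) - 1); // like truncate_imm::<_, 19>
        (1u32 << 30) | (0b101 << 26) | (imm19 << 5) | (cond as u32)
    }

    fn main() {
        assert_eq!(encode_bcond(0, 32), 0x54000400); // B.EQ, +32 instructions
    }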
diff --git a/yjit/src/asm/arm64/inst/call.rs b/yjit/src/asm/arm64/inst/call.rs
index 32d924f799..74debac7f7 100644
--- a/yjit/src/asm/arm64/inst/call.rs
+++ b/yjit/src/asm/arm64/inst/call.rs
@@ -1,4 +1,4 @@
-use super::super::arg::truncate_imm;
+use super::super::arg::{InstructionOffset, truncate_imm};
/// The operation to perform for this instruction.
enum Op {
@@ -20,8 +20,8 @@ enum Op {
/// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+
///
pub struct Call {
- /// The PC-relative offset to jump to (which will be multiplied by 4).
- imm26: i32,
+ /// The PC-relative offset to jump to in terms of number of instructions.
+ offset: InstructionOffset,
/// The operation to perform for this instruction.
op: Op
@@ -30,14 +30,14 @@ pub struct Call {
impl Call {
/// B
/// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch-
- pub fn b(imm26: i32) -> Self {
- Self { imm26, op: Op::Branch }
+ pub fn b(offset: InstructionOffset) -> Self {
+ Self { offset, op: Op::Branch }
}
/// BL
/// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en
- pub fn bl(imm26: i32) -> Self {
- Self { imm26, op: Op::BranchWithLink }
+ pub fn bl(offset: InstructionOffset) -> Self {
+ Self { offset, op: Op::BranchWithLink }
}
}
@@ -50,7 +50,7 @@ impl From<Call> for u32 {
0
| ((inst.op as u32) << 31)
| (FAMILY << 26)
- | truncate_imm::<_, 26>(inst.imm26)
+ | truncate_imm::<_, 26>(inst.offset)
}
}
@@ -68,37 +68,37 @@ mod tests {
#[test]
fn test_bl() {
- let result: u32 = Call::bl(0).into();
+ let result: u32 = Call::bl(0.into()).into();
assert_eq!(0x94000000, result);
}
#[test]
fn test_bl_positive() {
- let result: u32 = Call::bl(256).into();
+ let result: u32 = Call::bl(256.into()).into();
assert_eq!(0x94000100, result);
}
#[test]
fn test_bl_negative() {
- let result: u32 = Call::bl(-256).into();
+ let result: u32 = Call::bl((-256).into()).into();
assert_eq!(0x97ffff00, result);
}
#[test]
fn test_b() {
- let result: u32 = Call::b(0).into();
+ let result: u32 = Call::b(0.into()).into();
assert_eq!(0x14000000, result);
}
#[test]
fn test_b_positive() {
- let result: u32 = Call::b((1 << 25) - 1).into();
+ let result: u32 = Call::b(((1 << 25) - 1).into()).into();
assert_eq!(0x15ffffff, result);
}
#[test]
fn test_b_negative() {
- let result: u32 = Call::b(-(1 << 25)).into();
+ let result: u32 = Call::b((-(1 << 25)).into()).into();
assert_eq!(0x16000000, result);
}
}
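
The extremes used in test_b_positive and test_b_negative come straight from the 26-bit signed immediate: it encodes instruction offsets from -(1 << 25) up to (1 << 25) - 1, i.e. roughly +/-128 MiB once multiplied by 4 to get bytes. As a sanity check:

    fn main() {
        let max: i64 = (1 << 25) - 1;
        let min: i64 = -(1 << 25);
        assert_eq!(max, 33_554_431);
        assert_eq!(min, -33_554_432);
        // In bytes: +128 MiB - 4 down to -128 MiB.
        assert_eq!(max * 4, 134_217_724);
        assert_eq!(min * 4, -134_217_728);
    }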
diff --git a/yjit/src/asm/arm64/inst/load_literal.rs b/yjit/src/asm/arm64/inst/load_literal.rs
index c5ab09713c..3eade205c8 100644
--- a/yjit/src/asm/arm64/inst/load_literal.rs
+++ b/yjit/src/asm/arm64/inst/load_literal.rs
@@ -1,4 +1,4 @@
-use super::super::arg::truncate_imm;
+use super::super::arg::{InstructionOffset, truncate_imm};
/// The size of the operands being operated on.
enum Opc {
@@ -32,7 +32,7 @@ pub struct LoadLiteral {
rt: u8,
/// The PC-relative number of instructions to load the value from.
- imm19: i32,
+ offset: InstructionOffset,
/// The size of the operands being operated on.
opc: Opc
@@ -41,8 +41,8 @@ pub struct LoadLiteral {
impl LoadLiteral {
/// LDR (load literal)
/// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--literal---Load-Register--literal--?lang=en
- pub fn ldr_literal(rt: u8, imm19: i32, num_bits: u8) -> Self {
- Self { rt, imm19, opc: num_bits.into() }
+ pub fn ldr_literal(rt: u8, offset: InstructionOffset, num_bits: u8) -> Self {
+ Self { rt, offset, opc: num_bits.into() }
}
}
@@ -56,7 +56,7 @@ impl From<LoadLiteral> for u32 {
| ((inst.opc as u32) << 30)
| (1 << 28)
| (FAMILY << 25)
- | (truncate_imm::<_, 19>(inst.imm19) << 5)
+ | (truncate_imm::<_, 19>(inst.offset) << 5)
| (inst.rt as u32)
}
}
@@ -75,14 +75,14 @@ mod tests {
#[test]
fn test_ldr_positive() {
- let inst = LoadLiteral::ldr_literal(0, 5, 64);
+ let inst = LoadLiteral::ldr_literal(0, 5.into(), 64);
let result: u32 = inst.into();
assert_eq!(0x580000a0, result);
}
#[test]
fn test_ldr_negative() {
- let inst = LoadLiteral::ldr_literal(0, -5, 64);
+ let inst = LoadLiteral::ldr_literal(0, (-5).into(), 64);
let result: u32 = inst.into();
assert_eq!(0x58ffff60, result);
}
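
As with B.cond, the LDR (literal) encoding in the tests above can be reproduced by hand. A hedged sketch; FAMILY == 0b100 for loads and opc == 0b01 for 64-bit operands are assumptions taken from elsewhere in the yjit sources, not from this diff.

    // LDR (literal): 19-bit signed instruction offset in bits 23..5, Rt in bits 4..0.
    fn encode_ldr_literal(rt: u8, offset_insns: i32) -> u32 {
        let imm19 = (offset_insns as u32) & ((1 << 19) - 1); // like truncate_imm::<_, 19>
        (0b01 << 30) | (1 << 28) | (0b100 << 25) | (imm19 << 5) | (rt as u32)
    }

    fn main() {
        assert_eq!(encode_ldr_literal(0, 5), 0x580000a0); // matches test_ldr_positive
    }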
diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs
index b73b3125e2..420151c6d1 100644
--- a/yjit/src/asm/arm64/mod.rs
+++ b/yjit/src/asm/arm64/mod.rs
@@ -190,15 +190,9 @@ pub const fn b_offset_fits_bits(offset: i64) -> bool {
}
/// B - branch without link (offset is number of instructions to jump)
-pub fn b(cb: &mut CodeBlock, imm26: A64Opnd) {
- let bytes: [u8; 4] = match imm26 {
- A64Opnd::Imm(imm26) => {
- assert!(b_offset_fits_bits(imm26), "The immediate operand must be 26 bits or less.");
-
- Call::b(imm26 as i32).into()
- },
- _ => panic!("Invalid operand combination to b instruction.")
- };
+pub fn b(cb: &mut CodeBlock, offset: InstructionOffset) {
+ assert!(b_offset_fits_bits(offset.into()), "The offset must be 26 bits or less.");
+ let bytes: [u8; 4] = Call::b(offset).into();
cb.write_bytes(&bytes);
}
@@ -208,33 +202,21 @@ pub fn b(cb: &mut CodeBlock, imm26: A64Opnd) {
/// value into a register first, then use the b.cond instruction to skip past a
/// direct jump.
pub const fn bcond_offset_fits_bits(offset: i64) -> bool {
- imm_fits_bits(offset, 21) && (offset & 0b11 == 0)
+ imm_fits_bits(offset, 19)
}
/// B.cond - branch to target if condition is true
-pub fn bcond(cb: &mut CodeBlock, cond: u8, byte_offset: A64Opnd) {
- let bytes: [u8; 4] = match byte_offset {
- A64Opnd::Imm(imm) => {
- assert!(bcond_offset_fits_bits(imm), "The immediate operand must be 21 bits or less and be aligned to a 2-bit boundary.");
-
- BranchCond::bcond(cond, (imm / 4) as i32).into()
- },
- _ => panic!("Invalid operand combination to bcond instruction."),
- };
+pub fn bcond(cb: &mut CodeBlock, cond: u8, offset: InstructionOffset) {
+ assert!(bcond_offset_fits_bits(offset.into()), "The offset must be 19 bits or less.");
+ let bytes: [u8; 4] = BranchCond::bcond(cond, offset).into();
cb.write_bytes(&bytes);
}
/// BL - branch with link (offset is number of instructions to jump)
-pub fn bl(cb: &mut CodeBlock, imm26: A64Opnd) {
- let bytes: [u8; 4] = match imm26 {
- A64Opnd::Imm(imm26) => {
- assert!(b_offset_fits_bits(imm26), "The immediate operand must be 26 bits or less.");
-
- Call::bl(imm26 as i32).into()
- },
- _ => panic!("Invalid operand combination to bl instruction.")
- };
+pub fn bl(cb: &mut CodeBlock, offset: InstructionOffset) {
+ assert!(b_offset_fits_bits(offset.into()), "The offset must be 26 bits or less.");
+ let bytes: [u8; 4] = Call::bl(offset).into();
cb.write_bytes(&bytes);
}
@@ -413,7 +395,7 @@ pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd, rm: A64Opnd) {
}
/// LDR - load a PC-relative memory address into a register
-pub fn ldr_literal(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) {
+pub fn ldr_literal(cb: &mut CodeBlock, rt: A64Opnd, rn: InstructionOffset) {
let bytes: [u8; 4] = match rt {
A64Opnd::Reg(rt) => {
LoadLiteral::ldr_literal(rt.reg_no, rn, rt.num_bits).into()
@@ -1087,45 +1069,52 @@ mod tests {
#[test]
fn test_bcond() {
- check_bytes("01200054", |cb| bcond(cb, Condition::NE, A64Opnd::new_imm(0x400)));
+ let offset = InstructionOffset::from_insns(0x100);
+ check_bytes("01200054", |cb| bcond(cb, Condition::NE, offset));
}
#[test]
fn test_b() {
- check_bytes("ffffff15", |cb| b(cb, A64Opnd::new_imm((1 << 25) - 1)));
+ let offset = InstructionOffset::from_insns((1 << 25) - 1);
+ check_bytes("ffffff15", |cb| b(cb, offset));
}
#[test]
#[should_panic]
fn test_b_too_big() {
// There are 26 bits available
- check_bytes("", |cb| b(cb, A64Opnd::new_imm(1 << 25)));
+ let offset = InstructionOffset::from_insns(1 << 25);
+ check_bytes("", |cb| b(cb, offset));
}
#[test]
#[should_panic]
fn test_b_too_small() {
// There are 26 bits available
- check_bytes("", |cb| b(cb, A64Opnd::new_imm(-(1 << 25) - 1)));
+ let offset = InstructionOffset::from_insns(-(1 << 25) - 1);
+ check_bytes("", |cb| b(cb, offset));
}
#[test]
fn test_bl() {
- check_bytes("00000096", |cb| bl(cb, A64Opnd::new_imm(-(1 << 25))));
+ let offset = InstructionOffset::from_insns(-(1 << 25));
+ check_bytes("00000096", |cb| bl(cb, offset));
}
#[test]
#[should_panic]
fn test_bl_too_big() {
// There are 26 bits available
- check_bytes("", |cb| bl(cb, A64Opnd::new_imm(1 << 25)));
+ let offset = InstructionOffset::from_insns(1 << 25);
+ check_bytes("", |cb| bl(cb, offset));
}
#[test]
#[should_panic]
fn test_bl_too_small() {
// There are 26 bits available
- check_bytes("", |cb| bl(cb, A64Opnd::new_imm(-(1 << 25) - 1)));
+ let offset = InstructionOffset::from_insns(-(1 << 25) - 1);
+ check_bytes("", |cb| bl(cb, offset));
}
#[test]
@@ -1200,7 +1189,7 @@ mod tests {
#[test]
fn test_ldr_literal() {
- check_bytes("40010058", |cb| ldr_literal(cb, X0, 10));
+ check_bytes("40010058", |cb| ldr_literal(cb, X0, 10.into()));
}
#[test]
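
One behavioural note on the bcond_offset_fits_bits change above: the old version received a byte offset and required 21 bits plus 4-byte alignment, while the new version receives an instruction count and requires 19 bits — the same encodable range expressed in different units. A sketch assuming a conventional signed-range check for imm_fits_bits (the real helper is defined elsewhere in this file and is not shown in the diff):

    // Assumed shape of imm_fits_bits: does `imm` fit in `num_bits` signed bits?
    fn fits_signed(imm: i64, num_bits: u8) -> bool {
        imm < (1 << (num_bits - 1)) && imm >= -(1 << (num_bits - 1))
    }

    fn main() {
        let byte_offset: i64 = (1 << 20) - 4;
        // Old check: a 21-bit, 4-byte-aligned byte offset.
        assert!(fits_signed(byte_offset, 21) && byte_offset & 0b11 == 0);
        // New check: the same branch expressed as a 19-bit instruction count.
        assert!(fits_signed(byte_offset / 4, 19));
    }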
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 32db0ab3dc..446332788a 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -589,13 +589,15 @@ impl Assembler
Target::CodePtr(dst_ptr) => {
let dst_addr = dst_ptr.into_i64();
let src_addr = cb.get_write_ptr().into_i64();
- let offset = dst_addr - src_addr;
- let num_insns = if bcond_offset_fits_bits(offset) {
+ let num_insns = if bcond_offset_fits_bits((dst_addr - src_addr) / 4) {
// If the jump offset fits into the conditional jump as
// an immediate value and it's properly aligned, then we
- // can use the b.cond instruction directly.
- bcond(cb, CONDITION, A64Opnd::new_imm(offset));
+ // can use the b.cond instruction directly. The cast to
+ // i32 below is safe because we already checked that the
+ // offset fits.
+ let bytes = (dst_addr - src_addr) as i32;
+ bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes));
// Here we're going to return 1 because we've only
// written out 1 instruction.
@@ -604,12 +606,12 @@ impl Assembler
// Otherwise, we need to load the address into a
// register and use the branch register instruction.
let dst_addr = dst_ptr.into_u64();
- let load_insns: i64 = emit_load_size(dst_addr).into();
+ let load_insns: i32 = emit_load_size(dst_addr).into();
// We're going to write out the inverse condition so
// that if it doesn't match it will skip over the
// instructions used for branching.
- bcond(cb, Condition::inverse(CONDITION), A64Opnd::new_imm((load_insns + 2) * 4));
+ bcond(cb, Condition::inverse(CONDITION), (load_insns + 2).into());
emit_load_value(cb, Assembler::SCRATCH0, dst_addr);
br(cb, Assembler::SCRATCH0);
@@ -630,7 +632,8 @@ impl Assembler
// offset. We're going to assume we can fit into a single
// b.cond instruction. It will panic otherwise.
cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| {
- bcond(cb, CONDITION, A64Opnd::new_imm(dst_addr - (src_addr - 4)));
+ let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap();
+ bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes));
});
},
Target::FunPtr(_) => unreachable!()
@@ -756,8 +759,8 @@ impl Assembler
// references to GC'd Value operands. If the value
// being loaded is a heap object, we'll report that
// back out to the gc_offsets list.
- ldr_literal(cb, out.into(), 2);
- b(cb, A64Opnd::new_imm(1 + (SIZEOF_VALUE as i64) / 4));
+ ldr_literal(cb, out.into(), 2.into());
+ b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32)));
cb.write_bytes(&value.as_u64().to_le_bytes());
let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
@@ -844,14 +847,11 @@ impl Assembler
// The offset to the call target in bytes
let src_addr = cb.get_write_ptr().into_i64();
let dst_addr = target.unwrap_fun_ptr() as i64;
- let offset = dst_addr - src_addr;
- // The offset in instruction count for BL's immediate
- let offset = offset / 4;
// Use BL if the offset is short enough to encode as an immediate.
// Otherwise, use BLR with a register.
- if b_offset_fits_bits(offset) {
- bl(cb, A64Opnd::new_imm(offset));
+ if b_offset_fits_bits((dst_addr - src_addr) / 4) {
+ bl(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
} else {
emit_load_value(cb, Self::SCRATCH0, dst_addr as u64);
blr(cb, Self::SCRATCH0);
@@ -875,19 +875,22 @@ impl Assembler
let src_addr = cb.get_write_ptr().into_i64();
let dst_addr = dst_ptr.into_i64();
- // The offset between the two instructions in bytes.
- // Note that when we encode this into a b
- // instruction, we'll divide by 4 because it accepts
- // the number of instructions to jump over.
- let offset = dst_addr - src_addr;
- let offset = offset / 4;
-
// If the offset is short enough, then we'll use the
// branch instruction. Otherwise, we'll move the
// destination into a register and use the branch
// register instruction.
- let num_insns = emit_load_value(cb, Self::SCRATCH0, dst_addr as u64);
- br(cb, Self::SCRATCH0);
+ let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
+ b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
+ 0
+ } else {
+ let num_insns = emit_load_value(cb, Self::SCRATCH0, dst_addr as u64);
+ br(cb, Self::SCRATCH0);
+ num_insns
+ };
+
+ // Make sure it's always a consistent number of
+ // instructions in case it gets patched and has to
+ // use the other branch.
for _ in num_insns..4 {
nop(cb);
}
@@ -899,7 +902,8 @@ impl Assembler
// to assume we can fit into a single b instruction.
// It will panic otherwise.
cb.label_ref(*label_idx, 4, |cb, src_addr, dst_addr| {
- b(cb, A64Opnd::new_imm((dst_addr - (src_addr - 4)) / 4));
+ let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap();
+ b(cb, InstructionOffset::from_bytes(bytes));
});
},
_ => unreachable!()
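
A closing note on the jump-to-CodePtr path above: whichever branch form is chosen, the emitted sequence is padded with NOPs to a fixed length so it can later be patched in place to the other form. The counts follow directly from the code:

    // `num_insns` is 0 when the short `b` is taken, or the number of load
    // instructions when falling back to load + `br`. One branch instruction is
    // always written, then `for _ in num_insns..4` pads with NOPs, so the
    // sequence is always 5 instructions long.
    fn total_insns(num_insns: i32) -> i32 {
        1 + num_insns + (4 - num_insns)
    }

    fn main() {
        assert_eq!(total_insns(0), 5); // direct `b` + 4 NOPs
        assert_eq!(total_insns(4), 5); // 4-instruction load + `br`, no NOPs
    }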