Diffstat (limited to 'yjit/src/backend/arm64/mod.rs')
-rw-r--r--  yjit/src/backend/arm64/mod.rs  1835
1 file changed, 1835 insertions(+), 0 deletions(-)
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
new file mode 100644
index 0000000000..3bf949ba7d
--- /dev/null
+++ b/yjit/src/backend/arm64/mod.rs
@@ -0,0 +1,1835 @@
+use std::mem::take;
+
+use crate::asm::{CodeBlock, OutlinedCb};
+use crate::asm::arm64::*;
+use crate::cruby::*;
+use crate::backend::ir::*;
+use crate::virtualmem::CodePtr;
+use crate::utils::*;
+
+// Use the arm64 register type for this platform
+pub type Reg = A64Reg;
+
+// Callee-saved registers
+pub const _CFP: Opnd = Opnd::Reg(X19_REG);
+pub const _EC: Opnd = Opnd::Reg(X20_REG);
+pub const _SP: Opnd = Opnd::Reg(X21_REG);
+
+// C argument registers on this platform
+pub const _C_ARG_OPNDS: [Opnd; 6] = [
+ Opnd::Reg(X0_REG),
+ Opnd::Reg(X1_REG),
+ Opnd::Reg(X2_REG),
+ Opnd::Reg(X3_REG),
+ Opnd::Reg(X4_REG),
+ Opnd::Reg(X5_REG)
+];
+
+// C return value register on this platform
+pub const C_RET_REG: Reg = X0_REG;
+pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG);
+
+// These constants define the way we work with Arm64's stack pointer. The stack
+// pointer always needs to be aligned to a 16-byte boundary.
+pub const C_SP_REG: A64Opnd = X31;
+pub const C_SP_STEP: i32 = 16;
+
+impl CodeBlock {
+ // The maximum number of bytes that can be generated by emit_jmp_ptr.
+ pub fn jmp_ptr_bytes(&self) -> usize {
+ // b instruction's offset is encoded as imm26 times 4. It can jump to
+ // +/-128MiB, so this can be used when --yjit-exec-mem-size <= 128.
+ let num_insns = if b_offset_fits_bits(self.virtual_region_size() as i64 / 4) {
+ 1 // b instruction
+ } else {
+ 5 // 4 instructions to load a 64-bit absolute address + br instruction
+ };
+ num_insns * 4
+ }
+
+ // The maximum number of instructions that can be generated by emit_conditional_jump.
+ fn conditional_jump_insns(&self) -> i32 {
+ // The worst case is the instructions for an unconditional jump plus a bcond.
+ self.jmp_ptr_bytes() as i32 / 4 + 1
+ }
+}
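+
+// For example (illustrative arithmetic based on the two methods above): with
+// --yjit-exec-mem-size <= 128 (MiB), every offset fits in a single b
+// instruction, so jmp_ptr_bytes() is 4 and conditional_jump_insns() is 2
+// (b.cond + b); with a larger region they grow to 20 bytes and 6 instructions
+// (4 instructions to load a 64-bit absolute address, plus br, plus the bcond).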
+
+/// Map Opnd to A64Opnd
+impl From<Opnd> for A64Opnd {
+ fn from(opnd: Opnd) -> Self {
+ match opnd {
+ Opnd::UImm(value) => A64Opnd::new_uimm(value),
+ Opnd::Imm(value) => A64Opnd::new_imm(value),
+ Opnd::Reg(reg) => A64Opnd::Reg(reg),
+ Opnd::Mem(Mem { base: MemBase::Reg(reg_no), num_bits, disp }) => {
+ A64Opnd::new_mem(num_bits, A64Opnd::Reg(A64Reg { num_bits, reg_no }), disp)
+ },
+ Opnd::Mem(Mem { base: MemBase::InsnOut(_), .. }) => {
+ panic!("attempted to lower an Opnd::Mem with a MemBase::InsnOut base")
+ },
+ Opnd::CArg(_) => panic!("attempted to lower an Opnd::CArg"),
+ Opnd::InsnOut { .. } => panic!("attempted to lower an Opnd::InsnOut"),
+ Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"),
+ Opnd::Stack { .. } => panic!("attempted to lower an Opnd::Stack"),
+ Opnd::None => panic!(
+ "Attempted to lower an Opnd::None. This often happens when an out operand was not allocated for an instruction because the output of the instruction was not used. Please ensure you are using the output."
+ ),
+
+ }
+ }
+}
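+
+// A rough sketch of how a few common operands map through the conversion above
+// (the exact A64Opnd representation comes from the constructors in asm::arm64):
+//
+//     A64Opnd::from(Opnd::UImm(42))                      // A64Opnd::new_uimm(42)
+//     A64Opnd::from(Opnd::Reg(X0_REG))                   // A64Opnd::Reg(X0_REG)
+//     A64Opnd::from(Opnd::mem(64, Opnd::Reg(X0_REG), 8)) // 64-bit [x0, #8] memory operand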
+
+/// Also implement going from a reference to an operand for convenience.
+impl From<&Opnd> for A64Opnd {
+ fn from(opnd: &Opnd) -> Self {
+ A64Opnd::from(*opnd)
+ }
+}
+
+/// Call emit_jmp_ptr and immediately invalidate the written range.
+/// This is needed when next_page also moves other_cb, which is not invalidated
+/// by compile_with_regs. Doing it here allows you to avoid invalidating a lot
+/// more than necessary when other_cb jumps from a position early in the page.
+/// This invalidates a small range of cb twice, but we accept the small cost.
+fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) {
+ #[cfg(not(test))]
+ let start = cb.get_write_ptr();
+ emit_jmp_ptr(cb, dst_ptr, true);
+ #[cfg(not(test))]
+ {
+ let end = cb.get_write_ptr();
+ unsafe { rb_yjit_icache_invalidate(start.raw_ptr(cb) as _, end.raw_ptr(cb) as _) };
+ }
+}
+
+fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
+ let src_addr = cb.get_write_ptr().as_offset();
+ let dst_addr = dst_ptr.as_offset();
+
+ // If the offset is short enough, then we'll use the
+ // branch instruction. Otherwise, we'll move the
+ // destination into a register and use the branch
+ // register instruction.
+ let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
+ b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
+ 1
+ } else {
+ let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64);
+ br(cb, Assembler::SCRATCH0);
+ num_insns + 1
+ };
+
+ if padding {
+ // Make sure it's always a consistent number of
+ // instructions in case it gets patched and has to
+ // use the other branch.
+ assert!(num_insns * 4 <= cb.jmp_ptr_bytes());
+ for _ in num_insns..(cb.jmp_ptr_bytes() / 4) {
+ nop(cb);
+ }
+ }
+}
+
+/// Emit the required instructions to load the given value into the
+/// given register. Our goal here is to use as few instructions as
+/// possible to get this value into the register.
+fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
+ let mut current = value;
+
+ if current <= 0xffff {
+ // If the value fits into a single movz
+ // instruction, then we'll use that.
+ movz(cb, rd, A64Opnd::new_uimm(current), 0);
+ return 1;
+ } else if BitmaskImmediate::try_from(current).is_ok() {
+ // Otherwise, if the immediate can be encoded
+ // with the special bitmask immediate encoding,
+ // we'll use that.
+ mov(cb, rd, A64Opnd::new_uimm(current));
+ return 1;
+ } else {
+ // Finally we'll fall back to encoding the value
+ // using movz for the first 16 bits and movk for
+ // each subsequent set of 16 bits, as long as they
+ // are necessary.
+ movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0);
+ let mut num_insns = 1;
+
+ // (We know this step is necessary since we already
+ // checked above whether the value fits into a single movz.)
+ current >>= 16;
+ movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16);
+ num_insns += 1;
+
+ if current > 0xffff {
+ current >>= 16;
+ movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32);
+ num_insns += 1;
+ }
+
+ if current > 0xffff {
+ current >>= 16;
+ movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48);
+ num_insns += 1;
+ }
+ return num_insns;
+ }
+}
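+
+// Worked examples for emit_load_value (illustrative; the exact encodings are
+// produced by the assembler in asm::arm64):
+//
+//     0x1234             => movz rd, #0x1234                        (1 instruction)
+//     0x00ff00ff00ff00ff => mov  rd, #0x00ff00ff00ff00ff            (1 instruction, bitmask immediate)
+//     0xcafecafecafe0000 => movz rd, #0; movk rd, #0xcafe, lsl 16;
+//                           movk rd, #0xcafe, lsl 32; movk rd, #0xcafe, lsl 48
+//                                                                   (4 instructions)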
+
+/// List of registers that can be used for stack temps.
+/// These are caller-saved registers.
+pub static TEMP_REGS: [Reg; 5] = [X1_REG, X9_REG, X10_REG, X14_REG, X15_REG];
+
+#[derive(Debug, PartialEq)]
+enum EmitError {
+ RetryOnNextPage,
+ OutOfMemory,
+}
+
+impl Assembler
+{
+ // Special scratch registers for intermediate processing.
+ // These registers are caller-saved (so we don't have to save them before using them).
+ pub const SCRATCH_REG: Reg = X16_REG;
+ const SCRATCH0: A64Opnd = A64Opnd::Reg(Assembler::SCRATCH_REG);
+ const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG);
+
+ /// Get the list of registers from which we will allocate on this platform
+ /// These are caller-saved registers
+ /// Note: we intentionally exclude C_RET_REG (X0) from this list
+ /// because of the way it's used in gen_leave() and gen_leave_exit()
+ pub fn get_alloc_regs() -> Vec<Reg> {
+ vec![X11_REG, X12_REG, X13_REG]
+ }
+
+ /// Get a list of all of the caller-saved registers
+ pub fn get_caller_save_regs() -> Vec<Reg> {
+ vec![X1_REG, X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG]
+ }
+
+ /// Split platform-specific instructions
+ /// The transformations done here are meant to make our lives simpler in later
+ /// stages of the compilation pipeline.
+ /// Here we may want to make sure that all instructions (except load and store)
+ /// have no memory operands.
+ fn arm64_split(mut self) -> Assembler
+ {
+ /// When we're attempting to load a memory address into a register, the
+ /// displacement must fit into the maximum number of bits for an Op::Add
+ /// immediate. If it doesn't, we have to load the displacement into a
+ /// register first.
+ fn split_lea_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd {
+ match opnd {
+ Opnd::Mem(Mem { base, disp, num_bits }) => {
+ if disp >= 0 && ShiftedImmediate::try_from(disp as u64).is_ok() {
+ asm.lea(opnd)
+ } else {
+ let disp = asm.load(Opnd::Imm(disp.into()));
+ let reg = match base {
+ MemBase::Reg(reg_no) => Opnd::Reg(Reg { reg_no, num_bits }),
+ MemBase::InsnOut(idx) => Opnd::InsnOut { idx, num_bits }
+ };
+
+ asm.add(reg, disp)
+ }
+ },
+ _ => unreachable!("Op::Lea only accepts Opnd::Mem operands.")
+ }
+ }
+
+ /// When you're storing a register into a memory location or loading a
+ /// memory location into a register, the displacement from the base
+ /// register of the memory location must fit into 9 bits. If it doesn't,
+ /// then we need to load that memory address into a register first.
+ fn split_memory_address(asm: &mut Assembler, opnd: Opnd) -> Opnd {
+ match opnd {
+ Opnd::Mem(mem) => {
+ if mem_disp_fits_bits(mem.disp) {
+ opnd
+ } else {
+ let base = split_lea_operand(asm, opnd);
+ Opnd::mem(64, base, 0)
+ }
+ },
+ _ => unreachable!("Can only split memory addresses.")
+ }
+ }
+
+ /// Any memory operands you're sending into an Op::Load instruction need
+ /// to be split in case their displacement doesn't fit into 9 bits.
+ fn split_load_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd {
+ match opnd {
+ Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd,
+ Opnd::Mem(_) => {
+ let split_opnd = split_memory_address(asm, opnd);
+ let out_opnd = asm.load(split_opnd);
+ // Many Arm insns support only 32-bit or 64-bit operands. asm.load with fewer
+ // bits zero-extends the value, so it's safe to recognize it as a 32-bit value.
+ if out_opnd.rm_num_bits() < 32 {
+ out_opnd.with_num_bits(32).unwrap()
+ } else {
+ out_opnd
+ }
+ },
+ _ => asm.load(opnd)
+ }
+ }
+
+ /// Operands that take the place of bitmask immediates must follow a
+ /// certain encoding. In this function we ensure that those operands
+ /// do follow that encoding, and if they don't then we load them first.
+ fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd, dest_num_bits: u8) -> Opnd {
+ match opnd {
+ Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } | Opnd::Stack { .. } => opnd,
+ Opnd::Mem(_) => split_load_operand(asm, opnd),
+ Opnd::Imm(imm) => {
+ if imm == 0 {
+ Opnd::Reg(XZR_REG)
+ } else if (dest_num_bits == 64 &&
+ BitmaskImmediate::try_from(imm as u64).is_ok()) ||
+ (dest_num_bits == 32 &&
+ u32::try_from(imm).is_ok() &&
+ BitmaskImmediate::new_32b_reg(imm as u32).is_ok()) {
+ Opnd::UImm(imm as u64)
+ } else {
+ asm.load(opnd).with_num_bits(dest_num_bits).unwrap()
+ }
+ },
+ Opnd::UImm(uimm) => {
+ if (dest_num_bits == 64 && BitmaskImmediate::try_from(uimm).is_ok()) ||
+ (dest_num_bits == 32 &&
+ u32::try_from(uimm).is_ok() &&
+ BitmaskImmediate::new_32b_reg(uimm as u32).is_ok()) {
+ opnd
+ } else {
+ asm.load(opnd).with_num_bits(dest_num_bits).unwrap()
+ }
+ },
+ Opnd::None | Opnd::Value(_) => unreachable!()
+ }
+ }
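+
+ // For example (see the bitmask-immediate tests at the bottom of this file):
+ // 0b111 (7) and, in a 32-bit context, 0x80000001 are encodable bitmask
+ // immediates and pass through unchanged, while 0b101 (5) and an all-ones
+ // 32-bit value are not encodable and get loaded into a register first.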
+
+ /// Operands that take the place of a shifted immediate must fit within
+ /// a certain size. If they don't then we need to load them first.
+ fn split_shifted_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd {
+ match opnd {
+ Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } => opnd,
+ Opnd::Mem(_) => split_load_operand(asm, opnd),
+ Opnd::Imm(imm) => if ShiftedImmediate::try_from(imm as u64).is_ok() {
+ opnd
+ } else {
+ asm.load(opnd)
+ }
+ Opnd::UImm(uimm) => {
+ if ShiftedImmediate::try_from(uimm).is_ok() {
+ opnd
+ } else {
+ asm.load(opnd)
+ }
+ },
+ Opnd::None | Opnd::Value(_) | Opnd::Stack { .. } => unreachable!()
+ }
+ }
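+
+ // split_shifted_immediate examples: 1 and 0xfff fit directly in the 12-bit
+ // immediate field of add/sub, and 0xfff000 fits using the LSL #12 form, but
+ // 4097 (0x1001) has bits in both halves and gets loaded into a register
+ // first (see test_32_bit_register_with_some_number below).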
+
+ /// Returns the operands that should be used for a boolean logic
+ /// instruction.
+ fn split_boolean_operands(asm: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> (Opnd, Opnd) {
+ match (opnd0, opnd1) {
+ (Opnd::Reg(_), Opnd::Reg(_)) => {
+ (opnd0, opnd1)
+ },
+ (reg_opnd @ Opnd::Reg(_), other_opnd) |
+ (other_opnd, reg_opnd @ Opnd::Reg(_)) => {
+ let opnd1 = split_bitmask_immediate(asm, other_opnd, reg_opnd.rm_num_bits());
+ (reg_opnd, opnd1)
+ },
+ _ => {
+ let opnd0 = split_load_operand(asm, opnd0);
+ let opnd1 = split_bitmask_immediate(asm, opnd1, opnd0.rm_num_bits());
+ (opnd0, opnd1)
+ }
+ }
+ }
+
+ /// Returns the operands that should be used for a csel instruction.
+ fn split_csel_operands(asm: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> (Opnd, Opnd) {
+ let opnd0 = match opnd0 {
+ Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd0,
+ _ => split_load_operand(asm, opnd0)
+ };
+
+ let opnd1 = match opnd1 {
+ Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd1,
+ _ => split_load_operand(asm, opnd1)
+ };
+
+ (opnd0, opnd1)
+ }
+
+ fn split_less_than_32_cmp(asm: &mut Assembler, opnd0: Opnd) -> Opnd {
+ match opnd0 {
+ Opnd::Reg(_) | Opnd::InsnOut { .. } => {
+ match opnd0.rm_num_bits() {
+ 8 => asm.and(opnd0.with_num_bits(64).unwrap(), Opnd::UImm(0xff)),
+ 16 => asm.and(opnd0.with_num_bits(64).unwrap(), Opnd::UImm(0xffff)),
+ 32 | 64 => opnd0,
+ bits => unreachable!("Invalid number of bits. {}", bits)
+ }
+ }
+ _ => opnd0
+ }
+ }
+
+ let live_ranges: Vec<usize> = take(&mut self.live_ranges);
+ let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits));
+ let asm = &mut asm_local;
+ let mut iterator = self.into_draining_iter();
+
+ while let Some((index, mut insn)) = iterator.next_mapped() {
+ // Here we're going to map the operands of the instruction to load
+ // any Opnd::Value operands into registers if they are heap objects
+ // such that only the Op::Load instruction needs to handle that
+ // case. If the values aren't heap objects then we'll treat them as
+ // if they were just unsigned integers.
+ let is_load = matches!(insn, Insn::Load { .. } | Insn::LoadInto { .. });
+ let mut opnd_iter = insn.opnd_iter_mut();
+
+ while let Some(opnd) = opnd_iter.next() {
+ match opnd {
+ Opnd::Value(value) => {
+ if value.special_const_p() {
+ *opnd = Opnd::UImm(value.as_u64());
+ } else if !is_load {
+ *opnd = asm.load(*opnd);
+ }
+ },
+ Opnd::Stack { .. } => {
+ *opnd = asm.lower_stack_opnd(opnd);
+ }
+ _ => {}
+ };
+ }
+
+ // We are replacing instructions here so we know they are already
+ // being used. It is okay not to use their output here.
+ #[allow(unused_must_use)]
+ match &mut insn {
+ Insn::Add { left, right, .. } => {
+ match (*left, *right) {
+ (Opnd::Reg(_) | Opnd::InsnOut { .. }, Opnd::Reg(_) | Opnd::InsnOut { .. }) => {
+ asm.add(*left, *right);
+ },
+ (reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. }), other_opnd) |
+ (other_opnd, reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. })) => {
+ let opnd1 = split_shifted_immediate(asm, other_opnd);
+ asm.add(reg_opnd, opnd1);
+ },
+ _ => {
+ let opnd0 = split_load_operand(asm, *left);
+ let opnd1 = split_shifted_immediate(asm, *right);
+ asm.add(opnd0, opnd1);
+ }
+ }
+ },
+ Insn::And { left, right, out } |
+ Insn::Or { left, right, out } |
+ Insn::Xor { left, right, out } => {
+ let (opnd0, opnd1) = split_boolean_operands(asm, *left, *right);
+ *left = opnd0;
+ *right = opnd1;
+
+ // Since these instructions are lowered to an instruction that has 2 input
+ // registers and an output register, look to merge with an `Insn::Mov` that
+ // follows which puts the output in another register. For example:
+ // `Add a, b => out` followed by `Mov c, out` becomes `Add a, b => c`.
+ if let (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src })) = (left, right, iterator.peek()) {
+ if live_ranges[index] == index + 1 {
+ // Check after potentially lowering a stack operand to a register operand
+ let lowered_dest = if let Opnd::Stack { .. } = dest {
+ asm.lower_stack_opnd(dest)
+ } else {
+ *dest
+ };
+ if out == src && matches!(lowered_dest, Opnd::Reg(_)) {
+ *out = lowered_dest;
+ iterator.map_insn_index(asm);
+ iterator.next_unmapped(); // Pop merged Insn::Mov
+ }
+ }
+ }
+
+ asm.push_insn(insn);
+ }
+ // Lower compare-with-0-and-branch sequences to Joz/Jonz so we can generate CBZ/CBNZ.
+ ref insn @ Insn::Cmp { ref left, right: ref right @ (Opnd::UImm(0) | Opnd::Imm(0)) } |
+ ref insn @ Insn::Test { ref left, right: ref right @ (Opnd::InsnOut { .. } | Opnd::Reg(_)) } if {
+ let same_opnd_if_test = if let Insn::Test { .. } = insn {
+ left == right
+ } else {
+ true
+ };
+
+ same_opnd_if_test && if let Some(
+ Insn::Jz(target) | Insn::Je(target) | Insn::Jnz(target) | Insn::Jne(target)
+ ) = iterator.peek() {
+ matches!(target, Target::SideExit { .. })
+ } else {
+ false
+ }
+ } => {
+ let reg = split_load_operand(asm, *left);
+ match iterator.peek() {
+ Some(Insn::Jz(target) | Insn::Je(target)) => asm.push_insn(Insn::Joz(reg, *target)),
+ Some(Insn::Jnz(target) | Insn::Jne(target)) => asm.push_insn(Insn::Jonz(reg, *target)),
+ _ => ()
+ }
+
+ iterator.map_insn_index(asm);
+ iterator.next_unmapped(); // Pop merged jump instruction
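+
+ // For example, the IR sequence `cmp val, 0; jz side_exit` is collapsed
+ // here into a single `Insn::Joz(val, side_exit)`, which arm64_emit later
+ // lowers to a cbz instruction (see emit_cmp_zero_jump below).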
+ }
+ Insn::CCall { opnds, fptr, .. } => {
+ assert!(opnds.len() <= C_ARG_OPNDS.len());
+
+ // Load each operand into the corresponding argument
+ // register.
+ // Note: the iteration order is reversed to avoid corrupting x0,
+ // which is both the return value and first argument register
+ for (idx, opnd) in opnds.into_iter().enumerate().rev() {
+ // If the value that we're sending is 0, then we can use
+ // the zero register, so in this case we'll just send
+ // a UImm of 0 along as the argument to the move.
+ let value = match opnd {
+ Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0),
+ Opnd::Mem(_) => split_memory_address(asm, *opnd),
+ _ => *opnd
+ };
+
+ asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), value);
+ }
+
+ // Now we push the CCall without any arguments so that it
+ // just performs the call.
+ asm.ccall(*fptr, vec![]);
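+
+ // Illustrative sketch: for a call like `ccall(f, vec![a, b])`, the loop
+ // above emits `LoadInto C_ARG_OPNDS[1] <- b` and then
+ // `LoadInto C_ARG_OPNDS[0] <- a`, so a value currently living in x0 that
+ // is used as a later argument is copied out before x0 is overwritten.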
+ },
+ Insn::Cmp { left, right } => {
+ let opnd0 = split_load_operand(asm, *left);
+ let opnd0 = split_less_than_32_cmp(asm, opnd0);
+ let split_right = split_shifted_immediate(asm, *right);
+ let opnd1 = match split_right {
+ Opnd::InsnOut { .. } if opnd0.num_bits() != split_right.num_bits() => {
+ split_right.with_num_bits(opnd0.num_bits().unwrap()).unwrap()
+ },
+ _ => split_right
+ };
+
+ asm.cmp(opnd0, opnd1);
+ },
+ Insn::CRet(opnd) => {
+ match opnd {
+ // If the value is already in the return register, then
+ // we don't need to do anything.
+ Opnd::Reg(C_RET_REG) => {},
+
+ // If the value is a memory address, we need to first
+ // make sure the displacement isn't too large and then
+ // load it into the return register.
+ Opnd::Mem(_) => {
+ let split = split_memory_address(asm, *opnd);
+ asm.load_into(C_RET_OPND, split);
+ },
+
+ // Otherwise we just need to load the value into the
+ // return register.
+ _ => {
+ asm.load_into(C_RET_OPND, *opnd);
+ }
+ }
+ asm.cret(C_RET_OPND);
+ },
+ Insn::CSelZ { truthy, falsy, out } |
+ Insn::CSelNZ { truthy, falsy, out } |
+ Insn::CSelE { truthy, falsy, out } |
+ Insn::CSelNE { truthy, falsy, out } |
+ Insn::CSelL { truthy, falsy, out } |
+ Insn::CSelLE { truthy, falsy, out } |
+ Insn::CSelG { truthy, falsy, out } |
+ Insn::CSelGE { truthy, falsy, out } => {
+ let (opnd0, opnd1) = split_csel_operands(asm, *truthy, *falsy);
+ *truthy = opnd0;
+ *falsy = opnd1;
+ // Merge `csel` and `mov` into a single `csel` when possible
+ match iterator.peek() {
+ Some(Insn::Mov { dest: Opnd::Reg(reg), src })
+ if matches!(out, Opnd::InsnOut { .. }) && *out == *src && live_ranges[index] == index + 1 => {
+ *out = Opnd::Reg(*reg);
+ asm.push_insn(insn);
+ iterator.map_insn_index(asm);
+ iterator.next_unmapped(); // Pop merged Insn::Mov
+ }
+ _ => {
+ asm.push_insn(insn);
+ }
+ }
+ },
+ Insn::IncrCounter { mem, value } => {
+ let counter_addr = match mem {
+ Opnd::Mem(_) => split_lea_operand(asm, *mem),
+ _ => *mem
+ };
+
+ asm.incr_counter(counter_addr, *value);
+ },
+ Insn::JmpOpnd(opnd) => {
+ if let Opnd::Mem(_) = opnd {
+ let opnd0 = split_load_operand(asm, *opnd);
+ asm.jmp_opnd(opnd0);
+ } else {
+ asm.jmp_opnd(*opnd);
+ }
+ },
+ Insn::Load { opnd, .. } |
+ Insn::LoadInto { opnd, .. } => {
+ *opnd = match opnd {
+ Opnd::Mem(_) => split_memory_address(asm, *opnd),
+ _ => *opnd
+ };
+ asm.push_insn(insn);
+ },
+ Insn::LoadSExt { opnd, .. } => {
+ match opnd {
+ // We only want to sign extend if the operand is a
+ // register, instruction output, or memory address that
+ // is 32 bits. Otherwise we'll just load the value
+ // directly since there's no need to sign extend.
+ Opnd::Reg(Reg { num_bits: 32, .. }) |
+ Opnd::InsnOut { num_bits: 32, .. } |
+ Opnd::Mem(Mem { num_bits: 32, .. }) => {
+ asm.load_sext(*opnd);
+ },
+ _ => {
+ asm.load(*opnd);
+ }
+ };
+ },
+ Insn::Mov { dest, src } => {
+ match (&dest, &src) {
+ // If we're attempting to load into a memory operand, then
+ // we'll switch over to the store instruction.
+ (Opnd::Mem(_), _) => {
+ let opnd0 = split_memory_address(asm, *dest);
+ let value = match *src {
+ // If the value being moved is zero, then we can just
+ // use the zero register.
+ Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG),
+ // Since the destination is a memory operand, we're going
+ // to transform this into a store instruction, so we'll
+ // need to load this value into a register anyway.
+ Opnd::UImm(_) => asm.load(*src),
+ // The value that is being moved must be either a
+ // register or an immediate that can be encoded as a
+ // bitmask immediate. Otherwise, we'll need to split the
+ // move into multiple instructions.
+ _ => split_bitmask_immediate(asm, *src, dest.rm_num_bits())
+ };
+
+ asm.store(opnd0, value);
+ },
+ // If we're loading a memory operand into a register, then
+ // we'll switch over to the load instruction.
+ (Opnd::Reg(_), Opnd::Mem(_)) => {
+ let value = split_memory_address(asm, *src);
+ asm.load_into(*dest, value);
+ },
+ // Otherwise we'll use the normal mov instruction.
+ (Opnd::Reg(_), _) => {
+ let value = match *src {
+ // Unlike other instructions, we can avoid splitting this case, using movz.
+ Opnd::UImm(uimm) if uimm <= 0xffff => *src,
+ _ => split_bitmask_immediate(asm, *src, dest.rm_num_bits()),
+ };
+ asm.mov(*dest, value);
+ },
+ _ => unreachable!()
+ };
+ },
+ Insn::Not { opnd, .. } => {
+ // The value that is being negated must be in a register, so
+ // if we get anything else we need to load it first.
+ let opnd0 = match opnd {
+ Opnd::Mem(_) => split_load_operand(asm, *opnd),
+ _ => *opnd
+ };
+
+ asm.not(opnd0);
+ },
+ Insn::LShift { opnd, .. } |
+ Insn::RShift { opnd, .. } |
+ Insn::URShift { opnd, .. } => {
+ // The operand must be in a register, so
+ // if we get anything else we need to load it first.
+ let opnd0 = match opnd {
+ Opnd::Mem(_) => split_load_operand(asm, *opnd),
+ _ => *opnd
+ };
+
+ *opnd = opnd0;
+ asm.push_insn(insn);
+ },
+ Insn::Store { dest, src } => {
+ // The value being stored must be in a register, so if it's
+ // not already one we'll load it first.
+ let opnd1 = match src {
+ // If the value being stored is zero, then we can just
+ // use the zero register.
+ Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG),
+ // Otherwise we'll check if we need to load it first.
+ _ => split_load_operand(asm, *src)
+ };
+
+ match dest {
+ Opnd::Reg(_) => {
+ // Store does not support a register as a dest operand.
+ asm.mov(*dest, opnd1);
+ }
+ _ => {
+ // The displacement for the STUR instruction can't be more
+ // than 9 bits long. If it's longer, we need to load the
+ // memory address into a register first.
+ let opnd0 = split_memory_address(asm, *dest);
+ asm.store(opnd0, opnd1);
+ }
+ }
+ },
+ Insn::Sub { left, right, .. } => {
+ let opnd0 = split_load_operand(asm, *left);
+ let opnd1 = split_shifted_immediate(asm, *right);
+ asm.sub(opnd0, opnd1);
+ },
+ Insn::Mul { left, right, .. } => {
+ let opnd0 = split_load_operand(asm, *left);
+ let opnd1 = split_load_operand(asm, *right);
+ asm.mul(opnd0, opnd1);
+ },
+ Insn::Test { left, right } => {
+ // The value being tested must be in a register, so if it's
+ // not already one we'll load it first.
+ let opnd0 = split_load_operand(asm, *left);
+
+ // The second value must be either a register or an
+ // unsigned immediate that can be encoded as a bitmask
+ // immediate. If it's not one of those, we'll need to load
+ // it first.
+ let opnd1 = split_bitmask_immediate(asm, *right, opnd0.rm_num_bits());
+ asm.test(opnd0, opnd1);
+ },
+ _ => {
+ // If we have an output operand, then we need to replace it
+ // with a new output operand from the new assembler.
+ if insn.out_opnd().is_some() {
+ let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter());
+ let out = insn.out_opnd_mut().unwrap();
+ *out = asm.next_opnd_out(out_num_bits);
+ }
+
+ asm.push_insn(insn);
+ }
+ };
+
+ iterator.map_insn_index(asm);
+ }
+
+ asm_local
+ }
+
+ /// Emit platform-specific machine code
+ /// Returns a list of GC offsets. Can return failure to signal caller to retry.
+ fn arm64_emit(&mut self, cb: &mut CodeBlock, ocb: &mut Option<&mut OutlinedCb>) -> Result<Vec<u32>, EmitError> {
+ /// Determine how many instructions it will take to represent moving
+ /// this value into a register. Note that the return value of this
+ /// function must correspond to how many instructions are used to
+ /// represent this load in the emit_load_value function.
+ fn emit_load_size(value: u64) -> u8 {
+ if BitmaskImmediate::try_from(value).is_ok() {
+ return 1;
+ }
+
+ if value < (1 << 16) {
+ 1
+ } else if value < (1 << 32) {
+ 2
+ } else if value < (1 << 48) {
+ 3
+ } else {
+ 4
+ }
+ }
+
+ /// Emit a conditional jump instruction to a specific target. This is
+ /// called when lowering any of the conditional jump instructions.
+ fn emit_conditional_jump<const CONDITION: u8>(cb: &mut CodeBlock, target: Target) {
+ match target {
+ Target::CodePtr(dst_ptr) | Target::SideExitPtr(dst_ptr) => {
+ let dst_addr = dst_ptr.as_offset();
+ let src_addr = cb.get_write_ptr().as_offset();
+
+ let num_insns = if bcond_offset_fits_bits((dst_addr - src_addr) / 4) {
+ // If the jump offset fits into the conditional jump as
+ // an immediate value and it's properly aligned, then we
+ // can use the b.cond instruction directly. We're safe
+ // to use as i32 here since we already checked that it
+ // fits.
+ let bytes = (dst_addr - src_addr) as i32;
+ bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes));
+
+ // Here we're going to return 1 because we've only
+ // written out 1 instruction.
+ 1
+ } else if b_offset_fits_bits((dst_addr - (src_addr + 4)) / 4) { // + 4 for bcond
+ // If the jump offset fits into the unconditional jump as
+ // an immediate value, we can use inverse b.cond + b.
+ //
+ // We're going to write out the inverse condition so
+ // that if it doesn't match it will skip over the
+ // instruction used for branching.
+ bcond(cb, Condition::inverse(CONDITION), 2.into());
+ b(cb, InstructionOffset::from_bytes((dst_addr - (src_addr + 4)) as i32)); // + 4 for bcond
+
+ // We've only written out 2 instructions.
+ 2
+ } else {
+ // Otherwise, we need to load the address into a
+ // register and use the branch register instruction.
+ let dst_addr = (dst_ptr.raw_ptr(cb) as usize).as_u64();
+ let load_insns: i32 = emit_load_size(dst_addr).into();
+
+ // We're going to write out the inverse condition so
+ // that if it doesn't match it will skip over the
+ // instructions used for branching.
+ bcond(cb, Condition::inverse(CONDITION), (load_insns + 2).into());
+ emit_load_value(cb, Assembler::SCRATCH0, dst_addr);
+ br(cb, Assembler::SCRATCH0);
+
+ // Here we'll return the number of instructions that it
+ // took to write out the destination address + 1 for the
+ // b.cond and 1 for the br.
+ load_insns + 2
+ };
+
+ if let Target::CodePtr(_) = target {
+ // We need to pad out every kind of jump to a consistent
+ // number of instructions (conditional_jump_insns) for
+ // invalidation purposes, so we write nop padding here.
+ assert!(num_insns <= cb.conditional_jump_insns());
+ for _ in num_insns..cb.conditional_jump_insns() { nop(cb); }
+ }
+ },
+ Target::Label(label_idx) => {
+ // Here we're going to save enough space for ourselves and
+ // then come back and write the instruction once we know the
+ // offset. We're going to assume we can fit into a single
+ // b.cond instruction. It will panic otherwise.
+ cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| {
+ let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap();
+ bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes));
+ });
+ },
+ Target::SideExit { .. } => {
+ unreachable!("Target::SideExit should have been compiled by compile_side_exit")
+ },
+ };
+ }
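+
+ // Roughly, the three shapes emit_conditional_jump can produce for a
+ // Target::CodePtr are (pseudo-assembly; cond is the CONDITION parameter):
+ //
+ //     b.cond #offset                          ; offset fits in 19 bits (+/-1MiB)
+ //     b.!cond #2; b #offset                   ; offset fits in 26 bits (+/-128MiB)
+ //     b.!cond #(n+2); movz/movk ...; br x16   ; absolute address otherwise
+ //
+ // followed by nop padding up to conditional_jump_insns() so the jump can be
+ // patched to the other shape later.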
+
+ /// Emit a CBZ or CBNZ which branches when a register is zero or non-zero
+ fn emit_cmp_zero_jump(cb: &mut CodeBlock, reg: A64Opnd, branch_if_zero: bool, target: Target) {
+ if let Target::SideExitPtr(dst_ptr) = target {
+ let dst_addr = dst_ptr.as_offset();
+ let src_addr = cb.get_write_ptr().as_offset();
+
+ if cmp_branch_offset_fits_bits((dst_addr - src_addr) / 4) {
+ // If the offset fits in one instruction, generate cbz or cbnz
+ let bytes = (dst_addr - src_addr) as i32;
+ if branch_if_zero {
+ cbz(cb, reg, InstructionOffset::from_bytes(bytes));
+ } else {
+ cbnz(cb, reg, InstructionOffset::from_bytes(bytes));
+ }
+ } else {
+ // Otherwise, we load the address into a register and
+ // use the branch register instruction. Note that because
+ // side exits should always be close, this form should be
+ // rare or impossible to see.
+ let dst_addr = dst_ptr.raw_addr(cb) as u64;
+ let load_insns: i32 = emit_load_size(dst_addr).into();
+
+ // Write out the inverse condition so that if
+ // it doesn't match it will skip over the
+ // instructions used for branching.
+ if branch_if_zero {
+ cbnz(cb, reg, InstructionOffset::from_insns(load_insns + 2));
+ } else {
+ cbz(cb, reg, InstructionOffset::from_insns(load_insns + 2));
+ }
+ emit_load_value(cb, Assembler::SCRATCH0, dst_addr);
+ br(cb, Assembler::SCRATCH0);
+
+ }
+ } else {
+ unreachable!("We should only generate Joz/Jonz with side-exit targets");
+ }
+ }
+
+ /// Emit a push instruction for the given operand by adding to the stack
+ /// pointer and then storing the given value.
+ fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) {
+ str_pre(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, -C_SP_STEP));
+ }
+
+ /// Emit a pop instruction into the given operand by loading the value
+ /// and then subtracting from the stack pointer.
+ fn emit_pop(cb: &mut CodeBlock, opnd: A64Opnd) {
+ ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP));
+ }
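+
+ // Together these two helpers produce roughly (for a 64-bit operand):
+ //
+ //     str x1, [sp, #-16]!   ; emit_push: pre-decrement SP by C_SP_STEP, then store
+ //     ldr x1, [sp], #16     ; emit_pop: load, then post-increment SP by C_SP_STEP
+ //
+ // Only 8 bytes are stored per slot, but SP always moves by 16 bytes to keep
+ // the required 16-byte stack alignment.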
+
+ /// Compile a side exit if Target::SideExit is given.
+ fn compile_side_exit(
+ target: Target,
+ asm: &mut Assembler,
+ ocb: &mut Option<&mut OutlinedCb>,
+ ) -> Result<Target, EmitError> {
+ if let Target::SideExit { counter, context } = target {
+ let side_exit = asm.get_side_exit(&context.unwrap(), Some(counter), ocb.as_mut().unwrap())
+ .ok_or(EmitError::OutOfMemory)?;
+ Ok(Target::SideExitPtr(side_exit))
+ } else {
+ Ok(target)
+ }
+ }
+
+ // dbg!(&self.insns);
+
+ // List of GC offsets
+ let mut gc_offsets: Vec<u32> = Vec::new();
+
+ // Buffered list of PosMarker callbacks to fire if codegen is successful
+ let mut pos_markers: Vec<(usize, CodePtr)> = vec![];
+
+ // For each instruction
+ let start_write_pos = cb.get_write_pos();
+ let mut insn_idx: usize = 0;
+ while let Some(insn) = self.insns.get(insn_idx) {
+ let src_ptr = cb.get_write_ptr();
+ let had_dropped_bytes = cb.has_dropped_bytes();
+ let old_label_state = cb.get_label_state();
+ let mut insn_gc_offsets: Vec<u32> = Vec::new();
+
+ match insn {
+ Insn::Comment(text) => {
+ if cfg!(feature = "disasm") {
+ cb.add_comment(text);
+ }
+ },
+ Insn::Label(target) => {
+ cb.write_label(target.unwrap_label_idx());
+ },
+ // Report back the current position in the generated code
+ Insn::PosMarker(..) => {
+ pos_markers.push((insn_idx, cb.get_write_ptr()))
+ }
+ Insn::BakeString(text) => {
+ for byte in text.as_bytes() {
+ cb.write_byte(*byte);
+ }
+
+ // Add a null-terminator byte for safety (in case we pass
+ // this to C code)
+ cb.write_byte(0);
+
+ // Pad out the string to the next 4-byte boundary so that
+ // it's easy to jump past.
+ for _ in 0..(4 - ((text.len() + 1) % 4)) {
+ cb.write_byte(0);
+ }
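+
+ // For example, baking "Hello, world!" writes 13 bytes, a NUL, and then
+ // 2 padding bytes, for 16 bytes in total (see test_emit_bake_string below).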
+ },
+ Insn::FrameSetup => {
+ stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16));
+
+ // X29 (frame_pointer) = SP
+ mov(cb, X29, C_SP_REG);
+ },
+ Insn::FrameTeardown => {
+ // SP = X29 (frame pointer)
+ mov(cb, C_SP_REG, X29);
+
+ ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16));
+ },
+ Insn::Add { left, right, out } => {
+ adds(cb, out.into(), left.into(), right.into());
+ },
+ Insn::Sub { left, right, out } => {
+ subs(cb, out.into(), left.into(), right.into());
+ },
+ Insn::Mul { left, right, out } => {
+ // If the next instruction is jo (jump on overflow)
+ match (self.insns.get(insn_idx + 1), self.insns.get(insn_idx + 2)) {
+ (Some(Insn::JoMul(_)), _) |
+ (Some(Insn::PosMarker(_)), Some(Insn::JoMul(_))) => {
+ // Compute the high 64 bits
+ smulh(cb, Self::SCRATCH0, left.into(), right.into());
+
+ // Compute the low 64 bits
+ // This may clobber one of the input registers,
+ // so we do it after smulh
+ mul(cb, out.into(), left.into(), right.into());
+
+ // Produce a register that is all zeros or all ones
+ // Based on the sign bit of the 64-bit mul result
+ asr(cb, Self::SCRATCH1, out.into(), A64Opnd::UImm(63));
+
+ // If the high 64-bits are not all zeros or all ones,
+ // matching the sign bit, then we have an overflow
+ cmp(cb, Self::SCRATCH0, Self::SCRATCH1);
+ // Insn::JoMul will emit_conditional_jump::<{Condition::NE}>
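+
+ // Worked example: for left = 1 << 62 and right = 4, the low 64 bits
+ // of the product are 0 while smulh yields 1; asr of the low result
+ // gives 0, so the cmp sets NE and JoMul takes the overflow path.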
+ }
+ _ => {
+ mul(cb, out.into(), left.into(), right.into());
+ }
+ }
+ },
+ Insn::And { left, right, out } => {
+ and(cb, out.into(), left.into(), right.into());
+ },
+ Insn::Or { left, right, out } => {
+ orr(cb, out.into(), left.into(), right.into());
+ },
+ Insn::Xor { left, right, out } => {
+ eor(cb, out.into(), left.into(), right.into());
+ },
+ Insn::Not { opnd, out } => {
+ mvn(cb, out.into(), opnd.into());
+ },
+ Insn::RShift { opnd, shift, out } => {
+ asr(cb, out.into(), opnd.into(), shift.into());
+ },
+ Insn::URShift { opnd, shift, out } => {
+ lsr(cb, out.into(), opnd.into(), shift.into());
+ },
+ Insn::LShift { opnd, shift, out } => {
+ lsl(cb, out.into(), opnd.into(), shift.into());
+ },
+ Insn::Store { dest, src } => {
+ // This order may be surprising but it is correct. The way
+ // the Arm64 assembler works, the register that is going to
+ // be stored is first and the address is second. However in
+ // our IR we have the address first and the register second.
+ match dest.rm_num_bits() {
+ 64 | 32 => stur(cb, src.into(), dest.into()),
+ 16 => sturh(cb, src.into(), dest.into()),
+ num_bits => panic!("unexpected dest num_bits: {} (src: {:#?}, dest: {:#?})", num_bits, src, dest),
+ }
+ },
+ Insn::Load { opnd, out } |
+ Insn::LoadInto { opnd, dest: out } => {
+ match *opnd {
+ Opnd::Reg(_) | Opnd::InsnOut { .. } => {
+ mov(cb, out.into(), opnd.into());
+ },
+ Opnd::UImm(uimm) => {
+ emit_load_value(cb, out.into(), uimm);
+ },
+ Opnd::Imm(imm) => {
+ emit_load_value(cb, out.into(), imm as u64);
+ },
+ Opnd::Mem(_) => {
+ match opnd.rm_num_bits() {
+ 64 | 32 => ldur(cb, out.into(), opnd.into()),
+ 16 => ldurh(cb, out.into(), opnd.into()),
+ 8 => ldurb(cb, out.into(), opnd.into()),
+ num_bits => panic!("unexpected num_bits: {}", num_bits)
+ };
+ },
+ Opnd::Value(value) => {
+ // We don't need to check if it's a special const
+ // here because we only allow these operands to hit
+ // this point if they're not a special const.
+ assert!(!value.special_const_p());
+
+ // This assumes only load instructions can contain
+ // references to GC'd Value operands. If the value
+ // being loaded is a heap object, we'll report that
+ // back out to the gc_offsets list.
+ ldr_literal(cb, out.into(), 2.into());
+ b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32)));
+ cb.write_bytes(&value.as_u64().to_le_bytes());
+
+ let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
+ insn_gc_offsets.push(ptr_offset);
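+
+ // The emitted shape is roughly:
+ //
+ //     ldr  out, #8      ; load the 8-byte literal placed after the branch
+ //     b    #12          ; skip over the inline VALUE
+ //     .quad value       ; the VALUE itself, tracked via gc_offsets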
+ },
+ Opnd::CArg { .. } => {
+ unreachable!("C argument operand was not lowered before arm64_emit");
+ }
+ Opnd::Stack { .. } => {
+ unreachable!("Stack operand was not lowered before arm64_emit");
+ }
+ Opnd::None => {
+ unreachable!("Attempted to load from None operand");
+ }
+ };
+ },
+ Insn::LoadSExt { opnd, out } => {
+ match *opnd {
+ Opnd::Reg(Reg { num_bits: 32, .. }) |
+ Opnd::InsnOut { num_bits: 32, .. } => {
+ sxtw(cb, out.into(), opnd.into());
+ },
+ Opnd::Mem(Mem { num_bits: 32, .. }) => {
+ ldursw(cb, out.into(), opnd.into());
+ },
+ _ => unreachable!()
+ };
+ },
+ Insn::Mov { dest, src } => {
+ // This supports the following two kinds of immediates:
+ // * The value fits into a single movz instruction
+ // * It can be encoded with the special bitmask immediate encoding
+ // arm64_split() should have split other immediates that require multiple instructions.
+ match src {
+ Opnd::UImm(uimm) if *uimm <= 0xffff => {
+ movz(cb, dest.into(), A64Opnd::new_uimm(*uimm), 0);
+ },
+ _ => {
+ mov(cb, dest.into(), src.into());
+ }
+ }
+ },
+ Insn::Lea { opnd, out } => {
+ let opnd: A64Opnd = opnd.into();
+
+ match opnd {
+ A64Opnd::Mem(mem) => {
+ add(
+ cb,
+ out.into(),
+ A64Opnd::Reg(A64Reg { reg_no: mem.base_reg_no, num_bits: 64 }),
+ A64Opnd::new_imm(mem.disp.into())
+ );
+ },
+ _ => {
+ panic!("Op::Lea only accepts Opnd::Mem operands.");
+ }
+ };
+ },
+ Insn::LeaJumpTarget { out, target, .. } => {
+ if let Target::Label(label_idx) = target {
+ // Set output to the raw address of the label
+ cb.label_ref(*label_idx, 4, |cb, end_addr, dst_addr| {
+ adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4)));
+ });
+
+ mov(cb, out.into(), Self::SCRATCH0);
+ } else {
+ // Set output to the jump target's raw address
+ let target_code = target.unwrap_code_ptr();
+ let target_addr = target_code.raw_addr(cb).as_u64();
+ emit_load_value(cb, out.into(), target_addr);
+ }
+ },
+ Insn::CPush(opnd) => {
+ emit_push(cb, opnd.into());
+ },
+ Insn::CPop { out } => {
+ emit_pop(cb, out.into());
+ },
+ Insn::CPopInto(opnd) => {
+ emit_pop(cb, opnd.into());
+ },
+ Insn::CPushAll => {
+ let regs = Assembler::get_caller_save_regs();
+
+ for reg in regs {
+ emit_push(cb, A64Opnd::Reg(reg));
+ }
+
+ // Push the flags/state register
+ mrs(cb, Self::SCRATCH0, SystemRegister::NZCV);
+ emit_push(cb, Self::SCRATCH0);
+ },
+ Insn::CPopAll => {
+ let regs = Assembler::get_caller_save_regs();
+
+ // Pop the state/flags register
+ msr(cb, SystemRegister::NZCV, Self::SCRATCH0);
+ emit_pop(cb, Self::SCRATCH0);
+
+ for reg in regs.into_iter().rev() {
+ emit_pop(cb, A64Opnd::Reg(reg));
+ }
+ },
+ Insn::CCall { fptr, .. } => {
+ // The offset to the call target in bytes
+ let src_addr = cb.get_write_ptr().raw_ptr(cb) as i64;
+ let dst_addr = *fptr as i64;
+
+ // Use BL if the offset is short enough to encode as an immediate.
+ // Otherwise, use BLR with a register.
+ if b_offset_fits_bits((dst_addr - src_addr) / 4) {
+ bl(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
+ } else {
+ emit_load_value(cb, Self::SCRATCH0, dst_addr as u64);
+ blr(cb, Self::SCRATCH0);
+ }
+ },
+ Insn::CRet { .. } => {
+ ret(cb, A64Opnd::None);
+ },
+ Insn::Cmp { left, right } => {
+ cmp(cb, left.into(), right.into());
+ },
+ Insn::Test { left, right } => {
+ tst(cb, left.into(), right.into());
+ },
+ Insn::JmpOpnd(opnd) => {
+ br(cb, opnd.into());
+ },
+ Insn::Jmp(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(dst_ptr) => {
+ emit_jmp_ptr(cb, dst_ptr, true);
+ },
+ Target::SideExitPtr(dst_ptr) => {
+ emit_jmp_ptr(cb, dst_ptr, false);
+ },
+ Target::Label(label_idx) => {
+ // Here we're going to save enough space for
+ // ourselves and then come back and write the
+ // instruction once we know the offset. We're going
+ // to assume we can fit into a single b instruction.
+ // It will panic otherwise.
+ cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| {
+ let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap();
+ b(cb, InstructionOffset::from_bytes(bytes));
+ });
+ },
+ Target::SideExit { .. } => {
+ unreachable!("Target::SideExit should have been compiled by compile_side_exit")
+ },
+ };
+ },
+ Insn::Je(target) | Insn::Jz(target) => {
+ emit_conditional_jump::<{Condition::EQ}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jne(target) | Insn::Jnz(target) | Insn::JoMul(target) => {
+ emit_conditional_jump::<{Condition::NE}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jl(target) => {
+ emit_conditional_jump::<{Condition::LT}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jg(target) => {
+ emit_conditional_jump::<{Condition::GT}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jge(target) => {
+ emit_conditional_jump::<{Condition::GE}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jbe(target) => {
+ emit_conditional_jump::<{Condition::LS}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jb(target) => {
+ emit_conditional_jump::<{Condition::CC}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jo(target) => {
+ emit_conditional_jump::<{Condition::VS}>(cb, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Joz(opnd, target) => {
+ emit_cmp_zero_jump(cb, opnd.into(), true, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::Jonz(opnd, target) => {
+ emit_cmp_zero_jump(cb, opnd.into(), false, compile_side_exit(*target, self, ocb)?);
+ },
+ Insn::IncrCounter { mem, value } => {
+ let label = cb.new_label("incr_counter_loop".to_string());
+ cb.write_label(label);
+
+ ldaxr(cb, Self::SCRATCH0, mem.into());
+ add(cb, Self::SCRATCH0, Self::SCRATCH0, value.into());
+
+ // The status register that gets used to track whether or
+ // not the store was successful must be 32 bits. Since we
+ // define the SCRATCH registers as their 64-bit versions, we
+ // need to rewrap it here.
+ let status = A64Opnd::Reg(Self::SCRATCH1.unwrap_reg().with_num_bits(32));
+ stlxr(cb, status, Self::SCRATCH0, mem.into());
+
+ cmp(cb, Self::SCRATCH1, A64Opnd::new_uimm(0));
+ emit_conditional_jump::<{Condition::NE}>(cb, Target::Label(label));
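+
+ // In pseudo-assembly, the loop above is roughly:
+ //
+ //     incr_counter_loop:
+ //         ldaxr x16, [counter]        ; load-acquire exclusive
+ //         add   x16, x16, #value
+ //         stlxr w17, x16, [counter]   ; store-release exclusive; w17 == 0 on success
+ //         cmp   x17, #0
+ //         b.ne  incr_counter_loop     ; retry if the exclusive store failed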
+ },
+ Insn::Breakpoint => {
+ brk(cb, A64Opnd::None);
+ },
+ Insn::CSelZ { truthy, falsy, out } |
+ Insn::CSelE { truthy, falsy, out } => {
+ csel(cb, out.into(), truthy.into(), falsy.into(), Condition::EQ);
+ },
+ Insn::CSelNZ { truthy, falsy, out } |
+ Insn::CSelNE { truthy, falsy, out } => {
+ csel(cb, out.into(), truthy.into(), falsy.into(), Condition::NE);
+ },
+ Insn::CSelL { truthy, falsy, out } => {
+ csel(cb, out.into(), truthy.into(), falsy.into(), Condition::LT);
+ },
+ Insn::CSelLE { truthy, falsy, out } => {
+ csel(cb, out.into(), truthy.into(), falsy.into(), Condition::LE);
+ },
+ Insn::CSelG { truthy, falsy, out } => {
+ csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GT);
+ },
+ Insn::CSelGE { truthy, falsy, out } => {
+ csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GE);
+ }
+ Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
+ Insn::PadInvalPatch => {
+ while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < cb.jmp_ptr_bytes() && !cb.has_dropped_bytes() {
+ nop(cb);
+ }
+ }
+ };
+
+ // On failure, jump to the next page and retry the current insn
+ if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr_with_invalidation) {
+ // Reset cb states before retrying the current Insn
+ cb.set_label_state(old_label_state);
+
+ // We don't want label references to cross page boundaries. Signal caller for
+ // retry.
+ if !self.label_names.is_empty() {
+ return Err(EmitError::RetryOnNextPage);
+ }
+ } else {
+ insn_idx += 1;
+ gc_offsets.append(&mut insn_gc_offsets);
+ }
+ }
+
+ // Error if we couldn't write out everything
+ if cb.has_dropped_bytes() {
+ return Err(EmitError::OutOfMemory)
+ } else {
+ // No bytes dropped, so the pos markers point to valid code
+ for (insn_idx, pos) in pos_markers {
+ if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() {
+ callback(pos, &cb);
+ } else {
+ panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}");
+ }
+ }
+
+ return Ok(gc_offsets)
+ }
+ }
+
+ /// Optimize and compile the stored instructions
+ pub fn compile_with_regs(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>, regs: Vec<Reg>) -> Option<(CodePtr, Vec<u32>)> {
+ let asm = self.arm64_split();
+ let mut asm = asm.alloc_regs(regs);
+
+ // Create label instances in the code block
+ for (idx, name) in asm.label_names.iter().enumerate() {
+ let label_idx = cb.new_label(name.to_string());
+ assert!(label_idx == idx);
+ }
+
+ let start_ptr = cb.get_write_ptr();
+ let starting_label_state = cb.get_label_state();
+ let mut ocb = ocb; // for &mut
+ let emit_result = match asm.arm64_emit(cb, &mut ocb) {
+ Err(EmitError::RetryOnNextPage) => {
+ // we want to lower jumps to labels to b.cond instructions, which have a 1 MiB
+ // range limit. We can easily exceed the limit in case the jump straddles two pages.
+ // In this case, we retry with a fresh page.
+ cb.set_label_state(starting_label_state);
+ cb.next_page(start_ptr, emit_jmp_ptr_with_invalidation);
+ let result = asm.arm64_emit(cb, &mut ocb);
+ assert_ne!(
+ Err(EmitError::RetryOnNextPage),
+ result,
+ "should not fail when writing to a fresh code page"
+ );
+ result
+ }
+ result => result
+ };
+
+ if let (Ok(gc_offsets), false) = (emit_result, cb.has_dropped_bytes()) {
+ cb.link_labels();
+
+ // Invalidate icache for newly written out region so we don't run stale code.
+ // It should invalidate only the code ranges of the current cb because the code
+ // ranges of the other cb might have a memory region that is still PROT_NONE.
+ #[cfg(not(test))]
+ cb.without_page_end_reserve(|cb| {
+ for (start, end) in cb.writable_addrs(start_ptr, cb.get_write_ptr()) {
+ unsafe { rb_yjit_icache_invalidate(start as _, end as _) };
+ }
+ });
+
+ Some((start_ptr, gc_offsets))
+ } else {
+ cb.clear_labels();
+
+ None
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::disasm::*;
+
+ fn setup_asm() -> (Assembler, CodeBlock) {
+ (Assembler::new(), CodeBlock::new_dummy(1024))
+ }
+
+ #[test]
+ fn test_emit_add() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.add(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG));
+ asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
+ asm.compile_with_regs(&mut cb, None, vec![X3_REG]);
+
+ // Assert that only 2 instructions were written.
+ assert_eq!(8, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_bake_string() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.bake_string("Hello, world!");
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ // Testing that we pad the string to the nearest 4-byte boundary to make
+ // it easier to jump over.
+ assert_eq!(16, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_cpush_all() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.cpush_all();
+ asm.compile_with_num_regs(&mut cb, 0);
+ }
+
+ #[test]
+ fn test_emit_cpop_all() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.cpop_all();
+ asm.compile_with_num_regs(&mut cb, 0);
+ }
+
+ #[test]
+ fn test_emit_frame() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.frame_setup();
+ asm.frame_teardown();
+ asm.compile_with_num_regs(&mut cb, 0);
+ }
+
+ #[test]
+ fn test_emit_je_fits_into_bcond() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let target: CodePtr = cb.get_write_ptr().add_bytes(80);
+
+ asm.je(Target::CodePtr(target));
+ asm.compile_with_num_regs(&mut cb, 0);
+ }
+
+ #[test]
+ fn test_emit_je_does_not_fit_into_bcond() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let offset = 1 << 21;
+ let target: CodePtr = cb.get_write_ptr().add_bytes(offset);
+
+ asm.je(Target::CodePtr(target));
+ asm.compile_with_num_regs(&mut cb, 0);
+ }
+
+ #[test]
+ fn test_emit_lea_label() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let label = asm.new_label("label");
+ let opnd = asm.lea_jump_target(label);
+
+ asm.write_label(label);
+ asm.bake_string("Hello, world!");
+ asm.store(Opnd::mem(64, SP, 0), opnd);
+
+ asm.compile_with_num_regs(&mut cb, 1);
+ }
+
+ #[test]
+ fn test_emit_load_mem_disp_fits_into_load() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.load(Opnd::mem(64, SP, 0));
+ asm.store(Opnd::mem(64, SP, 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that two instructions were written: LDUR and STUR.
+ assert_eq!(8, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_load_mem_disp_fits_into_add() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.load(Opnd::mem(64, SP, 1 << 10));
+ asm.store(Opnd::mem(64, SP, 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that three instructions were written: ADD, LDUR, and STUR.
+ assert_eq!(12, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_load_mem_disp_does_not_fit_into_add() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.load(Opnd::mem(64, SP, 1 << 12 | 1));
+ asm.store(Opnd::mem(64, SP, 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that four instructions were written: MOVZ, ADD, LDUR, and STUR.
+ assert_eq!(16, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_load_value_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.load(Opnd::Value(Qnil));
+ asm.store(Opnd::mem(64, SP, 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that only two instructions were written since the value is an
+ // immediate.
+ assert_eq!(8, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_load_value_non_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.load(Opnd::Value(VALUE(0xCAFECAFECAFE0000)));
+ asm.store(Opnd::mem(64, SP, 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that five instructions were written since the value is not an
+ // immediate and needs to be loaded into a register.
+ assert_eq!(20, cb.get_write_pos());
+ }
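+
+ #[test]
+ fn test_emit_load_value_two_instruction_immediate() {
+ // A sketch of an extra check: 0x12340000 (a value picked purely for
+ // illustration) is neither a single-movz value nor a bitmask immediate,
+ // so emit_load_value should use movz + movk, followed by a stur for the store.
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.load(Opnd::UImm(0x1234_0000));
+ asm.store(Opnd::mem(64, SP, 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that three instructions were written: MOVZ, MOVK, and STUR.
+ assert_eq!(12, cb.get_write_pos());
+ }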
+
+ #[test]
+ fn test_emit_test_32b_reg_not_bitmask_imm() {
+ let (mut asm, mut cb) = setup_asm();
+ let w0 = Opnd::Reg(X0_REG).with_num_bits(32).unwrap();
+ asm.test(w0, Opnd::UImm(u32::MAX.into()));
+ // All ones is not encodable with a bitmask immediate,
+ // so this needs one register
+ asm.compile_with_num_regs(&mut cb, 1);
+ }
+
+ #[test]
+ fn test_emit_test_32b_reg_bitmask_imm() {
+ let (mut asm, mut cb) = setup_asm();
+ let w0 = Opnd::Reg(X0_REG).with_num_bits(32).unwrap();
+ asm.test(w0, Opnd::UImm(0x80000001));
+ asm.compile_with_num_regs(&mut cb, 0);
+ }
+
+ #[test]
+ fn test_emit_or() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.or(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG));
+ asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+ }
+
+ #[test]
+ fn test_emit_lshift() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.lshift(Opnd::Reg(X0_REG), Opnd::UImm(5));
+ asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+ }
+
+ #[test]
+ fn test_emit_rshift() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.rshift(Opnd::Reg(X0_REG), Opnd::UImm(5));
+ asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+ }
+
+ #[test]
+ fn test_emit_urshift() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.urshift(Opnd::Reg(X0_REG), Opnd::UImm(5));
+ asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
+ asm.compile_with_num_regs(&mut cb, 1);
+ }
+
+ #[test]
+ fn test_emit_test() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.test(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ // Assert that only one instruction was written.
+ assert_eq!(4, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_test_with_encodable_unsigned_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.test(Opnd::Reg(X0_REG), Opnd::UImm(7));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ // Assert that only one instruction was written.
+ assert_eq!(4, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_test_with_unencodable_unsigned_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.test(Opnd::Reg(X0_REG), Opnd::UImm(5));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that a load and a test instruction were written.
+ assert_eq!(8, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_test_with_encodable_signed_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.test(Opnd::Reg(X0_REG), Opnd::Imm(7));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ // Assert that only one instruction was written.
+ assert_eq!(4, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_test_with_unencodable_signed_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.test(Opnd::Reg(X0_REG), Opnd::Imm(5));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that a load and a test instruction were written.
+ assert_eq!(8, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_emit_test_with_negative_signed_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.test(Opnd::Reg(X0_REG), Opnd::Imm(-7));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ // Assert that a test instruction is written.
+ assert_eq!(4, cb.get_write_pos());
+ }
+
+ #[test]
+ fn test_32_bit_register_with_some_number() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let shape_opnd = Opnd::mem(32, Opnd::Reg(X0_REG), 6);
+ asm.cmp(shape_opnd, Opnd::UImm(4097));
+ asm.compile_with_num_regs(&mut cb, 2);
+ }
+
+ #[test]
+ fn test_16_bit_register_store_some_number() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let shape_opnd = Opnd::mem(16, Opnd::Reg(X0_REG), 0);
+ asm.store(shape_opnd, Opnd::UImm(4097));
+ asm.compile_with_num_regs(&mut cb, 2);
+ }
+
+ #[test]
+ fn test_32_bit_register_store_some_number() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let shape_opnd = Opnd::mem(32, Opnd::Reg(X0_REG), 6);
+ asm.store(shape_opnd, Opnd::UImm(4097));
+ asm.compile_with_num_regs(&mut cb, 2);
+ }
+
+ #[test]
+ fn test_bcond_straddling_code_pages() {
+ const LANDING_PAGE: usize = 65;
+ let mut asm = Assembler::new();
+ let mut cb = CodeBlock::new_dummy_with_freed_pages(vec![0, LANDING_PAGE]);
+
+ // Skip to near the end of the page. Room for two instructions.
+ cb.set_pos(cb.page_start_pos() + cb.page_end() - 8);
+
+ let end = asm.new_label("end");
+ // Start with a conditional jump...
+ asm.jz(end);
+
+ // A few instructions, enough to cause a page switch.
+ let sum = asm.add(399.into(), 111.into());
+ let xorred = asm.xor(sum, 859.into());
+ asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), xorred);
+ asm.store(Opnd::mem(64, Opnd::Reg(X0_REG), 0), xorred);
+
+ // The branch target. It should be in the landing page.
+ asm.write_label(end);
+ asm.cret(xorred);
+
+ // [Bug #19385]
+ // This used to panic with "The offset must be 19 bits or less."
+ // due to attempting to lower the `asm.jz` above to a `b.eq` with an offset that's > 1 MiB.
+ let starting_pos = cb.get_write_pos();
+ asm.compile_with_num_regs(&mut cb, 2);
+ let gap = cb.get_write_pos() - starting_pos;
+ assert!(gap > 0b1111111111111111111);
+
+ let instruction_at_starting_pos: [u8; 4] = unsafe {
+ std::slice::from_raw_parts(cb.get_ptr(starting_pos).raw_ptr(&cb), 4)
+ }.try_into().unwrap();
+ assert_eq!(
+ 0b000101 << 26_u32,
+ u32::from_le_bytes(instruction_at_starting_pos) & (0b111111 << 26_u32),
+ "starting instruction should be an unconditional branch to the new page (B)"
+ );
+ }
+
+ #[test]
+ fn test_emit_xor() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let opnd = asm.xor(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG));
+ asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
+
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_disasm!(cb, "0b0001ca4b0000f8", "
+ 0x0: eor x11, x0, x1
+ 0x4: stur x11, [x2]
+ ");
+ }
+
+ #[test]
+ #[cfg(feature = "disasm")]
+ fn test_simple_disasm() -> std::result::Result<(), capstone::Error> {
+ // Test drive Capstone with simple input
+ use capstone::prelude::*;
+
+ let cs = Capstone::new()
+ .arm64()
+ .mode(arch::arm64::ArchMode::Arm)
+ .build()?;
+
+ let insns = cs.disasm_all(&[0x60, 0x0f, 0x80, 0xF2], 0x1000)?;
+
+ match insns.as_ref() {
+ [insn] => {
+ assert_eq!(Some("movk"), insn.mnemonic());
+ Ok(())
+ }
+ _ => Err(capstone::Error::CustomError(
+ "expected to disassemble to movk",
+ )),
+ }
+ }
+
+ #[test]
+ fn test_replace_mov_with_ldur() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::mem(64, CFP, 8));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_disasm!(cb, "618240f8", {"
+ 0x0: ldur x1, [x19, #8]
+ "});
+ }
+
+ #[test]
+ fn test_not_split_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::UImm(0xffff));
+ asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::UImm(0x10000));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_disasm!(cb, "e1ff9fd2e10370b2", {"
+ 0x0: mov x1, #0xffff
+ 0x4: orr x1, xzr, #0x10000
+ "});
+ }
+
+ #[test]
+ fn test_merge_csel_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let out = asm.csel_l(Qtrue.into(), Qfalse.into());
+ asm.mov(Opnd::Reg(TEMP_REGS[0]), out);
+ asm.compile_with_num_regs(&mut cb, 2);
+
+ assert_disasm!(cb, "8b0280d20c0080d261b18c9a", {"
+ 0x0: mov x11, #0x14
+ 0x4: mov x12, #0
+ 0x8: csel x1, x11, x12, lt
+ "});
+ }
+
+ #[test]
+ fn test_add_with_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let out = asm.add(Opnd::Reg(TEMP_REGS[1]), 1.into());
+ let out = asm.add(out, 1_usize.into());
+ asm.mov(Opnd::Reg(TEMP_REGS[0]), out);
+ asm.compile_with_num_regs(&mut cb, 2);
+
+ assert_disasm!(cb, "2b0500b16b0500b1e1030baa", {"
+ 0x0: adds x11, x9, #1
+ 0x4: adds x11, x11, #1
+ 0x8: mov x1, x11
+ "});
+ }
+
+ #[test]
+ fn test_mul_with_immediate() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let out = asm.mul(Opnd::Reg(TEMP_REGS[1]), 3.into());
+ asm.mov(Opnd::Reg(TEMP_REGS[0]), out);
+ asm.compile_with_num_regs(&mut cb, 2);
+
+ assert_disasm!(cb, "6b0080d22b7d0b9be1030baa", {"
+ 0x0: mov x11, #3
+ 0x4: mul x11, x9, x11
+ 0x8: mov x1, x11
+ "});
+ }
+}