use std::mem::take; use crate::asm::{CodeBlock, Label}; use crate::asm::arm64::*; use crate::codegen::split_patch_point; use crate::cruby::*; use crate::backend::lir::*; use crate::options::asm_dump; use crate::stats::CompileError; use crate::virtualmem::CodePtr; use crate::cast::*; // Use the arm64 register type for this platform pub type Reg = A64Reg; /// Convert reg_no for MemBase::Reg into Reg, assuming it's a 64-bit register pub fn mem_base_reg(reg_no: u8) -> Reg { Reg { num_bits: 64, reg_no } } // Callee-saved registers pub const CFP: Opnd = Opnd::Reg(X19_REG); pub const EC: Opnd = Opnd::Reg(X20_REG); pub const SP: Opnd = Opnd::Reg(X21_REG); // C argument registers on this platform pub const C_ARG_OPNDS: [Opnd; 6] = [ Opnd::Reg(X0_REG), Opnd::Reg(X1_REG), Opnd::Reg(X2_REG), Opnd::Reg(X3_REG), Opnd::Reg(X4_REG), Opnd::Reg(X5_REG) ]; // C return value register on this platform pub const C_RET_REG: Reg = X0_REG; pub const C_RET_OPND: Opnd = Opnd::Reg(X0_REG); pub const NATIVE_STACK_PTR: Opnd = Opnd::Reg(XZR_REG); pub const NATIVE_BASE_PTR: Opnd = Opnd::Reg(X29_REG); // These constants define the way we work with Arm64's stack pointer. The stack // pointer always needs to be aligned to a 16-byte boundary. pub const C_SP_REG: A64Opnd = X31; pub const C_SP_STEP: i32 = 16; impl CodeBlock { // The maximum number of bytes that can be generated by emit_jmp_ptr. pub fn jmp_ptr_bytes(&self) -> usize { // b instruction's offset is encoded as imm26 times 4. It can jump to // +/-128MiB, so this can be used when --yjit-exec-mem-size <= 128. /* let num_insns = if b_offset_fits_bits(self.virtual_region_size() as i64 / 4) { 1 // b instruction } else { 5 // 4 instructions to load a 64-bit absolute address + br instruction }; */ let num_insns = 5; // TODO: support virtual_region_size() check num_insns * 4 } // The maximum number of instructions that can be generated by emit_conditional_jump. fn conditional_jump_insns(&self) -> i32 { // The worst case is instructions for a jump + bcond. self.jmp_ptr_bytes() as i32 / 4 + 1 } } /// Map Opnd to A64Opnd impl From<Opnd> for A64Opnd { fn from(opnd: Opnd) -> Self { match opnd { Opnd::UImm(value) => A64Opnd::new_uimm(value), Opnd::Imm(value) => A64Opnd::new_imm(value), Opnd::Reg(reg) => A64Opnd::Reg(reg), Opnd::Mem(Mem { base: MemBase::Reg(reg_no), num_bits, disp }) => { A64Opnd::new_mem(num_bits, A64Opnd::Reg(A64Reg { num_bits, reg_no }), disp) }, Opnd::Mem(Mem { base: MemBase::VReg(_), .. }) => { panic!("attempted to lower an Opnd::Mem with a MemBase::VReg base") }, Opnd::Mem(Mem { base: MemBase::Stack { .. }, .. }) => { panic!("attempted to lower an Opnd::Mem with a MemBase::Stack base") }, Opnd::VReg { .. } => panic!("attempted to lower an Opnd::VReg"), Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"), Opnd::None => panic!( "Attempted to lower an Opnd::None. This often happens when an out operand was not allocated for an instruction because the output of the instruction was not used. Please ensure you are using the output." ), } } } /// Also implement going from a reference to an operand for convenience. impl From<&Opnd> for A64Opnd { fn from(opnd: &Opnd) -> Self { A64Opnd::from(*opnd) } } /// Call emit_jmp_ptr and immediately invalidate the written range. /// This is needed when next_page also moves other_cb that is not invalidated /// by compile_with_regs. Doing it here allows you to avoid invalidating a lot /// more than necessary when other_cb jumps from a position early in the page.
/// This invalidates a small range of cb twice, but we accept the small cost. fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) { let start = cb.get_write_ptr(); emit_jmp_ptr(cb, dst_ptr, true); let end = cb.get_write_ptr(); unsafe { rb_jit_icache_invalidate(start.raw_ptr(cb) as _, end.raw_ptr(cb) as _) }; } fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) { let src_addr = cb.get_write_ptr().as_offset(); let dst_addr = dst_ptr.as_offset(); // If the offset is short enough, then we'll use the // branch instruction. Otherwise, we'll move the // destination into a register and use the branch // register instruction. let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) { b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); 1 } else { let num_insns = emit_load_value(cb, Assembler::EMIT_OPND, dst_addr as u64); br(cb, Assembler::EMIT_OPND); num_insns + 1 }; if padding { // Make sure it's always a consistent number of // instructions in case it gets patched and has to // use the other branch. assert!(num_insns * 4 <= cb.jmp_ptr_bytes()); for _ in num_insns..(cb.jmp_ptr_bytes() / 4) { nop(cb); } } } /// Emit the required instructions to load the given value into the /// given register. Our goal here is to use as few instructions as /// possible to get this value into the register. fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize { let mut current = value; if current <= 0xffff { // If the value fits into a single movz // instruction, then we'll use that. movz(cb, rd, A64Opnd::new_uimm(current), 0); 1 } else if u16::try_from(!value).is_ok() { // For small negative values, use a single movn movn(cb, rd, A64Opnd::new_uimm(!value), 0); 1 } else if BitmaskImmediate::try_from(current).is_ok() { // Otherwise, if the immediate can be encoded // with the special bitmask immediate encoding, // we'll use that. mov(cb, rd, A64Opnd::new_uimm(current)); 1 } else { // Finally we'll fall back to encoding the value // using movz for the first 16 bits and movk for // each subsequent set of 16 bits as long as // they are necessary. movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0); let mut num_insns = 1; // (We're sure this is necessary since we // checked if it only fit into movz above). current >>= 16; movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16); num_insns += 1; if current > 0xffff { current >>= 16; movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32); num_insns += 1; } if current > 0xffff { current >>= 16; movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48); num_insns += 1; } num_insns } } /// List of registers that can be used for register allocation. /// This has the same number of registers for x86_64 and arm64. /// SCRATCH0_OPND, SCRATCH1_OPND, SCRATCH2_OPND, and EMIT_OPND are excluded. pub const ALLOC_REGS: &[Reg] = &[ X0_REG, X1_REG, X2_REG, X3_REG, X4_REG, X5_REG, X11_REG, X12_REG, ]; /// Special scratch registers for intermediate processing. They should be used only by /// [`Assembler::arm64_scratch_split`] or [`Assembler::new_with_scratch_reg`]. const SCRATCH0_OPND: Opnd = Opnd::Reg(X15_REG); const SCRATCH1_OPND: Opnd = Opnd::Reg(X17_REG); const SCRATCH2_OPND: Opnd = Opnd::Reg(X14_REG); impl Assembler { /// Special register for intermediate processing in arm64_emit. It should be used only by arm64_emit. const EMIT_REG: Reg = X16_REG; const EMIT_OPND: A64Opnd = A64Opnd::Reg(Self::EMIT_REG); /// Return an Assembler with scratch registers disabled in the backend, and a scratch register.
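/// A minimal usage sketch (hypothetical caller, marked `ignore` so it is not run as a doctest):
///
/// ```ignore
/// let (mut asm, scratch) = Assembler::new_with_scratch_reg();
/// // `scratch` is SCRATCH0_OPND (x15); the register allocator never hands it out.
/// asm.load_into(scratch, Opnd::mem(64, CFP, 0));
/// ```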
pub fn new_with_scratch_reg() -> (Self, Opnd) { (Self::new_with_accept_scratch_reg(true), SCRATCH0_OPND) } /// Return true if opnd contains a scratch reg pub fn has_scratch_reg(opnd: Opnd) -> bool { Self::has_reg(opnd, SCRATCH0_OPND.unwrap_reg()) } /// Get the list of registers from which we will allocate on this platform pub fn get_alloc_regs() -> Vec<Reg> { ALLOC_REGS.to_vec() } /// Get a list of all of the caller-saved registers pub fn get_caller_save_regs() -> Vec<Reg> { vec![X1_REG, X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG] } /// How many bytes a call and a [Self::frame_setup] would change native SP pub fn frame_size() -> i32 { 0x10 } /// Split platform-specific instructions /// The transformations done here are meant to make our lives simpler in later /// stages of the compilation pipeline. /// Here we may want to make sure that all instructions (except load and store) /// have no memory operands. fn arm64_split(mut self) -> Assembler { /// When you're storing a register into a memory location or loading a /// memory location into a register, the displacement from the base /// register of the memory location must fit into 9 bits. If it doesn't, /// then we need to load that memory address into a register first. fn split_memory_address(asm: &mut Assembler, opnd: Opnd) -> Opnd { match opnd { Opnd::Mem(mem) => { if mem_disp_fits_bits(mem.disp) { opnd } else { let base = asm.lea(Opnd::Mem(Mem { num_bits: 64, ..mem })); Opnd::mem(mem.num_bits, base, 0) } }, _ => unreachable!("Can only split memory addresses.") } } /// Any memory operands you're sending into an Op::Load instruction need /// to be split in case their displacement doesn't fit into 9 bits. fn split_load_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd { match opnd { Opnd::Reg(_) | Opnd::VReg { .. } => opnd, Opnd::Mem(_) => { let split_opnd = split_memory_address(asm, opnd); let out_opnd = asm.load(split_opnd); // Many Arm insns support only 32-bit or 64-bit operands. asm.load with fewer // bits zero-extends the value, so it's safe to recognize it as a 32-bit value. if out_opnd.rm_num_bits() < 32 { out_opnd.with_num_bits(32) } else { out_opnd } }, _ => asm.load(opnd) } } /// Operands that take the place of bitmask immediates must follow a /// certain encoding. In this function we ensure that those operands /// do follow that encoding, and if they don't then we load them first. fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd, dest_num_bits: u8) -> Opnd { match opnd { Opnd::Reg(_) | Opnd::VReg { .. } => opnd, Opnd::Mem(_) => split_load_operand(asm, opnd), Opnd::Imm(imm) => { if imm == 0 { Opnd::Reg(XZR_REG) } else if (dest_num_bits == 64 && BitmaskImmediate::try_from(imm as u64).is_ok()) || (dest_num_bits == 32 && u32::try_from(imm).is_ok() && BitmaskImmediate::new_32b_reg(imm as u32).is_ok()) { Opnd::UImm(imm as u64) } else { asm.load(opnd).with_num_bits(dest_num_bits) } }, Opnd::UImm(uimm) => { if (dest_num_bits == 64 && BitmaskImmediate::try_from(uimm).is_ok()) || (dest_num_bits == 32 && u32::try_from(uimm).is_ok() && BitmaskImmediate::new_32b_reg(uimm as u32).is_ok()) { opnd } else { asm.load(opnd).with_num_bits(dest_num_bits) } }, Opnd::None | Opnd::Value(_) => unreachable!() } } /// Operands that take the place of a shifted immediate must fit within /// a certain size. If they don't then we need to load them first. fn split_shifted_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { match opnd { Opnd::Reg(_) | Opnd::VReg { ..
} => opnd, Opnd::Mem(_) => split_load_operand(asm, opnd), Opnd::Imm(imm) => if ShiftedImmediate::try_from(imm as u64).is_ok() { opnd } else { asm.load(opnd) } Opnd::UImm(uimm) => { if ShiftedImmediate::try_from(uimm).is_ok() { opnd } else { asm.load(opnd) } }, Opnd::None | Opnd::Value(_) => unreachable!() } } /// Returns the operands that should be used for a boolean logic /// instruction. fn split_boolean_operands(asm: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> (Opnd, Opnd) { match (opnd0, opnd1) { (Opnd::Reg(_), Opnd::Reg(_)) => { (opnd0, opnd1) }, (reg_opnd @ Opnd::Reg(_), other_opnd) | (other_opnd, reg_opnd @ Opnd::Reg(_)) => { let opnd1 = split_bitmask_immediate(asm, other_opnd, reg_opnd.rm_num_bits()); (reg_opnd, opnd1) }, _ => { let opnd0 = split_load_operand(asm, opnd0); let opnd1 = split_bitmask_immediate(asm, opnd1, opnd0.rm_num_bits()); (opnd0, opnd1) } } } /// Returns the operands that should be used for a csel instruction. fn split_csel_operands(asm: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> (Opnd, Opnd) { let opnd0 = match opnd0 { Opnd::Reg(_) | Opnd::VReg { .. } => opnd0, _ => split_load_operand(asm, opnd0) }; let opnd1 = match opnd1 { Opnd::Reg(_) | Opnd::VReg { .. } => opnd1, _ => split_load_operand(asm, opnd1) }; (opnd0, opnd1) } fn split_less_than_32_cmp(asm: &mut Assembler, opnd0: Opnd) -> Opnd { match opnd0 { Opnd::Reg(_) | Opnd::VReg { .. } => { match opnd0.rm_num_bits() { 8 => asm.and(opnd0.with_num_bits(64), Opnd::UImm(0xff)), 16 => asm.and(opnd0.with_num_bits(64), Opnd::UImm(0xffff)), 32 | 64 => opnd0, bits => unreachable!("Invalid number of bits. {}", bits) } } _ => opnd0 } } let mut asm_local = Assembler::new_with_asm(&self); let live_ranges: Vec<LiveRange> = take(&mut self.live_ranges); let mut iterator = self.instruction_iterator(); let asm = &mut asm_local; while let Some((index, mut insn)) = iterator.next(asm) { // Here we're going to map the operands of the instruction to load // any Opnd::Value operands into registers if they are heap objects // such that only the Op::Load instruction needs to handle that // case. If the values aren't heap objects then we'll treat them as // if they were just unsigned integers. let is_load = matches!(insn, Insn::Load { .. } | Insn::LoadInto { .. }); let mut opnd_iter = insn.opnd_iter_mut(); while let Some(opnd) = opnd_iter.next() { if let Opnd::Value(value) = opnd { if value.special_const_p() { *opnd = Opnd::UImm(value.as_u64()); } else if !is_load { *opnd = asm.load(*opnd); } }; } // We are replacing instructions here so we know they are already // being used. It is okay not to use their output here. #[allow(unused_must_use)] match &mut insn { Insn::Add { left, right, out } => { match (*left, *right) { // When one operand is a register, legalize the other operand // into possibly an immediate and swap the order if necessary. // Only the rhs of ADD can be an immediate, but addition is commutative. (reg_opnd @ (Opnd::Reg(_) | Opnd::VReg { .. }), other_opnd) | (other_opnd, reg_opnd @ (Opnd::Reg(_) | Opnd::VReg { .. })) => { *left = reg_opnd; *right = split_shifted_immediate(asm, other_opnd); // Now `right` is either a register or an immediate, both can try to // merge with a subsequent mov.
merge_three_reg_mov(&live_ranges, &mut iterator, asm, left, left, out); asm.push_insn(insn); } _ => { *left = split_load_operand(asm, *left); *right = split_shifted_immediate(asm, *right); merge_three_reg_mov(&live_ranges, &mut iterator, asm, left, right, out); asm.push_insn(insn); } } } Insn::Sub { left, right, out } => { *left = split_load_operand(asm, *left); *right = split_shifted_immediate(asm, *right); // Now `right` is either a register or an immediate, // both can try to merge with a subsequent mov. merge_three_reg_mov(&live_ranges, &mut iterator, asm, left, left, out); asm.push_insn(insn); } Insn::And { left, right, out } | Insn::Or { left, right, out } | Insn::Xor { left, right, out } => { let (opnd0, opnd1) = split_boolean_operands(asm, *left, *right); *left = opnd0; *right = opnd1; merge_three_reg_mov(&live_ranges, &mut iterator, asm, left, right, out); asm.push_insn(insn); } /* // Lower to Joz and Jonz for generating CBZ/CBNZ for compare-with-0-and-branch. ref insn @ Insn::Cmp { ref left, right: ref right @ (Opnd::UImm(0) | Opnd::Imm(0)) } | ref insn @ Insn::Test { ref left, right: ref right @ (Opnd::InsnOut { .. } | Opnd::Reg(_)) } if { let same_opnd_if_test = if let Insn::Test { .. } = insn { left == right } else { true }; same_opnd_if_test && if let Some( Insn::Jz(target) | Insn::Je(target) | Insn::Jnz(target) | Insn::Jne(target) ) = iterator.peek() { matches!(target, Target::SideExit { .. }) } else { false } } => { let reg = split_load_operand(asm, *left); match iterator.peek() { Some(Insn::Jz(target) | Insn::Je(target)) => asm.push_insn(Insn::Joz(reg, *target)), Some(Insn::Jnz(target) | Insn::Jne(target)) => asm.push_insn(Insn::Jonz(reg, *target)), _ => () } iterator.map_insn_index(asm); iterator.next_unmapped(); // Pop merged jump instruction } */ Insn::CCall { opnds, .. } => { assert!(opnds.len() <= C_ARG_OPNDS.len()); // Load each operand into the corresponding argument // register. // Note: the iteration order is reversed to avoid corrupting x0, // which is both the return value and first argument register if !opnds.is_empty() { let mut args: Vec<(Opnd, Opnd)> = vec![]; for (idx, opnd) in opnds.iter_mut().enumerate().rev() { // If the value that we're sending is 0, then we can use // the zero register, so in this case we'll just send // a UImm of 0 along as the argument to the move. let value = match opnd { Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0), Opnd::Mem(_) => split_memory_address(asm, *opnd), _ => *opnd }; args.push((C_ARG_OPNDS[idx], value)); } asm.parallel_mov(args); } // Now we push the CCall without any arguments so that it // just performs the call. *opnds = vec![]; asm.push_insn(insn); }, Insn::Cmp { left, right } => { let opnd0 = split_load_operand(asm, *left); let opnd0 = split_less_than_32_cmp(asm, opnd0); let split_right = split_shifted_immediate(asm, *right); let opnd1 = match split_right { Opnd::VReg { .. } if opnd0.num_bits() != split_right.num_bits() => { split_right.with_num_bits(opnd0.num_bits().unwrap()) }, _ => split_right }; asm.cmp(opnd0, opnd1); }, Insn::CRet(opnd) => { match opnd { // If the value is already in the return register, then // we don't need to do anything. Opnd::Reg(C_RET_REG) => {}, // If the value is a memory address, we need to first // make sure the displacement isn't too large and then // load it into the return register. Opnd::Mem(_) => { let split = split_memory_address(asm, *opnd); asm.load_into(C_RET_OPND, split); }, // Otherwise we just need to load the value into the // return register. 
_ => { asm.load_into(C_RET_OPND, *opnd); } } asm.cret(C_RET_OPND); }, Insn::CSelZ { truthy, falsy, out } | Insn::CSelNZ { truthy, falsy, out } | Insn::CSelE { truthy, falsy, out } | Insn::CSelNE { truthy, falsy, out } | Insn::CSelL { truthy, falsy, out } | Insn::CSelLE { truthy, falsy, out } | Insn::CSelG { truthy, falsy, out } | Insn::CSelGE { truthy, falsy, out } => { let (opnd0, opnd1) = split_csel_operands(asm, *truthy, *falsy); *truthy = opnd0; *falsy = opnd1; // Merge `csel` and `mov` into a single `csel` when possible match iterator.peek().map(|(_, insn)| insn) { Some(Insn::Mov { dest: Opnd::Reg(reg), src }) if matches!(out, Opnd::VReg { .. }) && *out == *src && live_ranges[out.vreg_idx()].end() == index + 1 => { *out = Opnd::Reg(*reg); asm.push_insn(insn); iterator.next(asm); // Pop merged Insn::Mov } _ => { asm.push_insn(insn); } } }, Insn::JmpOpnd(opnd) => { if let Opnd::Mem(_) = opnd { let opnd0 = split_load_operand(asm, *opnd); asm.jmp_opnd(opnd0); } else { asm.jmp_opnd(*opnd); } }, Insn::Load { opnd, .. } | Insn::LoadInto { opnd, .. } => { *opnd = match opnd { Opnd::Mem(_) => split_memory_address(asm, *opnd), _ => *opnd }; asm.push_insn(insn); }, Insn::LoadSExt { opnd, out } => { match opnd { // We only want to sign extend if the operand is a // register, instruction output, or memory address that // is 32 bits. Otherwise we'll just load the value // directly since there's no need to sign extend. Opnd::Reg(Reg { num_bits: 32, .. }) | Opnd::VReg { num_bits: 32, .. } | Opnd::Mem(Mem { num_bits: 32, .. }) => { asm.push_insn(insn); }, _ => { asm.push_insn(Insn::Load { opnd: *opnd, out: *out }); } }; }, Insn::Mov { dest, src } => { match (&dest, &src) { // If we're attempting to load into a memory operand, then // we'll switch over to the store instruction. (Opnd::Mem(_), _) => { let opnd0 = split_memory_address(asm, *dest); let value = match *src { // If the first operand is zero, then we can just use // the zero register. Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG), // If the first operand is a memory operand, we're going // to transform this into a store instruction, so we'll // need to load this anyway. Opnd::UImm(_) => asm.load(*src), // The value that is being moved must be either a // register or an immediate that can be encoded as a // bitmask immediate. Otherwise, we'll need to split the // move into multiple instructions. _ => split_bitmask_immediate(asm, *src, dest.rm_num_bits()) }; asm.store(opnd0, value); }, // If we're loading a memory operand into a register, then // we'll switch over to the load instruction. (Opnd::Reg(_) | Opnd::VReg { .. }, Opnd::Mem(_)) => { let value = split_memory_address(asm, *src); asm.load_into(*dest, value); }, // Otherwise we'll use the normal mov instruction. (Opnd::Reg(_), _) => { let value = match *src { // Unlike other instructions, we can avoid splitting this case, using movz. Opnd::UImm(uimm) if uimm <= 0xffff => *src, _ => split_bitmask_immediate(asm, *src, dest.rm_num_bits()), }; asm.mov(*dest, value); }, _ => unreachable!("unexpected combination of operands in Insn::Mov: {dest:?}, {src:?}") }; }, Insn::Not { opnd, .. } => { // The value that is being negated must be in a register, so // if we get anything else we need to load it first. *opnd = match opnd { Opnd::Mem(_) => split_load_operand(asm, *opnd), _ => *opnd }; asm.push_insn(insn); }, Insn::LShift { opnd, .. } | Insn::RShift { opnd, .. } | Insn::URShift { opnd, .. } => { // The operand must be in a register, so // if we get anything else we need to load it first. 
*opnd = split_load_operand(asm, *opnd); asm.push_insn(insn); }, Insn::Mul { left, right, .. } => { *left = split_load_operand(asm, *left); *right = split_load_operand(asm, *right); asm.push_insn(insn); }, Insn::Test { left, right } => { // The value being tested must be in a register, so if it's // not already one we'll load it first. let opnd0 = split_load_operand(asm, *left); // The second value must be either a register or an // unsigned immediate that can be encoded as a bitmask // immediate. If it's not one of those, we'll need to load // it first. let opnd1 = split_bitmask_immediate(asm, *right, opnd0.rm_num_bits()); asm.test(opnd0, opnd1); }, _ => { asm.push_insn(insn); } } } asm_local } /// Split instructions using scratch registers. To maximize the use of the register pool for /// VRegs, most splits should happen in [`Self::arm64_split`]. However, some instructions /// need to be split with registers after `alloc_regs`, e.g. for `compile_exits`, so this /// splits them and uses scratch registers for it. fn arm64_scratch_split(mut self) -> Assembler { /// If opnd is Opnd::Mem with a too large disp, make the disp smaller using lea. fn split_large_disp(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd) -> Opnd { match opnd { Opnd::Mem(Mem { num_bits, disp, .. }) if !mem_disp_fits_bits(disp) => { asm.lea_into(scratch_opnd, opnd); Opnd::mem(num_bits, scratch_opnd, 0) } _ => opnd, } } /// If opnd is Opnd::Mem with MemBase::Stack, lower it to Opnd::Mem with MemBase::Reg, and split a large disp. fn split_stack_membase(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd, stack_state: &StackState) -> Opnd { let opnd = split_only_stack_membase(asm, opnd, scratch_opnd, stack_state); split_large_disp(asm, opnd, scratch_opnd) } /// split_stack_membase but without split_large_disp. This should be used only by lea. fn split_only_stack_membase(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd, stack_state: &StackState) -> Opnd { if let Opnd::Mem(Mem { base: stack_membase @ MemBase::Stack { .. }, disp: opnd_disp, num_bits: opnd_num_bits }) = opnd { let base = Opnd::Mem(stack_state.stack_membase_to_mem(stack_membase)); let base = split_large_disp(asm, base, scratch_opnd); asm.load_into(scratch_opnd, base); Opnd::Mem(Mem { base: MemBase::Reg(scratch_opnd.unwrap_reg().reg_no), disp: opnd_disp, num_bits: opnd_num_bits }) } else { opnd } } /// If opnd is Opnd::Mem, lower it to scratch_opnd. You should use this when `opnd` is read by the instruction, not written. fn split_memory_read(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd) -> Opnd { if let Opnd::Mem(_) = opnd { let opnd = split_large_disp(asm, opnd, scratch_opnd); let scratch_opnd = opnd.num_bits().map(|num_bits| scratch_opnd.with_num_bits(num_bits)).unwrap_or(scratch_opnd); asm.load_into(scratch_opnd, opnd); scratch_opnd } else { opnd } } /// If opnd is Opnd::Mem, set scratch_reg to *opnd. Return Some(Opnd::Mem) if it needs to be written back from scratch_reg. 
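/// A sketch of the intended call pattern, mirroring the Add/Sub/shift arms below (no new names are assumed):
///
/// ```ignore
/// let mem_out = split_memory_write(out, SCRATCH0_OPND); // `out` now names the scratch register
/// asm.push_insn(insn);                                   // the instruction writes into the scratch register
/// if let Some(mem_out) = mem_out {
///     asm.store(mem_out, SCRATCH0_OPND);                 // write the result back to memory
/// }
/// ```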
fn split_memory_write(opnd: &mut Opnd, scratch_opnd: Opnd) -> Option<Opnd> { if let Opnd::Mem(_) = opnd { let mem_opnd = opnd.clone(); *opnd = opnd.num_bits().map(|num_bits| scratch_opnd.with_num_bits(num_bits)).unwrap_or(scratch_opnd); Some(mem_opnd) } else { None } } // Prepare StackState to lower MemBase::Stack let stack_state = StackState::new(self.stack_base_idx); let mut asm_local = Assembler::new_with_asm(&self); let asm = &mut asm_local; asm.accept_scratch_reg = true; let iterator = &mut self.instruction_iterator(); while let Some((_, mut insn)) = iterator.next(asm) { match &mut insn { Insn::Add { left, right, out } | Insn::Sub { left, right, out } | Insn::And { left, right, out } | Insn::Or { left, right, out } | Insn::Xor { left, right, out } | Insn::CSelZ { truthy: left, falsy: right, out } | Insn::CSelNZ { truthy: left, falsy: right, out } | Insn::CSelE { truthy: left, falsy: right, out } | Insn::CSelNE { truthy: left, falsy: right, out } | Insn::CSelL { truthy: left, falsy: right, out } | Insn::CSelLE { truthy: left, falsy: right, out } | Insn::CSelG { truthy: left, falsy: right, out } | Insn::CSelGE { truthy: left, falsy: right, out } => { *left = split_memory_read(asm, *left, SCRATCH0_OPND); *right = split_memory_read(asm, *right, SCRATCH1_OPND); let mem_out = split_memory_write(out, SCRATCH0_OPND); asm.push_insn(insn); if let Some(mem_out) = mem_out { let mem_out = split_large_disp(asm, mem_out, SCRATCH1_OPND); asm.store(mem_out, SCRATCH0_OPND); } } Insn::Mul { left, right, out } => { *left = split_memory_read(asm, *left, SCRATCH0_OPND); *right = split_memory_read(asm, *right, SCRATCH1_OPND); let mem_out = split_memory_write(out, SCRATCH0_OPND); let reg_out = out.clone(); asm.push_insn(insn); if let Some(mem_out) = mem_out { let mem_out = split_large_disp(asm, mem_out, SCRATCH1_OPND); asm.store(mem_out, SCRATCH0_OPND); }; // If the next instruction is JoMul if matches!(iterator.peek(), Some((_, Insn::JoMul(_)))) { // Produce a register that is all zeros or all ones // Based on the sign bit of the 64-bit mul result asm.push_insn(Insn::RShift { out: SCRATCH0_OPND, opnd: reg_out, shift: Opnd::UImm(63) }); } } Insn::LShift { opnd, out, .. } | Insn::RShift { opnd, out, .. } => { *opnd = split_memory_read(asm, *opnd, SCRATCH0_OPND); let mem_out = split_memory_write(out, SCRATCH0_OPND); asm.push_insn(insn); if let Some(mem_out) = mem_out { let mem_out = split_large_disp(asm, mem_out, SCRATCH1_OPND); asm.store(mem_out, SCRATCH0_OPND); } } Insn::Cmp { left, right } | Insn::Test { left, right } => { *left = split_memory_read(asm, *left, SCRATCH0_OPND); *right = split_memory_read(asm, *right, SCRATCH1_OPND); asm.push_insn(insn); } // For compile_exits, support splitting simple C arguments here Insn::CCall { opnds, ..
} if !opnds.is_empty() => { for (i, opnd) in opnds.iter().enumerate() { asm.load_into(C_ARG_OPNDS[i], *opnd); } *opnds = vec![]; asm.push_insn(insn); } // For compile_exits, support splitting simple return values here Insn::CRet(opnd) => { match opnd { Opnd::Reg(C_RET_REG) => {}, _ => asm.load_into(C_RET_OPND, *opnd), } asm.cret(C_RET_OPND); } Insn::Lea { opnd, out } => { *opnd = split_only_stack_membase(asm, *opnd, SCRATCH0_OPND, &stack_state); let mem_out = split_memory_write(out, SCRATCH0_OPND); asm.push_insn(insn); if let Some(mem_out) = mem_out { let mem_out = split_large_disp(asm, mem_out, SCRATCH1_OPND); asm.store(mem_out, SCRATCH0_OPND); } } Insn::Load { opnd, out } | Insn::LoadInto { opnd, dest: out } => { *opnd = split_stack_membase(asm, *opnd, SCRATCH0_OPND, &stack_state); *out = split_stack_membase(asm, *out, SCRATCH1_OPND, &stack_state); if let Opnd::Mem(_) = out { // If NATIVE_STACK_PTR is used as a source for Store, it's handled as xzr, storing zero. // To save the content of NATIVE_STACK_PTR, we need to load it into another register first. if *opnd == NATIVE_STACK_PTR { asm.load_into(SCRATCH0_OPND, NATIVE_STACK_PTR); *opnd = SCRATCH0_OPND; } asm.store(*out, *opnd); } else { asm.push_insn(insn); } } &mut Insn::IncrCounter { mem, value } => { // Convert Opnd::const_ptr into Opnd::Mem. // It's split here to support IncrCounter in compile_exits. assert!(matches!(mem, Opnd::UImm(_))); asm.load_into(SCRATCH0_OPND, mem); asm.lea_into(SCRATCH0_OPND, Opnd::mem(64, SCRATCH0_OPND, 0)); // Create a local loop to atomically increment a counter using SCRATCH1_OPND to check if it succeeded. // Note that arm64_emit will peek at the next Cmp to set a status into SCRATCH1_OPND on IncrCounter. let label = asm.new_label("incr_counter_loop"); asm.write_label(label.clone()); asm.incr_counter(SCRATCH0_OPND, value); asm.cmp(SCRATCH1_OPND, 0.into()); asm.jne(label); } Insn::Store { dest, .. } => { *dest = split_stack_membase(asm, *dest, SCRATCH0_OPND, &stack_state); asm.push_insn(insn); } Insn::Mov { dest, src } => { *src = split_stack_membase(asm, *src, SCRATCH0_OPND, &stack_state); *dest = split_large_disp(asm, *dest, SCRATCH1_OPND); match dest { Opnd::Reg(_) => asm.load_into(*dest, *src), Opnd::Mem(_) => asm.store(*dest, *src), _ => asm.push_insn(insn), } } // Resolve ParallelMov that couldn't be handled without a scratch register. Insn::ParallelMov { moves } => { for (dst, src) in Self::resolve_parallel_moves(moves, Some(SCRATCH0_OPND)).unwrap() { let src = split_stack_membase(asm, src, SCRATCH1_OPND, &stack_state); let dst = split_large_disp(asm, dst, SCRATCH2_OPND); match dst { Opnd::Reg(_) => asm.load_into(dst, src), Opnd::Mem(_) => asm.store(dst, src), _ => asm.mov(dst, src), } } } &mut Insn::PatchPoint { ref target, invariant, version } => { split_patch_point(asm, target, invariant, version); } _ => { asm.push_insn(insn); } } } asm_local } /// Emit platform-specific machine code /// Returns a list of GC offsets. Can return failure to signal caller to retry. fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Option<Vec<CodePtr>> { /// Determine how many instructions it will take to represent moving /// this value into a register. Note that the return value of this /// function must correspond to how many instructions are used to /// represent this load in the emit_load_value function.
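/// For example, assuming none of these values happen to be encodable as a bitmask immediate:
/// 0x1234 needs 1 instruction (movz), 0x1234_5678 needs 2 (movz + movk),
/// 0x12_3456_789a needs 3, and a full 64-bit address needs 4.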
fn emit_load_size(value: u64) -> u8 { if BitmaskImmediate::try_from(value).is_ok() { return 1; } if value < (1 << 16) { 1 } else if value < (1 << 32) { 2 } else if value < (1 << 48) { 3 } else { 4 } } /// Emit a conditional jump instruction to a specific target. This is /// called when lowering any of the conditional jump instructions. fn emit_conditional_jump<const CONDITION: u8>(cb: &mut CodeBlock, target: Target) { fn generate_branch<const CONDITION: u8>(cb: &mut CodeBlock, src_addr: i64, dst_addr: i64) { let num_insns = if bcond_offset_fits_bits((dst_addr - src_addr) / 4) { // If the jump offset fits into the conditional jump as // an immediate value and it's properly aligned, then we // can use the b.cond instruction directly. We're safe // to use as i32 here since we already checked that it // fits. let bytes = (dst_addr - src_addr) as i32; bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes)); // Here we're going to return 1 because we've only // written out 1 instruction. 1 } else if b_offset_fits_bits((dst_addr - (src_addr + 4)) / 4) { // + 4 for bcond // If the jump offset fits into the unconditional jump as // an immediate value, we can use inverse b.cond + b. // // We're going to write out the inverse condition so // that if it doesn't match it will skip over the // instruction used for branching. bcond(cb, Condition::inverse(CONDITION), 2.into()); b(cb, InstructionOffset::from_bytes((dst_addr - (src_addr + 4)) as i32)); // + 4 for bcond // We've only written out 2 instructions. 2 } else { // Otherwise, we need to load the address into a // register and use the branch register instruction. let load_insns: i32 = emit_load_size(dst_addr as u64).into(); // We're going to write out the inverse condition so // that if it doesn't match it will skip over the // instructions used for branching. bcond(cb, Condition::inverse(CONDITION), (load_insns + 2).into()); emit_load_value(cb, Assembler::EMIT_OPND, dst_addr as u64); br(cb, Assembler::EMIT_OPND); // Here we'll return the number of instructions that it // took to write out the destination address + 1 for the // b.cond and 1 for the br. load_insns + 2 }; // We need to make sure we have at least 6 instructions for // every kind of jump for invalidation purposes, so we're // going to write out padding nop instructions here. assert!(num_insns <= cb.conditional_jump_insns()); (num_insns..cb.conditional_jump_insns()).for_each(|_| nop(cb)); } match target { Target::CodePtr(dst_ptr) => { let dst_addr = dst_ptr.as_offset(); let src_addr = cb.get_write_ptr().as_offset(); generate_branch::<CONDITION>(cb, src_addr, dst_addr); }, Target::Label(label_idx) => { // We reserve `cb.conditional_jump_insns() * 4` bytes since we may use up to that amount. // `generate_branch` will pad the emitted branch instructions with `nop`s for each unused instruction slot. cb.label_ref(label_idx, (cb.conditional_jump_insns() * 4) as usize, |cb, src_addr, dst_addr| { generate_branch::<CONDITION>(cb, src_addr - (cb.conditional_jump_insns() * 4) as i64, dst_addr); }); }, Target::SideExit { ..
} => { unreachable!("Target::SideExit should have been compiled by compile_exits") }, }; } /// Emit a CBZ or CBNZ which branches when a register is zero or non-zero fn emit_cmp_zero_jump(cb: &mut CodeBlock, reg: A64Opnd, branch_if_zero: bool, target: Target) { if let Target::CodePtr(dst_ptr) = target { let dst_addr = dst_ptr.as_offset(); let src_addr = cb.get_write_ptr().as_offset(); if bcond_offset_fits_bits((dst_addr - src_addr) / 4) { // If the offset fits in one instruction, generate cbz or cbnz let bytes = (dst_addr - src_addr) as i32; if branch_if_zero { cbz(cb, reg, InstructionOffset::from_bytes(bytes)); } else { cbnz(cb, reg, InstructionOffset::from_bytes(bytes)); } } else { // Otherwise, we load the address into a register and // use the branch register instruction. Note that because // side exits should always be close, this form should be // rare or impossible to see. let dst_addr = dst_ptr.raw_addr(cb) as u64; let load_insns: i32 = emit_load_size(dst_addr).into(); // Write out the inverse condition so that if // it doesn't match it will skip over the // instructions used for branching. if branch_if_zero { cbnz(cb, reg, InstructionOffset::from_insns(load_insns + 2)); } else { cbz(cb, reg, InstructionOffset::from_insns(load_insns + 2)); } emit_load_value(cb, Assembler::EMIT_OPND, dst_addr); br(cb, Assembler::EMIT_OPND); } } else { unreachable!("We should only generate Joz/Jonz with side-exit targets"); } } /// Do the address calculation of `out_reg = base_reg + disp` fn load_effective_address(cb: &mut CodeBlock, out: A64Opnd, base_reg_no: u8, disp: i32) { let base_reg = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: base_reg_no }); let out_reg_no = out.unwrap_reg().reg_no; assert_ne!(31, out_reg_no, "Lea sp, [sp, #imm] not always encodable. Use add/sub instead."); assert_ne!(base_reg_no, out_reg_no, "large displacement needs a scratch register"); if ShiftedImmediate::try_from(disp.unsigned_abs() as u64).is_ok() { // Use ADD/SUB if the displacement fits add(cb, out, base_reg, A64Opnd::new_imm(disp.into())); } else { // Use add_extended() to interpret reg_no=31 as sp // since the base register is never the zero register. // Careful! Only the first two operands can refer to sp. emit_load_value(cb, out, disp as u64); add_extended(cb, out, base_reg, out); }; } /// Load a VALUE to a register and remember it for GC marking and reference updating fn emit_load_gc_value(cb: &mut CodeBlock, gc_offsets: &mut Vec<CodePtr>, dest: A64Opnd, value: VALUE) { // We don't need to check if it's a special const // here because we only allow these operands to hit // this point if they're not a special const. assert!(!value.special_const_p()); // This assumes only load instructions can contain // references to GC'd Value operands. If the value // being loaded is a heap object, we'll report that // back out to the gc_offsets list. ldr_literal(cb, dest, 2.into()); b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32))); cb.write_bytes(&value.as_u64().to_le_bytes()); let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE); gc_offsets.push(ptr_offset); } /// Emit a push instruction for the given operand by subtracting from the stack /// pointer and then storing the given value. fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) { str_pre(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, -C_SP_STEP)); } /// Emit a pop instruction into the given operand by loading the value /// and then adding to the stack pointer.
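/// Note that both emit_push and emit_pop move SP by a full C_SP_STEP (16 bytes) even though
/// only one 8-byte register is transferred, which keeps the stack pointer 16-byte aligned as
/// arm64 requires.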
fn emit_pop(cb: &mut CodeBlock, opnd: A64Opnd) { ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP)); } // List of GC offsets let mut gc_offsets: Vec<CodePtr> = Vec::new(); // Buffered list of PosMarker callbacks to fire if codegen is successful let mut pos_markers: Vec<(usize, CodePtr)> = vec![]; // The write_pos for the last Insn::PatchPoint, if any let mut last_patch_pos: Option<usize> = None; // Install a panic hook to dump Assembler with insn_idx on dev builds let (_hook, mut hook_insn_idx) = AssemblerPanicHook::new(self, 0); // For each instruction let mut insn_idx: usize = 0; while let Some(insn) = self.insns.get(insn_idx) { // Update insn_idx that is shown on panic hook_insn_idx.as_mut().map(|idx| idx.lock().map(|mut idx| *idx = insn_idx).unwrap()); match insn { Insn::Comment(text) => { cb.add_comment(text); }, Insn::Label(target) => { cb.write_label(target.unwrap_label()); }, // Report back the current position in the generated code Insn::PosMarker(..) => { pos_markers.push((insn_idx, cb.get_write_ptr())) } Insn::BakeString(text) => { for byte in text.as_bytes() { cb.write_byte(*byte); } // Add a null-terminator byte for safety (in case we pass // this to C code) cb.write_byte(0); // Pad out the string to the next 4-byte boundary so that // it's easy to jump past. for _ in 0..(4 - ((text.len() + 1) % 4)) { cb.write_byte(0); } }, &Insn::FrameSetup { preserved, mut slot_count } => { const { assert!(SIZEOF_VALUE == 8, "alignment logic relies on SIZEOF_VALUE == 8"); } // Preserve X29 and set up frame record stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16)); mov(cb, X29, C_SP_REG); for regs in preserved.chunks(2) { // For the body, store pairs and move SP if let [reg0, reg1] = regs { stp_pre(cb, reg1.into(), reg0.into(), A64Opnd::new_mem(128, C_SP_REG, -16)); } else if let [reg] = regs { // For overhang, store but don't move SP. Combine movement with // movement for slots below. stur(cb, reg.into(), A64Opnd::new_mem(64, C_SP_REG, -8)); slot_count += 1; } else { unreachable!("chunks(2)"); } } // Align slot_count if slot_count % 2 == 1 { slot_count += 1 } if slot_count > 0 { let slot_offset = (slot_count * SIZEOF_VALUE) as u64; // Bail when asked to reserve too many slots in one instruction. ShiftedImmediate::try_from(slot_offset).ok()?; sub(cb, C_SP_REG, C_SP_REG, A64Opnd::new_uimm(slot_offset)); } } Insn::FrameTeardown { preserved } => { // Restore preserved registers below frame pointer. let mut base_offset = 0; for regs in preserved.chunks(2) { if let [reg0, reg1] = regs { base_offset -= 16; ldp(cb, reg1.into(), reg0.into(), A64Opnd::new_mem(128, X29, base_offset)); } else if let [reg] = regs { ldur(cb, reg.into(), A64Opnd::new_mem(64, X29, base_offset - 8)); } else { unreachable!("chunks(2)"); } } // SP = X29 (frame pointer) mov(cb, C_SP_REG, X29); ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16)); } Insn::Add { left, right, out } => { // Usually, we issue ADDS, so you could branch on overflow, but ADDS with // out=31 refers to out=XZR, which discards the sum. So, instead of ADDS // (aliased to CMN in this case) we issue ADD instead which writes the sum // to the stack pointer; we assume you got x31 from NATIVE_STACK_POINTER. let out: A64Opnd = out.into(); if let A64Opnd::Reg(A64Reg { reg_no: 31, .. }) = out { add(cb, out, left.into(), right.into()); } else { adds(cb, out, left.into(), right.into()); } }, Insn::Sub { left, right, out } => { // Usually, we issue SUBS, so you could branch on overflow, but SUBS with // out=31 refers to out=XZR, which discards the result.
So, instead of SUBS // (aliased to CMP in this case) we issue SUB instead which writes the diff // to the stack pointer; we assume you got x31 from NATIVE_STACK_POINTER. let out: A64Opnd = out.into(); if let A64Opnd::Reg(A64Reg { reg_no: 31, .. }) = out { sub(cb, out, left.into(), right.into()); } else { subs(cb, out, left.into(), right.into()); } }, Insn::Mul { left, right, out } => { // If the next instruction is JoMul with RShift created by arm64_scratch_split match (self.insns.get(insn_idx + 1), self.insns.get(insn_idx + 2)) { (Some(Insn::RShift { out: out_sign, opnd: out_opnd, shift: out_shift }), Some(Insn::JoMul(_))) => { // Compute the high 64 bits smulh(cb, Self::EMIT_OPND, left.into(), right.into()); // Compute the low 64 bits // This may clobber one of the input registers, // so we do it after smulh mul(cb, out.into(), left.into(), right.into()); // Insert the shift instruction created by arm64_scratch_split // to prepare the register that has the sign bit of the high 64 bits after mul. asr(cb, out_sign.into(), out_opnd.into(), out_shift.into()); insn_idx += 1; // skip the next Insn::RShift // If the high 64-bits are not all zeros or all ones, // matching the sign bit, then we have an overflow cmp(cb, Self::EMIT_OPND, out_sign.into()); // Insn::JoMul will emit_conditional_jump::<{Condition::NE}> } _ => { mul(cb, out.into(), left.into(), right.into()); } } }, Insn::And { left, right, out } => { and(cb, out.into(), left.into(), right.into()); }, Insn::Or { left, right, out } => { orr(cb, out.into(), left.into(), right.into()); }, Insn::Xor { left, right, out } => { eor(cb, out.into(), left.into(), right.into()); }, Insn::Not { opnd, out } => { mvn(cb, out.into(), opnd.into()); }, Insn::RShift { opnd, shift, out } => { asr(cb, out.into(), opnd.into(), shift.into()); }, Insn::URShift { opnd, shift, out } => { lsr(cb, out.into(), opnd.into(), shift.into()); }, Insn::LShift { opnd, shift, out } => { lsl(cb, out.into(), opnd.into(), shift.into()); }, Insn::Store { dest, src } => { // Split src into EMIT0_OPND if necessary let src_reg: A64Reg = match src { Opnd::Reg(reg) => *reg, // Use zero register when possible Opnd::UImm(0) | Opnd::Imm(0) => XZR_REG, // Immediates &Opnd::Imm(imm) => { emit_load_value(cb, Self::EMIT_OPND, imm as u64); Self::EMIT_REG } &Opnd::UImm(imm) => { emit_load_value(cb, Self::EMIT_OPND, imm); Self::EMIT_REG } &Opnd::Value(value) => { emit_load_gc_value(cb, &mut gc_offsets, Self::EMIT_OPND, value); Self::EMIT_REG } src_mem @ &Opnd::Mem(Mem { num_bits: src_num_bits, base: MemBase::Reg(src_base_reg_no), disp: src_disp }) => { // For mem-to-mem store, load the source into EMIT0_OPND let src_mem = if mem_disp_fits_bits(src_disp) { src_mem.into() } else { // Split the load address into EMIT0_OPND first if necessary load_effective_address(cb, Self::EMIT_OPND, src_base_reg_no, src_disp); A64Opnd::new_mem(dest.rm_num_bits(), Self::EMIT_OPND, 0) }; let dst = A64Opnd::Reg(Self::EMIT_REG.with_num_bits(src_num_bits)); match src_num_bits { 64 | 32 => ldur(cb, dst, src_mem), 16 => ldurh(cb, dst, src_mem), 8 => ldurb(cb, dst, src_mem), num_bits => panic!("unexpected num_bits: {num_bits}") }; Self::EMIT_REG } src @ (Opnd::Mem(_) | Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during arm64_emit: {src:?}") }; let src = A64Opnd::Reg(src_reg); // This order may be surprising but it is correct. The way // the Arm64 assembler works, the register that is going to // be stored is first and the address is second. 
However in // our IR we have the address first and the register second. match dest.rm_num_bits() { 64 | 32 => stur(cb, src, dest.into()), 16 => sturh(cb, src, dest.into()), 8 => sturb(cb, src, dest.into()), num_bits => panic!("unexpected dest num_bits: {num_bits} (src: {src:?}, dest: {dest:?})"), } }, Insn::Load { opnd, out } | Insn::LoadInto { opnd, dest: out } => { match *opnd { Opnd::Reg(_) | Opnd::VReg { .. } => { mov(cb, out.into(), opnd.into()); }, Opnd::UImm(uimm) => { emit_load_value(cb, out.into(), uimm); }, Opnd::Imm(imm) => { emit_load_value(cb, out.into(), imm as u64); }, Opnd::Mem(_) => { match opnd.rm_num_bits() { 64 | 32 => ldur(cb, out.into(), opnd.into()), 16 => ldurh(cb, out.into(), opnd.into()), 8 => ldurb(cb, out.into(), opnd.into()), num_bits => panic!("unexpected num_bits: {num_bits}"), }; }, Opnd::Value(value) => { emit_load_gc_value(cb, &mut gc_offsets, out.into(), value); }, Opnd::None => { unreachable!("Attempted to load from None operand"); } }; }, Insn::LoadSExt { opnd, out } => { match *opnd { Opnd::Reg(Reg { num_bits: 32, .. }) | Opnd::VReg { num_bits: 32, .. } => { sxtw(cb, out.into(), opnd.into()); }, Opnd::Mem(Mem { num_bits: 32, .. }) => { ldursw(cb, out.into(), opnd.into()); }, _ => unreachable!() }; }, Insn::ParallelMov { .. } => unreachable!("{insn:?} should have been lowered at alloc_regs()"), Insn::Mov { dest, src } => { // This supports the following two kinds of immediates: // * The value fits into a single movz instruction // * It can be encoded with the special bitmask immediate encoding // arm64_split() should have split other immediates that require multiple instructions. match src { Opnd::UImm(uimm) if *uimm <= 0xffff => { movz(cb, dest.into(), A64Opnd::new_uimm(*uimm), 0); }, _ => { mov(cb, dest.into(), src.into()); } } }, Insn::Lea { opnd, out } => { let &Opnd::Mem(Mem { num_bits: _, base: MemBase::Reg(base_reg_no), disp }) = opnd else { panic!("Unexpected Insn::Lea operand in arm64_emit: {opnd:?}"); }; let out_reg_no = out.unwrap_reg().reg_no; assert_ne!(31, out_reg_no, "Lea sp, [sp, #imm] not always encodable. Use add/sub instead."); let out = A64Opnd::from(out); let base_reg = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: base_reg_no }); if ShiftedImmediate::try_from(disp.unsigned_abs() as u64).is_ok() { // Use ADD/SUB if the displacement fits add(cb, out, base_reg, A64Opnd::new_imm(disp.into())); } else { // Use a scratch reg for `out += displacement` let disp_reg = if out_reg_no == base_reg_no { Self::EMIT_OPND } else { out }; // Use add_extended() to interpret reg_no=31 as sp // since the base register is never the zero register. // Careful! Only the first two operands can refer to sp. emit_load_value(cb, disp_reg, disp as u64); add_extended(cb, out, base_reg, disp_reg); } } Insn::LeaJumpTarget { out, target, .. 
} => { if let Target::Label(label_idx) = target { // Set output to the raw address of the label cb.label_ref(*label_idx, 4, |cb, end_addr, dst_addr| { adr(cb, Self::EMIT_OPND, A64Opnd::new_imm(dst_addr - (end_addr - 4))); }); mov(cb, out.into(), Self::EMIT_OPND); } else { // Set output to the jump target's raw address let target_code = target.unwrap_code_ptr(); let target_addr = target_code.raw_addr(cb).as_u64(); emit_load_value(cb, out.into(), target_addr); } }, Insn::CPush(opnd) => { emit_push(cb, opnd.into()); }, Insn::CPop { out } => { emit_pop(cb, out.into()); }, Insn::CPopInto(opnd) => { emit_pop(cb, opnd.into()); }, Insn::CPushAll => { let regs = Assembler::get_caller_save_regs(); for reg in regs { emit_push(cb, A64Opnd::Reg(reg)); } // Push the flags/state register mrs(cb, Self::EMIT_OPND, SystemRegister::NZCV); emit_push(cb, Self::EMIT_OPND); }, Insn::CPopAll => { let regs = Assembler::get_caller_save_regs(); // Pop the state/flags register msr(cb, SystemRegister::NZCV, Self::EMIT_OPND); emit_pop(cb, Self::EMIT_OPND); for reg in regs.into_iter().rev() { emit_pop(cb, A64Opnd::Reg(reg)); } }, Insn::CCall { fptr, .. } => { match fptr { Opnd::UImm(fptr) => { // The offset to the call target in bytes let src_addr = cb.get_write_ptr().raw_ptr(cb) as i64; let dst_addr = *fptr as i64; // Use BL if the offset is short enough to encode as an immediate. // Otherwise, use BLR with a register. if b_offset_fits_bits((dst_addr - src_addr) / 4) { bl(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); } else { emit_load_value(cb, Self::EMIT_OPND, dst_addr as u64); blr(cb, Self::EMIT_OPND); } } Opnd::Reg(_) => { blr(cb, fptr.into()); } _ => unreachable!("unsupported ccall fptr: {fptr:?}") } }, Insn::CRet { .. } => { ret(cb, A64Opnd::None); }, Insn::Cmp { left, right } => { cmp(cb, left.into(), right.into()); }, Insn::Test { left, right } => { tst(cb, left.into(), right.into()); }, Insn::JmpOpnd(opnd) => { br(cb, opnd.into()); }, Insn::Jmp(target) => { match *target { Target::CodePtr(dst_ptr) => { emit_jmp_ptr(cb, dst_ptr, true); }, Target::Label(label_idx) => { // Here we're going to save enough space for // ourselves and then come back and write the // instruction once we know the offset. We're going // to assume we can fit into a single b instruction. // It will panic otherwise. cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap(); b(cb, InstructionOffset::from_bytes(bytes)); }); }, Target::SideExit { .. 
} => { unreachable!("Target::SideExit should have been compiled by compile_exits") }, }; }, Insn::Je(target) | Insn::Jz(target) => { emit_conditional_jump::<{Condition::EQ}>(cb, target.clone()); }, Insn::Jne(target) | Insn::Jnz(target) | Insn::JoMul(target) => { emit_conditional_jump::<{Condition::NE}>(cb, target.clone()); }, Insn::Jl(target) => { emit_conditional_jump::<{Condition::LT}>(cb, target.clone()); }, Insn::Jg(target) => { emit_conditional_jump::<{Condition::GT}>(cb, target.clone()); }, Insn::Jge(target) => { emit_conditional_jump::<{Condition::GE}>(cb, target.clone()); }, Insn::Jbe(target) => { emit_conditional_jump::<{Condition::LS}>(cb, target.clone()); }, Insn::Jb(target) => { emit_conditional_jump::<{Condition::CC}>(cb, target.clone()); }, Insn::Jo(target) => { emit_conditional_jump::<{Condition::VS}>(cb, target.clone()); }, Insn::Joz(opnd, target) => { emit_cmp_zero_jump(cb, opnd.into(), true, target.clone()); }, Insn::Jonz(opnd, target) => { emit_cmp_zero_jump(cb, opnd.into(), false, target.clone()); }, Insn::PatchPoint { .. } => unreachable!("PatchPoint should have been lowered to PadPatchPoint in arm64_scratch_split"), Insn::PadPatchPoint => { // If patch points are too close to each other or the end of the block, fill nop instructions if let Some(last_patch_pos) = last_patch_pos { while cb.get_write_pos().saturating_sub(last_patch_pos) < cb.jmp_ptr_bytes() && !cb.has_dropped_bytes() { nop(cb); } } last_patch_pos = Some(cb.get_write_pos()); }, Insn::IncrCounter { mem, value } => { // Get the status register allocated by arm64_scratch_split let Some(Insn::Cmp { left: status_reg @ Opnd::Reg(_), right: Opnd::UImm(_) | Opnd::Imm(_), }) = self.insns.get(insn_idx + 1) else { panic!("arm64_scratch_split should add Cmp after IncrCounter: {:?}", self.insns.get(insn_idx + 1)); }; // Attempt to increment a counter ldaxr(cb, Self::EMIT_OPND, mem.into()); add(cb, Self::EMIT_OPND, Self::EMIT_OPND, value.into()); // The status register that gets used to track whether or // not the store was successful must be 32 bits. Since we // store the EMIT registers as their 64-bit versions, we // need to rewrap it here. let status = A64Opnd::Reg(status_reg.unwrap_reg().with_num_bits(32)); stlxr(cb, status, Self::EMIT_OPND, mem.into()); }, Insn::Breakpoint => { brk(cb, A64Opnd::None); }, Insn::CSelZ { truthy, falsy, out } | Insn::CSelE { truthy, falsy, out } => { csel(cb, out.into(), truthy.into(), falsy.into(), Condition::EQ); }, Insn::CSelNZ { truthy, falsy, out } | Insn::CSelNE { truthy, falsy, out } => { csel(cb, out.into(), truthy.into(), falsy.into(), Condition::NE); }, Insn::CSelL { truthy, falsy, out } => { csel(cb, out.into(), truthy.into(), falsy.into(), Condition::LT); }, Insn::CSelLE { truthy, falsy, out } => { csel(cb, out.into(), truthy.into(), falsy.into(), Condition::LE); }, Insn::CSelG { truthy, falsy, out } => { csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GT); }, Insn::CSelGE { truthy, falsy, out } => { csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GE); } Insn::LiveReg { ..
} => (), // just a reg alloc signal, no code }; insn_idx += 1; } // Error if we couldn't write out everything if cb.has_dropped_bytes() { None } else { // No bytes dropped, so the pos markers point to valid code for (insn_idx, pos) in pos_markers { if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() { callback(pos, cb); } else { panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}"); } } Some(gc_offsets) } } /// Optimize and compile the stored instructions pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) -> Result<(CodePtr, Vec<CodePtr>), CompileError> { // The backend is allowed to use scratch registers only if it has not accepted them so far. let use_scratch_reg = !self.accept_scratch_reg; asm_dump!(self, init); let asm = self.arm64_split(); asm_dump!(asm, split); let mut asm = asm.alloc_regs(regs)?; asm_dump!(asm, alloc_regs); // We put compile_exits after alloc_regs to avoid extending live ranges for VRegs spilled on side exits. asm.compile_exits(); asm_dump!(asm, compile_exits); if use_scratch_reg { asm = asm.arm64_scratch_split(); asm_dump!(asm, scratch_split); } // Create label instances in the code block for (idx, name) in asm.label_names.iter().enumerate() { let label = cb.new_label(name.to_string()); assert_eq!(label, Label(idx)); } let start_ptr = cb.get_write_ptr(); let gc_offsets = asm.arm64_emit(cb); if let (Some(gc_offsets), false) = (gc_offsets, cb.has_dropped_bytes()) { cb.link_labels(); // Invalidate icache for newly written out region so we don't run stale code. unsafe { rb_jit_icache_invalidate(start_ptr.raw_ptr(cb) as _, cb.get_write_ptr().raw_ptr(cb) as _) }; Ok((start_ptr, gc_offsets)) } else { cb.clear_labels(); Err(CompileError::OutOfMemory) } } } /// LIR instructions that are lowered to an instruction that has two input registers and an output /// register can look to merge with a succeeding `Insn::Mov`. /// For example: /// /// Add out, a, b /// Mov c, out /// /// Can become: /// /// Add c, a, b /// /// If a, b, and c are all registers.
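/// The merge is only performed when `out` is a VReg whose live range ends exactly at the
/// `Mov`, i.e. nothing after the `Mov` reads `out`, so dropping the intermediate register
/// cannot change observable behavior.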
fn merge_three_reg_mov( live_ranges: &[LiveRange], iterator: &mut InsnIter, asm: &mut Assembler, left: &Opnd, right: &Opnd, out: &mut Opnd, ) { if let (Opnd::Reg(_) | Opnd::VReg{..}, Opnd::Reg(_) | Opnd::VReg{..}, Some((mov_idx, Insn::Mov { dest, src }))) = (left, right, iterator.peek()) { if out == src && live_ranges[out.vreg_idx()].end() == *mov_idx && matches!(*dest, Opnd::Reg(_) | Opnd::VReg{..}) { *out = *dest; iterator.next(asm); // Pop merged Insn::Mov } } } #[cfg(test)] mod tests { #[cfg(feature = "disasm")] use crate::disasms_with; use crate::{assert_disasm_snapshot, hexdumps}; use super::*; use insta::assert_snapshot; static TEMP_REGS: [Reg; 5] = [X1_REG, X9_REG, X10_REG, X14_REG, X15_REG]; fn setup_asm() -> (Assembler, CodeBlock) { crate::options::rb_zjit_prepare_options(); // Allow `get_option!` in Assembler (Assembler::new(), CodeBlock::new_dummy()) } #[test] fn test_lir_string() { use crate::hir::SideExitReason; let mut asm = Assembler::new(); asm.stack_base_idx = 1; let label = asm.new_label("bb0"); asm.write_label(label.clone()); asm.push_insn(Insn::Comment("bb0(): foo@/tmp/a.rb:1".into())); asm.frame_setup(JIT_PRESERVED_REGS); let val64 = asm.add(CFP, Opnd::UImm(64)); asm.store(Opnd::mem(64, SP, 0x10), val64); let side_exit = Target::SideExit { reason: SideExitReason::Interrupt, exit: SideExit { pc: Opnd::const_ptr(0 as *const u8), stack: vec![], locals: vec![] } }; asm.push_insn(Insn::Joz(val64, side_exit)); asm.parallel_mov(vec![(C_ARG_OPNDS[0], C_RET_OPND.with_num_bits(32)), (C_ARG_OPNDS[1], Opnd::mem(64, SP, -8))]); let val32 = asm.sub(Opnd::Value(Qtrue), Opnd::Imm(1)); asm.store(Opnd::mem(64, EC, 0x10).with_num_bits(32), val32.with_num_bits(32)); asm.je(label); asm.cret(val64); asm.frame_teardown(JIT_PRESERVED_REGS); assert_disasm_snapshot!(lir_string(&mut asm), @r" bb0: # bb0(): foo@/tmp/a.rb:1 FrameSetup 1, x19, x21, x20 v0 = Add x19, 0x40 Store [x21 + 0x10], v0 Joz Exit(Interrupt), v0 ParallelMov x0 <- w0, x1 <- [x21 - 8] v1 = Sub Value(0x14), Imm(1) Store Mem32[x20 + 0x10], VReg32(v1) Je bb0 CRet v0 FrameTeardown x19, x21, x20 "); } #[test] fn test_mul_with_immediate() { let (mut asm, mut cb) = setup_asm(); let out = asm.mul(Opnd::Reg(TEMP_REGS[1]), 3.into()); asm.mov(Opnd::Reg(TEMP_REGS[0]), out); asm.compile_with_num_regs(&mut cb, 2); assert_disasm_snapshot!(cb.disasm(), @" 0x0: mov x0, #3 0x4: mul x0, x9, x0 0x8: mov x1, x0 "); assert_snapshot!(cb.hexdump(), @"600080d2207d009be10300aa"); } #[test] fn sp_movements_are_single_instruction() { let (mut asm, mut cb) = setup_asm(); let sp = Opnd::Reg(XZR_REG); let new_sp = asm.add(sp, 0x20.into()); asm.mov(sp, new_sp); let new_sp = asm.sub(sp, 0x20.into()); asm.mov(sp, new_sp); asm.compile_with_num_regs(&mut cb, 2); assert_disasm_snapshot!(cb.disasm(), @" 0x0: add sp, sp, #0x20 0x4: sub sp, sp, #0x20 "); assert_snapshot!(cb.hexdump(), @"ff830091ff8300d1"); } #[test] fn add_into() { let (mut asm, mut cb) = setup_asm(); let sp = Opnd::Reg(XZR_REG); asm.add_into(sp, 8.into()); asm.add_into(Opnd::Reg(X20_REG), 0x20.into()); asm.compile_with_num_regs(&mut cb, 0); assert_disasm_snapshot!(cb.disasm(), @" 0x0: add sp, sp, #8 0x4: adds x20, x20, #0x20 "); assert_snapshot!(cb.hexdump(), @"ff230091948200b1"); } #[test] fn sub_imm_reg() { let (mut asm, mut cb) = setup_asm(); let difference = asm.sub(0x8.into(), Opnd::Reg(X5_REG)); asm.load_into(Opnd::Reg(X1_REG), difference); asm.compile_with_num_regs(&mut cb, 1); assert_disasm_snapshot!(cb.disasm(), @" 0x0: mov x0, #8 0x4: subs x0, x0, x5 0x8: mov x1, x0 "); 
assert_snapshot!(cb.hexdump(), @"000180d2000005ebe10300aa"); } #[test] fn no_dead_mov_from_vreg() { let (mut asm, mut cb) = setup_asm(); let ret_val = asm.load(Opnd::mem(64, C_RET_OPND, 0)); asm.cret(ret_val); asm.compile_with_num_regs(&mut cb, 1); assert_disasm_snapshot!(cb.disasm(), @" 0x0: ldur x0, [x0] 0x4: ret "); assert_snapshot!(cb.hexdump(), @"000040f8c0035fd6"); } #[test] fn test_emit_add() { let (mut asm, mut cb) = setup_asm(); let opnd = asm.add(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); asm.compile_with_regs(&mut cb, vec![X3_REG]).unwrap(); // Assert that only 2 instructions were written. assert_disasm_snapshot!(cb.disasm(), @r" 0x0: adds x3, x0, x1 0x4: stur x3, [x2] "); assert_snapshot!(cb.hexdump(), @"030001ab430000f8"); } #[test] fn test_emit_bake_string() { let (mut asm, mut cb) = setup_asm(); asm.bake_string("Hello, world!"); asm.compile_with_num_regs(&mut cb, 0); // Testing that we pad the string to the nearest 4-byte boundary to make // it easier to jump over. assert_disasm_snapshot!(cb.disasm(), @r" 0x0: ldnp d8, d25, [x10, #-0x140] 0x4: .byte 0x6f, 0x2c, 0x20, 0x77 0x8: .byte 0x6f, 0x72, 0x6c, 0x64 0xc: .byte 0x21, 0x00, 0x00, 0x00 "); assert_snapshot!(cb.hexdump(), @"48656c6c6f2c20776f726c6421000000"); } #[test] fn test_emit_cpush_all() { let (mut asm, mut cb) = setup_asm(); asm.cpush_all(); asm.compile_with_num_regs(&mut cb, 0); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: str x1, [sp, #-0x10]! 0x4: str x9, [sp, #-0x10]! 0x8: str x10, [sp, #-0x10]! 0xc: str x11, [sp, #-0x10]! 0x10: str x12, [sp, #-0x10]! 0x14: str x13, [sp, #-0x10]! 0x18: str x14, [sp, #-0x10]! 0x1c: str x15, [sp, #-0x10]! 0x20: mrs x16, nzcv 0x24: str x16, [sp, #-0x10]! "); assert_snapshot!(cb.hexdump(), @"e10f1ff8e90f1ff8ea0f1ff8eb0f1ff8ec0f1ff8ed0f1ff8ee0f1ff8ef0f1ff810423bd5f00f1ff8"); } #[test] fn test_emit_cpop_all() { let (mut asm, mut cb) = setup_asm(); asm.cpop_all(); asm.compile_with_num_regs(&mut cb, 0); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: msr nzcv, x16 0x4: ldr x16, [sp], #0x10 0x8: ldr x15, [sp], #0x10 0xc: ldr x14, [sp], #0x10 0x10: ldr x13, [sp], #0x10 0x14: ldr x12, [sp], #0x10 0x18: ldr x11, [sp], #0x10 0x1c: ldr x10, [sp], #0x10 0x20: ldr x9, [sp], #0x10 0x24: ldr x1, [sp], #0x10 "); assert_snapshot!(cb.hexdump(), @"10421bd5f00741f8ef0741f8ee0741f8ed0741f8ec0741f8eb0741f8ea0741f8e90741f8e10741f8"); } #[test] fn test_emit_frame() { let (mut asm, mut cb) = setup_asm(); asm.frame_setup(&[]); asm.frame_teardown(&[]); asm.compile_with_num_regs(&mut cb, 0); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: stp x29, x30, [sp, #-0x10]! 
0x4: mov x29, sp 0x8: mov sp, x29 0xc: ldp x29, x30, [sp], #0x10 "); assert_snapshot!(cb.hexdump(), @"fd7bbfa9fd030091bf030091fd7bc1a8"); } #[test] fn frame_setup_and_teardown() { const THREE_REGS: &[Opnd] = &[Opnd::Reg(X19_REG), Opnd::Reg(X20_REG), Opnd::Reg(X21_REG)]; // Test 3 preserved regs (odd), odd slot_count let cb1 = { let (mut asm, mut cb) = setup_asm(); asm.stack_base_idx = 3; asm.frame_setup(THREE_REGS); asm.frame_teardown(THREE_REGS); asm.compile_with_num_regs(&mut cb, 0); cb }; // Test 3 preserved regs (odd), even slot_count let cb2 = { let (mut asm, mut cb) = setup_asm(); asm.stack_base_idx = 4; asm.frame_setup(THREE_REGS); asm.frame_teardown(THREE_REGS); asm.compile_with_num_regs(&mut cb, 0); cb }; // Test 4 preserved regs (even), odd slot_count let cb3 = { static FOUR_REGS: &[Opnd] = &[Opnd::Reg(X19_REG), Opnd::Reg(X20_REG), Opnd::Reg(X21_REG), Opnd::Reg(X22_REG)]; let (mut asm, mut cb) = setup_asm(); asm.stack_base_idx = 3; asm.frame_setup(FOUR_REGS); asm.frame_teardown(FOUR_REGS); asm.compile_with_num_regs(&mut cb, 0); cb }; assert_disasm_snapshot!(disasms_with!("\n", cb1, cb2, cb3), @r" 0x0: stp x29, x30, [sp, #-0x10]! 0x4: mov x29, sp 0x8: stp x20, x19, [sp, #-0x10]! 0xc: stur x21, [sp, #-8] 0x10: sub sp, sp, #0x20 0x14: ldp x20, x19, [x29, #-0x10] 0x18: ldur x21, [x29, #-0x18] 0x1c: mov sp, x29 0x20: ldp x29, x30, [sp], #0x10 0x0: stp x29, x30, [sp, #-0x10]! 0x4: mov x29, sp 0x8: stp x20, x19, [sp, #-0x10]! 0xc: stur x21, [sp, #-8] 0x10: sub sp, sp, #0x30 0x14: ldp x20, x19, [x29, #-0x10] 0x18: ldur x21, [x29, #-0x18] 0x1c: mov sp, x29 0x20: ldp x29, x30, [sp], #0x10 0x0: stp x29, x30, [sp, #-0x10]! 0x4: mov x29, sp 0x8: stp x20, x19, [sp, #-0x10]! 0xc: stp x22, x21, [sp, #-0x10]! 0x10: sub sp, sp, #0x20 0x14: ldp x20, x19, [x29, #-0x10] 0x18: ldp x22, x21, [x29, #-0x20] 0x1c: mov sp, x29 0x20: ldp x29, x30, [sp], #0x10 "); assert_snapshot!(hexdumps!(cb1, cb2, cb3), @r" fd7bbfa9fd030091f44fbfa9f5831ff8ff8300d1b44f7fa9b5835ef8bf030091fd7bc1a8 fd7bbfa9fd030091f44fbfa9f5831ff8ffc300d1b44f7fa9b5835ef8bf030091fd7bc1a8 fd7bbfa9fd030091f44fbfa9f657bfa9ff8300d1b44f7fa9b6577ea9bf030091fd7bc1a8 "); } #[test] fn test_emit_je_fits_into_bcond() { let (mut asm, mut cb) = setup_asm(); let target: CodePtr = cb.get_write_ptr().add_bytes(80); asm.je(Target::CodePtr(target)); asm.compile_with_num_regs(&mut cb, 0); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: b.eq #0x50 0x4: nop 0x8: nop 0xc: nop 0x10: nop 0x14: nop "); assert_snapshot!(cb.hexdump(), @"800200541f2003d51f2003d51f2003d51f2003d51f2003d5"); } #[test] fn test_emit_je_does_not_fit_into_bcond() { let (mut asm, mut cb) = setup_asm(); let offset = 1 << 21; let target: CodePtr = cb.get_write_ptr().add_bytes(offset); asm.je(Target::CodePtr(target)); asm.compile_with_num_regs(&mut cb, 0); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: b.ne #8 0x4: b #0x200000 0x8: nop 0xc: nop 0x10: nop 0x14: nop "); assert_snapshot!(cb.hexdump(), @"41000054ffff07141f2003d51f2003d51f2003d51f2003d5"); } #[test] fn test_emit_lea() { let (mut asm, mut cb) = setup_asm(); // Test values that exercise various types of immediates. 
// - 9 bit displacement for Load/Store // - 12 bit ADD/SUB shifted immediate // - 16 bit MOV family shifted immediates // - bit mask immediates for displacement in [i32::MAX, 0x10008, 0x1800, 0x208, -0x208, -0x1800, -0x10008, i32::MIN] { let mem = Opnd::mem(64, NATIVE_STACK_PTR, displacement); asm.lea_into(Opnd::Reg(X0_REG), mem); } asm.compile_with_num_regs(&mut cb, 0); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: orr x0, xzr, #0x7fffffff 0x4: add x0, sp, x0 0x8: mov x0, #8 0xc: movk x0, #1, lsl #16 0x10: add x0, sp, x0 0x14: mov x0, #0x1800 0x18: add x0, sp, x0 0x1c: add x0, sp, #0x208 0x20: sub x0, sp, #0x208 0x24: mov x0, #-0x1800 0x28: add x0, sp, x0 0x2c: mov x0, #0xfff8 0x30: movk x0, #0xfffe, lsl #16 0x34: movk x0, #0xffff, lsl #32 0x38: movk x0, #0xffff, lsl #48 0x3c: add x0, sp, x0 0x40: orr x0, xzr, #0xffffffff80000000 0x44: add x0, sp, x0 "); assert_snapshot!(cb.hexdump(), @"e07b40b2e063208b000180d22000a0f2e063208b000083d2e063208be0230891e02308d1e0ff8292e063208b00ff9fd2c0ffbff2e0ffdff2e0fffff2e063208be08361b2e063208b"); } #[test] fn test_load_large_disp_mem() { let (mut asm, mut cb) = setup_asm(); let extended_ivars = asm.load(Opnd::mem(64, NATIVE_STACK_PTR, 0)); let result = asm.load(Opnd::mem(VALUE_BITS, extended_ivars, 1000 * SIZEOF_VALUE_I32)); asm.store(Opnd::mem(VALUE_BITS, NATIVE_STACK_PTR, 0), result); asm.compile_with_num_regs(&mut cb, 1); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: ldur x0, [sp] 0x4: mov x16, #0x1f40 0x8: add x0, x0, x16, uxtx 0xc: ldur x0, [x0] 0x10: stur x0, [sp] "); assert_snapshot!(cb.hexdump(), @"e00340f810e883d20060308b000040f8e00300f8"); } #[test] fn test_store() { let (mut asm, mut cb) = setup_asm(); // Large memory offsets in combinations of destination and source let large_mem = Opnd::mem(64, NATIVE_STACK_PTR, -0x305); let small_mem = Opnd::mem(64, C_RET_OPND, 0); asm.store(small_mem, large_mem); asm.store(large_mem, small_mem); asm.store(large_mem, large_mem); asm.compile_with_num_regs(&mut cb, 0); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: sub x16, sp, #0x305 0x4: ldur x16, [x16] 0x8: stur x16, [x0] 0xc: sub x15, sp, #0x305 0x10: ldur x16, [x0] 0x14: stur x16, [x15] 0x18: sub x15, sp, #0x305 0x1c: sub x16, sp, #0x305 0x20: ldur x16, [x16] 0x24: stur x16, [x15] "); assert_snapshot!(cb.hexdump(), @"f0170cd1100240f8100000f8ef170cd1100040f8f00100f8ef170cd1f0170cd1100240f8f00100f8"); } #[test] fn test_store_value_without_split() { let (mut asm, mut cb) = setup_asm(); let imitation_heap_value = VALUE(0x1000); assert!(imitation_heap_value.heap_object_p()); asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into()); // Side exit code is compiled without the split pass, so we directly call emit here to // emulate that scenario.
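// (Descriptive note: arm64_emit reports the in-code offsets of VALUE pointers it baked into the
// generated code -- here the literal read by the ldr below -- presumably so the GC can locate and
// update that embedded reference later; this test only checks that exactly one offset is reported.)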
let gc_offsets = asm.arm64_emit(&mut cb).unwrap(); assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset"); assert_disasm_snapshot!(cb.disasm(), @" 0x0: ldr x16, #8 0x4: b #0x10 0x8: .byte 0x00, 0x10, 0x00, 0x00 0xc: .byte 0x00, 0x00, 0x00, 0x00 0x10: stur x16, [x21] "); assert_snapshot!(cb.hexdump(), @"50000058030000140010000000000000b00200f8"); } #[test] fn test_store_with_valid_scratch_reg() { let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg(); let mut cb = CodeBlock::new_dummy(); asm.store(Opnd::mem(64, scratch_reg, 0), 0x83902.into()); asm.compile_with_num_regs(&mut cb, 0); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: mov x16, #0x3902 0x4: movk x16, #8, lsl #16 0x8: stur x16, [x15] "); assert_snapshot!(cb.hexdump(), @"502087d21001a0f2f00100f8"); } #[test] #[should_panic] fn test_store_with_invalid_scratch_reg() { let (_, scratch_reg) = Assembler::new_with_scratch_reg(); let (mut asm, mut cb) = setup_asm(); // This would put the source into scratch_reg, messing up the destination asm.store(Opnd::mem(64, scratch_reg, 0), 0x83902.into()); asm.compile_with_num_regs(&mut cb, 0); } #[test] #[should_panic] fn test_load_into_with_invalid_scratch_reg() { let (_, scratch_reg) = Assembler::new_with_scratch_reg(); let (mut asm, mut cb) = setup_asm(); // This would put the source into scratch_reg, messing up the destination asm.load_into(scratch_reg, 0x83902.into()); asm.compile_with_num_regs(&mut cb, 0); } #[test] fn test_emit_lea_label() { let (mut asm, mut cb) = setup_asm(); let label = asm.new_label("label"); let opnd = asm.lea_jump_target(label.clone()); asm.write_label(label); asm.bake_string("Hello, world!"); asm.store(Opnd::mem(64, SP, 0), opnd); asm.compile_with_num_regs(&mut cb, 1); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: adr x16, #8 0x4: mov x0, x16 0x8: ldnp d8, d25, [x10, #-0x140] 0xc: .byte 0x6f, 0x2c, 0x20, 0x77 0x10: .byte 0x6f, 0x72, 0x6c, 0x64 0x14: .byte 0x21, 0x00, 0x00, 0x00 0x18: stur x0, [x21] "); assert_snapshot!(cb.hexdump(), @"50000010e00310aa48656c6c6f2c20776f726c6421000000a00200f8"); } #[test] fn test_emit_load_mem_disp_fits_into_load() { let (mut asm, mut cb) = setup_asm(); let opnd = asm.load(Opnd::mem(64, SP, 0)); asm.store(Opnd::mem(64, SP, 0), opnd); asm.compile_with_num_regs(&mut cb, 1); // Assert that two instructions were written: LDUR and STUR. assert_disasm_snapshot!(cb.disasm(), @r" 0x0: ldur x0, [x21] 0x4: stur x0, [x21] "); assert_snapshot!(cb.hexdump(), @"a00240f8a00200f8"); } #[test] fn test_emit_load_mem_disp_fits_into_add() { let (mut asm, mut cb) = setup_asm(); let opnd = asm.load(Opnd::mem(64, SP, 1 << 10)); asm.store(Opnd::mem(64, SP, 0), opnd); asm.compile_with_num_regs(&mut cb, 1); // Assert that three instructions were written: ADD, LDUR, and STUR. assert_disasm_snapshot!(cb.disasm(), @r" 0x0: add x0, x21, #0x400 0x4: ldur x0, [x0] 0x8: stur x0, [x21] "); assert_snapshot!(cb.hexdump(), @"a0021091000040f8a00200f8"); } #[test] fn test_emit_load_mem_disp_does_not_fit_into_add() { let (mut asm, mut cb) = setup_asm(); let opnd = asm.load(Opnd::mem(64, SP, 1 << 12 | 1)); asm.store(Opnd::mem(64, SP, 0), opnd); asm.compile_with_num_regs(&mut cb, 1); // Assert that four instructions were written: MOVZ, ADD, LDUR, and STUR.
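// (Why the MOVZ: 0x1001 sets bit 12 as well as bit 0, so it cannot be encoded as a 12-bit
// ADD/SUB immediate even with the LSL #12 form; the displacement has to be materialized in a
// register before the address calculation.)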
assert_disasm_snapshot!(cb.disasm(), @r" 0x0: mov x0, #0x1001 0x4: add x0, x21, x0, uxtx 0x8: ldur x0, [x0] 0xc: stur x0, [x21] "); assert_snapshot!(cb.hexdump(), @"200082d2a062208b000040f8a00200f8"); } #[test] fn test_emit_load_value_immediate() { let (mut asm, mut cb) = setup_asm(); let opnd = asm.load(Opnd::Value(Qnil)); asm.store(Opnd::mem(64, SP, 0), opnd); asm.compile_with_num_regs(&mut cb, 1); // Assert that only two instructions were written since the value is an // immediate. assert_disasm_snapshot!(cb.disasm(), @r" 0x0: mov x0, #4 0x4: stur x0, [x21] "); assert_snapshot!(cb.hexdump(), @"800080d2a00200f8"); } #[test] fn test_emit_load_value_non_immediate() { let (mut asm, mut cb) = setup_asm(); let opnd = asm.load(Opnd::Value(VALUE(0xCAFECAFECAFE0000))); asm.store(Opnd::mem(64, SP, 0), opnd); asm.compile_with_num_regs(&mut cb, 1); // Assert that five instructions were written since the value is not an // immediate and needs to be loaded into a register. assert_disasm_snapshot!(cb.disasm(), @r" 0x0: ldr x0, #8 0x4: b #0x10 0x8: eon x0, x0, x30, ror #0 0xc: eon x30, x23, x30, ror #50 0x10: stur x0, [x21] "); assert_snapshot!(cb.hexdump(), @"40000058030000140000fecafecafecaa00200f8"); } #[test] fn test_emit_test_32b_reg_not_bitmask_imm() { let (mut asm, mut cb) = setup_asm(); let w0 = Opnd::Reg(X0_REG).with_num_bits(32); asm.test(w0, Opnd::UImm(u32::MAX.into())); // All ones is not encodable with a bitmask immediate, // so this needs one register asm.compile_with_num_regs(&mut cb, 1); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: orr x0, xzr, #0xffffffff 0x4: tst w0, w0 "); assert_snapshot!(cb.hexdump(), @"e07f40b21f00006a"); } #[test] fn test_emit_test_32b_reg_bitmask_imm() { let (mut asm, mut cb) = setup_asm(); let w0 = Opnd::Reg(X0_REG).with_num_bits(32); asm.test(w0, Opnd::UImm(0x80000001)); asm.compile_with_num_regs(&mut cb, 0); assert_disasm_snapshot!(cb.disasm(), @" 0x0: tst w0, #0x80000001"); assert_snapshot!(cb.hexdump(), @"1f040172"); } #[test] fn test_emit_or() { let (mut asm, mut cb) = setup_asm(); let opnd = asm.or(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); asm.compile_with_num_regs(&mut cb, 1); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: orr x0, x0, x1 0x4: stur x0, [x2] "); assert_snapshot!(cb.hexdump(), @"000001aa400000f8"); } #[test] fn test_emit_lshift() { let (mut asm, mut cb) = setup_asm(); let opnd = asm.lshift(Opnd::Reg(X0_REG), Opnd::UImm(5)); asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); asm.compile_with_num_regs(&mut cb, 1); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: lsl x0, x0, #5 0x4: stur x0, [x2] "); assert_snapshot!(cb.hexdump(), @"00e87bd3400000f8"); } #[test] fn test_emit_rshift() { let (mut asm, mut cb) = setup_asm(); let opnd = asm.rshift(Opnd::Reg(X0_REG), Opnd::UImm(5)); asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); asm.compile_with_num_regs(&mut cb, 1); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: asr x0, x0, #5 0x4: stur x0, [x2] "); assert_snapshot!(cb.hexdump(), @"00fc4593400000f8"); } #[test] fn test_emit_urshift() { let (mut asm, mut cb) = setup_asm(); let opnd = asm.urshift(Opnd::Reg(X0_REG), Opnd::UImm(5)); asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); asm.compile_with_num_regs(&mut cb, 1); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: lsr x0, x0, #5 0x4: stur x0, [x2] "); assert_snapshot!(cb.hexdump(), @"00fc45d3400000f8"); } #[test] fn test_emit_test() { let (mut asm, mut cb) = setup_asm(); asm.test(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); 
asm.compile_with_num_regs(&mut cb, 0); // Assert that only one instruction was written. assert_disasm_snapshot!(cb.disasm(), @" 0x0: tst x0, x1"); assert_snapshot!(cb.hexdump(), @"1f0001ea"); } #[test] fn test_emit_test_with_encodable_unsigned_immediate() { let (mut asm, mut cb) = setup_asm(); asm.test(Opnd::Reg(X0_REG), Opnd::UImm(7)); asm.compile_with_num_regs(&mut cb, 0); // Assert that only one instruction was written. assert_disasm_snapshot!(cb.disasm(), @" 0x0: tst x0, #7"); assert_snapshot!(cb.hexdump(), @"1f0840f2"); } #[test] fn test_emit_test_with_unencodable_unsigned_immediate() { let (mut asm, mut cb) = setup_asm(); asm.test(Opnd::Reg(X0_REG), Opnd::UImm(5)); asm.compile_with_num_regs(&mut cb, 1); // Assert that a load and a test instruction were written. assert_disasm_snapshot!(cb.disasm(), @r" 0x0: mov x0, #5 0x4: tst x0, x0 "); assert_snapshot!(cb.hexdump(), @"a00080d21f0000ea"); } #[test] fn test_emit_test_with_encodable_signed_immediate() { let (mut asm, mut cb) = setup_asm(); asm.test(Opnd::Reg(X0_REG), Opnd::Imm(7)); asm.compile_with_num_regs(&mut cb, 0); // Assert that only one instruction was written. assert_disasm_snapshot!(cb.disasm(), @" 0x0: tst x0, #7"); assert_snapshot!(cb.hexdump(), @"1f0840f2"); } #[test] fn test_emit_test_with_unencodable_signed_immediate() { let (mut asm, mut cb) = setup_asm(); asm.test(Opnd::Reg(X0_REG), Opnd::Imm(5)); asm.compile_with_num_regs(&mut cb, 1); // Assert that a load and a test instruction were written. assert_disasm_snapshot!(cb.disasm(), @r" 0x0: mov x0, #5 0x4: tst x0, x0 "); assert_snapshot!(cb.hexdump(), @"a00080d21f0000ea"); } #[test] fn test_emit_test_with_negative_signed_immediate() { let (mut asm, mut cb) = setup_asm(); asm.test(Opnd::Reg(X0_REG), Opnd::Imm(-7)); asm.compile_with_num_regs(&mut cb, 1); // Assert that a test instruction is written. 
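// (-7 sign-extends to 0xffff_ffff_ffff_fff9, whose set bits form a single cyclic run, so it is
// encodable as an ARM64 bitmask immediate and TST needs no scratch register load.)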
assert_disasm_snapshot!(cb.disasm(), @" 0x0: tst x0, #-7"); assert_snapshot!(cb.hexdump(), @"1ff47df2"); } #[test] fn test_32_bit_register_with_some_number() { let (mut asm, mut cb) = setup_asm(); let shape_opnd = Opnd::mem(32, Opnd::Reg(X0_REG), 6); asm.cmp(shape_opnd, Opnd::UImm(4097)); asm.compile_with_num_regs(&mut cb, 2); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: ldur w0, [x0, #6] 0x4: mov x1, #0x1001 0x8: cmp w0, w1 "); assert_snapshot!(cb.hexdump(), @"006040b8210082d21f00016b"); } #[test] fn test_16_bit_register_store_some_number() { let (mut asm, mut cb) = setup_asm(); let shape_opnd = Opnd::mem(16, Opnd::Reg(X0_REG), 0); asm.store(shape_opnd, Opnd::UImm(4097)); asm.compile_with_num_regs(&mut cb, 2); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: mov x16, #0x1001 0x4: sturh w16, [x0] "); assert_snapshot!(cb.hexdump(), @"300082d210000078"); } #[test] fn test_32_bit_register_store_some_number() { let (mut asm, mut cb) = setup_asm(); let shape_opnd = Opnd::mem(32, Opnd::Reg(X0_REG), 6); asm.store(shape_opnd, Opnd::UImm(4097)); asm.compile_with_num_regs(&mut cb, 2); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: mov x16, #0x1001 0x4: stur w16, [x0, #6] "); assert_snapshot!(cb.hexdump(), @"300082d2106000b8"); } #[test] fn test_emit_xor() { let (mut asm, mut cb) = setup_asm(); let opnd = asm.xor(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); asm.compile_with_num_regs(&mut cb, 1); assert_disasm_snapshot!(cb.disasm(), @" 0x0: eor x0, x0, x1 0x4: stur x0, [x2] "); assert_snapshot!(cb.hexdump(), @"000001ca400000f8"); } #[test] #[cfg(feature = "disasm")] fn test_simple_disasm() -> std::result::Result<(), capstone::Error> { // Test drive Capstone with simple input use capstone::prelude::*; let cs = Capstone::new() .arm64() .mode(arch::arm64::ArchMode::Arm) .build()?; let insns = cs.disasm_all(&[0x60, 0x0f, 0x80, 0xF2], 0x1000)?; match insns.as_ref() { [insn] => { assert_eq!(Some("movk"), insn.mnemonic()); Ok(()) } _ => Err(capstone::Error::CustomError( "expected to disassemble to movk", )), } } #[test] fn test_replace_mov_with_ldur() { let (mut asm, mut cb) = setup_asm(); asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::mem(64, CFP, 8)); asm.compile_with_num_regs(&mut cb, 1); assert_disasm_snapshot!(cb.disasm(), @" 0x0: ldur x1, [x19, #8]"); assert_snapshot!(cb.hexdump(), @"618240f8"); } #[test] fn test_not_split_mov() { let (mut asm, mut cb) = setup_asm(); asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::UImm(0xffff)); asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::UImm(0x10000)); asm.compile_with_num_regs(&mut cb, 1); assert_disasm_snapshot!(cb.disasm(), @" 0x0: mov x1, #0xffff 0x4: orr x1, xzr, #0x10000 "); assert_snapshot!(cb.hexdump(), @"e1ff9fd2e10370b2"); } #[test] fn test_merge_csel_mov() { let (mut asm, mut cb) = setup_asm(); let out = asm.csel_l(Qtrue.into(), Qfalse.into()); asm.mov(Opnd::Reg(TEMP_REGS[0]), out); asm.compile_with_num_regs(&mut cb, 2); assert_disasm_snapshot!(cb.disasm(), @" 0x0: mov x0, #0x14 0x4: mov x1, #0 0x8: csel x1, x0, x1, lt "); assert_snapshot!(cb.hexdump(), @"800280d2010080d201b0819a"); } #[test] fn test_label_branch_generate_bounds() { // The immediate in a conditional branch is a 19 bit signed integer, // so its maximum positive value is 2^18 - 1. const IMMEDIATE_MAX_VALUE: usize = 2usize.pow(18) - 1; // `IMMEDIATE_MAX_VALUE` dummy instructions will be generated, // plus a compare, a jump instruction, and a label. // Adding page_size to avoid OOM on the last page.
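// Rough size budget (back-of-the-envelope, not asserted by the test): (2^18 - 1 + 8) instructions
// at 4 bytes each is just over 1 MiB of code, plus one page of headroom.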
let page_size = unsafe { rb_jit_get_page_size() } as usize; let memory_required = (IMMEDIATE_MAX_VALUE + 8) * 4 + page_size; let mut asm = Assembler::new(); let mut cb = CodeBlock::new_dummy_sized(memory_required); let far_label = asm.new_label("far"); asm.cmp(Opnd::Reg(X0_REG), Opnd::UImm(1)); asm.je(far_label.clone()); (0..IMMEDIATE_MAX_VALUE).for_each(|_| { asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::Reg(TEMP_REGS[2])); }); asm.write_label(far_label.clone()); asm.compile_with_num_regs(&mut cb, 1); } #[test] fn test_add_with_immediate() { let (mut asm, mut cb) = setup_asm(); let out = asm.add(Opnd::Reg(TEMP_REGS[1]), 1.into()); let out = asm.add(out, 1_usize.into()); asm.mov(Opnd::Reg(TEMP_REGS[0]), out); asm.compile_with_num_regs(&mut cb, 2); assert_disasm_snapshot!(cb.disasm(), @" 0x0: adds x0, x9, #1 0x4: adds x1, x0, #1 "); assert_snapshot!(cb.hexdump(), @"200500b1010400b1"); } #[test] fn test_store_spilled_byte() { let (mut asm, mut cb) = setup_asm(); asm.store(Opnd::mem(8, C_RET_OPND, 0), Opnd::mem(8, C_RET_OPND, 8)); asm.compile_with_num_regs(&mut cb, 0); // spill every VReg assert_disasm_snapshot!(cb.disasm(), @r" 0x0: ldurb w16, [x0, #8] 0x4: sturb w16, [x0] "); assert_snapshot!(cb.hexdump(), @"1080403810000038"); } #[test] fn test_ccall_resolve_parallel_moves_no_cycle() { let (mut asm, mut cb) = setup_asm(); asm.ccall(0 as _, vec![ C_ARG_OPNDS[0], // mov x0, x0 (optimized away) C_ARG_OPNDS[1], // mov x1, x1 (optimized away) ]); asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len()); assert_disasm_snapshot!(cb.disasm(), @" 0x0: mov x16, #0 0x4: blr x16 "); assert_snapshot!(cb.hexdump(), @"100080d200023fd6"); } #[test] fn test_ccall_resolve_parallel_moves_single_cycle() { let (mut asm, mut cb) = setup_asm(); // x0 and x1 form a cycle asm.ccall(0 as _, vec![ C_ARG_OPNDS[1], // mov x0, x1 C_ARG_OPNDS[0], // mov x1, x0 C_ARG_OPNDS[2], // mov x2, x2 (optimized away) ]); asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len()); assert_disasm_snapshot!(cb.disasm(), @" 0x0: mov x15, x0 0x4: mov x0, x1 0x8: mov x1, x15 0xc: mov x16, #0 0x10: blr x16 "); assert_snapshot!(cb.hexdump(), @"ef0300aae00301aae1030faa100080d200023fd6"); } #[test] fn test_ccall_resolve_parallel_moves_two_cycles() { let (mut asm, mut cb) = setup_asm(); // x0 and x1 form a cycle, and x2 and x3 form another cycle asm.ccall(0 as _, vec![ C_ARG_OPNDS[1], // mov x0, x1 C_ARG_OPNDS[0], // mov x1, x0 C_ARG_OPNDS[3], // mov x2, x3 C_ARG_OPNDS[2], // mov x3, x2 ]); asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len()); assert_disasm_snapshot!(cb.disasm(), @" 0x0: mov x15, x2 0x4: mov x2, x3 0x8: mov x3, x15 0xc: mov x15, x0 0x10: mov x0, x1 0x14: mov x1, x15 0x18: mov x16, #0 0x1c: blr x16 "); assert_snapshot!(cb.hexdump(), @"ef0302aae20303aae3030faaef0300aae00301aae1030faa100080d200023fd6"); } #[test] fn test_ccall_resolve_parallel_moves_large_cycle() { let (mut asm, mut cb) = setup_asm(); // x0, x1, and x2 form a cycle asm.ccall(0 as _, vec![ C_ARG_OPNDS[1], // mov x0, x1 C_ARG_OPNDS[2], // mov x1, x2 C_ARG_OPNDS[0], // mov x2, x0 ]); asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len()); assert_disasm_snapshot!(cb.disasm(), @" 0x0: mov x15, x0 0x4: mov x0, x1 0x8: mov x1, x2 0xc: mov x2, x15 0x10: mov x16, #0 0x14: blr x16 "); assert_snapshot!(cb.hexdump(), @"ef0300aae00301aae10302aae2030faa100080d200023fd6"); } #[test] fn test_split_spilled_lshift() { let (mut asm, mut cb) = setup_asm(); let opnd_vreg = asm.load(1.into()); let out_vreg = asm.lshift(opnd_vreg, Opnd::UImm(1)); asm.mov(C_RET_OPND, out_vreg);
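// (Descriptive note: with zero allocatable registers every VReg is spilled to a native-frame
// slot, so the snapshot below shuttles the value through [x29 - 8] via the scratch registers
// around each operation.)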
asm.compile_with_num_regs(&mut cb, 0); // spill every VReg assert_disasm_snapshot!(cb.disasm(), @r" 0x0: mov x16, #1 0x4: stur x16, [x29, #-8] 0x8: ldur x15, [x29, #-8] 0xc: lsl x15, x15, #1 0x10: stur x15, [x29, #-8] 0x14: ldur x0, [x29, #-8] "); assert_snapshot!(cb.hexdump(), @"300080d2b0831ff8af835ff8eff97fd3af831ff8a0835ff8"); } #[test] fn test_split_load16_mem_mem_with_large_displacement() { let (mut asm, mut cb) = setup_asm(); let _ = asm.load(Opnd::mem(16, C_RET_OPND, 0x200)); asm.compile(&mut cb).unwrap(); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: add x0, x0, #0x200 0x4: ldurh w0, [x0] "); assert_snapshot!(cb.hexdump(), @"0000089100004078"); } #[test] fn test_split_load32_mem_mem_with_large_displacement() { let (mut asm, mut cb) = setup_asm(); let _ = asm.load(Opnd::mem(32, C_RET_OPND, 0x200)); asm.compile(&mut cb).unwrap(); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: add x0, x0, #0x200 0x4: ldur w0, [x0] "); assert_snapshot!(cb.hexdump(), @"00000891000040b8"); } #[test] fn test_split_load64_mem_mem_with_large_displacement() { let (mut asm, mut cb) = setup_asm(); let _ = asm.load(Opnd::mem(64, C_RET_OPND, 0x200)); asm.compile(&mut cb).unwrap(); assert_disasm_snapshot!(cb.disasm(), @r" 0x0: add x0, x0, #0x200 0x4: ldur x0, [x0] "); assert_snapshot!(cb.hexdump(), @"00000891000040f8"); } }