Diffstat (limited to 'yjit/src/backend/arm64/mod.rs')
-rw-r--r-- | yjit/src/backend/arm64/mod.rs | 1835
1 file changed, 1835 insertions, 0 deletions
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
new file mode 100644
index 0000000000..3bf949ba7d
--- /dev/null
+++ b/yjit/src/backend/arm64/mod.rs
@@ -0,0 +1,1835 @@
+use std::mem::take;
+
+use crate::asm::{CodeBlock, OutlinedCb};
+use crate::asm::arm64::*;
+use crate::cruby::*;
+use crate::backend::ir::*;
+use crate::virtualmem::CodePtr;
+use crate::utils::*;
+
+// Use the arm64 register type for this platform
+pub type Reg = A64Reg;
+
+// Callee-saved registers
+pub const _CFP: Opnd = Opnd::Reg(X19_REG);
+pub const _EC: Opnd = Opnd::Reg(X20_REG);
+pub const _SP: Opnd = Opnd::Reg(X21_REG);
+
+// C argument registers on this platform
+pub const _C_ARG_OPNDS: [Opnd; 6] = [
+    Opnd::Reg(X0_REG),
+    Opnd::Reg(X1_REG),
+    Opnd::Reg(X2_REG),
+    Opnd::Reg(X3_REG),
+    Opnd::Reg(X4_REG),
+    Opnd::Reg(X5_REG)
+];
+
+// C return value register on this platform
+pub const C_RET_REG: Reg = X0_REG;
+pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG);
+
+// These constants define the way we work with Arm64's stack pointer. The stack
+// pointer always needs to be aligned to a 16-byte boundary.
+pub const C_SP_REG: A64Opnd = X31;
+pub const C_SP_STEP: i32 = 16;
+
+impl CodeBlock {
+    // The maximum number of bytes that can be generated by emit_jmp_ptr.
+    pub fn jmp_ptr_bytes(&self) -> usize {
+        // b instruction's offset is encoded as imm26 times 4. It can jump to
+        // +/-128MiB, so this can be used when --yjit-exec-mem-size <= 128.
+        let num_insns = if b_offset_fits_bits(self.virtual_region_size() as i64 / 4) {
+            1 // b instruction
+        } else {
+            5 // 4 instructions to load a 64-bit absolute address + br instruction
+        };
+        num_insns * 4
+    }
+
+    // The maximum number of instructions that can be generated by emit_conditional_jump.
+    fn conditional_jump_insns(&self) -> i32 {
+        // The worst case is instructions for a jump + bcond.
+        self.jmp_ptr_bytes() as i32 / 4 + 1
+    }
+}
+
+/// Map Opnd to A64Opnd
+impl From<Opnd> for A64Opnd {
+    fn from(opnd: Opnd) -> Self {
+        match opnd {
+            Opnd::UImm(value) => A64Opnd::new_uimm(value),
+            Opnd::Imm(value) => A64Opnd::new_imm(value),
+            Opnd::Reg(reg) => A64Opnd::Reg(reg),
+            Opnd::Mem(Mem { base: MemBase::Reg(reg_no), num_bits, disp }) => {
+                A64Opnd::new_mem(num_bits, A64Opnd::Reg(A64Reg { num_bits, reg_no }), disp)
+            },
+            Opnd::Mem(Mem { base: MemBase::InsnOut(_), .. }) => {
+                panic!("attempted to lower an Opnd::Mem with a MemBase::InsnOut base")
+            },
+            Opnd::CArg(_) => panic!("attempted to lower an Opnd::CArg"),
+            Opnd::InsnOut { .. } => panic!("attempted to lower an Opnd::InsnOut"),
+            Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"),
+            Opnd::Stack { .. } => panic!("attempted to lower an Opnd::Stack"),
+            Opnd::None => panic!(
+                "Attempted to lower an Opnd::None. This often happens when an out operand was not allocated for an instruction because the output of the instruction was not used. Please ensure you are using the output."
+            ),
+
+        }
+    }
+}
+
+/// Also implement going from a reference to an operand for convenience.
+impl From<&Opnd> for A64Opnd {
+    fn from(opnd: &Opnd) -> Self {
+        A64Opnd::from(*opnd)
+    }
+}
+
+/// Call emit_jmp_ptr and immediately invalidate the written range.
+/// This is needed when next_page also moves other_cb that is not invalidated
+/// by compile_with_regs. Doing it here allows you to avoid invalidating a lot
+/// more than necessary when other_cb jumps from a position early in the page.
+/// This invalidates a small range of cb twice, but we accept the small cost.
+fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) { + #[cfg(not(test))] + let start = cb.get_write_ptr(); + emit_jmp_ptr(cb, dst_ptr, true); + #[cfg(not(test))] + { + let end = cb.get_write_ptr(); + unsafe { rb_yjit_icache_invalidate(start.raw_ptr(cb) as _, end.raw_ptr(cb) as _) }; + } +} + +fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) { + let src_addr = cb.get_write_ptr().as_offset(); + let dst_addr = dst_ptr.as_offset(); + + // If the offset is short enough, then we'll use the + // branch instruction. Otherwise, we'll move the + // destination into a register and use the branch + // register instruction. + let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) { + b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); + 1 + } else { + let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64); + br(cb, Assembler::SCRATCH0); + num_insns + 1 + }; + + if padding { + // Make sure it's always a consistent number of + // instructions in case it gets patched and has to + // use the other branch. + assert!(num_insns * 4 <= cb.jmp_ptr_bytes()); + for _ in num_insns..(cb.jmp_ptr_bytes() / 4) { + nop(cb); + } + } +} + +/// Emit the required instructions to load the given value into the +/// given register. Our goal here is to use as few instructions as +/// possible to get this value into the register. +fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize { + let mut current = value; + + if current <= 0xffff { + // If the value fits into a single movz + // instruction, then we'll use that. + movz(cb, rd, A64Opnd::new_uimm(current), 0); + return 1; + } else if BitmaskImmediate::try_from(current).is_ok() { + // Otherwise, if the immediate can be encoded + // with the special bitmask immediate encoding, + // we'll use that. + mov(cb, rd, A64Opnd::new_uimm(current)); + return 1; + } else { + // Finally we'll fall back to encoding the value + // using movz for the first 16 bits and movk for + // each subsequent set of 16 bits as long we + // they are necessary. + movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0); + let mut num_insns = 1; + + // (We're sure this is necessary since we + // checked if it only fit into movz above). + current >>= 16; + movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16); + num_insns += 1; + + if current > 0xffff { + current >>= 16; + movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32); + num_insns += 1; + } + + if current > 0xffff { + current >>= 16; + movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48); + num_insns += 1; + } + return num_insns; + } +} + +/// List of registers that can be used for stack temps. +/// These are caller-saved registers. +pub static TEMP_REGS: [Reg; 5] = [X1_REG, X9_REG, X10_REG, X14_REG, X15_REG]; + +#[derive(Debug, PartialEq)] +enum EmitError { + RetryOnNextPage, + OutOfMemory, +} + +impl Assembler +{ + // Special scratch registers for intermediate processing. 
+ // This register is caller-saved (so we don't have to save it before using it) + pub const SCRATCH_REG: Reg = X16_REG; + const SCRATCH0: A64Opnd = A64Opnd::Reg(Assembler::SCRATCH_REG); + const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG); + + /// Get the list of registers from which we will allocate on this platform + /// These are caller-saved registers + /// Note: we intentionally exclude C_RET_REG (X0) from this list + /// because of the way it's used in gen_leave() and gen_leave_exit() + pub fn get_alloc_regs() -> Vec<Reg> { + vec![X11_REG, X12_REG, X13_REG] + } + + /// Get a list of all of the caller-saved registers + pub fn get_caller_save_regs() -> Vec<Reg> { + vec![X1_REG, X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG] + } + + /// Split platform-specific instructions + /// The transformations done here are meant to make our lives simpler in later + /// stages of the compilation pipeline. + /// Here we may want to make sure that all instructions (except load and store) + /// have no memory operands. + fn arm64_split(mut self) -> Assembler + { + /// When we're attempting to load a memory address into a register, the + /// displacement must fit into the maximum number of bits for an Op::Add + /// immediate. If it doesn't, we have to load the displacement into a + /// register first. + fn split_lea_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Mem(Mem { base, disp, num_bits }) => { + if disp >= 0 && ShiftedImmediate::try_from(disp as u64).is_ok() { + asm.lea(opnd) + } else { + let disp = asm.load(Opnd::Imm(disp.into())); + let reg = match base { + MemBase::Reg(reg_no) => Opnd::Reg(Reg { reg_no, num_bits }), + MemBase::InsnOut(idx) => Opnd::InsnOut { idx, num_bits } + }; + + asm.add(reg, disp) + } + }, + _ => unreachable!("Op::Lea only accepts Opnd::Mem operands.") + } + } + + /// When you're storing a register into a memory location or loading a + /// memory location into a register, the displacement from the base + /// register of the memory location must fit into 9 bits. If it doesn't, + /// then we need to load that memory address into a register first. + fn split_memory_address(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Mem(mem) => { + if mem_disp_fits_bits(mem.disp) { + opnd + } else { + let base = split_lea_operand(asm, opnd); + Opnd::mem(64, base, 0) + } + }, + _ => unreachable!("Can only split memory addresses.") + } + } + + /// Any memory operands you're sending into an Op::Load instruction need + /// to be split in case their displacement doesn't fit into 9 bits. + fn split_load_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, + Opnd::Mem(_) => { + let split_opnd = split_memory_address(asm, opnd); + let out_opnd = asm.load(split_opnd); + // Many Arm insns support only 32-bit or 64-bit operands. asm.load with fewer + // bits zero-extends the value, so it's safe to recognize it as a 32-bit value. + if out_opnd.rm_num_bits() < 32 { + out_opnd.with_num_bits(32).unwrap() + } else { + out_opnd + } + }, + _ => asm.load(opnd) + } + } + + /// Operands that take the place of bitmask immediates must follow a + /// certain encoding. In this function we ensure that those operands + /// do follow that encoding, and if they don't then we load them first. + fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd, dest_num_bits: u8) -> Opnd { + match opnd { + Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } | Opnd::Stack { .. 
} => opnd, + Opnd::Mem(_) => split_load_operand(asm, opnd), + Opnd::Imm(imm) => { + if imm == 0 { + Opnd::Reg(XZR_REG) + } else if (dest_num_bits == 64 && + BitmaskImmediate::try_from(imm as u64).is_ok()) || + (dest_num_bits == 32 && + u32::try_from(imm).is_ok() && + BitmaskImmediate::new_32b_reg(imm as u32).is_ok()) { + Opnd::UImm(imm as u64) + } else { + asm.load(opnd).with_num_bits(dest_num_bits).unwrap() + } + }, + Opnd::UImm(uimm) => { + if (dest_num_bits == 64 && BitmaskImmediate::try_from(uimm).is_ok()) || + (dest_num_bits == 32 && + u32::try_from(uimm).is_ok() && + BitmaskImmediate::new_32b_reg(uimm as u32).is_ok()) { + opnd + } else { + asm.load(opnd).with_num_bits(dest_num_bits).unwrap() + } + }, + Opnd::None | Opnd::Value(_) => unreachable!() + } + } + + /// Operands that take the place of a shifted immediate must fit within + /// a certain size. If they don't then we need to load them first. + fn split_shifted_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { + match opnd { + Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } => opnd, + Opnd::Mem(_) => split_load_operand(asm, opnd), + Opnd::Imm(imm) => if ShiftedImmediate::try_from(imm as u64).is_ok() { + opnd + } else { + asm.load(opnd) + } + Opnd::UImm(uimm) => { + if ShiftedImmediate::try_from(uimm).is_ok() { + opnd + } else { + asm.load(opnd) + } + }, + Opnd::None | Opnd::Value(_) | Opnd::Stack { .. } => unreachable!() + } + } + + /// Returns the operands that should be used for a boolean logic + /// instruction. + fn split_boolean_operands(asm: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> (Opnd, Opnd) { + match (opnd0, opnd1) { + (Opnd::Reg(_), Opnd::Reg(_)) => { + (opnd0, opnd1) + }, + (reg_opnd @ Opnd::Reg(_), other_opnd) | + (other_opnd, reg_opnd @ Opnd::Reg(_)) => { + let opnd1 = split_bitmask_immediate(asm, other_opnd, reg_opnd.rm_num_bits()); + (reg_opnd, opnd1) + }, + _ => { + let opnd0 = split_load_operand(asm, opnd0); + let opnd1 = split_bitmask_immediate(asm, opnd1, opnd0.rm_num_bits()); + (opnd0, opnd1) + } + } + } + + /// Returns the operands that should be used for a csel instruction. + fn split_csel_operands(asm: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> (Opnd, Opnd) { + let opnd0 = match opnd0 { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd0, + _ => split_load_operand(asm, opnd0) + }; + + let opnd1 = match opnd1 { + Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd1, + _ => split_load_operand(asm, opnd1) + }; + + (opnd0, opnd1) + } + + fn split_less_than_32_cmp(asm: &mut Assembler, opnd0: Opnd) -> Opnd { + match opnd0 { + Opnd::Reg(_) | Opnd::InsnOut { .. } => { + match opnd0.rm_num_bits() { + 8 => asm.and(opnd0.with_num_bits(64).unwrap(), Opnd::UImm(0xff)), + 16 => asm.and(opnd0.with_num_bits(64).unwrap(), Opnd::UImm(0xffff)), + 32 | 64 => opnd0, + bits => unreachable!("Invalid number of bits. {}", bits) + } + } + _ => opnd0 + } + } + + let live_ranges: Vec<usize> = take(&mut self.live_ranges); + let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits)); + let asm = &mut asm_local; + let mut iterator = self.into_draining_iter(); + + while let Some((index, mut insn)) = iterator.next_mapped() { + // Here we're going to map the operands of the instruction to load + // any Opnd::Value operands into registers if they are heap objects + // such that only the Op::Load instruction needs to handle that + // case. If the values aren't heap objects then we'll treat them as + // if they were just unsigned integer. + let is_load = matches!(insn, Insn::Load { .. 
} | Insn::LoadInto { .. }); + let mut opnd_iter = insn.opnd_iter_mut(); + + while let Some(opnd) = opnd_iter.next() { + match opnd { + Opnd::Value(value) => { + if value.special_const_p() { + *opnd = Opnd::UImm(value.as_u64()); + } else if !is_load { + *opnd = asm.load(*opnd); + } + }, + Opnd::Stack { .. } => { + *opnd = asm.lower_stack_opnd(opnd); + } + _ => {} + }; + } + + // We are replacing instructions here so we know they are already + // being used. It is okay not to use their output here. + #[allow(unused_must_use)] + match &mut insn { + Insn::Add { left, right, .. } => { + match (*left, *right) { + (Opnd::Reg(_) | Opnd::InsnOut { .. }, Opnd::Reg(_) | Opnd::InsnOut { .. }) => { + asm.add(*left, *right); + }, + (reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. }), other_opnd) | + (other_opnd, reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. })) => { + let opnd1 = split_shifted_immediate(asm, other_opnd); + asm.add(reg_opnd, opnd1); + }, + _ => { + let opnd0 = split_load_operand(asm, *left); + let opnd1 = split_shifted_immediate(asm, *right); + asm.add(opnd0, opnd1); + } + } + }, + Insn::And { left, right, out } | + Insn::Or { left, right, out } | + Insn::Xor { left, right, out } => { + let (opnd0, opnd1) = split_boolean_operands(asm, *left, *right); + *left = opnd0; + *right = opnd1; + + // Since these instructions are lowered to an instruction that have 2 input + // registers and an output register, look to merge with an `Insn::Mov` that + // follows which puts the output in another register. For example: + // `Add a, b => out` followed by `Mov c, out` becomes `Add a, b => c`. + if let (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src })) = (left, right, iterator.peek()) { + if live_ranges[index] == index + 1 { + // Check after potentially lowering a stack operand to a register operand + let lowered_dest = if let Opnd::Stack { .. } = dest { + asm.lower_stack_opnd(dest) + } else { + *dest + }; + if out == src && matches!(lowered_dest, Opnd::Reg(_)) { + *out = lowered_dest; + iterator.map_insn_index(asm); + iterator.next_unmapped(); // Pop merged Insn::Mov + } + } + } + + asm.push_insn(insn); + } + // Lower to Joz and Jonz for generating CBZ/CBNZ for compare-with-0-and-branch. + ref insn @ Insn::Cmp { ref left, right: ref right @ (Opnd::UImm(0) | Opnd::Imm(0)) } | + ref insn @ Insn::Test { ref left, right: ref right @ (Opnd::InsnOut { .. } | Opnd::Reg(_)) } if { + let same_opnd_if_test = if let Insn::Test { .. } = insn { + left == right + } else { + true + }; + + same_opnd_if_test && if let Some( + Insn::Jz(target) | Insn::Je(target) | Insn::Jnz(target) | Insn::Jne(target) + ) = iterator.peek() { + matches!(target, Target::SideExit { .. }) + } else { + false + } + } => { + let reg = split_load_operand(asm, *left); + match iterator.peek() { + Some(Insn::Jz(target) | Insn::Je(target)) => asm.push_insn(Insn::Joz(reg, *target)), + Some(Insn::Jnz(target) | Insn::Jne(target)) => asm.push_insn(Insn::Jonz(reg, *target)), + _ => () + } + + iterator.map_insn_index(asm); + iterator.next_unmapped(); // Pop merged jump instruction + } + Insn::CCall { opnds, fptr, .. } => { + assert!(opnds.len() <= C_ARG_OPNDS.len()); + + // Load each operand into the corresponding argument + // register. 
+ // Note: the iteration order is reversed to avoid corrupting x0, + // which is both the return value and first argument register + for (idx, opnd) in opnds.into_iter().enumerate().rev() { + // If the value that we're sending is 0, then we can use + // the zero register, so in this case we'll just send + // a UImm of 0 along as the argument to the move. + let value = match opnd { + Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0), + Opnd::Mem(_) => split_memory_address(asm, *opnd), + _ => *opnd + }; + + asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), value); + } + + // Now we push the CCall without any arguments so that it + // just performs the call. + asm.ccall(*fptr, vec![]); + }, + Insn::Cmp { left, right } => { + let opnd0 = split_load_operand(asm, *left); + let opnd0 = split_less_than_32_cmp(asm, opnd0); + let split_right = split_shifted_immediate(asm, *right); + let opnd1 = match split_right { + Opnd::InsnOut { .. } if opnd0.num_bits() != split_right.num_bits() => { + split_right.with_num_bits(opnd0.num_bits().unwrap()).unwrap() + }, + _ => split_right + }; + + asm.cmp(opnd0, opnd1); + }, + Insn::CRet(opnd) => { + match opnd { + // If the value is already in the return register, then + // we don't need to do anything. + Opnd::Reg(C_RET_REG) => {}, + + // If the value is a memory address, we need to first + // make sure the displacement isn't too large and then + // load it into the return register. + Opnd::Mem(_) => { + let split = split_memory_address(asm, *opnd); + asm.load_into(C_RET_OPND, split); + }, + + // Otherwise we just need to load the value into the + // return register. + _ => { + asm.load_into(C_RET_OPND, *opnd); + } + } + asm.cret(C_RET_OPND); + }, + Insn::CSelZ { truthy, falsy, out } | + Insn::CSelNZ { truthy, falsy, out } | + Insn::CSelE { truthy, falsy, out } | + Insn::CSelNE { truthy, falsy, out } | + Insn::CSelL { truthy, falsy, out } | + Insn::CSelLE { truthy, falsy, out } | + Insn::CSelG { truthy, falsy, out } | + Insn::CSelGE { truthy, falsy, out } => { + let (opnd0, opnd1) = split_csel_operands(asm, *truthy, *falsy); + *truthy = opnd0; + *falsy = opnd1; + // Merge `csel` and `mov` into a single `csel` when possible + match iterator.peek() { + Some(Insn::Mov { dest: Opnd::Reg(reg), src }) + if matches!(out, Opnd::InsnOut { .. }) && *out == *src && live_ranges[index] == index + 1 => { + *out = Opnd::Reg(*reg); + asm.push_insn(insn); + iterator.map_insn_index(asm); + iterator.next_unmapped(); // Pop merged Insn::Mov + } + _ => { + asm.push_insn(insn); + } + } + }, + Insn::IncrCounter { mem, value } => { + let counter_addr = match mem { + Opnd::Mem(_) => split_lea_operand(asm, *mem), + _ => *mem + }; + + asm.incr_counter(counter_addr, *value); + }, + Insn::JmpOpnd(opnd) => { + if let Opnd::Mem(_) = opnd { + let opnd0 = split_load_operand(asm, *opnd); + asm.jmp_opnd(opnd0); + } else { + asm.jmp_opnd(*opnd); + } + }, + Insn::Load { opnd, .. } | + Insn::LoadInto { opnd, .. } => { + *opnd = match opnd { + Opnd::Mem(_) => split_memory_address(asm, *opnd), + _ => *opnd + }; + asm.push_insn(insn); + }, + Insn::LoadSExt { opnd, .. } => { + match opnd { + // We only want to sign extend if the operand is a + // register, instruction output, or memory address that + // is 32 bits. Otherwise we'll just load the value + // directly since there's no need to sign extend. + Opnd::Reg(Reg { num_bits: 32, .. }) | + Opnd::InsnOut { num_bits: 32, .. } | + Opnd::Mem(Mem { num_bits: 32, .. 
}) => { + asm.load_sext(*opnd); + }, + _ => { + asm.load(*opnd); + } + }; + }, + Insn::Mov { dest, src } => { + match (&dest, &src) { + // If we're attempting to load into a memory operand, then + // we'll switch over to the store instruction. + (Opnd::Mem(_), _) => { + let opnd0 = split_memory_address(asm, *dest); + let value = match *src { + // If the first operand is zero, then we can just use + // the zero register. + Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG), + // If the first operand is a memory operand, we're going + // to transform this into a store instruction, so we'll + // need to load this anyway. + Opnd::UImm(_) => asm.load(*src), + // The value that is being moved must be either a + // register or an immediate that can be encoded as a + // bitmask immediate. Otherwise, we'll need to split the + // move into multiple instructions. + _ => split_bitmask_immediate(asm, *src, dest.rm_num_bits()) + }; + + asm.store(opnd0, value); + }, + // If we're loading a memory operand into a register, then + // we'll switch over to the load instruction. + (Opnd::Reg(_), Opnd::Mem(_)) => { + let value = split_memory_address(asm, *src); + asm.load_into(*dest, value); + }, + // Otherwise we'll use the normal mov instruction. + (Opnd::Reg(_), _) => { + let value = match *src { + // Unlike other instructions, we can avoid splitting this case, using movz. + Opnd::UImm(uimm) if uimm <= 0xffff => *src, + _ => split_bitmask_immediate(asm, *src, dest.rm_num_bits()), + }; + asm.mov(*dest, value); + }, + _ => unreachable!() + }; + }, + Insn::Not { opnd, .. } => { + // The value that is being negated must be in a register, so + // if we get anything else we need to load it first. + let opnd0 = match opnd { + Opnd::Mem(_) => split_load_operand(asm, *opnd), + _ => *opnd + }; + + asm.not(opnd0); + }, + Insn::LShift { opnd, .. } | + Insn::RShift { opnd, .. } | + Insn::URShift { opnd, .. } => { + // The operand must be in a register, so + // if we get anything else we need to load it first. + let opnd0 = match opnd { + Opnd::Mem(_) => split_load_operand(asm, *opnd), + _ => *opnd + }; + + *opnd = opnd0; + asm.push_insn(insn); + }, + Insn::Store { dest, src } => { + // The value being stored must be in a register, so if it's + // not already one we'll load it first. + let opnd1 = match src { + // If the first operand is zero, then we can just use + // the zero register. + Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG), + // Otherwise we'll check if we need to load it first. + _ => split_load_operand(asm, *src) + }; + + match dest { + Opnd::Reg(_) => { + // Store does not support a register as a dest operand. + asm.mov(*dest, opnd1); + } + _ => { + // The displacement for the STUR instruction can't be more + // than 9 bits long. If it's longer, we need to load the + // memory address into a register first. + let opnd0 = split_memory_address(asm, *dest); + asm.store(opnd0, opnd1); + } + } + }, + Insn::Sub { left, right, .. } => { + let opnd0 = split_load_operand(asm, *left); + let opnd1 = split_shifted_immediate(asm, *right); + asm.sub(opnd0, opnd1); + }, + Insn::Mul { left, right, .. } => { + let opnd0 = split_load_operand(asm, *left); + let opnd1 = split_load_operand(asm, *right); + asm.mul(opnd0, opnd1); + }, + Insn::Test { left, right } => { + // The value being tested must be in a register, so if it's + // not already one we'll load it first. 
+ let opnd0 = split_load_operand(asm, *left); + + // The second value must be either a register or an + // unsigned immediate that can be encoded as a bitmask + // immediate. If it's not one of those, we'll need to load + // it first. + let opnd1 = split_bitmask_immediate(asm, *right, opnd0.rm_num_bits()); + asm.test(opnd0, opnd1); + }, + _ => { + // If we have an output operand, then we need to replace it + // with a new output operand from the new assembler. + if insn.out_opnd().is_some() { + let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter()); + let out = insn.out_opnd_mut().unwrap(); + *out = asm.next_opnd_out(out_num_bits); + } + + asm.push_insn(insn); + } + }; + + iterator.map_insn_index(asm); + } + + asm_local + } + + /// Emit platform-specific machine code + /// Returns a list of GC offsets. Can return failure to signal caller to retry. + fn arm64_emit(&mut self, cb: &mut CodeBlock, ocb: &mut Option<&mut OutlinedCb>) -> Result<Vec<u32>, EmitError> { + /// Determine how many instructions it will take to represent moving + /// this value into a register. Note that the return value of this + /// function must correspond to how many instructions are used to + /// represent this load in the emit_load_value function. + fn emit_load_size(value: u64) -> u8 { + if BitmaskImmediate::try_from(value).is_ok() { + return 1; + } + + if value < (1 << 16) { + 1 + } else if value < (1 << 32) { + 2 + } else if value < (1 << 48) { + 3 + } else { + 4 + } + } + + /// Emit a conditional jump instruction to a specific target. This is + /// called when lowering any of the conditional jump instructions. + fn emit_conditional_jump<const CONDITION: u8>(cb: &mut CodeBlock, target: Target) { + match target { + Target::CodePtr(dst_ptr) | Target::SideExitPtr(dst_ptr) => { + let dst_addr = dst_ptr.as_offset(); + let src_addr = cb.get_write_ptr().as_offset(); + + let num_insns = if bcond_offset_fits_bits((dst_addr - src_addr) / 4) { + // If the jump offset fits into the conditional jump as + // an immediate value and it's properly aligned, then we + // can use the b.cond instruction directly. We're safe + // to use as i32 here since we already checked that it + // fits. + let bytes = (dst_addr - src_addr) as i32; + bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes)); + + // Here we're going to return 1 because we've only + // written out 1 instruction. + 1 + } else if b_offset_fits_bits((dst_addr - (src_addr + 4)) / 4) { // + 4 for bcond + // If the jump offset fits into the unconditional jump as + // an immediate value, we can use inverse b.cond + b. + // + // We're going to write out the inverse condition so + // that if it doesn't match it will skip over the + // instruction used for branching. + bcond(cb, Condition::inverse(CONDITION), 2.into()); + b(cb, InstructionOffset::from_bytes((dst_addr - (src_addr + 4)) as i32)); // + 4 for bcond + + // We've only written out 2 instructions. + 2 + } else { + // Otherwise, we need to load the address into a + // register and use the branch register instruction. + let dst_addr = (dst_ptr.raw_ptr(cb) as usize).as_u64(); + let load_insns: i32 = emit_load_size(dst_addr).into(); + + // We're going to write out the inverse condition so + // that if it doesn't match it will skip over the + // instructions used for branching. 
+ bcond(cb, Condition::inverse(CONDITION), (load_insns + 2).into()); + emit_load_value(cb, Assembler::SCRATCH0, dst_addr); + br(cb, Assembler::SCRATCH0); + + // Here we'll return the number of instructions that it + // took to write out the destination address + 1 for the + // b.cond and 1 for the br. + load_insns + 2 + }; + + if let Target::CodePtr(_) = target { + // We need to make sure we have at least 6 instructions for + // every kind of jump for invalidation purposes, so we're + // going to write out padding nop instructions here. + assert!(num_insns <= cb.conditional_jump_insns()); + for _ in num_insns..cb.conditional_jump_insns() { nop(cb); } + } + }, + Target::Label(label_idx) => { + // Here we're going to save enough space for ourselves and + // then come back and write the instruction once we know the + // offset. We're going to assume we can fit into a single + // b.cond instruction. It will panic otherwise. + cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap(); + bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes)); + }); + }, + Target::SideExit { .. } => { + unreachable!("Target::SideExit should have been compiled by compile_side_exit") + }, + }; + } + + /// Emit a CBZ or CBNZ which branches when a register is zero or non-zero + fn emit_cmp_zero_jump(cb: &mut CodeBlock, reg: A64Opnd, branch_if_zero: bool, target: Target) { + if let Target::SideExitPtr(dst_ptr) = target { + let dst_addr = dst_ptr.as_offset(); + let src_addr = cb.get_write_ptr().as_offset(); + + if cmp_branch_offset_fits_bits((dst_addr - src_addr) / 4) { + // If the offset fits in one instruction, generate cbz or cbnz + let bytes = (dst_addr - src_addr) as i32; + if branch_if_zero { + cbz(cb, reg, InstructionOffset::from_bytes(bytes)); + } else { + cbnz(cb, reg, InstructionOffset::from_bytes(bytes)); + } + } else { + // Otherwise, we load the address into a register and + // use the branch register instruction. Note that because + // side exits should always be close, this form should be + // rare or impossible to see. + let dst_addr = dst_ptr.raw_addr(cb) as u64; + let load_insns: i32 = emit_load_size(dst_addr).into(); + + // Write out the inverse condition so that if + // it doesn't match it will skip over the + // instructions used for branching. + if branch_if_zero { + cbnz(cb, reg, InstructionOffset::from_insns(load_insns + 2)); + } else { + cbz(cb, reg, InstructionOffset::from_insns(load_insns + 2)); + } + emit_load_value(cb, Assembler::SCRATCH0, dst_addr); + br(cb, Assembler::SCRATCH0); + + } + } else { + unreachable!("We should only generate Joz/Jonz with side-exit targets"); + } + } + + /// Emit a push instruction for the given operand by adding to the stack + /// pointer and then storing the given value. + fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) { + str_pre(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, -C_SP_STEP)); + } + + /// Emit a pop instruction into the given operand by loading the value + /// and then subtracting from the stack pointer. + fn emit_pop(cb: &mut CodeBlock, opnd: A64Opnd) { + ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP)); + } + + /// Compile a side exit if Target::SideExit is given. 
+ fn compile_side_exit( + target: Target, + asm: &mut Assembler, + ocb: &mut Option<&mut OutlinedCb>, + ) -> Result<Target, EmitError> { + if let Target::SideExit { counter, context } = target { + let side_exit = asm.get_side_exit(&context.unwrap(), Some(counter), ocb.as_mut().unwrap()) + .ok_or(EmitError::OutOfMemory)?; + Ok(Target::SideExitPtr(side_exit)) + } else { + Ok(target) + } + } + + // dbg!(&self.insns); + + // List of GC offsets + let mut gc_offsets: Vec<u32> = Vec::new(); + + // Buffered list of PosMarker callbacks to fire if codegen is successful + let mut pos_markers: Vec<(usize, CodePtr)> = vec![]; + + // For each instruction + let start_write_pos = cb.get_write_pos(); + let mut insn_idx: usize = 0; + while let Some(insn) = self.insns.get(insn_idx) { + let src_ptr = cb.get_write_ptr(); + let had_dropped_bytes = cb.has_dropped_bytes(); + let old_label_state = cb.get_label_state(); + let mut insn_gc_offsets: Vec<u32> = Vec::new(); + + match insn { + Insn::Comment(text) => { + if cfg!(feature = "disasm") { + cb.add_comment(text); + } + }, + Insn::Label(target) => { + cb.write_label(target.unwrap_label_idx()); + }, + // Report back the current position in the generated code + Insn::PosMarker(..) => { + pos_markers.push((insn_idx, cb.get_write_ptr())) + } + Insn::BakeString(text) => { + for byte in text.as_bytes() { + cb.write_byte(*byte); + } + + // Add a null-terminator byte for safety (in case we pass + // this to C code) + cb.write_byte(0); + + // Pad out the string to the next 4-byte boundary so that + // it's easy to jump past. + for _ in 0..(4 - ((text.len() + 1) % 4)) { + cb.write_byte(0); + } + }, + Insn::FrameSetup => { + stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16)); + + // X29 (frame_pointer) = SP + mov(cb, X29, C_SP_REG); + }, + Insn::FrameTeardown => { + // SP = X29 (frame pointer) + mov(cb, C_SP_REG, X29); + + ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16)); + }, + Insn::Add { left, right, out } => { + adds(cb, out.into(), left.into(), right.into()); + }, + Insn::Sub { left, right, out } => { + subs(cb, out.into(), left.into(), right.into()); + }, + Insn::Mul { left, right, out } => { + // If the next instruction is jo (jump on overflow) + match (self.insns.get(insn_idx + 1), self.insns.get(insn_idx + 2)) { + (Some(Insn::JoMul(_)), _) | + (Some(Insn::PosMarker(_)), Some(Insn::JoMul(_))) => { + // Compute the high 64 bits + smulh(cb, Self::SCRATCH0, left.into(), right.into()); + + // Compute the low 64 bits + // This may clobber one of the input registers, + // so we do it after smulh + mul(cb, out.into(), left.into(), right.into()); + + // Produce a register that is all zeros or all ones + // Based on the sign bit of the 64-bit mul result + asr(cb, Self::SCRATCH1, out.into(), A64Opnd::UImm(63)); + + // If the high 64-bits are not all zeros or all ones, + // matching the sign bit, then we have an overflow + cmp(cb, Self::SCRATCH0, Self::SCRATCH1); + // Insn::JoMul will emit_conditional_jump::<{Condition::NE}> + } + _ => { + mul(cb, out.into(), left.into(), right.into()); + } + } + }, + Insn::And { left, right, out } => { + and(cb, out.into(), left.into(), right.into()); + }, + Insn::Or { left, right, out } => { + orr(cb, out.into(), left.into(), right.into()); + }, + Insn::Xor { left, right, out } => { + eor(cb, out.into(), left.into(), right.into()); + }, + Insn::Not { opnd, out } => { + mvn(cb, out.into(), opnd.into()); + }, + Insn::RShift { opnd, shift, out } => { + asr(cb, out.into(), opnd.into(), shift.into()); + }, + Insn::URShift { 
opnd, shift, out } => { + lsr(cb, out.into(), opnd.into(), shift.into()); + }, + Insn::LShift { opnd, shift, out } => { + lsl(cb, out.into(), opnd.into(), shift.into()); + }, + Insn::Store { dest, src } => { + // This order may be surprising but it is correct. The way + // the Arm64 assembler works, the register that is going to + // be stored is first and the address is second. However in + // our IR we have the address first and the register second. + match dest.rm_num_bits() { + 64 | 32 => stur(cb, src.into(), dest.into()), + 16 => sturh(cb, src.into(), dest.into()), + num_bits => panic!("unexpected dest num_bits: {} (src: {:#?}, dest: {:#?})", num_bits, src, dest), + } + }, + Insn::Load { opnd, out } | + Insn::LoadInto { opnd, dest: out } => { + match *opnd { + Opnd::Reg(_) | Opnd::InsnOut { .. } => { + mov(cb, out.into(), opnd.into()); + }, + Opnd::UImm(uimm) => { + emit_load_value(cb, out.into(), uimm); + }, + Opnd::Imm(imm) => { + emit_load_value(cb, out.into(), imm as u64); + }, + Opnd::Mem(_) => { + match opnd.rm_num_bits() { + 64 | 32 => ldur(cb, out.into(), opnd.into()), + 16 => ldurh(cb, out.into(), opnd.into()), + 8 => ldurb(cb, out.into(), opnd.into()), + num_bits => panic!("unexpected num_bits: {}", num_bits) + }; + }, + Opnd::Value(value) => { + // We dont need to check if it's a special const + // here because we only allow these operands to hit + // this point if they're not a special const. + assert!(!value.special_const_p()); + + // This assumes only load instructions can contain + // references to GC'd Value operands. If the value + // being loaded is a heap object, we'll report that + // back out to the gc_offsets list. + ldr_literal(cb, out.into(), 2.into()); + b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32))); + cb.write_bytes(&value.as_u64().to_le_bytes()); + + let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); + insn_gc_offsets.push(ptr_offset); + }, + Opnd::CArg { .. } => { + unreachable!("C argument operand was not lowered before arm64_emit"); + } + Opnd::Stack { .. } => { + unreachable!("Stack operand was not lowered before arm64_emit"); + } + Opnd::None => { + unreachable!("Attempted to load from None operand"); + } + }; + }, + Insn::LoadSExt { opnd, out } => { + match *opnd { + Opnd::Reg(Reg { num_bits: 32, .. }) | + Opnd::InsnOut { num_bits: 32, .. } => { + sxtw(cb, out.into(), opnd.into()); + }, + Opnd::Mem(Mem { num_bits: 32, .. }) => { + ldursw(cb, out.into(), opnd.into()); + }, + _ => unreachable!() + }; + }, + Insn::Mov { dest, src } => { + // This supports the following two kinds of immediates: + // * The value fits into a single movz instruction + // * It can be encoded with the special bitmask immediate encoding + // arm64_split() should have split other immediates that require multiple instructions. + match src { + Opnd::UImm(uimm) if *uimm <= 0xffff => { + movz(cb, dest.into(), A64Opnd::new_uimm(*uimm), 0); + }, + _ => { + mov(cb, dest.into(), src.into()); + } + } + }, + Insn::Lea { opnd, out } => { + let opnd: A64Opnd = opnd.into(); + + match opnd { + A64Opnd::Mem(mem) => { + add( + cb, + out.into(), + A64Opnd::Reg(A64Reg { reg_no: mem.base_reg_no, num_bits: 64 }), + A64Opnd::new_imm(mem.disp.into()) + ); + }, + _ => { + panic!("Op::Lea only accepts Opnd::Mem operands."); + } + }; + }, + Insn::LeaJumpTarget { out, target, .. 
} => { + if let Target::Label(label_idx) = target { + // Set output to the raw address of the label + cb.label_ref(*label_idx, 4, |cb, end_addr, dst_addr| { + adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4))); + }); + + mov(cb, out.into(), Self::SCRATCH0); + } else { + // Set output to the jump target's raw address + let target_code = target.unwrap_code_ptr(); + let target_addr = target_code.raw_addr(cb).as_u64(); + emit_load_value(cb, out.into(), target_addr); + } + }, + Insn::CPush(opnd) => { + emit_push(cb, opnd.into()); + }, + Insn::CPop { out } => { + emit_pop(cb, out.into()); + }, + Insn::CPopInto(opnd) => { + emit_pop(cb, opnd.into()); + }, + Insn::CPushAll => { + let regs = Assembler::get_caller_save_regs(); + + for reg in regs { + emit_push(cb, A64Opnd::Reg(reg)); + } + + // Push the flags/state register + mrs(cb, Self::SCRATCH0, SystemRegister::NZCV); + emit_push(cb, Self::SCRATCH0); + }, + Insn::CPopAll => { + let regs = Assembler::get_caller_save_regs(); + + // Pop the state/flags register + msr(cb, SystemRegister::NZCV, Self::SCRATCH0); + emit_pop(cb, Self::SCRATCH0); + + for reg in regs.into_iter().rev() { + emit_pop(cb, A64Opnd::Reg(reg)); + } + }, + Insn::CCall { fptr, .. } => { + // The offset to the call target in bytes + let src_addr = cb.get_write_ptr().raw_ptr(cb) as i64; + let dst_addr = *fptr as i64; + + // Use BL if the offset is short enough to encode as an immediate. + // Otherwise, use BLR with a register. + if b_offset_fits_bits((dst_addr - src_addr) / 4) { + bl(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); + } else { + emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); + blr(cb, Self::SCRATCH0); + } + }, + Insn::CRet { .. } => { + ret(cb, A64Opnd::None); + }, + Insn::Cmp { left, right } => { + cmp(cb, left.into(), right.into()); + }, + Insn::Test { left, right } => { + tst(cb, left.into(), right.into()); + }, + Insn::JmpOpnd(opnd) => { + br(cb, opnd.into()); + }, + Insn::Jmp(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(dst_ptr) => { + emit_jmp_ptr(cb, dst_ptr, true); + }, + Target::SideExitPtr(dst_ptr) => { + emit_jmp_ptr(cb, dst_ptr, false); + }, + Target::Label(label_idx) => { + // Here we're going to save enough space for + // ourselves and then come back and write the + // instruction once we know the offset. We're going + // to assume we can fit into a single b instruction. + // It will panic otherwise. + cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { + let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap(); + b(cb, InstructionOffset::from_bytes(bytes)); + }); + }, + Target::SideExit { .. 
} => { + unreachable!("Target::SideExit should have been compiled by compile_side_exit") + }, + }; + }, + Insn::Je(target) | Insn::Jz(target) => { + emit_conditional_jump::<{Condition::EQ}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jne(target) | Insn::Jnz(target) | Insn::JoMul(target) => { + emit_conditional_jump::<{Condition::NE}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jl(target) => { + emit_conditional_jump::<{Condition::LT}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jg(target) => { + emit_conditional_jump::<{Condition::GT}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jge(target) => { + emit_conditional_jump::<{Condition::GE}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jbe(target) => { + emit_conditional_jump::<{Condition::LS}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jb(target) => { + emit_conditional_jump::<{Condition::CC}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jo(target) => { + emit_conditional_jump::<{Condition::VS}>(cb, compile_side_exit(*target, self, ocb)?); + }, + Insn::Joz(opnd, target) => { + emit_cmp_zero_jump(cb, opnd.into(), true, compile_side_exit(*target, self, ocb)?); + }, + Insn::Jonz(opnd, target) => { + emit_cmp_zero_jump(cb, opnd.into(), false, compile_side_exit(*target, self, ocb)?); + }, + Insn::IncrCounter { mem, value } => { + let label = cb.new_label("incr_counter_loop".to_string()); + cb.write_label(label); + + ldaxr(cb, Self::SCRATCH0, mem.into()); + add(cb, Self::SCRATCH0, Self::SCRATCH0, value.into()); + + // The status register that gets used to track whether or + // not the store was successful must be 32 bytes. Since we + // store the SCRATCH registers as their 64-bit versions, we + // need to rewrap it here. + let status = A64Opnd::Reg(Self::SCRATCH1.unwrap_reg().with_num_bits(32)); + stlxr(cb, status, Self::SCRATCH0, mem.into()); + + cmp(cb, Self::SCRATCH1, A64Opnd::new_uimm(0)); + emit_conditional_jump::<{Condition::NE}>(cb, Target::Label(label)); + }, + Insn::Breakpoint => { + brk(cb, A64Opnd::None); + }, + Insn::CSelZ { truthy, falsy, out } | + Insn::CSelE { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::EQ); + }, + Insn::CSelNZ { truthy, falsy, out } | + Insn::CSelNE { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::NE); + }, + Insn::CSelL { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::LT); + }, + Insn::CSelLE { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::LE); + }, + Insn::CSelG { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GT); + }, + Insn::CSelGE { truthy, falsy, out } => { + csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GE); + } + Insn::LiveReg { .. } => (), // just a reg alloc signal, no code + Insn::PadInvalPatch => { + while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < cb.jmp_ptr_bytes() && !cb.has_dropped_bytes() { + nop(cb); + } + } + }; + + // On failure, jump to the next page and retry the current insn + if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr_with_invalidation) { + // Reset cb states before retrying the current Insn + cb.set_label_state(old_label_state); + + // We don't want label references to cross page boundaries. Signal caller for + // retry. 
+ if !self.label_names.is_empty() { + return Err(EmitError::RetryOnNextPage); + } + } else { + insn_idx += 1; + gc_offsets.append(&mut insn_gc_offsets); + } + } + + // Error if we couldn't write out everything + if cb.has_dropped_bytes() { + return Err(EmitError::OutOfMemory) + } else { + // No bytes dropped, so the pos markers point to valid code + for (insn_idx, pos) in pos_markers { + if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() { + callback(pos, &cb); + } else { + panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}"); + } + } + + return Ok(gc_offsets) + } + } + + /// Optimize and compile the stored instructions + pub fn compile_with_regs(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>, regs: Vec<Reg>) -> Option<(CodePtr, Vec<u32>)> { + let asm = self.arm64_split(); + let mut asm = asm.alloc_regs(regs); + + // Create label instances in the code block + for (idx, name) in asm.label_names.iter().enumerate() { + let label_idx = cb.new_label(name.to_string()); + assert!(label_idx == idx); + } + + let start_ptr = cb.get_write_ptr(); + let starting_label_state = cb.get_label_state(); + let mut ocb = ocb; // for &mut + let emit_result = match asm.arm64_emit(cb, &mut ocb) { + Err(EmitError::RetryOnNextPage) => { + // we want to lower jumps to labels to b.cond instructions, which have a 1 MiB + // range limit. We can easily exceed the limit in case the jump straddles two pages. + // In this case, we retry with a fresh page. + cb.set_label_state(starting_label_state); + cb.next_page(start_ptr, emit_jmp_ptr_with_invalidation); + let result = asm.arm64_emit(cb, &mut ocb); + assert_ne!( + Err(EmitError::RetryOnNextPage), + result, + "should not fail when writing to a fresh code page" + ); + result + } + result => result + }; + + if let (Ok(gc_offsets), false) = (emit_result, cb.has_dropped_bytes()) { + cb.link_labels(); + + // Invalidate icache for newly written out region so we don't run stale code. + // It should invalidate only the code ranges of the current cb because the code + // ranges of the other cb might have a memory region that is still PROT_NONE. + #[cfg(not(test))] + cb.without_page_end_reserve(|cb| { + for (start, end) in cb.writable_addrs(start_ptr, cb.get_write_ptr()) { + unsafe { rb_yjit_icache_invalidate(start as _, end as _) }; + } + }); + + Some((start_ptr, gc_offsets)) + } else { + cb.clear_labels(); + + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::disasm::*; + + fn setup_asm() -> (Assembler, CodeBlock) { + (Assembler::new(), CodeBlock::new_dummy(1024)) + } + + #[test] + fn test_emit_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.add(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_regs(&mut cb, None, vec![X3_REG]); + + // Assert that only 2 instructions were written. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_bake_string() { + let (mut asm, mut cb) = setup_asm(); + + asm.bake_string("Hello, world!"); + asm.compile_with_num_regs(&mut cb, 0); + + // Testing that we pad the string to the nearest 4-byte boundary to make + // it easier to jump over. 
+ assert_eq!(16, cb.get_write_pos()); + } + + #[test] + fn test_emit_cpush_all() { + let (mut asm, mut cb) = setup_asm(); + + asm.cpush_all(); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_cpop_all() { + let (mut asm, mut cb) = setup_asm(); + + asm.cpop_all(); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_frame() { + let (mut asm, mut cb) = setup_asm(); + + asm.frame_setup(); + asm.frame_teardown(); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_je_fits_into_bcond() { + let (mut asm, mut cb) = setup_asm(); + + let target: CodePtr = cb.get_write_ptr().add_bytes(80); + + asm.je(Target::CodePtr(target)); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_je_does_not_fit_into_bcond() { + let (mut asm, mut cb) = setup_asm(); + + let offset = 1 << 21; + let target: CodePtr = cb.get_write_ptr().add_bytes(offset); + + asm.je(Target::CodePtr(target)); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_lea_label() { + let (mut asm, mut cb) = setup_asm(); + + let label = asm.new_label("label"); + let opnd = asm.lea_jump_target(label); + + asm.write_label(label); + asm.bake_string("Hello, world!"); + asm.store(Opnd::mem(64, SP, 0), opnd); + + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_load_mem_disp_fits_into_load() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::mem(64, SP, 0)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that two instructions were written: LDUR and STUR. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_mem_disp_fits_into_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::mem(64, SP, 1 << 10)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that three instructions were written: ADD, LDUR, and STUR. + assert_eq!(12, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_mem_disp_does_not_fit_into_add() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::mem(64, SP, 1 << 12 | 1)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that three instructions were written: MOVZ, ADD, LDUR, and STUR. + assert_eq!(16, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_value_immediate() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::Value(Qnil)); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that only two instructions were written since the value is an + // immediate. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_load_value_non_immediate() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.load(Opnd::Value(VALUE(0xCAFECAFECAFE0000))); + asm.store(Opnd::mem(64, SP, 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that five instructions were written since the value is not an + // immediate and needs to be loaded into a register. 
+ assert_eq!(20, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_32b_reg_not_bitmask_imm() { + let (mut asm, mut cb) = setup_asm(); + let w0 = Opnd::Reg(X0_REG).with_num_bits(32).unwrap(); + asm.test(w0, Opnd::UImm(u32::MAX.into())); + // All ones is not encodable with a bitmask immediate, + // so this needs one register + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_test_32b_reg_bitmask_imm() { + let (mut asm, mut cb) = setup_asm(); + let w0 = Opnd::Reg(X0_REG).with_num_bits(32).unwrap(); + asm.test(w0, Opnd::UImm(0x80000001)); + asm.compile_with_num_regs(&mut cb, 0); + } + + #[test] + fn test_emit_or() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.or(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_lshift() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.lshift(Opnd::Reg(X0_REG), Opnd::UImm(5)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_rshift() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.rshift(Opnd::Reg(X0_REG), Opnd::UImm(5)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_urshift() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.urshift(Opnd::Reg(X0_REG), Opnd::UImm(5)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + asm.compile_with_num_regs(&mut cb, 1); + } + + #[test] + fn test_emit_test() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.compile_with_num_regs(&mut cb, 0); + + // Assert that only one instruction was written. + assert_eq!(4, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_encodable_unsigned_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::UImm(7)); + asm.compile_with_num_regs(&mut cb, 0); + + // Assert that only one instruction was written. + assert_eq!(4, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_unencodable_unsigned_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::UImm(5)); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that a load and a test instruction were written. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_encodable_signed_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::Imm(7)); + asm.compile_with_num_regs(&mut cb, 0); + + // Assert that only one instruction was written. + assert_eq!(4, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_unencodable_signed_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::Imm(5)); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that a load and a test instruction were written. + assert_eq!(8, cb.get_write_pos()); + } + + #[test] + fn test_emit_test_with_negative_signed_immediate() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(X0_REG), Opnd::Imm(-7)); + asm.compile_with_num_regs(&mut cb, 1); + + // Assert that a test instruction is written. 
+ assert_eq!(4, cb.get_write_pos()); + } + + #[test] + fn test_32_bit_register_with_some_number() { + let (mut asm, mut cb) = setup_asm(); + + let shape_opnd = Opnd::mem(32, Opnd::Reg(X0_REG), 6); + asm.cmp(shape_opnd, Opnd::UImm(4097)); + asm.compile_with_num_regs(&mut cb, 2); + } + + #[test] + fn test_16_bit_register_store_some_number() { + let (mut asm, mut cb) = setup_asm(); + + let shape_opnd = Opnd::mem(16, Opnd::Reg(X0_REG), 0); + asm.store(shape_opnd, Opnd::UImm(4097)); + asm.compile_with_num_regs(&mut cb, 2); + } + + #[test] + fn test_32_bit_register_store_some_number() { + let (mut asm, mut cb) = setup_asm(); + + let shape_opnd = Opnd::mem(32, Opnd::Reg(X0_REG), 6); + asm.store(shape_opnd, Opnd::UImm(4097)); + asm.compile_with_num_regs(&mut cb, 2); + } + + #[test] + fn test_bcond_straddling_code_pages() { + const LANDING_PAGE: usize = 65; + let mut asm = Assembler::new(); + let mut cb = CodeBlock::new_dummy_with_freed_pages(vec![0, LANDING_PAGE]); + + // Skip to near the end of the page. Room for two instructions. + cb.set_pos(cb.page_start_pos() + cb.page_end() - 8); + + let end = asm.new_label("end"); + // Start with a conditional jump... + asm.jz(end); + + // A few instructions, enough to cause a page switch. + let sum = asm.add(399.into(), 111.into()); + let xorred = asm.xor(sum, 859.into()); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), xorred); + asm.store(Opnd::mem(64, Opnd::Reg(X0_REG), 0), xorred); + + // The branch target. It should be in the landing page. + asm.write_label(end); + asm.cret(xorred); + + // [Bug #19385] + // This used to panic with "The offset must be 19 bits or less." + // due to attempting to lower the `asm.jz` above to a `b.e` with an offset that's > 1 MiB. + let starting_pos = cb.get_write_pos(); + asm.compile_with_num_regs(&mut cb, 2); + let gap = cb.get_write_pos() - starting_pos; + assert!(gap > 0b1111111111111111111); + + let instruction_at_starting_pos: [u8; 4] = unsafe { + std::slice::from_raw_parts(cb.get_ptr(starting_pos).raw_ptr(&cb), 4) + }.try_into().unwrap(); + assert_eq!( + 0b000101 << 26_u32, + u32::from_le_bytes(instruction_at_starting_pos) & (0b111111 << 26_u32), + "starting instruction should be an unconditional branch to the new page (B)" + ); + } + + #[test] + fn test_emit_xor() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = asm.xor(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG)); + asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd); + + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "0b0001ca4b0000f8", " + 0x0: eor x11, x0, x1 + 0x4: stur x11, [x2] + "); + } + + #[test] + #[cfg(feature = "disasm")] + fn test_simple_disasm() -> std::result::Result<(), capstone::Error> { + // Test drive Capstone with simple input + use capstone::prelude::*; + + let cs = Capstone::new() + .arm64() + .mode(arch::arm64::ArchMode::Arm) + .build()?; + + let insns = cs.disasm_all(&[0x60, 0x0f, 0x80, 0xF2], 0x1000)?; + + match insns.as_ref() { + [insn] => { + assert_eq!(Some("movk"), insn.mnemonic()); + Ok(()) + } + _ => Err(capstone::Error::CustomError( + "expected to disassemble to movk", + )), + } + } + + #[test] + fn test_replace_mov_with_ldur() { + let (mut asm, mut cb) = setup_asm(); + + asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::mem(64, CFP, 8)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "618240f8", {" + 0x0: ldur x1, [x19, #8] + "}); + } + + #[test] + fn test_not_split_mov() { + let (mut asm, mut cb) = setup_asm(); + + asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::UImm(0xffff)); + asm.mov(Opnd::Reg(TEMP_REGS[0]), 
Opnd::UImm(0x10000)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "e1ff9fd2e10370b2", {" + 0x0: mov x1, #0xffff + 0x4: orr x1, xzr, #0x10000 + "}); + } + + #[test] + fn test_merge_csel_mov() { + let (mut asm, mut cb) = setup_asm(); + + let out = asm.csel_l(Qtrue.into(), Qfalse.into()); + asm.mov(Opnd::Reg(TEMP_REGS[0]), out); + asm.compile_with_num_regs(&mut cb, 2); + + assert_disasm!(cb, "8b0280d20c0080d261b18c9a", {" + 0x0: mov x11, #0x14 + 0x4: mov x12, #0 + 0x8: csel x1, x11, x12, lt + "}); + } + + #[test] + fn test_add_with_immediate() { + let (mut asm, mut cb) = setup_asm(); + + let out = asm.add(Opnd::Reg(TEMP_REGS[1]), 1.into()); + let out = asm.add(out, 1_usize.into()); + asm.mov(Opnd::Reg(TEMP_REGS[0]), out); + asm.compile_with_num_regs(&mut cb, 2); + + assert_disasm!(cb, "2b0500b16b0500b1e1030baa", {" + 0x0: adds x11, x9, #1 + 0x4: adds x11, x11, #1 + 0x8: mov x1, x11 + "}); + } + + #[test] + fn test_mul_with_immediate() { + let (mut asm, mut cb) = setup_asm(); + + let out = asm.mul(Opnd::Reg(TEMP_REGS[1]), 3.into()); + asm.mov(Opnd::Reg(TEMP_REGS[0]), out); + asm.compile_with_num_regs(&mut cb, 2); + + assert_disasm!(cb, "6b0080d22b7d0b9be1030baa", {" + 0x0: mov x11, #3 + 0x4: mul x11, x9, x11 + 0x8: mov x1, x11 + "}); + } +} |