Diffstat (limited to 'yjit/src/backend/x86_64/mod.rs')
-rw-r--r-- | yjit/src/backend/x86_64/mod.rs | 1322 |
1 file changed, 1322 insertions, 0 deletions
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
new file mode 100644
index 0000000000..4ca5e9be9c
--- /dev/null
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -0,0 +1,1322 @@

use std::mem::take;

use crate::asm::*;
use crate::asm::x86_64::*;
use crate::codegen::CodePtr;
use crate::cruby::*;
use crate::backend::ir::*;
use crate::options::*;
use crate::utils::*;

// Use the x86 register type for this platform
pub type Reg = X86Reg;

// Callee-saved registers
pub const _CFP: Opnd = Opnd::Reg(R13_REG);
pub const _EC: Opnd = Opnd::Reg(R12_REG);
pub const _SP: Opnd = Opnd::Reg(RBX_REG);

// C argument registers on this platform
pub const _C_ARG_OPNDS: [Opnd; 6] = [
    Opnd::Reg(RDI_REG),
    Opnd::Reg(RSI_REG),
    Opnd::Reg(RDX_REG),
    Opnd::Reg(RCX_REG),
    Opnd::Reg(R8_REG),
    Opnd::Reg(R9_REG)
];

// C return value register on this platform
pub const C_RET_REG: Reg = RAX_REG;
pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG);

impl CodeBlock {
    // The number of bytes that are generated by jmp_ptr
    pub fn jmp_ptr_bytes(&self) -> usize { 5 }
}

/// Map Opnd to X86Opnd
impl From<Opnd> for X86Opnd {
    fn from(opnd: Opnd) -> Self {
        match opnd {
            // NOTE: these operand types need to be lowered first
            //Value(VALUE),   // Immediate Ruby value, may be GC'd, movable
            //InsnOut(usize), // Output of a preceding instruction in this block

            Opnd::InsnOut { .. } => panic!("InsnOut operand made it past register allocation"),

            Opnd::UImm(val) => uimm_opnd(val),
            Opnd::Imm(val) => imm_opnd(val),
            Opnd::Value(VALUE(uimm)) => uimm_opnd(uimm as u64),

            // General-purpose register
            Opnd::Reg(reg) => X86Opnd::Reg(reg),

            // Memory operand with displacement
            Opnd::Mem(Mem { base: MemBase::Reg(reg_no), num_bits, disp }) => {
                let reg = X86Reg {
                    reg_no,
                    num_bits: 64,
                    reg_type: RegType::GP
                };

                mem_opnd(num_bits, X86Opnd::Reg(reg), disp)
            }

            Opnd::None => panic!(
                "Attempted to lower an Opnd::None. This often happens when an out operand was not allocated for an instruction because the output of the instruction was not used. Please ensure you are using the output."
            ),

            _ => panic!("unsupported x86 operand type")
        }
    }
}

/// Also implement going from a reference to an operand for convenience.
impl From<&Opnd> for X86Opnd {
    fn from(opnd: &Opnd) -> Self {
        X86Opnd::from(*opnd)
    }
}

/// List of registers that can be used for stack temps.
pub static TEMP_REGS: [Reg; 5] = [RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG];
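// Editorial note: a minimal sketch (not part of this commit) of the lowering
// done by the From<Opnd> impl above, using the operand constructors that the
// tests at the bottom of this file also use:
//
//     let opnd = Opnd::mem(64, Opnd::Reg(RBX_REG), 8); // qword ptr [rbx + 8]
//     let lowered: X86Opnd = opnd.into();              // mem_opnd(64, RBX, 8)
//
// Opnd::InsnOut deliberately has no direct lowering: register allocation must
// replace it with a concrete register first, otherwise from() panics.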
impl Assembler
{
    // A special scratch register for intermediate processing.
    // This register is caller-saved (so we don't have to save it before using it)
    pub const SCRATCH_REG: Reg = R11_REG;
    const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG);

    /// Get the list of registers from which we can allocate on this platform
    pub fn get_alloc_regs() -> Vec<Reg>
    {
        vec![
            RAX_REG,
            RCX_REG,
            RDX_REG,
        ]
    }

    /// Get a list of all of the caller-save registers
    pub fn get_caller_save_regs() -> Vec<Reg> {
        vec![RAX_REG, RCX_REG, RDX_REG, RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG, R11_REG]
    }

    // These are the callee-saved registers in the x86-64 SysV ABI:
    // RBX, RSP, RBP, and R12–R15
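    // Editorial note: a sketch (not part of this commit) of two representative
    // rewrites that x86_split below performs, written as before/after IR:
    //
    //     mov mem[a], mem[b]        =>  v0 = load mem[b]; mov mem[a], v0
    //     store mem[a], Value(obj)  =>  v0 = load Value(obj); store mem[a], v0
    //
    // The first is needed because x86 has no memory-to-memory moves; the second
    // keeps heap VALUEs in explicit Load instructions so that x86_emit can
    // record their code offsets as gc_offsets.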
    /// Split IR instructions for the x86 platform
    fn x86_split(mut self) -> Assembler
    {
        let live_ranges: Vec<usize> = take(&mut self.live_ranges);
        let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits));
        let mut iterator = self.into_draining_iter();

        while let Some((index, mut insn)) = iterator.next_unmapped() {
            // When we're iterating through the instructions with x86_split, we
            // need to know the previous live ranges in order to tell if a
            // register lasts beyond the current instruction. So instead of
            // using next_mapped, we call next_unmapped. When you're using the
            // next_unmapped API, you need to make sure that you map each
            // operand that could reference an old index, which means both
            // Opnd::InsnOut operands and Opnd::Mem operands with a base of
            // MemBase::InsnOut.
            //
            // You need to ensure that you only map it _once_, because otherwise
            // you'll end up mapping an incorrect index which could end up being
            // out of bounds of the old set of indices.
            //
            // We handle all of that mapping here to ensure that it's only
            // mapped once. We also handle loading Opnd::Value operands into
            // registers here so that all mapping happens in one place. We load
            // Opnd::Value operands into registers here because:
            //
            // - Most instructions can't be encoded with 64-bit immediates.
            // - We look for Op::Load specifically when emitting to keep GC'ed
            //   VALUEs alive. This is a sort of canonicalization.
            let mut unmapped_opnds: Vec<Opnd> = vec![];

            let is_load = matches!(insn, Insn::Load { .. } | Insn::LoadInto { .. });
            let mut opnd_iter = insn.opnd_iter_mut();

            while let Some(opnd) = opnd_iter.next() {
                if let Opnd::Stack { .. } = opnd {
                    *opnd = asm.lower_stack_opnd(opnd);
                }
                unmapped_opnds.push(*opnd);

                *opnd = match opnd {
                    Opnd::Value(value) if !is_load => {
                        // Since mov(mem64, imm32) sign extends, as_i64() makes sure
                        // we split when the extended value is different.
                        if !value.special_const_p() || imm_num_bits(value.as_i64()) > 32 {
                            asm.load(iterator.map_opnd(*opnd))
                        } else {
                            Opnd::UImm(value.as_u64())
                        }
                    }
                    _ => iterator.map_opnd(*opnd),
                };
            }

            // We are replacing instructions here so we know they are already
            // being used. It is okay not to use their output here.
            #[allow(unused_must_use)]
            match &mut insn {
                Insn::Add { left, right, out } |
                Insn::Sub { left, right, out } |
                Insn::Mul { left, right, out } |
                Insn::And { left, right, out } |
                Insn::Or { left, right, out } |
                Insn::Xor { left, right, out } => {
                    match (&left, &right, iterator.peek()) {
                        // Merge this insn, e.g. `add REG, right -> out`, and `mov REG, out` if possible
                        (Opnd::Reg(_), Opnd::UImm(value), Some(Insn::Mov { dest, src }))
                            if out == src && left == dest && live_ranges[index] == index + 1 && uimm_num_bits(*value) <= 32 => {
                            *out = *dest;
                            asm.push_insn(insn);
                            iterator.map_insn_index(&mut asm);
                            iterator.next_unmapped(); // Pop merged Insn::Mov
                        }
                        (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src }))
                            if out == src && live_ranges[index] == index + 1 && {
                                // We want to do `dest == left`, but `left` has already gone
                                // through lower_stack_opnd() while `dest` has not. So we
                                // lower `dest` before comparing.
                                let lowered_dest = if let Opnd::Stack { .. } = dest {
                                    asm.lower_stack_opnd(dest)
                                } else {
                                    *dest
                                };
                                lowered_dest == *left
                            } => {
                            *out = *dest;
                            asm.push_insn(insn);
                            iterator.map_insn_index(&mut asm);
                            iterator.next_unmapped(); // Pop merged Insn::Mov
                        }
                        _ => {
                            match (unmapped_opnds[0], unmapped_opnds[1]) {
                                (Opnd::Mem(_), Opnd::Mem(_)) => {
                                    *left = asm.load(*left);
                                    *right = asm.load(*right);
                                },
                                (Opnd::Mem(_), Opnd::UImm(_) | Opnd::Imm(_)) => {
                                    *left = asm.load(*left);
                                },
                                // Instruction output whose live range spans beyond this instruction
                                (Opnd::InsnOut { idx, .. }, _) => {
                                    if live_ranges[idx] > index {
                                        *left = asm.load(*left);
                                    }
                                },
                                // We have to load memory operands to avoid corrupting them
                                (Opnd::Mem(_) | Opnd::Reg(_), _) => {
                                    *left = asm.load(*left);
                                },
                                _ => {}
                            };

                            *out = asm.next_opnd_out(Opnd::match_num_bits(&[*left, *right]));
                            asm.push_insn(insn);
                        }
                    }
                },
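                // Editorial note on the merge above: it is only legal because
                // `live_ranges[index] == index + 1` guarantees this insn's
                // output dies at the very next instruction, i.e. at the
                // Insn::Mov being absorbed, so no later use can observe the
                // unmoved value.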
                Insn::Cmp { left, right } => {
                    // Replace `cmp REG, 0` (4 bytes) with `test REG, REG` (3 bytes)
                    // when next IR is `je`, `jne`, `csel_e`, or `csel_ne`
                    match (&left, &right, iterator.peek()) {
                        (Opnd::InsnOut { .. },
                         Opnd::UImm(0) | Opnd::Imm(0),
                         Some(Insn::Je(_) | Insn::Jne(_) | Insn::CSelE { .. } | Insn::CSelNE { .. })) => {
                            asm.push_insn(Insn::Test { left: *left, right: *left });
                        }
                        _ => {
                            if let (Opnd::Mem(_), Opnd::Mem(_)) = (&left, &right) {
                                let loaded = asm.load(*right);
                                *right = loaded;
                            }
                            asm.push_insn(insn);
                        }
                    }
                },
                Insn::Test { left, right } => {
                    if let (Opnd::Mem(_), Opnd::Mem(_)) = (&left, &right) {
                        let loaded = asm.load(*right);
                        *right = loaded;
                    }
                    asm.push_insn(insn);
                },
                // These instructions modify their input operand in-place, so we
                // may need to load the input value to preserve it
                Insn::LShift { opnd, shift, out } |
                Insn::RShift { opnd, shift, out } |
                Insn::URShift { opnd, shift, out } => {
                    match (&unmapped_opnds[0], &unmapped_opnds[1]) {
                        // Instruction output whose live range spans beyond this instruction
                        (Opnd::InsnOut { idx, .. }, _) => {
                            if live_ranges[*idx] > index {
                                *opnd = asm.load(*opnd);
                            }
                        },
                        // We have to load memory operands to avoid corrupting them
                        (Opnd::Mem(_) | Opnd::Reg(_), _) => {
                            *opnd = asm.load(*opnd);
                        },
                        _ => {}
                    };

                    *out = asm.next_opnd_out(Opnd::match_num_bits(&[*opnd, *shift]));
                    asm.push_insn(insn);
                },
                Insn::CSelZ { truthy, falsy, out } |
                Insn::CSelNZ { truthy, falsy, out } |
                Insn::CSelE { truthy, falsy, out } |
                Insn::CSelNE { truthy, falsy, out } |
                Insn::CSelL { truthy, falsy, out } |
                Insn::CSelLE { truthy, falsy, out } |
                Insn::CSelG { truthy, falsy, out } |
                Insn::CSelGE { truthy, falsy, out } => {
                    match unmapped_opnds[0] {
                        // If we have an instruction output whose live range
                        // spans beyond this instruction, we have to load it.
                        Opnd::InsnOut { idx, .. } => {
                            if live_ranges[idx] > index {
                                *truthy = asm.load(*truthy);
                            }
                        },
                        Opnd::UImm(_) | Opnd::Imm(_) => {
                            *truthy = asm.load(*truthy);
                        },
                        // Opnd::Value could have already been split
                        Opnd::Value(_) if !matches!(truthy, Opnd::InsnOut { .. }) => {
                            *truthy = asm.load(*truthy);
                        },
                        _ => {}
                    };

                    match falsy {
                        Opnd::UImm(_) | Opnd::Imm(_) => {
                            *falsy = asm.load(*falsy);
                        },
                        _ => {}
                    };

                    *out = asm.next_opnd_out(Opnd::match_num_bits(&[*truthy, *falsy]));
                    asm.push_insn(insn);
                },
                Insn::Mov { dest, src } | Insn::Store { dest, src } => {
                    match (&dest, &src) {
                        (Opnd::Mem(_), Opnd::Mem(_)) => {
                            // We load opnd1 because for mov, opnd0 is the output
                            let opnd1 = asm.load(*src);
                            asm.mov(*dest, opnd1);
                        },
                        (Opnd::Mem(_), Opnd::UImm(value)) => {
                            // 32-bit values will be sign-extended
                            if imm_num_bits(*value as i64) > 32 {
                                let opnd1 = asm.load(*src);
                                asm.mov(*dest, opnd1);
                            } else {
                                asm.mov(*dest, *src);
                            }
                        },
                        (Opnd::Mem(_), Opnd::Imm(value)) => {
                            if imm_num_bits(*value) > 32 {
                                let opnd1 = asm.load(*src);
                                asm.mov(*dest, opnd1);
                            } else {
                                asm.mov(*dest, *src);
                            }
                        },
                        _ => {
                            asm.mov(*dest, *src);
                        }
                    }
                },
                Insn::Not { opnd, .. } => {
                    let opnd0 = match unmapped_opnds[0] {
                        // If we have an instruction output whose live range
                        // spans beyond this instruction, we have to load it.
                        Opnd::InsnOut { idx, .. } => {
                            if live_ranges[idx] > index {
                                asm.load(*opnd)
                            } else {
                                *opnd
                            }
                        },
                        // We have to load memory and register operands to avoid
                        // corrupting them.
                        Opnd::Mem(_) | Opnd::Reg(_) => {
                            asm.load(*opnd)
                        },
                        // Otherwise we can just reuse the existing operand.
                        _ => *opnd
                    };

                    asm.not(opnd0);
                },
                Insn::CCall { opnds, fptr, .. } => {
                    assert!(opnds.len() <= C_ARG_OPNDS.len());

                    // Load each operand into the corresponding argument
                    // register.
                    for (idx, opnd) in opnds.into_iter().enumerate() {
                        asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), *opnd);
                    }

                    // Now we push the CCall without any arguments so that it
                    // just performs the call.
                    asm.ccall(*fptr, vec![]);
                },
                Insn::Lea { .. } => {
                    // Merge `lea` and `mov` into a single `lea` when possible
                    match (&insn, iterator.peek()) {
                        (Insn::Lea { opnd, out }, Some(Insn::Mov { dest: Opnd::Reg(reg), src }))
                            if matches!(out, Opnd::InsnOut { .. }) && out == src && live_ranges[index] == index + 1 => {
                            asm.push_insn(Insn::Lea { opnd: *opnd, out: Opnd::Reg(*reg) });
                            iterator.map_insn_index(&mut asm);
                            iterator.next_unmapped(); // Pop merged Insn::Mov
                        }
                        _ => asm.push_insn(insn),
                    }
                },
                _ => {
                    if insn.out_opnd().is_some() {
                        let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter());
                        let out = insn.out_opnd_mut().unwrap();
                        *out = asm.next_opnd_out(out_num_bits);
                    }

                    asm.push_insn(insn);
                }
            };

            iterator.map_insn_index(&mut asm);
        }

        asm
    }
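    // Editorial note: a compact sketch (not part of this commit) of the
    // add/mov merge performed above, mirroring test_merge_add_mov below:
    //
    //     let (mut asm, mut cb) = setup_asm();
    //     let out = asm.add(CFP, Opnd::UImm(0x40));
    //     asm.mov(CFP, out);                     // absorbed into the add
    //     asm.compile_with_num_regs(&mut cb, 1); // emits just `add r13, 0x40`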
    /// Emit platform-specific machine code
    pub fn x86_emit(&mut self, cb: &mut CodeBlock, ocb: &mut Option<&mut OutlinedCb>) -> Option<Vec<u32>>
    {
        /// For some instructions, we want to be able to lower a 64-bit operand
        /// without requiring more registers to be available in the register
        /// allocator. So we just use the SCRATCH0 register temporarily to hold
        /// the value before we immediately use it.
        fn emit_64bit_immediate(cb: &mut CodeBlock, opnd: &Opnd) -> X86Opnd {
            match opnd {
                Opnd::Imm(value) => {
                    // 32-bit values will be sign-extended
                    if imm_num_bits(*value) > 32 {
                        mov(cb, Assembler::SCRATCH0, opnd.into());
                        Assembler::SCRATCH0
                    } else {
                        opnd.into()
                    }
                },
                Opnd::UImm(value) => {
                    // 32-bit values will be sign-extended
                    if imm_num_bits(*value as i64) > 32 {
                        mov(cb, Assembler::SCRATCH0, opnd.into());
                        Assembler::SCRATCH0
                    } else {
                        opnd.into()
                    }
                },
                _ => opnd.into()
            }
        }

        /// Compile a side exit if Target::SideExit is given.
        fn compile_side_exit(
            target: Target,
            asm: &mut Assembler,
            ocb: &mut Option<&mut OutlinedCb>,
        ) -> Option<Target> {
            if let Target::SideExit { counter, context } = target {
                let side_exit = asm.get_side_exit(&context.unwrap(), Some(counter), ocb.as_mut().unwrap());
                Some(Target::SideExitPtr(side_exit?))
            } else {
                Some(target)
            }
        }
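        // Editorial note on the helper below: emit_csel lowers a conditional
        // select as an unconditional `mov` into `out` followed by a `cmov`.
        // When the truthy side lives in memory, `falsy` is moved first and the
        // memory operand becomes the cmov source (cmov accepts memory);
        // otherwise `truthy` is moved and the cmov condition is inverted.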
        fn emit_csel(
            cb: &mut CodeBlock,
            truthy: Opnd,
            falsy: Opnd,
            out: Opnd,
            cmov_fn: fn(&mut CodeBlock, X86Opnd, X86Opnd),
            cmov_neg: fn(&mut CodeBlock, X86Opnd, X86Opnd)
        ) {
            // Assert that output is a register
            out.unwrap_reg();

            // If the truthy value is a memory operand
            if let Opnd::Mem(_) = truthy {
                if out != falsy {
                    mov(cb, out.into(), falsy.into());
                }

                cmov_fn(cb, out.into(), truthy.into());
            } else {
                if out != truthy {
                    mov(cb, out.into(), truthy.into());
                }

                cmov_neg(cb, out.into(), falsy.into());
            }
        }

        //dbg!(&self.insns);

        // List of GC offsets
        let mut gc_offsets: Vec<u32> = Vec::new();

        // Buffered list of PosMarker callbacks to fire if codegen is successful
        let mut pos_markers: Vec<(usize, CodePtr)> = vec![];

        // For each instruction
        let start_write_pos = cb.get_write_pos();
        let mut insn_idx: usize = 0;
        while let Some(insn) = self.insns.get(insn_idx) {
            let src_ptr = cb.get_write_ptr();
            let had_dropped_bytes = cb.has_dropped_bytes();
            let old_label_state = cb.get_label_state();
            let mut insn_gc_offsets: Vec<u32> = Vec::new();

            match insn {
                Insn::Comment(text) => {
                    if cfg!(feature = "disasm") {
                        cb.add_comment(text);
                    }
                },

                // Write the label at the current position
                Insn::Label(target) => {
                    cb.write_label(target.unwrap_label_idx());
                },

                // Report back the current position in the generated code
                Insn::PosMarker(..) => {
                    pos_markers.push((insn_idx, cb.get_write_ptr()));
                },

                Insn::BakeString(text) => {
                    for byte in text.as_bytes() {
                        cb.write_byte(*byte);
                    }

                    // Add a null-terminator byte for safety (in case we pass
                    // this to C code)
                    cb.write_byte(0);
                },

                // Set up RBP to work with frame pointer unwinding
                // (e.g. with Linux `perf record --call-graph fp`)
                Insn::FrameSetup => {
                    if get_option!(frame_pointer) {
                        push(cb, RBP);
                        mov(cb, RBP, RSP);
                        push(cb, RBP);
                    }
                },
                Insn::FrameTeardown => {
                    if get_option!(frame_pointer) {
                        pop(cb, RBP);
                        pop(cb, RBP);
                    }
                },

                Insn::Add { left, right, .. } => {
                    let opnd1 = emit_64bit_immediate(cb, right);
                    add(cb, left.into(), opnd1);
                },

                Insn::Sub { left, right, .. } => {
                    let opnd1 = emit_64bit_immediate(cb, right);
                    sub(cb, left.into(), opnd1);
                },

                Insn::Mul { left, right, .. } => {
                    let opnd1 = emit_64bit_immediate(cb, right);
                    imul(cb, left.into(), opnd1);
                },

                Insn::And { left, right, .. } => {
                    let opnd1 = emit_64bit_immediate(cb, right);
                    and(cb, left.into(), opnd1);
                },

                Insn::Or { left, right, .. } => {
                    let opnd1 = emit_64bit_immediate(cb, right);
                    or(cb, left.into(), opnd1);
                },

                Insn::Xor { left, right, .. } => {
                    let opnd1 = emit_64bit_immediate(cb, right);
                    xor(cb, left.into(), opnd1);
                },

                Insn::Not { opnd, .. } => {
                    not(cb, opnd.into());
                },

                Insn::LShift { opnd, shift, .. } => {
                    shl(cb, opnd.into(), shift.into())
                },

                Insn::RShift { opnd, shift, .. } => {
                    sar(cb, opnd.into(), shift.into())
                },

                Insn::URShift { opnd, shift, .. } => {
                    shr(cb, opnd.into(), shift.into())
                },

                Insn::Store { dest, src } => {
                    mov(cb, dest.into(), src.into());
                },

                // This assumes only load instructions can contain references to GC'd Value operands
                Insn::Load { opnd, out } |
                Insn::LoadInto { dest: out, opnd } => {
                    match opnd {
                        Opnd::Value(val) if val.heap_object_p() => {
                            // Using movabs because mov might write the value in 32 bits
                            movabs(cb, out.into(), val.0 as _);
                            // The pointer immediate is encoded as the last part of the mov written out
                            let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
                            insn_gc_offsets.push(ptr_offset);
                        }
                        _ => mov(cb, out.into(), opnd.into())
                    }
                },

                Insn::LoadSExt { opnd, out } => {
                    movsx(cb, out.into(), opnd.into());
                },

                Insn::Mov { dest, src } => {
                    mov(cb, dest.into(), src.into());
                },

                // Load effective address
                Insn::Lea { opnd, out } => {
                    lea(cb, out.into(), opnd.into());
                },

                // Load address of jump target
                Insn::LeaJumpTarget { target, out } => {
                    if let Target::Label(label_idx) = target {
                        // Set output to the raw address of the label
                        cb.label_ref(*label_idx, 7, |cb, src_addr, dst_addr| {
                            let disp = dst_addr - src_addr;
                            lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap()));
                        });

                        mov(cb, out.into(), Self::SCRATCH0);
                    } else {
                        // Set output to the jump target's raw address
                        let target_code = target.unwrap_code_ptr();
                        let target_addr = target_code.raw_addr(cb).as_u64();
                        // Constant encoded length important for patching
                        movabs(cb, out.into(), target_addr);
                    }
                },

                // Push and pop to/from the C stack
                Insn::CPush(opnd) => {
                    push(cb, opnd.into());
                },
                Insn::CPop { out } => {
                    pop(cb, out.into());
                },
                Insn::CPopInto(opnd) => {
                    pop(cb, opnd.into());
                },

                // Push and pop all caller-save registers and the flags to/from the C stack
                Insn::CPushAll => {
                    let regs = Assembler::get_caller_save_regs();

                    for reg in regs {
                        push(cb, X86Opnd::Reg(reg));
                    }
                    pushfq(cb);
                },
                Insn::CPopAll => {
                    let regs = Assembler::get_caller_save_regs();

                    popfq(cb);
                    for reg in regs.into_iter().rev() {
                        pop(cb, X86Opnd::Reg(reg));
                    }
                },

                // C function call
                Insn::CCall { fptr, .. } => {
                    call_ptr(cb, RAX, *fptr);
                },

                Insn::CRet(opnd) => {
                    // TODO: bias allocation towards return register
                    if *opnd != Opnd::Reg(C_RET_REG) {
                        mov(cb, RAX, opnd.into());
                    }

                    ret(cb);
                },

                // Compare
                Insn::Cmp { left, right } => {
                    let num_bits = match right {
                        Opnd::Imm(value) => Some(imm_num_bits(*value)),
                        Opnd::UImm(value) => Some(uimm_num_bits(*value)),
                        _ => None
                    };

                    // If the immediate is less than 64 bits (like 32, 16, 8), and the operand
                    // sizes match, then we can represent it as an immediate in the instruction
                    // without moving it to a register first.
                    // IOW, 64-bit immediates must always be moved to a register
                    // before comparisons, where other sizes may be encoded
                    // directly in the instruction.
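                    // Editorial note: concretely (cf. the cmp tests below),
                    //   cmp word ptr [rax + 6], 0xF000   ; 16-bit imm, 16-bit opnd: encoded inline
                    //   cmp rax, 0xFFFF_FFFF_FFFF        ; >32-bit imm: movabs r11, imm; cmp rax, r11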
                    if num_bits.is_some() && left.num_bits() == num_bits && num_bits.unwrap() < 64 {
                        cmp(cb, left.into(), right.into());
                    } else {
                        let emitted = emit_64bit_immediate(cb, right);
                        cmp(cb, left.into(), emitted);
                    }
                }

                // Test and set flags
                Insn::Test { left, right } => {
                    let emitted = emit_64bit_immediate(cb, right);
                    test(cb, left.into(), emitted);
                }

                Insn::JmpOpnd(opnd) => {
                    jmp_rm(cb, opnd.into());
                }

                // Unconditional jump to a label or pointer
                Insn::Jmp(target) => {
                    match compile_side_exit(*target, self, ocb)? {
                        Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jmp_ptr(cb, code_ptr),
                        Target::Label(label_idx) => jmp_label(cb, label_idx),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
                    }
                }

                Insn::Je(target) => {
                    match compile_side_exit(*target, self, ocb)? {
                        Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => je_ptr(cb, code_ptr),
                        Target::Label(label_idx) => je_label(cb, label_idx),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
                    }
                }

                Insn::Jne(target) => {
                    match compile_side_exit(*target, self, ocb)? {
                        Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jne_ptr(cb, code_ptr),
                        Target::Label(label_idx) => jne_label(cb, label_idx),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
                    }
                }

                Insn::Jl(target) => {
                    match compile_side_exit(*target, self, ocb)? {
                        Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jl_ptr(cb, code_ptr),
                        Target::Label(label_idx) => jl_label(cb, label_idx),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
                    }
                },

                Insn::Jg(target) => {
                    match compile_side_exit(*target, self, ocb)? {
                        Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jg_ptr(cb, code_ptr),
                        Target::Label(label_idx) => jg_label(cb, label_idx),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
                    }
                },

                Insn::Jge(target) => {
                    match compile_side_exit(*target, self, ocb)? {
                        Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jge_ptr(cb, code_ptr),
                        Target::Label(label_idx) => jge_label(cb, label_idx),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
                    }
                },

                Insn::Jbe(target) => {
                    match compile_side_exit(*target, self, ocb)? {
                        Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jbe_ptr(cb, code_ptr),
                        Target::Label(label_idx) => jbe_label(cb, label_idx),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
                    }
                },

                Insn::Jb(target) => {
                    match compile_side_exit(*target, self, ocb)? {
                        Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jb_ptr(cb, code_ptr),
                        Target::Label(label_idx) => jb_label(cb, label_idx),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
                    }
                },

                Insn::Jz(target) => {
                    match compile_side_exit(*target, self, ocb)? {
                        Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jz_ptr(cb, code_ptr),
                        Target::Label(label_idx) => jz_label(cb, label_idx),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
                    }
                }

                Insn::Jnz(target) => {
                    match compile_side_exit(*target, self, ocb)? {
                        Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jnz_ptr(cb, code_ptr),
                        Target::Label(label_idx) => jnz_label(cb, label_idx),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
                    }
                }

                Insn::Jo(target) |
                Insn::JoMul(target) => {
                    match compile_side_exit(*target, self, ocb)? {
                        Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jo_ptr(cb, code_ptr),
                        Target::Label(label_idx) => jo_label(cb, label_idx),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
                    }
                }

                Insn::Joz(..) | Insn::Jonz(..) => unreachable!("Joz/Jonz should be unused for now"),

                // Atomically increment a counter at a given memory location
                Insn::IncrCounter { mem, value } => {
                    assert!(matches!(mem, Opnd::Mem(_)));
                    assert!(matches!(value, Opnd::UImm(_) | Opnd::Imm(_)));
                    write_lock_prefix(cb);
                    add(cb, mem.into(), value.into());
                },

                Insn::Breakpoint => int3(cb),

                Insn::CSelZ { truthy, falsy, out } => {
                    emit_csel(cb, *truthy, *falsy, *out, cmovz, cmovnz);
                },
                Insn::CSelNZ { truthy, falsy, out } => {
                    emit_csel(cb, *truthy, *falsy, *out, cmovnz, cmovz);
                },
                Insn::CSelE { truthy, falsy, out } => {
                    emit_csel(cb, *truthy, *falsy, *out, cmove, cmovne);
                },
                Insn::CSelNE { truthy, falsy, out } => {
                    emit_csel(cb, *truthy, *falsy, *out, cmovne, cmove);
                },
                Insn::CSelL { truthy, falsy, out } => {
                    emit_csel(cb, *truthy, *falsy, *out, cmovl, cmovge);
                },
                Insn::CSelLE { truthy, falsy, out } => {
                    emit_csel(cb, *truthy, *falsy, *out, cmovle, cmovg);
                },
                Insn::CSelG { truthy, falsy, out } => {
                    emit_csel(cb, *truthy, *falsy, *out, cmovg, cmovle);
                },
                Insn::CSelGE { truthy, falsy, out } => {
                    emit_csel(cb, *truthy, *falsy, *out, cmovge, cmovl);
                }

                Insn::LiveReg { .. } => (), // just a reg alloc signal, no code

                Insn::PadInvalPatch => {
                    let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()));
                    if code_size < cb.jmp_ptr_bytes() {
                        nop(cb, (cb.jmp_ptr_bytes() - code_size) as u32);
                    }
                }
            };

            // On failure, jump to the next page and retry the current insn
            if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, jmp_ptr) {
                // Reset cb states before retrying the current Insn
                cb.set_label_state(old_label_state);
            } else {
                insn_idx += 1;
                gc_offsets.append(&mut insn_gc_offsets);
            }
        }

        // Error if we couldn't write out everything
        if cb.has_dropped_bytes() {
            return None
        } else {
            // No bytes dropped, so the pos markers point to valid code
            for (insn_idx, pos) in pos_markers {
                if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() {
                    callback(pos, &cb);
                } else {
                    panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}");
                }
            }

            return Some(gc_offsets)
        }
    }

    /// Optimize and compile the stored instructions
    pub fn compile_with_regs(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>, regs: Vec<Reg>) -> Option<(CodePtr, Vec<u32>)> {
        let asm = self.x86_split();
        let mut asm = asm.alloc_regs(regs);

        // Create label instances in the code block
        for (idx, name) in asm.label_names.iter().enumerate() {
            let label_idx = cb.new_label(name.to_string());
            assert!(label_idx == idx);
        }

        let mut ocb = ocb; // for &mut
        let start_ptr = cb.get_write_ptr();
        let gc_offsets = asm.x86_emit(cb, &mut ocb);

        if let (Some(gc_offsets), false) = (gc_offsets, cb.has_dropped_bytes()) {
            cb.link_labels();

            Some((start_ptr, gc_offsets))
        } else {
            cb.clear_labels();

            None
        }
    }
}
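// Editorial note: a minimal sketch (not part of this commit) of driving the
// backend end to end. compile_with_num_regs, used throughout the tests below,
// is assumed to be a test-only wrapper that passes the first N allocatable
// registers to compile_with_regs.
//
//     let mut asm = Assembler::new();
//     let sum = asm.add(Opnd::Reg(RAX_REG), Opnd::UImm(1));
//     asm.mov(Opnd::Reg(RAX_REG), sum);
//     let mut cb = CodeBlock::new_dummy(1024);
//     asm.compile_with_num_regs(&mut cb, 1); // x86_split -> alloc_regs -> x86_emit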
"49bbffffffffffff00004c39d8"); + } + + #[test] + fn test_emit_cmp_mem_16_bits_with_imm_16() { + let (mut asm, mut cb) = setup_asm(); + + let shape_opnd = Opnd::mem(16, Opnd::Reg(RAX_REG), 6); + + asm.cmp(shape_opnd, Opnd::UImm(0xF000)); + asm.compile_with_num_regs(&mut cb, 0); + + assert_eq!(format!("{:x}", cb), "6681780600f0"); + } + + #[test] + fn test_emit_cmp_mem_32_bits_with_imm_32() { + let (mut asm, mut cb) = setup_asm(); + + let shape_opnd = Opnd::mem(32, Opnd::Reg(RAX_REG), 4); + + asm.cmp(shape_opnd, Opnd::UImm(0xF000_0000)); + asm.compile_with_num_regs(&mut cb, 0); + + assert_eq!(format!("{:x}", cb), "817804000000f0"); + } + + #[test] + fn test_emit_or_lt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + let _ = asm.or(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4889c04881c8ff000000"); + } + + #[test] + fn test_emit_or_gt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + let _ = asm.or(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c09d8"); + } + + #[test] + fn test_emit_sub_lt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + let _ = asm.sub(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4889c04881e8ff000000"); + } + + #[test] + fn test_emit_sub_gt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + let _ = asm.sub(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c29d8"); + } + + #[test] + fn test_emit_test_lt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF)); + asm.compile_with_num_regs(&mut cb, 0); + + assert_eq!(format!("{:x}", cb), "f6c0ff"); + } + + #[test] + fn test_emit_test_gt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + asm.test(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF)); + asm.compile_with_num_regs(&mut cb, 0); + + assert_eq!(format!("{:x}", cb), "49bbffffffffffff00004c85d8"); + } + + #[test] + fn test_emit_xor_lt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + let _ = asm.xor(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4889c04881f0ff000000"); + } + + #[test] + fn test_emit_xor_gt_32_bits() { + let (mut asm, mut cb) = setup_asm(); + + let _ = asm.xor(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF)); + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c31d8"); + } + + #[test] + fn test_merge_lea_reg() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.lea(Opnd::mem(64, SP, 8)); + asm.mov(SP, sp); // should be merged to lea + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "488d5b08", {" + 0x0: lea rbx, [rbx + 8] + "}); + } + + #[test] + fn test_merge_lea_mem() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.lea(Opnd::mem(64, SP, 8)); + asm.mov(Opnd::mem(64, SP, 0), sp); // should NOT be merged to lea + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "488d4308488903", {" + 0x0: lea rax, [rbx + 8] + 0x4: mov qword ptr [rbx], rax + "}); + } + + #[test] + fn test_replace_cmp_0() { + let (mut asm, mut cb) = setup_asm(); + + let val = asm.load(Opnd::mem(64, SP, 8)); + asm.cmp(val, 0.into()); + let result = asm.csel_e(Qtrue.into(), Qfalse.into()); + 
    #[test]
    fn test_emit_cmp_mem_16_bits_with_imm_16() {
        let (mut asm, mut cb) = setup_asm();

        let shape_opnd = Opnd::mem(16, Opnd::Reg(RAX_REG), 6);

        asm.cmp(shape_opnd, Opnd::UImm(0xF000));
        asm.compile_with_num_regs(&mut cb, 0);

        assert_eq!(format!("{:x}", cb), "6681780600f0");
    }

    #[test]
    fn test_emit_cmp_mem_32_bits_with_imm_32() {
        let (mut asm, mut cb) = setup_asm();

        let shape_opnd = Opnd::mem(32, Opnd::Reg(RAX_REG), 4);

        asm.cmp(shape_opnd, Opnd::UImm(0xF000_0000));
        asm.compile_with_num_regs(&mut cb, 0);

        assert_eq!(format!("{:x}", cb), "817804000000f0");
    }

    #[test]
    fn test_emit_or_lt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        let _ = asm.or(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_eq!(format!("{:x}", cb), "4889c04881c8ff000000");
    }

    #[test]
    fn test_emit_or_gt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        let _ = asm.or(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c09d8");
    }

    #[test]
    fn test_emit_sub_lt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        let _ = asm.sub(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_eq!(format!("{:x}", cb), "4889c04881e8ff000000");
    }

    #[test]
    fn test_emit_sub_gt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        let _ = asm.sub(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c29d8");
    }

    #[test]
    fn test_emit_test_lt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        asm.test(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
        asm.compile_with_num_regs(&mut cb, 0);

        assert_eq!(format!("{:x}", cb), "f6c0ff");
    }

    #[test]
    fn test_emit_test_gt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        asm.test(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
        asm.compile_with_num_regs(&mut cb, 0);

        assert_eq!(format!("{:x}", cb), "49bbffffffffffff00004c85d8");
    }

    #[test]
    fn test_emit_xor_lt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        let _ = asm.xor(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_eq!(format!("{:x}", cb), "4889c04881f0ff000000");
    }

    #[test]
    fn test_emit_xor_gt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        let _ = asm.xor(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c31d8");
    }

    #[test]
    fn test_merge_lea_reg() {
        let (mut asm, mut cb) = setup_asm();

        let sp = asm.lea(Opnd::mem(64, SP, 8));
        asm.mov(SP, sp); // should be merged to lea
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm!(cb, "488d5b08", {"
            0x0: lea rbx, [rbx + 8]
        "});
    }

    #[test]
    fn test_merge_lea_mem() {
        let (mut asm, mut cb) = setup_asm();

        let sp = asm.lea(Opnd::mem(64, SP, 8));
        asm.mov(Opnd::mem(64, SP, 0), sp); // should NOT be merged to lea
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm!(cb, "488d4308488903", {"
            0x0: lea rax, [rbx + 8]
            0x4: mov qword ptr [rbx], rax
        "});
    }

    #[test]
    fn test_replace_cmp_0() {
        let (mut asm, mut cb) = setup_asm();

        let val = asm.load(Opnd::mem(64, SP, 8));
        asm.cmp(val, 0.into());
        let result = asm.csel_e(Qtrue.into(), Qfalse.into());
        asm.mov(Opnd::Reg(RAX_REG), result);
        asm.compile_with_num_regs(&mut cb, 2);

        assert_eq!(format!("{:x}", cb), "488b43084885c0b814000000b900000000480f45c14889c0");
    }

    #[test]
    fn test_merge_add_mov() {
        let (mut asm, mut cb) = setup_asm();

        let sp = asm.add(CFP, Opnd::UImm(0x40));
        asm.mov(CFP, sp); // should be merged to add
        asm.compile_with_num_regs(&mut cb, 1);

        assert_eq!(format!("{:x}", cb), "4983c540");
    }

    #[test]
    fn test_merge_sub_mov() {
        let (mut asm, mut cb) = setup_asm();

        let sp = asm.sub(CFP, Opnd::UImm(0x40));
        asm.mov(CFP, sp); // should be merged to sub
        asm.compile_with_num_regs(&mut cb, 1);

        assert_eq!(format!("{:x}", cb), "4983ed40");
    }

    #[test]
    fn test_merge_and_mov() {
        let (mut asm, mut cb) = setup_asm();

        let sp = asm.and(CFP, Opnd::UImm(0x40));
        asm.mov(CFP, sp); // should be merged to and
        asm.compile_with_num_regs(&mut cb, 1);

        assert_eq!(format!("{:x}", cb), "4983e540");
    }

    #[test]
    fn test_merge_or_mov() {
        let (mut asm, mut cb) = setup_asm();

        let sp = asm.or(CFP, Opnd::UImm(0x40));
        asm.mov(CFP, sp); // should be merged to or
        asm.compile_with_num_regs(&mut cb, 1);

        assert_eq!(format!("{:x}", cb), "4983cd40");
    }

    #[test]
    fn test_merge_xor_mov() {
        let (mut asm, mut cb) = setup_asm();

        let sp = asm.xor(CFP, Opnd::UImm(0x40));
        asm.mov(CFP, sp); // should be merged to xor
        asm.compile_with_num_regs(&mut cb, 1);

        assert_eq!(format!("{:x}", cb), "4983f540");
    }

    #[test]
    fn test_reorder_c_args_no_cycle() {
        let (mut asm, mut cb) = setup_asm();

        asm.ccall(0 as _, vec![
            C_ARG_OPNDS[0], // mov rdi, rdi (optimized away)
            C_ARG_OPNDS[1], // mov rsi, rsi (optimized away)
        ]);
        asm.compile_with_num_regs(&mut cb, 0);

        assert_disasm!(cb, "b800000000ffd0", {"
            0x0: mov eax, 0
            0x5: call rax
        "});
    }

    #[test]
    fn test_reorder_c_args_single_cycle() {
        let (mut asm, mut cb) = setup_asm();

        // rdi and rsi form a cycle
        asm.ccall(0 as _, vec![
            C_ARG_OPNDS[1], // mov rdi, rsi
            C_ARG_OPNDS[0], // mov rsi, rdi
            C_ARG_OPNDS[2], // mov rdx, rdx (optimized away)
        ]);
        asm.compile_with_num_regs(&mut cb, 0);

        assert_disasm!(cb, "4989f34889fe4c89dfb800000000ffd0", {"
            0x0: mov r11, rsi
            0x3: mov rsi, rdi
            0x6: mov rdi, r11
            0x9: mov eax, 0
            0xe: call rax
        "});
    }

    #[test]
    fn test_reorder_c_args_two_cycles() {
        let (mut asm, mut cb) = setup_asm();

        // rdi and rsi form a cycle, and rdx and rcx form another cycle
        asm.ccall(0 as _, vec![
            C_ARG_OPNDS[1], // mov rdi, rsi
            C_ARG_OPNDS[0], // mov rsi, rdi
            C_ARG_OPNDS[3], // mov rdx, rcx
            C_ARG_OPNDS[2], // mov rcx, rdx
        ]);
        asm.compile_with_num_regs(&mut cb, 0);

        assert_disasm!(cb, "4989f34889fe4c89df4989cb4889d14c89dab800000000ffd0", {"
            0x0: mov r11, rsi
            0x3: mov rsi, rdi
            0x6: mov rdi, r11
            0x9: mov r11, rcx
            0xc: mov rcx, rdx
            0xf: mov rdx, r11
            0x12: mov eax, 0
            0x17: call rax
        "});
    }
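    // Editorial note: in the argument-shuffling tests above and below, each
    // register cycle (e.g. rdi <-> rsi) is broken by staging one value in
    // SCRATCH_REG (r11), which is why every expected disassembly for a cycle
    // begins with `mov r11, ...`.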
    #[test]
    fn test_reorder_c_args_large_cycle() {
        let (mut asm, mut cb) = setup_asm();

        // rdi, rsi, and rdx form a cycle
        asm.ccall(0 as _, vec![
            C_ARG_OPNDS[1], // mov rdi, rsi
            C_ARG_OPNDS[2], // mov rsi, rdx
            C_ARG_OPNDS[0], // mov rdx, rdi
        ]);
        asm.compile_with_num_regs(&mut cb, 0);

        assert_disasm!(cb, "4989f34889d64889fa4c89dfb800000000ffd0", {"
            0x0: mov r11, rsi
            0x3: mov rsi, rdx
            0x6: mov rdx, rdi
            0x9: mov rdi, r11
            0xc: mov eax, 0
            0x11: call rax
        "});
    }

    #[test]
    fn test_reorder_c_args_with_insn_out() {
        let (mut asm, mut cb) = setup_asm();

        let rax = asm.load(Opnd::UImm(1));
        let rcx = asm.load(Opnd::UImm(2));
        let rdx = asm.load(Opnd::UImm(3));
        // rcx and rdx form a cycle
        asm.ccall(0 as _, vec![
            rax, // mov rdi, rax
            rcx, // mov rsi, rcx
            rcx, // mov rdx, rcx
            rdx, // mov rcx, rdx
        ]);
        asm.compile_with_num_regs(&mut cb, 3);

        assert_disasm!(cb, "b801000000b902000000ba030000004889c74889ce4989cb4889d14c89dab800000000ffd0", {"
            0x0: mov eax, 1
            0x5: mov ecx, 2
            0xa: mov edx, 3
            0xf: mov rdi, rax
            0x12: mov rsi, rcx
            0x15: mov r11, rcx
            0x18: mov rcx, rdx
            0x1b: mov rdx, r11
            0x1e: mov eax, 0
            0x23: call rax
        "});
    }

    #[test]
    fn test_cmov_mem() {
        let (mut asm, mut cb) = setup_asm();

        let top = Opnd::mem(64, SP, 0);
        let ary_opnd = SP;
        let array_len_opnd = Opnd::mem(64, SP, 16);

        asm.cmp(array_len_opnd, 1.into());
        let elem_opnd = asm.csel_g(Opnd::mem(64, ary_opnd, 0), Qnil.into());
        asm.mov(top, elem_opnd);

        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm!(cb, "48837b1001b804000000480f4f03488903", {"
            0x0: cmp qword ptr [rbx + 0x10], 1
            0x5: mov eax, 4
            0xa: cmovg rax, qword ptr [rbx]
            0xe: mov qword ptr [rbx], rax
        "});
    }

    #[test]
    fn test_csel_split() {
        let (mut asm, mut cb) = setup_asm();

        let stack_top = Opnd::mem(64, SP, 0);
        let elem_opnd = asm.csel_ne(VALUE(0x7f22c88d1930).into(), Qnil.into());
        asm.mov(stack_top, elem_opnd);

        asm.compile_with_num_regs(&mut cb, 3);

        assert_disasm!(cb, "48b830198dc8227f0000b904000000480f44c1488903", {"
            0x0: movabs rax, 0x7f22c88d1930
            0xa: mov ecx, 4
            0xf: cmove rax, rcx
            0x13: mov qword ptr [rbx], rax
        "});
    }
}