diff options
Diffstat (limited to 'yjit/src/backend/x86_64/mod.rs')
-rw-r--r-- | yjit/src/backend/x86_64/mod.rs | 647 |
1 files changed, 537 insertions, 110 deletions
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index 297a0fd852..4ca5e9be9c 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -1,15 +1,12 @@ -#![allow(dead_code)] -#![allow(unused_variables)] -#![allow(unused_imports)] - use std::mem::take; use crate::asm::*; use crate::asm::x86_64::*; -use crate::codegen::{JITState}; +use crate::codegen::CodePtr; use crate::cruby::*; use crate::backend::ir::*; -use crate::codegen::CodegenGlobals; +use crate::options::*; +use crate::utils::*; // Use the x86 register type for this platform pub type Reg = X86Reg; @@ -33,8 +30,10 @@ pub const _C_ARG_OPNDS: [Opnd; 6] = [ pub const C_RET_REG: Reg = RAX_REG; pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG); -// The number of bytes that are generated by jmp_ptr -pub const JMP_PTR_BYTES: usize = 6; +impl CodeBlock { + // The number of bytes that are generated by jmp_ptr + pub fn jmp_ptr_bytes(&self) -> usize { 5 } +} /// Map Opnd to X86Opnd impl From<Opnd> for X86Opnd { @@ -80,12 +79,16 @@ impl From<&Opnd> for X86Opnd { } } +/// List of registers that can be used for stack temps. +pub static TEMP_REGS: [Reg; 5] = [RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG]; + impl Assembler { // A special scratch register for intermediate processing. - // Note: right now this is only used by LeaLabel because label_ref accepts - // a closure and we don't want it to have to capture anything. - const SCRATCH0: X86Opnd = X86Opnd::Reg(R11_REG); + // This register is caller-saved (so we don't have to save it before using it) + pub const SCRATCH_REG: Reg = R11_REG; + const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG); + /// Get the list of registers from which we can allocate on this platform pub fn get_alloc_regs() -> Vec<Reg> @@ -109,7 +112,7 @@ impl Assembler fn x86_split(mut self) -> Assembler { let live_ranges: Vec<usize> = take(&mut self.live_ranges); - let mut asm = Assembler::new_with_label_names(take(&mut self.label_names)); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits)); let mut iterator = self.into_draining_iter(); while let Some((index, mut insn)) = iterator.next_unmapped() { @@ -132,7 +135,7 @@ impl Assembler // Opnd::Value operands into registers here because: // // - Most instructions can't be encoded with 64-bit immediates. - // - We look for Op::Load specifically when emiting to keep GC'ed + // - We look for Op::Load specifically when emitting to keep GC'ed // VALUEs alive. This is a sort of canonicalization. let mut unmapped_opnds: Vec<Opnd> = vec![]; @@ -140,21 +143,23 @@ impl Assembler let mut opnd_iter = insn.opnd_iter_mut(); while let Some(opnd) = opnd_iter.next() { + if let Opnd::Stack { .. } = opnd { + *opnd = asm.lower_stack_opnd(opnd); + } unmapped_opnds.push(*opnd); - *opnd = if is_load { - iterator.map_opnd(*opnd) - } else if let Opnd::Value(value) = opnd { - // Since mov(mem64, imm32) sign extends, as_i64() makes sure - // we split when the extended value is different. - if !value.special_const_p() || imm_num_bits(value.as_i64()) > 32 { - asm.load(iterator.map_opnd(*opnd)) - } else { - Opnd::UImm(value.as_u64()) + *opnd = match opnd { + Opnd::Value(value) if !is_load => { + // Since mov(mem64, imm32) sign extends, as_i64() makes sure + // we split when the extended value is different. + if !value.special_const_p() || imm_num_bits(value.as_i64()) > 32 { + asm.load(iterator.map_opnd(*opnd)) + } else { + Opnd::UImm(value.as_u64()) + } } - } else { - iterator.map_opnd(*opnd) - } + _ => iterator.map_opnd(*opnd), + }; } // We are replacing instructions here so we know they are already @@ -163,40 +168,86 @@ impl Assembler match &mut insn { Insn::Add { left, right, out } | Insn::Sub { left, right, out } | + Insn::Mul { left, right, out } | Insn::And { left, right, out } | Insn::Or { left, right, out } | Insn::Xor { left, right, out } => { - match (unmapped_opnds[0], unmapped_opnds[1]) { - (Opnd::Mem(_), Opnd::Mem(_)) => { - *left = asm.load(*left); - *right = asm.load(*right); - }, - (Opnd::Mem(_), Opnd::UImm(_) | Opnd::Imm(_)) => { - *left = asm.load(*left); - }, - // Instruction output whose live range spans beyond this instruction - (Opnd::InsnOut { idx, .. }, _) => { - if live_ranges[idx] > index { - *left = asm.load(*left); + match (&left, &right, iterator.peek()) { + // Merge this insn, e.g. `add REG, right -> out`, and `mov REG, out` if possible + (Opnd::Reg(_), Opnd::UImm(value), Some(Insn::Mov { dest, src })) + if out == src && left == dest && live_ranges[index] == index + 1 && uimm_num_bits(*value) <= 32 => { + *out = *dest; + asm.push_insn(insn); + iterator.map_insn_index(&mut asm); + iterator.next_unmapped(); // Pop merged Insn::Mov + } + (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src })) + if out == src && live_ranges[index] == index + 1 && { + // We want to do `dest == left`, but `left` has already gone + // through lower_stack_opnd() while `dest` has not. So we + // lower `dest` before comparing. + let lowered_dest = if let Opnd::Stack { .. } = dest { + asm.lower_stack_opnd(dest) + } else { + *dest + }; + lowered_dest == *left + } => { + *out = *dest; + asm.push_insn(insn); + iterator.map_insn_index(&mut asm); + iterator.next_unmapped(); // Pop merged Insn::Mov + } + _ => { + match (unmapped_opnds[0], unmapped_opnds[1]) { + (Opnd::Mem(_), Opnd::Mem(_)) => { + *left = asm.load(*left); + *right = asm.load(*right); + }, + (Opnd::Mem(_), Opnd::UImm(_) | Opnd::Imm(_)) => { + *left = asm.load(*left); + }, + // Instruction output whose live range spans beyond this instruction + (Opnd::InsnOut { idx, .. }, _) => { + if live_ranges[idx] > index { + *left = asm.load(*left); + } + }, + // We have to load memory operands to avoid corrupting them + (Opnd::Mem(_) | Opnd::Reg(_), _) => { + *left = asm.load(*left); + }, + _ => {} + }; + + *out = asm.next_opnd_out(Opnd::match_num_bits(&[*left, *right])); + asm.push_insn(insn); + } + } + }, + Insn::Cmp { left, right } => { + // Replace `cmp REG, 0` (4 bytes) with `test REG, REG` (3 bytes) + // when next IR is `je`, `jne`, `csel_e`, or `csel_ne` + match (&left, &right, iterator.peek()) { + (Opnd::InsnOut { .. }, + Opnd::UImm(0) | Opnd::Imm(0), + Some(Insn::Je(_) | Insn::Jne(_) | Insn::CSelE { .. } | Insn::CSelNE { .. })) => { + asm.push_insn(Insn::Test { left: *left, right: *left }); + } + _ => { + if let (Opnd::Mem(_), Opnd::Mem(_)) = (&left, &right) { + let loaded = asm.load(*right); + *right = loaded; } - }, - // We have to load memory operands to avoid corrupting them - (Opnd::Mem(_) | Opnd::Reg(_), _) => { - *left = asm.load(*left); - }, - _ => {} - }; - - *out = asm.next_opnd_out(Opnd::match_num_bits(&[*left, *right])); - asm.push_insn(insn); + asm.push_insn(insn); + } + } }, - Insn::Cmp { left, right } | Insn::Test { left, right } => { if let (Opnd::Mem(_), Opnd::Mem(_)) = (&left, &right) { let loaded = asm.load(*right); *right = loaded; } - asm.push_insn(insn); }, // These instructions modify their input operand in-place, so we @@ -237,7 +288,11 @@ impl Assembler *truthy = asm.load(*truthy); } }, - Opnd::UImm(_) | Opnd::Imm(_) | Opnd::Value(_) => { + Opnd::UImm(_) | Opnd::Imm(_) => { + *truthy = asm.load(*truthy); + }, + // Opnd::Value could have already been split + Opnd::Value(_) if !matches!(truthy, Opnd::InsnOut { .. }) => { *truthy = asm.load(*truthy); }, _ => {} @@ -253,7 +308,7 @@ impl Assembler *out = asm.next_opnd_out(Opnd::match_num_bits(&[*truthy, *falsy])); asm.push_insn(insn); }, - Insn::Mov { dest, src } => { + Insn::Mov { dest, src } | Insn::Store { dest, src } => { match (&dest, &src) { (Opnd::Mem(_), Opnd::Mem(_)) => { // We load opnd1 because for mov, opnd0 is the output @@ -310,13 +365,25 @@ impl Assembler // Load each operand into the corresponding argument // register. for (idx, opnd) in opnds.into_iter().enumerate() { - asm.load_into(C_ARG_OPNDS[idx], *opnd); + asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), *opnd); } // Now we push the CCall without any arguments so that it // just performs the call. asm.ccall(*fptr, vec![]); }, + Insn::Lea { .. } => { + // Merge `lea` and `mov` into a single `lea` when possible + match (&insn, iterator.peek()) { + (Insn::Lea { opnd, out }, Some(Insn::Mov { dest: Opnd::Reg(reg), src })) + if matches!(out, Opnd::InsnOut { .. }) && out == src && live_ranges[index] == index + 1 => { + asm.push_insn(Insn::Lea { opnd: *opnd, out: Opnd::Reg(*reg) }); + iterator.map_insn_index(&mut asm); + iterator.next_unmapped(); // Pop merged Insn::Mov + } + _ => asm.push_insn(insn), + } + }, _ => { if insn.out_opnd().is_some() { let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter()); @@ -335,7 +402,7 @@ impl Assembler } /// Emit platform-specific machine code - pub fn x86_emit(&mut self, cb: &mut CodeBlock) -> Vec<u32> + pub fn x86_emit(&mut self, cb: &mut CodeBlock, ocb: &mut Option<&mut OutlinedCb>) -> Option<Vec<u32>> { /// For some instructions, we want to be able to lower a 64-bit operand /// without requiring more registers to be available in the register @@ -365,12 +432,45 @@ impl Assembler } } + /// Compile a side exit if Target::SideExit is given. + fn compile_side_exit( + target: Target, + asm: &mut Assembler, + ocb: &mut Option<&mut OutlinedCb>, + ) -> Option<Target> { + if let Target::SideExit { counter, context } = target { + let side_exit = asm.get_side_exit(&context.unwrap(), Some(counter), ocb.as_mut().unwrap()); + Some(Target::SideExitPtr(side_exit?)) + } else { + Some(target) + } + } - fn emit_csel(cb: &mut CodeBlock, truthy: Opnd, falsy: Opnd, out: Opnd, cmov_fn: fn(&mut CodeBlock, X86Opnd, X86Opnd)) { - if out != truthy { - mov(cb, out.into(), truthy.into()); + fn emit_csel( + cb: &mut CodeBlock, + truthy: Opnd, + falsy: Opnd, + out: Opnd, + cmov_fn: fn(&mut CodeBlock, X86Opnd, X86Opnd), + cmov_neg: fn(&mut CodeBlock, X86Opnd, X86Opnd)){ + + // Assert that output is a register + out.unwrap_reg(); + + // If the truthy value is a memory operand + if let Opnd::Mem(_) = truthy { + if out != falsy { + mov(cb, out.into(), falsy.into()); + } + + cmov_fn(cb, out.into(), truthy.into()); + } else { + if out != truthy { + mov(cb, out.into(), truthy.into()); + } + + cmov_neg(cb, out.into(), falsy.into()); } - cmov_fn(cb, out.into(), falsy.into()); } //dbg!(&self.insns); @@ -378,10 +478,13 @@ impl Assembler // List of GC offsets let mut gc_offsets: Vec<u32> = Vec::new(); + // Buffered list of PosMarker callbacks to fire if codegen is successful + let mut pos_markers: Vec<(usize, CodePtr)> = vec![]; + // For each instruction let start_write_pos = cb.get_write_pos(); - let mut insns_idx: usize = 0; - while let Some(insn) = self.insns.get(insns_idx) { + let mut insn_idx: usize = 0; + while let Some(insn) = self.insns.get(insn_idx) { let src_ptr = cb.get_write_ptr(); let had_dropped_bytes = cb.has_dropped_bytes(); let old_label_state = cb.get_label_state(); @@ -400,8 +503,8 @@ impl Assembler }, // Report back the current position in the generated code - Insn::PosMarker(pos_marker) => { - pos_marker(cb.get_write_ptr()); + Insn::PosMarker(..) => { + pos_markers.push((insn_idx, cb.get_write_ptr())); }, Insn::BakeString(text) => { @@ -414,19 +517,37 @@ impl Assembler cb.write_byte(0); }, + // Set up RBP to work with frame pointer unwinding + // (e.g. with Linux `perf record --call-graph fp`) + Insn::FrameSetup => { + if get_option!(frame_pointer) { + push(cb, RBP); + mov(cb, RBP, RSP); + push(cb, RBP); + } + }, + Insn::FrameTeardown => { + if get_option!(frame_pointer) { + pop(cb, RBP); + pop(cb, RBP); + } + }, + Insn::Add { left, right, .. } => { let opnd1 = emit_64bit_immediate(cb, right); add(cb, left.into(), opnd1); }, - Insn::FrameSetup => {}, - Insn::FrameTeardown => {}, - Insn::Sub { left, right, .. } => { let opnd1 = emit_64bit_immediate(cb, right); sub(cb, left.into(), opnd1); }, + Insn::Mul { left, right, .. } => { + let opnd1 = emit_64bit_immediate(cb, right); + imul(cb, left.into(), opnd1); + }, + Insn::And { left, right, .. } => { let opnd1 = emit_64bit_immediate(cb, right); and(cb, left.into(), opnd1); @@ -490,16 +611,23 @@ impl Assembler lea(cb, out.into(), opnd.into()); }, - // Load relative address - Insn::LeaLabel { target, out } => { - let label_idx = target.unwrap_label_idx(); - - cb.label_ref(label_idx, 7, |cb, src_addr, dst_addr| { - let disp = dst_addr - src_addr; - lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap())); - }); + // Load address of jump target + Insn::LeaJumpTarget { target, out } => { + if let Target::Label(label_idx) = target { + // Set output to the raw address of the label + cb.label_ref(*label_idx, 7, |cb, src_addr, dst_addr| { + let disp = dst_addr - src_addr; + lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap())); + }); - mov(cb, out.into(), Self::SCRATCH0); + mov(cb, out.into(), Self::SCRATCH0); + } else { + // Set output to the jump target's raw address + let target_code = target.unwrap_code_ptr(); + let target_addr = target_code.raw_addr(cb).as_u64(); + // Constant encoded length important for patching + movabs(cb, out.into(), target_addr); + } }, // Push and pop to/from the C stack @@ -580,61 +708,96 @@ impl Assembler // Conditional jump to a label Insn::Jmp(target) => { - match *target { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jmp_ptr(cb, code_ptr), Target::Label(label_idx) => jmp_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } } Insn::Je(target) => { - match *target { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => je_ptr(cb, code_ptr), Target::Label(label_idx) => je_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } } Insn::Jne(target) => { - match *target { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jne_ptr(cb, code_ptr), Target::Label(label_idx) => jne_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } } Insn::Jl(target) => { - match *target { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jl_ptr(cb, code_ptr), Target::Label(label_idx) => jl_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + }, + + Insn::Jg(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jg_ptr(cb, code_ptr), + Target::Label(label_idx) => jg_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + }, + + Insn::Jge(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jge_ptr(cb, code_ptr), + Target::Label(label_idx) => jge_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } }, Insn::Jbe(target) => { - match *target { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jbe_ptr(cb, code_ptr), Target::Label(label_idx) => jbe_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + }, + + Insn::Jb(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jb_ptr(cb, code_ptr), + Target::Label(label_idx) => jb_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } }, Insn::Jz(target) => { - match *target { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jz_ptr(cb, code_ptr), Target::Label(label_idx) => jz_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } } Insn::Jnz(target) => { - match *target { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jnz_ptr(cb, code_ptr), Target::Label(label_idx) => jnz_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } } - Insn::Jo(target) => { - match *target { + Insn::Jo(target) | + Insn::JoMul(target) => { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jo_ptr(cb, code_ptr), Target::Label(label_idx) => jo_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } } + Insn::Joz(..) | Insn::Jonz(..) => unreachable!("Joz/Jonz should be unused for now"), + // Atomically increment a counter at a given memory location Insn::IncrCounter { mem, value } => { assert!(matches!(mem, Opnd::Mem(_))); @@ -646,43 +809,36 @@ impl Assembler Insn::Breakpoint => int3(cb), Insn::CSelZ { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovnz); + emit_csel(cb, *truthy, *falsy, *out, cmovz, cmovnz); }, Insn::CSelNZ { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovz); + emit_csel(cb, *truthy, *falsy, *out, cmovnz, cmovz); }, Insn::CSelE { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovne); + emit_csel(cb, *truthy, *falsy, *out, cmove, cmovne); }, Insn::CSelNE { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmove); + emit_csel(cb, *truthy, *falsy, *out, cmovne, cmove); }, Insn::CSelL { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovge); + emit_csel(cb, *truthy, *falsy, *out, cmovl, cmovge); }, Insn::CSelLE { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovg); + emit_csel(cb, *truthy, *falsy, *out, cmovle, cmovg); }, Insn::CSelG { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovle); + emit_csel(cb, *truthy, *falsy, *out, cmovg, cmovle); }, Insn::CSelGE { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovl); + emit_csel(cb, *truthy, *falsy, *out, cmovge, cmovl); } Insn::LiveReg { .. } => (), // just a reg alloc signal, no code Insn::PadInvalPatch => { let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos())); - if code_size < JMP_PTR_BYTES { - nop(cb, (JMP_PTR_BYTES - code_size) as u32); + if code_size < cb.jmp_ptr_bytes() { + nop(cb, (cb.jmp_ptr_bytes() - code_size) as u32); } } - - // We want to keep the panic here because some instructions that - // we feed to the backend could get lowered into other - // instructions. So it's possible that some of our backend - // instructions can never make it to the emit stage. - #[allow(unreachable_patterns)] - _ => panic!("unsupported instruction passed to x86 backend: {:?}", insn) }; // On failure, jump to the next page and retry the current insn @@ -690,18 +846,32 @@ impl Assembler // Reset cb states before retrying the current Insn cb.set_label_state(old_label_state); } else { - insns_idx += 1; + insn_idx += 1; gc_offsets.append(&mut insn_gc_offsets); } } - gc_offsets + // Error if we couldn't write out everything + if cb.has_dropped_bytes() { + return None + } else { + // No bytes dropped, so the pos markers point to valid code + for (insn_idx, pos) in pos_markers { + if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() { + callback(pos, &cb); + } else { + panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}"); + } + } + + return Some(gc_offsets) + } } /// Optimize and compile the stored instructions - pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) -> Vec<u32> - { - let mut asm = self.x86_split().alloc_regs(regs); + pub fn compile_with_regs(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>, regs: Vec<Reg>) -> Option<(CodePtr, Vec<u32>)> { + let asm = self.x86_split(); + let mut asm = asm.alloc_regs(regs); // Create label instances in the code block for (idx, name) in asm.label_names.iter().enumerate() { @@ -709,20 +879,28 @@ impl Assembler assert!(label_idx == idx); } - let gc_offsets = asm.x86_emit(cb); + let mut ocb = ocb; // for &mut + let start_ptr = cb.get_write_ptr(); + let gc_offsets = asm.x86_emit(cb, &mut ocb); - if cb.has_dropped_bytes() { - cb.clear_labels(); - } else { + if let (Some(gc_offsets), false) = (gc_offsets, cb.has_dropped_bytes()) { cb.link_labels(); - } - gc_offsets + Some((start_ptr, gc_offsets)) + } else { + cb.clear_labels(); + + None + } } } #[cfg(test)] mod tests { + use crate::disasm::{assert_disasm}; + #[cfg(feature = "disasm")] + use crate::disasm::{unindent, disasm_addr_range}; + use super::*; fn setup_asm() -> (Assembler, CodeBlock) { @@ -892,4 +1070,253 @@ mod tests { assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c31d8"); } + + #[test] + fn test_merge_lea_reg() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.lea(Opnd::mem(64, SP, 8)); + asm.mov(SP, sp); // should be merged to lea + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "488d5b08", {" + 0x0: lea rbx, [rbx + 8] + "}); + } + + #[test] + fn test_merge_lea_mem() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.lea(Opnd::mem(64, SP, 8)); + asm.mov(Opnd::mem(64, SP, 0), sp); // should NOT be merged to lea + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "488d4308488903", {" + 0x0: lea rax, [rbx + 8] + 0x4: mov qword ptr [rbx], rax + "}); + } + + #[test] + fn test_replace_cmp_0() { + let (mut asm, mut cb) = setup_asm(); + + let val = asm.load(Opnd::mem(64, SP, 8)); + asm.cmp(val, 0.into()); + let result = asm.csel_e(Qtrue.into(), Qfalse.into()); + asm.mov(Opnd::Reg(RAX_REG), result); + asm.compile_with_num_regs(&mut cb, 2); + + assert_eq!(format!("{:x}", cb), "488b43084885c0b814000000b900000000480f45c14889c0"); + } + + #[test] + fn test_merge_add_mov() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.add(CFP, Opnd::UImm(0x40)); + asm.mov(CFP, sp); // should be merged to add + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4983c540"); + } + + #[test] + fn test_merge_sub_mov() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.sub(CFP, Opnd::UImm(0x40)); + asm.mov(CFP, sp); // should be merged to add + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4983ed40"); + } + + #[test] + fn test_merge_and_mov() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.and(CFP, Opnd::UImm(0x40)); + asm.mov(CFP, sp); // should be merged to add + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4983e540"); + } + + #[test] + fn test_merge_or_mov() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.or(CFP, Opnd::UImm(0x40)); + asm.mov(CFP, sp); // should be merged to add + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4983cd40"); + } + + #[test] + fn test_merge_xor_mov() { + let (mut asm, mut cb) = setup_asm(); + + let sp = asm.xor(CFP, Opnd::UImm(0x40)); + asm.mov(CFP, sp); // should be merged to add + asm.compile_with_num_regs(&mut cb, 1); + + assert_eq!(format!("{:x}", cb), "4983f540"); + } + + #[test] + fn test_reorder_c_args_no_cycle() { + let (mut asm, mut cb) = setup_asm(); + + asm.ccall(0 as _, vec![ + C_ARG_OPNDS[0], // mov rdi, rdi (optimized away) + C_ARG_OPNDS[1], // mov rsi, rsi (optimized away) + ]); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "b800000000ffd0", {" + 0x0: mov eax, 0 + 0x5: call rax + "}); + } + + #[test] + fn test_reorder_c_args_single_cycle() { + let (mut asm, mut cb) = setup_asm(); + + // rdi and rsi form a cycle + asm.ccall(0 as _, vec![ + C_ARG_OPNDS[1], // mov rdi, rsi + C_ARG_OPNDS[0], // mov rsi, rdi + C_ARG_OPNDS[2], // mov rdx, rdx (optimized away) + ]); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "4989f34889fe4c89dfb800000000ffd0", {" + 0x0: mov r11, rsi + 0x3: mov rsi, rdi + 0x6: mov rdi, r11 + 0x9: mov eax, 0 + 0xe: call rax + "}); + } + + #[test] + fn test_reorder_c_args_two_cycles() { + let (mut asm, mut cb) = setup_asm(); + + // rdi and rsi form a cycle, and rdx and rcx form another cycle + asm.ccall(0 as _, vec![ + C_ARG_OPNDS[1], // mov rdi, rsi + C_ARG_OPNDS[0], // mov rsi, rdi + C_ARG_OPNDS[3], // mov rdx, rcx + C_ARG_OPNDS[2], // mov rcx, rdx + ]); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "4989f34889fe4c89df4989cb4889d14c89dab800000000ffd0", {" + 0x0: mov r11, rsi + 0x3: mov rsi, rdi + 0x6: mov rdi, r11 + 0x9: mov r11, rcx + 0xc: mov rcx, rdx + 0xf: mov rdx, r11 + 0x12: mov eax, 0 + 0x17: call rax + "}); + } + + #[test] + fn test_reorder_c_args_large_cycle() { + let (mut asm, mut cb) = setup_asm(); + + // rdi, rsi, and rdx form a cycle + asm.ccall(0 as _, vec![ + C_ARG_OPNDS[1], // mov rdi, rsi + C_ARG_OPNDS[2], // mov rsi, rdx + C_ARG_OPNDS[0], // mov rdx, rdi + ]); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "4989f34889d64889fa4c89dfb800000000ffd0", {" + 0x0: mov r11, rsi + 0x3: mov rsi, rdx + 0x6: mov rdx, rdi + 0x9: mov rdi, r11 + 0xc: mov eax, 0 + 0x11: call rax + "}); + } + + #[test] + fn test_reorder_c_args_with_insn_out() { + let (mut asm, mut cb) = setup_asm(); + + let rax = asm.load(Opnd::UImm(1)); + let rcx = asm.load(Opnd::UImm(2)); + let rdx = asm.load(Opnd::UImm(3)); + // rcx and rdx form a cycle + asm.ccall(0 as _, vec![ + rax, // mov rdi, rax + rcx, // mov rsi, rcx + rcx, // mov rdx, rcx + rdx, // mov rcx, rdx + ]); + asm.compile_with_num_regs(&mut cb, 3); + + assert_disasm!(cb, "b801000000b902000000ba030000004889c74889ce4989cb4889d14c89dab800000000ffd0", {" + 0x0: mov eax, 1 + 0x5: mov ecx, 2 + 0xa: mov edx, 3 + 0xf: mov rdi, rax + 0x12: mov rsi, rcx + 0x15: mov r11, rcx + 0x18: mov rcx, rdx + 0x1b: mov rdx, r11 + 0x1e: mov eax, 0 + 0x23: call rax + "}); + } + + #[test] + fn test_cmov_mem() { + let (mut asm, mut cb) = setup_asm(); + + let top = Opnd::mem(64, SP, 0); + let ary_opnd = SP; + let array_len_opnd = Opnd::mem(64, SP, 16); + + asm.cmp(array_len_opnd, 1.into()); + let elem_opnd = asm.csel_g(Opnd::mem(64, ary_opnd, 0), Qnil.into()); + asm.mov(top, elem_opnd); + + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "48837b1001b804000000480f4f03488903", {" + 0x0: cmp qword ptr [rbx + 0x10], 1 + 0x5: mov eax, 4 + 0xa: cmovg rax, qword ptr [rbx] + 0xe: mov qword ptr [rbx], rax + "}); + } + + #[test] + fn test_csel_split() { + let (mut asm, mut cb) = setup_asm(); + + let stack_top = Opnd::mem(64, SP, 0); + let elem_opnd = asm.csel_ne(VALUE(0x7f22c88d1930).into(), Qnil.into()); + asm.mov(stack_top, elem_opnd); + + asm.compile_with_num_regs(&mut cb, 3); + + assert_disasm!(cb, "48b830198dc8227f0000b904000000480f44c1488903", {" + 0x0: movabs rax, 0x7f22c88d1930 + 0xa: mov ecx, 4 + 0xf: cmove rax, rcx + 0x13: mov qword ptr [rbx], rax + "}); + } } |