Diffstat (limited to 'yjit/src/backend/x86_64/mod.rs')
 yjit/src/backend/x86_64/mod.rs | 1322 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 1322 insertions(+), 0 deletions(-)
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
new file mode 100644
index 0000000000..4ca5e9be9c
--- /dev/null
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -0,0 +1,1322 @@
+use std::mem::take;
+
+use crate::asm::*;
+use crate::asm::x86_64::*;
+use crate::codegen::CodePtr;
+use crate::cruby::*;
+use crate::backend::ir::*;
+use crate::options::*;
+use crate::utils::*;
+
+// Use the x86 register type for this platform
+pub type Reg = X86Reg;
+
+// Callee-saved registers
+pub const _CFP: Opnd = Opnd::Reg(R13_REG);
+pub const _EC: Opnd = Opnd::Reg(R12_REG);
+pub const _SP: Opnd = Opnd::Reg(RBX_REG);
+
+// C argument registers on this platform
+pub const _C_ARG_OPNDS: [Opnd; 6] = [
+ Opnd::Reg(RDI_REG),
+ Opnd::Reg(RSI_REG),
+ Opnd::Reg(RDX_REG),
+ Opnd::Reg(RCX_REG),
+ Opnd::Reg(R8_REG),
+ Opnd::Reg(R9_REG)
+];
+
+// C return value register on this platform
+pub const C_RET_REG: Reg = RAX_REG;
+pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG);
+
+impl CodeBlock {
+ // The number of bytes that are generated by jmp_ptr
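+    // A near jmp encodes as opcode 0xE9 followed by a 32-bit relative
+    // displacement: 1 + 4 = 5 bytes.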
+ pub fn jmp_ptr_bytes(&self) -> usize { 5 }
+}
+
+/// Map Opnd to X86Opnd
+impl From<Opnd> for X86Opnd {
+ fn from(opnd: Opnd) -> Self {
+ match opnd {
+ // NOTE: these operand types need to be lowered first
+ //Value(VALUE), // Immediate Ruby value, may be GC'd, movable
+ //InsnOut(usize), // Output of a preceding instruction in this block
+
+ Opnd::InsnOut{..} => panic!("InsnOut operand made it past register allocation"),
+
+ Opnd::UImm(val) => uimm_opnd(val),
+ Opnd::Imm(val) => imm_opnd(val),
+ Opnd::Value(VALUE(uimm)) => uimm_opnd(uimm as u64),
+
+ // General-purpose register
+ Opnd::Reg(reg) => X86Opnd::Reg(reg),
+
+ // Memory operand with displacement
+ Opnd::Mem(Mem{ base: MemBase::Reg(reg_no), num_bits, disp }) => {
+ let reg = X86Reg {
+ reg_no,
+ num_bits: 64,
+ reg_type: RegType::GP
+ };
+
+ mem_opnd(num_bits, X86Opnd::Reg(reg), disp)
+ }
+
+ Opnd::None => panic!(
+ "Attempted to lower an Opnd::None. This often happens when an out operand was not allocated for an instruction because the output of the instruction was not used. Please ensure you are using the output."
+ ),
+
+ _ => panic!("unsupported x86 operand type")
+ }
+ }
+}
+
+/// Also implement going from a reference to an operand for convenience.
+impl From<&Opnd> for X86Opnd {
+ fn from(opnd: &Opnd) -> Self {
+ X86Opnd::from(*opnd)
+ }
+}
+
+/// List of registers that can be used for stack temps.
+pub static TEMP_REGS: [Reg; 5] = [RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG];
+
+impl Assembler
+{
+ // A special scratch register for intermediate processing.
+ // This register is caller-saved (so we don't have to save it before using it)
+ pub const SCRATCH_REG: Reg = R11_REG;
+ const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG);
+
+ /// Get the list of registers from which we can allocate on this platform
+ pub fn get_alloc_regs() -> Vec<Reg>
+ {
+ vec![
+ RAX_REG,
+ RCX_REG,
+ RDX_REG,
+ ]
+ }
+
+ /// Get a list of all of the caller-save registers
+ pub fn get_caller_save_regs() -> Vec<Reg> {
+ vec![RAX_REG, RCX_REG, RDX_REG, RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG, R11_REG]
+ }
+
+ // These are the callee-saved registers in the x86-64 SysV ABI
+ // RBX, RSP, RBP, and R12–R15
+
+ /// Split IR instructions for the x86 platform
+ fn x86_split(mut self) -> Assembler
+ {
+ let live_ranges: Vec<usize> = take(&mut self.live_ranges);
+ let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits));
+ let mut iterator = self.into_draining_iter();
+
+ while let Some((index, mut insn)) = iterator.next_unmapped() {
+ // When we're iterating through the instructions with x86_split, we
+ // need to know the previous live ranges in order to tell if a
+ // register lasts beyond the current instruction. So instead of
+ // using next_mapped, we call next_unmapped. When you're using the
+ // next_unmapped API, you need to make sure that you map each
+ // operand that could reference an old index, which means both
+ // Opnd::InsnOut operands and Opnd::Mem operands with a base of
+ // MemBase::InsnOut.
+ //
+            // Each operand must be mapped exactly _once_; mapping it twice
+            // would produce an incorrect index, possibly out of bounds of the
+            // old set of indices.
+ //
+ // We handle all of that mapping here to ensure that it's only
+ // mapped once. We also handle loading Opnd::Value operands into
+ // registers here so that all mapping happens in one place. We load
+ // Opnd::Value operands into registers here because:
+ //
+ // - Most instructions can't be encoded with 64-bit immediates.
+ // - We look for Op::Load specifically when emitting to keep GC'ed
+ // VALUEs alive. This is a sort of canonicalization.
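+            //
+            // For example (schematically), a heap-allocated VALUE operand of
+            // an Add is split so that the Load can be found at emit time and
+            // its code offset reported to the GC:
+            //
+            //   v0 = load VALUE(0x...)   ; 64-bit immediate, GC-visible
+            //   v1 = add v0, right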
+ let mut unmapped_opnds: Vec<Opnd> = vec![];
+
+ let is_load = matches!(insn, Insn::Load { .. } | Insn::LoadInto { .. });
+ let mut opnd_iter = insn.opnd_iter_mut();
+
+ while let Some(opnd) = opnd_iter.next() {
+ if let Opnd::Stack { .. } = opnd {
+ *opnd = asm.lower_stack_opnd(opnd);
+ }
+ unmapped_opnds.push(*opnd);
+
+ *opnd = match opnd {
+ Opnd::Value(value) if !is_load => {
+                        // Since mov(mem64, imm32) sign-extends the immediate,
+                        // check as_i64() so we split whenever the sign-extended
+                        // value would differ from the original.
+ if !value.special_const_p() || imm_num_bits(value.as_i64()) > 32 {
+ asm.load(iterator.map_opnd(*opnd))
+ } else {
+ Opnd::UImm(value.as_u64())
+ }
+ }
+ _ => iterator.map_opnd(*opnd),
+ };
+ }
+
+ // We are replacing instructions here so we know they are already
+ // being used. It is okay not to use their output here.
+ #[allow(unused_must_use)]
+ match &mut insn {
+ Insn::Add { left, right, out } |
+ Insn::Sub { left, right, out } |
+ Insn::Mul { left, right, out } |
+ Insn::And { left, right, out } |
+ Insn::Or { left, right, out } |
+ Insn::Xor { left, right, out } => {
+ match (&left, &right, iterator.peek()) {
+ // Merge this insn, e.g. `add REG, right -> out`, and `mov REG, out` if possible
+ (Opnd::Reg(_), Opnd::UImm(value), Some(Insn::Mov { dest, src }))
+ if out == src && left == dest && live_ranges[index] == index + 1 && uimm_num_bits(*value) <= 32 => {
+ *out = *dest;
+ asm.push_insn(insn);
+ iterator.map_insn_index(&mut asm);
+ iterator.next_unmapped(); // Pop merged Insn::Mov
+ }
+ (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src }))
+ if out == src && live_ranges[index] == index + 1 && {
+ // We want to do `dest == left`, but `left` has already gone
+ // through lower_stack_opnd() while `dest` has not. So we
+ // lower `dest` before comparing.
+ let lowered_dest = if let Opnd::Stack { .. } = dest {
+ asm.lower_stack_opnd(dest)
+ } else {
+ *dest
+ };
+ lowered_dest == *left
+ } => {
+ *out = *dest;
+ asm.push_insn(insn);
+ iterator.map_insn_index(&mut asm);
+ iterator.next_unmapped(); // Pop merged Insn::Mov
+ }
+ _ => {
+ match (unmapped_opnds[0], unmapped_opnds[1]) {
+ (Opnd::Mem(_), Opnd::Mem(_)) => {
+ *left = asm.load(*left);
+ *right = asm.load(*right);
+ },
+ (Opnd::Mem(_), Opnd::UImm(_) | Opnd::Imm(_)) => {
+ *left = asm.load(*left);
+ },
+ // Instruction output whose live range spans beyond this instruction
+ (Opnd::InsnOut { idx, .. }, _) => {
+ if live_ranges[idx] > index {
+ *left = asm.load(*left);
+ }
+ },
+ // We have to load memory operands to avoid corrupting them
+ (Opnd::Mem(_) | Opnd::Reg(_), _) => {
+ *left = asm.load(*left);
+ },
+ _ => {}
+ };
+
+ *out = asm.next_opnd_out(Opnd::match_num_bits(&[*left, *right]));
+ asm.push_insn(insn);
+ }
+ }
+ },
+ Insn::Cmp { left, right } => {
+                // Replace `cmp REG, 0` (4 bytes) with `test REG, REG` (3 bytes)
+                // when the next instruction is `je`, `jne`, `csel_e`, or `csel_ne`
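+                // For example, `cmp rax, 0; jne label` becomes
+                // `test rax, rax; jne label`: `rax & rax` is zero exactly
+                // when `rax` is zero, so ZF is set identically.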
+ match (&left, &right, iterator.peek()) {
+ (Opnd::InsnOut { .. },
+ Opnd::UImm(0) | Opnd::Imm(0),
+ Some(Insn::Je(_) | Insn::Jne(_) | Insn::CSelE { .. } | Insn::CSelNE { .. })) => {
+ asm.push_insn(Insn::Test { left: *left, right: *left });
+ }
+ _ => {
+ if let (Opnd::Mem(_), Opnd::Mem(_)) = (&left, &right) {
+ let loaded = asm.load(*right);
+ *right = loaded;
+ }
+ asm.push_insn(insn);
+ }
+ }
+ },
+ Insn::Test { left, right } => {
+ if let (Opnd::Mem(_), Opnd::Mem(_)) = (&left, &right) {
+ let loaded = asm.load(*right);
+ *right = loaded;
+ }
+ asm.push_insn(insn);
+ },
+ // These instructions modify their input operand in-place, so we
+ // may need to load the input value to preserve it
+ Insn::LShift { opnd, shift, out } |
+ Insn::RShift { opnd, shift, out } |
+ Insn::URShift { opnd, shift, out } => {
+ match (&unmapped_opnds[0], &unmapped_opnds[1]) {
+ // Instruction output whose live range spans beyond this instruction
+ (Opnd::InsnOut { idx, .. }, _) => {
+ if live_ranges[*idx] > index {
+ *opnd = asm.load(*opnd);
+ }
+ },
+ // We have to load memory operands to avoid corrupting them
+ (Opnd::Mem(_) | Opnd::Reg(_), _) => {
+ *opnd = asm.load(*opnd);
+ },
+ _ => {}
+ };
+
+ *out = asm.next_opnd_out(Opnd::match_num_bits(&[*opnd, *shift]));
+ asm.push_insn(insn);
+ },
+ Insn::CSelZ { truthy, falsy, out } |
+ Insn::CSelNZ { truthy, falsy, out } |
+ Insn::CSelE { truthy, falsy, out } |
+ Insn::CSelNE { truthy, falsy, out } |
+ Insn::CSelL { truthy, falsy, out } |
+ Insn::CSelLE { truthy, falsy, out } |
+ Insn::CSelG { truthy, falsy, out } |
+ Insn::CSelGE { truthy, falsy, out } => {
+ match unmapped_opnds[0] {
+ // If we have an instruction output whose live range
+ // spans beyond this instruction, we have to load it.
+ Opnd::InsnOut { idx, .. } => {
+ if live_ranges[idx] > index {
+ *truthy = asm.load(*truthy);
+ }
+ },
+ Opnd::UImm(_) | Opnd::Imm(_) => {
+ *truthy = asm.load(*truthy);
+ },
+ // Opnd::Value could have already been split
+ Opnd::Value(_) if !matches!(truthy, Opnd::InsnOut { .. }) => {
+ *truthy = asm.load(*truthy);
+ },
+ _ => {}
+ };
+
+ match falsy {
+ Opnd::UImm(_) | Opnd::Imm(_) => {
+ *falsy = asm.load(*falsy);
+ },
+ _ => {}
+ };
+
+ *out = asm.next_opnd_out(Opnd::match_num_bits(&[*truthy, *falsy]));
+ asm.push_insn(insn);
+ },
+ Insn::Mov { dest, src } | Insn::Store { dest, src } => {
+ match (&dest, &src) {
+ (Opnd::Mem(_), Opnd::Mem(_)) => {
+                        // mov can't encode memory-to-memory, so load src into
+                        // a register first (for mov, dest is the output)
+ let opnd1 = asm.load(*src);
+ asm.mov(*dest, opnd1);
+ },
+ (Opnd::Mem(_), Opnd::UImm(value)) => {
+ // 32-bit values will be sign-extended
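+                    // e.g. storing 0x8000_0000 as an imm32 would write
+                    // 0xFFFF_FFFF_8000_0000 to a 64-bit destination, so
+                    // such values must go through a register first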
+ if imm_num_bits(*value as i64) > 32 {
+ let opnd1 = asm.load(*src);
+ asm.mov(*dest, opnd1);
+ } else {
+ asm.mov(*dest, *src);
+ }
+ },
+ (Opnd::Mem(_), Opnd::Imm(value)) => {
+ if imm_num_bits(*value) > 32 {
+ let opnd1 = asm.load(*src);
+ asm.mov(*dest, opnd1);
+ } else {
+ asm.mov(*dest, *src);
+ }
+ },
+ _ => {
+ asm.mov(*dest, *src);
+ }
+ }
+ },
+ Insn::Not { opnd, .. } => {
+ let opnd0 = match unmapped_opnds[0] {
+ // If we have an instruction output whose live range
+ // spans beyond this instruction, we have to load it.
+ Opnd::InsnOut { idx, .. } => {
+ if live_ranges[idx] > index {
+ asm.load(*opnd)
+ } else {
+ *opnd
+ }
+ },
+ // We have to load memory and register operands to avoid
+ // corrupting them.
+ Opnd::Mem(_) | Opnd::Reg(_) => {
+ asm.load(*opnd)
+ },
+ // Otherwise we can just reuse the existing operand.
+ _ => *opnd
+ };
+
+ asm.not(opnd0);
+ },
+ Insn::CCall { opnds, fptr, .. } => {
+ assert!(opnds.len() <= C_ARG_OPNDS.len());
+
+ // Load each operand into the corresponding argument
+ // register.
+ for (idx, opnd) in opnds.into_iter().enumerate() {
+ asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), *opnd);
+ }
+
+ // Now we push the CCall without any arguments so that it
+ // just performs the call.
+ asm.ccall(*fptr, vec![]);
+ },
+ Insn::Lea { .. } => {
+ // Merge `lea` and `mov` into a single `lea` when possible
+ match (&insn, iterator.peek()) {
+ (Insn::Lea { opnd, out }, Some(Insn::Mov { dest: Opnd::Reg(reg), src }))
+ if matches!(out, Opnd::InsnOut { .. }) && out == src && live_ranges[index] == index + 1 => {
+ asm.push_insn(Insn::Lea { opnd: *opnd, out: Opnd::Reg(*reg) });
+ iterator.map_insn_index(&mut asm);
+ iterator.next_unmapped(); // Pop merged Insn::Mov
+ }
+ _ => asm.push_insn(insn),
+ }
+ },
+ _ => {
+ if insn.out_opnd().is_some() {
+ let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter());
+ let out = insn.out_opnd_mut().unwrap();
+ *out = asm.next_opnd_out(out_num_bits);
+ }
+
+ asm.push_insn(insn);
+ }
+ };
+
+ iterator.map_insn_index(&mut asm);
+ }
+
+ asm
+ }
+
+ /// Emit platform-specific machine code
+ pub fn x86_emit(&mut self, cb: &mut CodeBlock, ocb: &mut Option<&mut OutlinedCb>) -> Option<Vec<u32>>
+ {
+ /// For some instructions, we want to be able to lower a 64-bit operand
+ /// without requiring more registers to be available in the register
+ /// allocator. So we just use the SCRATCH0 register temporarily to hold
+ /// the value before we immediately use it.
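+        ///
+        /// For example, `add rax, 0x1_0000_0000` cannot encode the immediate
+        /// directly, so it is emitted as `mov r11, 0x1_0000_0000` followed
+        /// by `add rax, r11`.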
+ fn emit_64bit_immediate(cb: &mut CodeBlock, opnd: &Opnd) -> X86Opnd {
+ match opnd {
+ Opnd::Imm(value) => {
+ // 32-bit values will be sign-extended
+ if imm_num_bits(*value) > 32 {
+ mov(cb, Assembler::SCRATCH0, opnd.into());
+ Assembler::SCRATCH0
+ } else {
+ opnd.into()
+ }
+ },
+ Opnd::UImm(value) => {
+ // 32-bit values will be sign-extended
+ if imm_num_bits(*value as i64) > 32 {
+ mov(cb, Assembler::SCRATCH0, opnd.into());
+ Assembler::SCRATCH0
+ } else {
+ opnd.into()
+ }
+ },
+ _ => opnd.into()
+ }
+ }
+
+ /// Compile a side exit if Target::SideExit is given.
+ fn compile_side_exit(
+ target: Target,
+ asm: &mut Assembler,
+ ocb: &mut Option<&mut OutlinedCb>,
+ ) -> Option<Target> {
+ if let Target::SideExit { counter, context } = target {
+ let side_exit = asm.get_side_exit(&context.unwrap(), Some(counter), ocb.as_mut().unwrap());
+ Some(Target::SideExitPtr(side_exit?))
+ } else {
+ Some(target)
+ }
+ }
+
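+        /// Emit a conditional-move sequence. x86 cmov needs a register
+        /// destination, so one side is first materialized with a plain mov
+        /// and then conditionally overwritten: if the truthy value is in
+        /// memory, the falsy value is moved in first and the truthy value is
+        /// cmov'd on the condition; otherwise the truthy value is moved in
+        /// and the falsy value is cmov'd on the negated condition.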
+ fn emit_csel(
+ cb: &mut CodeBlock,
+ truthy: Opnd,
+ falsy: Opnd,
+ out: Opnd,
+ cmov_fn: fn(&mut CodeBlock, X86Opnd, X86Opnd),
+            cmov_neg: fn(&mut CodeBlock, X86Opnd, X86Opnd)) {
+
+ // Assert that output is a register
+ out.unwrap_reg();
+
+ // If the truthy value is a memory operand
+ if let Opnd::Mem(_) = truthy {
+ if out != falsy {
+ mov(cb, out.into(), falsy.into());
+ }
+
+ cmov_fn(cb, out.into(), truthy.into());
+ } else {
+ if out != truthy {
+ mov(cb, out.into(), truthy.into());
+ }
+
+ cmov_neg(cb, out.into(), falsy.into());
+ }
+ }
+
+
+ // List of GC offsets
+ let mut gc_offsets: Vec<u32> = Vec::new();
+
+ // Buffered list of PosMarker callbacks to fire if codegen is successful
+ let mut pos_markers: Vec<(usize, CodePtr)> = vec![];
+
+ // For each instruction
+ let start_write_pos = cb.get_write_pos();
+ let mut insn_idx: usize = 0;
+ while let Some(insn) = self.insns.get(insn_idx) {
+ let src_ptr = cb.get_write_ptr();
+ let had_dropped_bytes = cb.has_dropped_bytes();
+ let old_label_state = cb.get_label_state();
+ let mut insn_gc_offsets: Vec<u32> = Vec::new();
+
+ match insn {
+ Insn::Comment(text) => {
+ if cfg!(feature = "disasm") {
+ cb.add_comment(text);
+ }
+ },
+
+ // Write the label at the current position
+ Insn::Label(target) => {
+ cb.write_label(target.unwrap_label_idx());
+ },
+
+ // Report back the current position in the generated code
+ Insn::PosMarker(..) => {
+ pos_markers.push((insn_idx, cb.get_write_ptr()));
+ },
+
+ Insn::BakeString(text) => {
+ for byte in text.as_bytes() {
+ cb.write_byte(*byte);
+ }
+
+ // Add a null-terminator byte for safety (in case we pass
+ // this to C code)
+ cb.write_byte(0);
+ },
+
+ // Set up RBP to work with frame pointer unwinding
+ // (e.g. with Linux `perf record --call-graph fp`)
+ Insn::FrameSetup => {
+ if get_option!(frame_pointer) {
+ push(cb, RBP);
+ mov(cb, RBP, RSP);
+ push(cb, RBP);
+ }
+ },
+ Insn::FrameTeardown => {
+ if get_option!(frame_pointer) {
+ pop(cb, RBP);
+ pop(cb, RBP);
+ }
+ },
+
+ Insn::Add { left, right, .. } => {
+ let opnd1 = emit_64bit_immediate(cb, right);
+ add(cb, left.into(), opnd1);
+ },
+
+ Insn::Sub { left, right, .. } => {
+ let opnd1 = emit_64bit_immediate(cb, right);
+ sub(cb, left.into(), opnd1);
+ },
+
+ Insn::Mul { left, right, .. } => {
+ let opnd1 = emit_64bit_immediate(cb, right);
+ imul(cb, left.into(), opnd1);
+ },
+
+ Insn::And { left, right, .. } => {
+ let opnd1 = emit_64bit_immediate(cb, right);
+ and(cb, left.into(), opnd1);
+ },
+
+ Insn::Or { left, right, .. } => {
+ let opnd1 = emit_64bit_immediate(cb, right);
+ or(cb, left.into(), opnd1);
+ },
+
+ Insn::Xor { left, right, .. } => {
+ let opnd1 = emit_64bit_immediate(cb, right);
+ xor(cb, left.into(), opnd1);
+ },
+
+ Insn::Not { opnd, .. } => {
+ not(cb, opnd.into());
+ },
+
+            Insn::LShift { opnd, shift, .. } => {
+ shl(cb, opnd.into(), shift.into())
+ },
+
+            Insn::RShift { opnd, shift, .. } => {
+ sar(cb, opnd.into(), shift.into())
+ },
+
+ Insn::URShift { opnd, shift, .. } => {
+ shr(cb, opnd.into(), shift.into())
+ },
+
+ Insn::Store { dest, src } => {
+ mov(cb, dest.into(), src.into());
+ },
+
+ // This assumes only load instructions can contain references to GC'd Value operands
+ Insn::Load { opnd, out } |
+ Insn::LoadInto { dest: out, opnd } => {
+ match opnd {
+ Opnd::Value(val) if val.heap_object_p() => {
+                        // Use movabs so the value is always encoded as a full
+                        // 64-bit immediate; mov could encode it in 32 bits
+ movabs(cb, out.into(), val.0 as _);
+                        // The 8-byte pointer immediate is the last part of the emitted movabs
+ let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
+ insn_gc_offsets.push(ptr_offset);
+ }
+ _ => mov(cb, out.into(), opnd.into())
+ }
+ },
+
+ Insn::LoadSExt { opnd, out } => {
+ movsx(cb, out.into(), opnd.into());
+ },
+
+ Insn::Mov { dest, src } => {
+ mov(cb, dest.into(), src.into());
+ },
+
+ // Load effective address
+ Insn::Lea { opnd, out } => {
+ lea(cb, out.into(), opnd.into());
+ },
+
+ // Load address of jump target
+ Insn::LeaJumpTarget { target, out } => {
+ if let Target::Label(label_idx) = target {
+ // Set output to the raw address of the label
+ cb.label_ref(*label_idx, 7, |cb, src_addr, dst_addr| {
+ let disp = dst_addr - src_addr;
+ lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap()));
+ });
+
+ mov(cb, out.into(), Self::SCRATCH0);
+ } else {
+ // Set output to the jump target's raw address
+ let target_code = target.unwrap_code_ptr();
+ let target_addr = target_code.raw_addr(cb).as_u64();
+                    // movabs always encodes to the same length, which matters
+                    // when patching this code later
+ movabs(cb, out.into(), target_addr);
+ }
+ },
+
+ // Push and pop to/from the C stack
+ Insn::CPush(opnd) => {
+ push(cb, opnd.into());
+ },
+ Insn::CPop { out } => {
+ pop(cb, out.into());
+ },
+ Insn::CPopInto(opnd) => {
+ pop(cb, opnd.into());
+ },
+
+            // Push/pop all caller-save registers and the flags to/from the
+            // C stack
+ Insn::CPushAll => {
+ let regs = Assembler::get_caller_save_regs();
+
+ for reg in regs {
+ push(cb, X86Opnd::Reg(reg));
+ }
+ pushfq(cb);
+ },
+ Insn::CPopAll => {
+ let regs = Assembler::get_caller_save_regs();
+
+ popfq(cb);
+ for reg in regs.into_iter().rev() {
+ pop(cb, X86Opnd::Reg(reg));
+ }
+ },
+
+ // C function call
+ Insn::CCall { fptr, .. } => {
+ call_ptr(cb, RAX, *fptr);
+ },
+
+ Insn::CRet(opnd) => {
+ // TODO: bias allocation towards return register
+ if *opnd != Opnd::Reg(C_RET_REG) {
+ mov(cb, RAX, opnd.into());
+ }
+
+ ret(cb);
+ },
+
+ // Compare
+ Insn::Cmp { left, right } => {
+ let num_bits = match right {
+ Opnd::Imm(value) => Some(imm_num_bits(*value)),
+ Opnd::UImm(value) => Some(uimm_num_bits(*value)),
+ _ => None
+ };
+
+                // If the immediate fits in fewer than 64 bits (8, 16, or 32)
+                // and the operand sizes match, it can be encoded directly in
+                // the instruction without first being moved to a register.
+                // 64-bit immediates, by contrast, must always be moved to a
+                // register before a comparison.
+ if num_bits.is_some() && left.num_bits() == num_bits && num_bits.unwrap() < 64 {
+ cmp(cb, left.into(), right.into());
+ } else {
+ let emitted = emit_64bit_immediate(cb, right);
+ cmp(cb, left.into(), emitted);
+ }
+ }
+
+ // Test and set flags
+ Insn::Test { left, right } => {
+ let emitted = emit_64bit_immediate(cb, right);
+ test(cb, left.into(), emitted);
+ }
+
+ Insn::JmpOpnd(opnd) => {
+ jmp_rm(cb, opnd.into());
+ }
+
+            // Unconditional jump to a target
+ Insn::Jmp(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jmp_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jmp_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ }
+
+ Insn::Je(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => je_ptr(cb, code_ptr),
+ Target::Label(label_idx) => je_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ }
+
+ Insn::Jne(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jne_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jne_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ }
+
+ Insn::Jl(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jl_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jl_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ },
+
+ Insn::Jg(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jg_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jg_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ },
+
+ Insn::Jge(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jge_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jge_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ },
+
+ Insn::Jbe(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jbe_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jbe_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ },
+
+ Insn::Jb(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jb_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jb_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ },
+
+ Insn::Jz(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jz_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jz_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ }
+
+ Insn::Jnz(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jnz_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jnz_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ }
+
+ Insn::Jo(target) |
+ Insn::JoMul(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jo_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jo_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ }
+
+ Insn::Joz(..) | Insn::Jonz(..) => unreachable!("Joz/Jonz should be unused for now"),
+
+ // Atomically increment a counter at a given memory location
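+            // (the `lock` prefix makes the read-modify-write atomic, so
+            // concurrent increments cannot be lost)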
+ Insn::IncrCounter { mem, value } => {
+ assert!(matches!(mem, Opnd::Mem(_)));
+                assert!(matches!(value, Opnd::UImm(_) | Opnd::Imm(_)));
+ write_lock_prefix(cb);
+ add(cb, mem.into(), value.into());
+ },
+
+ Insn::Breakpoint => int3(cb),
+
+ Insn::CSelZ { truthy, falsy, out } => {
+ emit_csel(cb, *truthy, *falsy, *out, cmovz, cmovnz);
+ },
+ Insn::CSelNZ { truthy, falsy, out } => {
+ emit_csel(cb, *truthy, *falsy, *out, cmovnz, cmovz);
+ },
+ Insn::CSelE { truthy, falsy, out } => {
+ emit_csel(cb, *truthy, *falsy, *out, cmove, cmovne);
+ },
+ Insn::CSelNE { truthy, falsy, out } => {
+ emit_csel(cb, *truthy, *falsy, *out, cmovne, cmove);
+ },
+ Insn::CSelL { truthy, falsy, out } => {
+ emit_csel(cb, *truthy, *falsy, *out, cmovl, cmovge);
+ },
+ Insn::CSelLE { truthy, falsy, out } => {
+ emit_csel(cb, *truthy, *falsy, *out, cmovle, cmovg);
+ },
+ Insn::CSelG { truthy, falsy, out } => {
+ emit_csel(cb, *truthy, *falsy, *out, cmovg, cmovle);
+ },
+ Insn::CSelGE { truthy, falsy, out } => {
+ emit_csel(cb, *truthy, *falsy, *out, cmovge, cmovl);
+ }
+ Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
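+            // Pad with nops so this location can later be overwritten with a
+            // jmp_ptr when the code is invalidated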
+ Insn::PadInvalPatch => {
+ let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()));
+ if code_size < cb.jmp_ptr_bytes() {
+ nop(cb, (cb.jmp_ptr_bytes() - code_size) as u32);
+ }
+ }
+ };
+
+ // On failure, jump to the next page and retry the current insn
+ if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, jmp_ptr) {
+ // Reset cb states before retrying the current Insn
+ cb.set_label_state(old_label_state);
+ } else {
+ insn_idx += 1;
+ gc_offsets.append(&mut insn_gc_offsets);
+ }
+ }
+
+ // Error if we couldn't write out everything
+ if cb.has_dropped_bytes() {
+ return None
+ } else {
+ // No bytes dropped, so the pos markers point to valid code
+ for (insn_idx, pos) in pos_markers {
+ if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() {
+ callback(pos, &cb);
+ } else {
+ panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}");
+ }
+ }
+
+ return Some(gc_offsets)
+ }
+ }
+
+ /// Optimize and compile the stored instructions
+ pub fn compile_with_regs(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>, regs: Vec<Reg>) -> Option<(CodePtr, Vec<u32>)> {
+ let asm = self.x86_split();
+ let mut asm = asm.alloc_regs(regs);
+
+ // Create label instances in the code block
+ for (idx, name) in asm.label_names.iter().enumerate() {
+ let label_idx = cb.new_label(name.to_string());
+ assert!(label_idx == idx);
+ }
+
+ let mut ocb = ocb; // for &mut
+ let start_ptr = cb.get_write_ptr();
+ let gc_offsets = asm.x86_emit(cb, &mut ocb);
+
+ if let (Some(gc_offsets), false) = (gc_offsets, cb.has_dropped_bytes()) {
+ cb.link_labels();
+
+ Some((start_ptr, gc_offsets))
+ } else {
+ cb.clear_labels();
+
+ None
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::disasm::{assert_disasm};
+ #[cfg(feature = "disasm")]
+ use crate::disasm::{unindent, disasm_addr_range};
+
+ use super::*;
+
+ fn setup_asm() -> (Assembler, CodeBlock) {
+ (Assembler::new(), CodeBlock::new_dummy(1024))
+ }
+
+ #[test]
+ fn test_emit_add_lt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.add(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c04881c0ff000000");
+ }
+
+ #[test]
+ fn test_emit_add_gt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.add(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c01d8");
+ }
+
+ #[test]
+ fn test_emit_and_lt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.and(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c04881e0ff000000");
+ }
+
+ #[test]
+ fn test_emit_and_gt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.and(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c21d8");
+ }
+
+ #[test]
+ fn test_emit_cmp_lt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.cmp(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_eq!(format!("{:x}", cb), "4881f8ff000000");
+ }
+
+ #[test]
+ fn test_emit_cmp_gt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.cmp(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_eq!(format!("{:x}", cb), "49bbffffffffffff00004c39d8");
+ }
+
+ #[test]
+ fn test_emit_cmp_mem_16_bits_with_imm_16() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let shape_opnd = Opnd::mem(16, Opnd::Reg(RAX_REG), 6);
+
+ asm.cmp(shape_opnd, Opnd::UImm(0xF000));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_eq!(format!("{:x}", cb), "6681780600f0");
+ }
+
+ #[test]
+ fn test_emit_cmp_mem_32_bits_with_imm_32() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let shape_opnd = Opnd::mem(32, Opnd::Reg(RAX_REG), 4);
+
+ asm.cmp(shape_opnd, Opnd::UImm(0xF000_0000));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_eq!(format!("{:x}", cb), "817804000000f0");
+ }
+
+ #[test]
+ fn test_emit_or_lt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.or(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c04881c8ff000000");
+ }
+
+ #[test]
+ fn test_emit_or_gt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.or(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c09d8");
+ }
+
+ #[test]
+ fn test_emit_sub_lt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.sub(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c04881e8ff000000");
+ }
+
+ #[test]
+ fn test_emit_sub_gt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.sub(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c29d8");
+ }
+
+ #[test]
+ fn test_emit_test_lt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.test(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_eq!(format!("{:x}", cb), "f6c0ff");
+ }
+
+ #[test]
+ fn test_emit_test_gt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.test(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_eq!(format!("{:x}", cb), "49bbffffffffffff00004c85d8");
+ }
+
+ #[test]
+ fn test_emit_xor_lt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.xor(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c04881f0ff000000");
+ }
+
+ #[test]
+ fn test_emit_xor_gt_32_bits() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let _ = asm.xor(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c31d8");
+ }
+
+ #[test]
+ fn test_merge_lea_reg() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.lea(Opnd::mem(64, SP, 8));
+ asm.mov(SP, sp); // should be merged to lea
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_disasm!(cb, "488d5b08", {"
+ 0x0: lea rbx, [rbx + 8]
+ "});
+ }
+
+ #[test]
+ fn test_merge_lea_mem() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.lea(Opnd::mem(64, SP, 8));
+ asm.mov(Opnd::mem(64, SP, 0), sp); // should NOT be merged to lea
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_disasm!(cb, "488d4308488903", {"
+ 0x0: lea rax, [rbx + 8]
+ 0x4: mov qword ptr [rbx], rax
+ "});
+ }
+
+ #[test]
+ fn test_replace_cmp_0() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let val = asm.load(Opnd::mem(64, SP, 8));
+ asm.cmp(val, 0.into());
+ let result = asm.csel_e(Qtrue.into(), Qfalse.into());
+ asm.mov(Opnd::Reg(RAX_REG), result);
+ asm.compile_with_num_regs(&mut cb, 2);
+
+ assert_eq!(format!("{:x}", cb), "488b43084885c0b814000000b900000000480f45c14889c0");
+ }
+
+ #[test]
+ fn test_merge_add_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.add(CFP, Opnd::UImm(0x40));
+ asm.mov(CFP, sp); // should be merged to add
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4983c540");
+ }
+
+ #[test]
+ fn test_merge_sub_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.sub(CFP, Opnd::UImm(0x40));
+        asm.mov(CFP, sp); // should be merged to sub
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4983ed40");
+ }
+
+ #[test]
+ fn test_merge_and_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.and(CFP, Opnd::UImm(0x40));
+        asm.mov(CFP, sp); // should be merged to and
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4983e540");
+ }
+
+ #[test]
+ fn test_merge_or_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.or(CFP, Opnd::UImm(0x40));
+        asm.mov(CFP, sp); // should be merged to or
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4983cd40");
+ }
+
+ #[test]
+ fn test_merge_xor_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.xor(CFP, Opnd::UImm(0x40));
+        asm.mov(CFP, sp); // should be merged to xor
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4983f540");
+ }
+
+ #[test]
+ fn test_reorder_c_args_no_cycle() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.ccall(0 as _, vec![
+ C_ARG_OPNDS[0], // mov rdi, rdi (optimized away)
+ C_ARG_OPNDS[1], // mov rsi, rsi (optimized away)
+ ]);
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_disasm!(cb, "b800000000ffd0", {"
+ 0x0: mov eax, 0
+ 0x5: call rax
+ "});
+ }
+
+ #[test]
+ fn test_reorder_c_args_single_cycle() {
+ let (mut asm, mut cb) = setup_asm();
+
+ // rdi and rsi form a cycle
+ asm.ccall(0 as _, vec![
+ C_ARG_OPNDS[1], // mov rdi, rsi
+ C_ARG_OPNDS[0], // mov rsi, rdi
+ C_ARG_OPNDS[2], // mov rdx, rdx (optimized away)
+ ]);
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_disasm!(cb, "4989f34889fe4c89dfb800000000ffd0", {"
+ 0x0: mov r11, rsi
+ 0x3: mov rsi, rdi
+ 0x6: mov rdi, r11
+ 0x9: mov eax, 0
+ 0xe: call rax
+ "});
+ }
+
+ #[test]
+ fn test_reorder_c_args_two_cycles() {
+ let (mut asm, mut cb) = setup_asm();
+
+ // rdi and rsi form a cycle, and rdx and rcx form another cycle
+ asm.ccall(0 as _, vec![
+ C_ARG_OPNDS[1], // mov rdi, rsi
+ C_ARG_OPNDS[0], // mov rsi, rdi
+ C_ARG_OPNDS[3], // mov rdx, rcx
+ C_ARG_OPNDS[2], // mov rcx, rdx
+ ]);
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_disasm!(cb, "4989f34889fe4c89df4989cb4889d14c89dab800000000ffd0", {"
+ 0x0: mov r11, rsi
+ 0x3: mov rsi, rdi
+ 0x6: mov rdi, r11
+ 0x9: mov r11, rcx
+ 0xc: mov rcx, rdx
+ 0xf: mov rdx, r11
+ 0x12: mov eax, 0
+ 0x17: call rax
+ "});
+ }
+
+ #[test]
+ fn test_reorder_c_args_large_cycle() {
+ let (mut asm, mut cb) = setup_asm();
+
+ // rdi, rsi, and rdx form a cycle
+ asm.ccall(0 as _, vec![
+ C_ARG_OPNDS[1], // mov rdi, rsi
+ C_ARG_OPNDS[2], // mov rsi, rdx
+ C_ARG_OPNDS[0], // mov rdx, rdi
+ ]);
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_disasm!(cb, "4989f34889d64889fa4c89dfb800000000ffd0", {"
+ 0x0: mov r11, rsi
+ 0x3: mov rsi, rdx
+ 0x6: mov rdx, rdi
+ 0x9: mov rdi, r11
+ 0xc: mov eax, 0
+ 0x11: call rax
+ "});
+ }
+
+ #[test]
+ fn test_reorder_c_args_with_insn_out() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let rax = asm.load(Opnd::UImm(1));
+ let rcx = asm.load(Opnd::UImm(2));
+ let rdx = asm.load(Opnd::UImm(3));
+ // rcx and rdx form a cycle
+ asm.ccall(0 as _, vec![
+ rax, // mov rdi, rax
+ rcx, // mov rsi, rcx
+ rcx, // mov rdx, rcx
+ rdx, // mov rcx, rdx
+ ]);
+ asm.compile_with_num_regs(&mut cb, 3);
+
+ assert_disasm!(cb, "b801000000b902000000ba030000004889c74889ce4989cb4889d14c89dab800000000ffd0", {"
+ 0x0: mov eax, 1
+ 0x5: mov ecx, 2
+ 0xa: mov edx, 3
+ 0xf: mov rdi, rax
+ 0x12: mov rsi, rcx
+ 0x15: mov r11, rcx
+ 0x18: mov rcx, rdx
+ 0x1b: mov rdx, r11
+ 0x1e: mov eax, 0
+ 0x23: call rax
+ "});
+ }
+
+ #[test]
+ fn test_cmov_mem() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let top = Opnd::mem(64, SP, 0);
+ let ary_opnd = SP;
+ let array_len_opnd = Opnd::mem(64, SP, 16);
+
+ asm.cmp(array_len_opnd, 1.into());
+ let elem_opnd = asm.csel_g(Opnd::mem(64, ary_opnd, 0), Qnil.into());
+ asm.mov(top, elem_opnd);
+
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_disasm!(cb, "48837b1001b804000000480f4f03488903", {"
+ 0x0: cmp qword ptr [rbx + 0x10], 1
+ 0x5: mov eax, 4
+ 0xa: cmovg rax, qword ptr [rbx]
+ 0xe: mov qword ptr [rbx], rax
+ "});
+ }
+
+ #[test]
+ fn test_csel_split() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let stack_top = Opnd::mem(64, SP, 0);
+ let elem_opnd = asm.csel_ne(VALUE(0x7f22c88d1930).into(), Qnil.into());
+ asm.mov(stack_top, elem_opnd);
+
+ asm.compile_with_num_regs(&mut cb, 3);
+
+ assert_disasm!(cb, "48b830198dc8227f0000b904000000480f44c1488903", {"
+ 0x0: movabs rax, 0x7f22c88d1930
+ 0xa: mov ecx, 4
+ 0xf: cmove rax, rcx
+ 0x13: mov qword ptr [rbx], rax
+ "});
+ }
+}