Diffstat (limited to 'yjit/src/backend/x86_64/mod.rs')
-rw-r--r--  yjit/src/backend/x86_64/mod.rs  647
1 file changed, 537 insertions(+), 110 deletions(-)
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
index 297a0fd852..4ca5e9be9c 100644
--- a/yjit/src/backend/x86_64/mod.rs
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -1,15 +1,12 @@
-#![allow(dead_code)]
-#![allow(unused_variables)]
-#![allow(unused_imports)]
-
use std::mem::take;
use crate::asm::*;
use crate::asm::x86_64::*;
-use crate::codegen::{JITState};
+use crate::codegen::CodePtr;
use crate::cruby::*;
use crate::backend::ir::*;
-use crate::codegen::CodegenGlobals;
+use crate::options::*;
+use crate::utils::*;
// Use the x86 register type for this platform
pub type Reg = X86Reg;
@@ -33,8 +30,10 @@ pub const _C_ARG_OPNDS: [Opnd; 6] = [
pub const C_RET_REG: Reg = RAX_REG;
pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG);
-// The number of bytes that are generated by jmp_ptr
-pub const JMP_PTR_BYTES: usize = 6;
+impl CodeBlock {
+ // The number of bytes that are generated by jmp_ptr
+ pub fn jmp_ptr_bytes(&self) -> usize { 5 }
+}
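
The 5 bytes returned by jmp_ptr_bytes() correspond to a near `jmp rel32`: opcode 0xE9 followed by a 32-bit displacement measured from the end of the instruction. A minimal standalone sketch of that encoding (illustrative only, not YJIT code):

fn encode_jmp_rel32(src: u64, dst: u64) -> [u8; 5] {
    // The displacement is relative to the address of the *next* instruction.
    let rel = dst.wrapping_sub(src.wrapping_add(5)) as u32;
    let mut bytes = [0xE9u8, 0, 0, 0, 0];
    bytes[1..].copy_from_slice(&rel.to_le_bytes());
    bytes
}

fn main() {
    // Jumping 0x100 bytes forward always takes exactly 5 bytes.
    assert_eq!(encode_jmp_rel32(0x1000, 0x1105), [0xE9, 0x00, 0x01, 0x00, 0x00]);
}
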
/// Map Opnd to X86Opnd
impl From<Opnd> for X86Opnd {
@@ -80,12 +79,16 @@ impl From<&Opnd> for X86Opnd {
}
}
+/// List of registers that can be used for stack temps.
+pub static TEMP_REGS: [Reg; 5] = [RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG];
+
impl Assembler
{
// A special scratch register for intermediate processing.
- // Note: right now this is only used by LeaLabel because label_ref accepts
- // a closure and we don't want it to have to capture anything.
- const SCRATCH0: X86Opnd = X86Opnd::Reg(R11_REG);
+ // This register is caller-saved (so we don't have to save it before using it)
+ pub const SCRATCH_REG: Reg = R11_REG;
+ const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG);
+
/// Get the list of registers from which we can allocate on this platform
pub fn get_alloc_regs() -> Vec<Reg>
@@ -109,7 +112,7 @@ impl Assembler
fn x86_split(mut self) -> Assembler
{
let live_ranges: Vec<usize> = take(&mut self.live_ranges);
- let mut asm = Assembler::new_with_label_names(take(&mut self.label_names));
+ let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits));
let mut iterator = self.into_draining_iter();
while let Some((index, mut insn)) = iterator.next_unmapped() {
@@ -132,7 +135,7 @@ impl Assembler
// Opnd::Value operands into registers here because:
//
// - Most instructions can't be encoded with 64-bit immediates.
- // - We look for Op::Load specifically when emiting to keep GC'ed
+ // - We look for Op::Load specifically when emitting to keep GC'ed
// VALUEs alive. This is a sort of canonicalization.
let mut unmapped_opnds: Vec<Opnd> = vec![];
@@ -140,21 +143,23 @@ impl Assembler
let mut opnd_iter = insn.opnd_iter_mut();
while let Some(opnd) = opnd_iter.next() {
+ if let Opnd::Stack { .. } = opnd {
+ *opnd = asm.lower_stack_opnd(opnd);
+ }
unmapped_opnds.push(*opnd);
- *opnd = if is_load {
- iterator.map_opnd(*opnd)
- } else if let Opnd::Value(value) = opnd {
- // Since mov(mem64, imm32) sign extends, as_i64() makes sure
- // we split when the extended value is different.
- if !value.special_const_p() || imm_num_bits(value.as_i64()) > 32 {
- asm.load(iterator.map_opnd(*opnd))
- } else {
- Opnd::UImm(value.as_u64())
+ *opnd = match opnd {
+ Opnd::Value(value) if !is_load => {
+ // Since mov(mem64, imm32) sign extends, as_i64() makes sure
+ // we split when the extended value is different.
+ if !value.special_const_p() || imm_num_bits(value.as_i64()) > 32 {
+ asm.load(iterator.map_opnd(*opnd))
+ } else {
+ Opnd::UImm(value.as_u64())
+ }
}
- } else {
- iterator.map_opnd(*opnd)
- }
+ _ => iterator.map_opnd(*opnd),
+ };
}
// We are replacing instructions here so we know they are already
@@ -163,40 +168,86 @@ impl Assembler
match &mut insn {
Insn::Add { left, right, out } |
Insn::Sub { left, right, out } |
+ Insn::Mul { left, right, out } |
Insn::And { left, right, out } |
Insn::Or { left, right, out } |
Insn::Xor { left, right, out } => {
- match (unmapped_opnds[0], unmapped_opnds[1]) {
- (Opnd::Mem(_), Opnd::Mem(_)) => {
- *left = asm.load(*left);
- *right = asm.load(*right);
- },
- (Opnd::Mem(_), Opnd::UImm(_) | Opnd::Imm(_)) => {
- *left = asm.load(*left);
- },
- // Instruction output whose live range spans beyond this instruction
- (Opnd::InsnOut { idx, .. }, _) => {
- if live_ranges[idx] > index {
- *left = asm.load(*left);
+ match (&left, &right, iterator.peek()) {
+ // Merge this insn, e.g. `add REG, right -> out`, and `mov REG, out` if possible
+ (Opnd::Reg(_), Opnd::UImm(value), Some(Insn::Mov { dest, src }))
+ if out == src && left == dest && live_ranges[index] == index + 1 && uimm_num_bits(*value) <= 32 => {
+ *out = *dest;
+ asm.push_insn(insn);
+ iterator.map_insn_index(&mut asm);
+ iterator.next_unmapped(); // Pop merged Insn::Mov
+ }
+ (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src }))
+ if out == src && live_ranges[index] == index + 1 && {
+ // We want to do `dest == left`, but `left` has already gone
+ // through lower_stack_opnd() while `dest` has not. So we
+ // lower `dest` before comparing.
+ let lowered_dest = if let Opnd::Stack { .. } = dest {
+ asm.lower_stack_opnd(dest)
+ } else {
+ *dest
+ };
+ lowered_dest == *left
+ } => {
+ *out = *dest;
+ asm.push_insn(insn);
+ iterator.map_insn_index(&mut asm);
+ iterator.next_unmapped(); // Pop merged Insn::Mov
+ }
+ _ => {
+ match (unmapped_opnds[0], unmapped_opnds[1]) {
+ (Opnd::Mem(_), Opnd::Mem(_)) => {
+ *left = asm.load(*left);
+ *right = asm.load(*right);
+ },
+ (Opnd::Mem(_), Opnd::UImm(_) | Opnd::Imm(_)) => {
+ *left = asm.load(*left);
+ },
+ // Instruction output whose live range spans beyond this instruction
+ (Opnd::InsnOut { idx, .. }, _) => {
+ if live_ranges[idx] > index {
+ *left = asm.load(*left);
+ }
+ },
+ // We have to load memory operands to avoid corrupting them
+ (Opnd::Mem(_) | Opnd::Reg(_), _) => {
+ *left = asm.load(*left);
+ },
+ _ => {}
+ };
+
+ *out = asm.next_opnd_out(Opnd::match_num_bits(&[*left, *right]));
+ asm.push_insn(insn);
+ }
+ }
+ },
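
The merge above collapses a pattern like `out = add(reg, imm); mov(reg, out)` into a single in-place `add reg, imm` when `out` dies immediately after the `mov` (per live_ranges) and the immediate fits in 32 bits. A tiny standalone model of the before/after behavior, on plain integers rather than real IR (not YJIT code):

fn unmerged(mut cfp: u64) -> u64 {
    let out = cfp.wrapping_add(0x40); // add CFP, 0x40 -> out
    cfp = out;                        // mov CFP, out
    cfp
}

fn merged(mut cfp: u64) -> u64 {
    cfp = cfp.wrapping_add(0x40);     // add CFP, 0x40 (in place)
    cfp
}

fn main() {
    assert_eq!(unmerged(0x1000), merged(0x1000));
}
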
+ Insn::Cmp { left, right } => {
+ // Replace `cmp REG, 0` (4 bytes) with `test REG, REG` (3 bytes)
+ // when next IR is `je`, `jne`, `csel_e`, or `csel_ne`
+ match (&left, &right, iterator.peek()) {
+ (Opnd::InsnOut { .. },
+ Opnd::UImm(0) | Opnd::Imm(0),
+ Some(Insn::Je(_) | Insn::Jne(_) | Insn::CSelE { .. } | Insn::CSelNE { .. })) => {
+ asm.push_insn(Insn::Test { left: *left, right: *left });
+ }
+ _ => {
+ if let (Opnd::Mem(_), Opnd::Mem(_)) = (&left, &right) {
+ let loaded = asm.load(*right);
+ *right = loaded;
}
- },
- // We have to load memory operands to avoid corrupting them
- (Opnd::Mem(_) | Opnd::Reg(_), _) => {
- *left = asm.load(*left);
- },
- _ => {}
- };
-
- *out = asm.next_opnd_out(Opnd::match_num_bits(&[*left, *right]));
- asm.push_insn(insn);
+ asm.push_insn(insn);
+ }
+ }
},
- Insn::Cmp { left, right } |
Insn::Test { left, right } => {
if let (Opnd::Mem(_), Opnd::Mem(_)) = (&left, &right) {
let loaded = asm.load(*right);
*right = loaded;
}
-
asm.push_insn(insn);
},
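
The `cmp REG, 0` → `test REG, REG` rewrite is sound for `je`/`jne`/`csel_e`/`csel_ne` because both instructions set ZF exactly when the register is zero; the shorter `test` simply avoids encoding an immediate. A standalone check of that flag equivalence (illustrative, not YJIT code):

fn zf_after_cmp_zero(reg: u64) -> bool {
    reg.wrapping_sub(0) == 0 // cmp reg, 0 sets ZF when reg - 0 == 0
}

fn zf_after_test(reg: u64) -> bool {
    reg & reg == 0           // test reg, reg sets ZF when reg & reg == 0
}

fn main() {
    for reg in [0u64, 1, 0x40, u64::MAX] {
        assert_eq!(zf_after_cmp_zero(reg), zf_after_test(reg));
    }
}
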
// These instructions modify their input operand in-place, so we
@@ -237,7 +288,11 @@ impl Assembler
*truthy = asm.load(*truthy);
}
},
- Opnd::UImm(_) | Opnd::Imm(_) | Opnd::Value(_) => {
+ Opnd::UImm(_) | Opnd::Imm(_) => {
+ *truthy = asm.load(*truthy);
+ },
+ // Opnd::Value could have already been split
+ Opnd::Value(_) if !matches!(truthy, Opnd::InsnOut { .. }) => {
*truthy = asm.load(*truthy);
},
_ => {}
@@ -253,7 +308,7 @@ impl Assembler
*out = asm.next_opnd_out(Opnd::match_num_bits(&[*truthy, *falsy]));
asm.push_insn(insn);
},
- Insn::Mov { dest, src } => {
+ Insn::Mov { dest, src } | Insn::Store { dest, src } => {
match (&dest, &src) {
(Opnd::Mem(_), Opnd::Mem(_)) => {
// We load opnd1 because for mov, opnd0 is the output
@@ -310,13 +365,25 @@ impl Assembler
// Load each operand into the corresponding argument
// register.
for (idx, opnd) in opnds.into_iter().enumerate() {
- asm.load_into(C_ARG_OPNDS[idx], *opnd);
+ asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), *opnd);
}
// Now we push the CCall without any arguments so that it
// just performs the call.
asm.ccall(*fptr, vec![]);
},
+ Insn::Lea { .. } => {
+ // Merge `lea` and `mov` into a single `lea` when possible
+ match (&insn, iterator.peek()) {
+ (Insn::Lea { opnd, out }, Some(Insn::Mov { dest: Opnd::Reg(reg), src }))
+ if matches!(out, Opnd::InsnOut { .. }) && out == src && live_ranges[index] == index + 1 => {
+ asm.push_insn(Insn::Lea { opnd: *opnd, out: Opnd::Reg(*reg) });
+ iterator.map_insn_index(&mut asm);
+ iterator.next_unmapped(); // Pop merged Insn::Mov
+ }
+ _ => asm.push_insn(insn),
+ }
+ },
_ => {
if insn.out_opnd().is_some() {
let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter());
@@ -335,7 +402,7 @@ impl Assembler
}
/// Emit platform-specific machine code
- pub fn x86_emit(&mut self, cb: &mut CodeBlock) -> Vec<u32>
+ pub fn x86_emit(&mut self, cb: &mut CodeBlock, ocb: &mut Option<&mut OutlinedCb>) -> Option<Vec<u32>>
{
/// For some instructions, we want to be able to lower a 64-bit operand
/// without requiring more registers to be available in the register
@@ -365,12 +432,45 @@ impl Assembler
}
}
+ /// Compile a side exit if Target::SideExit is given.
+ fn compile_side_exit(
+ target: Target,
+ asm: &mut Assembler,
+ ocb: &mut Option<&mut OutlinedCb>,
+ ) -> Option<Target> {
+ if let Target::SideExit { counter, context } = target {
+ let side_exit = asm.get_side_exit(&context.unwrap(), Some(counter), ocb.as_mut().unwrap());
+ Some(Target::SideExitPtr(side_exit?))
+ } else {
+ Some(target)
+ }
+ }
- fn emit_csel(cb: &mut CodeBlock, truthy: Opnd, falsy: Opnd, out: Opnd, cmov_fn: fn(&mut CodeBlock, X86Opnd, X86Opnd)) {
- if out != truthy {
- mov(cb, out.into(), truthy.into());
+ fn emit_csel(
+ cb: &mut CodeBlock,
+ truthy: Opnd,
+ falsy: Opnd,
+ out: Opnd,
+ cmov_fn: fn(&mut CodeBlock, X86Opnd, X86Opnd),
+ cmov_neg: fn(&mut CodeBlock, X86Opnd, X86Opnd)){
+
+ // Assert that output is a register
+ out.unwrap_reg();
+
+ // If the truthy value is a memory operand
+ if let Opnd::Mem(_) = truthy {
+ if out != falsy {
+ mov(cb, out.into(), falsy.into());
+ }
+
+ cmov_fn(cb, out.into(), truthy.into());
+ } else {
+ if out != truthy {
+ mov(cb, out.into(), truthy.into());
+ }
+
+ cmov_neg(cb, out.into(), falsy.into());
}
- cmov_fn(cb, out.into(), falsy.into());
}
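
emit_csel needs both condition directions because cmov can only write to a register: when the truthy value is in memory, the falsy value is moved into `out` first and then conditionally overwritten from memory via `cmov_fn`; otherwise the truthy value is moved in and `cmov_neg` conditionally replaces it with the falsy one. Both orders select the same value, as this standalone model shows (not YJIT code):

// `cond` stands for the CPU condition being selected on (e.g. ZF set for csel_e).
fn truthy_in_register(cond: bool, truthy: i64, falsy: i64) -> i64 {
    let mut out = truthy;          // mov out, truthy
    if !cond { out = falsy; }      // cmov_neg out, falsy
    out
}

fn truthy_in_memory(cond: bool, truthy_mem: i64, falsy: i64) -> i64 {
    let mut out = falsy;           // mov out, falsy
    if cond { out = truthy_mem; }  // cmov_fn out, [truthy]
    out
}

fn main() {
    for cond in [true, false] {
        assert_eq!(truthy_in_register(cond, 1, 2), truthy_in_memory(cond, 1, 2));
    }
}
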
//dbg!(&self.insns);
@@ -378,10 +478,13 @@ impl Assembler
// List of GC offsets
let mut gc_offsets: Vec<u32> = Vec::new();
+ // Buffered list of PosMarker callbacks to fire if codegen is successful
+ let mut pos_markers: Vec<(usize, CodePtr)> = vec![];
+
// For each instruction
let start_write_pos = cb.get_write_pos();
- let mut insns_idx: usize = 0;
- while let Some(insn) = self.insns.get(insns_idx) {
+ let mut insn_idx: usize = 0;
+ while let Some(insn) = self.insns.get(insn_idx) {
let src_ptr = cb.get_write_ptr();
let had_dropped_bytes = cb.has_dropped_bytes();
let old_label_state = cb.get_label_state();
@@ -400,8 +503,8 @@ impl Assembler
},
// Report back the current position in the generated code
- Insn::PosMarker(pos_marker) => {
- pos_marker(cb.get_write_ptr());
+ Insn::PosMarker(..) => {
+ pos_markers.push((insn_idx, cb.get_write_ptr()));
},
Insn::BakeString(text) => {
@@ -414,19 +517,37 @@ impl Assembler
cb.write_byte(0);
},
+ // Set up RBP to work with frame pointer unwinding
+ // (e.g. with Linux `perf record --call-graph fp`)
+ Insn::FrameSetup => {
+ if get_option!(frame_pointer) {
+ push(cb, RBP);
+ mov(cb, RBP, RSP);
+ push(cb, RBP);
+ }
+ },
+ Insn::FrameTeardown => {
+ if get_option!(frame_pointer) {
+ pop(cb, RBP);
+ pop(cb, RBP);
+ }
+ },
+
Insn::Add { left, right, .. } => {
let opnd1 = emit_64bit_immediate(cb, right);
add(cb, left.into(), opnd1);
},
- Insn::FrameSetup => {},
- Insn::FrameTeardown => {},
-
Insn::Sub { left, right, .. } => {
let opnd1 = emit_64bit_immediate(cb, right);
sub(cb, left.into(), opnd1);
},
+ Insn::Mul { left, right, .. } => {
+ let opnd1 = emit_64bit_immediate(cb, right);
+ imul(cb, left.into(), opnd1);
+ },
+
Insn::And { left, right, .. } => {
let opnd1 = emit_64bit_immediate(cb, right);
and(cb, left.into(), opnd1);
@@ -490,16 +611,23 @@ impl Assembler
lea(cb, out.into(), opnd.into());
},
- // Load relative address
- Insn::LeaLabel { target, out } => {
- let label_idx = target.unwrap_label_idx();
-
- cb.label_ref(label_idx, 7, |cb, src_addr, dst_addr| {
- let disp = dst_addr - src_addr;
- lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap()));
- });
+ // Load address of jump target
+ Insn::LeaJumpTarget { target, out } => {
+ if let Target::Label(label_idx) = target {
+ // Set output to the raw address of the label
+ cb.label_ref(*label_idx, 7, |cb, src_addr, dst_addr| {
+ let disp = dst_addr - src_addr;
+ lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap()));
+ });
- mov(cb, out.into(), Self::SCRATCH0);
+ mov(cb, out.into(), Self::SCRATCH0);
+ } else {
+ // Set output to the jump target's raw address
+ let target_code = target.unwrap_code_ptr();
+ let target_addr = target_code.raw_addr(cb).as_u64();
+ // Constant encoded length important for patching
+ movabs(cb, out.into(), target_addr);
+ }
},
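
The `movabs` here matters because its encoding length never varies: REX.W + B8+rd + imm64 is always 10 bytes, so the stored target address can later be patched in place without shifting any surrounding code. A standalone sketch of that encoding for RAX (illustrative, not YJIT code):

fn encode_movabs_rax(imm: u64) -> Vec<u8> {
    let mut bytes = vec![0x48, 0xB8]; // REX.W prefix, MOV rax, imm64
    bytes.extend_from_slice(&imm.to_le_bytes());
    bytes
}

fn main() {
    // Same length regardless of the immediate, which is what makes patching safe.
    assert_eq!(encode_movabs_rax(0).len(), 10);
    assert_eq!(encode_movabs_rax(u64::MAX).len(), 10);
}
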
// Push and pop to/from the C stack
@@ -580,61 +708,96 @@ impl Assembler
// Conditional jump to a label
Insn::Jmp(target) => {
- match *target {
+ match compile_side_exit(*target, self, ocb)? {
Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jmp_ptr(cb, code_ptr),
Target::Label(label_idx) => jmp_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
}
}
Insn::Je(target) => {
- match *target {
+ match compile_side_exit(*target, self, ocb)? {
Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => je_ptr(cb, code_ptr),
Target::Label(label_idx) => je_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
}
}
Insn::Jne(target) => {
- match *target {
+ match compile_side_exit(*target, self, ocb)? {
Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jne_ptr(cb, code_ptr),
Target::Label(label_idx) => jne_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
}
}
Insn::Jl(target) => {
- match *target {
+ match compile_side_exit(*target, self, ocb)? {
Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jl_ptr(cb, code_ptr),
Target::Label(label_idx) => jl_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ },
+
+ Insn::Jg(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jg_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jg_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ },
+
+ Insn::Jge(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jge_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jge_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
}
},
Insn::Jbe(target) => {
- match *target {
+ match compile_side_exit(*target, self, ocb)? {
Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jbe_ptr(cb, code_ptr),
Target::Label(label_idx) => jbe_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
+ }
+ },
+
+ Insn::Jb(target) => {
+ match compile_side_exit(*target, self, ocb)? {
+ Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jb_ptr(cb, code_ptr),
+ Target::Label(label_idx) => jb_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
}
},
Insn::Jz(target) => {
- match *target {
+ match compile_side_exit(*target, self, ocb)? {
Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jz_ptr(cb, code_ptr),
Target::Label(label_idx) => jz_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
}
}
Insn::Jnz(target) => {
- match *target {
+ match compile_side_exit(*target, self, ocb)? {
Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jnz_ptr(cb, code_ptr),
Target::Label(label_idx) => jnz_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
}
}
- Insn::Jo(target) => {
- match *target {
+ Insn::Jo(target) |
+ Insn::JoMul(target) => {
+ match compile_side_exit(*target, self, ocb)? {
Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jo_ptr(cb, code_ptr),
Target::Label(label_idx) => jo_label(cb, label_idx),
+ Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"),
}
}
+ Insn::Joz(..) | Insn::Jonz(..) => unreachable!("Joz/Jonz should be unused for now"),
+
// Atomically increment a counter at a given memory location
Insn::IncrCounter { mem, value } => {
assert!(matches!(mem, Opnd::Mem(_)));
@@ -646,43 +809,36 @@ impl Assembler
Insn::Breakpoint => int3(cb),
Insn::CSelZ { truthy, falsy, out } => {
- emit_csel(cb, *truthy, *falsy, *out, cmovnz);
+ emit_csel(cb, *truthy, *falsy, *out, cmovz, cmovnz);
},
Insn::CSelNZ { truthy, falsy, out } => {
- emit_csel(cb, *truthy, *falsy, *out, cmovz);
+ emit_csel(cb, *truthy, *falsy, *out, cmovnz, cmovz);
},
Insn::CSelE { truthy, falsy, out } => {
- emit_csel(cb, *truthy, *falsy, *out, cmovne);
+ emit_csel(cb, *truthy, *falsy, *out, cmove, cmovne);
},
Insn::CSelNE { truthy, falsy, out } => {
- emit_csel(cb, *truthy, *falsy, *out, cmove);
+ emit_csel(cb, *truthy, *falsy, *out, cmovne, cmove);
},
Insn::CSelL { truthy, falsy, out } => {
- emit_csel(cb, *truthy, *falsy, *out, cmovge);
+ emit_csel(cb, *truthy, *falsy, *out, cmovl, cmovge);
},
Insn::CSelLE { truthy, falsy, out } => {
- emit_csel(cb, *truthy, *falsy, *out, cmovg);
+ emit_csel(cb, *truthy, *falsy, *out, cmovle, cmovg);
},
Insn::CSelG { truthy, falsy, out } => {
- emit_csel(cb, *truthy, *falsy, *out, cmovle);
+ emit_csel(cb, *truthy, *falsy, *out, cmovg, cmovle);
},
Insn::CSelGE { truthy, falsy, out } => {
- emit_csel(cb, *truthy, *falsy, *out, cmovl);
+ emit_csel(cb, *truthy, *falsy, *out, cmovge, cmovl);
}
Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
Insn::PadInvalPatch => {
let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()));
- if code_size < JMP_PTR_BYTES {
- nop(cb, (JMP_PTR_BYTES - code_size) as u32);
+ if code_size < cb.jmp_ptr_bytes() {
+ nop(cb, (cb.jmp_ptr_bytes() - code_size) as u32);
}
}
-
- // We want to keep the panic here because some instructions that
- // we feed to the backend could get lowered into other
- // instructions. So it's possible that some of our backend
- // instructions can never make it to the emit stage.
- #[allow(unreachable_patterns)]
- _ => panic!("unsupported instruction passed to x86 backend: {:?}", insn)
};
// On failure, jump to the next page and retry the current insn
@@ -690,18 +846,32 @@ impl Assembler
// Reset cb states before retrying the current Insn
cb.set_label_state(old_label_state);
} else {
- insns_idx += 1;
+ insn_idx += 1;
gc_offsets.append(&mut insn_gc_offsets);
}
}
- gc_offsets
+ // Error if we couldn't write out everything
+ if cb.has_dropped_bytes() {
+ return None
+ } else {
+ // No bytes dropped, so the pos markers point to valid code
+ for (insn_idx, pos) in pos_markers {
+ if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() {
+ callback(pos, &cb);
+ } else {
+ panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}");
+ }
+ }
+
+ return Some(gc_offsets)
+ }
}
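
Buffering the PosMarker callbacks and firing them only after a fully successful emit means they never observe write positions from an attempt that dropped bytes and must be retried on another page. A standalone model of that buffer-then-fire pattern (illustrative, not YJIT code):

fn emit(insn_sizes: &[usize], marker_at: usize, capacity: usize) -> Option<usize> {
    let mut write_pos = 0;
    let mut marker_pos = None;
    for (idx, size) in insn_sizes.iter().enumerate() {
        if idx == marker_at {
            marker_pos = Some(write_pos); // buffer the position, don't fire yet
        }
        write_pos += *size;
        if write_pos > capacity {
            return None;                  // bytes dropped: no callback fires
        }
    }
    marker_pos                            // success: safe to report the position
}

fn main() {
    assert_eq!(emit(&[5, 3, 7], 1, 64), Some(5));
    assert_eq!(emit(&[5, 3, 7], 1, 4), None);
}
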
/// Optimize and compile the stored instructions
- pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) -> Vec<u32>
- {
- let mut asm = self.x86_split().alloc_regs(regs);
+ pub fn compile_with_regs(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>, regs: Vec<Reg>) -> Option<(CodePtr, Vec<u32>)> {
+ let asm = self.x86_split();
+ let mut asm = asm.alloc_regs(regs);
// Create label instances in the code block
for (idx, name) in asm.label_names.iter().enumerate() {
@@ -709,20 +879,28 @@ impl Assembler
assert!(label_idx == idx);
}
- let gc_offsets = asm.x86_emit(cb);
+ let mut ocb = ocb; // for &mut
+ let start_ptr = cb.get_write_ptr();
+ let gc_offsets = asm.x86_emit(cb, &mut ocb);
- if cb.has_dropped_bytes() {
- cb.clear_labels();
- } else {
+ if let (Some(gc_offsets), false) = (gc_offsets, cb.has_dropped_bytes()) {
cb.link_labels();
- }
- gc_offsets
+ Some((start_ptr, gc_offsets))
+ } else {
+ cb.clear_labels();
+
+ None
+ }
}
}
#[cfg(test)]
mod tests {
+ use crate::disasm::{assert_disasm};
+ #[cfg(feature = "disasm")]
+ use crate::disasm::{unindent, disasm_addr_range};
+
use super::*;
fn setup_asm() -> (Assembler, CodeBlock) {
@@ -892,4 +1070,253 @@ mod tests {
assert_eq!(format!("{:x}", cb), "4889c049bbffffffffffff00004c31d8");
}
+
+ #[test]
+ fn test_merge_lea_reg() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.lea(Opnd::mem(64, SP, 8));
+ asm.mov(SP, sp); // should be merged to lea
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_disasm!(cb, "488d5b08", {"
+ 0x0: lea rbx, [rbx + 8]
+ "});
+ }
+
+ #[test]
+ fn test_merge_lea_mem() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.lea(Opnd::mem(64, SP, 8));
+ asm.mov(Opnd::mem(64, SP, 0), sp); // should NOT be merged to lea
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_disasm!(cb, "488d4308488903", {"
+ 0x0: lea rax, [rbx + 8]
+ 0x4: mov qword ptr [rbx], rax
+ "});
+ }
+
+ #[test]
+ fn test_replace_cmp_0() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let val = asm.load(Opnd::mem(64, SP, 8));
+ asm.cmp(val, 0.into());
+ let result = asm.csel_e(Qtrue.into(), Qfalse.into());
+ asm.mov(Opnd::Reg(RAX_REG), result);
+ asm.compile_with_num_regs(&mut cb, 2);
+
+ assert_eq!(format!("{:x}", cb), "488b43084885c0b814000000b900000000480f45c14889c0");
+ }
+
+ #[test]
+ fn test_merge_add_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.add(CFP, Opnd::UImm(0x40));
+ asm.mov(CFP, sp); // should be merged to add
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4983c540");
+ }
+
+ #[test]
+ fn test_merge_sub_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.sub(CFP, Opnd::UImm(0x40));
+        asm.mov(CFP, sp); // should be merged to sub
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4983ed40");
+ }
+
+ #[test]
+ fn test_merge_and_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.and(CFP, Opnd::UImm(0x40));
+        asm.mov(CFP, sp); // should be merged to and
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4983e540");
+ }
+
+ #[test]
+ fn test_merge_or_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.or(CFP, Opnd::UImm(0x40));
+        asm.mov(CFP, sp); // should be merged to or
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4983cd40");
+ }
+
+ #[test]
+ fn test_merge_xor_mov() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let sp = asm.xor(CFP, Opnd::UImm(0x40));
+        asm.mov(CFP, sp); // should be merged to xor
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_eq!(format!("{:x}", cb), "4983f540");
+ }
+
+ #[test]
+ fn test_reorder_c_args_no_cycle() {
+ let (mut asm, mut cb) = setup_asm();
+
+ asm.ccall(0 as _, vec![
+ C_ARG_OPNDS[0], // mov rdi, rdi (optimized away)
+ C_ARG_OPNDS[1], // mov rsi, rsi (optimized away)
+ ]);
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_disasm!(cb, "b800000000ffd0", {"
+ 0x0: mov eax, 0
+ 0x5: call rax
+ "});
+ }
+
+ #[test]
+ fn test_reorder_c_args_single_cycle() {
+ let (mut asm, mut cb) = setup_asm();
+
+ // rdi and rsi form a cycle
+ asm.ccall(0 as _, vec![
+ C_ARG_OPNDS[1], // mov rdi, rsi
+ C_ARG_OPNDS[0], // mov rsi, rdi
+ C_ARG_OPNDS[2], // mov rdx, rdx (optimized away)
+ ]);
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_disasm!(cb, "4989f34889fe4c89dfb800000000ffd0", {"
+ 0x0: mov r11, rsi
+ 0x3: mov rsi, rdi
+ 0x6: mov rdi, r11
+ 0x9: mov eax, 0
+ 0xe: call rax
+ "});
+ }
+
+ #[test]
+ fn test_reorder_c_args_two_cycles() {
+ let (mut asm, mut cb) = setup_asm();
+
+ // rdi and rsi form a cycle, and rdx and rcx form another cycle
+ asm.ccall(0 as _, vec![
+ C_ARG_OPNDS[1], // mov rdi, rsi
+ C_ARG_OPNDS[0], // mov rsi, rdi
+ C_ARG_OPNDS[3], // mov rdx, rcx
+ C_ARG_OPNDS[2], // mov rcx, rdx
+ ]);
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_disasm!(cb, "4989f34889fe4c89df4989cb4889d14c89dab800000000ffd0", {"
+ 0x0: mov r11, rsi
+ 0x3: mov rsi, rdi
+ 0x6: mov rdi, r11
+ 0x9: mov r11, rcx
+ 0xc: mov rcx, rdx
+ 0xf: mov rdx, r11
+ 0x12: mov eax, 0
+ 0x17: call rax
+ "});
+ }
+
+ #[test]
+ fn test_reorder_c_args_large_cycle() {
+ let (mut asm, mut cb) = setup_asm();
+
+ // rdi, rsi, and rdx form a cycle
+ asm.ccall(0 as _, vec![
+ C_ARG_OPNDS[1], // mov rdi, rsi
+ C_ARG_OPNDS[2], // mov rsi, rdx
+ C_ARG_OPNDS[0], // mov rdx, rdi
+ ]);
+ asm.compile_with_num_regs(&mut cb, 0);
+
+ assert_disasm!(cb, "4989f34889d64889fa4c89dfb800000000ffd0", {"
+ 0x0: mov r11, rsi
+ 0x3: mov rsi, rdx
+ 0x6: mov rdx, rdi
+ 0x9: mov rdi, r11
+ 0xc: mov eax, 0
+ 0x11: call rax
+ "});
+ }
+
+ #[test]
+ fn test_reorder_c_args_with_insn_out() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let rax = asm.load(Opnd::UImm(1));
+ let rcx = asm.load(Opnd::UImm(2));
+ let rdx = asm.load(Opnd::UImm(3));
+ // rcx and rdx form a cycle
+ asm.ccall(0 as _, vec![
+ rax, // mov rdi, rax
+ rcx, // mov rsi, rcx
+ rcx, // mov rdx, rcx
+ rdx, // mov rcx, rdx
+ ]);
+ asm.compile_with_num_regs(&mut cb, 3);
+
+ assert_disasm!(cb, "b801000000b902000000ba030000004889c74889ce4989cb4889d14c89dab800000000ffd0", {"
+ 0x0: mov eax, 1
+ 0x5: mov ecx, 2
+ 0xa: mov edx, 3
+ 0xf: mov rdi, rax
+ 0x12: mov rsi, rcx
+ 0x15: mov r11, rcx
+ 0x18: mov rcx, rdx
+ 0x1b: mov rdx, r11
+ 0x1e: mov eax, 0
+ 0x23: call rax
+ "});
+ }
+
+ #[test]
+ fn test_cmov_mem() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let top = Opnd::mem(64, SP, 0);
+ let ary_opnd = SP;
+ let array_len_opnd = Opnd::mem(64, SP, 16);
+
+ asm.cmp(array_len_opnd, 1.into());
+ let elem_opnd = asm.csel_g(Opnd::mem(64, ary_opnd, 0), Qnil.into());
+ asm.mov(top, elem_opnd);
+
+ asm.compile_with_num_regs(&mut cb, 1);
+
+ assert_disasm!(cb, "48837b1001b804000000480f4f03488903", {"
+ 0x0: cmp qword ptr [rbx + 0x10], 1
+ 0x5: mov eax, 4
+ 0xa: cmovg rax, qword ptr [rbx]
+ 0xe: mov qword ptr [rbx], rax
+ "});
+ }
+
+ #[test]
+ fn test_csel_split() {
+ let (mut asm, mut cb) = setup_asm();
+
+ let stack_top = Opnd::mem(64, SP, 0);
+ let elem_opnd = asm.csel_ne(VALUE(0x7f22c88d1930).into(), Qnil.into());
+ asm.mov(stack_top, elem_opnd);
+
+ asm.compile_with_num_regs(&mut cb, 3);
+
+ assert_disasm!(cb, "48b830198dc8227f0000b904000000480f44c1488903", {"
+ 0x0: movabs rax, 0x7f22c88d1930
+ 0xa: mov ecx, 4
+ 0xf: cmove rax, rcx
+ 0x13: mov qword ptr [rbx], rax
+ "});
+ }
}