diff options
Diffstat (limited to 'yjit/src/backend/x86_64/mod.rs')
-rw-r--r-- | yjit/src/backend/x86_64/mod.rs | 390 |
1 files changed, 327 insertions, 63 deletions
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index a2ee94cf66..4ca5e9be9c 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -1,17 +1,12 @@ -#![allow(dead_code)] -#![allow(unused_variables)] -#![allow(unused_imports)] - use std::mem::take; use crate::asm::*; use crate::asm::x86_64::*; -use crate::codegen::{JITState}; -use crate::core::Context; +use crate::codegen::CodePtr; use crate::cruby::*; use crate::backend::ir::*; -use crate::codegen::CodegenGlobals; use crate::options::*; +use crate::utils::*; // Use the x86 register type for this platform pub type Reg = X86Reg; @@ -37,7 +32,7 @@ pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG); impl CodeBlock { // The number of bytes that are generated by jmp_ptr - pub fn jmp_ptr_bytes(&self) -> usize { 6 } + pub fn jmp_ptr_bytes(&self) -> usize { 5 } } /// Map Opnd to X86Opnd @@ -84,15 +79,16 @@ impl From<&Opnd> for X86Opnd { } } +/// List of registers that can be used for stack temps. +pub static TEMP_REGS: [Reg; 5] = [RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG]; + impl Assembler { // A special scratch register for intermediate processing. - // Note: right now this is only used by LeaLabel because label_ref accepts - // a closure and we don't want it to have to capture anything. - const SCRATCH0: X86Opnd = X86Opnd::Reg(R11_REG); + // This register is caller-saved (so we don't have to save it before using it) + pub const SCRATCH_REG: Reg = R11_REG; + const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG); - /// List of registers that can be used for stack temps. - pub const TEMP_REGS: [Reg; 5] = [RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG]; /// Get the list of registers from which we can allocate on this platform pub fn get_alloc_regs() -> Vec<Reg> @@ -139,7 +135,7 @@ impl Assembler // Opnd::Value operands into registers here because: // // - Most instructions can't be encoded with 64-bit immediates. - // - We look for Op::Load specifically when emiting to keep GC'ed + // - We look for Op::Load specifically when emitting to keep GC'ed // VALUEs alive. This is a sort of canonicalization. let mut unmapped_opnds: Vec<Opnd> = vec![]; @@ -172,6 +168,7 @@ impl Assembler match &mut insn { Insn::Add { left, right, out } | Insn::Sub { left, right, out } | + Insn::Mul { left, right, out } | Insn::And { left, right, out } | Insn::Or { left, right, out } | Insn::Xor { left, right, out } => { @@ -184,6 +181,23 @@ impl Assembler iterator.map_insn_index(&mut asm); iterator.next_unmapped(); // Pop merged Insn::Mov } + (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src })) + if out == src && live_ranges[index] == index + 1 && { + // We want to do `dest == left`, but `left` has already gone + // through lower_stack_opnd() while `dest` has not. So we + // lower `dest` before comparing. + let lowered_dest = if let Opnd::Stack { .. } = dest { + asm.lower_stack_opnd(dest) + } else { + *dest + }; + lowered_dest == *left + } => { + *out = *dest; + asm.push_insn(insn); + iterator.map_insn_index(&mut asm); + iterator.next_unmapped(); // Pop merged Insn::Mov + } _ => { match (unmapped_opnds[0], unmapped_opnds[1]) { (Opnd::Mem(_), Opnd::Mem(_)) => { @@ -274,7 +288,11 @@ impl Assembler *truthy = asm.load(*truthy); } }, - Opnd::UImm(_) | Opnd::Imm(_) | Opnd::Value(_) => { + Opnd::UImm(_) | Opnd::Imm(_) => { + *truthy = asm.load(*truthy); + }, + // Opnd::Value could have already been split + Opnd::Value(_) if !matches!(truthy, Opnd::InsnOut { .. }) => { *truthy = asm.load(*truthy); }, _ => {} @@ -290,7 +308,7 @@ impl Assembler *out = asm.next_opnd_out(Opnd::match_num_bits(&[*truthy, *falsy])); asm.push_insn(insn); }, - Insn::Mov { dest, src } => { + Insn::Mov { dest, src } | Insn::Store { dest, src } => { match (&dest, &src) { (Opnd::Mem(_), Opnd::Mem(_)) => { // We load opnd1 because for mov, opnd0 is the output @@ -347,7 +365,7 @@ impl Assembler // Load each operand into the corresponding argument // register. for (idx, opnd) in opnds.into_iter().enumerate() { - asm.load_into(C_ARG_OPNDS[idx], *opnd); + asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), *opnd); } // Now we push the CCall without any arguments so that it @@ -384,7 +402,7 @@ impl Assembler } /// Emit platform-specific machine code - pub fn x86_emit(&mut self, cb: &mut CodeBlock, ocb: &mut Option<&mut OutlinedCb>) -> Vec<u32> + pub fn x86_emit(&mut self, cb: &mut CodeBlock, ocb: &mut Option<&mut OutlinedCb>) -> Option<Vec<u32>> { /// For some instructions, we want to be able to lower a 64-bit operand /// without requiring more registers to be available in the register @@ -419,20 +437,40 @@ impl Assembler target: Target, asm: &mut Assembler, ocb: &mut Option<&mut OutlinedCb>, - ) -> Target { + ) -> Option<Target> { if let Target::SideExit { counter, context } = target { let side_exit = asm.get_side_exit(&context.unwrap(), Some(counter), ocb.as_mut().unwrap()); - Target::SideExitPtr(side_exit) + Some(Target::SideExitPtr(side_exit?)) } else { - target + Some(target) } } - fn emit_csel(cb: &mut CodeBlock, truthy: Opnd, falsy: Opnd, out: Opnd, cmov_fn: fn(&mut CodeBlock, X86Opnd, X86Opnd)) { - if out != truthy { - mov(cb, out.into(), truthy.into()); + fn emit_csel( + cb: &mut CodeBlock, + truthy: Opnd, + falsy: Opnd, + out: Opnd, + cmov_fn: fn(&mut CodeBlock, X86Opnd, X86Opnd), + cmov_neg: fn(&mut CodeBlock, X86Opnd, X86Opnd)){ + + // Assert that output is a register + out.unwrap_reg(); + + // If the truthy value is a memory operand + if let Opnd::Mem(_) = truthy { + if out != falsy { + mov(cb, out.into(), falsy.into()); + } + + cmov_fn(cb, out.into(), truthy.into()); + } else { + if out != truthy { + mov(cb, out.into(), truthy.into()); + } + + cmov_neg(cb, out.into(), falsy.into()); } - cmov_fn(cb, out.into(), falsy.into()); } //dbg!(&self.insns); @@ -440,6 +478,9 @@ impl Assembler // List of GC offsets let mut gc_offsets: Vec<u32> = Vec::new(); + // Buffered list of PosMarker callbacks to fire if codegen is successful + let mut pos_markers: Vec<(usize, CodePtr)> = vec![]; + // For each instruction let start_write_pos = cb.get_write_pos(); let mut insn_idx: usize = 0; @@ -462,8 +503,8 @@ impl Assembler }, // Report back the current position in the generated code - Insn::PosMarker(pos_marker) => { - pos_marker(cb.get_write_ptr()); + Insn::PosMarker(..) => { + pos_markers.push((insn_idx, cb.get_write_ptr())); }, Insn::BakeString(text) => { @@ -476,19 +517,37 @@ impl Assembler cb.write_byte(0); }, + // Set up RBP to work with frame pointer unwinding + // (e.g. with Linux `perf record --call-graph fp`) + Insn::FrameSetup => { + if get_option!(frame_pointer) { + push(cb, RBP); + mov(cb, RBP, RSP); + push(cb, RBP); + } + }, + Insn::FrameTeardown => { + if get_option!(frame_pointer) { + pop(cb, RBP); + pop(cb, RBP); + } + }, + Insn::Add { left, right, .. } => { let opnd1 = emit_64bit_immediate(cb, right); add(cb, left.into(), opnd1); }, - Insn::FrameSetup => {}, - Insn::FrameTeardown => {}, - Insn::Sub { left, right, .. } => { let opnd1 = emit_64bit_immediate(cb, right); sub(cb, left.into(), opnd1); }, + Insn::Mul { left, right, .. } => { + let opnd1 = emit_64bit_immediate(cb, right); + imul(cb, left.into(), opnd1); + }, + Insn::And { left, right, .. } => { let opnd1 = emit_64bit_immediate(cb, right); and(cb, left.into(), opnd1); @@ -552,16 +611,23 @@ impl Assembler lea(cb, out.into(), opnd.into()); }, - // Load relative address - Insn::LeaLabel { target, out } => { - let label_idx = target.unwrap_label_idx(); - - cb.label_ref(label_idx, 7, |cb, src_addr, dst_addr| { - let disp = dst_addr - src_addr; - lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap())); - }); + // Load address of jump target + Insn::LeaJumpTarget { target, out } => { + if let Target::Label(label_idx) = target { + // Set output to the raw address of the label + cb.label_ref(*label_idx, 7, |cb, src_addr, dst_addr| { + let disp = dst_addr - src_addr; + lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap())); + }); - mov(cb, out.into(), Self::SCRATCH0); + mov(cb, out.into(), Self::SCRATCH0); + } else { + // Set output to the jump target's raw address + let target_code = target.unwrap_code_ptr(); + let target_addr = target_code.raw_addr(cb).as_u64(); + // Constant encoded length important for patching + movabs(cb, out.into(), target_addr); + } }, // Push and pop to/from the C stack @@ -642,7 +708,7 @@ impl Assembler // Conditional jump to a label Insn::Jmp(target) => { - match compile_side_exit(*target, self, ocb) { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jmp_ptr(cb, code_ptr), Target::Label(label_idx) => jmp_label(cb, label_idx), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), @@ -650,7 +716,7 @@ impl Assembler } Insn::Je(target) => { - match compile_side_exit(*target, self, ocb) { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => je_ptr(cb, code_ptr), Target::Label(label_idx) => je_label(cb, label_idx), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), @@ -658,7 +724,7 @@ impl Assembler } Insn::Jne(target) => { - match compile_side_exit(*target, self, ocb) { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jne_ptr(cb, code_ptr), Target::Label(label_idx) => jne_label(cb, label_idx), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), @@ -666,23 +732,47 @@ impl Assembler } Insn::Jl(target) => { - match compile_side_exit(*target, self, ocb) { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jl_ptr(cb, code_ptr), Target::Label(label_idx) => jl_label(cb, label_idx), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } }, + Insn::Jg(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jg_ptr(cb, code_ptr), + Target::Label(label_idx) => jg_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + }, + + Insn::Jge(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jge_ptr(cb, code_ptr), + Target::Label(label_idx) => jge_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + }, + Insn::Jbe(target) => { - match compile_side_exit(*target, self, ocb) { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jbe_ptr(cb, code_ptr), Target::Label(label_idx) => jbe_label(cb, label_idx), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } }, + Insn::Jb(target) => { + match compile_side_exit(*target, self, ocb)? { + Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jb_ptr(cb, code_ptr), + Target::Label(label_idx) => jb_label(cb, label_idx), + Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), + } + }, + Insn::Jz(target) => { - match compile_side_exit(*target, self, ocb) { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jz_ptr(cb, code_ptr), Target::Label(label_idx) => jz_label(cb, label_idx), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), @@ -690,21 +780,24 @@ impl Assembler } Insn::Jnz(target) => { - match compile_side_exit(*target, self, ocb) { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jnz_ptr(cb, code_ptr), Target::Label(label_idx) => jnz_label(cb, label_idx), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } } - Insn::Jo(target) => { - match compile_side_exit(*target, self, ocb) { + Insn::Jo(target) | + Insn::JoMul(target) => { + match compile_side_exit(*target, self, ocb)? { Target::CodePtr(code_ptr) | Target::SideExitPtr(code_ptr) => jo_ptr(cb, code_ptr), Target::Label(label_idx) => jo_label(cb, label_idx), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_side_exit"), } } + Insn::Joz(..) | Insn::Jonz(..) => unreachable!("Joz/Jonz should be unused for now"), + // Atomically increment a counter at a given memory location Insn::IncrCounter { mem, value } => { assert!(matches!(mem, Opnd::Mem(_))); @@ -716,28 +809,28 @@ impl Assembler Insn::Breakpoint => int3(cb), Insn::CSelZ { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovnz); + emit_csel(cb, *truthy, *falsy, *out, cmovz, cmovnz); }, Insn::CSelNZ { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovz); + emit_csel(cb, *truthy, *falsy, *out, cmovnz, cmovz); }, Insn::CSelE { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovne); + emit_csel(cb, *truthy, *falsy, *out, cmove, cmovne); }, Insn::CSelNE { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmove); + emit_csel(cb, *truthy, *falsy, *out, cmovne, cmove); }, Insn::CSelL { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovge); + emit_csel(cb, *truthy, *falsy, *out, cmovl, cmovge); }, Insn::CSelLE { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovg); + emit_csel(cb, *truthy, *falsy, *out, cmovle, cmovg); }, Insn::CSelG { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovle); + emit_csel(cb, *truthy, *falsy, *out, cmovg, cmovle); }, Insn::CSelGE { truthy, falsy, out } => { - emit_csel(cb, *truthy, *falsy, *out, cmovl); + emit_csel(cb, *truthy, *falsy, *out, cmovge, cmovl); } Insn::LiveReg { .. } => (), // just a reg alloc signal, no code Insn::PadInvalPatch => { @@ -758,11 +851,25 @@ impl Assembler } } - gc_offsets + // Error if we couldn't write out everything + if cb.has_dropped_bytes() { + return None + } else { + // No bytes dropped, so the pos markers point to valid code + for (insn_idx, pos) in pos_markers { + if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() { + callback(pos, &cb); + } else { + panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}"); + } + } + + return Some(gc_offsets) + } } /// Optimize and compile the stored instructions - pub fn compile_with_regs(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>, regs: Vec<Reg>) -> Vec<u32> { + pub fn compile_with_regs(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>, regs: Vec<Reg>) -> Option<(CodePtr, Vec<u32>)> { let asm = self.x86_split(); let mut asm = asm.alloc_regs(regs); @@ -773,15 +880,18 @@ impl Assembler } let mut ocb = ocb; // for &mut + let start_ptr = cb.get_write_ptr(); let gc_offsets = asm.x86_emit(cb, &mut ocb); - if cb.has_dropped_bytes() { - cb.clear_labels(); - } else { + if let (Some(gc_offsets), false) = (gc_offsets, cb.has_dropped_bytes()) { cb.link_labels(); - } - gc_offsets + Some((start_ptr, gc_offsets)) + } else { + cb.clear_labels(); + + None + } } } @@ -1055,4 +1165,158 @@ mod tests { assert_eq!(format!("{:x}", cb), "4983f540"); } + + #[test] + fn test_reorder_c_args_no_cycle() { + let (mut asm, mut cb) = setup_asm(); + + asm.ccall(0 as _, vec![ + C_ARG_OPNDS[0], // mov rdi, rdi (optimized away) + C_ARG_OPNDS[1], // mov rsi, rsi (optimized away) + ]); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "b800000000ffd0", {" + 0x0: mov eax, 0 + 0x5: call rax + "}); + } + + #[test] + fn test_reorder_c_args_single_cycle() { + let (mut asm, mut cb) = setup_asm(); + + // rdi and rsi form a cycle + asm.ccall(0 as _, vec![ + C_ARG_OPNDS[1], // mov rdi, rsi + C_ARG_OPNDS[0], // mov rsi, rdi + C_ARG_OPNDS[2], // mov rdx, rdx (optimized away) + ]); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "4989f34889fe4c89dfb800000000ffd0", {" + 0x0: mov r11, rsi + 0x3: mov rsi, rdi + 0x6: mov rdi, r11 + 0x9: mov eax, 0 + 0xe: call rax + "}); + } + + #[test] + fn test_reorder_c_args_two_cycles() { + let (mut asm, mut cb) = setup_asm(); + + // rdi and rsi form a cycle, and rdx and rcx form another cycle + asm.ccall(0 as _, vec![ + C_ARG_OPNDS[1], // mov rdi, rsi + C_ARG_OPNDS[0], // mov rsi, rdi + C_ARG_OPNDS[3], // mov rdx, rcx + C_ARG_OPNDS[2], // mov rcx, rdx + ]); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "4989f34889fe4c89df4989cb4889d14c89dab800000000ffd0", {" + 0x0: mov r11, rsi + 0x3: mov rsi, rdi + 0x6: mov rdi, r11 + 0x9: mov r11, rcx + 0xc: mov rcx, rdx + 0xf: mov rdx, r11 + 0x12: mov eax, 0 + 0x17: call rax + "}); + } + + #[test] + fn test_reorder_c_args_large_cycle() { + let (mut asm, mut cb) = setup_asm(); + + // rdi, rsi, and rdx form a cycle + asm.ccall(0 as _, vec![ + C_ARG_OPNDS[1], // mov rdi, rsi + C_ARG_OPNDS[2], // mov rsi, rdx + C_ARG_OPNDS[0], // mov rdx, rdi + ]); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm!(cb, "4989f34889d64889fa4c89dfb800000000ffd0", {" + 0x0: mov r11, rsi + 0x3: mov rsi, rdx + 0x6: mov rdx, rdi + 0x9: mov rdi, r11 + 0xc: mov eax, 0 + 0x11: call rax + "}); + } + + #[test] + fn test_reorder_c_args_with_insn_out() { + let (mut asm, mut cb) = setup_asm(); + + let rax = asm.load(Opnd::UImm(1)); + let rcx = asm.load(Opnd::UImm(2)); + let rdx = asm.load(Opnd::UImm(3)); + // rcx and rdx form a cycle + asm.ccall(0 as _, vec![ + rax, // mov rdi, rax + rcx, // mov rsi, rcx + rcx, // mov rdx, rcx + rdx, // mov rcx, rdx + ]); + asm.compile_with_num_regs(&mut cb, 3); + + assert_disasm!(cb, "b801000000b902000000ba030000004889c74889ce4989cb4889d14c89dab800000000ffd0", {" + 0x0: mov eax, 1 + 0x5: mov ecx, 2 + 0xa: mov edx, 3 + 0xf: mov rdi, rax + 0x12: mov rsi, rcx + 0x15: mov r11, rcx + 0x18: mov rcx, rdx + 0x1b: mov rdx, r11 + 0x1e: mov eax, 0 + 0x23: call rax + "}); + } + + #[test] + fn test_cmov_mem() { + let (mut asm, mut cb) = setup_asm(); + + let top = Opnd::mem(64, SP, 0); + let ary_opnd = SP; + let array_len_opnd = Opnd::mem(64, SP, 16); + + asm.cmp(array_len_opnd, 1.into()); + let elem_opnd = asm.csel_g(Opnd::mem(64, ary_opnd, 0), Qnil.into()); + asm.mov(top, elem_opnd); + + asm.compile_with_num_regs(&mut cb, 1); + + assert_disasm!(cb, "48837b1001b804000000480f4f03488903", {" + 0x0: cmp qword ptr [rbx + 0x10], 1 + 0x5: mov eax, 4 + 0xa: cmovg rax, qword ptr [rbx] + 0xe: mov qword ptr [rbx], rax + "}); + } + + #[test] + fn test_csel_split() { + let (mut asm, mut cb) = setup_asm(); + + let stack_top = Opnd::mem(64, SP, 0); + let elem_opnd = asm.csel_ne(VALUE(0x7f22c88d1930).into(), Qnil.into()); + asm.mov(stack_top, elem_opnd); + + asm.compile_with_num_regs(&mut cb, 3); + + assert_disasm!(cb, "48b830198dc8227f0000b904000000480f44c1488903", {" + 0x0: movabs rax, 0x7f22c88d1930 + 0xa: mov ecx, 4 + 0xf: cmove rax, rcx + 0x13: mov qword ptr [rbx], rax + "}); + } } |