 -rw-r--r--  zjit/src/backend/arm64/mod.rs   | 114
 -rw-r--r--  zjit/src/backend/lir.rs         | 623
 -rw-r--r--  zjit/src/backend/tests.rs       |  10
 -rw-r--r--  zjit/src/backend/x86_64/mod.rs  |  47
 -rw-r--r--  zjit/src/codegen.rs             | 190
 -rw-r--r--  zjit/src/hir.rs                 |   4
 -rw-r--r--  zjit/src/invariants.rs          |   1
 -rw-r--r--  zjit/src/options.rs             |   3
 8 files changed, 773 insertions(+), 219 deletions(-)
diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index 574249dabd..d06e84536f 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -694,7 +694,8 @@ impl Assembler { /// VRegs, most splits should happen in [`Self::arm64_split`]. However, some instructions /// need to be split with registers after `alloc_regs`, e.g. for `compile_exits`, so this /// splits them and uses scratch registers for it. - fn arm64_scratch_split(mut self) -> Assembler { + /// Linearizes all blocks into a single giant block. + fn arm64_scratch_split(self) -> Assembler { /// If opnd is Opnd::Mem with a too large disp, make the disp smaller using lea. fn split_large_disp(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd) -> Opnd { match opnd { @@ -750,12 +751,23 @@ impl Assembler { // Prepare StackState to lower MemBase::Stack let stack_state = StackState::new(self.stack_base_idx); - let mut asm_local = Assembler::new_with_asm(&self); + let mut asm_local = Assembler::new(); + asm_local.accept_scratch_reg = true; + asm_local.stack_base_idx = self.stack_base_idx; + asm_local.label_names = self.label_names.clone(); + asm_local.live_ranges.resize(self.live_ranges.len(), LiveRange { start: None, end: None }); + + // Create one giant block to linearize everything into + asm_local.new_block_without_id(); + let asm = &mut asm_local; - asm.accept_scratch_reg = true; - let iterator = &mut self.instruction_iterator(); - while let Some((_, mut insn)) = iterator.next(asm) { + // Get linearized instructions with branch parameters expanded into ParallelMov + let linearized_insns = self.linearize_instructions(); + + // Process each linearized instruction + for (idx, insn) in linearized_insns.iter().enumerate() { + let mut insn = insn.clone(); match &mut insn { Insn::Add { left, right, out } | Insn::Sub { left, right, out } | @@ -795,7 +807,7 @@ impl Assembler { }; // If the next instruction is JoMul - if matches!(iterator.peek(), Some((_, Insn::JoMul(_)))) { + if idx + 1 < linearized_insns.len() && matches!(linearized_insns[idx + 1], Insn::JoMul(_)) { // Produce a register that is all zeros or all ones // Based on the sign bit of the 64-bit mul result asm.push_insn(Insn::RShift { out: SCRATCH0_OPND, opnd: reg_out, shift: Opnd::UImm(63) }); @@ -940,7 +952,7 @@ impl Assembler { /// Emit a conditional jump instruction to a specific target. This is /// called when lowering any of the conditional jump instructions. - fn emit_conditional_jump<const CONDITION: u8>(cb: &mut CodeBlock, target: Target) { + fn emit_conditional_jump<const CONDITION: u8>(asm: &Assembler, cb: &mut CodeBlock, target: Target) { fn generate_branch<const CONDITION: u8>(cb: &mut CodeBlock, src_addr: i64, dst_addr: i64) { let num_insns = if bcond_offset_fits_bits((dst_addr - src_addr) / 4) { // If the jump offset fits into the conditional jump as @@ -991,30 +1003,31 @@ impl Assembler { (num_insns..cb.conditional_jump_insns()).for_each(|_| nop(cb)); } - match target { + let label = match target { Target::CodePtr(dst_ptr) => { let dst_addr = dst_ptr.as_offset(); let src_addr = cb.get_write_ptr().as_offset(); generate_branch::<CONDITION>(cb, src_addr, dst_addr); + return; }, - Target::Label(label_idx) => { - // Try to use a single B.cond instruction - cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { - // +1 since src_addr is after the instruction while A64 - // counts the offset relative to the start. 
- let offset = (dst_addr - src_addr) / 4 + 1; - if bcond_offset_fits_bits(offset) { - bcond(cb, CONDITION, InstructionOffset::from_insns(offset as i32)); - Ok(()) - } else { - Err(()) - } - }); - }, + Target::Label(l) => l, + Target::Block(ref edge) => asm.block_label(edge.target), Target::SideExit { .. } => { unreachable!("Target::SideExit should have been compiled by compile_exits") }, }; + // Try to use a single B.cond instruction + cb.label_ref(label, 4, |cb, src_addr, dst_addr| { + // +1 since src_addr is after the instruction while A64 + // counts the offset relative to the start. + let offset = (dst_addr - src_addr) / 4 + 1; + if bcond_offset_fits_bits(offset) { + bcond(cb, CONDITION, InstructionOffset::from_insns(offset as i32)); + Ok(()) + } else { + Err(()) + } + }); } /// Emit a CBZ or CBNZ which branches when a register is zero or non-zero @@ -1117,8 +1130,13 @@ impl Assembler { let (_hook, mut hook_insn_idx) = AssemblerPanicHook::new(self, 0); // For each instruction + // NOTE: At this point, the assembler should have been linearized into a single giant block + // by either resolve_parallel_mov_pass() or arm64_scratch_split(). let mut insn_idx: usize = 0; - while let Some(insn) = self.insns.get(insn_idx) { + assert_eq!(self.basic_blocks.len(), 1, "Assembler should be linearized into a single block before arm64_emit"); + let insns = &self.basic_blocks[0].insns; + + while let Some(insn) = insns.get(insn_idx) { // Update insn_idx that is shown on panic hook_insn_idx.as_mut().map(|idx| idx.lock().map(|mut idx| *idx = insn_idx).unwrap()); @@ -1222,7 +1240,7 @@ impl Assembler { }, Insn::Mul { left, right, out } => { // If the next instruction is JoMul with RShift created by arm64_scratch_split - match (self.insns.get(insn_idx + 1), self.insns.get(insn_idx + 2)) { + match (insns.get(insn_idx + 1), insns.get(insn_idx + 2)) { (Some(Insn::RShift { out: out_sign, opnd: out_opnd, shift: out_shift }), Some(Insn::JoMul(_))) => { // Compute the high 64 bits smulh(cb, Self::EMIT_OPND, left.into(), right.into()); @@ -1487,34 +1505,48 @@ impl Assembler { } }); }, + Target::Block(ref edge) => { + let label = self.block_label(edge.target); + cb.label_ref(label, 4, |cb, src_addr, dst_addr| { + // +1 since src_addr is after the instruction while A64 + // counts the offset relative to the start. + let offset = (dst_addr - src_addr) / 4 + 1; + if b_offset_fits_bits(offset) { + b(cb, InstructionOffset::from_insns(offset as i32)); + Ok(()) + } else { + Err(()) + } + }); + }, Target::SideExit { .. 
} => { unreachable!("Target::SideExit should have been compiled by compile_exits") }, }; }, Insn::Je(target) | Insn::Jz(target) => { - emit_conditional_jump::<{Condition::EQ}>(cb, target.clone()); + emit_conditional_jump::<{Condition::EQ}>(self, cb, target.clone()); }, Insn::Jne(target) | Insn::Jnz(target) | Insn::JoMul(target) => { - emit_conditional_jump::<{Condition::NE}>(cb, target.clone()); + emit_conditional_jump::<{Condition::NE}>(self, cb, target.clone()); }, Insn::Jl(target) => { - emit_conditional_jump::<{Condition::LT}>(cb, target.clone()); + emit_conditional_jump::<{Condition::LT}>(self, cb, target.clone()); }, Insn::Jg(target) => { - emit_conditional_jump::<{Condition::GT}>(cb, target.clone()); + emit_conditional_jump::<{Condition::GT}>(self, cb, target.clone()); }, Insn::Jge(target) => { - emit_conditional_jump::<{Condition::GE}>(cb, target.clone()); + emit_conditional_jump::<{Condition::GE}>(self, cb, target.clone()); }, Insn::Jbe(target) => { - emit_conditional_jump::<{Condition::LS}>(cb, target.clone()); + emit_conditional_jump::<{Condition::LS}>(self, cb, target.clone()); }, Insn::Jb(target) => { - emit_conditional_jump::<{Condition::CC}>(cb, target.clone()); + emit_conditional_jump::<{Condition::CC}>(self, cb, target.clone()); }, Insn::Jo(target) => { - emit_conditional_jump::<{Condition::VS}>(cb, target.clone()); + emit_conditional_jump::<{Condition::VS}>(self, cb, target.clone()); }, Insn::Joz(opnd, target) => { emit_cmp_zero_jump(cb, opnd.into(), true, target.clone()); @@ -1537,8 +1569,8 @@ impl Assembler { let Some(Insn::Cmp { left: status_reg @ Opnd::Reg(_), right: Opnd::UImm(_) | Opnd::Imm(_), - }) = self.insns.get(insn_idx + 1) else { - panic!("arm64_scratch_split should add Cmp after IncrCounter: {:?}", self.insns.get(insn_idx + 1)); + }) = insns.get(insn_idx + 1) else { + panic!("arm64_scratch_split should add Cmp after IncrCounter: {:?}", insns.get(insn_idx + 1)); }; // Attempt to increment a counter @@ -1587,7 +1619,7 @@ impl Assembler { } else { // No bytes dropped, so the pos markers point to valid code for (insn_idx, pos) in pos_markers { - if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() { + if let Insn::PosMarker(callback) = insns.get(insn_idx).unwrap() { callback(pos, cb); } else { panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}"); @@ -1617,6 +1649,10 @@ impl Assembler { if use_scratch_reg { asm = asm.arm64_scratch_split(); asm_dump!(asm, scratch_split); + } else { + // For trampolines that use scratch registers, resolve ParallelMov without scratch_reg. 
+ asm = asm.resolve_parallel_mov_pass(); + asm_dump!(asm, resolve_parallel_mov); } // Create label instances in the code block @@ -1681,12 +1717,15 @@ mod tests { use super::*; use insta::assert_snapshot; + use crate::hir; static TEMP_REGS: [Reg; 5] = [X1_REG, X9_REG, X10_REG, X14_REG, X15_REG]; fn setup_asm() -> (Assembler, CodeBlock) { crate::options::rb_zjit_prepare_options(); // Allow `get_option!` in Assembler - (Assembler::new(), CodeBlock::new_dummy()) + let mut asm = Assembler::new(); + asm.new_block_without_id(); + (asm, CodeBlock::new_dummy()) } #[test] @@ -1694,6 +1733,7 @@ mod tests { use crate::hir::SideExitReason; let mut asm = Assembler::new(); + asm.new_block_without_id(); asm.stack_base_idx = 1; let label = asm.new_label("bb0"); @@ -2107,6 +2147,7 @@ mod tests { #[test] fn test_store_with_valid_scratch_reg() { let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg(); + asm.new_block_without_id(); let mut cb = CodeBlock::new_dummy(); asm.store(Opnd::mem(64, scratch_reg, 0), 0x83902.into()); @@ -2560,6 +2601,7 @@ mod tests { crate::options::rb_zjit_prepare_options(); // Allow `get_option!` in Assembler let mut asm = Assembler::new(); + asm.new_block_without_id(); let mut cb = CodeBlock::new_dummy_sized(memory_required); let far_label = asm.new_label("far"); diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index 06127b5c1a..f2f7bc6165 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -7,6 +7,7 @@ use std::sync::{Arc, Mutex}; use crate::codegen::local_size_and_idx_to_ep_offset; use crate::cruby::{Qundef, RUBY_OFFSET_CFP_PC, RUBY_OFFSET_CFP_SP, SIZEOF_VALUE_I32, vm_stack_canary}; use crate::hir::{Invariant, SideExitReason}; +use crate::hir; use crate::options::{TraceExits, debug, get_option}; use crate::cruby::VALUE; use crate::payload::IseqVersionRef; @@ -15,6 +16,104 @@ use crate::virtualmem::CodePtr; use crate::asm::{CodeBlock, Label}; use crate::state::rb_zjit_record_exit_stack; +/// LIR Block ID. Unique ID for each block, and also defined in LIR so +/// we can differentiate it from HIR block ids. +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, PartialOrd, Ord)] +pub struct BlockId(pub usize); + +impl From<BlockId> for usize { + fn from(val: BlockId) -> Self { + val.0 + } +} + +impl std::fmt::Display for BlockId { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "l{}", self.0) + } +} + +/// Dummy HIR block ID used when creating test or invalid LIR blocks +const DUMMY_HIR_BLOCK_ID: usize = usize::MAX; +/// Dummy RPO index used when creating test or invalid LIR blocks +const DUMMY_RPO_INDEX: usize = usize::MAX; + +#[derive(Debug, PartialEq, Clone)] +pub struct BranchEdge { + pub target: BlockId, + pub args: Vec<Opnd>, +} + +#[derive(Clone, Debug)] +pub struct BasicBlock { + // Unique id for this block + pub id: BlockId, + + // HIR block this LIR block was lowered from. Not injective: multiple LIR blocks may share + // the same hir_block_id because we split HIR blocks into multiple LIR blocks during lowering. 
+ pub hir_block_id: hir::BlockId, + + pub is_entry: bool, + + // Instructions in this basic block + pub insns: Vec<Insn>, + + // Input parameters for this block + pub parameters: Vec<Opnd>, + + // RPO position of the source HIR block + pub rpo_index: usize, +} + +pub struct EdgePair(Option<BranchEdge>, Option<BranchEdge>); + +impl BasicBlock { + fn new(id: BlockId, hir_block_id: hir::BlockId, is_entry: bool, rpo_index: usize) -> Self { + Self { + id, + hir_block_id, + is_entry, + insns: vec![], + parameters: vec![], + rpo_index, + } + } + + pub fn add_parameter(&mut self, param: Opnd) { + self.parameters.push(param); + } + + pub fn push_insn(&mut self, insn: Insn) { + self.insns.push(insn); + } + + pub fn edges(&self) -> EdgePair { + assert!(self.insns.last().unwrap().is_terminator()); + let extract_edge = |insn: &Insn| -> Option<BranchEdge> { + if let Some(Target::Block(edge)) = insn.target() { + Some(edge.clone()) + } else { + None + } + }; + + match self.insns.as_slice() { + [] => panic!("empty block"), + [.., second_last, last] => { + EdgePair(extract_edge(second_last), extract_edge(last)) + }, + [.., last] => { + EdgePair(extract_edge(last), None) + } + } + } + + /// Sort key for scheduling blocks in code layout order + pub fn sort_key(&self) -> (usize, usize) { + (self.rpo_index, self.id.0) + } +} + pub use crate::backend::current::{ mem_base_reg, Reg, @@ -309,13 +408,15 @@ pub struct SideExit { /// Branch target (something that we can jump to) /// for branch instructions -#[derive(Clone, Debug)] +#[derive(Clone)] pub enum Target { /// Pointer to a piece of ZJIT-generated code CodePtr(CodePtr), /// A label within the generated code Label(Label), + /// An LIR branch edge + Block(BranchEdge), /// Side exit to the interpreter SideExit { /// Context used for compiling the side exit @@ -325,6 +426,32 @@ pub enum Target }, } +impl fmt::Debug for Target { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Target::CodePtr(ptr) => write!(f, "CodePtr({:?})", ptr), + Target::Label(label) => write!(f, "Label({:?})", label), + Target::Block(edge) => { + if edge.args.is_empty() { + write!(f, "Block({:?})", edge.target) + } else { + write!(f, "Block({:?}(", edge.target)?; + for (i, arg) in edge.args.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{:?}", arg)?; + } + write!(f, "))") + } + } + Target::SideExit { exit, reason } => { + write!(f, "SideExit {{ exit: {:?}, reason: {:?} }}", exit, reason) + } + } + } +} + impl Target { pub fn unwrap_label(&self) -> Label { @@ -771,6 +898,29 @@ impl Insn { _ => None } } + + /// Returns true if this instruction is a terminator (ends a basic block). + pub fn is_terminator(&self) -> bool { + match self { + Insn::Jbe(_) | + Insn::Jb(_) | + Insn::Je(_) | + Insn::Jl(_) | + Insn::Jg(_) | + Insn::Jge(_) | + Insn::Jmp(_) | + Insn::JmpOpnd(_) | + Insn::Jne(_) | + Insn::Jnz(_) | + Insn::Jo(_) | + Insn::JoMul(_) | + Insn::Jz(_) | + Insn::Joz(..) | + Insn::Jonz(..) | + Insn::CRet(_) => true, + _ => false + } + } } /// An iterator that will yield a non-mutable reference to each operand in turn @@ -806,22 +956,33 @@ impl<'a> Iterator for InsnOpndIterator<'a> { Insn::Label(target) | Insn::LeaJumpTarget { target, .. } | Insn::PatchPoint { target, .. } => { - if let Target::SideExit { exit: SideExit { stack, locals, .. }, .. 
} = target { - let stack_idx = self.idx; - if stack_idx < stack.len() { - let opnd = &stack[stack_idx]; - self.idx += 1; - return Some(opnd); - } + match target { + Target::SideExit { exit: SideExit { stack, locals, .. }, .. } => { + let stack_idx = self.idx; + if stack_idx < stack.len() { + let opnd = &stack[stack_idx]; + self.idx += 1; + return Some(opnd); + } - let local_idx = self.idx - stack.len(); - if local_idx < locals.len() { - let opnd = &locals[local_idx]; - self.idx += 1; - return Some(opnd); + let local_idx = self.idx - stack.len(); + if local_idx < locals.len() { + let opnd = &locals[local_idx]; + self.idx += 1; + return Some(opnd); + } + None + } + Target::Block(edge) => { + if self.idx < edge.args.len() { + let opnd = &edge.args[self.idx]; + self.idx += 1; + return Some(opnd); + } + None } + _ => None } - None } Insn::Joz(opnd, target) | @@ -831,22 +992,34 @@ impl<'a> Iterator for InsnOpndIterator<'a> { return Some(opnd); } - if let Target::SideExit { exit: SideExit { stack, locals, .. }, .. } = target { - let stack_idx = self.idx - 1; - if stack_idx < stack.len() { - let opnd = &stack[stack_idx]; - self.idx += 1; - return Some(opnd); - } + match target { + Target::SideExit { exit: SideExit { stack, locals, .. }, .. } => { + let stack_idx = self.idx - 1; + if stack_idx < stack.len() { + let opnd = &stack[stack_idx]; + self.idx += 1; + return Some(opnd); + } - let local_idx = stack_idx - stack.len(); - if local_idx < locals.len() { - let opnd = &locals[local_idx]; - self.idx += 1; - return Some(opnd); + let local_idx = stack_idx - stack.len(); + if local_idx < locals.len() { + let opnd = &locals[local_idx]; + self.idx += 1; + return Some(opnd); + } + None + } + Target::Block(edge) => { + let arg_idx = self.idx - 1; + if arg_idx < edge.args.len() { + let opnd = &edge.args[arg_idx]; + self.idx += 1; + return Some(opnd); + } + None } + _ => None } - None } Insn::BakeString(_) | @@ -975,22 +1148,33 @@ impl<'a> InsnOpndMutIterator<'a> { Insn::Label(target) | Insn::LeaJumpTarget { target, .. } | Insn::PatchPoint { target, .. } => { - if let Target::SideExit { exit: SideExit { stack, locals, .. }, .. } = target { - let stack_idx = self.idx; - if stack_idx < stack.len() { - let opnd = &mut stack[stack_idx]; - self.idx += 1; - return Some(opnd); - } + match target { + Target::SideExit { exit: SideExit { stack, locals, .. }, .. } => { + let stack_idx = self.idx; + if stack_idx < stack.len() { + let opnd = &mut stack[stack_idx]; + self.idx += 1; + return Some(opnd); + } - let local_idx = self.idx - stack.len(); - if local_idx < locals.len() { - let opnd = &mut locals[local_idx]; - self.idx += 1; - return Some(opnd); + let local_idx = self.idx - stack.len(); + if local_idx < locals.len() { + let opnd = &mut locals[local_idx]; + self.idx += 1; + return Some(opnd); + } + None + } + Target::Block(edge) => { + if self.idx < edge.args.len() { + let opnd = &mut edge.args[self.idx]; + self.idx += 1; + return Some(opnd); + } + None } + _ => None } - None } Insn::Joz(opnd, target) | @@ -1000,22 +1184,34 @@ impl<'a> InsnOpndMutIterator<'a> { return Some(opnd); } - if let Target::SideExit { exit: SideExit { stack, locals, .. }, .. } = target { - let stack_idx = self.idx - 1; - if stack_idx < stack.len() { - let opnd = &mut stack[stack_idx]; - self.idx += 1; - return Some(opnd); - } + match target { + Target::SideExit { exit: SideExit { stack, locals, .. }, .. 
} => { + let stack_idx = self.idx - 1; + if stack_idx < stack.len() { + let opnd = &mut stack[stack_idx]; + self.idx += 1; + return Some(opnd); + } - let local_idx = stack_idx - stack.len(); - if local_idx < locals.len() { - let opnd = &mut locals[local_idx]; - self.idx += 1; - return Some(opnd); + let local_idx = stack_idx - stack.len(); + if local_idx < locals.len() { + let opnd = &mut locals[local_idx]; + self.idx += 1; + return Some(opnd); + } + None } + Target::Block(edge) => { + let arg_idx = self.idx - 1; + if arg_idx < edge.args.len() { + let opnd = &mut edge.args[arg_idx]; + self.idx += 1; + return Some(opnd); + } + None + } + _ => None } - None } Insn::BakeString(_) | @@ -1332,7 +1528,12 @@ const ASSEMBLER_INSNS_CAPACITY: usize = 256; /// optimized and lowered #[derive(Clone)] pub struct Assembler { - pub(super) insns: Vec<Insn>, + pub basic_blocks: Vec<BasicBlock>, + + /// The block to which new instructions are added. Used during HIR to LIR lowering to + /// determine which LIR block we should add instructions to. Set by `set_current_block()` + /// and automatically set to new entry blocks created by `new_block()`. + current_block_id: BlockId, /// Live range for each VReg indexed by its `idx`` pub(super) live_ranges: Vec<LiveRange>, @@ -1350,7 +1551,10 @@ pub struct Assembler { pub(super) stack_base_idx: usize, /// If Some, the next ccall should verify its leafness - leaf_ccall_stack_size: Option<usize> + leaf_ccall_stack_size: Option<usize>, + + /// Current instruction index, incremented for each instruction pushed + idx: usize, } impl Assembler @@ -1358,12 +1562,14 @@ impl Assembler /// Create an Assembler with defaults pub fn new() -> Self { Self { - insns: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY), - live_ranges: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY), label_names: Vec::default(), accept_scratch_reg: false, stack_base_idx: 0, leaf_ccall_stack_size: None, + basic_blocks: Vec::default(), + current_block_id: BlockId(0), + live_ranges: Vec::default(), + idx: 0, } } @@ -1387,11 +1593,62 @@ impl Assembler stack_base_idx: old_asm.stack_base_idx, ..Self::new() }; - // Bump the initial VReg index to allow the use of the VRegs for the old Assembler + + // Initialize basic blocks from the old assembler, preserving hir_block_id and entry flag + // but with empty instruction lists + for old_block in &old_asm.basic_blocks { + asm.new_block_from_old_block(&old_block); + } + + // Initialize live_ranges to match the old assembler's size + // This allows reusing VRegs from the old assembler asm.live_ranges.resize(old_asm.live_ranges.len(), LiveRange { start: None, end: None }); + asm } + // Create a new LIR basic block. Returns the newly created block ID + pub fn new_block(&mut self, hir_block_id: hir::BlockId, is_entry: bool, rpo_index: usize) -> BlockId { + let bb_id = BlockId(self.basic_blocks.len()); + let lir_bb = BasicBlock::new(bb_id, hir_block_id, is_entry, rpo_index); + self.basic_blocks.push(lir_bb); + if is_entry { + self.set_current_block(bb_id); + } + bb_id + } + + // Create a new LIR basic block from an old one. This should only be used + // when creating new assemblers during passes when we want to translate + // one assembler to a new one. 
+ pub fn new_block_from_old_block(&mut self, old_block: &BasicBlock) -> BlockId { + let bb_id = BlockId(self.basic_blocks.len()); + let lir_bb = BasicBlock::new(bb_id, old_block.hir_block_id, old_block.is_entry, old_block.rpo_index); + self.basic_blocks.push(lir_bb); + bb_id + } + + // Create a LIR basic block without a valid HIR block ID (for testing or internal use). + pub fn new_block_without_id(&mut self) -> BlockId { + self.new_block(hir::BlockId(DUMMY_HIR_BLOCK_ID), true, DUMMY_RPO_INDEX) + } + + pub fn set_current_block(&mut self, block_id: BlockId) { + self.current_block_id = block_id; + } + + pub fn current_block(&mut self) -> &mut BasicBlock { + &mut self.basic_blocks[self.current_block_id.0] + } + + /// Return basic blocks sorted by RPO index, then by block ID. + /// TODO: Use a more advanced scheduling algorithm + pub fn sorted_blocks(&self) -> Vec<&BasicBlock> { + let mut sorted: Vec<&BasicBlock> = self.basic_blocks.iter().collect(); + sorted.sort_by_key(|block| block.sort_key()); + sorted + } + /// Return true if `opnd` is or depends on `reg` pub fn has_reg(opnd: Opnd, reg: Reg) -> bool { match opnd { @@ -1402,11 +1659,100 @@ impl Assembler } pub fn instruction_iterator(&mut self) -> InsnIter { - let insns = take(&mut self.insns); - InsnIter { - old_insns_iter: insns.into_iter(), + let mut blocks = take(&mut self.basic_blocks); + blocks.sort_by_key(|block| block.sort_key()); + + let mut iter = InsnIter { + blocks, + current_block_idx: 0, + current_insn_iter: vec![].into_iter(), // Will be replaced immediately peeked: None, index: 0, + }; + + // Set up first block's iterator + if !iter.blocks.is_empty() { + iter.current_insn_iter = take(&mut iter.blocks[0].insns).into_iter(); + } + + iter + } + + /// Return an operand for a basic block argument at a given index. + /// To simplify the implementation, we allocate a fixed register or a stack slot + /// for each basic block argument. + pub fn param_opnd(idx: usize) -> Opnd { + use crate::backend::current::ALLOC_REGS; + use crate::cruby::SIZEOF_VALUE_I32; + + if idx < ALLOC_REGS.len() { + Opnd::Reg(ALLOC_REGS[idx]) + } else { + // With FrameSetup, the address that NATIVE_BASE_PTR points to stores an old value in the register. + // To avoid clobbering it, we need to start from the next slot, hence `+ 1` for the index. + Opnd::mem(64, NATIVE_BASE_PTR, (idx - ALLOC_REGS.len() + 1) as i32 * -SIZEOF_VALUE_I32) + } + } + + pub fn linearize_instructions(&self) -> Vec<Insn> { + // Emit instructions with labels, expanding branch parameters + let mut insns = Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY); + + for block in self.sorted_blocks() { + // Process each instruction, expanding branch params if needed + for insn in &block.insns { + self.expand_branch_insn(insn, &mut insns); + } + } + insns + } + + /// Expand and linearize a branch instruction: + /// 1. If the branch has Target::Block with arguments, insert a ParallelMov first + /// 2. Convert Target::Block to Target::Label + /// 3. 
Push the converted instruction + fn expand_branch_insn(&self, insn: &Insn, insns: &mut Vec<Insn>) { + // Helper to process branch arguments and return the label target + let mut process_edge = |edge: &BranchEdge| -> Label { + if !edge.args.is_empty() { + insns.push(Insn::ParallelMov { + moves: edge.args.iter().enumerate() + .map(|(idx, &arg)| (Assembler::param_opnd(idx), arg)) + .collect() + }); + } + self.block_label(edge.target) + }; + + // Convert Target::Block to Target::Label, processing args if needed + let stripped_insn = match insn { + Insn::Jmp(Target::Block(edge)) => Insn::Jmp(Target::Label(process_edge(edge))), + Insn::Jz(Target::Block(edge)) => Insn::Jz(Target::Label(process_edge(edge))), + Insn::Jnz(Target::Block(edge)) => Insn::Jnz(Target::Label(process_edge(edge))), + Insn::Je(Target::Block(edge)) => Insn::Je(Target::Label(process_edge(edge))), + Insn::Jne(Target::Block(edge)) => Insn::Jne(Target::Label(process_edge(edge))), + Insn::Jl(Target::Block(edge)) => Insn::Jl(Target::Label(process_edge(edge))), + Insn::Jg(Target::Block(edge)) => Insn::Jg(Target::Label(process_edge(edge))), + Insn::Jge(Target::Block(edge)) => Insn::Jge(Target::Label(process_edge(edge))), + Insn::Jbe(Target::Block(edge)) => Insn::Jbe(Target::Label(process_edge(edge))), + Insn::Jb(Target::Block(edge)) => Insn::Jb(Target::Label(process_edge(edge))), + Insn::Jo(Target::Block(edge)) => Insn::Jo(Target::Label(process_edge(edge))), + Insn::JoMul(Target::Block(edge)) => Insn::JoMul(Target::Label(process_edge(edge))), + Insn::Joz(opnd, Target::Block(edge)) => Insn::Joz(*opnd, Target::Label(process_edge(edge))), + Insn::Jonz(opnd, Target::Block(edge)) => Insn::Jonz(*opnd, Target::Label(process_edge(edge))), + _ => insn.clone() + }; + + // Push the stripped instruction + insns.push(stripped_insn); + } + + // Get the label for a given block by extracting it from the first instruction. + pub(super) fn block_label(&self, block_id: BlockId) -> Label { + let block = &self.basic_blocks[block_id.0]; + match block.insns.first() { + Some(Insn::Label(Target::Label(label))) => *label, + other => panic!("Expected first instruction of block {:?} to be a Label, but found: {:?}", block_id, other), } } @@ -1444,7 +1790,7 @@ impl Assembler /// operands to this instruction. pub fn push_insn(&mut self, insn: Insn) { // Index of this instruction - let insn_idx = self.insns.len(); + let insn_idx = self.idx; // Initialize the live range of the output VReg to insn_idx..=insn_idx if let Some(Opnd::VReg { idx, .. }) = insn.out_opnd() { @@ -1475,7 +1821,9 @@ impl Assembler } } - self.insns.push(insn); + self.idx += 1; + + self.current_block().push_insn(insn); } /// Create a new label instance that we can jump to @@ -1533,6 +1881,7 @@ impl Assembler Some(new_moves) } + /// Sets the out field on the various instructions that require allocated /// registers because their output is used as the operand on a subsequent /// instruction. This is our implementation of the linear scan algorithm. @@ -1548,17 +1897,22 @@ impl Assembler let mut saved_regs: Vec<(Reg, usize)> = vec![]; // Remember the indexes of Insn::FrameSetup to update the stack size later - let mut frame_setup_idxs: Vec<usize> = vec![]; + let mut frame_setup_idxs: Vec<(BlockId, usize)> = vec![]; // live_ranges is indexed by original `index` given by the iterator. 
- let mut asm = Assembler::new_with_asm(&self); + let mut asm_local = Assembler::new_with_asm(&self); + + let iterator = &mut self.instruction_iterator(); + + let asm = &mut asm_local; + let live_ranges: Vec<LiveRange> = take(&mut self.live_ranges); - let mut iterator = self.insns.into_iter().enumerate().peekable(); - while let Some((index, mut insn)) = iterator.next() { + while let Some((index, mut insn)) = iterator.next(asm) { // Remember the index of FrameSetup to bump slot_count when we know the max number of spilled VRegs. if let Insn::FrameSetup { .. } = insn { - frame_setup_idxs.push(asm.insns.len()); + assert!(asm.current_block().is_entry); + frame_setup_idxs.push((asm.current_block().id, asm.current_block().insns.len())); } let before_ccall = match (&insn, iterator.peek().map(|(_, insn)| insn)) { @@ -1715,17 +2069,6 @@ impl Assembler // Push instruction(s) let is_ccall = matches!(insn, Insn::CCall { .. }); match insn { - Insn::ParallelMov { moves } => { - // For trampolines that use scratch registers, attempt to lower ParallelMov without scratch_reg. - if let Some(moves) = Self::resolve_parallel_moves(&moves, None) { - for (dst, src) in moves { - asm.mov(dst, src); - } - } else { - // If it needs a scratch_reg, leave it to *_split_with_scratch_regs to handle it. - asm.push_insn(Insn::ParallelMov { moves }); - } - } Insn::CCall { opnds, fptr, start_marker, end_marker, out } => { // Split start_marker and end_marker here to avoid inserting push/pop between them. if let Some(start_marker) = start_marker { @@ -1768,8 +2111,8 @@ impl Assembler } // Extend the stack space for spilled operands - for frame_setup_idx in frame_setup_idxs { - match &mut asm.insns[frame_setup_idx] { + for (block_id, frame_setup_idx) in frame_setup_idxs { + match &mut asm.basic_blocks[block_id.0].insns[frame_setup_idx] { Insn::FrameSetup { slot_count, .. } => { *slot_count += pool.stack_state.stack_size; } @@ -1778,7 +2121,7 @@ impl Assembler } assert!(pool.is_empty(), "Expected all registers to be returned to the pool"); - Ok(asm) + Ok(asm_local) } /// Compile the instructions down to machine code. @@ -1852,16 +2195,19 @@ impl Assembler // Extract targets first so that we can update instructions while referencing part of them. let mut targets = HashMap::new(); - for (idx, insn) in self.insns.iter().enumerate() { - if let Some(target @ Target::SideExit { .. }) = insn.target() { - targets.insert(idx, target.clone()); + + for block in self.sorted_blocks().iter() { + for (idx, insn) in block.insns.iter().enumerate() { + if let Some(target @ Target::SideExit { .. }) = insn.target() { + targets.insert((block.id.0, idx), target.clone()); + } } } // Map from SideExit to compiled Label. This table is used to deduplicate side exit code. let mut compiled_exits: HashMap<SideExit, Label> = HashMap::new(); - for (idx, target) in targets { + for ((block_id, idx), target) in targets { // Compile a side exit. Note that this is past the split pass and alloc_regs(), // so you can't use an instruction that returns a VReg. if let Target::SideExit { exit: exit @ SideExit { pc, .. 
}, reason } = target { @@ -1914,7 +2260,7 @@ impl Assembler new_exit }; - *self.insns[idx].target_mut().unwrap() = counted_exit.unwrap_or(compiled_exit); + *self.basic_blocks[block_id].insns[idx].target_mut().unwrap() = counted_exit.unwrap_or(compiled_exit); } } } @@ -1949,7 +2295,7 @@ impl fmt::Display for Assembler { } } - for insn in self.insns.iter() { + for insn in self.linearize_instructions().iter() { match insn { Insn::Comment(comment) => { writeln!(f, " {bold_begin}# {comment}{bold_end}")?; @@ -1985,6 +2331,20 @@ impl fmt::Display for Assembler { Target::CodePtr(code_ptr) => write!(f, " {code_ptr:?}")?, Target::Label(Label(label_idx)) => write!(f, " {}", label_name(self, *label_idx, &label_counts))?, Target::SideExit { reason, .. } => write!(f, " Exit({reason})")?, + Target::Block(edge) => { + if edge.args.is_empty() { + write!(f, " bb{}", edge.target.0)?; + } else { + write!(f, " bb{}(", edge.target.0)?; + for (i, arg) in edge.args.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", arg)?; + } + write!(f, ")")?; + } + } } } @@ -2000,6 +2360,17 @@ impl fmt::Display for Assembler { } _ => {} } + } else if let Some(Target::Block(_)) = insn.target() { + // If the instruction has a Block target, avoid using opnd_iter() for branch args + // since they're already printed inline with the target. Only print non-target operands. + match insn { + Insn::Joz(opnd, _) | + Insn::Jonz(opnd, _) | + Insn::LeaJumpTarget { out: opnd, target: _ } => { + write!(f, ", {opnd}")?; + } + _ => {} + } } else if let Insn::ParallelMov { moves } = insn { // Print operands with a special syntax for ParallelMov moves.iter().try_fold(" ", |prefix, (dst, src)| write!(f, "{prefix}{dst} <- {src}").and(Ok(", ")))?; @@ -2019,7 +2390,7 @@ impl fmt::Debug for Assembler { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { writeln!(fmt, "Assembler")?; - for (idx, insn) in self.insns.iter().enumerate() { + for (idx, insn) in self.linearize_instructions().iter().enumerate() { writeln!(fmt, " {idx:03} {insn:?}")?; } @@ -2028,7 +2399,9 @@ impl fmt::Debug for Assembler { } pub struct InsnIter { - old_insns_iter: std::vec::IntoIter<Insn>, + blocks: Vec<BasicBlock>, + current_block_idx: usize, + current_insn_iter: std::vec::IntoIter<Insn>, peeked: Option<(usize, Insn)>, index: usize, } @@ -2039,7 +2412,7 @@ impl InsnIter { pub fn peek(&mut self) -> Option<&(usize, Insn)> { // If we don't have a peeked value, get one if self.peeked.is_none() { - let insn = self.old_insns_iter.next()?; + let insn = self.current_insn_iter.next()?; let idx = self.index; self.index += 1; self.peeked = Some((idx, insn)); @@ -2048,17 +2421,34 @@ impl InsnIter { self.peeked.as_ref() } - // Get the next instruction. Right now we're passing the "new" assembler - // (the assembler we're copying in to) as a parameter. Once we've - // introduced basic blocks to LIR, we'll use the to set the correct BB - // on the new assembler, but for now it is unused. - pub fn next(&mut self, _new_asm: &mut Assembler) -> Option<(usize, Insn)> { + // Get the next instruction, advancing to the next block when current block is exhausted. + // Sets the current block on new_asm when moving to a new block. 
+ pub fn next(&mut self, new_asm: &mut Assembler) -> Option<(usize, Insn)> { // If we have a peeked value, return it if let Some(item) = self.peeked.take() { return Some(item); } - // Otherwise get the next from underlying iterator - let insn = self.old_insns_iter.next()?; + + // Try to get the next instruction from current block + if let Some(insn) = self.current_insn_iter.next() { + let idx = self.index; + self.index += 1; + return Some((idx, insn)); + } + + // Current block is exhausted, move to next block + self.current_block_idx += 1; + if self.current_block_idx >= self.blocks.len() { + return None; + } + + // Set up the next block + let next_block = &mut self.blocks[self.current_block_idx]; + new_asm.set_current_block(next_block.id); + self.current_insn_iter = take(&mut next_block.insns).into_iter(); + + // Get first instruction from the new block + let insn = self.current_insn_iter.next()?; let idx = self.index; self.index += 1; Some((idx, insn)) @@ -2451,6 +2841,43 @@ impl Assembler { self.push_insn(Insn::Xor { left, right, out }); out } + + /// This is used for trampolines that don't allow scratch registers. + /// Linearizes all blocks into a single giant block. + pub fn resolve_parallel_mov_pass(self) -> Assembler { + let mut asm_local = Assembler::new(); + asm_local.accept_scratch_reg = self.accept_scratch_reg; + asm_local.stack_base_idx = self.stack_base_idx; + asm_local.label_names = self.label_names.clone(); + asm_local.live_ranges.resize(self.live_ranges.len(), LiveRange { start: None, end: None }); + + // Create one giant block to linearize everything into + asm_local.new_block_without_id(); + + // Get linearized instructions with branch parameters expanded into ParallelMov + let linearized_insns = self.linearize_instructions(); + + // Process each linearized instruction + for insn in linearized_insns { + match insn { + Insn::ParallelMov { moves } => { + // Resolve parallel moves without scratch register + if let Some(resolved_moves) = Assembler::resolve_parallel_moves(&moves, None) { + for (dst, src) in resolved_moves { + asm_local.mov(dst, src); + } + } else { + unreachable!("ParallelMov requires scratch register but scratch_reg is not allowed"); + } + } + _ => { + asm_local.push_insn(insn); + } + } + } + + asm_local + } } /// Macro to use format! for Insn::Comment, which skips a format! call diff --git a/zjit/src/backend/tests.rs b/zjit/src/backend/tests.rs index ece6f8605f..701029b8ec 100644 --- a/zjit/src/backend/tests.rs +++ b/zjit/src/backend/tests.rs @@ -3,10 +3,12 @@ use crate::backend::lir::*; use crate::cruby::*; use crate::codegen::c_callable; use crate::options::rb_zjit_prepare_options; +use crate::hir; #[test] fn test_add() { let mut asm = Assembler::new(); + asm.new_block_without_id(); let out = asm.add(SP, Opnd::UImm(1)); let _ = asm.add(out, Opnd::UImm(2)); } @@ -15,6 +17,7 @@ fn test_add() { fn test_alloc_regs() { rb_zjit_prepare_options(); // for asm.alloc_regs let mut asm = Assembler::new(); + asm.new_block_without_id(); // Get the first output that we're going to reuse later. let out1 = asm.add(EC, Opnd::UImm(1)); @@ -37,7 +40,7 @@ fn test_alloc_regs() { let _ = asm.add(out3, Opnd::UImm(6)); // Here we're going to allocate the registers. - let result = asm.alloc_regs(Assembler::get_alloc_regs()).unwrap(); + let result = &asm.alloc_regs(Assembler::get_alloc_regs()).unwrap().basic_blocks[0]; // Now we're going to verify that the out field has been appropriately // updated for each of the instructions that needs it. 
@@ -63,7 +66,9 @@ fn test_alloc_regs() { fn setup_asm() -> (Assembler, CodeBlock) { rb_zjit_prepare_options(); // for get_option! on asm.compile - (Assembler::new(), CodeBlock::new_dummy()) + let mut asm = Assembler::new(); + asm.new_block_without_id(); + (asm, CodeBlock::new_dummy()) } // Test full codegen pipeline @@ -293,6 +298,7 @@ fn test_no_pos_marker_callback_when_compile_fails() { // We don't want to invoke the pos_marker callbacks with positions of malformed code. let mut asm = Assembler::new(); rb_zjit_prepare_options(); // for asm.compile + asm.new_block_without_id(); // Markers around code to exhaust memory limit let fail_if_called = |_code_ptr, _cb: &_| panic!("pos_marker callback should not be called"); diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index 38b9f2791b..a4cf8dfcc5 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -392,7 +392,7 @@ impl Assembler { /// for VRegs, most splits should happen in [`Self::x86_split`]. However, some instructions /// need to be split with registers after `alloc_regs`, e.g. for `compile_exits`, so /// this splits them and uses scratch registers for it. - pub fn x86_scratch_split(mut self) -> Assembler { + pub fn x86_scratch_split(self) -> Assembler { /// For some instructions, we want to be able to lower a 64-bit operand /// without requiring more registers to be available in the register /// allocator. So we just use the SCRATCH0_OPND register temporarily to hold @@ -468,12 +468,22 @@ impl Assembler { // Prepare StackState to lower MemBase::Stack let stack_state = StackState::new(self.stack_base_idx); - let mut asm_local = Assembler::new_with_asm(&self); + let mut asm_local = Assembler::new(); + asm_local.accept_scratch_reg = true; + asm_local.stack_base_idx = self.stack_base_idx; + asm_local.label_names = self.label_names.clone(); + asm_local.live_ranges.resize(self.live_ranges.len(), LiveRange { start: None, end: None }); + + // Create one giant block to linearize everything into + asm_local.new_block_without_id(); + let asm = &mut asm_local; - asm.accept_scratch_reg = true; - let mut iterator = self.instruction_iterator(); - while let Some((_, mut insn)) = iterator.next(asm) { + // Get linearized instructions with branch parameters expanded into ParallelMov + let linearized_insns = self.linearize_instructions(); + + for insn in linearized_insns.iter() { + let mut insn = insn.clone(); match &mut insn { Insn::Add { left, right, out } | Insn::Sub { left, right, out } | @@ -703,7 +713,10 @@ impl Assembler { // For each instruction let mut insn_idx: usize = 0; - while let Some(insn) = self.insns.get(insn_idx) { + assert_eq!(self.basic_blocks.len(), 1, "Assembler should be linearized into a single block before arm64_emit"); + let insns = &self.basic_blocks[0].insns; + + while let Some(insn) = insns.get(insn_idx) { // Update insn_idx that is shown on panic hook_insn_idx.as_mut().map(|idx| idx.lock().map(|mut idx| *idx = insn_idx).unwrap()); @@ -907,6 +920,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jmp_ptr(cb, code_ptr), Target::Label(label) => jmp_label(cb, label), + Target::Block(ref edge) => jmp_label(cb, self.block_label(edge.target)), Target::SideExit { .. 
} => unreachable!("Target::SideExit should have been compiled by compile_exits"), } } @@ -915,6 +929,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => je_ptr(cb, code_ptr), Target::Label(label) => je_label(cb, label), + Target::Block(ref edge) => je_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } } @@ -923,6 +938,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jne_ptr(cb, code_ptr), Target::Label(label) => jne_label(cb, label), + Target::Block(ref edge) => jne_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } } @@ -931,6 +947,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jl_ptr(cb, code_ptr), Target::Label(label) => jl_label(cb, label), + Target::Block(ref edge) => jl_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } }, @@ -939,6 +956,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jg_ptr(cb, code_ptr), Target::Label(label) => jg_label(cb, label), + Target::Block(ref edge) => jg_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } }, @@ -947,6 +965,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jge_ptr(cb, code_ptr), Target::Label(label) => jge_label(cb, label), + Target::Block(ref edge) => jge_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } }, @@ -955,6 +974,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jbe_ptr(cb, code_ptr), Target::Label(label) => jbe_label(cb, label), + Target::Block(ref edge) => jbe_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } }, @@ -963,6 +983,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jb_ptr(cb, code_ptr), Target::Label(label) => jb_label(cb, label), + Target::Block(ref edge) => jb_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } }, @@ -971,6 +992,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jz_ptr(cb, code_ptr), Target::Label(label) => jz_label(cb, label), + Target::Block(ref edge) => jz_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } } @@ -979,6 +1001,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jnz_ptr(cb, code_ptr), Target::Label(label) => jnz_label(cb, label), + Target::Block(ref edge) => jnz_label(cb, self.block_label(edge.target)), Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"), } } @@ -988,6 +1011,7 @@ impl Assembler { match *target { Target::CodePtr(code_ptr) => jo_ptr(cb, code_ptr), Target::Label(label) => jo_label(cb, label), + Target::Block(ref edge) => jo_label(cb, self.block_label(edge.target)), Target::SideExit { .. 
} => unreachable!("Target::SideExit should have been compiled by compile_exits"), } } @@ -1052,7 +1076,7 @@ impl Assembler { } else { // No bytes dropped, so the pos markers point to valid code for (insn_idx, pos) in pos_markers { - if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() { + if let Insn::PosMarker(callback) = insns.get(insn_idx).unwrap() { callback(pos, cb); } else { panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}"); @@ -1082,6 +1106,10 @@ impl Assembler { if use_scratch_regs { asm = asm.x86_scratch_split(); asm_dump!(asm, scratch_split); + } else { + // For trampolines that use scratch registers, resolve ParallelMov without scratch_reg. + asm = asm.resolve_parallel_mov_pass(); + asm_dump!(asm, resolve_parallel_mov); } // Create label instances in the code block @@ -1115,7 +1143,9 @@ mod tests { fn setup_asm() -> (Assembler, CodeBlock) { rb_zjit_prepare_options(); // for get_option! on asm.compile - (Assembler::new(), CodeBlock::new_dummy()) + let mut asm = Assembler::new(); + asm.new_block_without_id(); + (asm, CodeBlock::new_dummy()) } #[test] @@ -1123,6 +1153,7 @@ mod tests { use crate::hir::SideExitReason; let mut asm = Assembler::new(); + asm.new_block_without_id(); asm.stack_base_idx = 1; let label = asm.new_label("bb0"); diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index a3cf09d7c4..7c867cd6b4 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -18,8 +18,8 @@ use crate::state::ZJITState; use crate::stats::{CompileError, exit_counter_for_compile_error, exit_counter_for_unhandled_hir_insn, incr_counter, incr_counter_by, send_fallback_counter, send_fallback_counter_for_method_type, send_fallback_counter_for_super_method_type, send_fallback_counter_ptr_for_opcode, send_without_block_fallback_counter_for_method_type, send_without_block_fallback_counter_for_optimized_method_type}; use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::{compile_time_ns, exit_compile_error}}; use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr}; -use crate::backend::lir::{self, Assembler, C_ARG_OPNDS, C_RET_OPND, CFP, EC, NATIVE_BASE_PTR, NATIVE_STACK_PTR, Opnd, SP, SideExit, Target, asm_ccall, asm_comment}; -use crate::hir::{iseq_to_hir, BlockId, BranchEdge, Invariant, RangeType, SideExitReason::{self, *}, SpecialBackrefSymbol, SpecialObjectType}; +use crate::backend::lir::{self, Assembler, C_ARG_OPNDS, C_RET_OPND, CFP, EC, NATIVE_STACK_PTR, Opnd, SP, SideExit, Target, asm_ccall, asm_comment}; +use crate::hir::{iseq_to_hir, BlockId, Invariant, RangeType, SideExitReason::{self, *}, SpecialBackrefSymbol, SpecialObjectType}; use crate::hir::{Const, FrameState, Function, Insn, InsnId, SendFallbackReason}; use crate::hir_type::{types, Type}; use crate::options::get_option; @@ -75,12 +75,17 @@ impl JITState { } /// Find or create a label for a given BlockId - fn get_label(&mut self, asm: &mut Assembler, block_id: BlockId) -> Target { - match &self.labels[block_id.0] { + fn get_label(&mut self, asm: &mut Assembler, lir_block_id: lir::BlockId, hir_block_id: BlockId) -> Target { + // Extend labels vector if the requested index is out of bounds + if lir_block_id.0 >= self.labels.len() { + self.labels.resize(lir_block_id.0 + 1, None); + } + + match &self.labels[lir_block_id.0] { Some(label) => label.clone(), None => { - let label = asm.new_label(&format!("{block_id}")); - self.labels[block_id.0] = Some(label.clone()); + let label = asm.new_label(&format!("{hir_block_id}_{lir_block_id}")); + self.labels[lir_block_id.0] 
= Some(label.clone()); label } } @@ -176,6 +181,7 @@ fn register_with_perf(iseq_name: String, start_ptr: usize, code_size: usize) { pub fn gen_entry_trampoline(cb: &mut CodeBlock) -> Result<CodePtr, CompileError> { // Set up registers for CFP, EC, SP, and basic block arguments let mut asm = Assembler::new(); + asm.new_block_without_id(); gen_entry_prologue(&mut asm); // Jump to the first block using a call instruction. This trampoline is used @@ -264,11 +270,28 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func let mut jit = JITState::new(iseq, version, function.num_insns(), function.num_blocks()); let mut asm = Assembler::new_with_stack_slots(num_spilled_params); - // Compile each basic block + // Mapping from HIR block IDs to LIR block IDs. + // This is is a one-to-one mapping from HIR to LIR blocks used for finding + // jump targets in LIR (LIR should always jump to the head of an HIR block) + let mut hir_to_lir: Vec<Option<lir::BlockId>> = vec![None; function.num_blocks()]; + let reverse_post_order = function.rpo(); - for &block_id in reverse_post_order.iter() { + + // Create all LIR basic blocks corresponding to HIR basic blocks + for (rpo_idx, &block_id) in reverse_post_order.iter().enumerate() { + let lir_block_id = asm.new_block(block_id, function.is_entry_block(block_id), rpo_idx); + hir_to_lir[block_id.0] = Some(lir_block_id); + } + + // Compile each basic block + for (rpo_idx, &block_id) in reverse_post_order.iter().enumerate() { + // Set the current block to the LIR block that corresponds to this + // HIR block. + let lir_block_id = hir_to_lir[block_id.0].unwrap(); + asm.set_current_block(lir_block_id); + // Write a label to jump to the basic block - let label = jit.get_label(&mut asm, block_id); + let label = jit.get_label(&mut asm, lir_block_id, block_id); asm.write_label(label); let block = function.block(block_id); @@ -291,15 +314,73 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func // Compile all instructions for &insn_id in block.insns() { let insn = function.find(insn_id); - if let Err(last_snapshot) = gen_insn(cb, &mut jit, &mut asm, function, insn_id, &insn) { - debug!("ZJIT: gen_function: Failed to compile insn: {insn_id} {insn}. Generating side-exit."); - gen_incr_counter(&mut asm, exit_counter_for_unhandled_hir_insn(&insn)); - gen_side_exit(&mut jit, &mut asm, &SideExitReason::UnhandledHIRInsn(insn_id), &function.frame_state(last_snapshot)); - // Don't bother generating code after a side-exit. We won't run it. - // TODO(max): Generate ud2 or equivalent. 
- break; - }; - // It's fine; we generated the instruction + match insn { + Insn::IfFalse { val, target } => { + let val_opnd = jit.get_opnd(val); + + let lir_target = hir_to_lir[target.target.0].unwrap(); + + let fall_through_target = asm.new_block(block_id, false, rpo_idx); + + let branch_edge = lir::BranchEdge { + target: lir_target, + args: target.args.iter().map(|insn_id| jit.get_opnd(*insn_id)).collect() + }; + + let fall_through_edge = lir::BranchEdge { + target: fall_through_target, + args: vec![] + }; + + gen_if_false(&mut asm, val_opnd, branch_edge, fall_through_edge); + asm.set_current_block(fall_through_target); + + let label = jit.get_label(&mut asm, fall_through_target, block_id); + asm.write_label(label); + }, + Insn::IfTrue { val, target } => { + let val_opnd = jit.get_opnd(val); + + let lir_target = hir_to_lir[target.target.0].unwrap(); + + let fall_through_target = asm.new_block(block_id, false, rpo_idx); + + let branch_edge = lir::BranchEdge { + target: lir_target, + args: target.args.iter().map(|insn_id| jit.get_opnd(*insn_id)).collect() + }; + + let fall_through_edge = lir::BranchEdge { + target: fall_through_target, + args: vec![] + }; + + gen_if_true(&mut asm, val_opnd, branch_edge, fall_through_edge); + asm.set_current_block(fall_through_target); + + let label = jit.get_label(&mut asm, fall_through_target, block_id); + asm.write_label(label); + } + Insn::Jump(target) => { + let lir_target = hir_to_lir[target.target.0].unwrap(); + let branch_edge = lir::BranchEdge { + target: lir_target, + args: target.args.iter().map(|insn_id| jit.get_opnd(*insn_id)).collect() + }; + gen_jump(&mut asm, branch_edge); + }, + _ => { + if let Err(last_snapshot) = gen_insn(cb, &mut jit, &mut asm, function, insn_id, &insn) { + debug!("ZJIT: gen_function: Failed to compile insn: {insn_id} {insn}. Generating side-exit."); + gen_incr_counter(&mut asm, exit_counter_for_unhandled_hir_insn(&insn)); + gen_side_exit(&mut jit, &mut asm, &SideExitReason::UnhandledHIRInsn(insn_id), &function.frame_state(last_snapshot)); + // Don't bother generating code after a side-exit. We won't run it. + // TODO(max): Generate ud2 or equivalent. + break; + }; + // It's fine; we generated the instruction + } + } } // Make sure the last patch point has enough space to insert a jump asm.pad_patch_point(); @@ -395,9 +476,6 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::ToRegexp { opt, values, state } => gen_toregexp(jit, asm, *opt, opnds!(values), &function.frame_state(*state)), Insn::Param => unreachable!("block.insns should not have Insn::Param"), Insn::Snapshot { .. } => return Ok(()), // we don't need to do anything for this instruction at the moment - Insn::Jump(branch) => no_output!(gen_jump(jit, asm, branch)), - Insn::IfTrue { val, target } => no_output!(gen_if_true(jit, asm, opnd!(val), target)), - Insn::IfFalse { val, target } => no_output!(gen_if_false(jit, asm, opnd!(val), target)), &Insn::Send { cd, blockiseq, state, reason, .. } => gen_send(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::SendForward { cd, blockiseq, state, reason, .. } => gen_send_forward(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::SendWithoutBlock { cd, state, reason, .. } => gen_send_without_block(jit, asm, cd, &function.frame_state(state), reason), @@ -511,6 +589,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::ArrayMax { state, .. } | &Insn::Throw { state, .. 
} => return Err(state), + &Insn::IfFalse { .. } | Insn::IfTrue { .. } + | &Insn::Jump { .. } => unreachable!(), }; assert!(insn.has_output(), "Cannot write LIR output of HIR instruction with no output: {insn}"); @@ -1190,18 +1270,6 @@ fn gen_entry_prologue(asm: &mut Assembler) { asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); } -/// Set branch params to basic block arguments -fn gen_branch_params(jit: &mut JITState, asm: &mut Assembler, branch: &BranchEdge) { - if branch.args.is_empty() { - return; - } - - asm_comment!(asm, "set branch params: {}", branch.args.len()); - asm.parallel_mov(branch.args.iter().enumerate().map(|(idx, &arg)| - (param_opnd(idx), jit.get_opnd(arg)) - ).collect()); -} - /// Compile a constant fn gen_const_value(val: VALUE) -> lir::Opnd { // Just propagate the constant value and generate nothing @@ -1228,7 +1296,7 @@ fn gen_const_uint32(val: u32) -> lir::Opnd { /// Compile a basic block argument fn gen_param(asm: &mut Assembler, idx: usize) -> lir::Opnd { // Allocate a register or a stack slot - match param_opnd(idx) { + match Assembler::param_opnd(idx) { // If it's a register, insert LiveReg instruction to reserve the register // in the register pool for register allocation. param @ Opnd::Reg(_) => asm.live_reg_opnd(param), @@ -1237,45 +1305,25 @@ fn gen_param(asm: &mut Assembler, idx: usize) -> lir::Opnd { } /// Compile a jump to a basic block -fn gen_jump(jit: &mut JITState, asm: &mut Assembler, branch: &BranchEdge) { - // Set basic block arguments - gen_branch_params(jit, asm, branch); - +fn gen_jump(asm: &mut Assembler, branch: lir::BranchEdge) { // Jump to the basic block - let target = jit.get_label(asm, branch.target); - asm.jmp(target); + asm.jmp(Target::Block(branch)); } /// Compile a conditional branch to a basic block -fn gen_if_true(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, branch: &BranchEdge) { +fn gen_if_true(asm: &mut Assembler, val: lir::Opnd, branch: lir::BranchEdge, fall_through: lir::BranchEdge) { // If val is zero, move on to the next instruction. - let if_false = asm.new_label("if_false"); asm.test(val, val); - asm.jz(if_false.clone()); - - // If val is not zero, set basic block arguments and jump to the branch target. - // TODO: Consider generating the loads out-of-line - let if_true = jit.get_label(asm, branch.target); - gen_branch_params(jit, asm, branch); - asm.jmp(if_true); - - asm.write_label(if_false); + asm.jz(Target::Block(fall_through)); + asm.jmp(Target::Block(branch)); } /// Compile a conditional branch to a basic block -fn gen_if_false(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, branch: &BranchEdge) { +fn gen_if_false(asm: &mut Assembler, val: lir::Opnd, branch: lir::BranchEdge, fall_through: lir::BranchEdge) { // If val is not zero, move on to the next instruction. - let if_true = asm.new_label("if_true"); asm.test(val, val); - asm.jnz(if_true.clone()); - - // If val is zero, set basic block arguments and jump to the branch target. 
- // TODO: Consider generating the loads out-of-line - let if_false = jit.get_label(asm, branch.target); - gen_branch_params(jit, asm, branch); - asm.jmp(if_false); - - asm.write_label(if_true); + asm.jnz(Target::Block(fall_through)); + asm.jmp(Target::Block(branch)); } /// Compile a dynamic dispatch with block @@ -2411,19 +2459,6 @@ fn gen_stack_overflow_check(jit: &mut JITState, asm: &mut Assembler, state: &Fra asm.jbe(side_exit(jit, state, StackOverflow)); } -/// Return an operand we use for the basic block argument at a given index -fn param_opnd(idx: usize) -> Opnd { - // To simplify the implementation, allocate a fixed register or a stack slot for each basic block argument for now. - // Note that this is implemented here as opposed to automatically inside LIR machineries. - // TODO: Allow allocating arbitrary registers for basic block arguments - if idx < ALLOC_REGS.len() { - Opnd::Reg(ALLOC_REGS[idx]) - } else { - // With FrameSetup, the address that NATIVE_BASE_PTR points to stores an old value in the register. - // To avoid clobbering it, we need to start from the next slot, hence `+ 1` for the index. - Opnd::mem(64, NATIVE_BASE_PTR, (idx - ALLOC_REGS.len() + 1) as i32 * -SIZEOF_VALUE_I32) - } -} /// Inverse of ep_offset_to_local_idx(). See ep_offset_to_local_idx() for details. pub fn local_idx_to_ep_offset(iseq: IseqPtr, local_idx: usize) -> i32 { @@ -2618,6 +2653,7 @@ fn function_stub_hit_body(cb: &mut CodeBlock, iseq_call: &IseqCallRef) -> Result /// Compile a stub for an ISEQ called by SendWithoutBlockDirect fn gen_function_stub(cb: &mut CodeBlock, iseq_call: IseqCallRef) -> Result<CodePtr, CompileError> { let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg(); + asm.new_block_without_id(); asm_comment!(asm, "Stub: {}", iseq_get_location(iseq_call.iseq.get(), 0)); // Call function_stub_hit using the shared trampoline. See `gen_function_stub_hit_trampoline`. @@ -2635,6 +2671,7 @@ fn gen_function_stub(cb: &mut CodeBlock, iseq_call: IseqCallRef) -> Result<CodeP /// See [gen_function_stub] for how it's used. pub fn gen_function_stub_hit_trampoline(cb: &mut CodeBlock) -> Result<CodePtr, CompileError> { let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg(); + asm.new_block_without_id(); asm_comment!(asm, "function_stub_hit trampoline"); // Maintain alignment for x86_64, and set up a frame for arm64 properly @@ -2693,6 +2730,7 @@ pub fn gen_function_stub_hit_trampoline(cb: &mut CodeBlock) -> Result<CodePtr, C /// Generate a trampoline that is used when a function exits without restoring PC and the stack pub fn gen_exit_trampoline(cb: &mut CodeBlock) -> Result<CodePtr, CompileError> { let mut asm = Assembler::new(); + asm.new_block_without_id(); asm_comment!(asm, "side-exit trampoline"); asm.frame_teardown(&[]); // matching the setup in gen_entry_point() @@ -2707,6 +2745,7 @@ pub fn gen_exit_trampoline(cb: &mut CodeBlock) -> Result<CodePtr, CompileError> /// Generate a trampoline that increments exit_compilation_failure and jumps to exit_trampoline. 
pub fn gen_exit_trampoline_with_counter(cb: &mut CodeBlock, exit_trampoline: CodePtr) -> Result<CodePtr, CompileError> { let mut asm = Assembler::new(); + asm.new_block_without_id(); asm_comment!(asm, "function stub exit trampoline"); gen_incr_counter(&mut asm, exit_compile_error); @@ -2915,6 +2954,7 @@ impl IseqCall { fn regenerate(&self, cb: &mut CodeBlock, callback: impl Fn(&mut Assembler)) { cb.with_write_ptr(self.start_addr.get().unwrap(), |cb| { let mut asm = Assembler::new(); + asm.new_block_without_id(); callback(&mut asm); asm.compile(cb).unwrap(); assert_eq!(self.end_addr.get().unwrap(), cb.get_write_ptr()); diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index fc49dc0c3c..6bbf477c94 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -4702,6 +4702,10 @@ impl Function { entry_blocks } + pub fn is_entry_block(&self, block_id: BlockId) -> bool { + self.entry_block == block_id || self.jit_entry_blocks.contains(&block_id) + } + /// Return a traversal of the `Function`'s `BlockId`s in reverse post-order. pub fn rpo(&self) -> Vec<BlockId> { let mut result = self.po_from(self.entry_blocks()); diff --git a/zjit/src/invariants.rs b/zjit/src/invariants.rs index d183eb18ab..f1180acf2a 100644 --- a/zjit/src/invariants.rs +++ b/zjit/src/invariants.rs @@ -16,6 +16,7 @@ macro_rules! compile_patch_points { for patch_point in $patch_points { let written_range = $cb.with_write_ptr(patch_point.patch_point_ptr, |cb| { let mut asm = Assembler::new(); + asm.new_block_without_id(); asm_comment!(asm, $($comment_args)*); asm.jmp(patch_point.side_exit_ptr.into()); asm.compile(cb).expect("can write existing code"); diff --git a/zjit/src/options.rs b/zjit/src/options.rs index 40b49146b7..9121e49bff 100644 --- a/zjit/src/options.rs +++ b/zjit/src/options.rs @@ -180,6 +180,8 @@ pub enum DumpLIR { alloc_regs, /// Dump LIR after compile_exits compile_exits, + /// Dump LIR after resolve_parallel_mov + resolve_parallel_mov, /// Dump LIR after {arch}_scratch_split scratch_split, } @@ -190,6 +192,7 @@ const DUMP_LIR_ALL: &[DumpLIR] = &[ DumpLIR::split, DumpLIR::alloc_regs, DumpLIR::compile_exits, + DumpLIR::resolve_parallel_mov, DumpLIR::scratch_split, ]; |
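Note on the lowering above: the codegen.rs changes turn HIR Jump/IfTrue/IfFalse into LIR branches carrying Target::Block(BranchEdge), and linearize_instructions() later expands each edge's arguments into a ParallelMov into fixed parameter operands before the jump. The following is a minimal standalone sketch of that expansion idea; Opnd, Insn, param_opnd and NUM_PARAM_REGS here are simplified stand-ins for illustration, not the patch's real lir types.

#[derive(Clone, Copy, Debug, PartialEq)]
enum Opnd {
    Reg(u8),        // fixed parameter register (stand-in for ALLOC_REGS[idx])
    StackSlot(i32), // native-stack slot for parameter indices past the registers
    VReg(usize),    // a virtual register produced earlier in the block
}

#[derive(Debug)]
enum Insn {
    ParallelMov(Vec<(Opnd, Opnd)>), // (dst, src) pairs executed "at once"
    Jmp(usize),                     // jump to a label index
}

const NUM_PARAM_REGS: usize = 3; // stand-in for ALLOC_REGS.len()

/// Stand-in for Assembler::param_opnd: fixed register for the first few block
/// parameters, then stack slots below the frame base (skipping the base slot).
fn param_opnd(idx: usize) -> Opnd {
    if idx < NUM_PARAM_REGS {
        Opnd::Reg(idx as u8)
    } else {
        Opnd::StackSlot(-8 * (idx - NUM_PARAM_REGS + 1) as i32)
    }
}

/// Expand a branch edge `target_label(args...)` into linear instructions.
fn expand_branch(target_label: usize, args: &[Opnd], out: &mut Vec<Insn>) {
    if !args.is_empty() {
        let moves = args
            .iter()
            .enumerate()
            .map(|(idx, &arg)| (param_opnd(idx), arg))
            .collect();
        out.push(Insn::ParallelMov(moves));
    }
    out.push(Insn::Jmp(target_label));
}

fn main() {
    let mut insns = Vec::new();
    // A jump to block label 7 passing four arguments: three go to registers,
    // the fourth spills to a stack slot.
    let args = [Opnd::VReg(0), Opnd::VReg(1), Opnd::VReg(2), Opnd::VReg(3)];
    expand_branch(7, &args, &mut insns);
    for insn in &insns {
        println!("{insn:?}");
    }
}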
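The new resolve_parallel_mov_pass() above depends on resolve_parallel_moves() returning None when a ParallelMov cannot be serialized without a scratch register. One common way to make that decision is sketched below under simplified assumptions (registers as plain u8, no swap or scratch fallback); the patch's actual resolve_parallel_moves may differ.

/// Serialize a set of parallel (dst <- src) moves into sequential moves, or
/// return None if the remaining moves form a cycle (a scratch would be needed).
fn resolve_parallel_moves(moves: &[(u8, u8)]) -> Option<Vec<(u8, u8)>> {
    // Drop no-op self moves up front.
    let mut pending: Vec<(u8, u8)> =
        moves.iter().copied().filter(|&(dst, src)| dst != src).collect();
    let mut ordered = Vec::with_capacity(pending.len());
    while !pending.is_empty() {
        // Pick a move whose destination is no longer needed as a source;
        // if none exists, every remaining destination is still read later,
        // i.e. the moves form a cycle, so give up.
        let idx = pending
            .iter()
            .position(|&(dst, _)| !pending.iter().any(|&(_, src)| src == dst))?;
        ordered.push(pending.swap_remove(idx));
    }
    Some(ordered)
}

fn main() {
    // r0 <- r1, r1 <- r2: orderable without a scratch register.
    assert_eq!(resolve_parallel_moves(&[(0, 1), (1, 2)]), Some(vec![(0, 1), (1, 2)]));
    // r0 <- r1, r1 <- r0: a cycle, so a scratch register (or swap) is required.
    assert_eq!(resolve_parallel_moves(&[(0, 1), (1, 0)]), None);
}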
