Diffstat (limited to 'yjit/src/core.rs')
-rw-r--r-- | yjit/src/core.rs | 3382
1 file changed, 2502 insertions, 880 deletions
diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 64585653d9..4152eab02c 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -1,18 +1,32 @@ -use crate::asm::x86_64::*; +//! Code versioning, retained live control flow graph mutations, type tracking, etc. + +// So we can comment on individual uses of `unsafe` in `unsafe` functions +#![warn(unsafe_op_in_unsafe_fn)] + use crate::asm::*; +use crate::backend::ir::*; use crate::codegen::*; use crate::virtualmem::CodePtr; use crate::cruby::*; use crate::options::*; use crate::stats::*; use crate::utils::*; +#[cfg(feature="disasm")] +use crate::disasm::*; use core::ffi::c_void; use std::cell::*; -use std::hash::{Hash, Hasher}; +use std::collections::HashSet; +use std::fmt; use std::mem; -use std::rc::{Rc}; -use InsnOpnd::*; -use TempMapping::*; +use std::mem::transmute; +use std::ops::Range; +use std::rc::Rc; +use mem::MaybeUninit; +use std::ptr; +use ptr::NonNull; +use YARVOpnd::*; +use TempMappingKind::*; +use crate::invariants::*; // Maximum number of temp value types we keep track of pub const MAX_TEMP_TYPES: usize = 8; @@ -20,10 +34,15 @@ pub const MAX_TEMP_TYPES: usize = 8; // Maximum number of local variable types we keep track of const MAX_LOCAL_TYPES: usize = 8; +/// An index into `ISEQ_BODY(iseq)->iseq_encoded`. Points +/// to a YARV instruction or an instruction operand. +pub type IseqIdx = u16; + // Represent the type of a value (local/stack/self) in YJIT -#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] +#[repr(u8)] pub enum Type { - Unknown, + Unknown = 0, UnknownImm, UnknownHeap, Nil, @@ -31,15 +50,20 @@ pub enum Type { False, Fixnum, Flonum, - Array, - Hash, ImmSymbol, - #[allow(unused)] - HeapSymbol, - TString, // An object with the T_STRING flag set, possibly an rb_cString CString, // An un-subclassed string of type rb_cString (can have instance vars in some cases) + TArray, // An object with the T_ARRAY flag set, possibly an rb_cArray + CArray, // An un-subclassed array of type rb_cArray (can have instance vars in some cases) + THash, // An object with the T_HASH flag set, possibly an rb_cHash + CHash, // An un-subclassed hash of type rb_cHash (can have instance vars in some cases) + + BlockParamProxy, // A special sentinel value indicating the block parameter should be read from + // the current surrounding cfp + + // The context currently relies on types taking at most 4 bits (max value 15) + // to encode, so if we add any more, we will need to refactor the context. } // Default initialization @@ -66,18 +90,27 @@ impl Type { } else if val.flonum_p() { Type::Flonum } else { - unreachable!() + unreachable!("Illegal value: {:?}", val) } } else { // Core.rs can't reference rb_cString because it's linked by Rust-only tests. // But CString vs TString is only an optimisation and shouldn't affect correctness. #[cfg(not(test))] - if val.class_of() == unsafe { rb_cString } { - return Type::CString; + match val.class_of() { + class if class == unsafe { rb_cArray } => return Type::CArray, + class if class == unsafe { rb_cHash } => return Type::CHash, + class if class == unsafe { rb_cString } => return Type::CString, + _ => {} + } + // We likewise can't reference rb_block_param_proxy, but it's again an optimisation; + // we can just treat it as a normal Object. 
+ #[cfg(not(test))] + if val == unsafe { rb_block_param_proxy } { + return Type::BlockParamProxy; } match val.builtin_type() { - RUBY_T_ARRAY => Type::Array, - RUBY_T_HASH => Type::Hash, + RUBY_T_ARRAY => Type::TArray, + RUBY_T_HASH => Type::THash, RUBY_T_STRING => Type::TString, _ => Type::UnknownHeap, } @@ -117,15 +150,32 @@ impl Type { pub fn is_heap(&self) -> bool { match self { Type::UnknownHeap => true, - Type::Array => true, - Type::Hash => true, - Type::HeapSymbol => true, + Type::TArray => true, + Type::CArray => true, + Type::THash => true, + Type::CHash => true, Type::TString => true, Type::CString => true, + Type::BlockParamProxy => true, _ => false, } } + /// Check if it's a T_ARRAY object (both TArray and CArray are T_ARRAY) + pub fn is_array(&self) -> bool { + matches!(self, Type::TArray | Type::CArray) + } + + /// Check if it's a T_HASH object (both THash and CHash are T_HASH) + pub fn is_hash(&self) -> bool { + matches!(self, Type::THash | Type::CHash) + } + + /// Check if it's a T_STRING object (both TString and CString are T_STRING) + pub fn is_string(&self) -> bool { + matches!(self, Type::TString | Type::CString) + } + /// Returns an Option with the T_ value type if it is known, otherwise None pub fn known_value_type(&self) -> Option<ruby_value_type> { match self { @@ -134,11 +184,12 @@ impl Type { Type::False => Some(RUBY_T_FALSE), Type::Fixnum => Some(RUBY_T_FIXNUM), Type::Flonum => Some(RUBY_T_FLOAT), - Type::Array => Some(RUBY_T_ARRAY), - Type::Hash => Some(RUBY_T_HASH), - Type::ImmSymbol | Type::HeapSymbol => Some(RUBY_T_SYMBOL), + Type::TArray | Type::CArray => Some(RUBY_T_ARRAY), + Type::THash | Type::CHash => Some(RUBY_T_HASH), + Type::ImmSymbol => Some(RUBY_T_SYMBOL), Type::TString | Type::CString => Some(RUBY_T_STRING), - Type::Unknown | Type::UnknownImm | Type::UnknownHeap => None + Type::Unknown | Type::UnknownImm | Type::UnknownHeap => None, + Type::BlockParamProxy => None, } } @@ -151,7 +202,9 @@ impl Type { Type::False => Some(rb_cFalseClass), Type::Fixnum => Some(rb_cInteger), Type::Flonum => Some(rb_cFloat), - Type::ImmSymbol | Type::HeapSymbol => Some(rb_cSymbol), + Type::ImmSymbol => Some(rb_cSymbol), + Type::CArray => Some(rb_cArray), + Type::CHash => Some(rb_cHash), Type::CString => Some(rb_cString), _ => None, } @@ -169,7 +222,7 @@ impl Type { } } - /// Returns an Option with the exact value if it is known, otherwise None + /// Returns an Option boolean representing whether the value is truthy if known, otherwise None pub fn known_truthy(&self) -> Option<bool> { match self { Type::Nil => Some(false), @@ -180,112 +233,281 @@ impl Type { } } + /// Returns an Option boolean representing whether the value is equal to nil if known, otherwise None + pub fn known_nil(&self) -> Option<bool> { + match (self, self.known_truthy()) { + (Type::Nil, _) => Some(true), + (Type::False, _) => Some(false), // Qfalse is not nil + (_, Some(true)) => Some(false), // if truthy, can't be nil + (_, _) => None // otherwise unknown + } + } + /// Compute a difference between two value types - /// Returns 0 if the two are the same - /// Returns > 0 if different but compatible - /// Returns usize::MAX if incompatible - pub fn diff(self, dst: Self) -> usize { + pub fn diff(self, dst: Self) -> TypeDiff { // Perfect match, difference is zero if self == dst { - return 0; + return TypeDiff::Compatible(0); } // Any type can flow into an unknown type if dst == Type::Unknown { - return 1; + return TypeDiff::Compatible(1); + } + + // A CArray is also a TArray. 
+ if self == Type::CArray && dst == Type::TArray { + return TypeDiff::Compatible(1); + } + + // A CHash is also a THash. + if self == Type::CHash && dst == Type::THash { + return TypeDiff::Compatible(1); } // A CString is also a TString. if self == Type::CString && dst == Type::TString { - return 1; + return TypeDiff::Compatible(1); } // Specific heap type into unknown heap type is imperfect but valid if self.is_heap() && dst == Type::UnknownHeap { - return 1; + return TypeDiff::Compatible(1); } // Specific immediate type into unknown immediate type is imperfect but valid if self.is_imm() && dst == Type::UnknownImm { - return 1; + return TypeDiff::Compatible(1); } // Incompatible types - return usize::MAX; + return TypeDiff::Incompatible; } /// Upgrade this type into a more specific compatible type /// The new type must be compatible and at least as specific as the previously known type. - fn upgrade(&mut self, src: Self) { - // Here we're checking that src is more specific than self - assert!(src.diff(*self) != usize::MAX); - *self = src; + fn upgrade(&mut self, new_type: Self) { + // We can only upgrade to a type that is more specific + assert!(new_type.diff(*self) != TypeDiff::Incompatible); + *self = new_type; } } +#[derive(Debug, Eq, PartialEq)] +pub enum TypeDiff { + // usize == 0: Same type + // usize >= 1: Different but compatible. The smaller, the more compatible. + Compatible(usize), + Incompatible, +} + +#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)] +#[repr(u8)] +pub enum TempMappingKind +{ + MapToStack = 0, + MapToSelf = 1, + MapToLocal = 2, +} + // Potential mapping of a value on the temporary stack to // self, a local variable or constant so that we can track its type -#[derive(Copy, Clone, Eq, PartialEq, Debug)] -pub enum TempMapping { - MapToStack, // Normal stack value - MapToSelf, // Temp maps to the self operand - MapToLocal(u8), // Temp maps to a local variable with index - //ConstMapping, // Small constant (0, 1, 2, Qnil, Qfalse, Qtrue) +// +// The highest two bits represent TempMappingKind, and the rest of +// the bits are used differently across different kinds. +// * MapToStack: The lowest 5 bits are used for mapping Type. +// * MapToSelf: The remaining bits are not used; the type is stored in self_type. +// * MapToLocal: The lowest 3 bits store the index of a local variable. 
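// Note (not in the original diff): a sketch of the packing described above. With
// TempMappingKind::MapToLocal == 2, mapping local index 3 packs as (2 << 6) | 3 ==
// 0b1000_0011, i.e. the kind in the top two bits and the local index in the low three bits.
// A MapToStack mapping instead keeps a Type value in the low five bits, e.g.
// (0 << 6) | (Type::Fixnum as u8).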
+#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)] +pub struct TempMapping(u8); + +impl TempMapping { + pub fn map_to_stack(t: Type) -> TempMapping + { + let kind_bits = TempMappingKind::MapToStack as u8; + let type_bits = t as u8; + assert!(type_bits <= 0b11111); + let bits = (kind_bits << 6) | (type_bits & 0b11111); + TempMapping(bits) + } + + pub fn map_to_self() -> TempMapping + { + let kind_bits = TempMappingKind::MapToSelf as u8; + let bits = kind_bits << 6; + TempMapping(bits) + } + + pub fn map_to_local(local_idx: u8) -> TempMapping + { + let kind_bits = TempMappingKind::MapToLocal as u8; + assert!(local_idx <= 0b111); + let bits = (kind_bits << 6) | (local_idx & 0b111); + TempMapping(bits) + } + + pub fn without_type(&self) -> TempMapping + { + if self.get_kind() != TempMappingKind::MapToStack { + return *self; + } + + TempMapping::map_to_stack(Type::Unknown) + } + + pub fn get_kind(&self) -> TempMappingKind + { + // Take the two highest bits + let TempMapping(bits) = self; + let kind_bits = bits >> 6; + assert!(kind_bits <= 2); + unsafe { transmute::<u8, TempMappingKind>(kind_bits) } + } + + pub fn get_type(&self) -> Type + { + assert!(self.get_kind() == TempMappingKind::MapToStack); + + // Take the 5 lowest bits + let TempMapping(bits) = self; + let type_bits = bits & 0b11111; + unsafe { transmute::<u8, Type>(type_bits) } + } + + pub fn get_local_idx(&self) -> u8 + { + assert!(self.get_kind() == TempMappingKind::MapToLocal); + + // Take the 3 lowest bits + let TempMapping(bits) = self; + bits & 0b111 + } } impl Default for TempMapping { fn default() -> Self { - MapToStack + TempMapping::map_to_stack(Type::Unknown) } } -// Operand to a bytecode instruction +// Operand to a YARV bytecode instruction #[derive(Copy, Clone, PartialEq, Eq, Debug)] -pub enum InsnOpnd { +pub enum YARVOpnd { // The value is self SelfOpnd, // Temporary stack operand with stack index - StackOpnd(u16), + StackOpnd(u8), +} + +impl From<Opnd> for YARVOpnd { + fn from(value: Opnd) -> Self { + match value { + Opnd::Stack { idx, .. } => StackOpnd(idx.try_into().unwrap()), + _ => unreachable!("{:?} cannot be converted to YARVOpnd", value) + } + } +} + +/// Maximum index of stack temps that could be in a register +pub const MAX_REG_TEMPS: u8 = 8; + +/// Bitmap of which stack temps are in a register +#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)] +pub struct RegTemps(u8); + +impl RegTemps { + pub fn get(&self, index: u8) -> bool { + assert!(index < MAX_REG_TEMPS); + (self.0 >> index) & 1 == 1 + } + + pub fn set(&mut self, index: u8, value: bool) { + assert!(index < MAX_REG_TEMPS); + if value { + self.0 = self.0 | (1 << index); + } else { + self.0 = self.0 & !(1 << index); + } + } + + pub fn as_u8(&self) -> u8 { + self.0 + } + + /// Return true if there's a register that conflicts with a given stack_idx. + pub fn conflicts_with(&self, stack_idx: u8) -> bool { + let mut other_idx = stack_idx as usize % get_option!(num_temp_regs); + while other_idx < MAX_REG_TEMPS as usize { + if stack_idx as usize != other_idx && self.get(other_idx as u8) { + return true; + } + other_idx += get_option!(num_temp_regs); + } + false + } } +/// Bits for chain_depth_return_landing_defer +const RETURN_LANDING_BIT: u8 = 0b10000000; +const DEFER_BIT: u8 = 0b01000000; +const CHAIN_DEPTH_MASK: u8 = 0b00111111; // 63 + /// Code generation context /// Contains information we can use to specialize/optimize code /// There are a lot of context objects so we try to keep the size small. 
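// Note (not in the original diff): with #[repr(packed)] the fields declared below add up
// to 23 bytes with no padding (five one-byte fields, plus u32 + u16 + u32 + u64), which is
// what keeping Context small amounts to in practice.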
-#[derive(Copy, Clone, Default, Debug)] +#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)] +#[repr(packed)] pub struct Context { // Number of values currently on the temporary stack - stack_size: u16, + stack_size: u8, // Offset of the JIT SP relative to the interpreter SP // This represents how far the JIT's SP is from the "real" SP - sp_offset: i16, + sp_offset: i8, - // Depth of this block in the sidechain (eg: inline-cache chain) - chain_depth: u8, + /// Bitmap of which stack temps are in a register + reg_temps: RegTemps, - // Local variable types we keep track of - local_types: [Type; MAX_LOCAL_TYPES], - - // Temporary variable types we keep track of - temp_types: [Type; MAX_TEMP_TYPES], + /// Fields packed into u8 + /// - 1st bit from the left: Whether this code is the target of a JIT-to-JIT Ruby return ([Self::is_return_landing]) + /// - 2nd bit from the left: Whether the compilation of this code has been deferred ([Self::is_deferred]) + /// - Last 6 bits (max: 63): Depth of this block in the sidechain (eg: inline-cache chain) + chain_depth_and_flags: u8, // Type we track for self self_type: Type, - // Mapping of temp stack entries to types we track - temp_mapping: [TempMapping; MAX_TEMP_TYPES], + // Local variable types we keep track of + // We store 8 local types, requiring 4 bits each, for a total of 32 bits + local_types: u32, + + // Temp mapping kinds we track + // 8 temp mappings * 2 bits, total 16 bits + temp_mapping_kind: u16, + + // Stack slot type/local_idx we track + // 8 temp types * 4 bits, total 32 bits + temp_payload: u32, + + /// A pointer to a block ISEQ supplied by the caller. 0 if not inlined. + /// Not using IseqPtr to satisfy Default trait, and not using Option for #[repr(packed)] + /// TODO: This could be u16 if we have a global or per-ISEQ HashMap to convert IseqPtr + /// to serial indexes. We're thinking of overhauling Context structure in Ruby 3.4 which + /// could allow this to consume no bytes, so we're leaving this as is. + inline_block: u64, } /// Tuple of (iseq, idx) used to identify basic blocks /// There are a lot of blockid objects so we try to keep the size small. 
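// Note (not in the original diff): #[repr(packed)] together with the narrower u16 index
// keeps BlockId at 10 bytes on 64-bit targets (8-byte iseq pointer + 2-byte index), where
// the previous naturally aligned pointer + u32 layout occupied 16 bytes.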
#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[repr(packed)] pub struct BlockId { /// Instruction sequence pub iseq: IseqPtr, /// Index in the iseq where the block starts - pub idx: u32, + pub idx: u16, } /// Branch code shape enumeration @@ -296,116 +518,457 @@ pub enum BranchShape { Default, // Neither target is next } -// Branch code generation function signature -type BranchGenFn = - fn(cb: &mut CodeBlock, target0: CodePtr, target1: Option<CodePtr>, shape: BranchShape) -> (); +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum BranchGenFn { + BranchIf(Cell<BranchShape>), + BranchNil(Cell<BranchShape>), + BranchUnless(Cell<BranchShape>), + JumpToTarget0(Cell<BranchShape>), + JNZToTarget0, + JZToTarget0, + JBEToTarget0, + JBToTarget0, + JOMulToTarget0, + JITReturn, +} + +impl BranchGenFn { + pub fn call(&self, asm: &mut Assembler, target0: Target, target1: Option<Target>) { + match self { + BranchGenFn::BranchIf(shape) => { + match shape.get() { + BranchShape::Next0 => asm.jz(target1.unwrap()), + BranchShape::Next1 => asm.jnz(target0), + BranchShape::Default => { + asm.jnz(target0); + asm.jmp(target1.unwrap()); + } + } + } + BranchGenFn::BranchNil(shape) => { + match shape.get() { + BranchShape::Next0 => asm.jne(target1.unwrap()), + BranchShape::Next1 => asm.je(target0), + BranchShape::Default => { + asm.je(target0); + asm.jmp(target1.unwrap()); + } + } + } + BranchGenFn::BranchUnless(shape) => { + match shape.get() { + BranchShape::Next0 => asm.jnz(target1.unwrap()), + BranchShape::Next1 => asm.jz(target0), + BranchShape::Default => { + asm.jz(target0); + asm.jmp(target1.unwrap()); + } + } + } + BranchGenFn::JumpToTarget0(shape) => { + if shape.get() == BranchShape::Next1 { + panic!("Branch shape Next1 not allowed in JumpToTarget0!"); + } + if shape.get() == BranchShape::Default { + asm.jmp(target0); + } + } + BranchGenFn::JNZToTarget0 => { + asm.jnz(target0) + } + BranchGenFn::JZToTarget0 => { + asm.jz(target0) + } + BranchGenFn::JBEToTarget0 => { + asm.jbe(target0) + } + BranchGenFn::JBToTarget0 => { + asm.jb(target0) + } + BranchGenFn::JOMulToTarget0 => { + asm.jo_mul(target0) + } + BranchGenFn::JITReturn => { + asm_comment!(asm, "update cfp->jit_return"); + let jit_return = RUBY_OFFSET_CFP_JIT_RETURN - RUBY_SIZEOF_CONTROL_FRAME as i32; + let raw_ptr = asm.lea_jump_target(target0); + asm.mov(Opnd::mem(64, CFP, jit_return), raw_ptr); + } + } + } + + pub fn get_shape(&self) -> BranchShape { + match self { + BranchGenFn::BranchIf(shape) | + BranchGenFn::BranchNil(shape) | + BranchGenFn::BranchUnless(shape) | + BranchGenFn::JumpToTarget0(shape) => shape.get(), + BranchGenFn::JNZToTarget0 | + BranchGenFn::JZToTarget0 | + BranchGenFn::JBEToTarget0 | + BranchGenFn::JBToTarget0 | + BranchGenFn::JOMulToTarget0 | + BranchGenFn::JITReturn => BranchShape::Default, + } + } + + pub fn set_shape(&self, new_shape: BranchShape) { + match self { + BranchGenFn::BranchIf(shape) | + BranchGenFn::BranchNil(shape) | + BranchGenFn::BranchUnless(shape) => { + shape.set(new_shape); + } + BranchGenFn::JumpToTarget0(shape) => { + if new_shape == BranchShape::Next1 { + panic!("Branch shape Next1 not allowed in JumpToTarget0!"); + } + shape.set(new_shape); + } + BranchGenFn::JNZToTarget0 | + BranchGenFn::JZToTarget0 | + BranchGenFn::JBEToTarget0 | + BranchGenFn::JBToTarget0 | + BranchGenFn::JOMulToTarget0 | + BranchGenFn::JITReturn => { + assert_eq!(new_shape, BranchShape::Default); + } + } + } +} + +/// A place that a branch could jump to +#[derive(Debug, Clone)] +enum BranchTarget { + Stub(Box<BranchStub>), // 
Not compiled yet + Block(BlockRef), // Already compiled +} + +impl BranchTarget { + fn get_address(&self) -> Option<CodePtr> { + match self { + BranchTarget::Stub(stub) => stub.address, + BranchTarget::Block(blockref) => Some(unsafe { blockref.as_ref() }.start_addr), + } + } + + fn get_blockid(&self) -> BlockId { + match self { + BranchTarget::Stub(stub) => BlockId { iseq: stub.iseq.get(), idx: stub.iseq_idx }, + BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.get_blockid(), + } + } + + fn get_ctx(&self) -> Context { + match self { + BranchTarget::Stub(stub) => stub.ctx, + BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.ctx, + } + } + + fn get_block(&self) -> Option<BlockRef> { + match self { + BranchTarget::Stub(_) => None, + BranchTarget::Block(blockref) => Some(*blockref), + } + } + + fn set_iseq(&self, iseq: IseqPtr) { + match self { + BranchTarget::Stub(stub) => stub.iseq.set(iseq), + BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.iseq.set(iseq), + } + } +} + +#[derive(Debug, Clone)] +struct BranchStub { + address: Option<CodePtr>, + iseq: Cell<IseqPtr>, + iseq_idx: IseqIdx, + ctx: Context, +} /// Store info about an outgoing branch in a code segment /// Note: care must be taken to minimize the size of branch objects -struct Branch { +pub struct Branch { // Block this is attached to block: BlockRef, // Positions where the generated code starts and ends - start_addr: Option<CodePtr>, - end_addr: Option<CodePtr>, - - // Context right after the branch instruction - #[allow(unused)] // set but not read at the moment - src_ctx: Context, + start_addr: CodePtr, + end_addr: Cell<CodePtr>, // exclusive // Branch target blocks and their contexts - targets: [Option<BlockId>; 2], - target_ctxs: [Context; 2], - blocks: [Option<BlockRef>; 2], - - // Jump target addresses - dst_addrs: [Option<CodePtr>; 2], + targets: [Cell<Option<Box<BranchTarget>>>; 2], // Branch code generation function gen_fn: BranchGenFn, +} + +/// A [Branch] for a [Block] that is under construction. +/// Fields correspond, but may be `None` during construction. +pub struct PendingBranch { + /// Allocation holder for the address of the constructed branch + /// in error paths Box deallocates it. 
+ uninit_branch: Box<MaybeUninit<Branch>>, + + /// Branch code generation function + gen_fn: BranchGenFn, + + /// Positions where the generated code starts and ends + start_addr: Cell<Option<CodePtr>>, + end_addr: Cell<Option<CodePtr>>, // exclusive + + /// Branch target blocks and their contexts + targets: [Cell<Option<Box<BranchTarget>>>; 2], +} + +impl Branch { + // Compute the size of the branch code + fn code_size(&self) -> usize { + (self.end_addr.get().as_offset() - self.start_addr.as_offset()) as usize + } + + /// Get the address of one of the branch destination + fn get_target_address(&self, target_idx: usize) -> Option<CodePtr> { + unsafe { + self.targets[target_idx] + .ref_unchecked() + .as_ref() + .and_then(|target| target.get_address()) + } + } + + fn get_stub_count(&self) -> usize { + let mut count = 0; + for target in self.targets.iter() { + if unsafe { + // SAFETY: no mutation + matches!( + target.ref_unchecked().as_ref().map(Box::as_ref), + Some(BranchTarget::Stub(_)) + ) + } { + count += 1; + } + } + count + } - // Shape of the branch - shape: BranchShape, + fn assert_layout(&self) { + let shape = self.gen_fn.get_shape(); + assert!( + !(shape == BranchShape::Default && 0 == self.code_size()), + "zero-size branches are incorrect when code for neither targets are adjacent" + // One needs to issue some instruction to steer to the branch target + // when falling through isn't an option. + ); + } } impl std::fmt::Debug for Branch { + // Can't derive this because `targets: !Copy` due to Cell. fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // TODO: expand this if needed. #[derive(Debug)] on Branch gave a - // strange error related to BranchGenFn + let targets = unsafe { + // SAFETY: + // While the references are live for the result of this function, + // no mutation happens because we are only calling derived fmt::Debug functions. + [self.targets[0].as_ptr().as_ref().unwrap(), self.targets[1].as_ptr().as_ref().unwrap()] + }; + formatter .debug_struct("Branch") + .field("block", &self.block) .field("start", &self.start_addr) .field("end", &self.end_addr) - .field("targets", &self.targets) + .field("targets", &targets) + .field("gen_fn", &self.gen_fn) .finish() } } -impl Branch { - // Compute the size of the branch code - fn code_size(&self) -> usize { - (self.end_addr.unwrap().raw_ptr() as usize) - (self.start_addr.unwrap().raw_ptr() as usize) +impl PendingBranch { + /// Set up a branch target at `target_idx`. Find an existing block to branch to + /// or generate a stub for one. + fn set_target( + &self, + target_idx: u32, + target: BlockId, + ctx: &Context, + ocb: &mut OutlinedCb, + ) -> Option<CodePtr> { + // If the block already exists + if let Some(blockref) = find_block_version(target, ctx) { + let block = unsafe { blockref.as_ref() }; + + // Fill out the target with this block + self.targets[target_idx.as_usize()] + .set(Some(Box::new(BranchTarget::Block(blockref)))); + return Some(block.start_addr); + } + + // The branch struct is uninitialized right now but as a stable address. + // We make sure the stub runs after the branch is initialized. 
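// Note (not in the original diff): the Box<MaybeUninit<Branch>> allocated for uninit_branch
// already has a fixed heap address even though the Branch value itself is not written yet,
// so it is sound to bake that address into the stub below as long as the stub cannot run
// before into_branch() initializes the Branch.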
+ let branch_struct_addr = self.uninit_branch.as_ptr() as usize; + let stub_addr = gen_branch_stub(ctx, ocb, branch_struct_addr, target_idx); + + if let Some(stub_addr) = stub_addr { + // Fill the branch target with a stub + self.targets[target_idx.as_usize()].set(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub { + address: Some(stub_addr), + iseq: Cell::new(target.iseq), + iseq_idx: target.idx, + ctx: *ctx, + }))))); + } + + stub_addr + } + + // Construct the branch and wire it up in the grpah + fn into_branch(mut self, uninit_block: BlockRef) -> BranchRef { + // Make the branch + let branch = Branch { + block: uninit_block, + start_addr: self.start_addr.get().unwrap(), + end_addr: Cell::new(self.end_addr.get().unwrap()), + targets: self.targets, + gen_fn: self.gen_fn, + }; + // Move it to the designated place on + // the heap and unwrap MaybeUninit. + self.uninit_branch.write(branch); + let raw_branch: *mut MaybeUninit<Branch> = Box::into_raw(self.uninit_branch); + let branchref = NonNull::new(raw_branch as *mut Branch).expect("no null from Box"); + + // SAFETY: just allocated it + let branch = unsafe { branchref.as_ref() }; + // For block branch targets, put the new branch in the + // appropriate incoming list. + for target in branch.targets.iter() { + // SAFETY: no mutation + let out_block: Option<BlockRef> = unsafe { + target.ref_unchecked().as_ref().and_then(|target| target.get_block()) + }; + + if let Some(out_block) = out_block { + // SAFETY: These blockrefs come from set_target() which only puts blocks from + // ISeqs, which are all initialized. Note that uninit_block isn't in any ISeq + // payload yet. + unsafe { out_block.as_ref() }.incoming.push(branchref); + } + } + + branch.assert_layout(); + + branchref } } -// In case this block is invalidated, these two pieces of info -// help to remove all pointers to this block in the system. -#[derive(Debug)] -pub struct CmeDependency { - pub receiver_klass: VALUE, - pub callee_cme: *const rb_callable_method_entry_t, +// Store info about code used on YJIT entry +pub struct Entry { + // Positions where the generated code starts and ends + start_addr: CodePtr, + end_addr: CodePtr, // exclusive } +/// A [Branch] for a [Block] that is under construction. +pub struct PendingEntry { + pub uninit_entry: Box<MaybeUninit<Entry>>, + start_addr: Cell<Option<CodePtr>>, + end_addr: Cell<Option<CodePtr>>, // exclusive +} + +impl PendingEntry { + // Construct the entry in the heap + pub fn into_entry(mut self) -> EntryRef { + // Make the entry + let entry = Entry { + start_addr: self.start_addr.get().unwrap(), + end_addr: self.end_addr.get().unwrap(), + }; + // Move it to the designated place on the heap and unwrap MaybeUninit. + self.uninit_entry.write(entry); + let raw_entry: *mut MaybeUninit<Entry> = Box::into_raw(self.uninit_entry); + NonNull::new(raw_entry as *mut Entry).expect("no null from Box") + } +} + +// In case a block is invalidated, this helps to remove all pointers to the block. +pub type CmePtr = *const rb_callable_method_entry_t; + /// Basic block version /// Represents a portion of an iseq compiled with a given context /// Note: care must be taken to minimize the size of block_t objects #[derive(Debug)] pub struct Block { - // Bytecode sequence (iseq, idx) this is a version of - blockid: BlockId, + // The byte code instruction sequence this is a version of. + // Can change due to moving GC. 
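// Note (not in the original diff): holding this pointer in a Cell is what lets
// rb_yjit_iseq_update_references() further down rewrite it through a shared &Block
// when compaction moves the iseq object.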
+ iseq: Cell<IseqPtr>, - // Index one past the last instruction for this block in the iseq - end_idx: u32, + // Index range covered by this version in `ISEQ_BODY(iseq)->iseq_encoded`. + iseq_range: Range<IseqIdx>, // Context at the start of the block // This should never be mutated ctx: Context, // Positions where the generated code starts and ends - start_addr: Option<CodePtr>, - end_addr: Option<CodePtr>, + start_addr: CodePtr, + end_addr: Cell<CodePtr>, // List of incoming branches (from predecessors) // These are reference counted (ownership shared between predecessor and successors) - incoming: Vec<BranchRef>, + incoming: MutableBranchList, // NOTE: we might actually be able to store the branches here without refcounting // however, using a RefCell makes it easy to get a pointer to Branch objects // // List of outgoing branches (to successors) - outgoing: Vec<BranchRef>, + outgoing: Box<[BranchRef]>, // FIXME: should these be code pointers instead? // Offsets for GC managed objects in the mainline code block - gc_object_offsets: Vec<u32>, + gc_obj_offsets: Box<[u32]>, // CME dependencies of this block, to help to remove all pointers to this // block in the system. - cme_dependencies: Vec<CmeDependency>, + cme_dependencies: Box<[Cell<CmePtr>]>, // Code address of an exit for `ctx` and `blockid`. // Used for block invalidation. - pub entry_exit: Option<CodePtr>, + entry_exit: Option<CodePtr>, } -/// Reference-counted pointer to a block that can be borrowed mutably. -/// Wrapped so we could implement [Hash] and [Eq] for use with stdlib collections. -#[derive(Debug)] -pub struct BlockRef(Rc<RefCell<Block>>); - -/// Reference-counted pointer to a branch that can be borrowed mutably -type BranchRef = Rc<RefCell<Branch>>; +/// Pointer to a [Block]. +/// +/// # Safety +/// +/// _Never_ derive a `&mut Block` from this and always use +/// [std::ptr::NonNull::as_ref] to get a `&Block`. `&'a mut` +/// in Rust asserts that there are no other references live +/// over the lifetime `'a`. This uniqueness assertion does +/// not hold in many situations for us, even when you ignore +/// the fact that our control flow graph can have cycles. +/// Here are just two examples where we have overlapping references: +/// - Yielding to a different OS thread within the same +/// ractor during compilation +/// - The GC calling [rb_yjit_iseq_mark] during compilation +/// +/// Technically, for soundness, we also need to ensure that +/// the we have the VM lock while the result of `as_ref()` +/// is live, so that no deallocation happens while the +/// shared reference is live. The vast majority of our code run while +/// holding the VM lock, though. +pub type BlockRef = NonNull<Block>; + +/// Pointer to a [Branch]. See [BlockRef] for notes about +/// proper usage. +pub type BranchRef = NonNull<Branch>; + +/// Pointer to an entry that is already added to an ISEQ +pub type EntryRef = NonNull<Entry>; /// List of block versions for a given blockid type VersionList = Vec<BlockRef>; @@ -414,55 +977,51 @@ type VersionList = Vec<BlockRef>; /// An instance of this is stored on each iseq type VersionMap = Vec<VersionList>; -impl BlockRef { - /// Constructor - pub fn new(rc: Rc<RefCell<Block>>) -> Self { - Self(rc) - } - - /// Borrow the block through [RefCell]. - pub fn borrow(&self) -> Ref<'_, Block> { - self.0.borrow() - } - - /// Borrow the block for mutation through [RefCell]. 
- pub fn borrow_mut(&self) -> RefMut<'_, Block> { - self.0.borrow_mut() - } -} - -impl Clone for BlockRef { - /// Clone the [Rc] - fn clone(&self) -> Self { - Self(self.0.clone()) +/// [Interior mutability][1] wrapper for a list of branches. +/// O(n) insertion, but space efficient. We generally expect +/// blocks to have only a few branches. +/// +/// [1]: https://doc.rust-lang.org/std/cell/struct.UnsafeCell.html +#[repr(transparent)] +struct MutableBranchList(Cell<Box<[BranchRef]>>); + +impl MutableBranchList { + fn push(&self, branch: BranchRef) { + // Temporary move the boxed slice out of self. + // oom=abort is load bearing here... + let mut current_list = self.0.take().into_vec(); + current_list.push(branch); + self.0.set(current_list.into_boxed_slice()); } } -impl Hash for BlockRef { - /// Hash the reference by hashing the pointer - fn hash<H: Hasher>(&self, state: &mut H) { - let rc_ptr = Rc::as_ptr(&self.0); - rc_ptr.hash(state); - } -} +impl fmt::Debug for MutableBranchList { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + // SAFETY: the derived Clone for boxed slices does not mutate this Cell + let branches = unsafe { self.0.ref_unchecked().clone() }; -impl PartialEq for BlockRef { - /// Equality defined by allocation identity - fn eq(&self, other: &Self) -> bool { - Rc::ptr_eq(&self.0, &other.0) + formatter.debug_list().entries(branches.into_iter()).finish() } } -/// It's comparison by identity so all the requirements are statisfied -impl Eq for BlockRef {} - /// This is all the data YJIT stores on an iseq /// This will be dynamically allocated by C code /// C code should pass an &mut IseqPayload to us /// when calling into YJIT #[derive(Default)] pub struct IseqPayload { - version_map: VersionMap, + // Basic block versions + pub version_map: VersionMap, + + // Indexes of code pages used by this this ISEQ + pub pages: HashSet<usize>, + + // List of ISEQ entry codes + pub entries: Vec<EntryRef>, + + // Blocks that are invalidated but are not yet deallocated. + // The code GC will free them later. + pub dead_blocks: Vec<BlockRef>, } impl IseqPayload { @@ -478,14 +1037,14 @@ impl IseqPayload { /// Get the payload for an iseq. For safety it's up to the caller to ensure the returned `&mut` /// upholds aliasing rules and that the argument is a valid iseq. -pub unsafe fn load_iseq_payload(iseq: IseqPtr) -> Option<&'static mut IseqPayload> { - let payload = rb_iseq_get_yjit_payload(iseq); +pub fn get_iseq_payload(iseq: IseqPtr) -> Option<&'static mut IseqPayload> { + let payload = unsafe { rb_iseq_get_yjit_payload(iseq) }; let payload: *mut IseqPayload = payload.cast(); - payload.as_mut() + unsafe { payload.as_mut() } } /// Get the payload object associated with an iseq. Create one if none exists. -fn get_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload { +pub fn get_or_create_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload { type VoidPtr = *mut c_void; let payload_non_null = unsafe { @@ -498,7 +1057,8 @@ fn get_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload { // We drop the payload with Box::from_raw when the GC frees the iseq and calls us. // NOTE(alan): Sometimes we read from an iseq without ever writing to it. // We allocate in those cases anyways. 
- let new_payload = Box::into_raw(Box::new(IseqPayload::default())); + let new_payload = IseqPayload::default(); + let new_payload = Box::into_raw(Box::new(new_payload)); rb_iseq_set_yjit_payload(iseq, new_payload as VoidPtr); new_payload @@ -514,6 +1074,68 @@ fn get_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload { unsafe { payload_non_null.as_mut() }.unwrap() } +/// Iterate over all existing ISEQs +pub fn for_each_iseq<F: FnMut(IseqPtr)>(mut callback: F) { + unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) { + // SAFETY: points to the local below + let callback: &mut &mut dyn FnMut(IseqPtr) -> bool = unsafe { std::mem::transmute(&mut *data) }; + callback(iseq); + } + let mut data: &mut dyn FnMut(IseqPtr) = &mut callback; + unsafe { rb_yjit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) }; +} + +/// Iterate over all ISEQ payloads +pub fn for_each_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) { + for_each_iseq(|iseq| { + if let Some(iseq_payload) = get_iseq_payload(iseq) { + callback(iseq_payload); + } + }); +} + +/// Iterate over all on-stack ISEQs +pub fn for_each_on_stack_iseq<F: FnMut(IseqPtr)>(mut callback: F) { + unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) { + // SAFETY: points to the local below + let callback: &mut &mut dyn FnMut(IseqPtr) -> bool = unsafe { std::mem::transmute(&mut *data) }; + callback(iseq); + } + let mut data: &mut dyn FnMut(IseqPtr) = &mut callback; + unsafe { rb_jit_cont_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) }; +} + +/// Iterate over all on-stack ISEQ payloads +pub fn for_each_on_stack_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) { + for_each_on_stack_iseq(|iseq| { + if let Some(iseq_payload) = get_iseq_payload(iseq) { + callback(iseq_payload); + } + }); +} + +/// Iterate over all NOT on-stack ISEQ payloads +pub fn for_each_off_stack_iseq_payload<F: FnMut(&mut IseqPayload)>(mut callback: F) { + // Get all ISEQs on the heap. Note that rb_objspace_each_objects() runs GC first, + // which could move ISEQ pointers when GC.auto_compact = true. + // So for_each_on_stack_iseq() must be called after this, which doesn't run GC. + let mut iseqs: Vec<IseqPtr> = vec![]; + for_each_iseq(|iseq| iseqs.push(iseq)); + + // Get all ISEQs that are on a CFP of existing ECs. + let mut on_stack_iseqs: HashSet<IseqPtr> = HashSet::new(); + for_each_on_stack_iseq(|iseq| { on_stack_iseqs.insert(iseq); }); + + // Invoke the callback for iseqs - on_stack_iseqs + for iseq in iseqs { + if !on_stack_iseqs.contains(&iseq) { + if let Some(iseq_payload) = get_iseq_payload(iseq) { + callback(iseq_payload); + } + } + } +} + /// Free the per-iseq payload #[no_mangle] pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) { @@ -526,31 +1148,50 @@ pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) { } }; - use crate::invariants; - // Take ownership of the payload with Box::from_raw(). // It drops right before this function returns. // SAFETY: We got the pointer from Box::into_raw(). let payload = unsafe { Box::from_raw(payload) }; - // Remove all blocks in the payload from global invariants table. + // Free all blocks in version_map. The GC doesn't free running iseqs. 
for versions in &payload.version_map { for block in versions { - invariants::block_assumptions_free(&block); + // SAFETY: blocks in the version_map are always well connected + unsafe { free_block(*block, true) }; } } + + // Free dead blocks + for block in payload.dead_blocks { + unsafe { free_block(block, false) }; + } + + // Free all entries + for entryref in payload.entries.iter() { + let entry = unsafe { Box::from_raw(entryref.as_ptr()) }; + mem::drop(entry); + } + + // Increment the freed iseq count + incr_counter!(freed_iseq_count); } -/// GC callback for marking GC objects in the the per-iseq payload. +/// GC callback for marking GC objects in the per-iseq payload. #[no_mangle] pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) { let payload = if payload.is_null() { // Nothing to mark. return; } else { - // SAFETY: It looks like the GC takes the VM lock while marking - // so we should be satisfying aliasing rules here. - unsafe { &*(payload as *const IseqPayload) } + // SAFETY: The GC takes the VM lock while marking, which + // we assert, so we should be synchronized and data race free. + // + // For aliasing, having the VM lock hopefully also implies that no one + // else has an overlapping &mut IseqPayload. + unsafe { + rb_yjit_assert_holding_vm_lock(); + &*(payload as *const IseqPayload) + } }; // For marking VALUEs written into the inline code block. @@ -559,29 +1200,56 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) { for versions in &payload.version_map { for block in versions { - let block = block.borrow(); + // SAFETY: all blocks inside version_map are initialized. + let block = unsafe { block.as_ref() }; + mark_block(block, cb, false); + } + } + // Mark dead blocks, since there could be stubs pointing at them + for blockref in &payload.dead_blocks { + // SAFETY: dead blocks come from version_map, which only have initialized blocks + let block = unsafe { blockref.as_ref() }; + mark_block(block, cb, true); + } - unsafe { rb_gc_mark_movable(block.blockid.iseq.into()) }; + return; - // Mark method entry dependencies - for cme_dep in &block.cme_dependencies { - unsafe { rb_gc_mark_movable(cme_dep.receiver_klass) }; - unsafe { rb_gc_mark_movable(cme_dep.callee_cme.into()) }; - } + fn mark_block(block: &Block, cb: &CodeBlock, dead: bool) { + unsafe { rb_gc_mark_movable(block.iseq.get().into()) }; - // Mark outgoing branch entries - for branch in &block.outgoing { - let branch = branch.borrow(); - for target in &branch.targets { - if let Some(target) = target { - unsafe { rb_gc_mark_movable(target.iseq.into()) }; - } + // Mark method entry dependencies + for cme_dep in block.cme_dependencies.iter() { + unsafe { rb_gc_mark_movable(cme_dep.get().into()) }; + } + + // Mark outgoing branch entries + for branch in block.outgoing.iter() { + let branch = unsafe { branch.as_ref() }; + for target in branch.targets.iter() { + // SAFETY: no mutation inside unsafe + let target_iseq = unsafe { + target.ref_unchecked().as_ref().and_then(|target| { + // Avoid get_blockid() on blockref. Can be dangling on dead blocks, + // and the iseq housing the block already naturally handles it. + if target.get_block().is_some() { + None + } else { + Some(target.get_blockid().iseq) + } + }) + }; + + if let Some(target_iseq) = target_iseq { + unsafe { rb_gc_mark_movable(target_iseq.into()) }; } } + } - // Walk over references to objects in generated code. 
- for offset in &block.gc_object_offsets { - let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(); + // Mark references to objects in generated code. + // Skip for dead blocks since they shouldn't run. + if !dead { + for offset in block.gc_obj_offsets.iter() { + let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(cb); // Creating an unaligned pointer is well defined unlike in C. let value_address = value_address as *const VALUE; @@ -595,7 +1263,7 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) { } } -/// GC callback for updating GC objects in the the per-iseq payload. +/// GC callback for updating GC objects in the per-iseq payload. /// This is a mirror of [rb_yjit_iseq_mark]. #[no_mangle] pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) { @@ -603,9 +1271,15 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) { // Nothing to update. return; } else { - // SAFETY: It looks like the GC takes the VM lock while updating references - // so we should be satisfying aliasing rules here. - unsafe { &*(payload as *const IseqPayload) } + // SAFETY: The GC takes the VM lock while marking, which + // we assert, so we should be synchronized and data race free. + // + // For aliasing, having the VM lock hopefully also implies that no one + // else has an overlapping &mut IseqPayload. + unsafe { + rb_yjit_assert_holding_vm_lock(); + &*(payload as *const IseqPayload) + } }; // Evict other threads from generated code since we are about to patch them. @@ -616,32 +1290,73 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) { let cb = CodegenGlobals::get_inline_cb(); for versions in &payload.version_map { - for block in versions { - let mut block = block.borrow_mut(); + for version in versions { + // SAFETY: all blocks inside version_map are initialized + let block = unsafe { version.as_ref() }; + block_update_references(block, cb, false); + } + } + // Update dead blocks, since there could be stubs pointing at them + for blockref in &payload.dead_blocks { + // SAFETY: dead blocks come from version_map, which only have initialized blocks + let block = unsafe { blockref.as_ref() }; + block_update_references(block, cb, true); + } - block.blockid.iseq = unsafe { rb_gc_location(block.blockid.iseq.into()) }.as_iseq(); + // Note that we would have returned already if YJIT is off. 
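// Note (not in the original diff): a reading of the calls below, not stated in the diff --
// patching object references with write_mem() above needs the code pages to be writable,
// so both the inline and outlined code blocks are switched back to executable here before
// the VM resumes running generated code.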
+ cb.mark_all_executable(); - // Update method entry dependencies - for cme_dep in &mut block.cme_dependencies { - cme_dep.receiver_klass = unsafe { rb_gc_location(cme_dep.receiver_klass) }; - cme_dep.callee_cme = unsafe { rb_gc_location(cme_dep.callee_cme.into()) }.as_cme(); - } + CodegenGlobals::get_outlined_cb() + .unwrap() + .mark_all_executable(); - // Update outgoing branch entries - for branch in &block.outgoing { - let mut branch = branch.borrow_mut(); - for target in &mut branch.targets { - if let Some(target) = target { - target.iseq = unsafe { rb_gc_location(target.iseq.into()) }.as_iseq(); - } + return; + + fn block_update_references(block: &Block, cb: &mut CodeBlock, dead: bool) { + block.iseq.set(unsafe { rb_gc_location(block.iseq.get().into()) }.as_iseq()); + + // Update method entry dependencies + for cme_dep in block.cme_dependencies.iter() { + let cur_cme: VALUE = cme_dep.get().into(); + let new_cme = unsafe { rb_gc_location(cur_cme) }.as_cme(); + cme_dep.set(new_cme); + } + + // Update outgoing branch entries + for branch in block.outgoing.iter() { + let branch = unsafe { branch.as_ref() }; + for target in branch.targets.iter() { + // SAFETY: no mutation inside unsafe + let current_iseq = unsafe { + target.ref_unchecked().as_ref().and_then(|target| { + // Avoid get_blockid() on blockref. Can be dangling on dead blocks, + // and the iseq housing the block already naturally handles it. + if target.get_block().is_some() { + None + } else { + Some(target.get_blockid().iseq) + } + }) + }; + + if let Some(current_iseq) = current_iseq { + let updated_iseq = unsafe { rb_gc_location(current_iseq.into()) } + .as_iseq(); + // SAFETY: the Cell::set is not on the reference given out + // by ref_unchecked. + unsafe { target.ref_unchecked().as_ref().unwrap().set_iseq(updated_iseq) }; } } + } - // Walk over references to objects in generated code. - for offset in &block.gc_object_offsets { + // Update references to objects in generated code. + // Skip for dead blocks since they shouldn't run and + // so there is no potential of writing over invalidation jumps + if !dead { + for offset in block.gc_obj_offsets.iter() { let offset_to_value = offset.as_usize(); let value_code_ptr = cb.get_ptr(offset_to_value); - let value_ptr: *const u8 = value_code_ptr.raw_ptr(); + let value_ptr: *const u8 = value_code_ptr.raw_ptr(cb); // Creating an unaligned pointer is well defined unlike in C. let value_ptr = value_ptr as *mut VALUE; @@ -653,25 +1368,30 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) { if new_addr != object { for (byte_idx, &byte) in new_addr.as_u64().to_le_bytes().iter().enumerate() { let byte_code_ptr = value_code_ptr.add_bytes(byte_idx); - cb.get_mem().write_byte(byte_code_ptr, byte) + cb.write_mem(byte_code_ptr, byte) .expect("patching existing code should be within bounds"); } } } } - } - - // Note that we would have returned already if YJIT is off. - cb.mark_all_executable(); - CodegenGlobals::get_outlined_cb() - .unwrap() - .mark_all_executable(); + } } /// Get all blocks for a particular place in an iseq. 
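// Note (not in the original diff): version_map is a Vec with one VersionList per instruction
// index, so an index at or beyond version_map.len() simply means nothing has been compiled
// for that spot yet; the reworked get_version_list() below reports that case as None.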
-fn get_version_list(blockid: BlockId) -> &'static mut VersionList { - let payload = get_iseq_payload(blockid.iseq); +fn get_version_list(blockid: BlockId) -> Option<&'static mut VersionList> { + let insn_idx = blockid.idx.as_usize(); + match get_iseq_payload(blockid.iseq) { + Some(payload) if insn_idx < payload.version_map.len() => { + Some(payload.version_map.get_mut(insn_idx).unwrap()) + }, + _ => None + } +} + +/// Get or create all blocks for a particular place in an iseq. +fn get_or_create_version_list(blockid: BlockId) -> &'static mut VersionList { + let payload = get_or_create_iseq_payload(blockid.iseq); let insn_idx = blockid.idx.as_usize(); // Expand the version map as necessary @@ -686,32 +1406,39 @@ fn get_version_list(blockid: BlockId) -> &'static mut VersionList { /// Take all of the blocks for a particular place in an iseq pub fn take_version_list(blockid: BlockId) -> VersionList { - let payload = get_iseq_payload(blockid.iseq); let insn_idx = blockid.idx.as_usize(); - - if insn_idx >= payload.version_map.len() { - VersionList::default() - } else { - mem::take(&mut payload.version_map[insn_idx]) + match get_iseq_payload(blockid.iseq) { + Some(payload) if insn_idx < payload.version_map.len() => { + mem::take(&mut payload.version_map[insn_idx]) + }, + _ => VersionList::default(), } } /// Count the number of block versions matching a given blockid -fn get_num_versions(blockid: BlockId) -> usize { +/// `inlined: true` counts inlined versions, and `inlined: false` counts other versions. +fn get_num_versions(blockid: BlockId, inlined: bool) -> usize { let insn_idx = blockid.idx.as_usize(); - let payload = get_iseq_payload(blockid.iseq); - - payload - .version_map - .get(insn_idx) - .map(|versions| versions.len()) - .unwrap_or(0) + match get_iseq_payload(blockid.iseq) { + Some(payload) => { + payload + .version_map + .get(insn_idx) + .map(|versions| { + versions.iter().filter(|&&version| + unsafe { version.as_ref() }.ctx.inline() == inlined + ).count() + }) + .unwrap_or(0) + } + None => 0, + } } -/// Get a list of block versions generated for an iseq +/// Get or create a list of block versions generated for an iseq /// This is used for disassembly (see disasm.rs) -pub fn get_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> { - let payload = get_iseq_payload(iseq); +pub fn get_or_create_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> { + let payload = get_or_create_iseq_payload(iseq); let mut blocks = Vec::<BlockRef>::new(); @@ -722,7 +1449,7 @@ pub fn get_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> { // For each version at this instruction index for version in version_list { // Clone the block ref and add it to the list - blocks.push(version.clone()); + blocks.push(*version); } } @@ -732,82 +1459,125 @@ pub fn get_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> { /// Retrieve a basic block version for an (iseq, idx) tuple /// This will return None if no version is found fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> { - let versions = get_version_list(blockid); + let versions = match get_version_list(blockid) { + Some(versions) => versions, + None => return None, + }; // Best match found let mut best_version: Option<BlockRef> = None; let mut best_diff = usize::MAX; // For each version matching the blockid - for blockref in versions.iter_mut() { - let block = blockref.borrow(); - let diff = ctx.diff(&block.ctx); + for blockref in versions.iter() { + let block = unsafe { blockref.as_ref() }; // Note that we always prefer the first matching // version 
found because of inline-cache chains - if diff < best_diff { - best_version = Some(blockref.clone()); - best_diff = diff; - } - } - - // If greedy versioning is enabled - if get_option!(greedy_versioning) { - // If we're below the version limit, don't settle for an imperfect match - if versions.len() + 1 < get_option!(max_versions) && best_diff > 0 { - return None; + match ctx.diff(&block.ctx) { + TypeDiff::Compatible(diff) if diff < best_diff => { + best_version = Some(*blockref); + best_diff = diff; + } + _ => {} } } return best_version; } +/// Allow inlining a Block up to MAX_INLINE_VERSIONS times. +const MAX_INLINE_VERSIONS: usize = 1000; + /// Produce a generic context when the block version limit is hit for a blockid pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context { // Guard chains implement limits separately, do nothing - if ctx.chain_depth > 0 { + if ctx.get_chain_depth() > 0 { return *ctx; } + let next_versions = get_num_versions(blockid, ctx.inline()) + 1; + let max_versions = if ctx.inline() { + MAX_INLINE_VERSIONS + } else { + get_option!(max_versions) + }; + // If this block version we're about to add will hit the version limit - if get_num_versions(blockid) + 1 >= get_option!(max_versions) { + if next_versions >= max_versions { // Produce a generic context that stores no type information, // but still respects the stack_size and sp_offset constraints. // This new context will then match all future requests. - let mut generic_ctx = Context::default(); - generic_ctx.stack_size = ctx.stack_size; - generic_ctx.sp_offset = ctx.sp_offset; + let generic_ctx = ctx.get_generic_ctx(); + + if cfg!(debug_assertions) { + let mut ctx = ctx.clone(); + if ctx.inline() { + // Suppress TypeDiff::Incompatible from ctx.diff(). We return TypeDiff::Incompatible + // to keep inlining blocks until we hit the limit, but it's safe to give up inlining. + ctx.inline_block = 0; + assert!(generic_ctx.inline_block == 0); + } + + assert_ne!( + TypeDiff::Incompatible, + ctx.diff(&generic_ctx), + "should substitute a compatible context", + ); + } - // Mutate the incoming context return generic_ctx; } + incr_counter_to!(max_inline_versions, next_versions); return *ctx; } -/// Keep track of a block version. Block should be fully constructed. -/// Uses `cb` for running write barriers. -fn add_block_version(blockref: &BlockRef, cb: &CodeBlock) { - let block = blockref.borrow(); +/// Install a block version into its [IseqPayload], letting the GC track its +/// lifetime, and allowing it to be considered for use for other +/// blocks we might generate. Uses `cb` for running write barriers. +/// +/// # Safety +/// +/// The block must be fully initialized. Its incoming and outgoing edges, +/// if there are any, must point to initialized blocks, too. +/// +/// Note that the block might gain edges after this function returns, +/// as can happen during [gen_block_series]. Initialized here doesn't mean +/// ready to be consumed or that the machine code tracked by the block is +/// ready to be run. +/// +/// Due to this transient state where a block is tracked by the GC by +/// being inside an [IseqPayload] but not ready to be executed, it's +/// generally unsound to call any Ruby methods during codegen. That has +/// the potential to run blocks which are not ready. 
+unsafe fn add_block_version(blockref: BlockRef, cb: &CodeBlock) { + // SAFETY: caller ensures initialization + let block = unsafe { blockref.as_ref() }; // Function entry blocks must have stack size 0 - assert!(!(block.blockid.idx == 0 && block.ctx.stack_size > 0)); + assert!(!(block.iseq_range.start == 0 && block.ctx.stack_size > 0)); - let version_list = get_version_list(block.blockid); + let version_list = get_or_create_version_list(block.get_blockid()); - version_list.push(blockref.clone()); + // If this the first block being compiled with this block id + if version_list.len() == 0 { + incr_counter!(compiled_blockid_count); + } + + version_list.push(blockref); + version_list.shrink_to_fit(); // By writing the new block to the iseq, the iseq now // contains new references to Ruby objects. Run write barriers. - let iseq: VALUE = block.blockid.iseq.into(); + let iseq: VALUE = block.iseq.get().into(); for dep in block.iter_cme_deps() { - obj_written!(iseq, dep.receiver_klass); - obj_written!(iseq, dep.callee_cme.into()); + obj_written!(iseq, dep.into()); } // Run write barriers for all objects in generated code. - for offset in &block.gc_object_offsets { - let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(); + for offset in block.gc_obj_offsets.iter() { + let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(cb); // Creating an unaligned pointer is well defined unlike in C. let value_address: *const VALUE = value_address.cast(); @@ -816,251 +1586,234 @@ fn add_block_version(blockref: &BlockRef, cb: &CodeBlock) { } incr_counter!(compiled_block_count); + + // Mark code pages for code GC + let iseq_payload = get_iseq_payload(block.iseq.get()).unwrap(); + for page in cb.addrs_to_pages(block.start_addr, block.end_addr.get()) { + iseq_payload.pages.insert(page); + } } /// Remove a block version from the version map of its parent ISEQ fn remove_block_version(blockref: &BlockRef) { - let block = blockref.borrow(); - let version_list = get_version_list(block.blockid); + let block = unsafe { blockref.as_ref() }; + let version_list = match get_version_list(block.get_blockid()) { + Some(version_list) => version_list, + None => return, + }; // Retain the versions that are not this one version_list.retain(|other| blockref != other); } -//=========================================================================== -// I put the implementation of traits for core.rs types below -// We can move these closer to the above structs later if we want. -//=========================================================================== +impl JITState { + // Finish compiling and turn a jit state into a block + // note that the block is still not in shape. 
+ pub fn into_block(self, end_insn_idx: IseqIdx, start_addr: CodePtr, end_addr: CodePtr, gc_obj_offsets: Vec<u32>) -> BlockRef { + // Allocate the block and get its pointer + let blockref: *mut MaybeUninit<Block> = Box::into_raw(Box::new(MaybeUninit::uninit())); + + incr_counter_by!(num_gc_obj_refs, gc_obj_offsets.len()); + + // Make the new block + let block = MaybeUninit::new(Block { + start_addr, + iseq: Cell::new(self.get_iseq()), + iseq_range: self.get_starting_insn_idx()..end_insn_idx, + ctx: self.get_starting_ctx(), + end_addr: Cell::new(end_addr), + incoming: MutableBranchList(Cell::default()), + gc_obj_offsets: gc_obj_offsets.into_boxed_slice(), + entry_exit: self.get_block_entry_exit(), + cme_dependencies: self.method_lookup_assumptions.into_iter().map(Cell::new).collect(), + // Pending branches => actual branches + outgoing: self.pending_outgoing.into_iter().map(|pending_out| { + let pending_out = Rc::try_unwrap(pending_out) + .ok().expect("all PendingBranchRefs should be unique when ready to construct a Block"); + pending_out.into_branch(NonNull::new(blockref as *mut Block).expect("no null from Box")) + }).collect() + }); + // Initialize it on the heap + // SAFETY: allocated with Box above + unsafe { ptr::write(blockref, block) }; + + // Block is initialized now. Note that MaybeUnint<T> has the same layout as T. + let blockref = NonNull::new(blockref as *mut Block).expect("no null from Box"); -impl Block { - pub fn new(blockid: BlockId, ctx: &Context) -> BlockRef { - let block = Block { - blockid, - end_idx: 0, - ctx: *ctx, - start_addr: None, - end_addr: None, - incoming: Vec::new(), - outgoing: Vec::new(), - gc_object_offsets: Vec::new(), - cme_dependencies: Vec::new(), - entry_exit: None, - }; + // Track all the assumptions the block makes as invariants + if self.block_assumes_single_ractor { + track_single_ractor_assumption(blockref); + } + for bop in self.bop_assumptions { + track_bop_assumption(blockref, bop); + } + // SAFETY: just allocated it above + for cme in unsafe { blockref.as_ref() }.cme_dependencies.iter() { + track_method_lookup_stability_assumption(blockref, cme.get()); + } + if let Some(idlist) = self.stable_constant_names_assumption { + track_stable_constant_names_assumption(blockref, idlist); + } + for klass in self.no_singleton_class_assumptions { + track_no_singleton_class_assumption(blockref, klass); + } - // Wrap the block in a reference counted refcell - // so that the block ownership can be shared - BlockRef::new(Rc::new(RefCell::new(block))) + blockref } +} +impl Block { pub fn get_blockid(&self) -> BlockId { - self.blockid + BlockId { iseq: self.iseq.get(), idx: self.iseq_range.start } } - pub fn get_end_idx(&self) -> u32 { - self.end_idx + pub fn get_end_idx(&self) -> IseqIdx { + self.iseq_range.end } - pub fn get_ctx(&self) -> Context { - self.ctx + pub fn get_ctx_count(&self) -> usize { + let mut count = 1; // block.ctx + for branch in self.outgoing.iter() { + // SAFETY: &self implies it's initialized + count += unsafe { branch.as_ref() }.get_stub_count(); + } + count } #[allow(unused)] - pub fn get_start_addr(&self) -> Option<CodePtr> { + pub fn get_start_addr(&self) -> CodePtr { self.start_addr } #[allow(unused)] - pub fn get_end_addr(&self) -> Option<CodePtr> { - self.end_addr + pub fn get_end_addr(&self) -> CodePtr { + self.end_addr.get() } /// Get an immutable iterator over cme dependencies - pub fn iter_cme_deps(&self) -> std::slice::Iter<'_, CmeDependency> { - self.cme_dependencies.iter() + pub fn iter_cme_deps(&self) -> impl Iterator<Item 
= CmePtr> + '_ { + self.cme_dependencies.iter().map(Cell::get) } - /// Set the starting address in the generated code for the block - /// This can be done only once for a block - pub fn set_start_addr(&mut self, addr: CodePtr) { - assert!(self.start_addr.is_none()); - self.start_addr = Some(addr); - } - - /// Set the end address in the generated for the block - /// This can be done only once for a block - pub fn set_end_addr(&mut self, addr: CodePtr) { - // The end address can only be set after the start address is set - assert!(self.start_addr.is_some()); - - // TODO: assert constraint that blocks can shrink but not grow in length - self.end_addr = Some(addr); - } - - /// Set the index of the last instruction in the block - /// This can be done only once for a block - pub fn set_end_idx(&mut self, end_idx: u32) { - assert!(self.end_idx == 0); - self.end_idx = end_idx; - } - - pub fn add_gc_object_offset(self: &mut Block, ptr_offset: u32) { - self.gc_object_offsets.push(ptr_offset); - } - - /// Instantiate a new CmeDependency struct and add it to the list of - /// dependencies for this block. - pub fn add_cme_dependency( - &mut self, - receiver_klass: VALUE, - callee_cme: *const rb_callable_method_entry_t, - ) { - self.cme_dependencies.push(CmeDependency { - receiver_klass, - callee_cme, - }); + // Push an incoming branch ref and shrink the vector + fn push_incoming(&self, branch: BranchRef) { + self.incoming.push(branch); } // Compute the size of the block code pub fn code_size(&self) -> usize { - (self.end_addr.unwrap().raw_ptr() as usize) - (self.start_addr.unwrap().raw_ptr() as usize) + (self.end_addr.get().as_offset() - self.start_addr.as_offset()).try_into().unwrap() } } impl Context { - pub fn new_with_stack_size(size: i16) -> Self { - return Context { - stack_size: size as u16, - sp_offset: size, - chain_depth: 0, - local_types: [Type::Unknown; MAX_LOCAL_TYPES], - temp_types: [Type::Unknown; MAX_TEMP_TYPES], - self_type: Type::Unknown, - temp_mapping: [MapToStack; MAX_TEMP_TYPES], - }; + pub fn get_stack_size(&self) -> u8 { + self.stack_size } - pub fn new() -> Self { - return Self::new_with_stack_size(0); + pub fn set_stack_size(&mut self, stack_size: u8) { + self.stack_size = stack_size; } - pub fn get_stack_size(&self) -> u16 { - self.stack_size + /// Create a new Context that is compatible with self but doesn't have type information. + pub fn get_generic_ctx(&self) -> Context { + let mut generic_ctx = Context::default(); + generic_ctx.stack_size = self.stack_size; + generic_ctx.sp_offset = self.sp_offset; + generic_ctx.reg_temps = self.reg_temps; + if self.is_return_landing() { + generic_ctx.set_as_return_landing(); + } + if self.is_deferred() { + generic_ctx.mark_as_deferred(); + } + generic_ctx } - pub fn get_sp_offset(&self) -> i16 { + /// Create a new Context instance with a given stack_size and sp_offset adjusted + /// accordingly. This is useful when you want to virtually rewind a stack_size for + /// generating a side exit while considering past sp_offset changes on gen_save_sp. 
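A small worked example of that adjustment, using only the accessors shown here (the concrete numbers are made up):

// Illustrative only: suppose five temps were pushed since the last SP write,
// so stack_size and sp_offset have both advanced to 5.
let mut ctx = Context::default();
ctx.set_stack_size(5);
ctx.set_sp_offset(5);

// Rewinding to stack_size 2 subtracts the same delta from sp_offset,
// so SP math in a side exit generated from `rewound` stays correct.
let rewound = ctx.with_stack_size(2);
assert_eq!(rewound.get_stack_size(), 2);
assert_eq!(rewound.get_sp_offset(), 2); // 5 - (5 - 2)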
+ pub fn with_stack_size(&self, stack_size: u8) -> Context { + let mut ctx = *self; + ctx.sp_offset -= (ctx.get_stack_size() as isize - stack_size as isize) as i8; + ctx.stack_size = stack_size; + ctx + } + + pub fn get_sp_offset(&self) -> i8 { self.sp_offset } - pub fn set_sp_offset(&mut self, offset: i16) { + pub fn set_sp_offset(&mut self, offset: i8) { self.sp_offset = offset; } - pub fn get_chain_depth(&self) -> u8 { - self.chain_depth + pub fn get_reg_temps(&self) -> RegTemps { + self.reg_temps } - pub fn reset_chain_depth(&mut self) { - self.chain_depth = 0; + pub fn set_reg_temps(&mut self, reg_temps: RegTemps) { + self.reg_temps = reg_temps; } - pub fn increment_chain_depth(&mut self) { - self.chain_depth += 1; + pub fn get_chain_depth(&self) -> u8 { + self.chain_depth_and_flags & CHAIN_DEPTH_MASK } - /// Get an operand for the adjusted stack pointer address - pub fn sp_opnd(&self, offset_bytes: isize) -> X86Opnd { - let offset = ((self.sp_offset as isize) * (SIZEOF_VALUE as isize)) + offset_bytes; - let offset = offset as i32; - return mem_opnd(64, REG_SP, offset); + pub fn reset_chain_depth_and_defer(&mut self) { + self.chain_depth_and_flags &= !CHAIN_DEPTH_MASK; + self.chain_depth_and_flags &= !DEFER_BIT; } - /// Push one new value on the temp stack with an explicit mapping - /// Return a pointer to the new stack top - pub fn stack_push_mapping(&mut self, (mapping, temp_type): (TempMapping, Type)) -> X86Opnd { - // If type propagation is disabled, store no types - if get_option!(no_type_prop) { - return self.stack_push_mapping((mapping, Type::Unknown)); - } - - let stack_size: usize = self.stack_size.into(); - - // Keep track of the type and mapping of the value - if stack_size < MAX_TEMP_TYPES { - self.temp_mapping[stack_size] = mapping; - self.temp_types[stack_size] = temp_type; - - if let MapToLocal(idx) = mapping { - assert!((idx as usize) < MAX_LOCAL_TYPES); - } + pub fn increment_chain_depth(&mut self) { + if self.get_chain_depth() == CHAIN_DEPTH_MASK { + panic!("max block version chain depth reached!"); } - - self.stack_size += 1; - self.sp_offset += 1; - - // SP points just above the topmost value - let offset = ((self.sp_offset as i32) - 1) * (SIZEOF_VALUE as i32); - return mem_opnd(64, REG_SP, offset); + self.chain_depth_and_flags += 1; } - /// Push one new value on the temp stack - /// Return a pointer to the new stack top - pub fn stack_push(&mut self, val_type: Type) -> X86Opnd { - return self.stack_push_mapping((MapToStack, val_type)); + pub fn set_as_return_landing(&mut self) { + self.chain_depth_and_flags |= RETURN_LANDING_BIT; } - /// Push the self value on the stack - pub fn stack_push_self(&mut self) -> X86Opnd { - return self.stack_push_mapping((MapToSelf, Type::Unknown)); + pub fn clear_return_landing(&mut self) { + self.chain_depth_and_flags &= !RETURN_LANDING_BIT; } - /// Push a local variable on the stack - pub fn stack_push_local(&mut self, local_idx: usize) -> X86Opnd { - if local_idx >= MAX_LOCAL_TYPES { - return self.stack_push(Type::Unknown); - } - - return self.stack_push_mapping((MapToLocal(local_idx as u8), Type::Unknown)); + pub fn is_return_landing(&self) -> bool { + self.chain_depth_and_flags & RETURN_LANDING_BIT != 0 } - // Pop N values off the stack - // Return a pointer to the stack top before the pop operation - pub fn stack_pop(&mut self, n: usize) -> X86Opnd { - assert!(n <= self.stack_size.into()); - - // SP points just above the topmost value - let offset = ((self.sp_offset as i32) - 1) * (SIZEOF_VALUE as i32); - let top = mem_opnd(64, 
REG_SP, offset); - - // Clear the types of the popped values - for i in 0..n { - let idx: usize = (self.stack_size as usize) - i - 1; - - if idx < MAX_TEMP_TYPES { - self.temp_types[idx] = Type::Unknown; - self.temp_mapping[idx] = MapToStack; - } - } - - self.stack_size -= n as u16; - self.sp_offset -= n as i16; + pub fn mark_as_deferred(&mut self) { + self.chain_depth_and_flags |= DEFER_BIT; + } - return top; + pub fn is_deferred(&self) -> bool { + self.chain_depth_and_flags & DEFER_BIT != 0 } - /// Get an operand pointing to a slot on the temp stack - pub fn stack_opnd(&self, idx: i32) -> X86Opnd { - // SP points just above the topmost value - let offset = ((self.sp_offset as i32) - 1 - idx) * (SIZEOF_VALUE as i32); - let opnd = mem_opnd(64, REG_SP, offset); - return opnd; + /// Get an operand for the adjusted stack pointer address + pub fn sp_opnd(&self, offset: i32) -> Opnd { + let offset = (self.sp_offset as i32 + offset) * SIZEOF_VALUE_I32; + return Opnd::mem(64, SP, offset); + } + + /// Stop using a register for a given stack temp. + /// This allows us to reuse the register for a value that we know is dead + /// and will no longer be used (e.g. popped stack temp). + pub fn dealloc_temp_reg(&mut self, stack_idx: u8) { + if stack_idx < MAX_REG_TEMPS { + let mut reg_temps = self.get_reg_temps(); + reg_temps.set(stack_idx, false); + self.set_reg_temps(reg_temps); + } } /// Get the type of an instruction operand - pub fn get_opnd_type(&self, opnd: InsnOpnd) -> Type { + pub fn get_opnd_type(&self, opnd: YARVOpnd) -> Type { match opnd { SelfOpnd => self.self_type, StackOpnd(idx) => { - let idx = idx as u16; assert!(idx < self.stack_size); let stack_idx: usize = (self.stack_size - 1 - idx).into(); @@ -1069,14 +1822,15 @@ impl Context { return Type::Unknown; } - let mapping = self.temp_mapping[stack_idx]; + let mapping = self.get_temp_mapping(stack_idx); - match mapping { + match mapping.get_kind() { MapToSelf => self.self_type, - MapToStack => self.temp_types[(self.stack_size - 1 - idx) as usize], - MapToLocal(idx) => { + MapToStack => mapping.get_type(), + MapToLocal => { + let idx = mapping.get_local_idx(); assert!((idx as usize) < MAX_LOCAL_TYPES); - return self.local_types[idx as usize]; + return self.get_local_type(idx.into()); } } } @@ -1084,15 +1838,90 @@ impl Context { } /// Get the currently tracked type for a local variable - pub fn get_local_type(&self, idx: usize) -> Type { - *self.local_types.get(idx).unwrap_or(&Type::Unknown) + pub fn get_local_type(&self, local_idx: usize) -> Type { + if local_idx >= MAX_LOCAL_TYPES { + return Type::Unknown + } else { + // Each type is stored in 4 bits + let type_bits = (self.local_types >> (4 * local_idx)) & 0b1111; + unsafe { transmute::<u8, Type>(type_bits as u8) } + } + } + + /// Get the current temp mapping for a given stack slot + fn get_temp_mapping(&self, temp_idx: usize) -> TempMapping { + assert!(temp_idx < MAX_TEMP_TYPES); + + // Extract the temp mapping kind + let kind_bits = (self.temp_mapping_kind >> (2 * temp_idx)) & 0b11; + let temp_kind = unsafe { transmute::<u8, TempMappingKind>(kind_bits as u8) }; + + // Extract the payload bits (temp type or local idx) + let payload_bits = (self.temp_payload >> (4 * temp_idx)) & 0b1111; + + match temp_kind { + MapToSelf => TempMapping::map_to_self(), + + MapToStack => { + TempMapping::map_to_stack( + unsafe { transmute::<u8, Type>(payload_bits as u8) } + ) + } + + MapToLocal => { + TempMapping::map_to_local( + payload_bits as u8 + ) + } + } + } + + /// Get the current temp mapping for a 
given stack slot + fn set_temp_mapping(&mut self, temp_idx: usize, mapping: TempMapping) { + assert!(temp_idx < MAX_TEMP_TYPES); + + // Extract the kind bits + let mapping_kind = mapping.get_kind(); + let kind_bits = unsafe { transmute::<TempMappingKind, u8>(mapping_kind) }; + assert!(kind_bits <= 0b11); + + // Extract the payload bits + let payload_bits = match mapping_kind { + MapToSelf => 0, + + MapToStack => { + let t = mapping.get_type(); + unsafe { transmute::<Type, u8>(t) } + } + + MapToLocal => { + mapping.get_local_idx() + } + }; + assert!(payload_bits <= 0b1111); + + // Update the kind bits + { + let mask_bits = 0b11_u16 << (2 * temp_idx); + let shifted_bits = (kind_bits as u16) << (2 * temp_idx); + let all_kind_bits = self.temp_mapping_kind as u16; + self.temp_mapping_kind = (all_kind_bits & !mask_bits) | shifted_bits; + } + + // Update the payload bits + { + let mask_bits = 0b1111_u32 << (4 * temp_idx); + let shifted_bits = (payload_bits as u32) << (4 * temp_idx); + let all_payload_bits = self.temp_payload as u32; + self.temp_payload = (all_payload_bits & !mask_bits) | shifted_bits; + } } /// Upgrade (or "learn") the type of an instruction operand /// This value must be compatible and at least as specific as the previously known type. /// If this value originated from self, or an lvar, the learned type will be /// propagated back to its source. - pub fn upgrade_opnd_type(&mut self, opnd: InsnOpnd, opnd_type: Type) { + pub fn upgrade_opnd_type(&mut self, opnd: YARVOpnd, opnd_type: Type) { // If type propagation is disabled, store no types if get_option!(no_type_prop) { return; @@ -1101,7 +1930,6 @@ impl Context { match opnd { SelfOpnd => self.self_type.upgrade(opnd_type), StackOpnd(idx) => { - let idx = idx as u16; assert!(idx < self.stack_size); let stack_idx = (self.stack_size - 1 - idx) as usize; @@ -1110,15 +1938,24 @@ impl Context { return; } - let mapping = self.temp_mapping[stack_idx]; + let mapping = self.get_temp_mapping(stack_idx); - match mapping { + match mapping.get_kind() { MapToSelf => self.self_type.upgrade(opnd_type), - MapToStack => self.temp_types[stack_idx].upgrade(opnd_type), - MapToLocal(idx) => { - let idx = idx as usize; + MapToStack => { + let mut temp_type = mapping.get_type(); + temp_type.upgrade(opnd_type); + self.set_temp_mapping(stack_idx, TempMapping::map_to_stack(temp_type)); + } + MapToLocal => { + let idx = mapping.get_local_idx() as usize; assert!(idx < MAX_LOCAL_TYPES); - self.local_types[idx].upgrade(opnd_type); + let mut new_type = self.get_local_type(idx); + new_type.upgrade(opnd_type); + self.set_local_type(idx, new_type); + // Re-attach MapToLocal for this StackOpnd(idx). set_local_type() detaches + // all MapToLocal mappings, including the one we're upgrading here. + self.set_opnd_mapping(opnd, mapping); } } } @@ -1130,30 +1967,29 @@ impl Context { This is can be used with stack_push_mapping or set_opnd_mapping to copy a stack value's type while maintaining the mapping. 
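For illustration, a minimal standalone model of this packing scheme: two kind bits per slot in one word, and a four-bit payload (a type or a local index) per slot in another. The names are placeholders, not the real YJIT types.

// Standalone model with placeholder names, not the real YJIT types.
enum Kind { Stack = 0, SelfVal = 1, Local = 2 }

struct Packed { kinds: u16, payloads: u32 }

impl Packed {
    fn set(&mut self, idx: usize, kind: Kind, payload: u8) {
        assert!(idx < 8 && payload <= 0b1111);
        let kmask = 0b11_u16 << (2 * idx);
        self.kinds = (self.kinds & !kmask) | ((kind as u16) << (2 * idx));
        let pmask = 0b1111_u32 << (4 * idx);
        self.payloads = (self.payloads & !pmask) | ((payload as u32) << (4 * idx));
    }

    fn get(&self, idx: usize) -> (u8, u8) {
        let kind = ((self.kinds >> (2 * idx)) & 0b11) as u8;
        let payload = ((self.payloads >> (4 * idx)) & 0b1111) as u8;
        (kind, payload)
    }
}

fn main() {
    let mut packed = Packed { kinds: 0, payloads: 0 };
    packed.set(0, Kind::SelfVal, 0);                    // slot 0 is `self`
    packed.set(3, Kind::Local, 5);                      // slot 3 remembers "local 5"
    assert_eq!(packed.get(3), (Kind::Local as u8, 5));  // round-trips
    assert_eq!(packed.get(1), (Kind::Stack as u8, 0));  // untouched slots stay zeroed
}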
*/ - pub fn get_opnd_mapping(&self, opnd: InsnOpnd) -> (TempMapping, Type) { + pub fn get_opnd_mapping(&self, opnd: YARVOpnd) -> TempMapping { let opnd_type = self.get_opnd_type(opnd); match opnd { - SelfOpnd => (MapToSelf, opnd_type), + SelfOpnd => TempMapping::map_to_self(), StackOpnd(idx) => { - let idx = idx as u16; assert!(idx < self.stack_size); let stack_idx = (self.stack_size - 1 - idx) as usize; if stack_idx < MAX_TEMP_TYPES { - (self.temp_mapping[stack_idx], opnd_type) + self.get_temp_mapping(stack_idx) } else { // We can't know the source of this stack operand, so we assume it is // a stack-only temporary. type will be UNKNOWN assert!(opnd_type == Type::Unknown); - (MapToStack, opnd_type) + TempMapping::map_to_stack(opnd_type) } } } } /// Overwrite both the type and mapping of a stack operand. - pub fn set_opnd_mapping(&mut self, opnd: InsnOpnd, (mapping, opnd_type): (TempMapping, Type)) { + pub fn set_opnd_mapping(&mut self, opnd: YARVOpnd, mapping: TempMapping) { match opnd { SelfOpnd => unreachable!("self always maps to self"), StackOpnd(idx) => { @@ -1170,44 +2006,47 @@ impl Context { return; } - self.temp_mapping[stack_idx] = mapping; - - // Only used when mapping == MAP_STACK - self.temp_types[stack_idx] = opnd_type; + self.set_temp_mapping(stack_idx, mapping); } } } /// Set the type of a local variable pub fn set_local_type(&mut self, local_idx: usize, local_type: Type) { - let ctx = self; - // If type propagation is disabled, store no types if get_option!(no_type_prop) { return; } if local_idx >= MAX_LOCAL_TYPES { - return; + return } // If any values on the stack map to this local we must detach them - for (i, mapping) in ctx.temp_mapping.iter_mut().enumerate() { - *mapping = match *mapping { - MapToStack => MapToStack, - MapToSelf => MapToSelf, - MapToLocal(idx) => { + for mapping_idx in 0..MAX_TEMP_TYPES { + let mapping = self.get_temp_mapping(mapping_idx); + let tm = match mapping.get_kind() { + MapToStack => mapping, + MapToSelf => mapping, + MapToLocal => { + let idx = mapping.get_local_idx(); if idx as usize == local_idx { - ctx.temp_types[i] = ctx.local_types[idx as usize]; - MapToStack + let local_type = self.get_local_type(local_idx); + TempMapping::map_to_stack(local_type) } else { - MapToLocal(idx) + TempMapping::map_to_local(idx) } } - } + }; + self.set_temp_mapping(mapping_idx, tm); } - ctx.local_types[local_idx] = local_type; + // Update the type bits + let type_bits = local_type as u32; + assert!(type_bits <= 0b1111); + let mask_bits = 0b1111_u32 << (4 * local_idx); + let shifted_bits = type_bits << (4 * local_idx); + self.local_types = (self.local_types & !mask_bits) | shifted_bits; } /// Erase local variable type information @@ -1215,99 +2054,238 @@ impl Context { pub fn clear_local_types(&mut self) { // When clearing local types we must detach any stack mappings to those // locals. Even if local values may have changed, stack values will not. 
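The detach rule that set_local_type applies can be shown with a tiny standalone model (made-up names; the real code stores these in the packed fields above):

// Made-up mini model of the detach-on-write rule.
#[derive(Clone, Copy)]
enum Ty { Fixnum, TString }

#[derive(Clone, Copy)]
enum Slot { Stack(Ty), Local(usize) }

fn set_local(locals: &mut [Ty], slots: &mut [Slot], idx: usize, ty: Ty) {
    // Any temp that says "same as local idx" must now carry the local's old type
    // itself, because the temp still holds the value from before the write.
    for slot in slots.iter_mut() {
        if let Slot::Local(i) = *slot {
            if i == idx {
                *slot = Slot::Stack(locals[idx]);
            }
        }
    }
    locals[idx] = ty;
}

fn main() {
    let mut locals = [Ty::Fixnum];
    let mut slots = [Slot::Local(0)]; // a temp pushed from local 0
    set_local(&mut locals, &mut slots, 0, Ty::TString);
    assert!(matches!(slots[0], Slot::Stack(Ty::Fixnum)));
}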
- for (i, mapping) in self.temp_mapping.iter_mut().enumerate() { - *mapping = match *mapping { - MapToStack => MapToStack, - MapToSelf => MapToSelf, - MapToLocal(idx) => { - self.temp_types[i] = self.local_types[idx as usize]; - MapToStack - } + + for mapping_idx in 0..MAX_TEMP_TYPES { + let mapping = self.get_temp_mapping(mapping_idx); + if mapping.get_kind() == MapToLocal { + let local_idx = mapping.get_local_idx() as usize; + self.set_temp_mapping(mapping_idx, TempMapping::map_to_stack(self.get_local_type(local_idx))); } } // Clear the local types - self.local_types = [Type::default(); MAX_LOCAL_TYPES]; + self.local_types = 0; + } + + /// Return true if the code is inlined by the caller + pub fn inline(&self) -> bool { + self.inline_block != 0 + } + + /// Set a block ISEQ given to the Block of this Context + pub fn set_inline_block(&mut self, iseq: IseqPtr) { + self.inline_block = iseq as u64 } /// Compute a difference score for two context objects - /// Returns 0 if the two contexts are the same - /// Returns > 0 if different but compatible - /// Returns usize::MAX if incompatible - pub fn diff(&self, dst: &Context) -> usize { + pub fn diff(&self, dst: &Context) -> TypeDiff { // Self is the source context (at the end of the predecessor) let src = self; // Can only lookup the first version in the chain - if dst.chain_depth != 0 { - return usize::MAX; + if dst.get_chain_depth() != 0 { + return TypeDiff::Incompatible; } // Blocks with depth > 0 always produce new versions // Sidechains cannot overlap - if src.chain_depth != 0 { - return usize::MAX; + if src.get_chain_depth() != 0 { + return TypeDiff::Incompatible; + } + + if src.is_return_landing() != dst.is_return_landing() { + return TypeDiff::Incompatible; + } + + if src.is_deferred() != dst.is_deferred() { + return TypeDiff::Incompatible; } if dst.stack_size != src.stack_size { - return usize::MAX; + return TypeDiff::Incompatible; } if dst.sp_offset != src.sp_offset { - return usize::MAX; + return TypeDiff::Incompatible; + } + + if dst.reg_temps != src.reg_temps { + return TypeDiff::Incompatible; } // Difference sum let mut diff = 0; // Check the type of self - let self_diff = src.self_type.diff(dst.self_type); + diff += match src.self_type.diff(dst.self_type) { + TypeDiff::Compatible(diff) => diff, + TypeDiff::Incompatible => return TypeDiff::Incompatible, + }; - if self_diff == usize::MAX { - return usize::MAX; + // Check the block to inline + if src.inline_block != dst.inline_block { + // find_block_version should not find existing blocks with different + // inline_block so that their yield will not be megamorphic. + return TypeDiff::Incompatible; } - diff += self_diff; - // For each local type we track - for i in 0..src.local_types.len() { - let t_src = src.local_types[i]; - let t_dst = dst.local_types[i]; - let temp_diff = t_src.diff(t_dst); - - if temp_diff == usize::MAX { - return usize::MAX; - } - - diff += temp_diff; + for i in 0.. 
MAX_LOCAL_TYPES { + let t_src = src.get_local_type(i); + let t_dst = dst.get_local_type(i); + diff += match t_src.diff(t_dst) { + TypeDiff::Compatible(diff) => diff, + TypeDiff::Incompatible => return TypeDiff::Incompatible, + }; } // For each value on the temp stack for i in 0..src.stack_size { - let (src_mapping, src_type) = src.get_opnd_mapping(StackOpnd(i)); - let (dst_mapping, dst_type) = dst.get_opnd_mapping(StackOpnd(i)); + let src_mapping = src.get_opnd_mapping(StackOpnd(i)); + let dst_mapping = dst.get_opnd_mapping(StackOpnd(i)); // If the two mappings aren't the same if src_mapping != dst_mapping { - if dst_mapping == MapToStack { + if dst_mapping.get_kind() == MapToStack { // We can safely drop information about the source of the temp // stack operand. diff += 1; } else { - return usize::MAX; + return TypeDiff::Incompatible; } } - let temp_diff = src_type.diff(dst_type); + let src_type = src.get_opnd_type(StackOpnd(i)); + let dst_type = dst.get_opnd_type(StackOpnd(i)); + + diff += match src_type.diff(dst_type) { + TypeDiff::Compatible(diff) => diff, + TypeDiff::Incompatible => return TypeDiff::Incompatible, + }; + } + + return TypeDiff::Compatible(diff); + } + + pub fn two_fixnums_on_stack(&self, jit: &mut JITState) -> Option<bool> { + if jit.at_current_insn() { + let comptime_recv = jit.peek_at_stack(self, 1); + let comptime_arg = jit.peek_at_stack(self, 0); + return Some(comptime_recv.fixnum_p() && comptime_arg.fixnum_p()); + } - if temp_diff == usize::MAX { - return usize::MAX; + let recv_type = self.get_opnd_type(StackOpnd(1)); + let arg_type = self.get_opnd_type(StackOpnd(0)); + match (recv_type, arg_type) { + (Type::Fixnum, Type::Fixnum) => Some(true), + (Type::Unknown | Type::UnknownImm, Type::Unknown | Type::UnknownImm) => None, + _ => Some(false), + } + } +} + +impl Assembler { + /// Push one new value on the temp stack with an explicit mapping + /// Return a pointer to the new stack top + pub fn stack_push_mapping(&mut self, mapping: TempMapping) -> Opnd { + // If type propagation is disabled, store no types + if get_option!(no_type_prop) { + return self.stack_push_mapping(mapping.without_type()); + } + + let stack_size: usize = self.ctx.stack_size.into(); + + // Keep track of the type and mapping of the value + if stack_size < MAX_TEMP_TYPES { + self.ctx.set_temp_mapping(stack_size, mapping); + + if mapping.get_kind() == MapToLocal { + let idx = mapping.get_local_idx(); + assert!((idx as usize) < MAX_LOCAL_TYPES); } + } - diff += temp_diff; + // Allocate a register to the stack operand + if self.ctx.stack_size < MAX_REG_TEMPS { + self.alloc_temp_reg(self.ctx.stack_size); } - return diff; + self.ctx.stack_size += 1; + self.ctx.sp_offset += 1; + + return self.stack_opnd(0); + } + + /// Push one new value on the temp stack + /// Return a pointer to the new stack top + pub fn stack_push(&mut self, val_type: Type) -> Opnd { + return self.stack_push_mapping(TempMapping::map_to_stack(val_type)); + } + + /// Push the self value on the stack + pub fn stack_push_self(&mut self) -> Opnd { + return self.stack_push_mapping(TempMapping::map_to_self()); + } + + /// Push a local variable on the stack + pub fn stack_push_local(&mut self, local_idx: usize) -> Opnd { + if local_idx >= MAX_LOCAL_TYPES { + return self.stack_push(Type::Unknown); + } + + return self.stack_push_mapping(TempMapping::map_to_local(local_idx as u8)); + } + + // Pop N values off the stack + // Return a pointer to the stack top before the pop operation + pub fn stack_pop(&mut self, n: usize) -> Opnd { + assert!(n 
<= self.ctx.stack_size.into()); + + let top = self.stack_opnd(0); + + // Clear the types of the popped values + for i in 0..n { + let idx: usize = (self.ctx.stack_size as usize) - i - 1; + + if idx < MAX_TEMP_TYPES { + self.ctx.set_temp_mapping(idx, TempMapping::map_to_stack(Type::Unknown)); + } + } + + self.ctx.stack_size -= n as u8; + self.ctx.sp_offset -= n as i8; + + return top; + } + + /// Shift stack temps to remove a Symbol for #send. + pub fn shift_stack(&mut self, argc: usize) { + assert!(argc < self.ctx.stack_size.into()); + + let method_name_index = (self.ctx.stack_size as usize) - argc - 1; + + for i in method_name_index..(self.ctx.stack_size - 1) as usize { + if i < MAX_TEMP_TYPES { + let next_arg_mapping = if i + 1 < MAX_TEMP_TYPES { + self.ctx.get_temp_mapping(i + 1) + } else { + TempMapping::map_to_stack(Type::Unknown) + }; + self.ctx.set_temp_mapping(i, next_arg_mapping); + } + } + self.stack_pop(1); + } + + /// Get an operand pointing to a slot on the temp stack + pub fn stack_opnd(&self, idx: i32) -> Opnd { + Opnd::Stack { + idx, + num_bits: 64, + stack_size: self.ctx.stack_size, + sp_offset: self.ctx.sp_offset, + reg_temps: None, // push_insn will set this + } } } @@ -1316,7 +2294,7 @@ impl BlockId { #[cfg(debug_assertions)] #[allow(dead_code)] pub fn dump_src_loc(&self) { - unsafe { rb_yjit_dump_iseq_loc(self.iseq, self.idx) } + unsafe { rb_yjit_dump_iseq_loc(self.iseq, self.idx as u32) } } } @@ -1351,52 +2329,54 @@ fn gen_block_series_body( // Generate code for the first block let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb).ok()?; - batch.push(first_block.clone()); // Keep track of this block version + batch.push(first_block); // Keep track of this block version // Add the block version to the VersionMap for this ISEQ - add_block_version(&first_block, cb); + unsafe { add_block_version(first_block, cb) }; // Loop variable - let mut last_blockref = first_block.clone(); + let mut last_blockref = first_block; loop { // Get the last outgoing branch from the previous block. let last_branchref = { - let last_block = last_blockref.borrow(); + let last_block = unsafe { last_blockref.as_ref() }; match last_block.outgoing.last() { - Some(branch) => branch.clone(), + Some(branch) => *branch, None => { break; } // If last block has no branches, stop. } }; - let mut last_branch = last_branchref.borrow_mut(); + let last_branch = unsafe { last_branchref.as_ref() }; + + incr_counter!(block_next_count); // gen_direct_jump() can request a block to be placed immediately after by - // leaving `None`s in the `dst_addrs` array. - match &last_branch.dst_addrs { - [None, None] => (), - _ => { - break; - } // If there is no next block to compile, stop + // leaving a single target that has a `None` address. + // SAFETY: no mutation inside the unsafe block + let (requested_blockid, requested_ctx) = unsafe { + match (last_branch.targets[0].ref_unchecked(), last_branch.targets[1].ref_unchecked()) { + (Some(last_target), None) if last_target.get_address().is_none() => { + (last_target.get_blockid(), last_target.get_ctx()) + } + _ => { + // We're done when no fallthrough block is requested + break; + } + } }; - // Get id and context for the new block - let requested_id = last_branch.targets[0].expect("block id must be filled"); - let requested_ctx = &last_branch.target_ctxs[0]; - // Generate new block using context from the last branch. 
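A rough worked model of the shift_stack helper above, treating the temp stack as a plain Vec rather than the real mappings and registers:

fn main() {
    // Model the temp stack as a Vec; the real code works on mappings/registers instead.
    let mut stack = vec!["recv", ":sym", "arg0", "arg1"];
    let argc = 2;
    let name_idx = stack.len() - argc - 1;  // stack_size - argc - 1
    stack.remove(name_idx);                 // everything above shifts down; then stack_pop(1)
    assert_eq!(stack, ["recv", "arg0", "arg1"]);
}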
- let result = gen_single_block(requested_id, requested_ctx, ec, cb, ocb); + let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb); // If the block failed to compile if result.is_err() { // Remove previously compiled block // versions from the version map - for blockref in &batch { - // FIXME: should be deallocating resources here too - // e.g. invariants, etc. - //free_block(blockref) - - remove_block_version(blockref); + for blockref in batch { + remove_block_version(&blockref); + // SAFETY: block was well connected because it was in a version_map + unsafe { free_block(blockref, false) }; } // Stop compiling @@ -1406,37 +2386,50 @@ fn gen_block_series_body( let new_blockref = result.unwrap(); // Add the block version to the VersionMap for this ISEQ - add_block_version(&new_blockref, cb); + unsafe { add_block_version(new_blockref, cb) }; // Connect the last branch and the new block - last_branch.blocks[0] = Some(new_blockref.clone()); - last_branch.dst_addrs[0] = new_blockref.borrow().start_addr; - new_blockref - .borrow_mut() - .incoming - .push(last_branchref.clone()); - - // This block should immediately follow the last branch - assert!(new_blockref.borrow().start_addr == last_branch.end_addr); + last_branch.targets[0].set(Some(Box::new(BranchTarget::Block(new_blockref)))); + unsafe { new_blockref.as_ref().incoming.push(last_branchref) }; // Track the block - batch.push(new_blockref.clone()); + batch.push(new_blockref); // Repeat with newest block last_blockref = new_blockref; } + #[cfg(feature = "disasm")] + { + // If dump_iseq_disasm is active, see if this iseq's location matches the given substring. + // If so, we print the new blocks to the console. + if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() { + let iseq_location = iseq_get_location(blockid.iseq, blockid.idx); + if iseq_location.contains(substr) { + let last_block = unsafe { last_blockref.as_ref() }; + let iseq_range = &last_block.iseq_range; + println!("Compiling {} block(s) for {}, ISEQ offsets [{}, {})", batch.len(), iseq_location, iseq_range.start, iseq_range.end); + print!("{}", disasm_iseq_insn_range(blockid.iseq, iseq_range.start, iseq_range.end)); + } + } + } + Some(first_block) } /// Generate a block version that is an entry point inserted into an iseq /// NOTE: this function assumes that the VM lock has been taken -pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> { +/// If jit_exception is true, compile JIT code for handling exceptions. +/// See [jit_compile_exception] for details. +pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option<*const u8> { // Compute the current instruction index based on the current PC - let insn_idx: u32 = unsafe { - let pc_zero = rb_iseq_pc_at_idx(iseq, 0); - let ec_pc = get_cfp_pc(get_ec_cfp(ec)); - ec_pc.offset_from(pc_zero).try_into().ok()? + let cfp = unsafe { get_ec_cfp(ec) }; + let insn_idx: u16 = unsafe { + let ec_pc = get_cfp_pc(cfp); + iseq_pc_to_insn_idx(iseq, ec_pc)? + }; + let stack_size: u8 = unsafe { + u8::try_from(get_cfp_sp(cfp).offset_from(get_cfp_bp(cfp))).ok()? }; // The entry context makes no assumptions about types @@ -1450,61 +2443,226 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> { let ocb = CodegenGlobals::get_outlined_cb(); // Write the interpreter entry prologue. Might be NULL when out of memory. 
- let code_ptr = gen_entry_prologue(cb, iseq, insn_idx); + let code_ptr = gen_entry_prologue(cb, ocb, iseq, insn_idx, jit_exception); // Try to generate code for the entry block - let block = gen_block_series(blockid, &Context::default(), ec, cb, ocb); + let mut ctx = Context::default(); + ctx.stack_size = stack_size; + let block = gen_block_series(blockid, &ctx, ec, cb, ocb); cb.mark_all_executable(); ocb.unwrap().mark_all_executable(); match block { // Compilation failed - None => return None, + None => { + // Trigger code GC. This entry point will be recompiled later. + if get_option!(code_gc) { + cb.code_gc(ocb); + } + return None; + } // If the block contains no Ruby instructions Some(block) => { - let block = block.borrow(); - if block.end_idx == insn_idx { + let block = unsafe { block.as_ref() }; + if block.iseq_range.is_empty() { return None; } } } + // Count the number of entry points we compile + incr_counter!(compiled_iseq_entry); + // Compilation successful and block not empty - return code_ptr; + code_ptr.map(|ptr| ptr.raw_ptr(cb)) } -/// Generate code for a branch, possibly rewriting and changing the size of it -fn regenerate_branch(cb: &mut CodeBlock, branch: &mut Branch) { - // FIXME - /* - if (branch->start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) { - // Generating this branch would modify frozen bytes. Do nothing. - return; - } - */ +// Change the entry's jump target from an entry stub to a next entry +pub fn regenerate_entry(cb: &mut CodeBlock, entryref: &EntryRef, next_entry: CodePtr) { + let mut asm = Assembler::new(); + asm_comment!(asm, "regenerate_entry"); + // gen_entry_guard generates cmp + jne. We're rewriting only jne. + asm.jne(next_entry.into()); + + // Move write_pos to rewrite the entry let old_write_pos = cb.get_write_pos(); + let old_dropped_bytes = cb.has_dropped_bytes(); + cb.set_write_ptr(unsafe { entryref.as_ref() }.start_addr); + cb.set_dropped_bytes(false); + asm.compile(cb, None).expect("can rewrite existing code"); + + // Rewind write_pos to the original one + assert_eq!(cb.get_write_ptr(), unsafe { entryref.as_ref() }.end_addr); + cb.set_pos(old_write_pos); + cb.set_dropped_bytes(old_dropped_bytes); +} - let mut block = branch.block.borrow_mut(); - let branch_terminates_block = branch.end_addr == block.end_addr; +pub type PendingEntryRef = Rc<PendingEntry>; - // Rewrite the branch - assert!(branch.dst_addrs[0].is_some()); - cb.set_write_ptr(branch.start_addr.unwrap()); - (branch.gen_fn)( - cb, - branch.dst_addrs[0].unwrap(), - branch.dst_addrs[1], - branch.shape, +/// Create a new entry reference for an ISEQ +pub fn new_pending_entry() -> PendingEntryRef { + let entry = PendingEntry { + uninit_entry: Box::new(MaybeUninit::uninit()), + start_addr: Cell::new(None), + end_addr: Cell::new(None), + }; + return Rc::new(entry); +} + +c_callable! { + /// Generated code calls this function with the SysV calling convention. + /// See [gen_call_entry_stub_hit]. + fn entry_stub_hit(entry_ptr: *const c_void, ec: EcPtr) -> *const u8 { + with_compile_time(|| { + with_vm_lock(src_loc!(), || { + let cb = CodegenGlobals::get_inline_cb(); + let ocb = CodegenGlobals::get_outlined_cb(); + + let addr = entry_stub_hit_body(entry_ptr, ec, cb, ocb) + .unwrap_or_else(|| { + // Trigger code GC (e.g. no space). + // This entry point will be recompiled later. 
+ if get_option!(code_gc) { + cb.code_gc(ocb); + } + CodegenGlobals::get_stub_exit_code().raw_ptr(cb) + }); + + cb.mark_all_executable(); + ocb.unwrap().mark_all_executable(); + + addr + }) + }) + } +} + +/// Called by the generated code when an entry stub is executed +fn entry_stub_hit_body( + entry_ptr: *const c_void, + ec: EcPtr, + cb: &mut CodeBlock, + ocb: &mut OutlinedCb +) -> Option<*const u8> { + // Get ISEQ and insn_idx from the current ec->cfp + let cfp = unsafe { get_ec_cfp(ec) }; + let iseq = unsafe { get_cfp_iseq(cfp) }; + let insn_idx = iseq_pc_to_insn_idx(iseq, unsafe { get_cfp_pc(cfp) })?; + let stack_size: u8 = unsafe { + u8::try_from(get_cfp_sp(cfp).offset_from(get_cfp_bp(cfp))).ok()? + }; + + // Compile a new entry guard as a next entry + let next_entry = cb.get_write_ptr(); + let mut asm = Assembler::new(); + let pending_entry = gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?; + asm.compile(cb, Some(ocb))?; + + // Find or compile a block version + let blockid = BlockId { iseq, idx: insn_idx }; + let mut ctx = Context::default(); + ctx.stack_size = stack_size; + let blockref = match find_block_version(blockid, &ctx) { + // If an existing block is found, generate a jump to the block. + Some(blockref) => { + let mut asm = Assembler::new(); + asm.jmp(unsafe { blockref.as_ref() }.start_addr.into()); + asm.compile(cb, Some(ocb))?; + Some(blockref) + } + // If this block hasn't yet been compiled, generate blocks after the entry guard. + None => gen_block_series(blockid, &ctx, ec, cb, ocb), + }; + + // Commit or retry the entry + if blockref.is_some() { + // Regenerate the previous entry + let entryref = NonNull::<Entry>::new(entry_ptr as *mut Entry).expect("Entry should not be null"); + regenerate_entry(cb, &entryref, next_entry); + + // Write an entry to the heap and push it to the ISEQ + let pending_entry = Rc::try_unwrap(pending_entry).ok().expect("PendingEntry should be unique"); + get_or_create_iseq_payload(iseq).entries.push(pending_entry.into_entry()); + } + + // Let the stub jump to the block + blockref.map(|block| unsafe { block.as_ref() }.start_addr.raw_ptr(cb)) +} + +/// Generate a stub that calls entry_stub_hit +pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option<CodePtr> { + let ocb = ocb.unwrap(); + + let mut asm = Assembler::new(); + asm_comment!(asm, "entry stub hit"); + + asm.mov(C_ARG_OPNDS[0], entry_address.into()); + + // Jump to trampoline to call entry_stub_hit() + // Not really a side exit, just don't need a padded jump here. + asm.jmp(CodegenGlobals::get_entry_stub_hit_trampoline().as_side_exit()); + + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) +} + +/// A trampoline used by gen_entry_stub. entry_stub_hit may issue Code GC, so +/// it's useful for Code GC to call entry_stub_hit from a globally shared code. +pub fn gen_entry_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> { + let ocb = ocb.unwrap(); + let mut asm = Assembler::new(); + + // See gen_entry_guard for how it's used. 
+ asm_comment!(asm, "entry_stub_hit() trampoline"); + let jump_addr = asm.ccall(entry_stub_hit as *mut u8, vec![C_ARG_OPNDS[0], EC]); + + // Jump to the address returned by the entry_stub_hit() call + asm.jmp_opnd(jump_addr); + + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) +} + +/// Generate code for a branch, possibly rewriting and changing the size of it +fn regenerate_branch(cb: &mut CodeBlock, branch: &Branch) { + // Remove old comments + cb.remove_comments(branch.start_addr, branch.end_addr.get()); + + // SAFETY: having a &Branch implies branch.block is initialized. + let block = unsafe { branch.block.as_ref() }; + + let branch_terminates_block = branch.end_addr.get() == block.get_end_addr(); + + // Generate the branch + let mut asm = Assembler::new(); + asm_comment!(asm, "regenerate_branch"); + branch.gen_fn.call( + &mut asm, + Target::CodePtr(branch.get_target_address(0).unwrap()), + branch.get_target_address(1).map(|addr| Target::CodePtr(addr)), ); - branch.end_addr = Some(cb.get_write_ptr()); + + // If the entire block is the branch and the block could be invalidated, + // we need to pad to ensure there is room for invalidation patching. + if branch.start_addr == block.start_addr && branch_terminates_block && block.entry_exit.is_some() { + asm.pad_inval_patch(); + } + + // Rewrite the branch + let old_write_pos = cb.get_write_pos(); + let old_dropped_bytes = cb.has_dropped_bytes(); + cb.set_write_ptr(branch.start_addr); + cb.set_dropped_bytes(false); + asm.compile(cb, None).expect("can rewrite existing code"); + let new_end_addr = cb.get_write_ptr(); + + branch.end_addr.set(new_end_addr); // The block may have shrunk after the branch is rewritten if branch_terminates_block { // Adjust block size - block.end_addr = branch.end_addr; + block.end_addr.set(new_end_addr); } // cb.write_pos is both a write cursor and a marker for the end of @@ -1517,79 +2675,66 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &mut Branch) { if old_write_pos > cb.get_write_pos() { // We rewound cb->write_pos to generate the branch, now restore it. cb.set_pos(old_write_pos); + cb.set_dropped_bytes(old_dropped_bytes); } else { // The branch sits at the end of cb and consumed some memory. // Keep cb.write_pos. } -} - -/// Create a new outgoing branch entry for a block -fn make_branch_entry(block: BlockRef, src_ctx: &Context, gen_fn: BranchGenFn) -> BranchRef { - let branch = Branch { - // Block this is attached to - block: block.clone(), - // Positions where the generated code starts and ends - start_addr: None, - end_addr: None, - - // Context right after the branch instruction - src_ctx: *src_ctx, - - // Branch target blocks and their contexts - targets: [None, None], - target_ctxs: [Context::default(), Context::default()], - blocks: [None, None], + branch.assert_layout(); +} - // Jump target addresses - dst_addrs: [None, None], +pub type PendingBranchRef = Rc<PendingBranch>; - // Branch code generation function - gen_fn: gen_fn, +/// Create a new outgoing branch entry for a block +fn new_pending_branch(jit: &mut JITState, gen_fn: BranchGenFn) -> PendingBranchRef { + let branch = Rc::new(PendingBranch { + uninit_branch: Box::new(MaybeUninit::uninit()), + gen_fn, + start_addr: Cell::new(None), + end_addr: Cell::new(None), + targets: [Cell::new(None), Cell::new(None)], + }); - // Shape of the branch - shape: BranchShape::Default, - }; + incr_counter!(compiled_branch_count); // TODO not true. 
count at finalize time // Add to the list of outgoing branches for the block - let branchref = Rc::new(RefCell::new(branch)); - block.borrow_mut().outgoing.push(branchref.clone()); + jit.queue_outgoing_branch(branch.clone()); - return branchref; + branch } -/// Generated code calls this function with the SysV calling convention. -/// See [get_branch_target]. -extern "sysv64" fn branch_stub_hit( - branch_ptr: *const c_void, - target_idx: u32, - ec: EcPtr, -) -> *const u8 { - with_vm_lock(src_loc!(), || { - branch_stub_hit_body(branch_ptr, target_idx, ec) - }) +c_callable! { + /// Generated code calls this function with the SysV calling convention. + /// See [gen_branch_stub]. + fn branch_stub_hit( + branch_ptr: *const c_void, + target_idx: u32, + ec: EcPtr, + ) -> *const u8 { + with_vm_lock(src_loc!(), || { + with_compile_time(|| { branch_stub_hit_body(branch_ptr, target_idx, ec) }) + }) + } } /// Called by the generated code when a branch stub is executed /// Triggers compilation of branches and code patching fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -> *const u8 { - assert!(!branch_ptr.is_null()); - - //branch_ptr is actually: - //branch_ptr: *const RefCell<Branch> - let branch_rc = unsafe { BranchRef::from_raw(branch_ptr as *const RefCell<Branch>) }; + if get_option!(dump_insns) { + println!("branch_stub_hit"); + } - // We increment the strong count because we want to keep the reference owned - // by the branch stub alive. Return branch stubs can be hit multiple times. - unsafe { Rc::increment_strong_count(branch_ptr) }; + let branch_ref = NonNull::<Branch>::new(branch_ptr as *mut Branch) + .expect("Branches should not be null"); - let mut branch = branch_rc.borrow_mut(); + // SAFETY: We have the VM lock, and the branch is initialized by the time generated + // code calls this function. + let branch = unsafe { branch_ref.as_ref() }; let branch_size_on_entry = branch.code_size(); + let housing_block = unsafe { branch.block.as_ref() }; let target_idx: usize = target_idx.as_usize(); - let target = branch.targets[target_idx].unwrap(); - let target_ctx = branch.target_ctxs[target_idx]; - let target_branch_shape = match target_idx { 0 => BranchShape::Next0, 1 => BranchShape::Next1, @@ -1599,18 +2744,31 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - let cb = CodegenGlobals::get_inline_cb(); let ocb = CodegenGlobals::get_outlined_cb(); - // If this branch has already been patched, return the dst address - // Note: ractors can cause the same stub to be hit multiple times - if let Some(_) = branch.blocks[target_idx] { - return branch.dst_addrs[target_idx].unwrap().raw_ptr(); - } + let (target_blockid, target_ctx): (BlockId, Context) = unsafe { + // SAFETY: no mutation of the target's Cell. Just reading out data. 
+ let target = branch.targets[target_idx].ref_unchecked().as_ref().unwrap(); + + // If this branch has already been patched, return the dst address + // Note: recursion can cause the same stub to be hit multiple times + if let BranchTarget::Block(_) = target.as_ref() { + return target.get_address().unwrap().raw_ptr(cb); + } + + (target.get_blockid(), target.get_ctx()) + }; let (cfp, original_interp_sp) = unsafe { let cfp = get_ec_cfp(ec); let original_interp_sp = get_cfp_sp(cfp); - let reconned_pc = rb_iseq_pc_at_idx(rb_cfp_get_iseq(cfp), target.idx); + let running_iseq = get_cfp_iseq(cfp); + assert_eq!(running_iseq, target_blockid.iseq as _, "each stub expects a particular iseq"); + + let reconned_pc = rb_iseq_pc_at_idx(running_iseq, target_blockid.idx.into()); let reconned_sp = original_interp_sp.offset(target_ctx.sp_offset.into()); + // Unlike in the interpreter, our `leave` doesn't write to the caller's + // SP -- we do it in the returned-to code. Account for this difference. + let reconned_sp = reconned_sp.add(target_ctx.is_return_landing().into()); // Update the PC in the current CFP, because it may be out of sync in JITted code rb_set_cfp_pc(cfp, reconned_pc); @@ -1623,76 +2781,88 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - // So we do it here instead. rb_set_cfp_sp(cfp, reconned_sp); + // Bail if code GC is disabled and we've already run out of spaces. + if !get_option!(code_gc) && (cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes()) { + return CodegenGlobals::get_stub_exit_code().raw_ptr(cb); + } + + // Bail if we're about to run out of native stack space. + // We've just reconstructed interpreter state. + if rb_ec_stack_check(ec as _) != 0 { + return CodegenGlobals::get_stub_exit_code().raw_ptr(cb); + } + (cfp, original_interp_sp) }; // Try to find an existing compiled version of this block - let mut block = find_block_version(target, &target_ctx); - + let mut block = find_block_version(target_blockid, &target_ctx); + let mut branch_modified = false; // If this block hasn't yet been compiled if block.is_none() { - let branch_old_shape = branch.shape; - let mut branch_modified = false; + let branch_old_shape = branch.gen_fn.get_shape(); // If the new block can be generated right after the branch (at cb->write_pos) - if Some(cb.get_write_ptr()) == branch.end_addr { + if cb.get_write_ptr() == branch.end_addr.get() { // This branch should be terminating its block - assert!(branch.end_addr == branch.block.borrow().end_addr); + assert!(branch.end_addr == housing_block.end_addr); // Change the branch shape to indicate the target block will be placed next - branch.shape = target_branch_shape; + branch.gen_fn.set_shape(target_branch_shape); // Rewrite the branch with the new, potentially more compact shape - regenerate_branch(cb, &mut branch); + regenerate_branch(cb, branch); branch_modified = true; // Ensure that the branch terminates the codeblock just like // before entering this if block. This drops bytes off the end // in case we shrank the branch when regenerating. 
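A schematic model of what the three branch shapes mean for layout; the mnemonics and the emit helper are made up, not the real backend:

// Illustrative pseudo-mnemonics; not the real backend.
enum Shape { Next0, Next1, Default }

fn emit(shape: Shape, target0: &str, target1: Option<&str>) -> Vec<String> {
    let mut insns = Vec::new();
    match shape {
        // target0 is laid out right after the branch: only the conditional jump remains
        Shape::Next0 => {
            if let Some(t1) = target1 {
                insns.push(format!("jcc {t1}"));
            }
        }
        // target1 is laid out right after the branch: invert the condition, jump to target0
        Shape::Next1 => insns.push(format!("jncc {target0}")),
        // neither target falls through: emit both jumps
        Shape::Default => {
            if let Some(t1) = target1 {
                insns.push(format!("jcc {t1}"));
            }
            insns.push(format!("jmp {target0}"));
        }
    }
    insns
}

fn main() {
    assert_eq!(emit(Shape::Next0, "block0", Some("block1")), vec!["jcc block1".to_string()]);
    assert_eq!(emit(Shape::Default, "block0", None), vec!["jmp block0".to_string()]);
}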
- cb.set_write_ptr(branch.end_addr.unwrap()); + cb.set_write_ptr(branch.end_addr.get()); } // Compile the new block version - drop(branch); // Stop mutable RefCell borrow since GC might borrow branch for marking - block = gen_block_series(target, &target_ctx, ec, cb, ocb); - branch = branch_rc.borrow_mut(); + block = gen_block_series(target_blockid, &target_ctx, ec, cb, ocb); if block.is_none() && branch_modified { // We couldn't generate a new block for the branch, but we modified the branch. // Restore the branch by regenerating it. - branch.shape = branch_old_shape; - regenerate_branch(cb, &mut branch); + branch.gen_fn.set_shape(branch_old_shape); + regenerate_branch(cb, branch); } } // Finish building the new block let dst_addr = match block { - Some(block_rc) => { - let mut block: RefMut<_> = block_rc.borrow_mut(); + Some(new_block) => { + let new_block = unsafe { new_block.as_ref() }; // Branch shape should reflect layout - assert!(!(branch.shape == target_branch_shape && block.start_addr != branch.end_addr)); + assert!(!(branch.gen_fn.get_shape() == target_branch_shape && new_block.start_addr != branch.end_addr.get())); // Add this branch to the list of incoming branches for the target - block.incoming.push(branch_rc.clone()); + new_block.push_incoming(branch_ref); // Update the branch target address - let dst_addr = block.start_addr; - branch.dst_addrs[target_idx] = dst_addr; - - // Mark this branch target as patched (no longer a stub) - branch.blocks[target_idx] = Some(block_rc.clone()); + branch.targets[target_idx].set(Some(Box::new(BranchTarget::Block(new_block.into())))); // Rewrite the branch with the new jump target address - mem::drop(block); // end mut borrow - regenerate_branch(cb, &mut branch); + regenerate_branch(cb, branch); // Restore interpreter sp, since the code hitting the stub expects the original. unsafe { rb_set_cfp_sp(cfp, original_interp_sp) }; - block_rc.borrow().start_addr.unwrap() + new_block.start_addr } None => { + // Trigger code GC. The whole ISEQ will be recompiled later. + // We shouldn't trigger it in the middle of compilation in branch_stub_hit + // because incomplete code could be used when cb.dropped_bytes is flipped + // by code GC. So this place, after all compilation, is the safest place + // to hook code GC on branch_stub_hit. + if get_option!(code_gc) { + cb.code_gc(ocb); + } + // Failed to service the stub by generating a new block so now we // need to exit to the interpreter at the stubbed location. We are // intentionally *not* restoring original_interp_sp. At the time of @@ -1710,67 +2880,166 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - let new_branch_size = branch.code_size(); assert!( new_branch_size <= branch_size_on_entry, - "branch stubs should never enlarge branches" + "branch stubs should never enlarge branches (start_addr: {:?}, old_size: {}, new_size: {})", + branch.start_addr.raw_ptr(cb), branch_size_on_entry, new_branch_size, ); // Return a pointer to the compiled block version - dst_addr.raw_ptr() + dst_addr.raw_ptr(cb) } -/// Get a block version or stub corresponding to a branch target -fn get_branch_target( - target: BlockId, +/// Generate a "stub", a piece of code that calls the compiler back when run. +/// A piece of code that redeems for more code; a thunk for code. 
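As a high-level analogy in plain Rust (no machine code or patching), a stub is a callable that produces its own implementation the first time it runs and reuses it afterwards:

// Plain-Rust analogy only; YJIT patches machine code instead.
struct Stub {
    compiled: Option<fn(i64) -> i64>,
    compile: fn() -> fn(i64) -> i64,
}

impl Stub {
    fn call(&mut self, arg: i64) -> i64 {
        if self.compiled.is_none() {
            // First run: redeem the stub for real code, then remember it.
            self.compiled = Some((self.compile)());
        }
        (self.compiled.unwrap())(arg)
    }
}

fn add_one(x: i64) -> i64 { x + 1 }
fn compile_add_one() -> fn(i64) -> i64 { add_one }

fn main() {
    let mut stub = Stub { compiled: None, compile: compile_add_one };
    assert_eq!(stub.call(41), 42); // triggers "compilation"
    assert_eq!(stub.call(1), 2);   // reuses the cached code
}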
+fn gen_branch_stub(
     ctx: &Context,
-    branchref: &BranchRef,
-    target_idx: u32,
     ocb: &mut OutlinedCb,
+    branch_struct_address: usize,
+    target_idx: u32,
 ) -> Option<CodePtr> {
-    let maybe_block = find_block_version(target, ctx);
+    let ocb = ocb.unwrap();
 
-    // If the block already exists
-    if let Some(blockref) = maybe_block {
-        let mut block = blockref.borrow_mut();
+    let mut asm = Assembler::new();
+    asm.ctx = *ctx;
+    asm.set_reg_temps(ctx.reg_temps);
+    asm_comment!(asm, "branch stub hit");
 
-        // Add an incoming branch into this block
-        block.incoming.push(branchref.clone());
-        let mut branch = branchref.borrow_mut();
-        branch.blocks[target_idx.as_usize()] = Some(blockref.clone());
+    if asm.ctx.is_return_landing() {
+        asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP));
+        let top = asm.stack_push(Type::Unknown);
+        asm.mov(top, C_RET_OPND);
+    }
 
-        // Return a pointer to the compiled code for the block
-        return block.start_addr;
+    // Save caller-saved registers before C_ARG_OPNDS get clobbered.
+    // Spill all registers for consistency with the trampoline.
+    for &reg in caller_saved_temp_regs() {
+        asm.cpush(Opnd::Reg(reg));
     }
+    // Spill temps to the VM stack as well for jit.peek_at_stack()
+    asm.spill_temps();
+
+    // Set up the arguments unique to this stub for:
+    //
+    //    branch_stub_hit(branch_ptr, target_idx, ec)
+    //
+    // Bake pointer to Branch into output code.
+    // We make sure the block housing the branch is still alive when branch_stub_hit() is running.
+    asm.mov(C_ARG_OPNDS[0], branch_struct_address.into());
+    asm.mov(C_ARG_OPNDS[1], target_idx.into());
+
+    // Jump to trampoline to call branch_stub_hit()
+    // Not really a side exit, just don't need a padded jump here.
+    asm.jmp(CodegenGlobals::get_branch_stub_hit_trampoline().as_side_exit());
+
+    asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
+}
+
+pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> {
     let ocb = ocb.unwrap();
+    let mut asm = Assembler::new();
+
+    // For `branch_stub_hit(branch_ptr, target_idx, ec)`,
+    // `branch_ptr` and `target_idx` are different for each stub,
+    // but the call and what's after is the same. This trampoline
+    // is the unchanging part.
+    // Since this trampoline is static, it allows code GC inside
+    // branch_stub_hit() to free stubs without problems.
+    asm_comment!(asm, "branch_stub_hit() trampoline");
+    let stub_hit_ret = asm.ccall(
+        branch_stub_hit as *mut u8,
+        vec![
+            C_ARG_OPNDS[0],
+            C_ARG_OPNDS[1],
+            EC,
+        ]
+    );
+    let jump_addr = asm.load(stub_hit_ret);
 
-    // Generate an outlined stub that will call branch_stub_hit()
-    let stub_addr = ocb.get_write_ptr();
+    // Restore caller-saved registers for stack temps
+    for &reg in caller_saved_temp_regs().rev() {
+        asm.cpop_into(Opnd::Reg(reg));
+    }
 
-    // Get a raw pointer to the branch while keeping the reference count alive
-    // Here clone increments the strong count by 1
-    // This means the branch stub owns its own reference to the branch
-    let branch_ptr: *const RefCell<Branch> = BranchRef::into_raw(branchref.clone());
+    // Jump to the address returned by the branch_stub_hit() call
+    asm.jmp_opnd(jump_addr);
 
-    // Call branch_stub_hit(branch_idx, target_idx, ec)
-    mov(ocb, C_ARG_REGS[2], REG_EC);
-    mov(ocb, C_ARG_REGS[1], uimm_opnd(target_idx as u64));
-    mov(ocb, C_ARG_REGS[0], const_ptr_opnd(branch_ptr as *const u8));
-    call_ptr(ocb, REG0, branch_stub_hit as *mut u8);
+    // HACK: popping into C_RET_REG clobbers the return value of branch_stub_hit() we need to jump
+    // to, so we need a scratch register to preserve it.
This extends the live range of the C + // return register so we get something else for the return value. + let _ = asm.live_reg_opnd(stub_hit_ret); - // Jump to the address returned by the - // branch_stub_hit call - jmp_rm(ocb, RAX); + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) +} - if ocb.has_dropped_bytes() { - None // No space +/// Return registers to be pushed and popped on branch_stub_hit. +pub fn caller_saved_temp_regs() -> impl Iterator<Item = &'static Reg> + DoubleEndedIterator { + let temp_regs = Assembler::get_temp_regs().iter(); + let len = temp_regs.len(); + // The return value gen_leave() leaves in C_RET_REG + // needs to survive the branch_stub_hit() call. + let regs = temp_regs.chain(std::iter::once(&C_RET_REG)); + + // On x86_64, maintain 16-byte stack alignment + if cfg!(target_arch = "x86_64") && len % 2 == 0 { + static ONE_MORE: [Reg; 1] = [C_RET_REG]; + regs.chain(ONE_MORE.iter()) } else { - Some(stub_addr) + regs.chain(&[]) + } +} + +impl Assembler +{ + /// Mark the start position of a patchable entry point in the machine code + pub fn mark_entry_start(&mut self, entryref: &PendingEntryRef) { + // We need to create our own entry rc object + // so that we can move the closure below + let entryref = entryref.clone(); + + self.pos_marker(move |code_ptr, _| { + entryref.start_addr.set(Some(code_ptr)); + }); + } + + /// Mark the end position of a patchable entry point in the machine code + pub fn mark_entry_end(&mut self, entryref: &PendingEntryRef) { + // We need to create our own entry rc object + // so that we can move the closure below + let entryref = entryref.clone(); + + self.pos_marker(move |code_ptr, _| { + entryref.end_addr.set(Some(code_ptr)); + }); + } + + // Mark the start position of a patchable branch in the machine code + fn mark_branch_start(&mut self, branchref: &PendingBranchRef) + { + // We need to create our own branch rc object + // so that we can move the closure below + let branchref = branchref.clone(); + + self.pos_marker(move |code_ptr, _| { + branchref.start_addr.set(Some(code_ptr)); + }); + } + + // Mark the end position of a patchable branch in the machine code + fn mark_branch_end(&mut self, branchref: &PendingBranchRef) + { + // We need to create our own branch rc object + // so that we can move the closure below + let branchref = branchref.clone(); + + self.pos_marker(move |code_ptr, _| { + branchref.end_addr.set(Some(code_ptr)); + }); } } pub fn gen_branch( - jit: &JITState, - src_ctx: &Context, - cb: &mut CodeBlock, + jit: &mut JITState, + asm: &mut Assembler, ocb: &mut OutlinedCb, target0: BlockId, ctx0: &Context, @@ -1778,168 +3047,207 @@ pub fn gen_branch( ctx1: Option<&Context>, gen_fn: BranchGenFn, ) { - let branchref = make_branch_entry(jit.get_block(), src_ctx, gen_fn); + let branch = new_pending_branch(jit, gen_fn); // Get the branch targets or stubs - let dst_addr0 = get_branch_target(target0, ctx0, &branchref, 0, ocb); - let dst_addr1 = if let Some(ctx) = ctx1 { - get_branch_target(target1.unwrap(), ctx, &branchref, 1, ocb) - } else { - None - }; - - let mut branch = branchref.borrow_mut(); - - // Set the branch target adresses - branch.dst_addrs[0] = dst_addr0; - branch.dst_addrs[1] = dst_addr1; + let target0_addr = branch.set_target(0, target0, ctx0, ocb); + let target1_addr = if let Some(ctx) = ctx1 { + let addr = branch.set_target(1, target1.unwrap(), ctx, ocb); + if addr.is_none() { + // target1 requested but we're out of memory. 
+ // Avoid unwrap() in gen_fn() + return; + } - branch.targets[0] = Some(target0); - branch.targets[1] = target1; - branch.target_ctxs[0] = *ctx0; - branch.target_ctxs[1] = if let Some(&ctx) = ctx1 { - ctx - } else { - Context::default() - }; + addr + } else { None }; // Call the branch generation function - branch.start_addr = Some(cb.get_write_ptr()); - regenerate_branch(cb, &mut branch); -} - -fn gen_jump_branch( - cb: &mut CodeBlock, - target0: CodePtr, - _target1: Option<CodePtr>, - shape: BranchShape, -) { - if shape == BranchShape::Next1 { - panic!("Branch shape Next1 not allowed in gen_jump_branch!"); - } - - if shape == BranchShape::Default { - jmp_ptr(cb, target0); + asm.mark_branch_start(&branch); + if let Some(dst_addr) = target0_addr { + branch.gen_fn.call(asm, Target::CodePtr(dst_addr), target1_addr.map(|addr| Target::CodePtr(addr))); } + asm.mark_branch_end(&branch); } -pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, cb: &mut CodeBlock) { - let branchref = make_branch_entry(jit.get_block(), ctx, gen_jump_branch); - let mut branch = branchref.borrow_mut(); - - branch.targets[0] = Some(target0); - branch.target_ctxs[0] = *ctx; - +pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm: &mut Assembler) { + let branch = new_pending_branch(jit, BranchGenFn::JumpToTarget0(Cell::new(BranchShape::Default))); let maybe_block = find_block_version(target0, ctx); // If the block already exists - if let Some(blockref) = maybe_block { - let mut block = blockref.borrow_mut(); - - block.incoming.push(branchref.clone()); - - branch.dst_addrs[0] = block.start_addr; - branch.blocks[0] = Some(blockref.clone()); - branch.shape = BranchShape::Default; + let new_target = if let Some(blockref) = maybe_block { + let block = unsafe { blockref.as_ref() }; + let block_addr = block.start_addr; // Call the branch generation function - branch.start_addr = Some(cb.get_write_ptr()); - gen_jump_branch(cb, branch.dst_addrs[0].unwrap(), None, BranchShape::Default); - branch.end_addr = Some(cb.get_write_ptr()); + asm_comment!(asm, "gen_direct_jmp: existing block"); + asm.mark_branch_start(&branch); + branch.gen_fn.call(asm, Target::CodePtr(block_addr), None); + asm.mark_branch_end(&branch); + + BranchTarget::Block(blockref) } else { - // This None target address signals gen_block_series() to compile the - // target block right after this one (fallthrough). - branch.dst_addrs[0] = None; - branch.shape = BranchShape::Next0; - branch.start_addr = Some(cb.get_write_ptr()); - branch.end_addr = Some(cb.get_write_ptr()); - } + // The branch is effectively empty (a noop) + asm_comment!(asm, "gen_direct_jmp: fallthrough"); + asm.mark_branch_start(&branch); + asm.mark_branch_end(&branch); + branch.gen_fn.set_shape(BranchShape::Next0); + + // `None` in new_target.address signals gen_block_series() to + // compile the target block right after this one (fallthrough). 
+ BranchTarget::Stub(Box::new(BranchStub { + address: None, + ctx: *ctx, + iseq: Cell::new(target0.iseq), + iseq_idx: target0.idx, + })) + }; + + branch.targets[0].set(Some(Box::new(new_target))); } /// Create a stub to force the code up to this point to be executed pub fn defer_compilation( - jit: &JITState, - cur_ctx: &Context, - cb: &mut CodeBlock, + jit: &mut JITState, + asm: &mut Assembler, ocb: &mut OutlinedCb, ) { - if cur_ctx.chain_depth != 0 { + if asm.ctx.is_deferred() { panic!("Double defer!"); } - let mut next_ctx = *cur_ctx; + let mut next_ctx = asm.ctx; - if next_ctx.chain_depth == u8::MAX { - panic!("max block version chain depth reached!"); - } - next_ctx.chain_depth += 1; + next_ctx.mark_as_deferred(); - let block_rc = jit.get_block(); - let branch_rc = make_branch_entry(jit.get_block(), cur_ctx, gen_jump_branch); - let mut branch = branch_rc.borrow_mut(); - let block = block_rc.borrow(); + let branch = new_pending_branch(jit, BranchGenFn::JumpToTarget0(Cell::new(BranchShape::Default))); let blockid = BlockId { - iseq: block.blockid.iseq, + iseq: jit.get_iseq(), idx: jit.get_insn_idx(), }; - branch.target_ctxs[0] = next_ctx; - branch.targets[0] = Some(blockid); - branch.dst_addrs[0] = get_branch_target(blockid, &next_ctx, &branch_rc, 0, ocb); + + // Likely a stub due to the increased chain depth + let target0_address = branch.set_target(0, blockid, &next_ctx, ocb); // Call the branch generation function - branch.start_addr = Some(cb.get_write_ptr()); - gen_jump_branch(cb, branch.dst_addrs[0].unwrap(), None, BranchShape::Default); - branch.end_addr = Some(cb.get_write_ptr()); -} + asm_comment!(asm, "defer_compilation"); + asm.mark_branch_start(&branch); + if let Some(dst_addr) = target0_address { + branch.gen_fn.call(asm, Target::CodePtr(dst_addr), None); + } + asm.mark_branch_end(&branch); -// Remove all references to a block then free it. -fn free_block(blockref: &BlockRef) { - use crate::invariants::*; + // If the block we're deferring from is empty + if jit.get_starting_insn_idx() == jit.get_insn_idx() { + incr_counter!(defer_empty_count); + } - block_assumptions_free(blockref); + incr_counter!(defer_count); +} - let block = blockref.borrow(); +/// Remove a block from the live control flow graph. +/// Block must be initialized and incoming/outgoing edges +/// must also point to initialized blocks. 
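// A minimal sketch of the decision gen_direct_jump() above makes: either jump
// straight to an already-compiled version of the target, or emit nothing and let
// the target be compiled immediately afterwards (the `address: None` fallthrough
// stub). `DirectJump` and `plan_direct_jump` are hypothetical names used only here.
#[cfg(test)]
mod direct_jump_sketch {
    #[derive(Debug, PartialEq)]
    enum DirectJump {
        Existing(usize), // jump to the known start address of a compiled version
        Fallthrough,     // emit nothing; the target gets compiled right after
    }

    fn plan_direct_jump(known_version_addr: Option<usize>) -> DirectJump {
        match known_version_addr {
            Some(addr) => DirectJump::Existing(addr),
            None => DirectJump::Fallthrough,
        }
    }

    #[test]
    fn jumps_to_existing_versions_only() {
        assert_eq!(plan_direct_jump(Some(0x1000)), DirectJump::Existing(0x1000));
        assert_eq!(plan_direct_jump(None), DirectJump::Fallthrough);
    }
}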
+unsafe fn remove_from_graph(blockref: BlockRef) {
+    let block = unsafe { blockref.as_ref() };
 
     // Remove this block from the predecessor's targets
-    for pred_branchref in &block.incoming {
+    for pred_branchref in block.incoming.0.take().iter() {
         // Branch from the predecessor to us
-        let mut pred_branch = pred_branchref.borrow_mut();
+        let pred_branch = unsafe { pred_branchref.as_ref() };
 
         // If this is us, nullify the target block
-        for pred_succ_ref in &mut pred_branch.blocks {
-            if let Some(pred_succ) = pred_succ_ref {
-                if pred_succ == blockref {
-                    *pred_succ_ref = None;
-                }
+        for target_idx in 0..pred_branch.targets.len() {
+            // SAFETY: no mutation inside unsafe
+            let target_is_us = unsafe {
+                pred_branch.targets[target_idx]
+                    .ref_unchecked()
+                    .as_ref()
+                    .and_then(|target| target.get_block())
+                    .and_then(|target_block| (target_block == blockref).then(|| ()))
+                    .is_some()
+            };
+
+            if target_is_us {
+                pred_branch.targets[target_idx].set(None);
+            }
             }
         }
     }
 
     // For each outgoing branch
-    for out_branchref in &block.outgoing {
-        let out_branch = out_branchref.borrow();
-
+    for out_branchref in block.outgoing.iter() {
+        let out_branch = unsafe { out_branchref.as_ref() };
         // For each successor block
-        for succ in &out_branch.blocks {
-            if let Some(succ) = succ {
+        for out_target in out_branch.targets.iter() {
+            // SAFETY: copying out an Option<BlockRef>. No mutation.
+            let succ_block: Option<BlockRef> = unsafe {
+                out_target.ref_unchecked().as_ref().and_then(|target| target.get_block())
+            };
+
+            if let Some(succ_block) = succ_block {
                 // Remove outgoing branch from the successor's incoming list
-                let mut succ_block = succ.borrow_mut();
-                succ_block
-                    .incoming
-                    .retain(|succ_incoming| !Rc::ptr_eq(succ_incoming, out_branchref));
+                // SAFETY: caller promises the block has valid outgoing edges.
+                let succ_block = unsafe { succ_block.as_ref() };
+                // Temporarily move out of succ_block.incoming.
+                let succ_incoming = succ_block.incoming.0.take();
+                let mut succ_incoming = succ_incoming.into_vec();
+                succ_incoming.retain(|branch| branch != out_branchref);
+                succ_block.incoming.0.set(succ_incoming.into_boxed_slice()); // allocs. Rely on oom=abort
             }
         }
     }
+}
+
+/// Tear down a block and deallocate it.
+/// Caller has to ensure that the code tracked by the block is not
+/// running, as running code may hit [branch_stub_hit], which expects
+/// [Branch] to be live.
+///
+/// We currently ensure this through the `jit_cont` system in cont.c
+/// and sometimes through the GC calling [rb_yjit_iseq_free]. The GC
+/// has proven that an ISeq is not running if it calls us to free it.
+///
+/// For delayed deallocation, since dead blocks don't keep the
+/// blocks they refer to alive, by the time we get here their outgoing
+/// edges may be dangling. Pass `graph_intact=false` in such cases.
+pub unsafe fn free_block(blockref: BlockRef, graph_intact: bool) {
+    // Careful with order here.
+    // First, remove all pointers to the referent block
+    unsafe {
+        block_assumptions_free(blockref);
+
+        if graph_intact {
+            remove_from_graph(blockref);
+        }
+    }
 
-    // No explicit deallocation here as blocks are ref-counted.
+    // SAFETY: we should now have a unique pointer to the block
+    unsafe { dealloc_block(blockref) }
+}
+
+/// Deallocate a block and its outgoing branches. Blocks own their outgoing branches.
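// A simplified sketch of the edge bookkeeping remove_from_graph() performs above:
// when a block dies, it is filtered out of each successor's incoming list.
// Branches are represented by plain integers here; `unlink_incoming` is a
// hypothetical helper written only for this example.
#[cfg(test)]
mod unlink_sketch {
    fn unlink_incoming(incoming: &mut Vec<usize>, dead_branch: usize) {
        incoming.retain(|&branch| branch != dead_branch);
    }

    #[test]
    fn drops_only_the_dead_edge() {
        let mut incoming = vec![1, 2, 3];
        unlink_incoming(&mut incoming, 2);
        assert_eq!(incoming, vec![1, 3]);
    }
}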
+/// Caller must ensure that we have unique ownership for the referent block +unsafe fn dealloc_block(blockref: BlockRef) { + unsafe { + for outgoing in blockref.as_ref().outgoing.iter() { + // this Box::from_raw matches the Box::into_raw from PendingBranch::into_branch + mem::drop(Box::from_raw(outgoing.as_ptr())); + } + } + + // Deallocate the referent Block + unsafe { + // this Box::from_raw matches the Box::into_raw from JITState::into_block + mem::drop(Box::from_raw(blockref.as_ptr())); + } } // Some runtime checks for integrity of a program location pub fn verify_blockid(blockid: BlockId) { unsafe { assert!(rb_IMEMO_TYPE_P(blockid.iseq.into(), imemo_iseq) != 0); - assert!(blockid.idx < get_iseq_encoded_size(blockid.iseq)); + assert!(u32::from(blockid.idx) < get_iseq_encoded_size(blockid.iseq)); } } @@ -1950,114 +3258,129 @@ pub fn invalidate_block_version(blockref: &BlockRef) { // TODO: want to assert that all other ractors are stopped here. Can't patch // machine code that some other thread is running. - let block = blockref.borrow(); - let cb = CodegenGlobals::get_inline_cb(); + let block = unsafe { (*blockref).as_ref() }; + let id_being_invalidated = block.get_blockid(); + let mut cb = CodegenGlobals::get_inline_cb(); let ocb = CodegenGlobals::get_outlined_cb(); - verify_blockid(block.blockid); + verify_blockid(id_being_invalidated); + + #[cfg(feature = "disasm")] + { + // If dump_iseq_disasm is specified, print to console that blocks for matching ISEQ names were invalidated. + if let Some(substr) = get_option_ref!(dump_iseq_disasm).as_ref() { + let iseq_range = &block.iseq_range; + let iseq_location = iseq_get_location(block.iseq.get(), iseq_range.start); + if iseq_location.contains(substr) { + println!("Invalidating block from {}, ISEQ offsets [{}, {})", iseq_location, iseq_range.start, iseq_range.end); + } + } + } // Remove this block from the version array remove_block_version(blockref); // Get a pointer to the generated code for this block - let code_ptr = block.start_addr; + let block_start = block.start_addr; - // Make the the start of the block do an exit. This handles OOM situations + // Make the start of the block do an exit. This handles OOM situations // and some cases where we can't efficiently patch incoming branches. // Do this first, since in case there is a fallthrough branch into this // block, the patching loop below can overwrite the start of the block. // In those situations, there is hopefully no jumps to the start of the block // after patching as the start of the block would be in the middle of something // generated by branch_t::gen_fn. + let block_entry_exit = block + .entry_exit + .expect("invalidation needs the entry_exit field"); { - let block_start = block - .start_addr - .expect("invalidation needs constructed block"); - let block_end = block - .end_addr - .expect("invalidation needs constructed block"); - let block_entry_exit = block - .entry_exit - .expect("invalidation needs the entry_exit field"); + let block_end = block.get_end_addr(); if block_start == block_entry_exit { // Some blocks exit on entry. Patching a jump to the entry at the // entry makes an infinite loop. } else { - // TODO(alan) - // if (block.start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) // Don't patch frozen code region - // Patch in a jump to block.entry_exit. 
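// dealloc_block() above relies on Box::into_raw()/Box::from_raw() pairing up
// exactly once. This standalone sketch shows the ownership round trip on a
// plain String; nothing here is specific to YJIT.
#[cfg(test)]
mod raw_box_sketch {
    #[test]
    fn into_raw_then_from_raw_frees_exactly_once() {
        let raw: *mut String = Box::into_raw(Box::new(String::from("block")));
        // While only the raw pointer exists, Rust no longer tracks the
        // allocation; reclaiming it must happen exactly once, by hand.
        let boxed: Box<String> = unsafe { Box::from_raw(raw) };
        assert_eq!(*boxed, "block");
        // `boxed` drops here, freeing the allocation.
    }
}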
+ let cur_pos = cb.get_write_ptr(); + let cur_dropped_bytes = cb.has_dropped_bytes(); cb.set_write_ptr(block_start); - jmp_ptr(cb, block_entry_exit); + + let mut asm = Assembler::new(); + asm.jmp(block_entry_exit.as_side_exit()); + cb.set_dropped_bytes(false); + asm.compile(&mut cb, Some(ocb)).expect("can rewrite existing code"); + assert!( - cb.get_write_ptr() < block_end, - "invalidation wrote past end of block" + cb.get_write_ptr() <= block_end, + "invalidation wrote past end of block (code_size: {:?}, new_size: {})", + block.code_size(), + cb.get_write_ptr().as_offset() - block_start.as_offset(), ); cb.set_write_ptr(cur_pos); + cb.set_dropped_bytes(cur_dropped_bytes); } } // For each incoming branch - for branchref in &block.incoming { - let mut branch = branchref.borrow_mut(); - let target_idx = if branch.dst_addrs[0] == code_ptr { + for branchref in block.incoming.0.take().iter() { + let branch = unsafe { branchref.as_ref() }; + let target_idx = if branch.get_target_address(0) == Some(block_start) { 0 } else { 1 }; - assert_eq!(branch.dst_addrs[target_idx], code_ptr); - assert_eq!(blockref, branch.blocks[target_idx].as_ref().unwrap()); - - // Mark this target as being a stub - branch.blocks[target_idx] = None; - - // TODO(alan): - // Don't patch frozen code region - // if (branch.start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) { - // continue; - // } - // Create a stub for this branch target - mem::drop(branch); // end RefCell borrow as get_branch_target() can borrow the branch. - let mut branch_target = - get_branch_target(block.blockid, &block.ctx, branchref, target_idx as u32, ocb); - - if branch_target.is_none() { - // We were unable to generate a stub (e.g. OOM). Use the block's - // exit instead of a stub for the block. It's important that we - // still patch the branch in this situation so stubs are unique - // to branches. Think about what could go wrong if we run out of - // memory in the middle of this loop. - branch_target = block.entry_exit; + // Assert that the incoming branch indeed points to the block being invalidated + // SAFETY: no mutation. + unsafe { + let incoming_target = branch.targets[target_idx].ref_unchecked().as_ref().unwrap(); + assert_eq!(Some(block_start), incoming_target.get_address()); + if let Some(incoming_block) = &incoming_target.get_block() { + assert_eq!(blockref, incoming_block); + } } - branch = branchref.borrow_mut(); - branch.dst_addrs[target_idx] = branch_target; + // Create a stub for this branch target + let stub_addr = gen_branch_stub(&block.ctx, ocb, branchref.as_ptr() as usize, target_idx as u32); + + // In case we were unable to generate a stub (e.g. OOM). Use the block's + // exit instead of a stub for the block. It's important that we + // still patch the branch in this situation so stubs are unique + // to branches. Think about what could go wrong if we run out of + // memory in the middle of this loop. + let stub_addr = stub_addr.unwrap_or(block_entry_exit); + + // Fill the branch target with a stub + branch.targets[target_idx].set(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub { + address: Some(stub_addr), + iseq: block.iseq.clone(), + iseq_idx: block.iseq_range.start, + ctx: block.ctx, + }))))); // Check if the invalidated block immediately follows - let target_next = block.start_addr == branch.end_addr; + let target_next = block.start_addr == branch.end_addr.get(); if target_next { // The new block will no longer be adjacent. 
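// The patching above saves the write position, rewinds to the invalidated
// block, overwrites its start with a jump, then restores the position so
// normal code emission can continue. A toy version of that save/patch/restore
// dance, with a Vec<u8> standing in for the code block (hypothetical names):
#[cfg(test)]
mod patch_cursor_sketch {
    struct Buf { bytes: Vec<u8>, pos: usize }

    impl Buf {
        fn write(&mut self, byte: u8) {
            self.bytes[self.pos] = byte;
            self.pos += 1;
        }
    }

    #[test]
    fn patch_then_restore_cursor() {
        let mut buf = Buf { bytes: vec![0; 8], pos: 6 };
        let saved = buf.pos; // remember where normal emission left off
        buf.pos = 2;         // rewind to the spot being patched
        buf.write(0xE9);     // overwrite with (say) a jump opcode
        buf.pos = saved;     // restore so later emission continues correctly
        assert_eq!(buf.bytes[2], 0xE9);
        assert_eq!(buf.pos, 6);
    }
}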
// Note that we could be enlarging the branch and writing into the // start of the block being invalidated. - branch.shape = BranchShape::Default; + branch.gen_fn.set_shape(BranchShape::Default); } // Rewrite the branch with the new jump target address - regenerate_branch(cb, &mut branch); + let old_branch_size = branch.code_size(); + regenerate_branch(cb, branch); if target_next && branch.end_addr > block.end_addr { - dbg!( - branch.block.borrow().blockid.idx, - block.blockid.idx, - branch.end_addr, - block.end_addr, - block.code_size() + panic!("yjit invalidate rewrote branch past end of invalidated block: {:?} (code_size: {})", branch, block.code_size()); + } + if !target_next && branch.code_size() > old_branch_size { + panic!( + "invalidated branch grew in size (start_addr: {:?}, old_size: {}, new_size: {})", + branch.start_addr.raw_ptr(cb), old_branch_size, branch.code_size() ); - panic!("yjit invalidate rewrote branch past end of invalidated block"); } } @@ -2069,18 +3392,21 @@ pub fn invalidate_block_version(blockref: &BlockRef) { // points will always have an instruction index of 0. We'll need to // change this in the future when we support optional parameters because // they enter the function with a non-zero PC - if block.blockid.idx == 0 { - unsafe { rb_iseq_reset_jit_func(block.blockid.iseq) }; + if block.iseq_range.start == 0 { + // TODO: + // We could reset the exec counter to zero in rb_iseq_reset_jit_func() + // so that we eventually compile a new entry point when useful + unsafe { rb_iseq_reset_jit_func(block.iseq.get()) }; } - // TODO: - // May want to recompile a new entry point (for interpreter entry blocks) - // This isn't necessary for correctness - // FIXME: // Call continuation addresses on the stack can also be atomically replaced by jumps going to the stub. - free_block(blockref); + // SAFETY: This block was in a version_map earlier + // in this function before we removed it, so it's well connected. + unsafe { remove_from_graph(*blockref) }; + + delayed_deallocation(*blockref); ocb.unwrap().mark_all_executable(); cb.mark_all_executable(); @@ -2088,36 +3414,332 @@ pub fn invalidate_block_version(blockref: &BlockRef) { incr_counter!(invalidation_count); } +// We cannot deallocate blocks immediately after invalidation since there +// could be stubs waiting to access branch pointers. Return stubs can do +// this since patching the code for setting up return addresses does not +// affect old return addresses that are already set up to use potentially +// invalidated branch pointers. Example: +// def foo(n) +// if n == 2 +// # 1.times.each to create a cfunc frame to preserve the JIT frame +// # which will return to a stub housed in an invalidated block +// return 1.times.each { Object.define_method(:foo) {} } +// end +// +// foo(n + 1) +// end +// p foo(1) +pub fn delayed_deallocation(blockref: BlockRef) { + block_assumptions_free(blockref); + + let payload = get_iseq_payload(unsafe { blockref.as_ref() }.iseq.get()).unwrap(); + payload.dead_blocks.push(blockref); +} + +trait RefUnchecked { + type Contained; + unsafe fn ref_unchecked(&self) -> &Self::Contained; +} + +impl<T> RefUnchecked for Cell<T> { + type Contained = T; + + /// Gives a reference to the contents of a [Cell]. + /// Dangerous; please include a SAFETY note. + /// + /// An easy way to use this without triggering Undefined Behavior is to + /// 1. ensure there is transitively no Cell/UnsafeCell mutation in the `unsafe` block + /// 2. 
ensure the `unsafe` block does not return any references, so our + /// analysis is lexically confined. This is trivially true if the block + /// returns a `bool`, for example. Aggregates that store references have + /// explicit lifetime parameters that look like `<'a>`. + /// + /// There are other subtler situations that don't follow these rules yet + /// are still sound. + /// See `test_miri_ref_unchecked()` for examples. You can play with it + /// with `cargo +nightly miri test miri`. + unsafe fn ref_unchecked(&self) -> &Self::Contained { + // SAFETY: pointer is dereferenceable because it's from a &Cell. + // It's up to the caller to follow aliasing rules with the output + // reference. + unsafe { self.as_ptr().as_ref().unwrap() } + } +} + #[cfg(test)] mod tests { use crate::core::*; #[test] + fn type_size() { + // Check that we can store types in 4 bits, + // and all local types in 32 bits + assert_eq!(mem::size_of::<Type>(), 1); + assert!(Type::BlockParamProxy as usize <= 0b1111); + assert!(MAX_LOCAL_TYPES * 4 <= 32); + } + + #[test] + fn tempmapping_size() { + assert_eq!(mem::size_of::<TempMapping>(), 1); + } + + #[test] + fn local_types() { + let mut ctx = Context::default(); + + for i in 0..MAX_LOCAL_TYPES { + ctx.set_local_type(i, Type::Fixnum); + assert_eq!(ctx.get_local_type(i), Type::Fixnum); + ctx.set_local_type(i, Type::BlockParamProxy); + assert_eq!(ctx.get_local_type(i), Type::BlockParamProxy); + } + + ctx.set_local_type(0, Type::Fixnum); + ctx.clear_local_types(); + assert!(ctx.get_local_type(0) == Type::Unknown); + + // Make sure we don't accidentally set bits incorrectly + let mut ctx = Context::default(); + ctx.set_local_type(0, Type::Fixnum); + assert_eq!(ctx.get_local_type(0), Type::Fixnum); + ctx.set_local_type(2, Type::Fixnum); + ctx.set_local_type(1, Type::BlockParamProxy); + assert_eq!(ctx.get_local_type(0), Type::Fixnum); + assert_eq!(ctx.get_local_type(2), Type::Fixnum); + } + + #[test] + fn tempmapping() { + let t = TempMapping::map_to_stack(Type::Unknown); + assert_eq!(t.get_kind(), MapToStack); + assert_eq!(t.get_type(), Type::Unknown); + + let t = TempMapping::map_to_stack(Type::TString); + assert_eq!(t.get_kind(), MapToStack); + assert_eq!(t.get_type(), Type::TString); + + let t = TempMapping::map_to_local(7); + assert_eq!(t.get_kind(), MapToLocal); + assert_eq!(t.get_local_idx(), 7); + } + + #[test] + fn context_size() { + assert_eq!(mem::size_of::<Context>(), 23); + } + + #[test] fn types() { // Valid src => dst - assert_eq!(Type::Unknown.diff(Type::Unknown), 0); - assert_eq!(Type::UnknownImm.diff(Type::UnknownImm), 0); - assert_ne!(Type::UnknownImm.diff(Type::Unknown), usize::MAX); - assert_ne!(Type::Fixnum.diff(Type::Unknown), usize::MAX); - assert_ne!(Type::Fixnum.diff(Type::UnknownImm), usize::MAX); + assert_eq!(Type::Unknown.diff(Type::Unknown), TypeDiff::Compatible(0)); + assert_eq!(Type::UnknownImm.diff(Type::UnknownImm), TypeDiff::Compatible(0)); + assert_ne!(Type::UnknownImm.diff(Type::Unknown), TypeDiff::Incompatible); + assert_ne!(Type::Fixnum.diff(Type::Unknown), TypeDiff::Incompatible); + assert_ne!(Type::Fixnum.diff(Type::UnknownImm), TypeDiff::Incompatible); // Invalid src => dst - assert_eq!(Type::Unknown.diff(Type::UnknownImm), usize::MAX); - assert_eq!(Type::Unknown.diff(Type::Fixnum), usize::MAX); - assert_eq!(Type::Fixnum.diff(Type::UnknownHeap), usize::MAX); + assert_eq!(Type::Unknown.diff(Type::UnknownImm), TypeDiff::Incompatible); + assert_eq!(Type::Unknown.diff(Type::Fixnum), TypeDiff::Incompatible); + 
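// A standalone illustration of the ref_unchecked() discipline documented above:
// copy what you need out of the unsafe block instead of holding a reference
// across a later Cell mutation. Uses only Cell::as_ptr(); nothing here is YJIT-specific.
#[cfg(test)]
mod cell_peek_sketch {
    use std::cell::Cell;

    #[test]
    fn peek_then_mutate() {
        let slot: Cell<Option<u32>> = Cell::new(Some(7));

        // Peek: the unsafe block returns a plain bool, not a reference.
        let was_some = unsafe { (*slot.as_ptr()).is_some() };
        assert!(was_some);

        // Only after the temporary borrow has ended do we mutate the cell.
        slot.set(None);
        assert!(slot.get().is_none());
    }
}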
assert_eq!(Type::Fixnum.diff(Type::UnknownHeap), TypeDiff::Incompatible); + } + + #[test] + fn reg_temps() { + let mut reg_temps = RegTemps(0); + + // 0 means every slot is not spilled + for stack_idx in 0..MAX_REG_TEMPS { + assert_eq!(reg_temps.get(stack_idx), false); + } + + // Set 0, 2, 7 (RegTemps: 10100001) + reg_temps.set(0, true); + reg_temps.set(2, true); + reg_temps.set(3, true); + reg_temps.set(3, false); + reg_temps.set(7, true); + + // Get 0..8 + assert_eq!(reg_temps.get(0), true); + assert_eq!(reg_temps.get(1), false); + assert_eq!(reg_temps.get(2), true); + assert_eq!(reg_temps.get(3), false); + assert_eq!(reg_temps.get(4), false); + assert_eq!(reg_temps.get(5), false); + assert_eq!(reg_temps.get(6), false); + assert_eq!(reg_temps.get(7), true); + + // Test conflicts + assert_eq!(5, get_option!(num_temp_regs)); + assert_eq!(reg_temps.conflicts_with(0), false); // already set, but no conflict + assert_eq!(reg_temps.conflicts_with(1), false); + assert_eq!(reg_temps.conflicts_with(2), true); // already set, and conflicts with 7 + assert_eq!(reg_temps.conflicts_with(3), false); + assert_eq!(reg_temps.conflicts_with(4), false); + assert_eq!(reg_temps.conflicts_with(5), true); // not set, and will conflict with 0 + assert_eq!(reg_temps.conflicts_with(6), false); + assert_eq!(reg_temps.conflicts_with(7), true); // already set, and conflicts with 2 } #[test] fn context() { // Valid src => dst - assert_eq!(Context::default().diff(&Context::default()), 0); + assert_eq!(Context::default().diff(&Context::default()), TypeDiff::Compatible(0)); // Try pushing an operand and getting its type - let mut ctx = Context::default(); - ctx.stack_push(Type::Fixnum); - let top_type = ctx.get_opnd_type(StackOpnd(0)); + let mut asm = Assembler::new(); + asm.stack_push(Type::Fixnum); + let top_type = asm.ctx.get_opnd_type(StackOpnd(0)); assert!(top_type == Type::Fixnum); // TODO: write more tests for Context type diff } + + #[test] + fn context_upgrade_local() { + let mut asm = Assembler::new(); + asm.stack_push_local(0); + asm.ctx.upgrade_opnd_type(StackOpnd(0), Type::Nil); + assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0))); + } + + #[test] + fn context_chain_depth() { + let mut ctx = Context::default(); + assert_eq!(ctx.get_chain_depth(), 0); + assert_eq!(ctx.is_return_landing(), false); + assert_eq!(ctx.is_deferred(), false); + + for _ in 0..5 { + ctx.increment_chain_depth(); + } + assert_eq!(ctx.get_chain_depth(), 5); + + ctx.set_as_return_landing(); + assert_eq!(ctx.is_return_landing(), true); + + ctx.clear_return_landing(); + assert_eq!(ctx.is_return_landing(), false); + + ctx.mark_as_deferred(); + assert_eq!(ctx.is_deferred(), true); + + ctx.reset_chain_depth_and_defer(); + assert_eq!(ctx.get_chain_depth(), 0); + assert_eq!(ctx.is_deferred(), false); + } + + #[test] + fn shift_stack_for_send() { + let mut asm = Assembler::new(); + + // Push values to simulate send(:name, arg) with 6 items already on-stack + for _ in 0..6 { + asm.stack_push(Type::Fixnum); + } + asm.stack_push(Type::Unknown); + asm.stack_push(Type::ImmSymbol); + asm.stack_push(Type::Unknown); + + // This method takes argc of the sendee, not argc of send + asm.shift_stack(1); + + // The symbol should be gone + assert_eq!(Type::Unknown, asm.ctx.get_opnd_type(StackOpnd(0))); + assert_eq!(Type::Unknown, asm.ctx.get_opnd_type(StackOpnd(1))); + } + + #[test] + fn test_miri_ref_unchecked() { + let blockid = BlockId { + iseq: ptr::null(), + idx: 0, + }; + let cb = CodeBlock::new_dummy(1024); + let dumm_addr = cb.get_write_ptr(); 
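// The reg_temps assertions above boil down to bit twiddling on a u8. A minimal
// stand-in (`Bitmap` is a hypothetical type; the real RegTemps also models
// register conflicts, which this sketch leaves out):
#[cfg(test)]
mod reg_bitmap_sketch {
    #[derive(Clone, Copy, Default)]
    struct Bitmap(u8);

    impl Bitmap {
        fn get(&self, idx: u8) -> bool {
            (self.0 >> idx) & 1 == 1
        }
        fn set(&mut self, idx: u8, val: bool) {
            if val { self.0 |= 1 << idx } else { self.0 &= !(1 << idx) }
        }
    }

    #[test]
    fn set_get_clear() {
        let mut map = Bitmap::default();
        map.set(0, true);
        map.set(2, true);
        map.set(2, false);
        assert!(map.get(0));
        assert!(!map.get(2));
        assert!(!map.get(7));
    }
}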
+ let block = JITState::new(blockid, Context::default(), dumm_addr, ptr::null()) + .into_block(0, dumm_addr, dumm_addr, vec![]); + let _dropper = BlockDropper(block); + + // Outside of brief moments during construction, + // we're always working with &Branch (a shared reference to a Branch). + let branch: &Branch = &Branch { + gen_fn: BranchGenFn::JZToTarget0, + block, + start_addr: dumm_addr, + end_addr: Cell::new(dumm_addr), + targets: [Cell::new(None), Cell::new(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub { + iseq: Cell::new(ptr::null()), + iseq_idx: 0, + address: None, + ctx: Context::default(), + })))))] + }; + // For easier soundness reasoning, make sure the reference returned does not out live the + // `unsafe` block! It's tempting to do, but it leads to non-local issues. + // Here is an example where it goes wrong: + if false { + for target in branch.targets.iter().as_ref() { + if let Some(btarget) = unsafe { target.ref_unchecked() } { + // btarget is derived from the usnafe block! + target.set(None); // This drops the contents of the cell... + assert!(btarget.get_address().is_none()); // but `btarget` is still live! UB. + } + } + } + + // Do something like this instead. It's not pretty, but it's easier to vet for UB this way. + for target in branch.targets.iter().as_ref() { + // SAFETY: no mutation within unsafe + if unsafe { target.ref_unchecked().is_none() } { + continue; + } + // SAFETY: no mutation within unsafe + assert!(unsafe { target.ref_unchecked().as_ref().unwrap().get_address().is_none() }); + target.set(None); + } + + // A more subtle situation where we do Cell/UnsafeCell mutation over the + // lifetime of the reference released by ref_unchecked(). + branch.targets[0].set(Some(Box::new(BranchTarget::Stub(Box::new(BranchStub { + iseq: Cell::new(ptr::null()), + iseq_idx: 0, + address: None, + ctx: Context::default(), + }))))); + // Invalid ISeq; we never dereference it. + let secret_iseq = NonNull::<rb_iseq_t>::dangling().as_ptr(); + unsafe { + if let Some(branch_target) = branch.targets[0].ref_unchecked().as_ref() { + if let BranchTarget::Stub(stub) = branch_target.as_ref() { + // SAFETY: + // This is a Cell mutation, but it mutates the contents + // of a a Cell<IseqPtr>, which is a different type + // from the type of Cell found in `Branch::targets`, so + // there is no chance of mutating the Cell that we called + // ref_unchecked() on above. + Cell::set(&stub.iseq, secret_iseq); + } + } + }; + // Check that we indeed changed the iseq of the stub + // Cell::take moves out of the cell. + assert_eq!( + secret_iseq as usize, + branch.targets[0].take().unwrap().get_blockid().iseq as usize + ); + + struct BlockDropper(BlockRef); + impl Drop for BlockDropper { + fn drop(&mut self) { + // SAFETY: we have ownership because the test doesn't stash + // the block away in any global structure. + // Note that the test being self-contained is also why we + // use dealloc_block() over free_block(), as free_block() touches + // the global invariants tables unavailable in tests. + unsafe { dealloc_block(self.0) }; + } + } + } } |
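// The BlockDropper in test_miri_ref_unchecked() above is a classic drop guard:
// cleanup runs when the guard leaves scope, even if the test exits early. A
// self-contained version of the same pattern (hypothetical names only):
#[cfg(test)]
mod drop_guard_sketch {
    use std::cell::Cell;
    use std::rc::Rc;

    struct Guard(Rc<Cell<bool>>);

    impl Drop for Guard {
        fn drop(&mut self) {
            // Stand-in for dealloc_block(): record that cleanup happened.
            self.0.set(true);
        }
    }

    #[test]
    fn cleanup_runs_when_the_guard_goes_out_of_scope() {
        let freed = Rc::new(Cell::new(false));
        {
            let _guard = Guard(Rc::clone(&freed));
            assert!(!freed.get());
        }
        assert!(freed.get());
    }
}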