Diffstat (limited to 'yjit/src/core.rs')
-rw-r--r-- | yjit/src/core.rs | 1248 |
1 file changed, 863 insertions, 385 deletions
diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 4dd0a387d5..cd6e649aa0 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -18,13 +18,14 @@ use std::cell::*; use std::collections::HashSet; use std::fmt; use std::mem; +use std::mem::transmute; use std::ops::Range; use std::rc::Rc; use mem::MaybeUninit; use std::ptr; use ptr::NonNull; use YARVOpnd::*; -use TempMapping::*; +use TempMappingKind::*; use crate::invariants::*; // Maximum number of temp value types we keep track of @@ -39,8 +40,9 @@ pub type IseqIdx = u16; // Represent the type of a value (local/stack/self) in YJIT #[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] +#[repr(u8)] pub enum Type { - Unknown, + Unknown = 0, UnknownImm, UnknownHeap, Nil, @@ -48,19 +50,20 @@ pub enum Type { False, Fixnum, Flonum, - Hash, ImmSymbol, - #[allow(unused)] - HeapSymbol, - TString, // An object with the T_STRING flag set, possibly an rb_cString - CString, // An un-subclassed string of type rb_cString (can have instance vars in some cases) + CString, // An object that at one point had its class field equal rb_cString (creating a singleton class changes it) TArray, // An object with the T_ARRAY flag set, possibly an rb_cArray - CArray, // An un-subclassed string of type rb_cArray (can have instance vars in some cases) + CArray, // An object that at one point had its class field equal rb_cArray (creating a singleton class changes it) + THash, // An object with the T_HASH flag set, possibly an rb_cHash + CHash, // An object that at one point had its class field equal rb_cHash (creating a singleton class changes it) BlockParamProxy, // A special sentinel value indicating the block parameter should be read from // the current surrounding cfp + + // The context currently relies on types taking at most 4 bits (max value 15) + // to encode, so if we add any more, we will need to refactor the context. } // Default initialization @@ -93,12 +96,11 @@ impl Type { // Core.rs can't reference rb_cString because it's linked by Rust-only tests. // But CString vs TString is only an optimisation and shouldn't affect correctness. #[cfg(not(test))] - if val.class_of() == unsafe { rb_cString } { - return Type::CString; - } - #[cfg(not(test))] - if val.class_of() == unsafe { rb_cArray } { - return Type::CArray; + match val.class_of() { + class if class == unsafe { rb_cArray } => return Type::CArray, + class if class == unsafe { rb_cHash } => return Type::CHash, + class if class == unsafe { rb_cString } => return Type::CString, + _ => {} } // We likewise can't reference rb_block_param_proxy, but it's again an optimisation; // we can just treat it as a normal Object. 
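
Note: the added comment says the context relies on Type fitting in 4 bits (max value 15). A standalone sketch (not the YJIT sources) of what that constraint buys: a #[repr(u8)] enum whose discriminants stay at or below 15 can be packed into a nibble and decoded back with a transmute guarded by a range check. The Ty enum and its variants here are illustrative stand-ins.

    // Standalone sketch: a #[repr(u8)] enum small enough to pack into 4 bits.
    #[derive(Copy, Clone, PartialEq, Debug)]
    #[repr(u8)]
    enum Ty {
        Unknown = 0,
        Nil,
        Fixnum,
        TArray,
        CArray,
        THash,
        CHash,
        TString,
        CString, // last discriminant; must stay <= 15 for nibble packing
    }

    // Compile-time check that every discriminant fits in 4 bits.
    const _: () = assert!(Ty::CString as u8 <= 0b1111);

    fn decode(bits: u8) -> Ty {
        assert!(bits <= Ty::CString as u8);
        // Sound because Ty is #[repr(u8)] and `bits` is a valid discriminant.
        unsafe { std::mem::transmute::<u8, Ty>(bits) }
    }

    fn main() {
        let bits = Ty::CHash as u8;
        assert_eq!(decode(bits), Ty::CHash);
    }
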
@@ -108,7 +110,7 @@ impl Type { } match val.builtin_type() { RUBY_T_ARRAY => Type::TArray, - RUBY_T_HASH => Type::Hash, + RUBY_T_HASH => Type::THash, RUBY_T_STRING => Type::TString, _ => Type::UnknownHeap, } @@ -150,8 +152,8 @@ impl Type { Type::UnknownHeap => true, Type::TArray => true, Type::CArray => true, - Type::Hash => true, - Type::HeapSymbol => true, + Type::THash => true, + Type::CHash => true, Type::TString => true, Type::CString => true, Type::BlockParamProxy => true, @@ -161,20 +163,17 @@ impl Type { /// Check if it's a T_ARRAY object (both TArray and CArray are T_ARRAY) pub fn is_array(&self) -> bool { - match self { - Type::TArray => true, - Type::CArray => true, - _ => false, - } + matches!(self, Type::TArray | Type::CArray) + } + + /// Check if it's a T_HASH object (both THash and CHash are T_HASH) + pub fn is_hash(&self) -> bool { + matches!(self, Type::THash | Type::CHash) } /// Check if it's a T_STRING object (both TString and CString are T_STRING) pub fn is_string(&self) -> bool { - match self { - Type::TString => true, - Type::CString => true, - _ => false, - } + matches!(self, Type::TString | Type::CString) } /// Returns an Option with the T_ value type if it is known, otherwise None @@ -186,8 +185,8 @@ impl Type { Type::Fixnum => Some(RUBY_T_FIXNUM), Type::Flonum => Some(RUBY_T_FLOAT), Type::TArray | Type::CArray => Some(RUBY_T_ARRAY), - Type::Hash => Some(RUBY_T_HASH), - Type::ImmSymbol | Type::HeapSymbol => Some(RUBY_T_SYMBOL), + Type::THash | Type::CHash => Some(RUBY_T_HASH), + Type::ImmSymbol => Some(RUBY_T_SYMBOL), Type::TString | Type::CString => Some(RUBY_T_STRING), Type::Unknown | Type::UnknownImm | Type::UnknownHeap => None, Type::BlockParamProxy => None, @@ -203,9 +202,10 @@ impl Type { Type::False => Some(rb_cFalseClass), Type::Fixnum => Some(rb_cInteger), Type::Flonum => Some(rb_cFloat), - Type::ImmSymbol | Type::HeapSymbol => Some(rb_cSymbol), - Type::CString => Some(rb_cString), + Type::ImmSymbol => Some(rb_cSymbol), Type::CArray => Some(rb_cArray), + Type::CHash => Some(rb_cHash), + Type::CString => Some(rb_cString), _ => None, } } @@ -255,13 +255,18 @@ impl Type { return TypeDiff::Compatible(1); } - // A CString is also a TString. - if self == Type::CString && dst == Type::TString { + // A CArray is also a TArray. + if self == Type::CArray && dst == Type::TArray { return TypeDiff::Compatible(1); } - // A CArray is also a TArray. - if self == Type::CArray && dst == Type::TArray { + // A CHash is also a THash. + if self == Type::CHash && dst == Type::THash { + return TypeDiff::Compatible(1); + } + + // A CString is also a TString. + if self == Type::CString && dst == Type::TString { return TypeDiff::Compatible(1); } @@ -296,63 +301,92 @@ pub enum TypeDiff { Incompatible, } -// Potential mapping of a value on the temporary stack to -// self, a local variable or constant so that we can track its type #[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)] -pub enum TempMapping { - MapToStack, // Normal stack value - MapToSelf, // Temp maps to the self operand - MapToLocal(LocalIndex), // Temp maps to a local variable with index - //ConstMapping, // Small constant (0, 1, 2, Qnil, Qfalse, Qtrue) +#[repr(u8)] +pub enum TempMappingKind +{ + MapToStack = 0, + MapToSelf = 1, + MapToLocal = 2, } -// Index used by MapToLocal. Using this instead of u8 makes TempMapping 1 byte. 
+// Potential mapping of a value on the temporary stack to +// self, a local variable or constant so that we can track its type +// +// The highest two bits represent TempMappingKind, and the rest of +// the bits are used differently across different kinds. +// * MapToStack: The lowest 5 bits are used for mapping Type. +// * MapToSelf: The remaining bits are not used; the type is stored in self_type. +// * MapToLocal: The lowest 3 bits store the index of a local variable. #[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)] -pub enum LocalIndex { - Local0, - Local1, - Local2, - Local3, - Local4, - Local5, - Local6, - Local7, -} +pub struct TempMapping(u8); -impl From<LocalIndex> for u8 { - fn from(idx: LocalIndex) -> Self { - match idx { - LocalIndex::Local0 => 0, - LocalIndex::Local1 => 1, - LocalIndex::Local2 => 2, - LocalIndex::Local3 => 3, - LocalIndex::Local4 => 4, - LocalIndex::Local5 => 5, - LocalIndex::Local6 => 6, - LocalIndex::Local7 => 7, - } +impl TempMapping { + pub fn map_to_stack(t: Type) -> TempMapping + { + let kind_bits = TempMappingKind::MapToStack as u8; + let type_bits = t as u8; + assert!(type_bits <= 0b11111); + let bits = (kind_bits << 6) | (type_bits & 0b11111); + TempMapping(bits) + } + + pub fn map_to_self() -> TempMapping + { + let kind_bits = TempMappingKind::MapToSelf as u8; + let bits = kind_bits << 6; + TempMapping(bits) } -} -impl From<u8> for LocalIndex { - fn from(idx: u8) -> Self { - match idx { - 0 => LocalIndex::Local0, - 1 => LocalIndex::Local1, - 2 => LocalIndex::Local2, - 3 => LocalIndex::Local3, - 4 => LocalIndex::Local4, - 5 => LocalIndex::Local5, - 6 => LocalIndex::Local6, - 7 => LocalIndex::Local7, - _ => unreachable!("{idx} was larger than {MAX_LOCAL_TYPES}"), + pub fn map_to_local(local_idx: u8) -> TempMapping + { + let kind_bits = TempMappingKind::MapToLocal as u8; + assert!(local_idx <= 0b111); + let bits = (kind_bits << 6) | (local_idx & 0b111); + TempMapping(bits) + } + + pub fn without_type(&self) -> TempMapping + { + if self.get_kind() != TempMappingKind::MapToStack { + return *self; } + + TempMapping::map_to_stack(Type::Unknown) + } + + pub fn get_kind(&self) -> TempMappingKind + { + // Take the two highest bits + let TempMapping(bits) = self; + let kind_bits = bits >> 6; + assert!(kind_bits <= 2); + unsafe { transmute::<u8, TempMappingKind>(kind_bits) } + } + + pub fn get_type(&self) -> Type + { + assert!(self.get_kind() == TempMappingKind::MapToStack); + + // Take the 5 lowest bits + let TempMapping(bits) = self; + let type_bits = bits & 0b11111; + unsafe { transmute::<u8, Type>(type_bits) } + } + + pub fn get_local_idx(&self) -> u8 + { + assert!(self.get_kind() == TempMappingKind::MapToLocal); + + // Take the 3 lowest bits + let TempMapping(bits) = self; + bits & 0b111 } } impl Default for TempMapping { fn default() -> Self { - MapToStack + TempMapping::map_to_stack(Type::Unknown) } } @@ -403,21 +437,27 @@ impl RegTemps { /// Return true if there's a register that conflicts with a given stack_idx. 
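
Note: a minimal sketch of the one-byte encoding described in the new comment above (kind in the top two bits, a 5-bit type or 3-bit local index in the low bits). Names here are illustrative, not the actual TempMapping API.

    #[derive(Copy, Clone, PartialEq, Debug)]
    enum Kind { Stack, SelfOpnd, Local }

    #[derive(Copy, Clone, PartialEq, Debug)]
    struct Mapping(u8);

    impl Mapping {
        fn stack(type_bits: u8) -> Self {
            assert!(type_bits <= 0b11111);
            Mapping((0 << 6) | type_bits) // kind 0 = Stack
        }
        fn local(idx: u8) -> Self {
            assert!(idx <= 0b111);
            Mapping((2 << 6) | idx) // kind 2 = Local
        }
        fn kind(self) -> Kind {
            match self.0 >> 6 {
                0 => Kind::Stack,
                1 => Kind::SelfOpnd,
                2 => Kind::Local,
                _ => unreachable!(),
            }
        }
        fn local_idx(self) -> u8 {
            assert!(self.kind() == Kind::Local);
            self.0 & 0b111
        }
    }

    fn main() {
        let m = Mapping::local(5);
        assert_eq!(m.kind(), Kind::Local);
        assert_eq!(m.local_idx(), 5);
        assert_eq!(Mapping::stack(0b10101).kind(), Kind::Stack);
        assert_eq!(std::mem::size_of::<Mapping>(), 1); // the whole mapping is one byte
    }
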
pub fn conflicts_with(&self, stack_idx: u8) -> bool { - let mut other_idx = stack_idx as isize - get_option!(num_temp_regs) as isize; - while other_idx >= 0 { - if self.get(other_idx as u8) { + let mut other_idx = stack_idx as usize % get_option!(num_temp_regs); + while other_idx < MAX_REG_TEMPS as usize { + if stack_idx as usize != other_idx && self.get(other_idx as u8) { return true; } - other_idx -= get_option!(num_temp_regs) as isize; + other_idx += get_option!(num_temp_regs); } false } } +/// Bits for chain_depth_return_landing_defer +const RETURN_LANDING_BIT: u8 = 0b10000000; +const DEFER_BIT: u8 = 0b01000000; +const CHAIN_DEPTH_MASK: u8 = 0b00111111; // 63 + /// Code generation context /// Contains information we can use to specialize/optimize code /// There are a lot of context objects so we try to keep the size small. -#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, Debug)] +#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)] +#[repr(packed)] pub struct Context { // Number of values currently on the temporary stack stack_size: u8, @@ -429,20 +469,33 @@ pub struct Context { /// Bitmap of which stack temps are in a register reg_temps: RegTemps, - // Depth of this block in the sidechain (eg: inline-cache chain) - chain_depth: u8, + /// Fields packed into u8 + /// - 1st bit from the left: Whether this code is the target of a JIT-to-JIT Ruby return ([Self::is_return_landing]) + /// - 2nd bit from the left: Whether the compilation of this code has been deferred ([Self::is_deferred]) + /// - Last 6 bits (max: 63): Depth of this block in the sidechain (eg: inline-cache chain) + chain_depth_and_flags: u8, + + // Type we track for self + self_type: Type, // Local variable types we keep track of - local_types: [Type; MAX_LOCAL_TYPES], + // We store 8 local types, requiring 4 bits each, for a total of 32 bits + local_types: u32, - // Temporary variable types we keep track of - temp_types: [Type; MAX_TEMP_TYPES], + // Temp mapping kinds we track + // 8 temp mappings * 2 bits, total 16 bits + temp_mapping_kind: u16, - // Type we track for self - self_type: Type, + // Stack slot type/local_idx we track + // 8 temp types * 4 bits, total 32 bits + temp_payload: u32, - // Mapping of temp stack entries to types we track - temp_mapping: [TempMapping; MAX_TEMP_TYPES], + /// A pointer to a block ISEQ supplied by the caller. 0 if not inlined. + /// Not using IseqPtr to satisfy Default trait, and not using Option for #[repr(packed)] + /// TODO: This could be u16 if we have a global or per-ISEQ HashMap to convert IseqPtr + /// to serial indexes. We're thinking of overhauling Context structure in Ruby 3.4 which + /// could allow this to consume no bytes, so we're leaving this as is. 
+ inline_block: u64, } /// Tuple of (iseq, idx) used to identify basic blocks @@ -474,6 +527,8 @@ pub enum BranchGenFn { JNZToTarget0, JZToTarget0, JBEToTarget0, + JBToTarget0, + JOMulToTarget0, JITReturn, } @@ -485,8 +540,8 @@ impl BranchGenFn { BranchShape::Next0 => asm.jz(target1.unwrap()), BranchShape::Next1 => asm.jnz(target0), BranchShape::Default => { - asm.jnz(target0.into()); - asm.jmp(target1.unwrap().into()); + asm.jnz(target0); + asm.jmp(target1.unwrap()); } } } @@ -515,11 +570,11 @@ impl BranchGenFn { panic!("Branch shape Next1 not allowed in JumpToTarget0!"); } if shape.get() == BranchShape::Default { - asm.jmp(target0.into()); + asm.jmp(target0); } } BranchGenFn::JNZToTarget0 => { - asm.jnz(target0.into()) + asm.jnz(target0) } BranchGenFn::JZToTarget0 => { asm.jz(target0) @@ -527,9 +582,17 @@ impl BranchGenFn { BranchGenFn::JBEToTarget0 => { asm.jbe(target0) } + BranchGenFn::JBToTarget0 => { + asm.jb(target0) + } + BranchGenFn::JOMulToTarget0 => { + asm.jo_mul(target0) + } BranchGenFn::JITReturn => { - asm.comment("update cfp->jit_return"); - asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), Opnd::const_ptr(target0.unwrap_code_ptr().raw_ptr())); + asm_comment!(asm, "update cfp->jit_return"); + let jit_return = RUBY_OFFSET_CFP_JIT_RETURN - RUBY_SIZEOF_CONTROL_FRAME as i32; + let raw_ptr = asm.lea_jump_target(target0); + asm.mov(Opnd::mem(64, CFP, jit_return), raw_ptr); } } } @@ -543,6 +606,8 @@ impl BranchGenFn { BranchGenFn::JNZToTarget0 | BranchGenFn::JZToTarget0 | BranchGenFn::JBEToTarget0 | + BranchGenFn::JBToTarget0 | + BranchGenFn::JOMulToTarget0 | BranchGenFn::JITReturn => BranchShape::Default, } } @@ -563,6 +628,8 @@ impl BranchGenFn { BranchGenFn::JNZToTarget0 | BranchGenFn::JZToTarget0 | BranchGenFn::JBEToTarget0 | + BranchGenFn::JBToTarget0 | + BranchGenFn::JOMulToTarget0 | BranchGenFn::JITReturn => { assert_eq!(new_shape, BranchShape::Default); } @@ -594,8 +661,8 @@ impl BranchTarget { fn get_ctx(&self) -> Context { match self { - BranchTarget::Stub(stub) => stub.ctx.clone(), - BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.ctx.clone(), + BranchTarget::Stub(stub) => stub.ctx, + BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.ctx, } } @@ -660,7 +727,7 @@ pub struct PendingBranch { impl Branch { // Compute the size of the branch code fn code_size(&self) -> usize { - (self.end_addr.get().raw_ptr() as usize) - (self.start_addr.raw_ptr() as usize) + (self.end_addr.get().as_offset() - self.start_addr.as_offset()) as usize } /// Get the address of one of the branch destination @@ -752,7 +819,7 @@ impl PendingBranch { address: Some(stub_addr), iseq: Cell::new(target.iseq), iseq_idx: target.idx, - ctx: ctx.clone(), + ctx: *ctx, }))))); } @@ -937,7 +1004,6 @@ impl fmt::Debug for MutableBranchList { } } - /// This is all the data YJIT stores on an iseq /// This will be dynamically allocated by C code /// C code should pass an &mut IseqPayload to us @@ -1050,23 +1116,34 @@ pub fn for_each_on_stack_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) { /// Iterate over all NOT on-stack ISEQ payloads pub fn for_each_off_stack_iseq_payload<F: FnMut(&mut IseqPayload)>(mut callback: F) { - let mut on_stack_iseqs: Vec<IseqPtr> = vec![]; - for_each_on_stack_iseq(|iseq| { - on_stack_iseqs.push(iseq); - }); - for_each_iseq(|iseq| { + // Get all ISEQs on the heap. Note that rb_objspace_each_objects() runs GC first, + // which could move ISEQ pointers when GC.auto_compact = true. 
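
Note: the reworked Context is #[repr(packed)], so every field contributes exactly its own size and the u64 inline_block doesn't force alignment padding. A rough sketch of that layout (field types mirror the diff, but this struct is only an illustration, not the real Context):

    // With #[repr(packed)] the struct is byte-for-byte the sum of its fields.
    #[allow(dead_code)]
    #[repr(packed)]
    struct PackedCtx {
        stack_size: u8,
        sp_offset: i8,
        reg_temps: u8,              // stands in for the RegTemps bitmap
        chain_depth_and_flags: u8,
        self_type: u8,              // stands in for Type
        local_types: u32,           // 8 locals x 4 bits
        temp_mapping_kind: u16,     // 8 temps x 2 bits
        temp_payload: u32,          // 8 temps x 4 bits
        inline_block: u64,
    }

    fn main() {
        // 5 * 1 + 4 + 2 + 4 + 8 = 23 bytes, with no padding in between.
        assert_eq!(std::mem::size_of::<PackedCtx>(), 23);
    }
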
+ // So for_each_on_stack_iseq() must be called after this, which doesn't run GC. + let mut iseqs: Vec<IseqPtr> = vec![]; + for_each_iseq(|iseq| iseqs.push(iseq)); + + // Get all ISEQs that are on a CFP of existing ECs. + let mut on_stack_iseqs: HashSet<IseqPtr> = HashSet::new(); + for_each_on_stack_iseq(|iseq| { on_stack_iseqs.insert(iseq); }); + + // Invoke the callback for iseqs - on_stack_iseqs + for iseq in iseqs { if !on_stack_iseqs.contains(&iseq) { if let Some(iseq_payload) = get_iseq_payload(iseq) { callback(iseq_payload); } } - }) + } } /// Free the per-iseq payload #[no_mangle] -pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) { +pub extern "C" fn rb_yjit_iseq_free(iseq: IseqPtr) { + // Free invariants for the ISEQ + iseq_free_invariants(iseq); + let payload = { + let payload = unsafe { rb_iseq_get_yjit_payload(iseq) }; if payload.is_null() { // Nothing to free. return; @@ -1103,7 +1180,7 @@ pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) { incr_counter!(freed_iseq_count); } -/// GC callback for marking GC objects in the the per-iseq payload. +/// GC callback for marking GC objects in the per-iseq payload. #[no_mangle] pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) { let payload = if payload.is_null() { @@ -1129,30 +1206,54 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) { for block in versions { // SAFETY: all blocks inside version_map are initialized. let block = unsafe { block.as_ref() }; + mark_block(block, cb, false); + } + } + // Mark dead blocks, since there could be stubs pointing at them + for blockref in &payload.dead_blocks { + // SAFETY: dead blocks come from version_map, which only have initialized blocks + let block = unsafe { blockref.as_ref() }; + mark_block(block, cb, true); + } + + return; + + fn mark_block(block: &Block, cb: &CodeBlock, dead: bool) { + unsafe { rb_gc_mark_movable(block.iseq.get().into()) }; + + // Mark method entry dependencies + for cme_dep in block.cme_dependencies.iter() { + unsafe { rb_gc_mark_movable(cme_dep.get().into()) }; + } + + // Mark outgoing branch entries + for branch in block.outgoing.iter() { + let branch = unsafe { branch.as_ref() }; + for target in branch.targets.iter() { + // SAFETY: no mutation inside unsafe + let target_iseq = unsafe { + target.ref_unchecked().as_ref().and_then(|target| { + // Avoid get_blockid() on blockref. Can be dangling on dead blocks, + // and the iseq housing the block already naturally handles it. + if target.get_block().is_some() { + None + } else { + Some(target.get_blockid().iseq) + } + }) + }; - unsafe { rb_gc_mark_movable(block.iseq.get().into()) }; - - // Mark method entry dependencies - for cme_dep in block.cme_dependencies.iter() { - unsafe { rb_gc_mark_movable(cme_dep.get().into()) }; - } - - // Mark outgoing branch entries - for branch in block.outgoing.iter() { - let branch = unsafe { branch.as_ref() }; - for target in branch.targets.iter() { - // SAFETY: no mutation inside unsafe - let target_iseq = unsafe { target.ref_unchecked().as_ref().map(|target| target.get_blockid().iseq) }; - - if let Some(target_iseq) = target_iseq { - unsafe { rb_gc_mark_movable(target_iseq.into()) }; - } + if let Some(target_iseq) = target_iseq { + unsafe { rb_gc_mark_movable(target_iseq.into()) }; } } + } - // Walk over references to objects in generated code. + // Mark references to objects in generated code. + // Skip for dead blocks since they shouldn't run. 
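
Note: the rewritten for_each_off_stack_iseq_payload() collects every ISEQ first, then builds a HashSet of the on-stack ones and visits the difference. A small sketch of that pattern with plain integers standing in for IseqPtr values:

    use std::collections::HashSet;

    fn main() {
        // All ISEQs found on the heap (collection order is preserved for the visit).
        let all: Vec<u32> = vec![1, 2, 3, 4, 5];
        // ISEQs currently on a control frame somewhere.
        let on_stack: HashSet<u32> = [2, 4].into_iter().collect();

        // Visit only the off-stack ones.
        let off_stack: Vec<u32> = all.into_iter()
            .filter(|iseq| !on_stack.contains(iseq))
            .collect();
        assert_eq!(off_stack, vec![1, 3, 5]);
    }
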
+ if !dead { for offset in block.gc_obj_offsets.iter() { - let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(); + let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(cb); // Creating an unaligned pointer is well defined unlike in C. let value_address = value_address as *const VALUE; @@ -1166,10 +1267,11 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) { } } -/// GC callback for updating GC objects in the the per-iseq payload. +/// GC callback for updating GC objects in the per-iseq payload. /// This is a mirror of [rb_yjit_iseq_mark]. #[no_mangle] -pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) { +pub extern "C" fn rb_yjit_iseq_update_references(iseq: IseqPtr) { + let payload = unsafe { rb_iseq_get_yjit_payload(iseq) }; let payload = if payload.is_null() { // Nothing to update. return; @@ -1196,21 +1298,70 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) { for version in versions { // SAFETY: all blocks inside version_map are initialized let block = unsafe { version.as_ref() }; + block_update_references(block, cb, false); + } + } + // Update dead blocks, since there could be stubs pointing at them + for blockref in &payload.dead_blocks { + // SAFETY: dead blocks come from version_map, which only have initialized blocks + let block = unsafe { blockref.as_ref() }; + block_update_references(block, cb, true); + } - block.iseq.set(unsafe { rb_gc_location(block.iseq.get().into()) }.as_iseq()); + // Note that we would have returned already if YJIT is off. + cb.mark_all_executable(); - // Update method entry dependencies - for cme_dep in block.cme_dependencies.iter() { - let cur_cme: VALUE = cme_dep.get().into(); - let new_cme = unsafe { rb_gc_location(cur_cme) }.as_cme(); - cme_dep.set(new_cme); + CodegenGlobals::get_outlined_cb() + .unwrap() + .mark_all_executable(); + + return; + + fn block_update_references(block: &Block, cb: &mut CodeBlock, dead: bool) { + block.iseq.set(unsafe { rb_gc_location(block.iseq.get().into()) }.as_iseq()); + + // Update method entry dependencies + for cme_dep in block.cme_dependencies.iter() { + let cur_cme: VALUE = cme_dep.get().into(); + let new_cme = unsafe { rb_gc_location(cur_cme) }.as_cme(); + cme_dep.set(new_cme); + } + + // Update outgoing branch entries + for branch in block.outgoing.iter() { + let branch = unsafe { branch.as_ref() }; + for target in branch.targets.iter() { + // SAFETY: no mutation inside unsafe + let current_iseq = unsafe { + target.ref_unchecked().as_ref().and_then(|target| { + // Avoid get_blockid() on blockref. Can be dangling on dead blocks, + // and the iseq housing the block already naturally handles it. + if target.get_block().is_some() { + None + } else { + Some(target.get_blockid().iseq) + } + }) + }; + + if let Some(current_iseq) = current_iseq { + let updated_iseq = unsafe { rb_gc_location(current_iseq.into()) } + .as_iseq(); + // SAFETY: the Cell::set is not on the reference given out + // by ref_unchecked. + unsafe { target.ref_unchecked().as_ref().unwrap().set_iseq(updated_iseq) }; + } } + } - // Walk over references to objects in generated code. + // Update references to objects in generated code. 
+ // Skip for dead blocks since they shouldn't run and + // so there is no potential of writing over invalidation jumps + if !dead { for offset in block.gc_obj_offsets.iter() { let offset_to_value = offset.as_usize(); let value_code_ptr = cb.get_ptr(offset_to_value); - let value_ptr: *const u8 = value_code_ptr.raw_ptr(); + let value_ptr: *const u8 = value_code_ptr.raw_ptr(cb); // Creating an unaligned pointer is well defined unlike in C. let value_ptr = value_ptr as *mut VALUE; @@ -1227,32 +1378,9 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) { } } } - - // Update outgoing branch entries - for branch in block.outgoing.iter() { - let branch = unsafe { branch.as_ref() }; - for target in branch.targets.iter() { - // SAFETY: no mutation inside unsafe - let current_iseq = unsafe { target.ref_unchecked().as_ref().map(|target| target.get_blockid().iseq) }; - - if let Some(current_iseq) = current_iseq { - let updated_iseq = unsafe { rb_gc_location(current_iseq.into()) } - .as_iseq(); - // SAFETY: the Cell::set is not on the reference given out - // by ref_unchecked. - unsafe { target.ref_unchecked().as_ref().unwrap().set_iseq(updated_iseq) }; - } - } - } } - } - // Note that we would have returned already if YJIT is off. - cb.mark_all_executable(); - - CodegenGlobals::get_outlined_cb() - .unwrap() - .mark_all_executable(); + } } /// Get all blocks for a particular place in an iseq. @@ -1293,14 +1421,19 @@ pub fn take_version_list(blockid: BlockId) -> VersionList { } /// Count the number of block versions matching a given blockid -fn get_num_versions(blockid: BlockId) -> usize { +/// `inlined: true` counts inlined versions, and `inlined: false` counts other versions. +fn get_num_versions(blockid: BlockId, inlined: bool) -> usize { let insn_idx = blockid.idx.as_usize(); match get_iseq_payload(blockid.iseq) { Some(payload) => { payload .version_map .get(insn_idx) - .map(|versions| versions.len()) + .map(|versions| { + versions.iter().filter(|&&version| + unsafe { version.as_ref() }.ctx.inline() == inlined + ).count() + }) .unwrap_or(0) } None => 0, @@ -1355,41 +1488,54 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> { } } - // If greedy versioning is enabled - if get_option!(greedy_versioning) { - // If we're below the version limit, don't settle for an imperfect match - if versions.len() + 1 < get_option!(max_versions) && best_diff > 0 { - return None; - } - } - return best_version; } +/// Allow inlining a Block up to MAX_INLINE_VERSIONS times. +const MAX_INLINE_VERSIONS: usize = 1000; + /// Produce a generic context when the block version limit is hit for a blockid pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context { // Guard chains implement limits separately, do nothing - if ctx.chain_depth > 0 { - return ctx.clone(); + if ctx.get_chain_depth() > 0 { + return *ctx; } + let next_versions = get_num_versions(blockid, ctx.inline()) + 1; + let max_versions = if ctx.inline() { + MAX_INLINE_VERSIONS + } else { + get_option!(max_versions) + }; + // If this block version we're about to add will hit the version limit - if get_num_versions(blockid) + 1 >= get_option!(max_versions) { + if next_versions >= max_versions { // Produce a generic context that stores no type information, // but still respects the stack_size and sp_offset constraints. // This new context will then match all future requests. 
let generic_ctx = ctx.get_generic_ctx(); - debug_assert_ne!( - TypeDiff::Incompatible, - ctx.diff(&generic_ctx), - "should substitute a compatible context", - ); + if cfg!(debug_assertions) { + let mut ctx = ctx.clone(); + if ctx.inline() { + // Suppress TypeDiff::Incompatible from ctx.diff(). We return TypeDiff::Incompatible + // to keep inlining blocks until we hit the limit, but it's safe to give up inlining. + ctx.inline_block = 0; + assert!(generic_ctx.inline_block == 0); + } + + assert_ne!( + TypeDiff::Incompatible, + ctx.diff(&generic_ctx), + "should substitute a compatible context", + ); + } return generic_ctx; } + incr_counter_to!(max_inline_versions, next_versions); - return ctx.clone(); + return *ctx; } /// Install a block version into its [IseqPayload], letting the GC track its @@ -1436,7 +1582,7 @@ unsafe fn add_block_version(blockref: BlockRef, cb: &CodeBlock) { // Run write barriers for all objects in generated code. for offset in block.gc_obj_offsets.iter() { - let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(); + let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(cb); // Creating an unaligned pointer is well defined unlike in C. let value_address: *const VALUE = value_address.cast(); @@ -1513,6 +1659,12 @@ impl JITState { if let Some(idlist) = self.stable_constant_names_assumption { track_stable_constant_names_assumption(blockref, idlist); } + for klass in self.no_singleton_class_assumptions { + track_no_singleton_class_assumption(blockref, klass); + } + if self.no_ep_escape { + track_no_ep_escape_assumption(blockref, self.iseq); + } blockref } @@ -1558,7 +1710,7 @@ impl Block { // Compute the size of the block code pub fn code_size(&self) -> usize { - (self.end_addr.get().into_usize()) - (self.start_addr.into_usize()) + (self.end_addr.get().as_offset() - self.start_addr.as_offset()).try_into().unwrap() } } @@ -1567,12 +1719,22 @@ impl Context { self.stack_size } + pub fn set_stack_size(&mut self, stack_size: u8) { + self.stack_size = stack_size; + } + /// Create a new Context that is compatible with self but doesn't have type information. pub fn get_generic_ctx(&self) -> Context { let mut generic_ctx = Context::default(); generic_ctx.stack_size = self.stack_size; generic_ctx.sp_offset = self.sp_offset; generic_ctx.reg_temps = self.reg_temps; + if self.is_return_landing() { + generic_ctx.set_as_return_landing(); + } + if self.is_deferred() { + generic_ctx.mark_as_deferred(); + } generic_ctx } @@ -1580,7 +1742,7 @@ impl Context { /// accordingly. This is useful when you want to virtually rewind a stack_size for /// generating a side exit while considering past sp_offset changes on gen_save_sp. 
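
Note: limit_block_versions() now counts only versions in the same group (inlined vs. not) and falls back to a generic, type-free context once the next version would hit that group's limit. A sketch of that decision with stand-in types and an illustrative non-inlined limit:

    const MAX_VERSIONS: usize = 4;           // illustrative non-inlined limit
    const MAX_INLINE_VERSIONS: usize = 1000;

    // `existing` records whether each already-compiled version was inlined.
    fn limited_ctx(existing: &[bool], inline: bool, ctx: u32, generic_ctx: u32) -> u32 {
        let next_versions = existing.iter().filter(|&&v| v == inline).count() + 1;
        let max_versions = if inline { MAX_INLINE_VERSIONS } else { MAX_VERSIONS };
        if next_versions >= max_versions { generic_ctx } else { ctx }
    }

    fn main() {
        let versions = [false, false, false]; // three non-inlined versions already exist
        assert_eq!(limited_ctx(&versions, false, 7, 0), 0); // limit hit: use the generic context
        assert_eq!(limited_ctx(&versions, true, 7, 0), 7);  // inlined group is far from its limit
    }
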
pub fn with_stack_size(&self, stack_size: u8) -> Context { - let mut ctx = self.clone(); + let mut ctx = *self; ctx.sp_offset -= (ctx.get_stack_size() as isize - stack_size as isize) as i8; ctx.stack_size = stack_size; ctx @@ -1603,24 +1765,54 @@ impl Context { } pub fn get_chain_depth(&self) -> u8 { - self.chain_depth + self.chain_depth_and_flags & CHAIN_DEPTH_MASK } - pub fn reset_chain_depth(&mut self) { - self.chain_depth = 0; + pub fn reset_chain_depth_and_defer(&mut self) { + self.chain_depth_and_flags &= !CHAIN_DEPTH_MASK; + self.chain_depth_and_flags &= !DEFER_BIT; } pub fn increment_chain_depth(&mut self) { - self.chain_depth += 1; + if self.get_chain_depth() == CHAIN_DEPTH_MASK { + panic!("max block version chain depth reached!"); + } + self.chain_depth_and_flags += 1; + } + + pub fn set_as_return_landing(&mut self) { + self.chain_depth_and_flags |= RETURN_LANDING_BIT; + } + + pub fn clear_return_landing(&mut self) { + self.chain_depth_and_flags &= !RETURN_LANDING_BIT; + } + + pub fn is_return_landing(&self) -> bool { + self.chain_depth_and_flags & RETURN_LANDING_BIT != 0 + } + + pub fn mark_as_deferred(&mut self) { + self.chain_depth_and_flags |= DEFER_BIT; + } + + pub fn is_deferred(&self) -> bool { + self.chain_depth_and_flags & DEFER_BIT != 0 } /// Get an operand for the adjusted stack pointer address - pub fn sp_opnd(&self, offset_bytes: isize) -> Opnd { - let offset = ((self.sp_offset as isize) * (SIZEOF_VALUE as isize)) + offset_bytes; - let offset = offset as i32; + pub fn sp_opnd(&self, offset: i32) -> Opnd { + let offset = (self.sp_offset as i32 + offset) * SIZEOF_VALUE_I32; return Opnd::mem(64, SP, offset); } + /// Get an operand for the adjusted environment pointer address using SP register. + /// This is valid only when a Binding object hasn't been created for the frame. + pub fn ep_opnd(&self, offset: i32) -> Opnd { + let ep_offset = self.get_stack_size() as i32 + 1; + self.sp_opnd(-ep_offset + offset) + } + /// Stop using a register for a given stack temp. /// This allows us to reuse the register for a value that we know is dead /// and will no longer be used (e.g. popped stack temp). 
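
Note: chain depth, the deferred flag, and the return-landing flag now share one byte. A standalone sketch of the accessors over that packing (the constants mirror the diff; the Flags wrapper is only for illustration):

    const RETURN_LANDING_BIT: u8 = 0b1000_0000;
    const DEFER_BIT: u8 = 0b0100_0000;
    const CHAIN_DEPTH_MASK: u8 = 0b0011_1111; // 63

    #[derive(Default)]
    struct Flags(u8);

    impl Flags {
        fn chain_depth(&self) -> u8 { self.0 & CHAIN_DEPTH_MASK }
        fn increment_chain_depth(&mut self) {
            // The flag bits sit above the depth bits, so a plain += 1 is safe
            // as long as the depth never reaches the mask value.
            assert!(self.chain_depth() < CHAIN_DEPTH_MASK, "max chain depth reached");
            self.0 += 1;
        }
        fn mark_as_deferred(&mut self) { self.0 |= DEFER_BIT; }
        fn is_deferred(&self) -> bool { self.0 & DEFER_BIT != 0 }
        fn set_as_return_landing(&mut self) { self.0 |= RETURN_LANDING_BIT; }
        fn is_return_landing(&self) -> bool { self.0 & RETURN_LANDING_BIT != 0 }
    }

    fn main() {
        let mut f = Flags::default();
        f.increment_chain_depth();
        f.mark_as_deferred();
        assert_eq!(f.chain_depth(), 1);
        assert!(f.is_deferred());
        assert!(!f.is_return_landing());
        f.set_as_return_landing();
        assert!(f.is_return_landing());
    }
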
@@ -1645,14 +1837,15 @@ impl Context { return Type::Unknown; } - let mapping = self.temp_mapping[stack_idx]; + let mapping = self.get_temp_mapping(stack_idx); - match mapping { + match mapping.get_kind() { MapToSelf => self.self_type, - MapToStack => self.temp_types[(self.stack_size - 1 - idx) as usize], - MapToLocal(idx) => { + MapToStack => mapping.get_type(), + MapToLocal => { + let idx = mapping.get_local_idx(); assert!((idx as usize) < MAX_LOCAL_TYPES); - return self.local_types[idx as usize]; + return self.get_local_type(idx.into()); } } } @@ -1660,8 +1853,83 @@ impl Context { } /// Get the currently tracked type for a local variable - pub fn get_local_type(&self, idx: usize) -> Type { - *self.local_types.get(idx).unwrap_or(&Type::Unknown) + pub fn get_local_type(&self, local_idx: usize) -> Type { + if local_idx >= MAX_LOCAL_TYPES { + return Type::Unknown + } else { + // Each type is stored in 4 bits + let type_bits = (self.local_types >> (4 * local_idx)) & 0b1111; + unsafe { transmute::<u8, Type>(type_bits as u8) } + } + } + + /// Get the current temp mapping for a given stack slot + fn get_temp_mapping(&self, temp_idx: usize) -> TempMapping { + assert!(temp_idx < MAX_TEMP_TYPES); + + // Extract the temp mapping kind + let kind_bits = (self.temp_mapping_kind >> (2 * temp_idx)) & 0b11; + let temp_kind = unsafe { transmute::<u8, TempMappingKind>(kind_bits as u8) }; + + // Extract the payload bits (temp type or local idx) + let payload_bits = (self.temp_payload >> (4 * temp_idx)) & 0b1111; + + match temp_kind { + MapToSelf => TempMapping::map_to_self(), + + MapToStack => { + TempMapping::map_to_stack( + unsafe { transmute::<u8, Type>(payload_bits as u8) } + ) + } + + MapToLocal => { + TempMapping::map_to_local( + payload_bits as u8 + ) + } + } + } + + /// Get the current temp mapping for a given stack slot + fn set_temp_mapping(&mut self, temp_idx: usize, mapping: TempMapping) { + assert!(temp_idx < MAX_TEMP_TYPES); + + // Extract the kind bits + let mapping_kind = mapping.get_kind(); + let kind_bits = unsafe { transmute::<TempMappingKind, u8>(mapping_kind) }; + assert!(kind_bits <= 0b11); + + // Extract the payload bits + let payload_bits = match mapping_kind { + MapToSelf => 0, + + MapToStack => { + let t = mapping.get_type(); + unsafe { transmute::<Type, u8>(t) } + } + + MapToLocal => { + mapping.get_local_idx() + } + }; + assert!(payload_bits <= 0b1111); + + // Update the kind bits + { + let mask_bits = 0b11_u16 << (2 * temp_idx); + let shifted_bits = (kind_bits as u16) << (2 * temp_idx); + let all_kind_bits = self.temp_mapping_kind as u16; + self.temp_mapping_kind = (all_kind_bits & !mask_bits) | shifted_bits; + } + + // Update the payload bits + { + let mask_bits = 0b1111_u32 << (4 * temp_idx); + let shifted_bits = (payload_bits as u32) << (4 * temp_idx); + let all_payload_bits = self.temp_payload as u32; + self.temp_payload = (all_payload_bits & !mask_bits) | shifted_bits; + } } /// Upgrade (or "learn") the type of an instruction operand @@ -1685,15 +1953,24 @@ impl Context { return; } - let mapping = self.temp_mapping[stack_idx]; + let mapping = self.get_temp_mapping(stack_idx); - match mapping { + match mapping.get_kind() { MapToSelf => self.self_type.upgrade(opnd_type), - MapToStack => self.temp_types[stack_idx].upgrade(opnd_type), - MapToLocal(idx) => { - let idx = idx as usize; + MapToStack => { + let mut temp_type = mapping.get_type(); + temp_type.upgrade(opnd_type); + self.set_temp_mapping(stack_idx, TempMapping::map_to_stack(temp_type)); + } + MapToLocal => { + let idx = 
mapping.get_local_idx() as usize; assert!(idx < MAX_LOCAL_TYPES); - self.local_types[idx].upgrade(opnd_type); + let mut new_type = self.get_local_type(idx); + new_type.upgrade(opnd_type); + self.set_local_type(idx, new_type); + // Re-attach MapToLocal for this StackOpnd(idx). set_local_type() detaches + // all MapToLocal mappings, including the one we're upgrading here. + self.set_opnd_mapping(opnd, mapping); } } } @@ -1705,29 +1982,29 @@ impl Context { This is can be used with stack_push_mapping or set_opnd_mapping to copy a stack value's type while maintaining the mapping. */ - pub fn get_opnd_mapping(&self, opnd: YARVOpnd) -> (TempMapping, Type) { + pub fn get_opnd_mapping(&self, opnd: YARVOpnd) -> TempMapping { let opnd_type = self.get_opnd_type(opnd); match opnd { - SelfOpnd => (MapToSelf, opnd_type), + SelfOpnd => TempMapping::map_to_self(), StackOpnd(idx) => { assert!(idx < self.stack_size); let stack_idx = (self.stack_size - 1 - idx) as usize; if stack_idx < MAX_TEMP_TYPES { - (self.temp_mapping[stack_idx], opnd_type) + self.get_temp_mapping(stack_idx) } else { // We can't know the source of this stack operand, so we assume it is // a stack-only temporary. type will be UNKNOWN assert!(opnd_type == Type::Unknown); - (MapToStack, opnd_type) + TempMapping::map_to_stack(opnd_type) } } } } /// Overwrite both the type and mapping of a stack operand. - pub fn set_opnd_mapping(&mut self, opnd: YARVOpnd, (mapping, opnd_type): (TempMapping, Type)) { + pub fn set_opnd_mapping(&mut self, opnd: YARVOpnd, mapping: TempMapping) { match opnd { SelfOpnd => unreachable!("self always maps to self"), StackOpnd(idx) => { @@ -1744,44 +2021,47 @@ impl Context { return; } - self.temp_mapping[stack_idx] = mapping; - - // Only used when mapping == MAP_STACK - self.temp_types[stack_idx] = opnd_type; + self.set_temp_mapping(stack_idx, mapping); } } } /// Set the type of a local variable pub fn set_local_type(&mut self, local_idx: usize, local_type: Type) { - let ctx = self; - // If type propagation is disabled, store no types if get_option!(no_type_prop) { return; } if local_idx >= MAX_LOCAL_TYPES { - return; + return } // If any values on the stack map to this local we must detach them - for (i, mapping) in ctx.temp_mapping.iter_mut().enumerate() { - *mapping = match *mapping { - MapToStack => MapToStack, - MapToSelf => MapToSelf, - MapToLocal(idx) => { + for mapping_idx in 0..MAX_TEMP_TYPES { + let mapping = self.get_temp_mapping(mapping_idx); + let tm = match mapping.get_kind() { + MapToStack => mapping, + MapToSelf => mapping, + MapToLocal => { + let idx = mapping.get_local_idx(); if idx as usize == local_idx { - ctx.temp_types[i] = ctx.local_types[idx as usize]; - MapToStack + let local_type = self.get_local_type(local_idx); + TempMapping::map_to_stack(local_type) } else { - MapToLocal(idx) + TempMapping::map_to_local(idx) } } - } + }; + self.set_temp_mapping(mapping_idx, tm); } - ctx.local_types[local_idx] = local_type; + // Update the type bits + let type_bits = local_type as u32; + assert!(type_bits <= 0b1111); + let mask_bits = 0b1111_u32 << (4 * local_idx); + let shifted_bits = type_bits << (4 * local_idx); + self.local_types = (self.local_types & !mask_bits) | shifted_bits; } /// Erase local variable type information @@ -1789,19 +2069,27 @@ impl Context { pub fn clear_local_types(&mut self) { // When clearing local types we must detach any stack mappings to those // locals. Even if local values may have changed, stack values will not. 
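
Note: get_local_type() and set_local_type() pack eight 4-bit type tags into a single u32. A standalone sketch of the nibble arithmetic they rely on (tag values are arbitrary here):

    fn get_nibble(word: u32, idx: usize) -> u8 {
        assert!(idx < 8);
        ((word >> (4 * idx)) & 0b1111) as u8
    }

    fn set_nibble(word: u32, idx: usize, value: u8) -> u32 {
        assert!(idx < 8 && value <= 0b1111);
        let mask = 0b1111_u32 << (4 * idx);
        (word & !mask) | ((value as u32) << (4 * idx))
    }

    fn main() {
        let mut local_types = 0u32;
        local_types = set_nibble(local_types, 0, 0b0101);
        local_types = set_nibble(local_types, 7, 0b1111);
        assert_eq!(get_nibble(local_types, 0), 0b0101);
        assert_eq!(get_nibble(local_types, 7), 0b1111);
        assert_eq!(get_nibble(local_types, 3), 0); // untouched slots stay 0 (Unknown)
    }

The same layout drives temp_payload (4 bits per slot) and temp_mapping_kind (2 bits per slot) in set_temp_mapping() above.
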
- for (i, mapping) in self.temp_mapping.iter_mut().enumerate() { - *mapping = match *mapping { - MapToStack => MapToStack, - MapToSelf => MapToSelf, - MapToLocal(idx) => { - self.temp_types[i] = self.local_types[idx as usize]; - MapToStack - } + + for mapping_idx in 0..MAX_TEMP_TYPES { + let mapping = self.get_temp_mapping(mapping_idx); + if mapping.get_kind() == MapToLocal { + let local_idx = mapping.get_local_idx() as usize; + self.set_temp_mapping(mapping_idx, TempMapping::map_to_stack(self.get_local_type(local_idx))); } } // Clear the local types - self.local_types = [Type::default(); MAX_LOCAL_TYPES]; + self.local_types = 0; + } + + /// Return true if the code is inlined by the caller + pub fn inline(&self) -> bool { + self.inline_block != 0 + } + + /// Set a block ISEQ given to the Block of this Context + pub fn set_inline_block(&mut self, iseq: IseqPtr) { + self.inline_block = iseq as u64 } /// Compute a difference score for two context objects @@ -1810,13 +2098,21 @@ impl Context { let src = self; // Can only lookup the first version in the chain - if dst.chain_depth != 0 { + if dst.get_chain_depth() != 0 { return TypeDiff::Incompatible; } // Blocks with depth > 0 always produce new versions // Sidechains cannot overlap - if src.chain_depth != 0 { + if src.get_chain_depth() != 0 { + return TypeDiff::Incompatible; + } + + if src.is_return_landing() != dst.is_return_landing() { + return TypeDiff::Incompatible; + } + + if src.is_deferred() != dst.is_deferred() { return TypeDiff::Incompatible; } @@ -1841,10 +2137,17 @@ impl Context { TypeDiff::Incompatible => return TypeDiff::Incompatible, }; + // Check the block to inline + if src.inline_block != dst.inline_block { + // find_block_version should not find existing blocks with different + // inline_block so that their yield will not be megamorphic. + return TypeDiff::Incompatible; + } + // For each local type we track - for i in 0..src.local_types.len() { - let t_src = src.local_types[i]; - let t_dst = dst.local_types[i]; + for i in 0.. MAX_LOCAL_TYPES { + let t_src = src.get_local_type(i); + let t_dst = dst.get_local_type(i); diff += match t_src.diff(t_dst) { TypeDiff::Compatible(diff) => diff, TypeDiff::Incompatible => return TypeDiff::Incompatible, @@ -1853,12 +2156,12 @@ impl Context { // For each value on the temp stack for i in 0..src.stack_size { - let (src_mapping, src_type) = src.get_opnd_mapping(StackOpnd(i)); - let (dst_mapping, dst_type) = dst.get_opnd_mapping(StackOpnd(i)); + let src_mapping = src.get_opnd_mapping(StackOpnd(i)); + let dst_mapping = dst.get_opnd_mapping(StackOpnd(i)); // If the two mappings aren't the same if src_mapping != dst_mapping { - if dst_mapping == MapToStack { + if dst_mapping.get_kind() == MapToStack { // We can safely drop information about the source of the temp // stack operand. 
diff += 1; @@ -1867,6 +2170,9 @@ impl Context { } } + let src_type = src.get_opnd_type(StackOpnd(i)); + let dst_type = dst.get_opnd_type(StackOpnd(i)); + diff += match src_type.diff(dst_type) { TypeDiff::Compatible(diff) => diff, TypeDiff::Incompatible => return TypeDiff::Incompatible, @@ -1896,20 +2202,20 @@ impl Context { impl Assembler { /// Push one new value on the temp stack with an explicit mapping /// Return a pointer to the new stack top - pub fn stack_push_mapping(&mut self, (mapping, temp_type): (TempMapping, Type)) -> Opnd { + pub fn stack_push_mapping(&mut self, mapping: TempMapping) -> Opnd { // If type propagation is disabled, store no types if get_option!(no_type_prop) { - return self.stack_push_mapping((mapping, Type::Unknown)); + return self.stack_push_mapping(mapping.without_type()); } let stack_size: usize = self.ctx.stack_size.into(); // Keep track of the type and mapping of the value if stack_size < MAX_TEMP_TYPES { - self.ctx.temp_mapping[stack_size] = mapping; - self.ctx.temp_types[stack_size] = temp_type; + self.ctx.set_temp_mapping(stack_size, mapping); - if let MapToLocal(idx) = mapping { + if mapping.get_kind() == MapToLocal { + let idx = mapping.get_local_idx(); assert!((idx as usize) < MAX_LOCAL_TYPES); } } @@ -1928,12 +2234,12 @@ impl Assembler { /// Push one new value on the temp stack /// Return a pointer to the new stack top pub fn stack_push(&mut self, val_type: Type) -> Opnd { - return self.stack_push_mapping((MapToStack, val_type)); + return self.stack_push_mapping(TempMapping::map_to_stack(val_type)); } /// Push the self value on the stack pub fn stack_push_self(&mut self) -> Opnd { - return self.stack_push_mapping((MapToSelf, Type::Unknown)); + return self.stack_push_mapping(TempMapping::map_to_self()); } /// Push a local variable on the stack @@ -1942,7 +2248,7 @@ impl Assembler { return self.stack_push(Type::Unknown); } - return self.stack_push_mapping((MapToLocal((local_idx as u8).into()), Type::Unknown)); + return self.stack_push_mapping(TempMapping::map_to_local(local_idx as u8)); } // Pop N values off the stack @@ -1957,8 +2263,7 @@ impl Assembler { let idx: usize = (self.ctx.stack_size as usize) - i - 1; if idx < MAX_TEMP_TYPES { - self.ctx.temp_types[idx] = Type::Unknown; - self.ctx.temp_mapping[idx] = MapToStack; + self.ctx.set_temp_mapping(idx, TempMapping::map_to_stack(Type::Unknown)); } } @@ -1972,12 +2277,16 @@ impl Assembler { pub fn shift_stack(&mut self, argc: usize) { assert!(argc < self.ctx.stack_size.into()); - let method_name_index = (self.ctx.stack_size as usize) - (argc as usize) - 1; + let method_name_index = (self.ctx.stack_size as usize) - argc - 1; for i in method_name_index..(self.ctx.stack_size - 1) as usize { - if i + 1 < MAX_TEMP_TYPES { - self.ctx.temp_types[i] = self.ctx.temp_types[i + 1]; - self.ctx.temp_mapping[i] = self.ctx.temp_mapping[i + 1]; + if i < MAX_TEMP_TYPES { + let next_arg_mapping = if i + 1 < MAX_TEMP_TYPES { + self.ctx.get_temp_mapping(i + 1) + } else { + TempMapping::map_to_stack(Type::Unknown) + }; + self.ctx.set_temp_mapping(i, next_arg_mapping); } } self.stack_pop(1); @@ -2125,12 +2434,18 @@ fn gen_block_series_body( /// Generate a block version that is an entry point inserted into an iseq /// NOTE: this function assumes that the VM lock has been taken -pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> { +/// If jit_exception is true, compile JIT code for handling exceptions. +/// See [jit_compile_exception] for details. 
+pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option<*const u8> { // Compute the current instruction index based on the current PC + let cfp = unsafe { get_ec_cfp(ec) }; let insn_idx: u16 = unsafe { - let ec_pc = get_cfp_pc(get_ec_cfp(ec)); + let ec_pc = get_cfp_pc(cfp); iseq_pc_to_insn_idx(iseq, ec_pc)? }; + let stack_size: u8 = unsafe { + u8::try_from(get_cfp_sp(cfp).offset_from(get_cfp_bp(cfp))).ok()? + }; // The entry context makes no assumptions about types let blockid = BlockId { @@ -2143,10 +2458,12 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> { let ocb = CodegenGlobals::get_outlined_cb(); // Write the interpreter entry prologue. Might be NULL when out of memory. - let code_ptr = gen_entry_prologue(cb, ocb, iseq, insn_idx); + let code_ptr = gen_entry_prologue(cb, ocb, iseq, insn_idx, jit_exception); // Try to generate code for the entry block - let block = gen_block_series(blockid, &Context::default(), ec, cb, ocb); + let mut ctx = Context::default(); + ctx.stack_size = stack_size; + let block = gen_block_series(blockid, &ctx, ec, cb, ocb); cb.mark_all_executable(); ocb.unwrap().mark_all_executable(); @@ -2155,7 +2472,9 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> { // Compilation failed None => { // Trigger code GC. This entry point will be recompiled later. - cb.code_gc(ocb); + if get_option!(code_gc) { + cb.code_gc(ocb); + } return None; } @@ -2168,14 +2487,17 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> { } } + // Count the number of entry points we compile + incr_counter!(compiled_iseq_entry); + // Compilation successful and block not empty - return code_ptr; + code_ptr.map(|ptr| ptr.raw_ptr(cb)) } // Change the entry's jump target from an entry stub to a next entry pub fn regenerate_entry(cb: &mut CodeBlock, entryref: &EntryRef, next_entry: CodePtr) { let mut asm = Assembler::new(); - asm.comment("regenerate_entry"); + asm_comment!(asm, "regenerate_entry"); // gen_entry_guard generates cmp + jne. We're rewriting only jne. asm.jne(next_entry.into()); @@ -2185,7 +2507,7 @@ pub fn regenerate_entry(cb: &mut CodeBlock, entryref: &EntryRef, next_entry: Cod let old_dropped_bytes = cb.has_dropped_bytes(); cb.set_write_ptr(unsafe { entryref.as_ref() }.start_addr); cb.set_dropped_bytes(false); - asm.compile(cb, None); + asm.compile(cb, None).expect("can rewrite existing code"); // Rewind write_pos to the original one assert_eq!(cb.get_write_ptr(), unsafe { entryref.as_ref() }.end_addr); @@ -2209,78 +2531,88 @@ c_callable! { /// Generated code calls this function with the SysV calling convention. /// See [gen_call_entry_stub_hit]. fn entry_stub_hit(entry_ptr: *const c_void, ec: EcPtr) -> *const u8 { - with_vm_lock(src_loc!(), || { - match entry_stub_hit_body(entry_ptr, ec) { - Some(addr) => addr, - // Failed to service the stub by generating a new block so now we - // need to exit to the interpreter at the stubbed location. - None => return CodegenGlobals::get_stub_exit_code().raw_ptr(), - } + with_compile_time(|| { + with_vm_lock(src_loc!(), || { + let cb = CodegenGlobals::get_inline_cb(); + let ocb = CodegenGlobals::get_outlined_cb(); + + let addr = entry_stub_hit_body(entry_ptr, ec, cb, ocb) + .unwrap_or_else(|| { + // Trigger code GC (e.g. no space). + // This entry point will be recompiled later. 
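
Note: gen_entry_point() and entry_stub_hit_body() now derive the entry stack size from the distance between the frame's sp and bp. A small sketch of that pointer arithmetic on an ordinary array (the frame layout itself is an assumption here, not taken from the VM):

    fn main() {
        let stack = [0u64; 16];
        let bp: *const u64 = &stack[4];
        let sp: *const u64 = &stack[7];
        // SAFETY: both pointers point into the same array.
        let stack_size = unsafe { u8::try_from(sp.offset_from(bp)).ok() };
        assert_eq!(stack_size, Some(3)); // three values sit between bp and sp
    }
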
+ if get_option!(code_gc) { + cb.code_gc(ocb); + } + CodegenGlobals::get_stub_exit_code().raw_ptr(cb) + }); + + cb.mark_all_executable(); + ocb.unwrap().mark_all_executable(); + + addr + }) }) } } /// Called by the generated code when an entry stub is executed -fn entry_stub_hit_body(entry_ptr: *const c_void, ec: EcPtr) -> Option<*const u8> { +fn entry_stub_hit_body( + entry_ptr: *const c_void, + ec: EcPtr, + cb: &mut CodeBlock, + ocb: &mut OutlinedCb +) -> Option<*const u8> { // Get ISEQ and insn_idx from the current ec->cfp let cfp = unsafe { get_ec_cfp(ec) }; let iseq = unsafe { get_cfp_iseq(cfp) }; let insn_idx = iseq_pc_to_insn_idx(iseq, unsafe { get_cfp_pc(cfp) })?; - - let cb = CodegenGlobals::get_inline_cb(); - let ocb = CodegenGlobals::get_outlined_cb(); + let stack_size: u8 = unsafe { + u8::try_from(get_cfp_sp(cfp).offset_from(get_cfp_bp(cfp))).ok()? + }; // Compile a new entry guard as a next entry let next_entry = cb.get_write_ptr(); let mut asm = Assembler::new(); let pending_entry = gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?; - asm.compile(cb, Some(ocb)); + asm.compile(cb, Some(ocb))?; - // Try to find an existing compiled version of this block + // Find or compile a block version let blockid = BlockId { iseq, idx: insn_idx }; - let ctx = Context::default(); + let mut ctx = Context::default(); + ctx.stack_size = stack_size; let blockref = match find_block_version(blockid, &ctx) { // If an existing block is found, generate a jump to the block. Some(blockref) => { let mut asm = Assembler::new(); asm.jmp(unsafe { blockref.as_ref() }.start_addr.into()); - asm.compile(cb, Some(ocb)); - blockref + asm.compile(cb, Some(ocb))?; + Some(blockref) } // If this block hasn't yet been compiled, generate blocks after the entry guard. - None => match gen_block_series(blockid, &ctx, ec, cb, ocb) { - Some(blockref) => blockref, - None => { // No space - // Trigger code GC. This entry point will be recompiled later. 
- cb.code_gc(ocb); - return None; - } - } + None => gen_block_series(blockid, &ctx, ec, cb, ocb), }; - // Regenerate the previous entry - assert!(!entry_ptr.is_null()); - let entryref = NonNull::<Entry>::new(entry_ptr as *mut Entry).expect("Entry should not be null"); - regenerate_entry(cb, &entryref, next_entry); - - // Write an entry to the heap and push it to the ISEQ - let pending_entry = Rc::try_unwrap(pending_entry).ok().expect("PendingEntry should be unique"); - get_or_create_iseq_payload(iseq).entries.push(pending_entry.into_entry()); + // Commit or retry the entry + if blockref.is_some() { + // Regenerate the previous entry + let entryref = NonNull::<Entry>::new(entry_ptr as *mut Entry).expect("Entry should not be null"); + regenerate_entry(cb, &entryref, next_entry); - cb.mark_all_executable(); - ocb.unwrap().mark_all_executable(); + // Write an entry to the heap and push it to the ISEQ + let pending_entry = Rc::try_unwrap(pending_entry).ok().expect("PendingEntry should be unique"); + get_or_create_iseq_payload(iseq).entries.push(pending_entry.into_entry()); + } // Let the stub jump to the block - Some(unsafe { blockref.as_ref() }.start_addr.raw_ptr()) + blockref.map(|block| unsafe { block.as_ref() }.start_addr.raw_ptr(cb)) } /// Generate a stub that calls entry_stub_hit pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option<CodePtr> { let ocb = ocb.unwrap(); - let stub_addr = ocb.get_write_ptr(); let mut asm = Assembler::new(); - asm.comment("entry stub hit"); + asm_comment!(asm, "entry stub hit"); asm.mov(C_ARG_OPNDS[0], entry_address.into()); @@ -2288,32 +2620,23 @@ pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option<Code // Not really a side exit, just don't need a padded jump here. asm.jmp(CodegenGlobals::get_entry_stub_hit_trampoline().as_side_exit()); - asm.compile(ocb, None); - - if ocb.has_dropped_bytes() { - return None; // No space - } else { - return Some(stub_addr); - } + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) } /// A trampoline used by gen_entry_stub. entry_stub_hit may issue Code GC, so /// it's useful for Code GC to call entry_stub_hit from a globally shared code. -pub fn gen_entry_stub_hit_trampoline(ocb: &mut OutlinedCb) -> CodePtr { +pub fn gen_entry_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> { let ocb = ocb.unwrap(); - let code_ptr = ocb.get_write_ptr(); let mut asm = Assembler::new(); // See gen_entry_guard for how it's used. - asm.comment("entry_stub_hit() trampoline"); + asm_comment!(asm, "entry_stub_hit() trampoline"); let jump_addr = asm.ccall(entry_stub_hit as *mut u8, vec![C_ARG_OPNDS[0], EC]); // Jump to the address returned by the entry_stub_hit() call asm.jmp_opnd(jump_addr); - asm.compile(ocb, None); - - code_ptr + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) } /// Generate code for a branch, possibly rewriting and changing the size of it @@ -2328,19 +2651,25 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &Branch) { // Generate the branch let mut asm = Assembler::new(); - asm.comment("regenerate_branch"); + asm_comment!(asm, "regenerate_branch"); branch.gen_fn.call( &mut asm, Target::CodePtr(branch.get_target_address(0).unwrap()), branch.get_target_address(1).map(|addr| Target::CodePtr(addr)), ); + // If the entire block is the branch and the block could be invalidated, + // we need to pad to ensure there is room for invalidation patching. 
+ if branch.start_addr == block.start_addr && branch_terminates_block && block.entry_exit.is_some() { + asm.pad_inval_patch(); + } + // Rewrite the branch let old_write_pos = cb.get_write_pos(); let old_dropped_bytes = cb.has_dropped_bytes(); cb.set_write_ptr(branch.start_addr); cb.set_dropped_bytes(false); - asm.compile(cb, None); + asm.compile(cb, None).expect("can rewrite existing code"); let new_end_addr = cb.get_write_ptr(); branch.end_addr.set(new_end_addr); @@ -2399,7 +2728,7 @@ c_callable! { ec: EcPtr, ) -> *const u8 { with_vm_lock(src_loc!(), || { - branch_stub_hit_body(branch_ptr, target_idx, ec) + with_compile_time(|| { branch_stub_hit_body(branch_ptr, target_idx, ec) }) }) } } @@ -2427,6 +2756,9 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - _ => unreachable!("target_idx < 2 must always hold"), }; + let cb = CodegenGlobals::get_inline_cb(); + let ocb = CodegenGlobals::get_outlined_cb(); + let (target_blockid, target_ctx): (BlockId, Context) = unsafe { // SAFETY: no mutation of the target's Cell. Just reading out data. let target = branch.targets[target_idx].ref_unchecked().as_ref().unwrap(); @@ -2434,24 +2766,24 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - // If this branch has already been patched, return the dst address // Note: recursion can cause the same stub to be hit multiple times if let BranchTarget::Block(_) = target.as_ref() { - return target.get_address().unwrap().raw_ptr(); + return target.get_address().unwrap().raw_ptr(cb); } (target.get_blockid(), target.get_ctx()) }; - let cb = CodegenGlobals::get_inline_cb(); - let ocb = CodegenGlobals::get_outlined_cb(); - let (cfp, original_interp_sp) = unsafe { let cfp = get_ec_cfp(ec); let original_interp_sp = get_cfp_sp(cfp); - let running_iseq = rb_cfp_get_iseq(cfp); + let running_iseq = get_cfp_iseq(cfp); + assert_eq!(running_iseq, target_blockid.iseq as _, "each stub expects a particular iseq"); + let reconned_pc = rb_iseq_pc_at_idx(running_iseq, target_blockid.idx.into()); let reconned_sp = original_interp_sp.offset(target_ctx.sp_offset.into()); - - assert_eq!(running_iseq, target_blockid.iseq as _, "each stub expects a particular iseq"); + // Unlike in the interpreter, our `leave` doesn't write to the caller's + // SP -- we do it in the returned-to code. Account for this difference. + let reconned_sp = reconned_sp.add(target_ctx.is_return_landing().into()); // Update the PC in the current CFP, because it may be out of sync in JITted code rb_set_cfp_pc(cfp, reconned_pc); @@ -2464,6 +2796,17 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - // So we do it here instead. rb_set_cfp_sp(cfp, reconned_sp); + // Bail if code GC is disabled and we've already run out of spaces. + if !get_option!(code_gc) && (cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes()) { + return CodegenGlobals::get_stub_exit_code().raw_ptr(cb); + } + + // Bail if we're about to run out of native stack space. + // We've just reconstructed interpreter state. 
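
Note: when the stub target is a return landing, branch_stub_hit_body() reconstructs sp one slot higher, because the JIT's leave writes the return value in the returned-to code rather than through the caller's sp. A sketch of that 0/1 adjustment with plain integers standing in for stack slots:

    fn reconned_sp(sp: usize, sp_offset: isize, is_return_landing: bool) -> usize {
        let adjusted = (sp as isize + sp_offset) as usize;
        // bool converts to 0 or 1, mirroring `.add(is_return_landing.into())`.
        adjusted + usize::from(is_return_landing)
    }

    fn main() {
        assert_eq!(reconned_sp(100, 2, false), 102);
        assert_eq!(reconned_sp(100, 2, true), 103); // the return value occupies one extra slot
    }
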
+ if rb_ec_stack_check(ec as _) != 0 { + return CodegenGlobals::get_stub_exit_code().raw_ptr(cb); + } + (cfp, original_interp_sp) }; @@ -2474,7 +2817,6 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - if block.is_none() { let branch_old_shape = branch.gen_fn.get_shape(); - // If the new block can be generated right after the branch (at cb->write_pos) if cb.get_write_ptr() == branch.end_addr.get() { // This branch should be terminating its block @@ -2532,7 +2874,9 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - // because incomplete code could be used when cb.dropped_bytes is flipped // by code GC. So this place, after all compilation, is the safest place // to hook code GC on branch_stub_hit. - cb.code_gc(ocb); + if get_option!(code_gc) { + cb.code_gc(ocb); + } // Failed to service the stub by generating a new block so now we // need to exit to the interpreter at the stubbed location. We are @@ -2552,11 +2896,11 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - assert!( new_branch_size <= branch_size_on_entry, "branch stubs should never enlarge branches (start_addr: {:?}, old_size: {}, new_size: {})", - branch.start_addr.raw_ptr(), branch_size_on_entry, new_branch_size, + branch.start_addr.raw_ptr(cb), branch_size_on_entry, new_branch_size, ); // Return a pointer to the compiled block version - dst_addr.raw_ptr() + dst_addr.raw_ptr(cb) } /// Generate a "stub", a piece of code that calls the compiler back when run. @@ -2569,18 +2913,21 @@ fn gen_branch_stub( ) -> Option<CodePtr> { let ocb = ocb.unwrap(); - // Generate an outlined stub that will call branch_stub_hit() - let stub_addr = ocb.get_write_ptr(); - let mut asm = Assembler::new(); - asm.ctx = ctx.clone(); + asm.ctx = *ctx; asm.set_reg_temps(ctx.reg_temps); - asm.comment("branch stub hit"); + asm_comment!(asm, "branch stub hit"); + + if asm.ctx.is_return_landing() { + asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP)); + let top = asm.stack_push(Type::Unknown); + asm.mov(top, C_RET_OPND); + } // Save caller-saved registers before C_ARG_OPNDS get clobbered. // Spill all registers for consistency with the trampoline. - for ® in caller_saved_temp_regs().iter() { - asm.cpush(reg); + for ® in caller_saved_temp_regs() { + asm.cpush(Opnd::Reg(reg)); } // Spill temps to the VM stack as well for jit.peek_at_stack() @@ -2599,19 +2946,11 @@ fn gen_branch_stub( // Not really a side exit, just don't need a padded jump here. asm.jmp(CodegenGlobals::get_branch_stub_hit_trampoline().as_side_exit()); - asm.compile(ocb, None); - - if ocb.has_dropped_bytes() { - // No space - None - } else { - Some(stub_addr) - } + asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr) } -pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> CodePtr { +pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> { let ocb = ocb.unwrap(); - let code_ptr = ocb.get_write_ptr(); let mut asm = Assembler::new(); // For `branch_stub_hit(branch_ptr, target_idx, ec)`, @@ -2620,8 +2959,8 @@ pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> CodePtr { // is the unchanging part. // Since this trampoline is static, it allows code GC inside // branch_stub_hit() to free stubs without problems. 
-    asm.comment("branch_stub_hit() trampoline");
-    let jump_addr = asm.ccall(
+    asm_comment!(asm, "branch_stub_hit() trampoline");
+    let stub_hit_ret = asm.ccall(
         branch_stub_hit as *mut u8,
         vec![
             C_ARG_OPNDS[0],
@@ -2629,28 +2968,39 @@ pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> CodePtr {
             EC,
         ]
     );
+    let jump_addr = asm.load(stub_hit_ret);

     // Restore caller-saved registers for stack temps
-    for &reg in caller_saved_temp_regs().iter().rev() {
-        asm.cpop_into(reg);
+    for &reg in caller_saved_temp_regs().rev() {
+        asm.cpop_into(Opnd::Reg(reg));
     }

     // Jump to the address returned by the branch_stub_hit() call
     asm.jmp_opnd(jump_addr);

-    asm.compile(ocb, None);
+    // HACK: popping into C_RET_REG clobbers the return value of branch_stub_hit() we need to jump
+    // to, so we need a scratch register to preserve it. This extends the live range of the C
+    // return register so we get something else for the return value.
+    let _ = asm.live_reg_opnd(stub_hit_ret);

-    code_ptr
+    asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
 }

 /// Return registers to be pushed and popped on branch_stub_hit.
-/// The return value may include an extra register for x86 alignment.
-fn caller_saved_temp_regs() -> Vec<Opnd> {
-    let mut regs = Assembler::get_temp_regs();
-    if regs.len() % 2 == 1 {
-        regs.push(*regs.last().unwrap()); // x86 alignment
+pub fn caller_saved_temp_regs() -> impl Iterator<Item = &'static Reg> + DoubleEndedIterator {
+    let temp_regs = Assembler::get_temp_regs().iter();
+    let len = temp_regs.len();
+    // The return value gen_leave() leaves in C_RET_REG
+    // needs to survive the branch_stub_hit() call.
+    let regs = temp_regs.chain(std::iter::once(&C_RET_REG));
+
+    // On x86_64, maintain 16-byte stack alignment
+    if cfg!(target_arch = "x86_64") && len % 2 == 0 {
+        static ONE_MORE: [Reg; 1] = [C_RET_REG];
+        regs.chain(ONE_MORE.iter())
+    } else {
+        regs.chain(&[])
     }
-    regs.iter().map(|&reg| Opnd::Reg(reg)).collect()
 }

 impl Assembler
 {
@@ -2661,7 +3011,7 @@ impl Assembler
         // so that we can move the closure below
         let entryref = entryref.clone();

-        self.pos_marker(move |code_ptr| {
+        self.pos_marker(move |code_ptr, _| {
             entryref.start_addr.set(Some(code_ptr));
         });
     }
@@ -2672,7 +3022,7 @@ impl Assembler
         // so that we can move the closure below
         let entryref = entryref.clone();

-        self.pos_marker(move |code_ptr| {
+        self.pos_marker(move |code_ptr, _| {
             entryref.end_addr.set(Some(code_ptr));
         });
     }
@@ -2684,7 +3034,7 @@ impl Assembler
         // so that we can move the closure below
         let branchref = branchref.clone();

-        self.pos_marker(move |code_ptr| {
+        self.pos_marker(move |code_ptr, _| {
             branchref.start_addr.set(Some(code_ptr));
         });
     }
@@ -2696,7 +3046,7 @@ impl Assembler
         // so that we can move the closure below
         let branchref = branchref.clone();

-        self.pos_marker(move |code_ptr| {
+        self.pos_marker(move |code_ptr, _| {
             branchref.end_addr.set(Some(code_ptr));
         });
     }
@@ -2745,7 +3095,7 @@ pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm:
         let block_addr = block.start_addr;

         // Call the branch generation function
-        asm.comment("gen_direct_jmp: existing block");
+        asm_comment!(asm, "gen_direct_jmp: existing block");
         asm.mark_branch_start(&branch);
         branch.gen_fn.call(asm, Target::CodePtr(block_addr), None);
         asm.mark_branch_end(&branch);
@@ -2753,7 +3103,7 @@ pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm:
         BranchTarget::Block(blockref)
     } else {
         // The branch is effectively empty (a noop)
-        asm.comment("gen_direct_jmp: fallthrough");
+        asm_comment!(asm, "gen_direct_jmp: fallthrough");
         asm.mark_branch_start(&branch);
         asm.mark_branch_end(&branch);
         branch.gen_fn.set_shape(BranchShape::Next0);
@@ -2762,7 +3112,7 @@ pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm:
         // compile the target block right after this one (fallthrough).
         BranchTarget::Stub(Box::new(BranchStub {
             address: None,
-            ctx: ctx.clone(),
+            ctx: *ctx,
             iseq: Cell::new(target0.iseq),
             iseq_idx: target0.idx,
         }))
@@ -2777,16 +3127,13 @@ pub fn defer_compilation(
     asm: &mut Assembler,
     ocb: &mut OutlinedCb,
 ) {
-    if asm.ctx.chain_depth != 0 {
+    if asm.ctx.is_deferred() {
         panic!("Double defer!");
     }

-    let mut next_ctx = asm.ctx.clone();
+    let mut next_ctx = asm.ctx;

-    if next_ctx.chain_depth == u8::MAX {
-        panic!("max block version chain depth reached!");
-    }
-    next_ctx.chain_depth += 1;
+    next_ctx.mark_as_deferred();

     let branch = new_pending_branch(jit, BranchGenFn::JumpToTarget0(Cell::new(BranchShape::Default)));

@@ -2798,8 +3145,14 @@ pub fn defer_compilation(
     // Likely a stub due to the increased chain depth
     let target0_address = branch.set_target(0, blockid, &next_ctx, ocb);

+    // Pad the block if it has the potential to be invalidated. This must be
+    // done before gen_fn() in case the jump is overwritten by a fallthrough.
+    if jit.block_entry_exit.is_some() {
+        asm.pad_inval_patch();
+    }
+
     // Call the branch generation function
-    asm.comment("defer_compilation");
+    asm_comment!(asm, "defer_compilation");
     asm.mark_branch_start(&branch);
     if let Some(dst_addr) = target0_address {
         branch.gen_fn.call(asm, Target::CodePtr(dst_addr), None);
@@ -2951,7 +3304,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
     // Get a pointer to the generated code for this block
     let block_start = block.start_addr;

-    // Make the the start of the block do an exit. This handles OOM situations
+    // Make the start of the block do an exit. This handles OOM situations
     // and some cases where we can't efficiently patch incoming branches.
     // Do this first, since in case there is a fallthrough branch into this
     // block, the patching loop below can overwrite the start of the block.
@@ -2977,13 +3330,14 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
         let mut asm = Assembler::new();
         asm.jmp(block_entry_exit.as_side_exit());
         cb.set_dropped_bytes(false);
-        asm.compile(&mut cb, Some(ocb));
+        asm.compile(&mut cb, Some(ocb)).expect("can rewrite existing code");

         assert!(
             cb.get_write_ptr() <= block_end,
-            "invalidation wrote past end of block (code_size: {:?}, new_size: {})",
+            "invalidation wrote past end of block (code_size: {:?}, new_size: {}, start_addr: {:?})",
             block.code_size(),
-            cb.get_write_ptr().into_i64() - block_start.into_i64(),
+            cb.get_write_ptr().as_offset() - block_start.as_offset(),
+            block.start_addr.raw_ptr(cb),
         );
         cb.set_write_ptr(cur_pos);
         cb.set_dropped_bytes(cur_dropped_bytes);
@@ -3024,7 +3378,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
                 address: Some(stub_addr),
                 iseq: block.iseq.clone(),
                 iseq_idx: block.iseq_range.start,
-                ctx: block.ctx.clone(),
+                ctx: block.ctx,
             })))));

             // Check if the invalidated block immediately follows
@@ -3047,7 +3401,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
             if !target_next && branch.code_size() > old_branch_size {
                 panic!(
                     "invalidated branch grew in size (start_addr: {:?}, old_size: {}, new_size: {})",
-                    branch.start_addr.raw_ptr(), old_branch_size, branch.code_size()
+                    branch.start_addr.raw_ptr(cb), old_branch_size, branch.code_size()
                 );
             }
         }
@@ -3089,9 +3443,9 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
     // invalidated branch pointers. Example:
     //   def foo(n)
     //     if n == 2
-    //       # 1.times{} to use a cfunc to avoid exiting from the
-    //       # frame which will use the retained return address
-    //       return 1.times { Object.define_method(:foo) {} }
+    //       # 1.times.each to create a cfunc frame to preserve the JIT frame
+    //       # which will return to a stub housed in an invalidated block
+    //       return 1.times.each { Object.define_method(:foo) {} }
     //     end
     //
     //     foo(n + 1)
@@ -3139,6 +3493,65 @@ mod tests {
     use crate::core::*;

     #[test]
+    fn type_size() {
+        // Check that we can store types in 4 bits,
+        // and all local types in 32 bits
+        assert_eq!(mem::size_of::<Type>(), 1);
+        assert!(Type::BlockParamProxy as usize <= 0b1111);
+        assert!(MAX_LOCAL_TYPES * 4 <= 32);
+    }
+
+    #[test]
+    fn tempmapping_size() {
+        assert_eq!(mem::size_of::<TempMapping>(), 1);
+    }
+
+    #[test]
+    fn local_types() {
+        let mut ctx = Context::default();
+
+        for i in 0..MAX_LOCAL_TYPES {
+            ctx.set_local_type(i, Type::Fixnum);
+            assert_eq!(ctx.get_local_type(i), Type::Fixnum);
+            ctx.set_local_type(i, Type::BlockParamProxy);
+            assert_eq!(ctx.get_local_type(i), Type::BlockParamProxy);
+        }
+
+        ctx.set_local_type(0, Type::Fixnum);
+        ctx.clear_local_types();
+        assert!(ctx.get_local_type(0) == Type::Unknown);
+
+        // Make sure we don't accidentally set bits incorrectly
+        let mut ctx = Context::default();
+        ctx.set_local_type(0, Type::Fixnum);
+        assert_eq!(ctx.get_local_type(0), Type::Fixnum);
+        ctx.set_local_type(2, Type::Fixnum);
+        ctx.set_local_type(1, Type::BlockParamProxy);
+        assert_eq!(ctx.get_local_type(0), Type::Fixnum);
+        assert_eq!(ctx.get_local_type(2), Type::Fixnum);
+    }
+
+    #[test]
+    fn tempmapping() {
+        let t = TempMapping::map_to_stack(Type::Unknown);
+        assert_eq!(t.get_kind(), MapToStack);
+        assert_eq!(t.get_type(), Type::Unknown);
+
+        let t = TempMapping::map_to_stack(Type::TString);
+        assert_eq!(t.get_kind(), MapToStack);
+        assert_eq!(t.get_type(), Type::TString);
+
+        let t = TempMapping::map_to_local(7);
+        assert_eq!(t.get_kind(), MapToLocal);
+        assert_eq!(t.get_local_idx(), 7);
+    }
+
+    #[test]
+    fn context_size() {
+        assert_eq!(mem::size_of::<Context>(), 23);
+    }
+
+    #[test]
     fn types() {
         // Valid src => dst
         assert_eq!(Type::Unknown.diff(Type::Unknown), TypeDiff::Compatible(0));
@@ -3162,7 +3575,7 @@ mod tests {
             assert_eq!(reg_temps.get(stack_idx), false);
         }

-        // Set 0, 2, 7
+        // Set 0, 2, 7 (RegTemps: 10100001)
         reg_temps.set(0, true);
         reg_temps.set(2, true);
         reg_temps.set(3, true);
@@ -3178,6 +3591,17 @@ mod tests {
         assert_eq!(reg_temps.get(5), false);
         assert_eq!(reg_temps.get(6), false);
         assert_eq!(reg_temps.get(7), true);
+
+        // Test conflicts
+        assert_eq!(5, get_option!(num_temp_regs));
+        assert_eq!(reg_temps.conflicts_with(0), false); // already set, but no conflict
+        assert_eq!(reg_temps.conflicts_with(1), false);
+        assert_eq!(reg_temps.conflicts_with(2), true); // already set, and conflicts with 7
+        assert_eq!(reg_temps.conflicts_with(3), false);
+        assert_eq!(reg_temps.conflicts_with(4), false);
+        assert_eq!(reg_temps.conflicts_with(5), true); // not set, and will conflict with 0
+        assert_eq!(reg_temps.conflicts_with(6), false);
+        assert_eq!(reg_temps.conflicts_with(7), true); // already set, and conflicts with 2
     }

     #[test]
@@ -3195,6 +3619,60 @@ mod tests {
     }

     #[test]
+    fn context_upgrade_local() {
+        let mut asm = Assembler::new();
+        asm.stack_push_local(0);
+        asm.ctx.upgrade_opnd_type(StackOpnd(0), Type::Nil);
+        assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0)));
+    }
+
+    #[test]
+    fn context_chain_depth() {
+        let mut ctx = Context::default();
+        assert_eq!(ctx.get_chain_depth(), 0);
+        assert_eq!(ctx.is_return_landing(), false);
+        assert_eq!(ctx.is_deferred(), false);
+
+        for _ in 0..5 {
+            ctx.increment_chain_depth();
+        }
+        assert_eq!(ctx.get_chain_depth(), 5);
+
+        ctx.set_as_return_landing();
+        assert_eq!(ctx.is_return_landing(), true);
+
+        ctx.clear_return_landing();
+        assert_eq!(ctx.is_return_landing(), false);
+
+        ctx.mark_as_deferred();
+        assert_eq!(ctx.is_deferred(), true);
+
+        ctx.reset_chain_depth_and_defer();
+        assert_eq!(ctx.get_chain_depth(), 0);
+        assert_eq!(ctx.is_deferred(), false);
+    }
+
+    #[test]
+    fn shift_stack_for_send() {
+        let mut asm = Assembler::new();
+
+        // Push values to simulate send(:name, arg) with 6 items already on-stack
+        for _ in 0..6 {
+            asm.stack_push(Type::Fixnum);
+        }
+        asm.stack_push(Type::Unknown);
+        asm.stack_push(Type::ImmSymbol);
+        asm.stack_push(Type::Unknown);
+
+        // This method takes argc of the sendee, not argc of send
+        asm.shift_stack(1);
+
+        // The symbol should be gone
+        assert_eq!(Type::Unknown, asm.ctx.get_opnd_type(StackOpnd(0)));
+        assert_eq!(Type::Unknown, asm.ctx.get_opnd_type(StackOpnd(1)));
+    }
+
+    #[test]
     fn test_miri_ref_unchecked() {
         let blockid = BlockId {
             iseq: ptr::null(),