Diffstat (limited to 'yjit/src/core.rs')
-rw-r--r--  yjit/src/core.rs  1248
1 file changed, 863 insertions(+), 385 deletions(-)
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
index 4dd0a387d5..cd6e649aa0 100644
--- a/yjit/src/core.rs
+++ b/yjit/src/core.rs
@@ -18,13 +18,14 @@ use std::cell::*;
use std::collections::HashSet;
use std::fmt;
use std::mem;
+use std::mem::transmute;
use std::ops::Range;
use std::rc::Rc;
use mem::MaybeUninit;
use std::ptr;
use ptr::NonNull;
use YARVOpnd::*;
-use TempMapping::*;
+use TempMappingKind::*;
use crate::invariants::*;
// Maximum number of temp value types we keep track of
@@ -39,8 +40,9 @@ pub type IseqIdx = u16;
// Represent the type of a value (local/stack/self) in YJIT
#[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)]
+#[repr(u8)]
pub enum Type {
- Unknown,
+ Unknown = 0,
UnknownImm,
UnknownHeap,
Nil,
@@ -48,19 +50,20 @@ pub enum Type {
False,
Fixnum,
Flonum,
- Hash,
ImmSymbol,
- #[allow(unused)]
- HeapSymbol,
-
TString, // An object with the T_STRING flag set, possibly an rb_cString
- CString, // An un-subclassed string of type rb_cString (can have instance vars in some cases)
+ CString, // An object that at one point had its class field equal rb_cString (creating a singleton class changes it)
TArray, // An object with the T_ARRAY flag set, possibly an rb_cArray
- CArray, // An un-subclassed string of type rb_cArray (can have instance vars in some cases)
+ CArray, // An object that at one point had its class field equal rb_cArray (creating a singleton class changes it)
+ THash, // An object with the T_HASH flag set, possibly an rb_cHash
+ CHash, // An object that at one point had its class field equal rb_cHash (creating a singleton class changes it)
BlockParamProxy, // A special sentinel value indicating the block parameter should be read from
// the current surrounding cfp
+
+ // The context currently relies on types taking at most 4 bits (max value 15)
+ // to encode, so if we add any more, we will need to refactor the context.
}
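The 4-bit constraint noted in the comment above matters because the packed Context introduced later in this diff stores one Type per 4-bit slot (see local_types and temp_payload below). A standalone sketch (not part of the diff) of that invariant, assuming sequential discriminants starting at Unknown = 0 as in the enum above:

#[allow(dead_code)]
#[repr(u8)]
#[derive(Copy, Clone)]
enum Ty { Unknown = 0, UnknownImm, /* ... remaining variants ... */ BlockParamProxy = 15 }

fn main() {
    // 4-bit slots can only hold values 0..=15, so the last variant must stay <= 15.
    assert!((Ty::BlockParamProxy as u8) <= 0b1111);
}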
// Default initialization
@@ -93,12 +96,11 @@ impl Type {
// Core.rs can't reference rb_cString because it's linked by Rust-only tests.
// But CString vs TString is only an optimisation and shouldn't affect correctness.
#[cfg(not(test))]
- if val.class_of() == unsafe { rb_cString } {
- return Type::CString;
- }
- #[cfg(not(test))]
- if val.class_of() == unsafe { rb_cArray } {
- return Type::CArray;
+ match val.class_of() {
+ class if class == unsafe { rb_cArray } => return Type::CArray,
+ class if class == unsafe { rb_cHash } => return Type::CHash,
+ class if class == unsafe { rb_cString } => return Type::CString,
+ _ => {}
}
// We likewise can't reference rb_block_param_proxy, but it's again an optimisation;
// we can just treat it as a normal Object.
@@ -108,7 +110,7 @@ impl Type {
}
match val.builtin_type() {
RUBY_T_ARRAY => Type::TArray,
- RUBY_T_HASH => Type::Hash,
+ RUBY_T_HASH => Type::THash,
RUBY_T_STRING => Type::TString,
_ => Type::UnknownHeap,
}
@@ -150,8 +152,8 @@ impl Type {
Type::UnknownHeap => true,
Type::TArray => true,
Type::CArray => true,
- Type::Hash => true,
- Type::HeapSymbol => true,
+ Type::THash => true,
+ Type::CHash => true,
Type::TString => true,
Type::CString => true,
Type::BlockParamProxy => true,
@@ -161,20 +163,17 @@ impl Type {
/// Check if it's a T_ARRAY object (both TArray and CArray are T_ARRAY)
pub fn is_array(&self) -> bool {
- match self {
- Type::TArray => true,
- Type::CArray => true,
- _ => false,
- }
+ matches!(self, Type::TArray | Type::CArray)
+ }
+
+ /// Check if it's a T_HASH object (both THash and CHash are T_HASH)
+ pub fn is_hash(&self) -> bool {
+ matches!(self, Type::THash | Type::CHash)
}
/// Check if it's a T_STRING object (both TString and CString are T_STRING)
pub fn is_string(&self) -> bool {
- match self {
- Type::TString => true,
- Type::CString => true,
- _ => false,
- }
+ matches!(self, Type::TString | Type::CString)
}
/// Returns an Option with the T_ value type if it is known, otherwise None
@@ -186,8 +185,8 @@ impl Type {
Type::Fixnum => Some(RUBY_T_FIXNUM),
Type::Flonum => Some(RUBY_T_FLOAT),
Type::TArray | Type::CArray => Some(RUBY_T_ARRAY),
- Type::Hash => Some(RUBY_T_HASH),
- Type::ImmSymbol | Type::HeapSymbol => Some(RUBY_T_SYMBOL),
+ Type::THash | Type::CHash => Some(RUBY_T_HASH),
+ Type::ImmSymbol => Some(RUBY_T_SYMBOL),
Type::TString | Type::CString => Some(RUBY_T_STRING),
Type::Unknown | Type::UnknownImm | Type::UnknownHeap => None,
Type::BlockParamProxy => None,
@@ -203,9 +202,10 @@ impl Type {
Type::False => Some(rb_cFalseClass),
Type::Fixnum => Some(rb_cInteger),
Type::Flonum => Some(rb_cFloat),
- Type::ImmSymbol | Type::HeapSymbol => Some(rb_cSymbol),
- Type::CString => Some(rb_cString),
+ Type::ImmSymbol => Some(rb_cSymbol),
Type::CArray => Some(rb_cArray),
+ Type::CHash => Some(rb_cHash),
+ Type::CString => Some(rb_cString),
_ => None,
}
}
@@ -255,13 +255,18 @@ impl Type {
return TypeDiff::Compatible(1);
}
- // A CString is also a TString.
- if self == Type::CString && dst == Type::TString {
+ // A CArray is also a TArray.
+ if self == Type::CArray && dst == Type::TArray {
return TypeDiff::Compatible(1);
}
- // A CArray is also a TArray.
- if self == Type::CArray && dst == Type::TArray {
+ // A CHash is also a THash.
+ if self == Type::CHash && dst == Type::THash {
+ return TypeDiff::Compatible(1);
+ }
+
+ // A CString is also a TString.
+ if self == Type::CString && dst == Type::TString {
return TypeDiff::Compatible(1);
}
@@ -296,63 +301,92 @@ pub enum TypeDiff {
Incompatible,
}
-// Potential mapping of a value on the temporary stack to
-// self, a local variable or constant so that we can track its type
#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)]
-pub enum TempMapping {
- MapToStack, // Normal stack value
- MapToSelf, // Temp maps to the self operand
- MapToLocal(LocalIndex), // Temp maps to a local variable with index
- //ConstMapping, // Small constant (0, 1, 2, Qnil, Qfalse, Qtrue)
+#[repr(u8)]
+pub enum TempMappingKind
+{
+ MapToStack = 0,
+ MapToSelf = 1,
+ MapToLocal = 2,
}
-// Index used by MapToLocal. Using this instead of u8 makes TempMapping 1 byte.
+// Potential mapping of a value on the temporary stack to
+// self, a local variable or constant so that we can track its type
+//
+// The highest two bits represent TempMappingKind, and the rest of
+// the bits are used differently across different kinds.
+// * MapToStack: The lowest 5 bits are used for mapping Type.
+// * MapToSelf: The remaining bits are not used; the type is stored in self_type.
+// * MapToLocal: The lowest 3 bits store the index of a local variable.
#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)]
-pub enum LocalIndex {
- Local0,
- Local1,
- Local2,
- Local3,
- Local4,
- Local5,
- Local6,
- Local7,
-}
+pub struct TempMapping(u8);
-impl From<LocalIndex> for u8 {
- fn from(idx: LocalIndex) -> Self {
- match idx {
- LocalIndex::Local0 => 0,
- LocalIndex::Local1 => 1,
- LocalIndex::Local2 => 2,
- LocalIndex::Local3 => 3,
- LocalIndex::Local4 => 4,
- LocalIndex::Local5 => 5,
- LocalIndex::Local6 => 6,
- LocalIndex::Local7 => 7,
- }
+impl TempMapping {
+ pub fn map_to_stack(t: Type) -> TempMapping
+ {
+ let kind_bits = TempMappingKind::MapToStack as u8;
+ let type_bits = t as u8;
+ assert!(type_bits <= 0b11111);
+ let bits = (kind_bits << 6) | (type_bits & 0b11111);
+ TempMapping(bits)
+ }
+
+ pub fn map_to_self() -> TempMapping
+ {
+ let kind_bits = TempMappingKind::MapToSelf as u8;
+ let bits = kind_bits << 6;
+ TempMapping(bits)
}
-}
-impl From<u8> for LocalIndex {
- fn from(idx: u8) -> Self {
- match idx {
- 0 => LocalIndex::Local0,
- 1 => LocalIndex::Local1,
- 2 => LocalIndex::Local2,
- 3 => LocalIndex::Local3,
- 4 => LocalIndex::Local4,
- 5 => LocalIndex::Local5,
- 6 => LocalIndex::Local6,
- 7 => LocalIndex::Local7,
- _ => unreachable!("{idx} was larger than {MAX_LOCAL_TYPES}"),
+ pub fn map_to_local(local_idx: u8) -> TempMapping
+ {
+ let kind_bits = TempMappingKind::MapToLocal as u8;
+ assert!(local_idx <= 0b111);
+ let bits = (kind_bits << 6) | (local_idx & 0b111);
+ TempMapping(bits)
+ }
+
+ pub fn without_type(&self) -> TempMapping
+ {
+ if self.get_kind() != TempMappingKind::MapToStack {
+ return *self;
}
+
+ TempMapping::map_to_stack(Type::Unknown)
+ }
+
+ pub fn get_kind(&self) -> TempMappingKind
+ {
+ // Take the two highest bits
+ let TempMapping(bits) = self;
+ let kind_bits = bits >> 6;
+ assert!(kind_bits <= 2);
+ unsafe { transmute::<u8, TempMappingKind>(kind_bits) }
+ }
+
+ pub fn get_type(&self) -> Type
+ {
+ assert!(self.get_kind() == TempMappingKind::MapToStack);
+
+ // Take the 5 lowest bits
+ let TempMapping(bits) = self;
+ let type_bits = bits & 0b11111;
+ unsafe { transmute::<u8, Type>(type_bits) }
+ }
+
+ pub fn get_local_idx(&self) -> u8
+ {
+ assert!(self.get_kind() == TempMappingKind::MapToLocal);
+
+ // Take the 3 lowest bits
+ let TempMapping(bits) = self;
+ bits & 0b111
}
}
impl Default for TempMapping {
fn default() -> Self {
- MapToStack
+ TempMapping::map_to_stack(Type::Unknown)
}
}
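A standalone sketch (not part of the diff) of the byte layout described above, using raw u8 arithmetic instead of the private TempMapping type: the kind lives in the two high bits, the payload in the low bits.

const MAP_TO_LOCAL: u8 = 2; // TempMappingKind::MapToLocal

fn encode_local(local_idx: u8) -> u8 {
    assert!(local_idx <= 0b111);
    (MAP_TO_LOCAL << 6) | local_idx
}

fn main() {
    let bits = encode_local(5);
    assert_eq!(bits >> 6, MAP_TO_LOCAL); // top two bits: the mapping kind
    assert_eq!(bits & 0b111, 5);         // low bits: the local index payload
}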
@@ -403,21 +437,27 @@ impl RegTemps {
/// Return true if there's a register that conflicts with a given stack_idx.
pub fn conflicts_with(&self, stack_idx: u8) -> bool {
- let mut other_idx = stack_idx as isize - get_option!(num_temp_regs) as isize;
- while other_idx >= 0 {
- if self.get(other_idx as u8) {
+ let mut other_idx = stack_idx as usize % get_option!(num_temp_regs);
+ while other_idx < MAX_REG_TEMPS as usize {
+ if stack_idx as usize != other_idx && self.get(other_idx as u8) {
return true;
}
- other_idx -= get_option!(num_temp_regs) as isize;
+ other_idx += get_option!(num_temp_regs);
}
false
}
}
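The rewritten loop above encodes the rule that stack temps share registers modulo num_temp_regs. A minimal sketch of the same rule, with a plain bool slice standing in for the RegTemps bitmap (names here are stand-ins):

fn conflicts_with(in_reg: &[bool], num_temp_regs: usize, stack_idx: usize) -> bool {
    (0..in_reg.len())
        .filter(|&other| other != stack_idx)
        .filter(|&other| other % num_temp_regs == stack_idx % num_temp_regs)
        .any(|other| in_reg[other])
}

fn main() {
    // With 5 temp registers, stack slots 0 and 5 map to the same register.
    let mut in_reg = [false; 8];
    in_reg[5] = true;
    assert!(conflicts_with(&in_reg, 5, 0));
    assert!(!conflicts_with(&in_reg, 5, 1));
}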
+/// Bits for chain_depth_return_landing_defer
+const RETURN_LANDING_BIT: u8 = 0b10000000;
+const DEFER_BIT: u8 = 0b01000000;
+const CHAIN_DEPTH_MASK: u8 = 0b00111111; // 63
+
/// Code generation context
/// Contains information we can use to specialize/optimize code
/// There are a lot of context objects so we try to keep the size small.
-#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, Debug)]
+#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)]
+#[repr(packed)]
pub struct Context {
// Number of values currently on the temporary stack
stack_size: u8,
@@ -429,20 +469,33 @@ pub struct Context {
/// Bitmap of which stack temps are in a register
reg_temps: RegTemps,
- // Depth of this block in the sidechain (eg: inline-cache chain)
- chain_depth: u8,
+ /// Fields packed into u8
+ /// - 1st bit from the left: Whether this code is the target of a JIT-to-JIT Ruby return ([Self::is_return_landing])
+ /// - 2nd bit from the left: Whether the compilation of this code has been deferred ([Self::is_deferred])
+ /// - Last 6 bits (max: 63): Depth of this block in the sidechain (eg: inline-cache chain)
+ chain_depth_and_flags: u8,
+
+ // Type we track for self
+ self_type: Type,
// Local variable types we keep track of
- local_types: [Type; MAX_LOCAL_TYPES],
+ // We store 8 local types, requiring 4 bits each, for a total of 32 bits
+ local_types: u32,
- // Temporary variable types we keep track of
- temp_types: [Type; MAX_TEMP_TYPES],
+ // Temp mapping kinds we track
+ // 8 temp mappings * 2 bits, total 16 bits
+ temp_mapping_kind: u16,
- // Type we track for self
- self_type: Type,
+ // Stack slot type/local_idx we track
+ // 8 temp types * 4 bits, total 32 bits
+ temp_payload: u32,
- // Mapping of temp stack entries to types we track
- temp_mapping: [TempMapping; MAX_TEMP_TYPES],
+ /// A pointer to a block ISEQ supplied by the caller. 0 if not inlined.
+ /// Not using IseqPtr to satisfy Default trait, and not using Option for #[repr(packed)]
+ /// TODO: This could be u16 if we have a global or per-ISEQ HashMap to convert IseqPtr
+ /// to serial indexes. We're thinking of overhauling Context structure in Ruby 3.4 which
+ /// could allow this to consume no bytes, so we're leaving this as is.
+ inline_block: u64,
}
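A standalone sketch of the chain_depth_and_flags byte documented above: two flag bits stacked on top of a 6-bit chain depth (0..=63), mirroring the constants defined before the struct.

const RETURN_LANDING_BIT: u8 = 0b1000_0000;
const DEFER_BIT: u8 = 0b0100_0000;
const CHAIN_DEPTH_MASK: u8 = 0b0011_1111;

fn main() {
    let mut bits: u8 = 0;
    bits |= DEFER_BIT;                          // mark_as_deferred()
    bits = (bits & !CHAIN_DEPTH_MASK) | 3;      // chain depth of 3
    assert_eq!(bits & CHAIN_DEPTH_MASK, 3);     // get_chain_depth()
    assert!(bits & DEFER_BIT != 0);             // is_deferred()
    assert!(bits & RETURN_LANDING_BIT == 0);    // is_return_landing()
}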
/// Tuple of (iseq, idx) used to identify basic blocks
@@ -474,6 +527,8 @@ pub enum BranchGenFn {
JNZToTarget0,
JZToTarget0,
JBEToTarget0,
+ JBToTarget0,
+ JOMulToTarget0,
JITReturn,
}
@@ -485,8 +540,8 @@ impl BranchGenFn {
BranchShape::Next0 => asm.jz(target1.unwrap()),
BranchShape::Next1 => asm.jnz(target0),
BranchShape::Default => {
- asm.jnz(target0.into());
- asm.jmp(target1.unwrap().into());
+ asm.jnz(target0);
+ asm.jmp(target1.unwrap());
}
}
}
@@ -515,11 +570,11 @@ impl BranchGenFn {
panic!("Branch shape Next1 not allowed in JumpToTarget0!");
}
if shape.get() == BranchShape::Default {
- asm.jmp(target0.into());
+ asm.jmp(target0);
}
}
BranchGenFn::JNZToTarget0 => {
- asm.jnz(target0.into())
+ asm.jnz(target0)
}
BranchGenFn::JZToTarget0 => {
asm.jz(target0)
@@ -527,9 +582,17 @@ impl BranchGenFn {
BranchGenFn::JBEToTarget0 => {
asm.jbe(target0)
}
+ BranchGenFn::JBToTarget0 => {
+ asm.jb(target0)
+ }
+ BranchGenFn::JOMulToTarget0 => {
+ asm.jo_mul(target0)
+ }
BranchGenFn::JITReturn => {
- asm.comment("update cfp->jit_return");
- asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), Opnd::const_ptr(target0.unwrap_code_ptr().raw_ptr()));
+ asm_comment!(asm, "update cfp->jit_return");
+ let jit_return = RUBY_OFFSET_CFP_JIT_RETURN - RUBY_SIZEOF_CONTROL_FRAME as i32;
+ let raw_ptr = asm.lea_jump_target(target0);
+ asm.mov(Opnd::mem(64, CFP, jit_return), raw_ptr);
}
}
}
@@ -543,6 +606,8 @@ impl BranchGenFn {
BranchGenFn::JNZToTarget0 |
BranchGenFn::JZToTarget0 |
BranchGenFn::JBEToTarget0 |
+ BranchGenFn::JBToTarget0 |
+ BranchGenFn::JOMulToTarget0 |
BranchGenFn::JITReturn => BranchShape::Default,
}
}
@@ -563,6 +628,8 @@ impl BranchGenFn {
BranchGenFn::JNZToTarget0 |
BranchGenFn::JZToTarget0 |
BranchGenFn::JBEToTarget0 |
+ BranchGenFn::JBToTarget0 |
+ BranchGenFn::JOMulToTarget0 |
BranchGenFn::JITReturn => {
assert_eq!(new_shape, BranchShape::Default);
}
@@ -594,8 +661,8 @@ impl BranchTarget {
fn get_ctx(&self) -> Context {
match self {
- BranchTarget::Stub(stub) => stub.ctx.clone(),
- BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.ctx.clone(),
+ BranchTarget::Stub(stub) => stub.ctx,
+ BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.ctx,
}
}
@@ -660,7 +727,7 @@ pub struct PendingBranch {
impl Branch {
// Compute the size of the branch code
fn code_size(&self) -> usize {
- (self.end_addr.get().raw_ptr() as usize) - (self.start_addr.raw_ptr() as usize)
+ (self.end_addr.get().as_offset() - self.start_addr.as_offset()) as usize
}
/// Get the address of one of the branch destination
@@ -752,7 +819,7 @@ impl PendingBranch {
address: Some(stub_addr),
iseq: Cell::new(target.iseq),
iseq_idx: target.idx,
- ctx: ctx.clone(),
+ ctx: *ctx,
})))));
}
@@ -937,7 +1004,6 @@ impl fmt::Debug for MutableBranchList {
}
}
-
/// This is all the data YJIT stores on an iseq
/// This will be dynamically allocated by C code
/// C code should pass an &mut IseqPayload to us
@@ -1050,23 +1116,34 @@ pub fn for_each_on_stack_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) {
/// Iterate over all NOT on-stack ISEQ payloads
pub fn for_each_off_stack_iseq_payload<F: FnMut(&mut IseqPayload)>(mut callback: F) {
- let mut on_stack_iseqs: Vec<IseqPtr> = vec![];
- for_each_on_stack_iseq(|iseq| {
- on_stack_iseqs.push(iseq);
- });
- for_each_iseq(|iseq| {
+ // Get all ISEQs on the heap. Note that rb_objspace_each_objects() runs GC first,
+ // which could move ISEQ pointers when GC.auto_compact = true.
+ // So for_each_on_stack_iseq() must be called after this, which doesn't run GC.
+ let mut iseqs: Vec<IseqPtr> = vec![];
+ for_each_iseq(|iseq| iseqs.push(iseq));
+
+ // Get all ISEQs that are on a CFP of existing ECs.
+ let mut on_stack_iseqs: HashSet<IseqPtr> = HashSet::new();
+ for_each_on_stack_iseq(|iseq| { on_stack_iseqs.insert(iseq); });
+
+ // Invoke the callback for iseqs - on_stack_iseqs
+ for iseq in iseqs {
if !on_stack_iseqs.contains(&iseq) {
if let Some(iseq_payload) = get_iseq_payload(iseq) {
callback(iseq_payload);
}
}
- })
+ }
}
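A minimal sketch of the iteration above, with plain collections standing in for the VM walkers (all names and values are stand-ins): collect every ISEQ first, then skip the ones found on a control frame.

use std::collections::HashSet;

fn main() {
    let all_iseqs: Vec<usize> = vec![1, 2, 3, 4];               // stand-in for heap ISEQ pointers
    let on_stack: HashSet<usize> = [2, 4].into_iter().collect();
    let off_stack: Vec<usize> = all_iseqs
        .into_iter()
        .filter(|iseq| !on_stack.contains(iseq))
        .collect();
    assert_eq!(off_stack, vec![1, 3]);
}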
/// Free the per-iseq payload
#[no_mangle]
-pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) {
+pub extern "C" fn rb_yjit_iseq_free(iseq: IseqPtr) {
+ // Free invariants for the ISEQ
+ iseq_free_invariants(iseq);
+
let payload = {
+ let payload = unsafe { rb_iseq_get_yjit_payload(iseq) };
if payload.is_null() {
// Nothing to free.
return;
@@ -1103,7 +1180,7 @@ pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) {
incr_counter!(freed_iseq_count);
}
-/// GC callback for marking GC objects in the the per-iseq payload.
+/// GC callback for marking GC objects in the per-iseq payload.
#[no_mangle]
pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) {
let payload = if payload.is_null() {
@@ -1129,30 +1206,54 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) {
for block in versions {
// SAFETY: all blocks inside version_map are initialized.
let block = unsafe { block.as_ref() };
+ mark_block(block, cb, false);
+ }
+ }
+ // Mark dead blocks, since there could be stubs pointing at them
+ for blockref in &payload.dead_blocks {
+ // SAFETY: dead blocks come from version_map, which only have initialized blocks
+ let block = unsafe { blockref.as_ref() };
+ mark_block(block, cb, true);
+ }
+
+ return;
+
+ fn mark_block(block: &Block, cb: &CodeBlock, dead: bool) {
+ unsafe { rb_gc_mark_movable(block.iseq.get().into()) };
+
+ // Mark method entry dependencies
+ for cme_dep in block.cme_dependencies.iter() {
+ unsafe { rb_gc_mark_movable(cme_dep.get().into()) };
+ }
+
+ // Mark outgoing branch entries
+ for branch in block.outgoing.iter() {
+ let branch = unsafe { branch.as_ref() };
+ for target in branch.targets.iter() {
+ // SAFETY: no mutation inside unsafe
+ let target_iseq = unsafe {
+ target.ref_unchecked().as_ref().and_then(|target| {
+ // Avoid get_blockid() on blockref. Can be dangling on dead blocks,
+ // and the iseq housing the block already naturally handles it.
+ if target.get_block().is_some() {
+ None
+ } else {
+ Some(target.get_blockid().iseq)
+ }
+ })
+ };
- unsafe { rb_gc_mark_movable(block.iseq.get().into()) };
-
- // Mark method entry dependencies
- for cme_dep in block.cme_dependencies.iter() {
- unsafe { rb_gc_mark_movable(cme_dep.get().into()) };
- }
-
- // Mark outgoing branch entries
- for branch in block.outgoing.iter() {
- let branch = unsafe { branch.as_ref() };
- for target in branch.targets.iter() {
- // SAFETY: no mutation inside unsafe
- let target_iseq = unsafe { target.ref_unchecked().as_ref().map(|target| target.get_blockid().iseq) };
-
- if let Some(target_iseq) = target_iseq {
- unsafe { rb_gc_mark_movable(target_iseq.into()) };
- }
+ if let Some(target_iseq) = target_iseq {
+ unsafe { rb_gc_mark_movable(target_iseq.into()) };
}
}
+ }
- // Walk over references to objects in generated code.
+ // Mark references to objects in generated code.
+ // Skip for dead blocks since they shouldn't run.
+ if !dead {
for offset in block.gc_obj_offsets.iter() {
- let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr();
+ let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(cb);
// Creating an unaligned pointer is well defined unlike in C.
let value_address = value_address as *const VALUE;
@@ -1166,10 +1267,11 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) {
}
}
-/// GC callback for updating GC objects in the the per-iseq payload.
+/// GC callback for updating GC objects in the per-iseq payload.
/// This is a mirror of [rb_yjit_iseq_mark].
#[no_mangle]
-pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
+pub extern "C" fn rb_yjit_iseq_update_references(iseq: IseqPtr) {
+ let payload = unsafe { rb_iseq_get_yjit_payload(iseq) };
let payload = if payload.is_null() {
// Nothing to update.
return;
@@ -1196,21 +1298,70 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
for version in versions {
// SAFETY: all blocks inside version_map are initialized
let block = unsafe { version.as_ref() };
+ block_update_references(block, cb, false);
+ }
+ }
+ // Update dead blocks, since there could be stubs pointing at them
+ for blockref in &payload.dead_blocks {
+ // SAFETY: dead blocks come from version_map, which only have initialized blocks
+ let block = unsafe { blockref.as_ref() };
+ block_update_references(block, cb, true);
+ }
- block.iseq.set(unsafe { rb_gc_location(block.iseq.get().into()) }.as_iseq());
+ // Note that we would have returned already if YJIT is off.
+ cb.mark_all_executable();
- // Update method entry dependencies
- for cme_dep in block.cme_dependencies.iter() {
- let cur_cme: VALUE = cme_dep.get().into();
- let new_cme = unsafe { rb_gc_location(cur_cme) }.as_cme();
- cme_dep.set(new_cme);
+ CodegenGlobals::get_outlined_cb()
+ .unwrap()
+ .mark_all_executable();
+
+ return;
+
+ fn block_update_references(block: &Block, cb: &mut CodeBlock, dead: bool) {
+ block.iseq.set(unsafe { rb_gc_location(block.iseq.get().into()) }.as_iseq());
+
+ // Update method entry dependencies
+ for cme_dep in block.cme_dependencies.iter() {
+ let cur_cme: VALUE = cme_dep.get().into();
+ let new_cme = unsafe { rb_gc_location(cur_cme) }.as_cme();
+ cme_dep.set(new_cme);
+ }
+
+ // Update outgoing branch entries
+ for branch in block.outgoing.iter() {
+ let branch = unsafe { branch.as_ref() };
+ for target in branch.targets.iter() {
+ // SAFETY: no mutation inside unsafe
+ let current_iseq = unsafe {
+ target.ref_unchecked().as_ref().and_then(|target| {
+ // Avoid get_blockid() on blockref. Can be dangling on dead blocks,
+ // and the iseq housing the block already naturally handles it.
+ if target.get_block().is_some() {
+ None
+ } else {
+ Some(target.get_blockid().iseq)
+ }
+ })
+ };
+
+ if let Some(current_iseq) = current_iseq {
+ let updated_iseq = unsafe { rb_gc_location(current_iseq.into()) }
+ .as_iseq();
+ // SAFETY: the Cell::set is not on the reference given out
+ // by ref_unchecked.
+ unsafe { target.ref_unchecked().as_ref().unwrap().set_iseq(updated_iseq) };
+ }
}
+ }
- // Walk over references to objects in generated code.
+ // Update references to objects in generated code.
+ // Skip for dead blocks since they shouldn't run and
+ // so there is no potential of writing over invalidation jumps
+ if !dead {
for offset in block.gc_obj_offsets.iter() {
let offset_to_value = offset.as_usize();
let value_code_ptr = cb.get_ptr(offset_to_value);
- let value_ptr: *const u8 = value_code_ptr.raw_ptr();
+ let value_ptr: *const u8 = value_code_ptr.raw_ptr(cb);
// Creating an unaligned pointer is well defined unlike in C.
let value_ptr = value_ptr as *mut VALUE;
@@ -1227,32 +1378,9 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
}
}
}
-
- // Update outgoing branch entries
- for branch in block.outgoing.iter() {
- let branch = unsafe { branch.as_ref() };
- for target in branch.targets.iter() {
- // SAFETY: no mutation inside unsafe
- let current_iseq = unsafe { target.ref_unchecked().as_ref().map(|target| target.get_blockid().iseq) };
-
- if let Some(current_iseq) = current_iseq {
- let updated_iseq = unsafe { rb_gc_location(current_iseq.into()) }
- .as_iseq();
- // SAFETY: the Cell::set is not on the reference given out
- // by ref_unchecked.
- unsafe { target.ref_unchecked().as_ref().unwrap().set_iseq(updated_iseq) };
- }
- }
- }
}
- }
- // Note that we would have returned already if YJIT is off.
- cb.mark_all_executable();
-
- CodegenGlobals::get_outlined_cb()
- .unwrap()
- .mark_all_executable();
+ }
}
/// Get all blocks for a particular place in an iseq.
@@ -1293,14 +1421,19 @@ pub fn take_version_list(blockid: BlockId) -> VersionList {
}
/// Count the number of block versions matching a given blockid
-fn get_num_versions(blockid: BlockId) -> usize {
+/// `inlined: true` counts inlined versions, and `inlined: false` counts other versions.
+fn get_num_versions(blockid: BlockId, inlined: bool) -> usize {
let insn_idx = blockid.idx.as_usize();
match get_iseq_payload(blockid.iseq) {
Some(payload) => {
payload
.version_map
.get(insn_idx)
- .map(|versions| versions.len())
+ .map(|versions| {
+ versions.iter().filter(|&&version|
+ unsafe { version.as_ref() }.ctx.inline() == inlined
+ ).count()
+ })
.unwrap_or(0)
}
None => 0,
@@ -1355,41 +1488,54 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
}
}
- // If greedy versioning is enabled
- if get_option!(greedy_versioning) {
- // If we're below the version limit, don't settle for an imperfect match
- if versions.len() + 1 < get_option!(max_versions) && best_diff > 0 {
- return None;
- }
- }
-
return best_version;
}
+/// Allow inlining a Block up to MAX_INLINE_VERSIONS times.
+const MAX_INLINE_VERSIONS: usize = 1000;
+
/// Produce a generic context when the block version limit is hit for a blockid
pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
// Guard chains implement limits separately, do nothing
- if ctx.chain_depth > 0 {
- return ctx.clone();
+ if ctx.get_chain_depth() > 0 {
+ return *ctx;
}
+ let next_versions = get_num_versions(blockid, ctx.inline()) + 1;
+ let max_versions = if ctx.inline() {
+ MAX_INLINE_VERSIONS
+ } else {
+ get_option!(max_versions)
+ };
+
// If this block version we're about to add will hit the version limit
- if get_num_versions(blockid) + 1 >= get_option!(max_versions) {
+ if next_versions >= max_versions {
// Produce a generic context that stores no type information,
// but still respects the stack_size and sp_offset constraints.
// This new context will then match all future requests.
let generic_ctx = ctx.get_generic_ctx();
- debug_assert_ne!(
- TypeDiff::Incompatible,
- ctx.diff(&generic_ctx),
- "should substitute a compatible context",
- );
+ if cfg!(debug_assertions) {
+ let mut ctx = ctx.clone();
+ if ctx.inline() {
+ // Suppress TypeDiff::Incompatible from ctx.diff(). We return TypeDiff::Incompatible
+ // to keep inlining blocks until we hit the limit, but it's safe to give up inlining.
+ ctx.inline_block = 0;
+ assert!(generic_ctx.inline_block == 0);
+ }
+
+ assert_ne!(
+ TypeDiff::Incompatible,
+ ctx.diff(&generic_ctx),
+ "should substitute a compatible context",
+ );
+ }
return generic_ctx;
}
+ incr_counter_to!(max_inline_versions, next_versions);
- return ctx.clone();
+ return *ctx;
}
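A sketch of the cap selection in limit_block_versions above; the non-inlined cap normally comes from get_option!(max_versions), shown here as a plain parameter.

const MAX_INLINE_VERSIONS: usize = 1000;

fn hits_version_limit(existing: usize, inlined: bool, max_versions: usize) -> bool {
    let cap = if inlined { MAX_INLINE_VERSIONS } else { max_versions };
    existing + 1 >= cap
}

fn main() {
    assert!(hits_version_limit(3, false, 4));  // adding a 4th non-inlined version hits the cap
    assert!(!hits_version_limit(3, true, 4));  // inlined blocks get the much larger budget
}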
/// Install a block version into its [IseqPayload], letting the GC track its
@@ -1436,7 +1582,7 @@ unsafe fn add_block_version(blockref: BlockRef, cb: &CodeBlock) {
// Run write barriers for all objects in generated code.
for offset in block.gc_obj_offsets.iter() {
- let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr();
+ let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(cb);
// Creating an unaligned pointer is well defined unlike in C.
let value_address: *const VALUE = value_address.cast();
@@ -1513,6 +1659,12 @@ impl JITState {
if let Some(idlist) = self.stable_constant_names_assumption {
track_stable_constant_names_assumption(blockref, idlist);
}
+ for klass in self.no_singleton_class_assumptions {
+ track_no_singleton_class_assumption(blockref, klass);
+ }
+ if self.no_ep_escape {
+ track_no_ep_escape_assumption(blockref, self.iseq);
+ }
blockref
}
@@ -1558,7 +1710,7 @@ impl Block {
// Compute the size of the block code
pub fn code_size(&self) -> usize {
- (self.end_addr.get().into_usize()) - (self.start_addr.into_usize())
+ (self.end_addr.get().as_offset() - self.start_addr.as_offset()).try_into().unwrap()
}
}
@@ -1567,12 +1719,22 @@ impl Context {
self.stack_size
}
+ pub fn set_stack_size(&mut self, stack_size: u8) {
+ self.stack_size = stack_size;
+ }
+
/// Create a new Context that is compatible with self but doesn't have type information.
pub fn get_generic_ctx(&self) -> Context {
let mut generic_ctx = Context::default();
generic_ctx.stack_size = self.stack_size;
generic_ctx.sp_offset = self.sp_offset;
generic_ctx.reg_temps = self.reg_temps;
+ if self.is_return_landing() {
+ generic_ctx.set_as_return_landing();
+ }
+ if self.is_deferred() {
+ generic_ctx.mark_as_deferred();
+ }
generic_ctx
}
@@ -1580,7 +1742,7 @@ impl Context {
/// accordingly. This is useful when you want to virtually rewind a stack_size for
/// generating a side exit while considering past sp_offset changes on gen_save_sp.
pub fn with_stack_size(&self, stack_size: u8) -> Context {
- let mut ctx = self.clone();
+ let mut ctx = *self;
ctx.sp_offset -= (ctx.get_stack_size() as isize - stack_size as isize) as i8;
ctx.stack_size = stack_size;
ctx
@@ -1603,24 +1765,54 @@ impl Context {
}
pub fn get_chain_depth(&self) -> u8 {
- self.chain_depth
+ self.chain_depth_and_flags & CHAIN_DEPTH_MASK
}
- pub fn reset_chain_depth(&mut self) {
- self.chain_depth = 0;
+ pub fn reset_chain_depth_and_defer(&mut self) {
+ self.chain_depth_and_flags &= !CHAIN_DEPTH_MASK;
+ self.chain_depth_and_flags &= !DEFER_BIT;
}
pub fn increment_chain_depth(&mut self) {
- self.chain_depth += 1;
+ if self.get_chain_depth() == CHAIN_DEPTH_MASK {
+ panic!("max block version chain depth reached!");
+ }
+ self.chain_depth_and_flags += 1;
+ }
+
+ pub fn set_as_return_landing(&mut self) {
+ self.chain_depth_and_flags |= RETURN_LANDING_BIT;
+ }
+
+ pub fn clear_return_landing(&mut self) {
+ self.chain_depth_and_flags &= !RETURN_LANDING_BIT;
+ }
+
+ pub fn is_return_landing(&self) -> bool {
+ self.chain_depth_and_flags & RETURN_LANDING_BIT != 0
+ }
+
+ pub fn mark_as_deferred(&mut self) {
+ self.chain_depth_and_flags |= DEFER_BIT;
+ }
+
+ pub fn is_deferred(&self) -> bool {
+ self.chain_depth_and_flags & DEFER_BIT != 0
}
/// Get an operand for the adjusted stack pointer address
- pub fn sp_opnd(&self, offset_bytes: isize) -> Opnd {
- let offset = ((self.sp_offset as isize) * (SIZEOF_VALUE as isize)) + offset_bytes;
- let offset = offset as i32;
+ pub fn sp_opnd(&self, offset: i32) -> Opnd {
+ let offset = (self.sp_offset as i32 + offset) * SIZEOF_VALUE_I32;
return Opnd::mem(64, SP, offset);
}
+ /// Get an operand for the adjusted environment pointer address using SP register.
+ /// This is valid only when a Binding object hasn't been created for the frame.
+ pub fn ep_opnd(&self, offset: i32) -> Opnd {
+ let ep_offset = self.get_stack_size() as i32 + 1;
+ self.sp_opnd(-ep_offset + offset)
+ }
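A worked sketch of the ep_opnd() arithmetic above, expressed in 8-byte VALUE slots relative to the SP register (numbers are illustrative):

fn ep_byte_offset(sp_offset: i32, stack_size: i32, offset: i32) -> i32 {
    const SIZEOF_VALUE_I32: i32 = 8;
    // ep_opnd(offset) delegates to sp_opnd(-(stack_size + 1) + offset)
    let ep_offset = stack_size + 1;
    (sp_offset + (-ep_offset + offset)) * SIZEOF_VALUE_I32
}

fn main() {
    // With two values on the temp stack and sp_offset = 0, the EP is read at SP[-24].
    assert_eq!(ep_byte_offset(0, 2, 0), -24);
    // ep_opnd(-1) reads one slot below the EP.
    assert_eq!(ep_byte_offset(0, 2, -1), -32);
}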
+
/// Stop using a register for a given stack temp.
/// This allows us to reuse the register for a value that we know is dead
/// and will no longer be used (e.g. popped stack temp).
@@ -1645,14 +1837,15 @@ impl Context {
return Type::Unknown;
}
- let mapping = self.temp_mapping[stack_idx];
+ let mapping = self.get_temp_mapping(stack_idx);
- match mapping {
+ match mapping.get_kind() {
MapToSelf => self.self_type,
- MapToStack => self.temp_types[(self.stack_size - 1 - idx) as usize],
- MapToLocal(idx) => {
+ MapToStack => mapping.get_type(),
+ MapToLocal => {
+ let idx = mapping.get_local_idx();
assert!((idx as usize) < MAX_LOCAL_TYPES);
- return self.local_types[idx as usize];
+ return self.get_local_type(idx.into());
}
}
}
@@ -1660,8 +1853,83 @@ impl Context {
}
/// Get the currently tracked type for a local variable
- pub fn get_local_type(&self, idx: usize) -> Type {
- *self.local_types.get(idx).unwrap_or(&Type::Unknown)
+ pub fn get_local_type(&self, local_idx: usize) -> Type {
+ if local_idx >= MAX_LOCAL_TYPES {
+ return Type::Unknown
+ } else {
+ // Each type is stored in 4 bits
+ let type_bits = (self.local_types >> (4 * local_idx)) & 0b1111;
+ unsafe { transmute::<u8, Type>(type_bits as u8) }
+ }
+ }
+
+ /// Get the current temp mapping for a given stack slot
+ fn get_temp_mapping(&self, temp_idx: usize) -> TempMapping {
+ assert!(temp_idx < MAX_TEMP_TYPES);
+
+ // Extract the temp mapping kind
+ let kind_bits = (self.temp_mapping_kind >> (2 * temp_idx)) & 0b11;
+ let temp_kind = unsafe { transmute::<u8, TempMappingKind>(kind_bits as u8) };
+
+ // Extract the payload bits (temp type or local idx)
+ let payload_bits = (self.temp_payload >> (4 * temp_idx)) & 0b1111;
+
+ match temp_kind {
+ MapToSelf => TempMapping::map_to_self(),
+
+ MapToStack => {
+ TempMapping::map_to_stack(
+ unsafe { transmute::<u8, Type>(payload_bits as u8) }
+ )
+ }
+
+ MapToLocal => {
+ TempMapping::map_to_local(
+ payload_bits as u8
+ )
+ }
+ }
+ }
+
+ /// Get the current temp mapping for a given stack slot
+ fn set_temp_mapping(&mut self, temp_idx: usize, mapping: TempMapping) {
+ assert!(temp_idx < MAX_TEMP_TYPES);
+
+ // Extract the kind bits
+ let mapping_kind = mapping.get_kind();
+ let kind_bits = unsafe { transmute::<TempMappingKind, u8>(mapping_kind) };
+ assert!(kind_bits <= 0b11);
+
+ // Extract the payload bits
+ let payload_bits = match mapping_kind {
+ MapToSelf => 0,
+
+ MapToStack => {
+ let t = mapping.get_type();
+ unsafe { transmute::<Type, u8>(t) }
+ }
+
+ MapToLocal => {
+ mapping.get_local_idx()
+ }
+ };
+ assert!(payload_bits <= 0b1111);
+
+ // Update the kind bits
+ {
+ let mask_bits = 0b11_u16 << (2 * temp_idx);
+ let shifted_bits = (kind_bits as u16) << (2 * temp_idx);
+ let all_kind_bits = self.temp_mapping_kind as u16;
+ self.temp_mapping_kind = (all_kind_bits & !mask_bits) | shifted_bits;
+ }
+
+ // Update the payload bits
+ {
+ let mask_bits = 0b1111_u32 << (4 * temp_idx);
+ let shifted_bits = (payload_bits as u32) << (4 * temp_idx);
+ let all_payload_bits = self.temp_payload as u32;
+ self.temp_payload = (all_payload_bits & !mask_bits) | shifted_bits;
+ }
}
/// Upgrade (or "learn") the type of an instruction operand
@@ -1685,15 +1953,24 @@ impl Context {
return;
}
- let mapping = self.temp_mapping[stack_idx];
+ let mapping = self.get_temp_mapping(stack_idx);
- match mapping {
+ match mapping.get_kind() {
MapToSelf => self.self_type.upgrade(opnd_type),
- MapToStack => self.temp_types[stack_idx].upgrade(opnd_type),
- MapToLocal(idx) => {
- let idx = idx as usize;
+ MapToStack => {
+ let mut temp_type = mapping.get_type();
+ temp_type.upgrade(opnd_type);
+ self.set_temp_mapping(stack_idx, TempMapping::map_to_stack(temp_type));
+ }
+ MapToLocal => {
+ let idx = mapping.get_local_idx() as usize;
assert!(idx < MAX_LOCAL_TYPES);
- self.local_types[idx].upgrade(opnd_type);
+ let mut new_type = self.get_local_type(idx);
+ new_type.upgrade(opnd_type);
+ self.set_local_type(idx, new_type);
+ // Re-attach MapToLocal for this StackOpnd(idx). set_local_type() detaches
+ // all MapToLocal mappings, including the one we're upgrading here.
+ self.set_opnd_mapping(opnd, mapping);
}
}
}
@@ -1705,29 +1982,29 @@ impl Context {
    This can be used with stack_push_mapping or set_opnd_mapping to copy
a stack value's type while maintaining the mapping.
*/
- pub fn get_opnd_mapping(&self, opnd: YARVOpnd) -> (TempMapping, Type) {
+ pub fn get_opnd_mapping(&self, opnd: YARVOpnd) -> TempMapping {
let opnd_type = self.get_opnd_type(opnd);
match opnd {
- SelfOpnd => (MapToSelf, opnd_type),
+ SelfOpnd => TempMapping::map_to_self(),
StackOpnd(idx) => {
assert!(idx < self.stack_size);
let stack_idx = (self.stack_size - 1 - idx) as usize;
if stack_idx < MAX_TEMP_TYPES {
- (self.temp_mapping[stack_idx], opnd_type)
+ self.get_temp_mapping(stack_idx)
} else {
// We can't know the source of this stack operand, so we assume it is
// a stack-only temporary. type will be UNKNOWN
assert!(opnd_type == Type::Unknown);
- (MapToStack, opnd_type)
+ TempMapping::map_to_stack(opnd_type)
}
}
}
}
/// Overwrite both the type and mapping of a stack operand.
- pub fn set_opnd_mapping(&mut self, opnd: YARVOpnd, (mapping, opnd_type): (TempMapping, Type)) {
+ pub fn set_opnd_mapping(&mut self, opnd: YARVOpnd, mapping: TempMapping) {
match opnd {
SelfOpnd => unreachable!("self always maps to self"),
StackOpnd(idx) => {
@@ -1744,44 +2021,47 @@ impl Context {
return;
}
- self.temp_mapping[stack_idx] = mapping;
-
- // Only used when mapping == MAP_STACK
- self.temp_types[stack_idx] = opnd_type;
+ self.set_temp_mapping(stack_idx, mapping);
}
}
}
/// Set the type of a local variable
pub fn set_local_type(&mut self, local_idx: usize, local_type: Type) {
- let ctx = self;
-
// If type propagation is disabled, store no types
if get_option!(no_type_prop) {
return;
}
if local_idx >= MAX_LOCAL_TYPES {
- return;
+ return
}
// If any values on the stack map to this local we must detach them
- for (i, mapping) in ctx.temp_mapping.iter_mut().enumerate() {
- *mapping = match *mapping {
- MapToStack => MapToStack,
- MapToSelf => MapToSelf,
- MapToLocal(idx) => {
+ for mapping_idx in 0..MAX_TEMP_TYPES {
+ let mapping = self.get_temp_mapping(mapping_idx);
+ let tm = match mapping.get_kind() {
+ MapToStack => mapping,
+ MapToSelf => mapping,
+ MapToLocal => {
+ let idx = mapping.get_local_idx();
if idx as usize == local_idx {
- ctx.temp_types[i] = ctx.local_types[idx as usize];
- MapToStack
+ let local_type = self.get_local_type(local_idx);
+ TempMapping::map_to_stack(local_type)
} else {
- MapToLocal(idx)
+ TempMapping::map_to_local(idx)
}
}
- }
+ };
+ self.set_temp_mapping(mapping_idx, tm);
}
- ctx.local_types[local_idx] = local_type;
+ // Update the type bits
+ let type_bits = local_type as u32;
+ assert!(type_bits <= 0b1111);
+ let mask_bits = 0b1111_u32 << (4 * local_idx);
+ let shifted_bits = type_bits << (4 * local_idx);
+ self.local_types = (self.local_types & !mask_bits) | shifted_bits;
}
/// Erase local variable type information
@@ -1789,19 +2069,27 @@ impl Context {
pub fn clear_local_types(&mut self) {
// When clearing local types we must detach any stack mappings to those
// locals. Even if local values may have changed, stack values will not.
- for (i, mapping) in self.temp_mapping.iter_mut().enumerate() {
- *mapping = match *mapping {
- MapToStack => MapToStack,
- MapToSelf => MapToSelf,
- MapToLocal(idx) => {
- self.temp_types[i] = self.local_types[idx as usize];
- MapToStack
- }
+
+ for mapping_idx in 0..MAX_TEMP_TYPES {
+ let mapping = self.get_temp_mapping(mapping_idx);
+ if mapping.get_kind() == MapToLocal {
+ let local_idx = mapping.get_local_idx() as usize;
+ self.set_temp_mapping(mapping_idx, TempMapping::map_to_stack(self.get_local_type(local_idx)));
}
}
// Clear the local types
- self.local_types = [Type::default(); MAX_LOCAL_TYPES];
+ self.local_types = 0;
+ }
+
+ /// Return true if the code is inlined by the caller
+ pub fn inline(&self) -> bool {
+ self.inline_block != 0
+ }
+
+ /// Set a block ISEQ given to the Block of this Context
+ pub fn set_inline_block(&mut self, iseq: IseqPtr) {
+ self.inline_block = iseq as u64
}
/// Compute a difference score for two context objects
@@ -1810,13 +2098,21 @@ impl Context {
let src = self;
// Can only lookup the first version in the chain
- if dst.chain_depth != 0 {
+ if dst.get_chain_depth() != 0 {
return TypeDiff::Incompatible;
}
// Blocks with depth > 0 always produce new versions
// Sidechains cannot overlap
- if src.chain_depth != 0 {
+ if src.get_chain_depth() != 0 {
+ return TypeDiff::Incompatible;
+ }
+
+ if src.is_return_landing() != dst.is_return_landing() {
+ return TypeDiff::Incompatible;
+ }
+
+ if src.is_deferred() != dst.is_deferred() {
return TypeDiff::Incompatible;
}
@@ -1841,10 +2137,17 @@ impl Context {
TypeDiff::Incompatible => return TypeDiff::Incompatible,
};
+ // Check the block to inline
+ if src.inline_block != dst.inline_block {
+ // find_block_version should not find existing blocks with different
+ // inline_block so that their yield will not be megamorphic.
+ return TypeDiff::Incompatible;
+ }
+
// For each local type we track
- for i in 0..src.local_types.len() {
- let t_src = src.local_types[i];
- let t_dst = dst.local_types[i];
+ for i in 0.. MAX_LOCAL_TYPES {
+ let t_src = src.get_local_type(i);
+ let t_dst = dst.get_local_type(i);
diff += match t_src.diff(t_dst) {
TypeDiff::Compatible(diff) => diff,
TypeDiff::Incompatible => return TypeDiff::Incompatible,
@@ -1853,12 +2156,12 @@ impl Context {
// For each value on the temp stack
for i in 0..src.stack_size {
- let (src_mapping, src_type) = src.get_opnd_mapping(StackOpnd(i));
- let (dst_mapping, dst_type) = dst.get_opnd_mapping(StackOpnd(i));
+ let src_mapping = src.get_opnd_mapping(StackOpnd(i));
+ let dst_mapping = dst.get_opnd_mapping(StackOpnd(i));
// If the two mappings aren't the same
if src_mapping != dst_mapping {
- if dst_mapping == MapToStack {
+ if dst_mapping.get_kind() == MapToStack {
// We can safely drop information about the source of the temp
// stack operand.
diff += 1;
@@ -1867,6 +2170,9 @@ impl Context {
}
}
+ let src_type = src.get_opnd_type(StackOpnd(i));
+ let dst_type = dst.get_opnd_type(StackOpnd(i));
+
diff += match src_type.diff(dst_type) {
TypeDiff::Compatible(diff) => diff,
TypeDiff::Incompatible => return TypeDiff::Incompatible,
@@ -1896,20 +2202,20 @@ impl Context {
impl Assembler {
/// Push one new value on the temp stack with an explicit mapping
/// Return a pointer to the new stack top
- pub fn stack_push_mapping(&mut self, (mapping, temp_type): (TempMapping, Type)) -> Opnd {
+ pub fn stack_push_mapping(&mut self, mapping: TempMapping) -> Opnd {
// If type propagation is disabled, store no types
if get_option!(no_type_prop) {
- return self.stack_push_mapping((mapping, Type::Unknown));
+ return self.stack_push_mapping(mapping.without_type());
}
let stack_size: usize = self.ctx.stack_size.into();
// Keep track of the type and mapping of the value
if stack_size < MAX_TEMP_TYPES {
- self.ctx.temp_mapping[stack_size] = mapping;
- self.ctx.temp_types[stack_size] = temp_type;
+ self.ctx.set_temp_mapping(stack_size, mapping);
- if let MapToLocal(idx) = mapping {
+ if mapping.get_kind() == MapToLocal {
+ let idx = mapping.get_local_idx();
assert!((idx as usize) < MAX_LOCAL_TYPES);
}
}
@@ -1928,12 +2234,12 @@ impl Assembler {
/// Push one new value on the temp stack
/// Return a pointer to the new stack top
pub fn stack_push(&mut self, val_type: Type) -> Opnd {
- return self.stack_push_mapping((MapToStack, val_type));
+ return self.stack_push_mapping(TempMapping::map_to_stack(val_type));
}
/// Push the self value on the stack
pub fn stack_push_self(&mut self) -> Opnd {
- return self.stack_push_mapping((MapToSelf, Type::Unknown));
+ return self.stack_push_mapping(TempMapping::map_to_self());
}
/// Push a local variable on the stack
@@ -1942,7 +2248,7 @@ impl Assembler {
return self.stack_push(Type::Unknown);
}
- return self.stack_push_mapping((MapToLocal((local_idx as u8).into()), Type::Unknown));
+ return self.stack_push_mapping(TempMapping::map_to_local(local_idx as u8));
}
// Pop N values off the stack
@@ -1957,8 +2263,7 @@ impl Assembler {
let idx: usize = (self.ctx.stack_size as usize) - i - 1;
if idx < MAX_TEMP_TYPES {
- self.ctx.temp_types[idx] = Type::Unknown;
- self.ctx.temp_mapping[idx] = MapToStack;
+ self.ctx.set_temp_mapping(idx, TempMapping::map_to_stack(Type::Unknown));
}
}
@@ -1972,12 +2277,16 @@ impl Assembler {
pub fn shift_stack(&mut self, argc: usize) {
assert!(argc < self.ctx.stack_size.into());
- let method_name_index = (self.ctx.stack_size as usize) - (argc as usize) - 1;
+ let method_name_index = (self.ctx.stack_size as usize) - argc - 1;
for i in method_name_index..(self.ctx.stack_size - 1) as usize {
- if i + 1 < MAX_TEMP_TYPES {
- self.ctx.temp_types[i] = self.ctx.temp_types[i + 1];
- self.ctx.temp_mapping[i] = self.ctx.temp_mapping[i + 1];
+ if i < MAX_TEMP_TYPES {
+ let next_arg_mapping = if i + 1 < MAX_TEMP_TYPES {
+ self.ctx.get_temp_mapping(i + 1)
+ } else {
+ TempMapping::map_to_stack(Type::Unknown)
+ };
+ self.ctx.set_temp_mapping(i, next_arg_mapping);
}
}
self.stack_pop(1);
@@ -2125,12 +2434,18 @@ fn gen_block_series_body(
/// Generate a block version that is an entry point inserted into an iseq
/// NOTE: this function assumes that the VM lock has been taken
-pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> {
+/// If jit_exception is true, compile JIT code for handling exceptions.
+/// See [jit_compile_exception] for details.
+pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option<*const u8> {
// Compute the current instruction index based on the current PC
+ let cfp = unsafe { get_ec_cfp(ec) };
let insn_idx: u16 = unsafe {
- let ec_pc = get_cfp_pc(get_ec_cfp(ec));
+ let ec_pc = get_cfp_pc(cfp);
iseq_pc_to_insn_idx(iseq, ec_pc)?
};
+ let stack_size: u8 = unsafe {
+ u8::try_from(get_cfp_sp(cfp).offset_from(get_cfp_bp(cfp))).ok()?
+ };
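A standalone sketch of the stack_size computation just above: the number of VALUEs already on the frame's temp stack is the element distance from cfp->bp to cfp->sp (stand-in pointers used here, not the real CFP accessors).

fn main() {
    let frame_stack: [u64; 4] = [0; 4];        // stand-in for the frame's VALUE slots
    let bp = frame_stack.as_ptr();
    let sp = unsafe { bp.add(2) };             // two values already pushed
    let stack_size = unsafe { sp.offset_from(bp) };
    assert_eq!(u8::try_from(stack_size).ok(), Some(2));
}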
// The entry context makes no assumptions about types
let blockid = BlockId {
@@ -2143,10 +2458,12 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> {
let ocb = CodegenGlobals::get_outlined_cb();
// Write the interpreter entry prologue. Might be NULL when out of memory.
- let code_ptr = gen_entry_prologue(cb, ocb, iseq, insn_idx);
+ let code_ptr = gen_entry_prologue(cb, ocb, iseq, insn_idx, jit_exception);
// Try to generate code for the entry block
- let block = gen_block_series(blockid, &Context::default(), ec, cb, ocb);
+ let mut ctx = Context::default();
+ ctx.stack_size = stack_size;
+ let block = gen_block_series(blockid, &ctx, ec, cb, ocb);
cb.mark_all_executable();
ocb.unwrap().mark_all_executable();
@@ -2155,7 +2472,9 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> {
// Compilation failed
None => {
// Trigger code GC. This entry point will be recompiled later.
- cb.code_gc(ocb);
+ if get_option!(code_gc) {
+ cb.code_gc(ocb);
+ }
return None;
}
@@ -2168,14 +2487,17 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> {
}
}
+ // Count the number of entry points we compile
+ incr_counter!(compiled_iseq_entry);
+
// Compilation successful and block not empty
- return code_ptr;
+ code_ptr.map(|ptr| ptr.raw_ptr(cb))
}
// Change the entry's jump target from an entry stub to a next entry
pub fn regenerate_entry(cb: &mut CodeBlock, entryref: &EntryRef, next_entry: CodePtr) {
let mut asm = Assembler::new();
- asm.comment("regenerate_entry");
+ asm_comment!(asm, "regenerate_entry");
// gen_entry_guard generates cmp + jne. We're rewriting only jne.
asm.jne(next_entry.into());
@@ -2185,7 +2507,7 @@ pub fn regenerate_entry(cb: &mut CodeBlock, entryref: &EntryRef, next_entry: Cod
let old_dropped_bytes = cb.has_dropped_bytes();
cb.set_write_ptr(unsafe { entryref.as_ref() }.start_addr);
cb.set_dropped_bytes(false);
- asm.compile(cb, None);
+ asm.compile(cb, None).expect("can rewrite existing code");
// Rewind write_pos to the original one
assert_eq!(cb.get_write_ptr(), unsafe { entryref.as_ref() }.end_addr);
@@ -2209,78 +2531,88 @@ c_callable! {
/// Generated code calls this function with the SysV calling convention.
/// See [gen_call_entry_stub_hit].
fn entry_stub_hit(entry_ptr: *const c_void, ec: EcPtr) -> *const u8 {
- with_vm_lock(src_loc!(), || {
- match entry_stub_hit_body(entry_ptr, ec) {
- Some(addr) => addr,
- // Failed to service the stub by generating a new block so now we
- // need to exit to the interpreter at the stubbed location.
- None => return CodegenGlobals::get_stub_exit_code().raw_ptr(),
- }
+ with_compile_time(|| {
+ with_vm_lock(src_loc!(), || {
+ let cb = CodegenGlobals::get_inline_cb();
+ let ocb = CodegenGlobals::get_outlined_cb();
+
+ let addr = entry_stub_hit_body(entry_ptr, ec, cb, ocb)
+ .unwrap_or_else(|| {
+ // Trigger code GC (e.g. no space).
+ // This entry point will be recompiled later.
+ if get_option!(code_gc) {
+ cb.code_gc(ocb);
+ }
+ CodegenGlobals::get_stub_exit_code().raw_ptr(cb)
+ });
+
+ cb.mark_all_executable();
+ ocb.unwrap().mark_all_executable();
+
+ addr
+ })
})
}
}
/// Called by the generated code when an entry stub is executed
-fn entry_stub_hit_body(entry_ptr: *const c_void, ec: EcPtr) -> Option<*const u8> {
+fn entry_stub_hit_body(
+ entry_ptr: *const c_void,
+ ec: EcPtr,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb
+) -> Option<*const u8> {
// Get ISEQ and insn_idx from the current ec->cfp
let cfp = unsafe { get_ec_cfp(ec) };
let iseq = unsafe { get_cfp_iseq(cfp) };
let insn_idx = iseq_pc_to_insn_idx(iseq, unsafe { get_cfp_pc(cfp) })?;
-
- let cb = CodegenGlobals::get_inline_cb();
- let ocb = CodegenGlobals::get_outlined_cb();
+ let stack_size: u8 = unsafe {
+ u8::try_from(get_cfp_sp(cfp).offset_from(get_cfp_bp(cfp))).ok()?
+ };
// Compile a new entry guard as a next entry
let next_entry = cb.get_write_ptr();
let mut asm = Assembler::new();
let pending_entry = gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?;
- asm.compile(cb, Some(ocb));
+ asm.compile(cb, Some(ocb))?;
- // Try to find an existing compiled version of this block
+ // Find or compile a block version
let blockid = BlockId { iseq, idx: insn_idx };
- let ctx = Context::default();
+ let mut ctx = Context::default();
+ ctx.stack_size = stack_size;
let blockref = match find_block_version(blockid, &ctx) {
// If an existing block is found, generate a jump to the block.
Some(blockref) => {
let mut asm = Assembler::new();
asm.jmp(unsafe { blockref.as_ref() }.start_addr.into());
- asm.compile(cb, Some(ocb));
- blockref
+ asm.compile(cb, Some(ocb))?;
+ Some(blockref)
}
// If this block hasn't yet been compiled, generate blocks after the entry guard.
- None => match gen_block_series(blockid, &ctx, ec, cb, ocb) {
- Some(blockref) => blockref,
- None => { // No space
- // Trigger code GC. This entry point will be recompiled later.
- cb.code_gc(ocb);
- return None;
- }
- }
+ None => gen_block_series(blockid, &ctx, ec, cb, ocb),
};
- // Regenerate the previous entry
- assert!(!entry_ptr.is_null());
- let entryref = NonNull::<Entry>::new(entry_ptr as *mut Entry).expect("Entry should not be null");
- regenerate_entry(cb, &entryref, next_entry);
-
- // Write an entry to the heap and push it to the ISEQ
- let pending_entry = Rc::try_unwrap(pending_entry).ok().expect("PendingEntry should be unique");
- get_or_create_iseq_payload(iseq).entries.push(pending_entry.into_entry());
+ // Commit or retry the entry
+ if blockref.is_some() {
+ // Regenerate the previous entry
+ let entryref = NonNull::<Entry>::new(entry_ptr as *mut Entry).expect("Entry should not be null");
+ regenerate_entry(cb, &entryref, next_entry);
- cb.mark_all_executable();
- ocb.unwrap().mark_all_executable();
+ // Write an entry to the heap and push it to the ISEQ
+ let pending_entry = Rc::try_unwrap(pending_entry).ok().expect("PendingEntry should be unique");
+ get_or_create_iseq_payload(iseq).entries.push(pending_entry.into_entry());
+ }
// Let the stub jump to the block
- Some(unsafe { blockref.as_ref() }.start_addr.raw_ptr())
+ blockref.map(|block| unsafe { block.as_ref() }.start_addr.raw_ptr(cb))
}
/// Generate a stub that calls entry_stub_hit
pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
- let stub_addr = ocb.get_write_ptr();
let mut asm = Assembler::new();
- asm.comment("entry stub hit");
+ asm_comment!(asm, "entry stub hit");
asm.mov(C_ARG_OPNDS[0], entry_address.into());
@@ -2288,32 +2620,23 @@ pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option<Code
// Not really a side exit, just don't need a padded jump here.
asm.jmp(CodegenGlobals::get_entry_stub_hit_trampoline().as_side_exit());
- asm.compile(ocb, None);
-
- if ocb.has_dropped_bytes() {
- return None; // No space
- } else {
- return Some(stub_addr);
- }
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
}
/// A trampoline used by gen_entry_stub. entry_stub_hit may issue Code GC, so
/// it's useful for Code GC to call entry_stub_hit from a globally shared code.
-pub fn gen_entry_stub_hit_trampoline(ocb: &mut OutlinedCb) -> CodePtr {
+pub fn gen_entry_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
- let code_ptr = ocb.get_write_ptr();
let mut asm = Assembler::new();
// See gen_entry_guard for how it's used.
- asm.comment("entry_stub_hit() trampoline");
+ asm_comment!(asm, "entry_stub_hit() trampoline");
let jump_addr = asm.ccall(entry_stub_hit as *mut u8, vec![C_ARG_OPNDS[0], EC]);
// Jump to the address returned by the entry_stub_hit() call
asm.jmp_opnd(jump_addr);
- asm.compile(ocb, None);
-
- code_ptr
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
}
/// Generate code for a branch, possibly rewriting and changing the size of it
@@ -2328,19 +2651,25 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &Branch) {
// Generate the branch
let mut asm = Assembler::new();
- asm.comment("regenerate_branch");
+ asm_comment!(asm, "regenerate_branch");
branch.gen_fn.call(
&mut asm,
Target::CodePtr(branch.get_target_address(0).unwrap()),
branch.get_target_address(1).map(|addr| Target::CodePtr(addr)),
);
+ // If the entire block is the branch and the block could be invalidated,
+ // we need to pad to ensure there is room for invalidation patching.
+ if branch.start_addr == block.start_addr && branch_terminates_block && block.entry_exit.is_some() {
+ asm.pad_inval_patch();
+ }
+
// Rewrite the branch
let old_write_pos = cb.get_write_pos();
let old_dropped_bytes = cb.has_dropped_bytes();
cb.set_write_ptr(branch.start_addr);
cb.set_dropped_bytes(false);
- asm.compile(cb, None);
+ asm.compile(cb, None).expect("can rewrite existing code");
let new_end_addr = cb.get_write_ptr();
branch.end_addr.set(new_end_addr);
@@ -2399,7 +2728,7 @@ c_callable! {
ec: EcPtr,
) -> *const u8 {
with_vm_lock(src_loc!(), || {
- branch_stub_hit_body(branch_ptr, target_idx, ec)
+ with_compile_time(|| { branch_stub_hit_body(branch_ptr, target_idx, ec) })
})
}
}
@@ -2427,6 +2756,9 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
_ => unreachable!("target_idx < 2 must always hold"),
};
+ let cb = CodegenGlobals::get_inline_cb();
+ let ocb = CodegenGlobals::get_outlined_cb();
+
let (target_blockid, target_ctx): (BlockId, Context) = unsafe {
// SAFETY: no mutation of the target's Cell. Just reading out data.
let target = branch.targets[target_idx].ref_unchecked().as_ref().unwrap();
@@ -2434,24 +2766,24 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
// If this branch has already been patched, return the dst address
// Note: recursion can cause the same stub to be hit multiple times
if let BranchTarget::Block(_) = target.as_ref() {
- return target.get_address().unwrap().raw_ptr();
+ return target.get_address().unwrap().raw_ptr(cb);
}
(target.get_blockid(), target.get_ctx())
};
- let cb = CodegenGlobals::get_inline_cb();
- let ocb = CodegenGlobals::get_outlined_cb();
-
let (cfp, original_interp_sp) = unsafe {
let cfp = get_ec_cfp(ec);
let original_interp_sp = get_cfp_sp(cfp);
- let running_iseq = rb_cfp_get_iseq(cfp);
+ let running_iseq = get_cfp_iseq(cfp);
+ assert_eq!(running_iseq, target_blockid.iseq as _, "each stub expects a particular iseq");
+
let reconned_pc = rb_iseq_pc_at_idx(running_iseq, target_blockid.idx.into());
let reconned_sp = original_interp_sp.offset(target_ctx.sp_offset.into());
-
- assert_eq!(running_iseq, target_blockid.iseq as _, "each stub expects a particular iseq");
+ // Unlike in the interpreter, our `leave` doesn't write to the caller's
+ // SP -- we do it in the returned-to code. Account for this difference.
+ let reconned_sp = reconned_sp.add(target_ctx.is_return_landing().into());
// Update the PC in the current CFP, because it may be out of sync in JITted code
rb_set_cfp_pc(cfp, reconned_pc);
@@ -2464,6 +2796,17 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
// So we do it here instead.
rb_set_cfp_sp(cfp, reconned_sp);
+ // Bail if code GC is disabled and we've already run out of spaces.
+ if !get_option!(code_gc) && (cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes()) {
+ return CodegenGlobals::get_stub_exit_code().raw_ptr(cb);
+ }
+
+ // Bail if we're about to run out of native stack space.
+ // We've just reconstructed interpreter state.
+ if rb_ec_stack_check(ec as _) != 0 {
+ return CodegenGlobals::get_stub_exit_code().raw_ptr(cb);
+ }
+
(cfp, original_interp_sp)
};
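A worked sketch of the SP reconstruction above, in VALUE-slot units: add the context's sp_offset, plus one extra slot when the target is a return landing, since the callee's return value has not been written to the caller's stack yet.

fn reconned_sp(original_sp_idx: isize, sp_offset: i8, is_return_landing: bool) -> isize {
    original_sp_idx + isize::from(sp_offset) + isize::from(is_return_landing)
}

fn main() {
    assert_eq!(reconned_sp(100, 2, false), 102);
    assert_eq!(reconned_sp(100, 2, true), 103); // room for the incoming return value
}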
@@ -2474,7 +2817,6 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
if block.is_none() {
let branch_old_shape = branch.gen_fn.get_shape();
-
// If the new block can be generated right after the branch (at cb->write_pos)
if cb.get_write_ptr() == branch.end_addr.get() {
// This branch should be terminating its block
@@ -2532,7 +2874,9 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
// because incomplete code could be used when cb.dropped_bytes is flipped
// by code GC. So this place, after all compilation, is the safest place
// to hook code GC on branch_stub_hit.
- cb.code_gc(ocb);
+ if get_option!(code_gc) {
+ cb.code_gc(ocb);
+ }
// Failed to service the stub by generating a new block so now we
// need to exit to the interpreter at the stubbed location. We are
@@ -2552,11 +2896,11 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
assert!(
new_branch_size <= branch_size_on_entry,
"branch stubs should never enlarge branches (start_addr: {:?}, old_size: {}, new_size: {})",
- branch.start_addr.raw_ptr(), branch_size_on_entry, new_branch_size,
+ branch.start_addr.raw_ptr(cb), branch_size_on_entry, new_branch_size,
);
// Return a pointer to the compiled block version
- dst_addr.raw_ptr()
+ dst_addr.raw_ptr(cb)
}
/// Generate a "stub", a piece of code that calls the compiler back when run.
@@ -2569,18 +2913,21 @@ fn gen_branch_stub(
) -> Option<CodePtr> {
let ocb = ocb.unwrap();
- // Generate an outlined stub that will call branch_stub_hit()
- let stub_addr = ocb.get_write_ptr();
-
let mut asm = Assembler::new();
- asm.ctx = ctx.clone();
+ asm.ctx = *ctx;
asm.set_reg_temps(ctx.reg_temps);
- asm.comment("branch stub hit");
+ asm_comment!(asm, "branch stub hit");
+
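+ // When this stub is reached via a return (return landing), the callee's return value is still in the C return
+ // register: reload SP from the CFP and push the value so the stack matches what the target block expects.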
+ if asm.ctx.is_return_landing() {
+ asm.mov(SP, Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP));
+ let top = asm.stack_push(Type::Unknown);
+ asm.mov(top, C_RET_OPND);
+ }
// Save caller-saved registers before C_ARG_OPNDS get clobbered.
// Spill all registers for consistency with the trampoline.
- for &reg in caller_saved_temp_regs().iter() {
- asm.cpush(reg);
+ for &reg in caller_saved_temp_regs() {
+ asm.cpush(Opnd::Reg(reg));
}
// Spill temps to the VM stack as well for jit.peek_at_stack()
@@ -2599,19 +2946,11 @@ fn gen_branch_stub(
// Not really a side exit, just don't need a padded jump here.
asm.jmp(CodegenGlobals::get_branch_stub_hit_trampoline().as_side_exit());
- asm.compile(ocb, None);
-
- if ocb.has_dropped_bytes() {
- // No space
- None
- } else {
- Some(stub_addr)
- }
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
}
-pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> CodePtr {
+pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
- let code_ptr = ocb.get_write_ptr();
let mut asm = Assembler::new();
// For `branch_stub_hit(branch_ptr, target_idx, ec)`,
@@ -2620,8 +2959,8 @@ pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> CodePtr {
// is the unchanging part.
// Since this trampoline is static, it allows code GC inside
// branch_stub_hit() to free stubs without problems.
- asm.comment("branch_stub_hit() trampoline");
- let jump_addr = asm.ccall(
+ asm_comment!(asm, "branch_stub_hit() trampoline");
+ let stub_hit_ret = asm.ccall(
branch_stub_hit as *mut u8,
vec![
C_ARG_OPNDS[0],
@@ -2629,28 +2968,39 @@ pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> CodePtr {
EC,
]
);
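+ // Load the address returned by branch_stub_hit() into a register; it becomes the jump target after the saved registers are restored below.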
+ let jump_addr = asm.load(stub_hit_ret);
// Restore caller-saved registers for stack temps
- for &reg in caller_saved_temp_regs().iter().rev() {
- asm.cpop_into(reg);
+ for &reg in caller_saved_temp_regs().rev() {
+ asm.cpop_into(Opnd::Reg(reg));
}
// Jump to the address returned by the branch_stub_hit() call
asm.jmp_opnd(jump_addr);
- asm.compile(ocb, None);
+ // HACK: popping into C_RET_REG clobbers the return value of branch_stub_hit() we need to jump
+ // to, so we need a scratch register to preserve it. This extends the live range of the C
+ // return register so we get something else for the return value.
+ let _ = asm.live_reg_opnd(stub_hit_ret);
- code_ptr
+ asm.compile(ocb, None).map(|(code_ptr, _)| code_ptr)
}
/// Return registers to be pushed and popped on branch_stub_hit.
-/// The return value may include an extra register for x86 alignment.
-fn caller_saved_temp_regs() -> Vec<Opnd> {
- let mut regs = Assembler::get_temp_regs();
- if regs.len() % 2 == 1 {
- regs.push(*regs.last().unwrap()); // x86 alignment
+pub fn caller_saved_temp_regs() -> impl Iterator<Item = &'static Reg> + DoubleEndedIterator {
+ let temp_regs = Assembler::get_temp_regs().iter();
+ let len = temp_regs.len();
+ // The return value gen_leave() leaves in C_RET_REG
+ // needs to survive the branch_stub_hit() call.
+ let regs = temp_regs.chain(std::iter::once(&C_RET_REG));
+
+ // On x86_64, maintain 16-byte stack alignment
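+ // (C_RET_REG is already appended above, so the total is odd when len is even; pad with one more push to keep the count even.)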
+ if cfg!(target_arch = "x86_64") && len % 2 == 0 {
+ static ONE_MORE: [Reg; 1] = [C_RET_REG];
+ regs.chain(ONE_MORE.iter())
+ } else {
+ regs.chain(&[])
}
- regs.iter().map(|&reg| Opnd::Reg(reg)).collect()
}
impl Assembler
@@ -2661,7 +3011,7 @@ impl Assembler
// so that we can move the closure below
let entryref = entryref.clone();
- self.pos_marker(move |code_ptr| {
+ self.pos_marker(move |code_ptr, _| {
entryref.start_addr.set(Some(code_ptr));
});
}
@@ -2672,7 +3022,7 @@ impl Assembler
// so that we can move the closure below
let entryref = entryref.clone();
- self.pos_marker(move |code_ptr| {
+ self.pos_marker(move |code_ptr, _| {
entryref.end_addr.set(Some(code_ptr));
});
}
@@ -2684,7 +3034,7 @@ impl Assembler
// so that we can move the closure below
let branchref = branchref.clone();
- self.pos_marker(move |code_ptr| {
+ self.pos_marker(move |code_ptr, _| {
branchref.start_addr.set(Some(code_ptr));
});
}
@@ -2696,7 +3046,7 @@ impl Assembler
// so that we can move the closure below
let branchref = branchref.clone();
- self.pos_marker(move |code_ptr| {
+ self.pos_marker(move |code_ptr, _| {
branchref.end_addr.set(Some(code_ptr));
});
}
@@ -2745,7 +3095,7 @@ pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm:
let block_addr = block.start_addr;
// Call the branch generation function
- asm.comment("gen_direct_jmp: existing block");
+ asm_comment!(asm, "gen_direct_jmp: existing block");
asm.mark_branch_start(&branch);
branch.gen_fn.call(asm, Target::CodePtr(block_addr), None);
asm.mark_branch_end(&branch);
@@ -2753,7 +3103,7 @@ pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm:
BranchTarget::Block(blockref)
} else {
// The branch is effectively empty (a noop)
- asm.comment("gen_direct_jmp: fallthrough");
+ asm_comment!(asm, "gen_direct_jmp: fallthrough");
asm.mark_branch_start(&branch);
asm.mark_branch_end(&branch);
branch.gen_fn.set_shape(BranchShape::Next0);
@@ -2762,7 +3112,7 @@ pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm:
// compile the target block right after this one (fallthrough).
BranchTarget::Stub(Box::new(BranchStub {
address: None,
- ctx: ctx.clone(),
+ ctx: *ctx,
iseq: Cell::new(target0.iseq),
iseq_idx: target0.idx,
}))
@@ -2777,16 +3127,13 @@ pub fn defer_compilation(
asm: &mut Assembler,
ocb: &mut OutlinedCb,
) {
- if asm.ctx.chain_depth != 0 {
+ if asm.ctx.is_deferred() {
panic!("Double defer!");
}
- let mut next_ctx = asm.ctx.clone();
+ let mut next_ctx = asm.ctx;
- if next_ctx.chain_depth == u8::MAX {
- panic!("max block version chain depth reached!");
- }
- next_ctx.chain_depth += 1;
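+ // The deferred state is a flag on the context (checked via is_deferred() above) rather than part of the chain depth.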
+ next_ctx.mark_as_deferred();
let branch = new_pending_branch(jit, BranchGenFn::JumpToTarget0(Cell::new(BranchShape::Default)));
@@ -2798,8 +3145,14 @@ pub fn defer_compilation(
// Likely a stub due to the increased chain depth
let target0_address = branch.set_target(0, blockid, &next_ctx, ocb);
+ // Pad the block if it has the potential to be invalidated. This must be
+ // done before gen_fn() in case the jump is overwritten by a fallthrough.
+ if jit.block_entry_exit.is_some() {
+ asm.pad_inval_patch();
+ }
+
// Call the branch generation function
- asm.comment("defer_compilation");
+ asm_comment!(asm, "defer_compilation");
asm.mark_branch_start(&branch);
if let Some(dst_addr) = target0_address {
branch.gen_fn.call(asm, Target::CodePtr(dst_addr), None);
@@ -2951,7 +3304,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
// Get a pointer to the generated code for this block
let block_start = block.start_addr;
- // Make the the start of the block do an exit. This handles OOM situations
+ // Make the start of the block do an exit. This handles OOM situations
// and some cases where we can't efficiently patch incoming branches.
// Do this first, since in case there is a fallthrough branch into this
// block, the patching loop below can overwrite the start of the block.
@@ -2977,13 +3330,14 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
let mut asm = Assembler::new();
asm.jmp(block_entry_exit.as_side_exit());
cb.set_dropped_bytes(false);
- asm.compile(&mut cb, Some(ocb));
+ asm.compile(&mut cb, Some(ocb)).expect("can rewrite existing code");
assert!(
cb.get_write_ptr() <= block_end,
- "invalidation wrote past end of block (code_size: {:?}, new_size: {})",
+ "invalidation wrote past end of block (code_size: {:?}, new_size: {}, start_addr: {:?})",
block.code_size(),
- cb.get_write_ptr().into_i64() - block_start.into_i64(),
+ cb.get_write_ptr().as_offset() - block_start.as_offset(),
+ block.start_addr.raw_ptr(cb),
);
cb.set_write_ptr(cur_pos);
cb.set_dropped_bytes(cur_dropped_bytes);
@@ -3024,7 +3378,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
address: Some(stub_addr),
iseq: block.iseq.clone(),
iseq_idx: block.iseq_range.start,
- ctx: block.ctx.clone(),
+ ctx: block.ctx,
})))));
// Check if the invalidated block immediately follows
@@ -3047,7 +3401,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
if !target_next && branch.code_size() > old_branch_size {
panic!(
"invalidated branch grew in size (start_addr: {:?}, old_size: {}, new_size: {})",
- branch.start_addr.raw_ptr(), old_branch_size, branch.code_size()
+ branch.start_addr.raw_ptr(cb), old_branch_size, branch.code_size()
);
}
}
@@ -3089,9 +3443,9 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
// invalidated branch pointers. Example:
// def foo(n)
// if n == 2
-// # 1.times{} to use a cfunc to avoid exiting from the
-// # frame which will use the retained return address
-// return 1.times { Object.define_method(:foo) {} }
+// # 1.times.each to create a cfunc frame to preserve the JIT frame
+// # which will return to a stub housed in an invalidated block
+// return 1.times.each { Object.define_method(:foo) {} }
// end
//
// foo(n + 1)
@@ -3139,6 +3493,65 @@ mod tests {
use crate::core::*;
#[test]
+ fn type_size() {
+ // Check that we can store types in 4 bits,
+ // and all local types in 32 bits
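+ // (4 bits per Type lets a 32-bit field track the types of up to 8 locals.)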
+ assert_eq!(mem::size_of::<Type>(), 1);
+ assert!(Type::BlockParamProxy as usize <= 0b1111);
+ assert!(MAX_LOCAL_TYPES * 4 <= 32);
+ }
+
+ #[test]
+ fn tempmapping_size() {
+ assert_eq!(mem::size_of::<TempMapping>(), 1);
+ }
+
+ #[test]
+ fn local_types() {
+ let mut ctx = Context::default();
+
+ for i in 0..MAX_LOCAL_TYPES {
+ ctx.set_local_type(i, Type::Fixnum);
+ assert_eq!(ctx.get_local_type(i), Type::Fixnum);
+ ctx.set_local_type(i, Type::BlockParamProxy);
+ assert_eq!(ctx.get_local_type(i), Type::BlockParamProxy);
+ }
+
+ ctx.set_local_type(0, Type::Fixnum);
+ ctx.clear_local_types();
+ assert!(ctx.get_local_type(0) == Type::Unknown);
+
+ // Make sure we don't accidentally set bits incorrectly
+ let mut ctx = Context::default();
+ ctx.set_local_type(0, Type::Fixnum);
+ assert_eq!(ctx.get_local_type(0), Type::Fixnum);
+ ctx.set_local_type(2, Type::Fixnum);
+ ctx.set_local_type(1, Type::BlockParamProxy);
+ assert_eq!(ctx.get_local_type(0), Type::Fixnum);
+ assert_eq!(ctx.get_local_type(2), Type::Fixnum);
+ }
+
+ #[test]
+ fn tempmapping() {
+ let t = TempMapping::map_to_stack(Type::Unknown);
+ assert_eq!(t.get_kind(), MapToStack);
+ assert_eq!(t.get_type(), Type::Unknown);
+
+ let t = TempMapping::map_to_stack(Type::TString);
+ assert_eq!(t.get_kind(), MapToStack);
+ assert_eq!(t.get_type(), Type::TString);
+
+ let t = TempMapping::map_to_local(7);
+ assert_eq!(t.get_kind(), MapToLocal);
+ assert_eq!(t.get_local_idx(), 7);
+ }
+
+ #[test]
+ fn context_size() {
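+ // Each block version stores a Context, so keeping this struct small keeps metadata overhead down.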
+ assert_eq!(mem::size_of::<Context>(), 23);
+ }
+
+ #[test]
fn types() {
// Valid src => dst
assert_eq!(Type::Unknown.diff(Type::Unknown), TypeDiff::Compatible(0));
@@ -3162,7 +3575,7 @@ mod tests {
assert_eq!(reg_temps.get(stack_idx), false);
}
- // Set 0, 2, 7
+ // Set 0, 2, 7 (RegTemps: 10100001)
reg_temps.set(0, true);
reg_temps.set(2, true);
reg_temps.set(3, true);
@@ -3178,6 +3591,17 @@ mod tests {
assert_eq!(reg_temps.get(5), false);
assert_eq!(reg_temps.get(6), false);
assert_eq!(reg_temps.get(7), true);
+
+ // Test conflicts
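+ // Slots conflict when another in-use slot maps to the same register (index % num_temp_regs): here 0 & 5 and 2 & 7 share one.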
+ assert_eq!(5, get_option!(num_temp_regs));
+ assert_eq!(reg_temps.conflicts_with(0), false); // already set, but no conflict
+ assert_eq!(reg_temps.conflicts_with(1), false);
+ assert_eq!(reg_temps.conflicts_with(2), true); // already set, and conflicts with 7
+ assert_eq!(reg_temps.conflicts_with(3), false);
+ assert_eq!(reg_temps.conflicts_with(4), false);
+ assert_eq!(reg_temps.conflicts_with(5), true); // not set, and will conflict with 0
+ assert_eq!(reg_temps.conflicts_with(6), false);
+ assert_eq!(reg_temps.conflicts_with(7), true); // already set, and conflicts with 2
}
#[test]
@@ -3195,6 +3619,60 @@ mod tests {
}
#[test]
+ fn context_upgrade_local() {
+ let mut asm = Assembler::new();
+ asm.stack_push_local(0);
+ asm.ctx.upgrade_opnd_type(StackOpnd(0), Type::Nil);
+ assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0)));
+ }
+
+ #[test]
+ fn context_chain_depth() {
+ let mut ctx = Context::default();
+ assert_eq!(ctx.get_chain_depth(), 0);
+ assert_eq!(ctx.is_return_landing(), false);
+ assert_eq!(ctx.is_deferred(), false);
+
+ for _ in 0..5 {
+ ctx.increment_chain_depth();
+ }
+ assert_eq!(ctx.get_chain_depth(), 5);
+
+ ctx.set_as_return_landing();
+ assert_eq!(ctx.is_return_landing(), true);
+
+ ctx.clear_return_landing();
+ assert_eq!(ctx.is_return_landing(), false);
+
+ ctx.mark_as_deferred();
+ assert_eq!(ctx.is_deferred(), true);
+
+ ctx.reset_chain_depth_and_defer();
+ assert_eq!(ctx.get_chain_depth(), 0);
+ assert_eq!(ctx.is_deferred(), false);
+ }
+
+ #[test]
+ fn shift_stack_for_send() {
+ let mut asm = Assembler::new();
+
+ // Push values to simulate send(:name, arg) with 6 items already on-stack
+ for _ in 0..6 {
+ asm.stack_push(Type::Fixnum);
+ }
+ asm.stack_push(Type::Unknown);
+ asm.stack_push(Type::ImmSymbol);
+ asm.stack_push(Type::Unknown);
+
+ // This method takes argc of the sendee, not argc of send
+ asm.shift_stack(1);
+
+ // The symbol should be gone
+ assert_eq!(Type::Unknown, asm.ctx.get_opnd_type(StackOpnd(0)));
+ assert_eq!(Type::Unknown, asm.ctx.get_opnd_type(StackOpnd(1)));
+ }
+
+ #[test]
fn test_miri_ref_unchecked() {
let blockid = BlockId {
iseq: ptr::null(),