 bootstraptest/test_yjit.rb |  28
 yjit/src/backend/ir.rs     | 156
 yjit/src/codegen.rs        | 154
 yjit/src/core.rs           | 106
 4 files changed, 325 insertions(+), 119 deletions(-)
diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb
index eccaa542c8..df1eba1ce1 100644
--- a/bootstraptest/test_yjit.rb
+++ b/bootstraptest/test_yjit.rb
@@ -2256,6 +2256,34 @@ assert_equal '7', %q{
foo(5,2)
}
+# regression test for argument registers with invalidation
+assert_equal '[0, 1, 2]', %q{
+ def test(n)
+ ret = n
+ binding
+ ret
+ end
+
+ [0, 1, 2].map do |n|
+ test(n)
+ end
+}
+
+# regression test for argument registers
+assert_equal 'true', %q{
+ class Foo
+ def ==(other)
+ other == nil
+ end
+ end
+
+ def test
+ [Foo.new].include?(Foo.new)
+ end
+
+ test
+}
+
# test pattern matching
assert_equal '[:ok, :ok]', %q{
class C
diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs
index 3355dc5da3..eb32dac987 100644
--- a/yjit/src/backend/ir.rs
+++ b/yjit/src/backend/ir.rs
@@ -1032,7 +1032,7 @@ pub struct Assembler {
pub ctx: Context,
/// The current ISEQ's local table size. asm.local_opnd() uses this, and it's
- /// sometimes hard to pass this value, e.g. asm.spill_temps() in asm.ccall().
+ /// sometimes hard to pass this value, e.g. asm.spill_regs() in asm.ccall().
///
/// `None` means we're not assembling for an ISEQ, or that the local size is
/// not relevant.
@@ -1241,8 +1241,36 @@ impl Assembler
self.ctx.clear_local_types();
}
+ /// Repurpose stack temp registers to the corresponding locals for arguments
+ pub fn map_temp_regs_to_args(&mut self, callee_ctx: &mut Context, argc: i32) -> Vec<RegOpnd> {
+ let mut callee_reg_mapping = callee_ctx.get_reg_mapping();
+ let mut mapped_temps = vec![];
+
+ for arg_idx in 0..argc {
+ let stack_idx: u8 = (self.ctx.get_stack_size() as i32 - argc + arg_idx).try_into().unwrap();
+ let temp_opnd = RegOpnd::Stack(stack_idx);
+
+ // For each argument, if the stack temp for it has a register,
+ // let the callee use the register for the local variable.
+ if let Some(reg_idx) = self.ctx.get_reg_mapping().get_reg(temp_opnd) {
+ let local_opnd = RegOpnd::Local(arg_idx.try_into().unwrap());
+ callee_reg_mapping.set_reg(local_opnd, reg_idx);
+ mapped_temps.push(temp_opnd);
+ }
+ }
+
+ asm_comment!(self, "local maps: {:?}", callee_reg_mapping);
+ callee_ctx.set_reg_mapping(callee_reg_mapping);
+ mapped_temps
+ }
+
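As a rough illustration of the index arithmetic above (a made-up helper, not YJIT code): the `argc` arguments are the topmost stack temps, so the argument at `arg_idx` lives at `Stack(stack_size - argc + arg_idx)` and is handed to the callee as `Local(arg_idx)`.

```rust
// Toy illustration of the index math in map_temp_regs_to_args (names are made up):
// with a caller stack of size 5 and argc = 2, the two topmost temps Stack(3) and
// Stack(4) become the callee's Local(0) and Local(1).
fn arg_stack_indexes(stack_size: i32, argc: i32) -> Vec<(i32, i32)> {
    // Pair each argument's caller-side stack index with its callee-side local index
    (0..argc).map(|arg_idx| (stack_size - argc + arg_idx, arg_idx)).collect()
}

fn main() {
    assert_eq!(arg_stack_indexes(5, 2), vec![(3, 0), (4, 1)]);
}
```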
/// Spill all live registers to the stack
pub fn spill_regs(&mut self) {
+ self.spill_regs_except(&vec![]);
+ }
+
+ /// Spill all live registers except `ignored_temps` to the stack
+ pub fn spill_regs_except(&mut self, ignored_temps: &Vec<RegOpnd>) {
// Forget registers above the stack top
let mut reg_mapping = self.ctx.get_reg_mapping();
for stack_idx in self.ctx.get_stack_size()..MAX_CTX_TEMPS as u8 {
@@ -1250,36 +1278,46 @@ impl Assembler
}
self.set_reg_mapping(reg_mapping);
- // Spill live stack temps
- if self.ctx.get_reg_mapping() != RegMapping::default() {
- asm_comment!(self, "spill_temps: {:?} -> {:?}", self.ctx.get_reg_mapping(), RegMapping::default());
+ // If no registers are in use, skip all checks
+ if self.ctx.get_reg_mapping() == RegMapping::default() {
+ return;
+ }
- // Spill stack temps
- for stack_idx in 0..u8::min(MAX_CTX_TEMPS as u8, self.ctx.get_stack_size()) {
- if reg_mapping.dealloc_reg(RegOpnd::Stack(stack_idx)) {
- let idx = self.ctx.get_stack_size() - 1 - stack_idx;
- self.spill_temp(self.stack_opnd(idx.into()));
- }
+ // Collect stack temps to be spilled
+ let mut spilled_opnds = vec![];
+ for stack_idx in 0..u8::min(MAX_CTX_TEMPS as u8, self.ctx.get_stack_size()) {
+ let reg_opnd = RegOpnd::Stack(stack_idx);
+ if !ignored_temps.contains(&reg_opnd) && reg_mapping.dealloc_reg(reg_opnd) {
+ let idx = self.ctx.get_stack_size() - 1 - stack_idx;
+ let spilled_opnd = self.stack_opnd(idx.into());
+ spilled_opnds.push(spilled_opnd);
+ reg_mapping.dealloc_reg(spilled_opnd.reg_opnd());
}
+ }
- // Spill locals
- for local_idx in 0..MAX_CTX_TEMPS as u8 {
- if reg_mapping.dealloc_reg(RegOpnd::Local(local_idx)) {
- let first_local_ep_offset = self.num_locals.unwrap() + VM_ENV_DATA_SIZE - 1;
- let ep_offset = first_local_ep_offset - local_idx as u32;
- self.spill_temp(self.local_opnd(ep_offset));
- }
+ // Collect locals to be spilled
+ for local_idx in 0..MAX_CTX_TEMPS as u8 {
+ if reg_mapping.dealloc_reg(RegOpnd::Local(local_idx)) {
+ let first_local_ep_offset = self.num_locals.unwrap() + VM_ENV_DATA_SIZE - 1;
+ let ep_offset = first_local_ep_offset - local_idx as u32;
+ let spilled_opnd = self.local_opnd(ep_offset);
+ spilled_opnds.push(spilled_opnd);
+ reg_mapping.dealloc_reg(spilled_opnd.reg_opnd());
}
+ }
+ // Spill stack temps and locals
+ if !spilled_opnds.is_empty() {
+ asm_comment!(self, "spill_regs: {:?} -> {:?}", self.ctx.get_reg_mapping(), reg_mapping);
+ for &spilled_opnd in spilled_opnds.iter() {
+ self.spill_reg(spilled_opnd);
+ }
self.ctx.set_reg_mapping(reg_mapping);
}
-
- // Every stack temp should have been spilled
- assert_eq!(self.ctx.get_reg_mapping(), RegMapping::default());
}
/// Spill a stack temp from a register to the stack
- fn spill_temp(&mut self, opnd: Opnd) {
+ fn spill_reg(&mut self, opnd: Opnd) {
assert_ne!(self.ctx.get_reg_mapping().get_reg(opnd.reg_opnd()), None);
// Use different RegMappings for dest and src operands
@@ -1308,6 +1346,42 @@ impl Assembler
}
}
+ // Shuffle register moves, sometimes adding extra moves using SCRATCH_REG,
+ // so that no move overwrites another move's source before it is read.
+ pub fn reorder_reg_moves(old_moves: &Vec<(Reg, Opnd)>) -> Vec<(Reg, Opnd)> {
+ // Return the index of a move whose destination is not used as a source if any.
+ fn find_safe_move(moves: &Vec<(Reg, Opnd)>) -> Option<usize> {
+ moves.iter().enumerate().find(|(_, &(dest_reg, _))| {
+ moves.iter().all(|&(_, src_opnd)| src_opnd != Opnd::Reg(dest_reg))
+ }).map(|(index, _)| index)
+ }
+
+ // Remove moves whose source and destination are the same
+ let mut old_moves: Vec<(Reg, Opnd)> = old_moves.clone().into_iter()
+ .filter(|&(reg, opnd)| Opnd::Reg(reg) != opnd).collect();
+
+ let mut new_moves = vec![];
+ while old_moves.len() > 0 {
+ // Keep taking safe moves
+ while let Some(index) = find_safe_move(&old_moves) {
+ new_moves.push(old_moves.remove(index));
+ }
+
+ // No safe move. Load the source of one move into SCRATCH_REG, and
+ // then load SCRATCH_REG into the destination when it's safe.
+ if old_moves.len() > 0 {
+ // Make sure it's safe to use SCRATCH_REG
+ assert!(old_moves.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG)));
+
+ // Move SCRATCH <- opnd, and delay reg <- SCRATCH
+ let (reg, opnd) = old_moves.remove(0);
+ new_moves.push((Assembler::SCRATCH_REG, opnd));
+ old_moves.push((reg, Opnd::Reg(Assembler::SCRATCH_REG)));
+ }
+ }
+ new_moves
+ }
+
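The move scheduling in `reorder_reg_moves` can be read as a standalone algorithm: keep emitting any move whose destination is no longer needed as a source, and when only cycles remain, break one by routing a value through the scratch register. Below is a self-contained sketch under those assumptions, with toy `Src`/`SCRATCH` stand-ins rather than YJIT's `Opnd`/`Assembler::SCRATCH_REG`.

```rust
// Self-contained sketch of the move scheduling above (illustrative types only).
#[derive(Clone, Copy, PartialEq, Debug)]
enum Src { Reg(u8), Imm(i64) }

const SCRATCH: u8 = u8::MAX; // stand-in for Assembler::SCRATCH_REG

fn sequentialize(mut moves: Vec<(u8, Src)>) -> Vec<(u8, Src)> {
    // Drop no-op moves whose source and destination are the same register
    moves.retain(|&(dst, src)| src != Src::Reg(dst));

    let mut out = vec![];
    while !moves.is_empty() {
        // Keep emitting "safe" moves: ones whose destination no pending move still reads
        while let Some(i) = moves.iter().position(|&(dst, _)| {
            moves.iter().all(|&(_, src)| src != Src::Reg(dst))
        }) {
            out.push(moves.remove(i));
        }

        // Only cycles remain: break one by bouncing a source through SCRATCH
        if !moves.is_empty() {
            let (dst, src) = moves.remove(0);
            out.push((SCRATCH, src));
            moves.push((dst, Src::Reg(SCRATCH)));
        }
    }
    out
}

fn main() {
    // A swap of r0 and r1 plus an independent constant load into r2
    let moves = vec![(0, Src::Reg(1)), (1, Src::Reg(0)), (2, Src::Imm(7))];
    for (dst, src) in sequentialize(moves) {
        println!("r{} <- {:?}", dst, src);
    }
}
```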
/// Sets the out field on the various instructions that require allocated
/// registers because their output is used as the operand on a subsequent
/// instruction. This is our implementation of the linear scan algorithm.
@@ -1353,42 +1427,6 @@ impl Assembler
}
}
- // Reorder C argument moves, sometimes adding extra moves using SCRATCH_REG,
- // so that they will not rewrite each other before they are used.
- fn reorder_c_args(c_args: &Vec<(Reg, Opnd)>) -> Vec<(Reg, Opnd)> {
- // Return the index of a move whose destination is not used as a source if any.
- fn find_safe_arg(c_args: &Vec<(Reg, Opnd)>) -> Option<usize> {
- c_args.iter().enumerate().find(|(_, &(dest_reg, _))| {
- c_args.iter().all(|&(_, src_opnd)| src_opnd != Opnd::Reg(dest_reg))
- }).map(|(index, _)| index)
- }
-
- // Remove moves whose source and destination are the same
- let mut c_args: Vec<(Reg, Opnd)> = c_args.clone().into_iter()
- .filter(|&(reg, opnd)| Opnd::Reg(reg) != opnd).collect();
-
- let mut moves = vec![];
- while c_args.len() > 0 {
- // Keep taking safe moves
- while let Some(index) = find_safe_arg(&c_args) {
- moves.push(c_args.remove(index));
- }
-
- // No safe move. Load the source of one move into SCRATCH_REG, and
- // then load SCRATCH_REG into the destination when it's safe.
- if c_args.len() > 0 {
- // Make sure it's safe to use SCRATCH_REG
- assert!(c_args.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG)));
-
- // Move SCRATCH <- opnd, and delay reg <- SCRATCH
- let (reg, opnd) = c_args.remove(0);
- moves.push((Assembler::SCRATCH_REG, opnd));
- c_args.push((reg, Opnd::Reg(Assembler::SCRATCH_REG)));
- }
- }
- moves
- }
-
// Adjust the number of entries in live_ranges so that it can be indexed by mapped indexes.
fn shift_live_ranges(live_ranges: &mut Vec<usize>, start_index: usize, shift_offset: isize) {
if shift_offset >= 0 {
@@ -1564,7 +1602,7 @@ impl Assembler
if c_args.len() > 0 {
// Resolve C argument dependencies
let c_args_len = c_args.len() as isize;
- let moves = reorder_c_args(&c_args.drain(..).into_iter().collect());
+ let moves = Self::reorder_reg_moves(&c_args.drain(..).into_iter().collect());
shift_live_ranges(&mut shifted_live_ranges, asm.insns.len(), moves.len() as isize - c_args_len);
// Push batched C arguments
@@ -1808,7 +1846,7 @@ impl Assembler {
// Mark all temps as not being in registers.
// Temps will be marked back as being in registers by cpop_all.
// We assume that cpush_all + cpop_all are used for C functions in utils.rs
- // that don't require spill_temps for GC.
+ // that don't require spill_regs for GC.
self.set_reg_mapping(RegMapping::default());
}
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index 458777b5e3..493e61c28b 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -3,6 +3,7 @@
use crate::asm::*;
use crate::backend::ir::*;
+use crate::backend::current::TEMP_REGS;
use crate::core::*;
use crate::cruby::*;
use crate::invariants::*;
@@ -114,10 +115,13 @@ pub struct JITState<'a> {
/// Stack of symbol names for --yjit-perf
perf_stack: Vec<String>,
+
+ /// When true, this block is the first block compiled by gen_block_series().
+ first_block: bool,
}
impl<'a> JITState<'a> {
- pub fn new(blockid: BlockId, starting_ctx: Context, output_ptr: CodePtr, ec: EcPtr, ocb: &'a mut OutlinedCb) -> Self {
+ pub fn new(blockid: BlockId, starting_ctx: Context, output_ptr: CodePtr, ec: EcPtr, ocb: &'a mut OutlinedCb, first_block: bool) -> Self {
JITState {
iseq: blockid.iseq,
starting_insn_idx: blockid.idx,
@@ -140,6 +144,7 @@ impl<'a> JITState<'a> {
block_assumes_single_ractor: false,
perf_map: Rc::default(),
perf_stack: vec![],
+ first_block,
}
}
@@ -212,9 +217,16 @@ impl<'a> JITState<'a> {
self.next_insn_idx() + insn_len(next_opcode) as u16
}
- // Check if we are compiling the instruction at the stub PC
+ // Check if we are compiling the instruction at the stub PC with the target Context
// Meaning we are compiling the instruction that is next to execute
- pub fn at_current_insn(&self) -> bool {
+ pub fn at_compile_target(&self) -> bool {
+ // If this is not the first block compiled by gen_block_series(),
+ // it might be compiling the same block again with a different Context.
+ // In that case, it should defer_compilation() and inspect the stack there.
+ if !self.first_block {
+ return false;
+ }
+
let ec_pc: *mut VALUE = unsafe { get_cfp_pc(self.get_cfp()) };
ec_pc == self.pc
}
@@ -222,7 +234,7 @@ impl<'a> JITState<'a> {
// Peek at the nth topmost value on the Ruby stack.
// Returns the topmost value when n == 0.
pub fn peek_at_stack(&self, ctx: &Context, n: isize) -> VALUE {
- assert!(self.at_current_insn());
+ assert!(self.at_compile_target());
assert!(n < ctx.get_stack_size() as isize);
// Note: this does not account for ctx->sp_offset because
@@ -241,7 +253,7 @@ impl<'a> JITState<'a> {
}
fn peek_at_local(&self, n: i32) -> VALUE {
- assert!(self.at_current_insn());
+ assert!(self.at_compile_target());
let local_table_size: isize = unsafe { get_iseq_body_local_table_size(self.iseq) }
.try_into()
@@ -257,7 +269,7 @@ impl<'a> JITState<'a> {
}
fn peek_at_block_handler(&self, level: u32) -> VALUE {
- assert!(self.at_current_insn());
+ assert!(self.at_compile_target());
unsafe {
let ep = get_cfp_ep_level(self.get_cfp(), level);
@@ -656,7 +668,7 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
}
// Only able to check types when at current insn
- assert!(jit.at_current_insn());
+ assert!(jit.at_compile_target());
let self_val = jit.peek_at_self();
let self_val_type = Type::from(self_val);
@@ -1172,6 +1184,7 @@ pub fn gen_single_block(
ec: EcPtr,
cb: &mut CodeBlock,
ocb: &mut OutlinedCb,
+ first_block: bool,
) -> Result<BlockRef, ()> {
// Limit the number of specialized versions for this block
let ctx = limit_block_versions(blockid, start_ctx);
@@ -1195,7 +1208,7 @@ pub fn gen_single_block(
let mut insn_idx: IseqIdx = blockid.idx;
// Initialize a JIT state object
- let mut jit = JITState::new(blockid, ctx, cb.get_write_ptr(), ec, ocb);
+ let mut jit = JITState::new(blockid, ctx, cb.get_write_ptr(), ec, ocb, first_block);
jit.iseq = blockid.iseq;
// Create a backend assembler instance
@@ -1265,7 +1278,7 @@ pub fn gen_single_block(
}
// In debug mode, verify our existing assumption
- if cfg!(debug_assertions) && get_option!(verify_ctx) && jit.at_current_insn() {
+ if cfg!(debug_assertions) && get_option!(verify_ctx) && jit.at_compile_target() {
verify_ctx(&jit, &asm.ctx);
}
@@ -1508,7 +1521,7 @@ fn fuse_putobject_opt_ltlt(
if shift_amt > 63 || shift_amt < 0 {
return None;
}
- if !jit.at_current_insn() {
+ if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -1772,7 +1785,7 @@ fn gen_splatkw(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime hash operand
- if !jit.at_current_insn() {
+ if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -2146,7 +2159,7 @@ fn gen_expandarray(
let array_opnd = asm.stack_opnd(0);
// Defer compilation so we can specialize on a runtime `self`
- if !jit.at_current_insn() {
+ if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -2345,7 +2358,16 @@ fn gen_getlocal_generic(
// Load the local from the block
// val = *(vm_get_ep(GET_EP(), level) - idx);
let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32);
- Opnd::mem(64, ep_opnd, offs)
+ let local_opnd = Opnd::mem(64, ep_opnd, offs);
+
+ // Write back an argument register to the stack. If the local variable
+ // is an argument, it might have an allocated register, but if this ISEQ
+ // is known to escape EP, the register shouldn't be used after this getlocal.
+ if level == 0 && asm.ctx.get_reg_mapping().get_reg(asm.local_opnd(ep_offset).reg_opnd()).is_some() {
+ asm.mov(local_opnd, asm.local_opnd(ep_offset));
+ }
+
+ local_opnd
};
// Write the local at SP
@@ -2425,6 +2447,13 @@ fn gen_setlocal_generic(
asm.alloc_reg(local_opnd.reg_opnd());
(flags_opnd, local_opnd)
} else {
+ // Make sure getlocal doesn't read a stale register. If the local variable
+ // is an argument, it might have an allocated register, but if this ISEQ
+ // is known to escape EP, the register shouldn't be used after this setlocal.
+ if level == 0 {
+ asm.ctx.dealloc_reg(asm.local_opnd(ep_offset).reg_opnd());
+ }
+
// Load flags and the local for the level
let ep_opnd = gen_get_ep(asm, level);
let flags_opnd = Opnd::mem(
@@ -2627,11 +2656,11 @@ fn gen_checkkeyword(
// The index of the keyword we want to check
let index: i64 = jit.get_arg(1).as_i64();
- // Load environment pointer EP
- let ep_opnd = gen_get_ep(asm, 0);
-
- // VALUE kw_bits = *(ep - bits);
- let bits_opnd = Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * -bits_offset);
+ // `unspecified_bits` is part of the local table. Therefore, we may have allocated a register
+ // for that "local" when passing it as an argument. If so, we must read it from the register
+ // rather than loading stale bits from the stack. We assume EP has not escaped as of entering
+ // a method with keyword arguments.
+ let bits_opnd = asm.local_opnd(bits_offset as u32);
// unsigned int b = (unsigned int)FIX2ULONG(kw_bits);
// if ((b & (0x01 << idx))) {
@@ -2846,7 +2875,7 @@ fn gen_getinstancevariable(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
- if !jit.at_current_insn() {
+ if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -2910,7 +2939,7 @@ fn gen_setinstancevariable(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
- if !jit.at_current_insn() {
+ if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -3221,7 +3250,7 @@ fn gen_definedivar(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize base on a runtime receiver
- if !jit.at_current_insn() {
+ if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -3550,7 +3579,7 @@ fn gen_equality_specialized(
return Some(true);
}
- if !jit.at_current_insn() {
+ if !jit.at_compile_target() {
return None;
}
let comptime_a = jit.peek_at_stack(&asm.ctx, 1);
@@ -3669,7 +3698,7 @@ fn gen_opt_aref(
}
// Defer compilation so we can specialize base on a runtime receiver
- if !jit.at_current_insn() {
+ if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -3770,7 +3799,7 @@ fn gen_opt_aset(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
- if !jit.at_current_insn() {
+ if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -4376,7 +4405,7 @@ fn gen_opt_case_dispatch(
// We'd hope that our jitted code will be sufficiently fast without the
// hash lookup, at least for small hashes, but it's worth revisiting this
// assumption in the future.
- if !jit.at_current_insn() {
+ if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -6433,14 +6462,6 @@ fn gen_push_frame(
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SELF), frame.recv);
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_BLOCK_CODE), 0.into());
- if frame.iseq.is_some() {
- // Spill stack temps to let the callee use them (must be done before changing the SP register)
- asm.spill_regs();
-
- // Saving SP before calculating ep avoids a dependency on a register
- // However this must be done after referencing frame.recv, which may be SP-relative
- asm.mov(SP, sp);
- }
let ep = asm.sub(sp, SIZEOF_VALUE.into());
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_EP), ep);
}
@@ -7770,9 +7791,32 @@ fn gen_send_iseq(
pc: None, // We are calling into jitted code, which will set the PC as necessary
}));
+ // Create a context for the callee
+ let mut callee_ctx = Context::default();
+
+ // Transfer some stack temp registers to the callee's locals for arguments.
+ let mapped_temps = if !forwarding {
+ asm.map_temp_regs_to_args(&mut callee_ctx, argc)
+ } else {
+ // When forwarding, the callee's local table has only a callinfo,
+ // so we can't map the actual arguments to the callee's locals.
+ vec![]
+ };
+
+ // Spill stack temps and locals that are not used by the callee.
+ // This must be done before changing the SP register.
+ asm.spill_regs_except(&mapped_temps);
+
+ // Saving SP before calculating ep avoids a dependency on a register
+ // However this must be done after referencing frame.recv, which may be SP-relative
+ asm.mov(SP, callee_sp);
+
// Log the name of the method we're calling to. We intentionally don't do this for inlined ISEQs.
// We also do this after gen_push_frame() to minimize the impact of spill_temps() on asm.ccall().
if get_option!(gen_stats) {
+ // Protect caller-saved registers in case they're used for arguments
+ asm.cpush_all();
+
// Assemble the ISEQ name string
let name_str = get_iseq_name(iseq);
@@ -7781,6 +7825,7 @@ fn gen_send_iseq(
// Increment the counter for this cfunc
asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]);
+ asm.cpop_all();
}
// No need to set cfp->pc since the callee sets it whenever calling into routines
@@ -7794,9 +7839,6 @@ fn gen_send_iseq(
idx: jit.next_insn_idx(),
};
- // Create a context for the callee
- let mut callee_ctx = Context::default();
-
// If the callee has :inline_block annotation and the callsite has a block ISEQ,
// duplicate a callee block for each block ISEQ to make its `yield` monomorphic.
if let (Some(BlockHandler::BlockISeq(iseq)), true) = (block, builtin_attrs & BUILTIN_ATTR_INLINE_BLOCK != 0) {
@@ -7816,6 +7858,7 @@ fn gen_send_iseq(
callee_ctx.set_local_type(0, Type::Unknown)
}
+ // Set the receiver type in the callee's context
let recv_type = if captured_self {
Type::Unknown // we don't track the type information of captured->self for now
} else {
@@ -7823,6 +7866,29 @@ fn gen_send_iseq(
};
callee_ctx.upgrade_opnd_type(SelfOpnd, recv_type);
+ // Now that callee_ctx is prepared, discover a block that can be reused if we move some registers.
+ // If there's such a block, move registers accordingly to avoid creating a new block.
+ let blockid = BlockId { iseq, idx: start_pc_offset };
+ if !mapped_temps.is_empty() {
+ // Discover a block that have the same things in different (or same) registers
+ if let Some(block_ctx) = find_block_ctx_with_same_regs(blockid, &callee_ctx) {
+ // List pairs of moves for making the register mappings compatible
+ let mut moves = vec![];
+ for &reg_opnd in callee_ctx.get_reg_mapping().get_reg_opnds().iter() {
+ let old_reg = TEMP_REGS[callee_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
+ let new_reg = TEMP_REGS[block_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
+ moves.push((new_reg, Opnd::Reg(old_reg)));
+ }
+
+ // Shuffle them to break cycles and generate the moves
+ let moves = Assembler::reorder_reg_moves(&moves);
+ for (reg, opnd) in moves {
+ asm.load_into(Opnd::Reg(reg), opnd);
+ }
+ callee_ctx.set_reg_mapping(block_ctx.get_reg_mapping());
+ }
+ }
+
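The move list built above pairs, for each register-resident operand, the register the callee context currently assigns with the register the reusable block version expects. A toy sketch of that pairing (simplified `(operand, register)` tuples instead of `RegMapping`/`TEMP_REGS`, purely illustrative):

```rust
// Illustrative sketch (not YJIT code) of building the move list above: every
// operand keeps its meaning, only the register holding it changes, so we pair
// "register the value is in now" with "register the reused block expects".
fn remap_moves(current: &[(u32, u8)], wanted: &[(u32, u8)]) -> Vec<(u8, u8)> {
    // (operand id, register index) pairs in; (new_reg <- old_reg) moves out
    current.iter().map(|&(opnd, old_reg)| {
        let (_, new_reg) = wanted.iter().find(|&&(o, _)| o == opnd).unwrap();
        (*new_reg, old_reg)
    }).collect()
}

fn main() {
    // The callee's operands 0 and 1 sit in regs 0/1, but an existing block
    // version expects them in regs 1/0, so we need a swap (broken up later
    // by the SCRATCH_REG shuffling shown earlier).
    let moves = remap_moves(&[(0, 0), (1, 1)], &[(0, 1), (1, 0)]);
    assert_eq!(moves, vec![(1, 0), (0, 1)]);
}
```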
// The callee might change locals through Kernel#binding and other means.
asm.clear_local_types();
@@ -7856,10 +7922,7 @@ fn gen_send_iseq(
gen_direct_jump(
jit,
&callee_ctx,
- BlockId {
- iseq: iseq,
- idx: start_pc_offset,
- },
+ blockid,
asm,
);
@@ -8541,7 +8604,7 @@ fn gen_send_general(
let mut flags = unsafe { vm_ci_flag(ci) };
// Defer compilation so we can specialize on class of receiver
- if !jit.at_current_insn() {
+ if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -9102,7 +9165,7 @@ fn gen_invokeblock_specialized(
asm: &mut Assembler,
cd: *const rb_call_data,
) -> Option<CodegenStatus> {
- if !jit.at_current_insn() {
+ if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -9265,7 +9328,7 @@ fn gen_invokesuper_specialized(
cd: *const rb_call_data,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on class of receiver
- if !jit.at_current_insn() {
+ if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -9499,7 +9562,7 @@ fn gen_objtostring(
jit: &mut JITState,
asm: &mut Assembler,
) -> Option<CodegenStatus> {
- if !jit.at_current_insn() {
+ if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -9842,7 +9905,7 @@ fn gen_getblockparamproxy(
jit: &mut JITState,
asm: &mut Assembler,
) -> Option<CodegenStatus> {
- if !jit.at_current_insn() {
+ if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -10593,6 +10656,7 @@ mod tests {
cb.get_write_ptr(),
ptr::null(), // No execution context in tests. No peeking!
ocb,
+ true,
)
}
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
index 4d2425aeb2..1f96dd453f 100644
--- a/yjit/src/core.rs
+++ b/yjit/src/core.rs
@@ -371,6 +371,12 @@ impl RegMapping {
.map(|(reg_idx, _)| reg_idx)
}
+ /// Set a given operand to the register at a given index.
+ pub fn set_reg(&mut self, opnd: RegOpnd, reg_idx: usize) {
+ assert!(self.0[reg_idx].is_none());
+ self.0[reg_idx] = Some(opnd);
+ }
+
/// Allocate a register for a given operand if available.
/// Return true if self is updated.
pub fn alloc_reg(&mut self, opnd: RegOpnd) -> bool {
@@ -435,6 +441,32 @@ impl RegMapping {
RegOpnd::Local(_) => index_temps.rev().find(|(_, reg_opnd)| reg_opnd.is_none()),
}.map(|(index, _)| index)
}
+
+ /// Return a vector of RegOpnds that have an allocated register
+ pub fn get_reg_opnds(&self) -> Vec<RegOpnd> {
+ self.0.iter().filter_map(|&reg_opnd| reg_opnd).collect()
+ }
+
+ /// Return TypeDiff::Compatible(diff) if dst's mapping can be reached from self by moving
+ /// registers `diff` times, or TypeDiff::Incompatible if they hold different things in registers.
+ pub fn diff(&self, dst: RegMapping) -> TypeDiff {
+ let src_opnds = self.get_reg_opnds();
+ let dst_opnds = dst.get_reg_opnds();
+ if src_opnds.len() != dst_opnds.len() {
+ return TypeDiff::Incompatible;
+ }
+
+ let mut diff = 0;
+ for &reg_opnd in src_opnds.iter() {
+ match (self.get_reg(reg_opnd), dst.get_reg(reg_opnd)) {
+ (Some(src_idx), Some(dst_idx)) => if src_idx != dst_idx {
+ diff += 1;
+ }
+ _ => return TypeDiff::Incompatible,
+ }
+ }
+ TypeDiff::Compatible(diff)
+ }
}
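A rough standalone illustration of the diff above (a simplified array mapping instead of YJIT's `RegMapping`, names made up): two mappings are compatible only if they hold exactly the same operands, and the distance is how many of those operands sit in a different register.

```rust
// Minimal sketch of the "same contents, possibly different registers" diff above,
// using a simplified mapping (register index -> operand id). Names are illustrative.
type Mapping = [Option<u32>; 5];

fn move_distance(src: &Mapping, dst: &Mapping) -> Option<usize> {
    // Both sides must hold exactly the same set of operands...
    let held = |m: &Mapping| {
        let mut v: Vec<u32> = m.iter().flatten().copied().collect();
        v.sort();
        v
    };
    if held(src) != held(dst) {
        return None; // incompatible: different things in registers
    }
    // ...and the distance is the number of operands parked in a different register.
    Some(held(src).iter().filter(|&&opnd| {
        src.iter().position(|&x| x == Some(opnd)) != dst.iter().position(|&x| x == Some(opnd))
    }).count())
}

fn main() {
    let src: Mapping = [Some(10), Some(11), None, None, None];
    let dst: Mapping = [Some(11), Some(10), None, None, None];
    assert_eq!(move_distance(&src, &dst), Some(2)); // a swap costs two moves
}
```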
impl fmt::Debug for RegMapping {
@@ -2080,9 +2112,8 @@ pub fn take_version_list(blockid: BlockId) -> VersionList {
}
}
-/// Count the number of block versions matching a given blockid
-/// `inlined: true` counts inlined versions, and `inlined: false` counts other versions.
-fn get_num_versions(blockid: BlockId, inlined: bool) -> usize {
+/// Count the number of block versions that match a given BlockId and part of a Context
+fn get_num_versions(blockid: BlockId, ctx: &Context) -> usize {
let insn_idx = blockid.idx.as_usize();
match get_iseq_payload(blockid.iseq) {
@@ -2094,9 +2125,14 @@ fn get_num_versions(blockid: BlockId, inlined: bool) -> usize {
.version_map
.get(insn_idx)
.map(|versions| {
- versions.iter().filter(|&&version|
- Context::decode(unsafe { version.as_ref() }.ctx).inline() == inlined
- ).count()
+ versions.iter().filter(|&&version| {
+ let version_ctx = Context::decode(unsafe { version.as_ref() }.ctx);
+ // Inline versions are counted separately towards MAX_INLINE_VERSIONS.
+ version_ctx.inline() == ctx.inline() &&
+ // find_block_versions() finds only blocks with compatible reg_mapping,
+ // so count only versions with compatible reg_mapping.
+ version_ctx.reg_mapping == ctx.reg_mapping
+ }).count()
})
.unwrap_or(0)
}
@@ -2128,10 +2164,7 @@ pub fn get_or_create_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> {
/// Retrieve a basic block version for an (iseq, idx) tuple
/// This will return None if no version is found
fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
- let versions = match get_version_list(blockid) {
- Some(versions) => versions,
- None => return None,
- };
+ let versions = get_version_list(blockid)?;
// Best match found
let mut best_version: Option<BlockRef> = None;
@@ -2156,6 +2189,33 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
return best_version;
}
+/// Like find_block_version(), but tolerates RegMapping differences that can be fixed
+/// by register moves, and returns the matching block's Context
+pub fn find_block_ctx_with_same_regs(blockid: BlockId, ctx: &Context) -> Option<Context> {
+ let versions = get_version_list(blockid)?;
+
+ // Best match found
+ let mut best_ctx: Option<Context> = None;
+ let mut best_diff = usize::MAX;
+
+ // For each version matching the blockid
+ for blockref in versions.iter() {
+ let block = unsafe { blockref.as_ref() };
+ let block_ctx = Context::decode(block.ctx);
+
+ // Discover the best block that is compatible if we move registers
+ match ctx.diff_with_same_regs(&block_ctx) {
+ TypeDiff::Compatible(diff) if diff < best_diff => {
+ best_ctx = Some(block_ctx);
+ best_diff = diff;
+ }
+ _ => {}
+ }
+ }
+
+ best_ctx
+}
+
/// Allow inlining a Block up to MAX_INLINE_VERSIONS times.
const MAX_INLINE_VERSIONS: usize = 1000;
@@ -2166,7 +2226,7 @@ pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
return *ctx;
}
- let next_versions = get_num_versions(blockid, ctx.inline()) + 1;
+ let next_versions = get_num_versions(blockid, ctx) + 1;
let max_versions = if ctx.inline() {
MAX_INLINE_VERSIONS
} else {
@@ -2782,8 +2842,24 @@ impl Context {
return TypeDiff::Compatible(diff);
}
+ /// Basically diff() but allows RegMapping incompatibility that can be fixed
+ /// by register moves.
+ pub fn diff_with_same_regs(&self, dst: &Context) -> TypeDiff {
+ // Prepare a Context with the same registers
+ let mut dst_with_same_regs = dst.clone();
+ dst_with_same_regs.set_reg_mapping(self.get_reg_mapping());
+
+ // Diff registers and other stuff separately, and merge them
+ match (self.diff(&dst_with_same_regs), self.get_reg_mapping().diff(dst.get_reg_mapping())) {
+ (TypeDiff::Compatible(ctx_diff), TypeDiff::Compatible(reg_diff)) => {
+ TypeDiff::Compatible(ctx_diff + reg_diff)
+ }
+ _ => TypeDiff::Incompatible
+ }
+ }
+
pub fn two_fixnums_on_stack(&self, jit: &mut JITState) -> Option<bool> {
- if jit.at_current_insn() {
+ if jit.at_compile_target() {
let comptime_recv = jit.peek_at_stack(self, 1);
let comptime_arg = jit.peek_at_stack(self, 0);
return Some(comptime_recv.fixnum_p() && comptime_arg.fixnum_p());
@@ -2955,7 +3031,7 @@ fn gen_block_series_body(
let mut batch = Vec::with_capacity(EXPECTED_BATCH_SIZE);
// Generate code for the first block
- let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb).ok()?;
+ let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb, true).ok()?;
batch.push(first_block); // Keep track of this block version
// Add the block version to the VersionMap for this ISEQ
@@ -2996,7 +3072,7 @@ fn gen_block_series_body(
// Generate new block using context from the last branch.
let requested_ctx = Context::decode(requested_ctx);
- let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb);
+ let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb, false);
// If the block failed to compile
if result.is_err() {
@@ -4312,7 +4388,7 @@ mod tests {
let cb = CodeBlock::new_dummy(1024);
let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(1024));
let dumm_addr = cb.get_write_ptr();
- let block = JITState::new(blockid, Context::default(), dumm_addr, ptr::null(), &mut ocb)
+ let block = JITState::new(blockid, Context::default(), dumm_addr, ptr::null(), &mut ocb, true)
.into_block(0, dumm_addr, dumm_addr, vec![]);
let _dropper = BlockDropper(block);