From 4cc58ea0b865f2fd20f1e881ddbd4c4fab0b072c Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Wed, 17 Apr 2024 12:00:03 -0700 Subject: YJIT: Optimize local variables when EP == BP (#10487) --- yjit/bindgen/src/main.rs | 2 + yjit/src/codegen.rs | 96 ++++++++++++++++++++++++++++++++---------- yjit/src/core.rs | 19 ++++++++- yjit/src/cruby.rs | 1 + yjit/src/cruby_bindings.inc.rs | 11 +++++ yjit/src/invariants.rs | 55 ++++++++++++++++++++++++ 6 files changed, 160 insertions(+), 24 deletions(-) (limited to 'yjit') diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index c58df7c377..d91e330f5e 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -298,6 +298,7 @@ fn main() { .allowlist_type("ruby_tag_type") .allowlist_type("ruby_vm_throw_flags") .allowlist_type("vm_check_match_type") + .allowlist_type("rb_iseq_type") // From yjit.c .allowlist_function("rb_iseq_(get|set)_yjit_payload") @@ -415,6 +416,7 @@ fn main() { .allowlist_function("rb_get_iseq_body_parent_iseq") .allowlist_function("rb_get_iseq_body_iseq_encoded") .allowlist_function("rb_get_iseq_body_stack_max") + .allowlist_function("rb_get_iseq_body_type") .allowlist_function("rb_get_iseq_flags_has_lead") .allowlist_function("rb_get_iseq_flags_has_opt") .allowlist_function("rb_get_iseq_flags_has_kw") diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 8ab5e0e230..9d355b854d 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -46,7 +46,7 @@ type InsnGenFn = fn( /// Represents a [core::Block] while we build it. pub struct JITState { /// Instruction sequence for the compiling block - iseq: IseqPtr, + pub iseq: IseqPtr, /// The iseq index of the first instruction in the block starting_insn_idx: IseqIdx, @@ -101,6 +101,9 @@ pub struct JITState { /// A list of classes that are not supposed to have a singleton class. pub no_singleton_class_assumptions: Vec, + /// When true, the block is valid only when base pointer is equal to environment pointer. + pub no_ep_escape: bool, + /// When true, the block is valid only when there is a total of one ractor running pub block_assumes_single_ractor: bool, @@ -130,6 +133,7 @@ impl JITState { bop_assumptions: vec![], stable_constant_names_assumption: None, no_singleton_class_assumptions: vec![], + no_ep_escape: false, block_assumes_single_ractor: false, perf_map: Rc::default(), perf_stack: vec![], @@ -171,6 +175,23 @@ impl JITState { unsafe { *(self.pc.offset(arg_idx + 1)) } } + /// Return true if the current ISEQ could escape an environment. + /// + /// As of vm_push_frame(), EP is always equal to BP. However, after pushing + /// a frame, some ISEQ setups call vm_bind_update_env(), which redirects EP. + /// Also, some method calls escape the environment to the heap. + fn escapes_ep(&self) -> bool { + match unsafe { get_iseq_body_type(self.iseq) } { + //
frame is always associated to TOPLEVEL_BINDING. + ISEQ_TYPE_MAIN | + // Kernel#eval uses a heap EP when a Binding argument is not nil. + ISEQ_TYPE_EVAL => true, + // If this ISEQ has previously escaped EP, give up the optimization. + _ if iseq_escapes_ep(self.iseq) => true, + _ => false, + } + } + // Get the index of the next instruction fn next_insn_idx(&self) -> u16 { self.insn_idx + insn_len(self.get_opcode()) as u16 @@ -250,6 +271,19 @@ impl JITState { true } + /// Assume that base pointer is equal to environment pointer in the current ISEQ. + /// Return true if it's safe to assume so. + fn assume_no_ep_escape(&mut self, asm: &mut Assembler, ocb: &mut OutlinedCb) -> bool { + if jit_ensure_block_entry_exit(self, asm, ocb).is_none() { + return false; // out of space, give up + } + if self.escapes_ep() { + return false; // EP has been escaped in this ISEQ. disable the optimization to avoid an invalidation loop. + } + self.no_ep_escape = true; + true + } + fn get_cfp(&self) -> *mut rb_control_frame_struct { unsafe { get_ec_cfp(self.ec) } } @@ -2203,16 +2237,22 @@ fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd { fn gen_getlocal_generic( jit: &mut JITState, asm: &mut Assembler, + ocb: &mut OutlinedCb, ep_offset: u32, level: u32, ) -> Option { - // Load environment pointer EP (level 0) from CFP - let ep_opnd = gen_get_ep(asm, level); + let local_opnd = if level == 0 && jit.assume_no_ep_escape(asm, ocb) { + // Load the local using SP register + asm.ctx.ep_opnd(-(ep_offset as i32)) + } else { + // Load environment pointer EP (level 0) from CFP + let ep_opnd = gen_get_ep(asm, level); - // Load the local from the block - // val = *(vm_get_ep(GET_EP(), level) - idx); - let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32); - let local_opnd = Opnd::mem(64, ep_opnd, offs); + // Load the local from the block + // val = *(vm_get_ep(GET_EP(), level) - idx); + let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32); + Opnd::mem(64, ep_opnd, offs) + }; // Write the local at SP let stack_top = if level == 0 { @@ -2230,29 +2270,29 @@ fn gen_getlocal_generic( fn gen_getlocal( jit: &mut JITState, asm: &mut Assembler, - _ocb: &mut OutlinedCb, + ocb: &mut OutlinedCb, ) -> Option { let idx = jit.get_arg(0).as_u32(); let level = jit.get_arg(1).as_u32(); - gen_getlocal_generic(jit, asm, idx, level) + gen_getlocal_generic(jit, asm, ocb, idx, level) } fn gen_getlocal_wc0( jit: &mut JITState, asm: &mut Assembler, - _ocb: &mut OutlinedCb, + ocb: &mut OutlinedCb, ) -> Option { let idx = jit.get_arg(0).as_u32(); - gen_getlocal_generic(jit, asm, idx, 0) + gen_getlocal_generic(jit, asm, ocb, idx, 0) } fn gen_getlocal_wc1( jit: &mut JITState, asm: &mut Assembler, - _ocb: &mut OutlinedCb, + ocb: &mut OutlinedCb, ) -> Option { let idx = jit.get_arg(0).as_u32(); - gen_getlocal_generic(jit, asm, idx, 1) + gen_getlocal_generic(jit, asm, ocb, idx, 1) } fn gen_setlocal_generic( @@ -2264,11 +2304,11 @@ fn gen_setlocal_generic( ) -> Option { let value_type = asm.ctx.get_opnd_type(StackOpnd(0)); - // Load environment pointer EP at level - let ep_opnd = gen_get_ep(asm, level); - // Fallback because of write barrier if asm.ctx.get_chain_depth() > 0 { + // Load environment pointer EP at level + let ep_opnd = gen_get_ep(asm, level); + // This function should not yield to the GC. // void rb_vm_env_write(const VALUE *ep, int index, VALUE v) let index = -(ep_offset as i64); @@ -2286,16 +2326,27 @@ fn gen_setlocal_generic( return Some(KeepCompiling); } - // Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however write barriers - // only affect heap objects being written. If we know an immediate value is being written we - // can skip this check. - if !value_type.is_imm() { - // flags & VM_ENV_FLAG_WB_REQUIRED + let (flags_opnd, local_opnd) = if level == 0 && jit.assume_no_ep_escape(asm, ocb) { + // Load flags and the local using SP register + let local_opnd = asm.ctx.ep_opnd(-(ep_offset as i32)); + let flags_opnd = asm.ctx.ep_opnd(VM_ENV_DATA_INDEX_FLAGS as i32); + (flags_opnd, local_opnd) + } else { + // Load flags and the local for the level + let ep_opnd = gen_get_ep(asm, level); let flags_opnd = Opnd::mem( 64, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_FLAGS as i32, ); + (flags_opnd, Opnd::mem(64, ep_opnd, -SIZEOF_VALUE_I32 * ep_offset as i32)) + }; + + // Write barriers may be required when VM_ENV_FLAG_WB_REQUIRED is set, however write barriers + // only affect heap objects being written. If we know an immediate value is being written we + // can skip this check. + if !value_type.is_imm() { + // flags & VM_ENV_FLAG_WB_REQUIRED asm.test(flags_opnd, VM_ENV_FLAG_WB_REQUIRED.into()); // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0 @@ -2319,8 +2370,7 @@ fn gen_setlocal_generic( let stack_top = asm.stack_pop(1); // Write the value at the environment pointer - let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32); - asm.mov(Opnd::mem(64, ep_opnd, offs), stack_top); + asm.mov(local_opnd, stack_top); Some(KeepCompiling) } diff --git a/yjit/src/core.rs b/yjit/src/core.rs index fb7d52cc5d..ccc458fe22 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -1657,6 +1657,9 @@ impl JITState { for klass in self.no_singleton_class_assumptions { track_no_singleton_class_assumption(blockref, klass); } + if self.no_ep_escape { + track_no_ep_escape_assumption(blockref, self.iseq); + } blockref } @@ -1798,6 +1801,13 @@ impl Context { return Opnd::mem(64, SP, offset); } + /// Get an operand for the adjusted environment pointer address using SP register. + /// This is valid only when a Binding object hasn't been created for the frame. + pub fn ep_opnd(&self, offset: i32) -> Opnd { + let ep_offset = self.get_stack_size() as i32 + 1; + self.sp_opnd(-ep_offset + offset) + } + /// Stop using a register for a given stack temp. /// This allows us to reuse the register for a value that we know is dead /// and will no longer be used (e.g. popped stack temp). @@ -3124,6 +3134,12 @@ pub fn defer_compilation( // Likely a stub due to the increased chain depth let target0_address = branch.set_target(0, blockid, &next_ctx, ocb); + // Pad the block if it has the potential to be invalidated. This must be + // done before gen_fn() in case the jump is overwritten by a fallthrough. + if jit.block_entry_exit.is_some() { + asm.pad_inval_patch(); + } + // Call the branch generation function asm_comment!(asm, "defer_compilation"); asm.mark_branch_start(&branch); @@ -3307,9 +3323,10 @@ pub fn invalidate_block_version(blockref: &BlockRef) { assert!( cb.get_write_ptr() <= block_end, - "invalidation wrote past end of block (code_size: {:?}, new_size: {})", + "invalidation wrote past end of block (code_size: {:?}, new_size: {}, start_addr: {:?})", block.code_size(), cb.get_write_ptr().as_offset() - block_start.as_offset(), + block.start_addr.raw_ptr(cb), ); cb.set_write_ptr(cur_pos); cb.set_dropped_bytes(cur_dropped_bytes); diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index 9547e3fa2c..d07262ad4f 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -170,6 +170,7 @@ pub use rb_iseq_encoded_size as get_iseq_encoded_size; pub use rb_get_iseq_body_local_iseq as get_iseq_body_local_iseq; pub use rb_get_iseq_body_iseq_encoded as get_iseq_body_iseq_encoded; pub use rb_get_iseq_body_stack_max as get_iseq_body_stack_max; +pub use rb_get_iseq_body_type as get_iseq_body_type; pub use rb_get_iseq_flags_has_lead as get_iseq_flags_has_lead; pub use rb_get_iseq_flags_has_opt as get_iseq_flags_has_opt; pub use rb_get_iseq_flags_has_kw as get_iseq_flags_has_kw; diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 359227d60d..98d380826a 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -478,6 +478,16 @@ pub struct iseq_inline_iv_cache_entry { pub struct iseq_inline_cvar_cache_entry { pub entry: *mut rb_cvar_class_tbl_entry, } +pub const ISEQ_TYPE_TOP: rb_iseq_type = 0; +pub const ISEQ_TYPE_METHOD: rb_iseq_type = 1; +pub const ISEQ_TYPE_BLOCK: rb_iseq_type = 2; +pub const ISEQ_TYPE_CLASS: rb_iseq_type = 3; +pub const ISEQ_TYPE_RESCUE: rb_iseq_type = 4; +pub const ISEQ_TYPE_ENSURE: rb_iseq_type = 5; +pub const ISEQ_TYPE_EVAL: rb_iseq_type = 6; +pub const ISEQ_TYPE_MAIN: rb_iseq_type = 7; +pub const ISEQ_TYPE_PLAIN: rb_iseq_type = 8; +pub type rb_iseq_type = u32; pub const BUILTIN_ATTR_LEAF: rb_builtin_attr = 1; pub const BUILTIN_ATTR_SINGLE_NOARG_LEAF: rb_builtin_attr = 2; pub const BUILTIN_ATTR_INLINE_BLOCK: rb_builtin_attr = 4; @@ -1153,6 +1163,7 @@ extern "C" { pub fn rb_get_iseq_body_local_table_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_get_iseq_body_iseq_encoded(iseq: *const rb_iseq_t) -> *mut VALUE; pub fn rb_get_iseq_body_stack_max(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; + pub fn rb_get_iseq_body_type(iseq: *const rb_iseq_t) -> rb_iseq_type; pub fn rb_get_iseq_flags_has_lead(iseq: *const rb_iseq_t) -> bool; pub fn rb_get_iseq_flags_has_opt(iseq: *const rb_iseq_t) -> bool; pub fn rb_get_iseq_flags_has_kw(iseq: *const rb_iseq_t) -> bool; diff --git a/yjit/src/invariants.rs b/yjit/src/invariants.rs index e460293440..a5fd6b7ab5 100644 --- a/yjit/src/invariants.rs +++ b/yjit/src/invariants.rs @@ -59,6 +59,11 @@ pub struct Invariants { /// there has been a singleton class for the class after boot, so you cannot /// assume no singleton class going forward. no_singleton_classes: HashMap>, + + /// A map from an ISEQ to a set of blocks that assume base pointer is equal + /// to environment pointer. When the set is empty, it means that EP has been + /// escaped in the ISEQ. + no_ep_escape_iseqs: HashMap>, } /// Private singleton instance of the invariants global struct. @@ -76,6 +81,7 @@ impl Invariants { constant_state_blocks: HashMap::new(), block_constant_states: HashMap::new(), no_singleton_classes: HashMap::new(), + no_ep_escape_iseqs: HashMap::new(), }); } } @@ -154,6 +160,23 @@ pub fn has_singleton_class_of(klass: VALUE) -> bool { .map_or(false, |blocks| blocks.is_empty()) } +/// Track that a block will assume that base pointer is equal to environment pointer. +pub fn track_no_ep_escape_assumption(uninit_block: BlockRef, iseq: IseqPtr) { + Invariants::get_instance() + .no_ep_escape_iseqs + .entry(iseq) + .or_default() + .insert(uninit_block); +} + +/// Returns true if a given ISEQ has previously escaped an environment. +pub fn iseq_escapes_ep(iseq: IseqPtr) -> bool { + Invariants::get_instance() + .no_ep_escape_iseqs + .get(&iseq) + .map_or(false, |blocks| blocks.is_empty()) +} + // Checks rb_method_basic_definition_p and registers the current block for invalidation if method // lookup changes. // A "basic method" is one defined during VM boot, so we can use this to check assumptions based on @@ -420,6 +443,10 @@ pub fn block_assumptions_free(blockref: BlockRef) { for (_, blocks) in invariants.no_singleton_classes.iter_mut() { blocks.remove(&blockref); } + // Remove tracking for blocks assuming EP doesn't escape + for (_, blocks) in invariants.no_ep_escape_iseqs.iter_mut() { + blocks.remove(&blockref); + } } /// Callback from the opt_setinlinecache instruction in the interpreter. @@ -515,6 +542,34 @@ pub extern "C" fn rb_yjit_invalidate_no_singleton_class(klass: VALUE) { } } +/// Invalidate blocks for a given ISEQ that assumes environment pointer is +/// equal to base pointer. +#[no_mangle] +pub extern "C" fn rb_yjit_invalidate_ep_is_bp(iseq: IseqPtr) { + // Skip tracking EP escapes on boot. We don't need to invalidate anything during boot. + if unsafe { INVARIANTS.is_none() } { + return; + } + + // If an EP escape for this ISEQ is detected for the first time, invalidate all blocks + // associated to the ISEQ. + let no_ep_escape_iseqs = &mut Invariants::get_instance().no_ep_escape_iseqs; + match no_ep_escape_iseqs.get_mut(&iseq) { + Some(blocks) => { + // Invalidate existing blocks and let jit.ep_is_bp() + // return true when they are compiled again + for block in mem::take(blocks) { + invalidate_block_version(&block); + incr_counter!(invalidate_no_singleton_class); + } + } + None => { + // Let jit.ep_is_bp() return false for this ISEQ + no_ep_escape_iseqs.insert(iseq, HashSet::new()); + } + } +} + // Invalidate all generated code and patch C method return code to contain // logic for firing the c_return TracePoint event. Once rb_vm_barrier() // returns, all other ractors are pausing inside RB_VM_LOCK_ENTER(), which -- cgit v1.2.3