diff options
| author | Takashi Kokubun <takashi.kokubun@shopify.com> | 2025-08-27 10:01:07 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-27 10:01:07 -0700 |
| commit | 76810fc34905011535f50c3f8bbcaf39cb80b6cc (patch) | |
| tree | 686b602c371a6436d0ea25cc1a054b641b385db9 | |
| parent | 61d26c35bf8c744b4c59a44536bc58a6c4653ab6 (diff) | |
ZJIT: Implement side exit stats (#14357)
| -rw-r--r-- | internal/vm.h | 2 | ||||
| -rw-r--r-- | test/ruby/test_zjit.rb | 4 | ||||
| -rw-r--r-- | vm_exec.c | 2 | ||||
| -rw-r--r-- | vm_insnhelper.h | 6 | ||||
| -rw-r--r-- | yjit/bindgen/src/main.rs | 2 | ||||
| -rw-r--r-- | yjit/src/cruby_bindings.inc.rs | 2 | ||||
| -rw-r--r-- | yjit/src/stats.rs | 8 | ||||
| -rw-r--r-- | zjit.rb | 45 | ||||
| -rw-r--r-- | zjit/bindgen/src/main.rs | 2 | ||||
| -rw-r--r-- | zjit/src/backend/arm64/mod.rs | 2 | ||||
| -rw-r--r-- | zjit/src/backend/lir.rs | 26 | ||||
| -rw-r--r-- | zjit/src/codegen.rs | 100 | ||||
| -rw-r--r-- | zjit/src/cruby_bindings.inc.rs | 2 | ||||
| -rw-r--r-- | zjit/src/hir.rs | 4 | ||||
| -rw-r--r-- | zjit/src/state.rs | 35 | ||||
| -rw-r--r-- | zjit/src/stats.rs | 36 |
16 files changed, 192 insertions, 86 deletions
diff --git a/internal/vm.h b/internal/vm.h index 3a99011c44..e5ed47afae 100644 --- a/internal/vm.h +++ b/internal/vm.h @@ -81,7 +81,7 @@ VALUE rb_gccct_clear_table(VALUE); #if USE_YJIT || USE_ZJIT /* vm_exec.c */ -extern uint64_t rb_vm_insns_count; +extern uint64_t rb_vm_insn_count; #endif extern bool rb_free_at_exit; diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 28d25f88a4..9296cd3522 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1490,8 +1490,8 @@ class TestZJIT < Test::Unit::TestCase def test = 1 test [ - RubyVM::ZJIT.stats[:zjit_insns_count] > 0, - RubyVM::ZJIT.stats(:zjit_insns_count) > 0, + RubyVM::ZJIT.stats[:zjit_insn_count] > 0, + RubyVM::ZJIT.stats(:zjit_insn_count) > 0, ] }, stats: true end @@ -13,7 +13,7 @@ #if USE_YJIT || USE_ZJIT // The number of instructions executed on vm_exec_core. --yjit-stats and --zjit-stats use this. -uint64_t rb_vm_insns_count = 0; +uint64_t rb_vm_insn_count = 0; #endif #if VM_COLLECT_USAGE_DETAILS diff --git a/vm_insnhelper.h b/vm_insnhelper.h index 24bfbb8210..015edaed9d 100644 --- a/vm_insnhelper.h +++ b/vm_insnhelper.h @@ -16,11 +16,11 @@ RUBY_EXTERN rb_serial_t ruby_vm_constant_cache_invalidations; RUBY_EXTERN rb_serial_t ruby_vm_constant_cache_misses; RUBY_EXTERN rb_serial_t ruby_vm_global_cvar_state; -#if YJIT_STATS || ZJIT_STATS // We want vm_insns_count only on stats builds. -// Increment vm_insns_count for --yjit-stats. We increment this even when +#if YJIT_STATS || ZJIT_STATS // We want vm_insn_count only on stats builds. +// Increment vm_insn_count for --yjit-stats. We increment this even when // --yjit or --yjit-stats is not used because branching to skip it is slower. // We also don't use ATOMIC_INC for performance, allowing inaccuracy on Ractors. -#define JIT_COLLECT_USAGE_INSN(insn) rb_vm_insns_count++ +#define JIT_COLLECT_USAGE_INSN(insn) rb_vm_insn_count++ #else #define JIT_COLLECT_USAGE_INSN(insn) // none #endif diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index 61b6f23326..dd0cb6dbf5 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -390,7 +390,7 @@ fn main() { .allowlist_function("rb_const_get") // From internal/vm.h - .allowlist_var("rb_vm_insns_count") + .allowlist_var("rb_vm_insn_count") // From include/ruby/internal/intern/vm.h .allowlist_function("rb_get_alloc_func") diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index ea51574fe7..4cae138c95 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -1092,7 +1092,7 @@ extern "C" { elts: *const VALUE, ) -> VALUE; pub fn rb_vm_top_self() -> VALUE; - pub static mut rb_vm_insns_count: u64; + pub static mut rb_vm_insn_count: u64; pub fn rb_method_entry_at(obj: VALUE, id: ID) -> *const rb_method_entry_t; pub fn rb_callable_method_entry(klass: VALUE, id: ID) -> *const rb_callable_method_entry_t; pub fn rb_callable_method_entry_or_negative( diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs index 5358d83ea4..ea6130973d 100644 --- a/yjit/src/stats.rs +++ b/yjit/src/stats.rs @@ -789,8 +789,8 @@ fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE { set_stat_usize!(hash, "context_cache_bytes", crate::core::CTX_ENCODE_CACHE_BYTES + crate::core::CTX_DECODE_CACHE_BYTES); // VM instructions count - if rb_vm_insns_count > 0 { - set_stat_usize!(hash, "vm_insns_count", rb_vm_insns_count as usize); + if rb_vm_insn_count > 0 { + set_stat_usize!(hash, "vm_insns_count", rb_vm_insn_count as usize); } set_stat_usize!(hash, "live_iseq_count", rb_yjit_live_iseq_count as usize); @@ -861,8 +861,8 @@ fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE { set_stat_double!(hash, "avg_len_in_yjit", avg_len_in_yjit); // Proportion of instructions that retire in YJIT - if rb_vm_insns_count > 0 { - let total_insns_count = retired_in_yjit + rb_vm_insns_count; + if rb_vm_insn_count > 0 { + let total_insns_count = retired_in_yjit + rb_vm_insn_count; set_stat_usize!(hash, "total_insns_count", total_insns_count as usize); let ratio_in_yjit: f64 = 100.0 * retired_in_yjit as f64 / total_insns_count as f64; @@ -29,9 +29,9 @@ class << RubyVM::ZJIT stats = Primitive.rb_zjit_stats(key) return stats if stats.nil? || !key.nil? - if stats.key?(:vm_insns_count) && stats.key?(:zjit_insns_count) - stats[:total_insns_count] = stats[:vm_insns_count] + stats[:zjit_insns_count] - stats[:ratio_in_zjit] = 100.0 * stats[:zjit_insns_count] / stats[:total_insns_count] + if stats.key?(:vm_insn_count) && stats.key?(:zjit_insn_count) + stats[:total_insn_count] = stats[:vm_insn_count] + stats[:zjit_insn_count] + stats[:ratio_in_zjit] = 100.0 * stats[:zjit_insn_count] / stats[:total_insn_count] end stats @@ -52,11 +52,13 @@ class << RubyVM::ZJIT :gc_time_ns, :invalidation_time_ns, - :total_insns_count, - :vm_insns_count, - :zjit_insns_count, + :side_exit_count, + :total_insn_count, + :vm_insn_count, + :zjit_insn_count, :ratio_in_zjit, ], buf:, stats:) + print_counters_with_prefix(prefix: 'exit_', prompt: 'side exit reasons', buf:, stats:, limit: 20) buf end @@ -70,9 +72,9 @@ class << RubyVM::ZJIT private def print_counters(keys, buf:, stats:) - left_pad = keys.map(&:size).max + 1 + left_pad = keys.map { |key| key.to_s.sub(/_time_ns\z/, '_time').size }.max + 1 keys.each do |key| - # Some stats like vm_insns_count and ratio_in_zjit are not supported on the release build + # Some stats like vm_insn_count and ratio_in_zjit are not supported on the release build next unless stats.key?(key) value = stats[key] @@ -90,11 +92,28 @@ class << RubyVM::ZJIT end end - def print_counters_with_prefix(buf:, stats:, prefix:, prompt:) - keys = stats.keys.select { |key| key.start_with?(prefix) && stats[key] > 0 } - unless keys.empty? - buf << "#{prompt}:\n" - print_counters(keys, buf:, stats:) + def print_counters_with_prefix(buf:, stats:, prefix:, prompt:, limit: nil) + counters = stats.select { |key, value| key.start_with?(prefix) && value > 0 } + return if stats.empty? + + counters.transform_keys! { |key| key.to_s.delete_prefix!(prefix) } + left_pad = counters.keys.map(&:size).max + right_pad = counters.values.map { |value| number_with_delimiter(value).size }.max + total = counters.values.sum + count = counters.size + + counters = counters.to_a + counters.sort_by! { |_, value| -value } + counters = counters.first(limit) if limit + + buf << "Top-#{counters.size} " if limit + buf << "#{prompt}" + buf << " (%.1f%% of all #{count})" % (100.0 * counters.map(&:last).sum / total) if limit + buf << ":\n" + counters.each do |key, value| + padded_key = key.rjust(left_pad, ' ') + padded_value = number_with_delimiter(value).rjust(right_pad, ' ') + buf << " #{padded_key}: #{padded_value} (%4.1f%%)\n" % (100.0 * value / total) end end diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index ac10341996..a95b8dcaaa 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -402,7 +402,7 @@ fn main() { .allowlist_function("rb_mod_name") // From internal/vm.h - .allowlist_var("rb_vm_insns_count") + .allowlist_var("rb_vm_insn_count") // From include/ruby/internal/intern/vm.h .allowlist_function("rb_get_alloc_func") diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index c60ec53285..e243477ec8 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -1737,7 +1737,7 @@ mod tests { fn test_store_unserviceable() { let (mut asm, mut cb) = setup_asm(); // This would put the source into SCRATCH_REG, messing up the destination - asm.store(Opnd::mem(64, Opnd::Reg(Assembler::SCRATCH_REG), 0), 0x83902.into()); + asm.store(Opnd::mem(64, SCRATCH_OPND, 0), 0x83902.into()); asm.compile_with_num_regs(&mut cb, 0); } diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index 1bb4cd024b..7e317d4991 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -6,6 +6,7 @@ use crate::cruby::{Qundef, RUBY_OFFSET_CFP_PC, RUBY_OFFSET_CFP_SP, SIZEOF_VALUE_ use crate::hir::SideExitReason; use crate::options::{debug, get_option}; use crate::cruby::VALUE; +use crate::stats::exit_counter_ptr; use crate::virtualmem::CodePtr; use crate::asm::{CodeBlock, Label}; @@ -1265,12 +1266,12 @@ impl Assembler // then load SCRATCH_REG into the destination when it's safe. if !old_moves.is_empty() { // Make sure it's safe to use SCRATCH_REG - assert!(old_moves.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG))); + assert!(old_moves.iter().all(|&(_, opnd)| opnd != SCRATCH_OPND)); // Move SCRATCH <- opnd, and delay reg <- SCRATCH let (reg, opnd) = old_moves.remove(0); new_moves.push((Assembler::SCRATCH_REG, opnd)); - old_moves.push((reg, Opnd::Reg(Assembler::SCRATCH_REG))); + old_moves.push((reg, SCRATCH_OPND)); } } new_moves @@ -1584,13 +1585,26 @@ impl Assembler } asm_comment!(self, "save cfp->pc"); - self.load_into(Opnd::Reg(Assembler::SCRATCH_REG), Opnd::const_ptr(pc)); - self.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), Opnd::Reg(Assembler::SCRATCH_REG)); + self.load_into(SCRATCH_OPND, Opnd::const_ptr(pc)); + self.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), SCRATCH_OPND); asm_comment!(self, "save cfp->sp"); - self.lea_into(Opnd::Reg(Assembler::SCRATCH_REG), Opnd::mem(64, SP, stack.len() as i32 * SIZEOF_VALUE_I32)); + self.lea_into(SCRATCH_OPND, Opnd::mem(64, SP, stack.len() as i32 * SIZEOF_VALUE_I32)); let cfp_sp = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP); - self.store(cfp_sp, Opnd::Reg(Assembler::SCRATCH_REG)); + self.store(cfp_sp, SCRATCH_OPND); + + if get_option!(stats) { + asm_comment!(self, "increment an exit counter"); + self.load_into(SCRATCH_OPND, Opnd::const_ptr(exit_counter_ptr(pc))); + let counter_opnd = if cfg!(target_arch = "aarch64") { // See arm64_split() + // Using C_CRET_OPND since arm64_emit uses both SCRATCH0 and SCRATCH1 for IncrCounter. + self.lea_into(C_RET_OPND, Opnd::mem(64, SCRATCH_OPND, 0)); + C_RET_OPND + } else { // x86_emit expects Opnd::Mem + Opnd::mem(64, SCRATCH_OPND, 0) + }; + self.incr_counter(counter_opnd, 1.into()); + } asm_comment!(self, "exit to the interpreter"); self.frame_teardown(&[]); // matching the setup in :bb0-prologue: diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 021db1c4c8..2b71596a17 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -9,7 +9,7 @@ use crate::invariants::{track_bop_assumption, track_cme_assumption, track_single use crate::gc::{append_gc_offsets, get_or_create_iseq_payload, get_or_create_iseq_payload_ptr, IseqPayload, IseqStatus}; use crate::state::ZJITState; use crate::stats::incr_counter; -use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::compile_time_ns}; +use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::{compile_time_ns, exit_compilation_failure}}; use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr}; use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SCRATCH_OPND, SP}; use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SpecialBackrefSymbol, SELF_PARAM_IDX}; @@ -73,45 +73,36 @@ pub extern "C" fn rb_zjit_iseq_gen_entry_point(iseq: IseqPtr, _ec: EcPtr) -> *co return std::ptr::null(); } - // Reject ISEQs with very large temp stacks. - // We cannot encode too large offsets to access locals in arm64. - let stack_max = unsafe { rb_get_iseq_body_stack_max(iseq) }; - if stack_max >= i8::MAX as u32 { - debug!("ISEQ stack too large: {stack_max}"); - return std::ptr::null(); - } - // Take a lock to avoid writing to ISEQ in parallel with Ractors. // with_vm_lock() does nothing if the program doesn't use Ractors. - let code_ptr = with_vm_lock(src_loc!(), || { - with_time_stat(compile_time_ns, || gen_iseq_entry_point(iseq)) - }); - - // Assert that the ISEQ compiles if RubyVM::ZJIT.assert_compiles is enabled - if ZJITState::assert_compiles_enabled() && code_ptr.is_null() { - let iseq_location = iseq_get_location(iseq, 0); - panic!("Failed to compile: {iseq_location}"); - } - - code_ptr -} + with_vm_lock(src_loc!(), || { + let cb = ZJITState::get_code_block(); + let mut code_ptr = with_time_stat(compile_time_ns, || gen_iseq_entry_point(cb, iseq)); + + if code_ptr.is_none() { + // Assert that the ISEQ compiles if RubyVM::ZJIT.assert_compiles is enabled + if ZJITState::assert_compiles_enabled() { + let iseq_location = iseq_get_location(iseq, 0); + panic!("Failed to compile: {iseq_location}"); + } -/// See [gen_iseq_entry_point_body]. This wrapper is to make sure cb.mark_all_executable() -/// is called even if gen_iseq_entry_point_body() partially fails and returns a null pointer. -fn gen_iseq_entry_point(iseq: IseqPtr) -> *const u8 { - let cb = ZJITState::get_code_block(); - let code_ptr = gen_iseq_entry_point_body(cb, iseq); + // For --zjit-stats, generate an entry that just increments exit_compilation_failure and exits + if get_option!(stats) { + code_ptr = gen_compilation_failure_counter(cb); + } + } - // Always mark the code region executable if asm.compile() has been used. - // We need to do this even if code_ptr is null because, whether gen_entry() or - // gen_function_stub() fails or not, gen_function() has already used asm.compile(). - cb.mark_all_executable(); + // Always mark the code region executable if asm.compile() has been used. + // We need to do this even if code_ptr is None because, whether gen_entry() + // fails or not, gen_iseq() may have already used asm.compile(). + cb.mark_all_executable(); - code_ptr.map_or(std::ptr::null(), |ptr| ptr.raw_ptr(cb)) + code_ptr.map_or(std::ptr::null(), |ptr| ptr.raw_ptr(cb)) + }) } /// Compile an entry point for a given ISEQ -fn gen_iseq_entry_point_body(cb: &mut CodeBlock, iseq: IseqPtr) -> Option<CodePtr> { +fn gen_iseq_entry_point(cb: &mut CodeBlock, iseq: IseqPtr) -> Option<CodePtr> { // Compile ISEQ into High-level IR let Some(function) = compile_iseq(iseq) else { incr_counter!(compilation_failure); @@ -283,7 +274,6 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function) -> Optio let insn = function.find(insn_id); if let Err(last_snapshot) = gen_insn(cb, &mut jit, &mut asm, function, insn_id, &insn) { debug!("ZJIT: gen_function: Failed to compile insn: {insn_id} {insn}. Generating side-exit."); - incr_counter!(failed_gen_insn); gen_side_exit(&mut jit, &mut asm, &SideExitReason::UnhandledInstruction(insn_id), &function.frame_state(last_snapshot)); // Don't bother generating code after a side-exit. We won't run it. // TODO(max): Generate ud2 or equivalent. @@ -419,10 +409,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio | &Insn::Throw { state, .. } | &Insn::ToArray { state, .. } | &Insn::ToNewArray { state, .. } - => { - incr_counter!(failed_gen_insn_unexpected); - return Err(state); - } + => return Err(state), }; assert!(insn.has_output(), "Cannot write LIR output of HIR instruction with no output: {insn}"); @@ -1378,6 +1365,15 @@ pub fn local_size_and_idx_to_bp_offset(local_size: usize, local_idx: usize) -> i /// Convert ISEQ into High-level IR fn compile_iseq(iseq: IseqPtr) -> Option<Function> { + // Reject ISEQs with very large temp stacks. + // We cannot encode too large offsets to access locals in arm64. + let stack_max = unsafe { rb_get_iseq_body_stack_max(iseq) }; + if stack_max >= i8::MAX as u32 { + debug!("ISEQ stack too large: {stack_max}"); + incr_counter!(failed_iseq_stack_too_large); + return None; + } + let mut function = match iseq_to_hir(iseq) { Ok(function) => function, Err(err) => { @@ -1508,7 +1504,7 @@ c_callable! { // Exit to the interpreter spill_stack(iseq, cfp, sp); - return ZJITState::get_exit_trampoline().raw_ptr(cb); + return ZJITState::get_exit_trampoline_with_counter().raw_ptr(cb); } // Otherwise, attempt to compile the ISEQ. We have to mark_all_executable() beyond this point. @@ -1518,7 +1514,7 @@ c_callable! { } else { // Exit to the interpreter spill_stack(iseq, cfp, sp); - ZJITState::get_exit_trampoline() + ZJITState::get_exit_trampoline_with_counter() }; cb.mark_all_executable(); code_ptr.raw_ptr(cb) @@ -1610,6 +1606,20 @@ pub fn gen_exit_trampoline(cb: &mut CodeBlock) -> Option<CodePtr> { }) } +/// Generate a trampoline that increments exit_compilation_failure and jumps to exit_trampoline. +pub fn gen_exit_trampoline_with_counter(cb: &mut CodeBlock, exit_trampoline: CodePtr) -> Option<CodePtr> { + let mut asm = Assembler::new(); + + asm_comment!(asm, "function stub exit trampoline"); + gen_incr_counter(&mut asm, exit_compilation_failure); + asm.jmp(Target::CodePtr(exit_trampoline)); + + asm.compile(cb).map(|(code_ptr, gc_offsets)| { + assert_eq!(gc_offsets.len(), 0); + code_ptr + }) +} + fn gen_push_opnds(jit: &mut JITState, asm: &mut Assembler, opnds: &[Opnd]) -> lir::Opnd { let n = opnds.len(); @@ -1666,6 +1676,18 @@ fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec<Opnd> result } +/// Generate a JIT entry that just increments exit_compilation_failure and exits +fn gen_compilation_failure_counter(cb: &mut CodeBlock) -> Option<CodePtr> { + let mut asm = Assembler::new(); + gen_incr_counter(&mut asm, exit_compilation_failure); + asm.cret(Qundef.into()); + + asm.compile(cb).map(|(code_ptr, gc_offsets)| { + assert_eq!(0, gc_offsets.len()); + code_ptr + }) +} + /// Given the number of spill slots needed for a function, return the number of bytes /// the function needs to allocate on the stack for the stack frame. fn aligned_stack_bytes(num_slots: usize) -> usize { diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 12fc6b91fa..c804ecce86 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -835,7 +835,7 @@ unsafe extern "C" { elts: *const VALUE, ) -> VALUE; pub fn rb_vm_top_self() -> VALUE; - pub static mut rb_vm_insns_count: u64; + pub static mut rb_vm_insn_count: u64; pub fn rb_method_entry_at(obj: VALUE, id: ID) -> *const rb_method_entry_t; pub fn rb_callable_method_entry(klass: VALUE, id: ID) -> *const rb_callable_method_entry_t; pub fn rb_callable_method_entry_or_negative( diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index f69605c43d..d269baf884 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -2878,9 +2878,9 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> { let exit_state = state.clone(); profiles.profile_stack(&exit_state); - // Increment zjit_insns_count for each YARV instruction if --zjit-stats is enabled. + // Increment zjit_insn_count for each YARV instruction if --zjit-stats is enabled. if get_option!(stats) { - fun.push_insn(block, Insn::IncrCounter(Counter::zjit_insns_count)); + fun.push_insn(block, Insn::IncrCounter(Counter::zjit_insn_count)); } // try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes. diff --git a/zjit/src/state.rs b/zjit/src/state.rs index 948204a1e6..6608f1ea23 100644 --- a/zjit/src/state.rs +++ b/zjit/src/state.rs @@ -1,10 +1,10 @@ -use crate::codegen::{gen_exit_trampoline, gen_function_stub_hit_trampoline}; -use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insns_count, EcPtr, Qnil, VALUE}; +use crate::codegen::{gen_exit_trampoline, gen_exit_trampoline_with_counter, gen_function_stub_hit_trampoline}; +use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insn_count, EcPtr, Qnil, VALUE, VM_INSTRUCTION_SIZE}; use crate::cruby_methods; use crate::invariants::Invariants; use crate::asm::CodeBlock; use crate::options::get_option; -use crate::stats::Counters; +use crate::stats::{Counters, ExitCounters}; use crate::virtualmem::CodePtr; #[allow(non_upper_case_globals)] @@ -24,6 +24,9 @@ pub struct ZJITState { /// ZJIT statistics counters: Counters, + /// Side-exit counters + exit_counters: ExitCounters, + /// Assumptions that require invalidation invariants: Invariants, @@ -36,6 +39,9 @@ pub struct ZJITState { /// Trampoline to side-exit without restoring PC or the stack exit_trampoline: CodePtr, + /// Trampoline to side-exit and increment exit_compilation_failure + exit_trampoline_with_counter: CodePtr, + /// Trampoline to call function_stub_hit function_stub_hit_trampoline: CodePtr, } @@ -93,13 +99,24 @@ impl ZJITState { let zjit_state = ZJITState { code_block: cb, counters: Counters::default(), + exit_counters: [0; VM_INSTRUCTION_SIZE as usize], invariants: Invariants::default(), assert_compiles: false, method_annotations: cruby_methods::init(), exit_trampoline, function_stub_hit_trampoline, + exit_trampoline_with_counter: exit_trampoline, }; unsafe { ZJIT_STATE = Some(zjit_state); } + + // With --zjit-stats, use a different trampoline on function stub exits + // to count exit_compilation_failure. Note that the trampoline code depends + // on the counter, so ZJIT_STATE needs to be initialized first. + if get_option!(stats) { + let cb = ZJITState::get_code_block(); + let code_ptr = gen_exit_trampoline_with_counter(cb, exit_trampoline).unwrap(); + ZJITState::get_instance().exit_trampoline_with_counter = code_ptr; + } } /// Return true if zjit_state has been initialized @@ -142,6 +159,11 @@ impl ZJITState { &mut ZJITState::get_instance().counters } + /// Get a mutable reference to side-exit counters + pub fn get_exit_counters() -> &'static mut ExitCounters { + &mut ZJITState::get_instance().exit_counters + } + /// Was --zjit-save-compiled-iseqs specified? pub fn should_log_compiled_iseqs() -> bool { get_option!(log_compiled_iseqs).is_some() @@ -179,6 +201,11 @@ impl ZJITState { ZJITState::get_instance().exit_trampoline } + /// Return a code pointer to the exit trampoline for function stubs + pub fn get_exit_trampoline_with_counter() -> CodePtr { + ZJITState::get_instance().exit_trampoline_with_counter + } + /// Return a code pointer to the function stub hit trampoline pub fn get_function_stub_hit_trampoline() -> CodePtr { ZJITState::get_instance().function_stub_hit_trampoline @@ -199,7 +226,7 @@ pub extern "C" fn rb_zjit_init() { rb_bug_panic_hook(); // Discard the instruction count for boot which we never compile - unsafe { rb_vm_insns_count = 0; } + unsafe { rb_vm_insn_count = 0; } // ZJIT enabled and initialized successfully assert!(unsafe{ !rb_zjit_enabled_p }); diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index b17edcb3a0..b754404a66 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -1,6 +1,6 @@ use std::time::Instant; -use crate::{cruby::*, options::get_option, state::zjit_enabled_p}; +use crate::{cruby::*, options::get_option, state::{zjit_enabled_p, ZJITState}}; macro_rules! make_counters { ( @@ -70,15 +70,17 @@ make_counters! { } // The number of times YARV instructions are executed on JIT code - zjit_insns_count, + zjit_insn_count, // failed_: Compilation failure reasons + failed_iseq_stack_too_large, failed_hir_compile, failed_hir_compile_validate, failed_hir_optimize, - failed_gen_insn, - failed_gen_insn_unexpected, failed_asm_compile, + + // exit_: Side exit reasons (ExitCounters shares the same prefix) + exit_compilation_failure, } /// Increase a counter by a specified amount @@ -95,6 +97,16 @@ macro_rules! incr_counter { } pub(crate) use incr_counter; +/// The number of side exits from each YARV instruction +pub type ExitCounters = [u64; VM_INSTRUCTION_SIZE as usize]; + +/// Return a raw pointer to the exit counter for the YARV instruction at a given PC +pub fn exit_counter_ptr(pc: *const VALUE) -> *mut u64 { + let opcode = unsafe { rb_vm_insn_addr2opcode((*pc).as_ptr()) }; + let exit_counters = ZJITState::get_exit_counters(); + unsafe { exit_counters.get_unchecked_mut(opcode as usize) } +} + /// Return a Hash object that contains ZJIT statistics #[unsafe(no_mangle)] pub extern "C" fn rb_zjit_stats(_ec: EcPtr, _self: VALUE, target_key: VALUE) -> VALUE { @@ -134,8 +146,20 @@ pub extern "C" fn rb_zjit_stats(_ec: EcPtr, _self: VALUE, target_key: VALUE) -> set_stat!(hash, &counter.name(), unsafe { *counter_ptr(counter) }); } - if unsafe { rb_vm_insns_count } > 0 { - set_stat!(hash, "vm_insns_count", unsafe { rb_vm_insns_count }); + // Set side exit stats + let exit_counters = ZJITState::get_exit_counters(); + let mut side_exit_count = 0; + for op_idx in 0..VM_INSTRUCTION_SIZE as usize { + let op_name = insn_name(op_idx); + let key_string = "exit_".to_owned() + &op_name; + let count = exit_counters[op_idx]; + side_exit_count += count; + set_stat!(hash, &key_string, count); + } + set_stat!(hash, "side_exit_count", side_exit_count); + + if unsafe { rb_vm_insn_count } > 0 { + set_stat!(hash, "vm_insn_count", unsafe { rb_vm_insn_count }); } hash |
