summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTakashi Kokubun <takashi.kokubun@shopify.com>2025-08-27 10:01:07 -0700
committerGitHub <noreply@github.com>2025-08-27 10:01:07 -0700
commit76810fc34905011535f50c3f8bbcaf39cb80b6cc (patch)
tree686b602c371a6436d0ea25cc1a054b641b385db9
parent61d26c35bf8c744b4c59a44536bc58a6c4653ab6 (diff)
ZJIT: Implement side exit stats (#14357)
-rw-r--r--internal/vm.h2
-rw-r--r--test/ruby/test_zjit.rb4
-rw-r--r--vm_exec.c2
-rw-r--r--vm_insnhelper.h6
-rw-r--r--yjit/bindgen/src/main.rs2
-rw-r--r--yjit/src/cruby_bindings.inc.rs2
-rw-r--r--yjit/src/stats.rs8
-rw-r--r--zjit.rb45
-rw-r--r--zjit/bindgen/src/main.rs2
-rw-r--r--zjit/src/backend/arm64/mod.rs2
-rw-r--r--zjit/src/backend/lir.rs26
-rw-r--r--zjit/src/codegen.rs100
-rw-r--r--zjit/src/cruby_bindings.inc.rs2
-rw-r--r--zjit/src/hir.rs4
-rw-r--r--zjit/src/state.rs35
-rw-r--r--zjit/src/stats.rs36
16 files changed, 192 insertions, 86 deletions
diff --git a/internal/vm.h b/internal/vm.h
index 3a99011c44..e5ed47afae 100644
--- a/internal/vm.h
+++ b/internal/vm.h
@@ -81,7 +81,7 @@ VALUE rb_gccct_clear_table(VALUE);
#if USE_YJIT || USE_ZJIT
/* vm_exec.c */
-extern uint64_t rb_vm_insns_count;
+extern uint64_t rb_vm_insn_count;
#endif
extern bool rb_free_at_exit;
diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb
index 28d25f88a4..9296cd3522 100644
--- a/test/ruby/test_zjit.rb
+++ b/test/ruby/test_zjit.rb
@@ -1490,8 +1490,8 @@ class TestZJIT < Test::Unit::TestCase
def test = 1
test
[
- RubyVM::ZJIT.stats[:zjit_insns_count] > 0,
- RubyVM::ZJIT.stats(:zjit_insns_count) > 0,
+ RubyVM::ZJIT.stats[:zjit_insn_count] > 0,
+ RubyVM::ZJIT.stats(:zjit_insn_count) > 0,
]
}, stats: true
end
diff --git a/vm_exec.c b/vm_exec.c
index 3d7c241a32..bd8c170d76 100644
--- a/vm_exec.c
+++ b/vm_exec.c
@@ -13,7 +13,7 @@
#if USE_YJIT || USE_ZJIT
// The number of instructions executed on vm_exec_core. --yjit-stats and --zjit-stats use this.
-uint64_t rb_vm_insns_count = 0;
+uint64_t rb_vm_insn_count = 0;
#endif
#if VM_COLLECT_USAGE_DETAILS
diff --git a/vm_insnhelper.h b/vm_insnhelper.h
index 24bfbb8210..015edaed9d 100644
--- a/vm_insnhelper.h
+++ b/vm_insnhelper.h
@@ -16,11 +16,11 @@ RUBY_EXTERN rb_serial_t ruby_vm_constant_cache_invalidations;
RUBY_EXTERN rb_serial_t ruby_vm_constant_cache_misses;
RUBY_EXTERN rb_serial_t ruby_vm_global_cvar_state;
-#if YJIT_STATS || ZJIT_STATS // We want vm_insns_count only on stats builds.
-// Increment vm_insns_count for --yjit-stats. We increment this even when
+#if YJIT_STATS || ZJIT_STATS // We want vm_insn_count only on stats builds.
+// Increment vm_insn_count for --yjit-stats. We increment this even when
// --yjit or --yjit-stats is not used because branching to skip it is slower.
// We also don't use ATOMIC_INC for performance, allowing inaccuracy on Ractors.
-#define JIT_COLLECT_USAGE_INSN(insn) rb_vm_insns_count++
+#define JIT_COLLECT_USAGE_INSN(insn) rb_vm_insn_count++
#else
#define JIT_COLLECT_USAGE_INSN(insn) // none
#endif
diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs
index 61b6f23326..dd0cb6dbf5 100644
--- a/yjit/bindgen/src/main.rs
+++ b/yjit/bindgen/src/main.rs
@@ -390,7 +390,7 @@ fn main() {
.allowlist_function("rb_const_get")
// From internal/vm.h
- .allowlist_var("rb_vm_insns_count")
+ .allowlist_var("rb_vm_insn_count")
// From include/ruby/internal/intern/vm.h
.allowlist_function("rb_get_alloc_func")
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
index ea51574fe7..4cae138c95 100644
--- a/yjit/src/cruby_bindings.inc.rs
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -1092,7 +1092,7 @@ extern "C" {
elts: *const VALUE,
) -> VALUE;
pub fn rb_vm_top_self() -> VALUE;
- pub static mut rb_vm_insns_count: u64;
+ pub static mut rb_vm_insn_count: u64;
pub fn rb_method_entry_at(obj: VALUE, id: ID) -> *const rb_method_entry_t;
pub fn rb_callable_method_entry(klass: VALUE, id: ID) -> *const rb_callable_method_entry_t;
pub fn rb_callable_method_entry_or_negative(
diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs
index 5358d83ea4..ea6130973d 100644
--- a/yjit/src/stats.rs
+++ b/yjit/src/stats.rs
@@ -789,8 +789,8 @@ fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE {
set_stat_usize!(hash, "context_cache_bytes", crate::core::CTX_ENCODE_CACHE_BYTES + crate::core::CTX_DECODE_CACHE_BYTES);
// VM instructions count
- if rb_vm_insns_count > 0 {
- set_stat_usize!(hash, "vm_insns_count", rb_vm_insns_count as usize);
+ if rb_vm_insn_count > 0 {
+ set_stat_usize!(hash, "vm_insns_count", rb_vm_insn_count as usize);
}
set_stat_usize!(hash, "live_iseq_count", rb_yjit_live_iseq_count as usize);
@@ -861,8 +861,8 @@ fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE {
set_stat_double!(hash, "avg_len_in_yjit", avg_len_in_yjit);
// Proportion of instructions that retire in YJIT
- if rb_vm_insns_count > 0 {
- let total_insns_count = retired_in_yjit + rb_vm_insns_count;
+ if rb_vm_insn_count > 0 {
+ let total_insns_count = retired_in_yjit + rb_vm_insn_count;
set_stat_usize!(hash, "total_insns_count", total_insns_count as usize);
let ratio_in_yjit: f64 = 100.0 * retired_in_yjit as f64 / total_insns_count as f64;
diff --git a/zjit.rb b/zjit.rb
index b20c110046..cf0896e107 100644
--- a/zjit.rb
+++ b/zjit.rb
@@ -29,9 +29,9 @@ class << RubyVM::ZJIT
stats = Primitive.rb_zjit_stats(key)
return stats if stats.nil? || !key.nil?
- if stats.key?(:vm_insns_count) && stats.key?(:zjit_insns_count)
- stats[:total_insns_count] = stats[:vm_insns_count] + stats[:zjit_insns_count]
- stats[:ratio_in_zjit] = 100.0 * stats[:zjit_insns_count] / stats[:total_insns_count]
+ if stats.key?(:vm_insn_count) && stats.key?(:zjit_insn_count)
+ stats[:total_insn_count] = stats[:vm_insn_count] + stats[:zjit_insn_count]
+ stats[:ratio_in_zjit] = 100.0 * stats[:zjit_insn_count] / stats[:total_insn_count]
end
stats
@@ -52,11 +52,13 @@ class << RubyVM::ZJIT
:gc_time_ns,
:invalidation_time_ns,
- :total_insns_count,
- :vm_insns_count,
- :zjit_insns_count,
+ :side_exit_count,
+ :total_insn_count,
+ :vm_insn_count,
+ :zjit_insn_count,
:ratio_in_zjit,
], buf:, stats:)
+ print_counters_with_prefix(prefix: 'exit_', prompt: 'side exit reasons', buf:, stats:, limit: 20)
buf
end
@@ -70,9 +72,9 @@ class << RubyVM::ZJIT
private
def print_counters(keys, buf:, stats:)
- left_pad = keys.map(&:size).max + 1
+ left_pad = keys.map { |key| key.to_s.sub(/_time_ns\z/, '_time').size }.max + 1
keys.each do |key|
- # Some stats like vm_insns_count and ratio_in_zjit are not supported on the release build
+ # Some stats like vm_insn_count and ratio_in_zjit are not supported on the release build
next unless stats.key?(key)
value = stats[key]
@@ -90,11 +92,28 @@ class << RubyVM::ZJIT
end
end
- def print_counters_with_prefix(buf:, stats:, prefix:, prompt:)
- keys = stats.keys.select { |key| key.start_with?(prefix) && stats[key] > 0 }
- unless keys.empty?
- buf << "#{prompt}:\n"
- print_counters(keys, buf:, stats:)
+ def print_counters_with_prefix(buf:, stats:, prefix:, prompt:, limit: nil)
+ counters = stats.select { |key, value| key.start_with?(prefix) && value > 0 }
+ return if stats.empty?
+
+ counters.transform_keys! { |key| key.to_s.delete_prefix!(prefix) }
+ left_pad = counters.keys.map(&:size).max
+ right_pad = counters.values.map { |value| number_with_delimiter(value).size }.max
+ total = counters.values.sum
+ count = counters.size
+
+ counters = counters.to_a
+ counters.sort_by! { |_, value| -value }
+ counters = counters.first(limit) if limit
+
+ buf << "Top-#{counters.size} " if limit
+ buf << "#{prompt}"
+ buf << " (%.1f%% of all #{count})" % (100.0 * counters.map(&:last).sum / total) if limit
+ buf << ":\n"
+ counters.each do |key, value|
+ padded_key = key.rjust(left_pad, ' ')
+ padded_value = number_with_delimiter(value).rjust(right_pad, ' ')
+ buf << " #{padded_key}: #{padded_value} (%4.1f%%)\n" % (100.0 * value / total)
end
end
diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs
index ac10341996..a95b8dcaaa 100644
--- a/zjit/bindgen/src/main.rs
+++ b/zjit/bindgen/src/main.rs
@@ -402,7 +402,7 @@ fn main() {
.allowlist_function("rb_mod_name")
// From internal/vm.h
- .allowlist_var("rb_vm_insns_count")
+ .allowlist_var("rb_vm_insn_count")
// From include/ruby/internal/intern/vm.h
.allowlist_function("rb_get_alloc_func")
diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs
index c60ec53285..e243477ec8 100644
--- a/zjit/src/backend/arm64/mod.rs
+++ b/zjit/src/backend/arm64/mod.rs
@@ -1737,7 +1737,7 @@ mod tests {
fn test_store_unserviceable() {
let (mut asm, mut cb) = setup_asm();
// This would put the source into SCRATCH_REG, messing up the destination
- asm.store(Opnd::mem(64, Opnd::Reg(Assembler::SCRATCH_REG), 0), 0x83902.into());
+ asm.store(Opnd::mem(64, SCRATCH_OPND, 0), 0x83902.into());
asm.compile_with_num_regs(&mut cb, 0);
}
diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs
index 1bb4cd024b..7e317d4991 100644
--- a/zjit/src/backend/lir.rs
+++ b/zjit/src/backend/lir.rs
@@ -6,6 +6,7 @@ use crate::cruby::{Qundef, RUBY_OFFSET_CFP_PC, RUBY_OFFSET_CFP_SP, SIZEOF_VALUE_
use crate::hir::SideExitReason;
use crate::options::{debug, get_option};
use crate::cruby::VALUE;
+use crate::stats::exit_counter_ptr;
use crate::virtualmem::CodePtr;
use crate::asm::{CodeBlock, Label};
@@ -1265,12 +1266,12 @@ impl Assembler
// then load SCRATCH_REG into the destination when it's safe.
if !old_moves.is_empty() {
// Make sure it's safe to use SCRATCH_REG
- assert!(old_moves.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG)));
+ assert!(old_moves.iter().all(|&(_, opnd)| opnd != SCRATCH_OPND));
// Move SCRATCH <- opnd, and delay reg <- SCRATCH
let (reg, opnd) = old_moves.remove(0);
new_moves.push((Assembler::SCRATCH_REG, opnd));
- old_moves.push((reg, Opnd::Reg(Assembler::SCRATCH_REG)));
+ old_moves.push((reg, SCRATCH_OPND));
}
}
new_moves
@@ -1584,13 +1585,26 @@ impl Assembler
}
asm_comment!(self, "save cfp->pc");
- self.load_into(Opnd::Reg(Assembler::SCRATCH_REG), Opnd::const_ptr(pc));
- self.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), Opnd::Reg(Assembler::SCRATCH_REG));
+ self.load_into(SCRATCH_OPND, Opnd::const_ptr(pc));
+ self.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), SCRATCH_OPND);
asm_comment!(self, "save cfp->sp");
- self.lea_into(Opnd::Reg(Assembler::SCRATCH_REG), Opnd::mem(64, SP, stack.len() as i32 * SIZEOF_VALUE_I32));
+ self.lea_into(SCRATCH_OPND, Opnd::mem(64, SP, stack.len() as i32 * SIZEOF_VALUE_I32));
let cfp_sp = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP);
- self.store(cfp_sp, Opnd::Reg(Assembler::SCRATCH_REG));
+ self.store(cfp_sp, SCRATCH_OPND);
+
+ if get_option!(stats) {
+ asm_comment!(self, "increment an exit counter");
+ self.load_into(SCRATCH_OPND, Opnd::const_ptr(exit_counter_ptr(pc)));
+ let counter_opnd = if cfg!(target_arch = "aarch64") { // See arm64_split()
+ // Using C_CRET_OPND since arm64_emit uses both SCRATCH0 and SCRATCH1 for IncrCounter.
+ self.lea_into(C_RET_OPND, Opnd::mem(64, SCRATCH_OPND, 0));
+ C_RET_OPND
+ } else { // x86_emit expects Opnd::Mem
+ Opnd::mem(64, SCRATCH_OPND, 0)
+ };
+ self.incr_counter(counter_opnd, 1.into());
+ }
asm_comment!(self, "exit to the interpreter");
self.frame_teardown(&[]); // matching the setup in :bb0-prologue:
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index 021db1c4c8..2b71596a17 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -9,7 +9,7 @@ use crate::invariants::{track_bop_assumption, track_cme_assumption, track_single
use crate::gc::{append_gc_offsets, get_or_create_iseq_payload, get_or_create_iseq_payload_ptr, IseqPayload, IseqStatus};
use crate::state::ZJITState;
use crate::stats::incr_counter;
-use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::compile_time_ns};
+use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::{compile_time_ns, exit_compilation_failure}};
use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr};
use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SCRATCH_OPND, SP};
use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SpecialBackrefSymbol, SELF_PARAM_IDX};
@@ -73,45 +73,36 @@ pub extern "C" fn rb_zjit_iseq_gen_entry_point(iseq: IseqPtr, _ec: EcPtr) -> *co
return std::ptr::null();
}
- // Reject ISEQs with very large temp stacks.
- // We cannot encode too large offsets to access locals in arm64.
- let stack_max = unsafe { rb_get_iseq_body_stack_max(iseq) };
- if stack_max >= i8::MAX as u32 {
- debug!("ISEQ stack too large: {stack_max}");
- return std::ptr::null();
- }
-
// Take a lock to avoid writing to ISEQ in parallel with Ractors.
// with_vm_lock() does nothing if the program doesn't use Ractors.
- let code_ptr = with_vm_lock(src_loc!(), || {
- with_time_stat(compile_time_ns, || gen_iseq_entry_point(iseq))
- });
-
- // Assert that the ISEQ compiles if RubyVM::ZJIT.assert_compiles is enabled
- if ZJITState::assert_compiles_enabled() && code_ptr.is_null() {
- let iseq_location = iseq_get_location(iseq, 0);
- panic!("Failed to compile: {iseq_location}");
- }
-
- code_ptr
-}
+ with_vm_lock(src_loc!(), || {
+ let cb = ZJITState::get_code_block();
+ let mut code_ptr = with_time_stat(compile_time_ns, || gen_iseq_entry_point(cb, iseq));
+
+ if code_ptr.is_none() {
+ // Assert that the ISEQ compiles if RubyVM::ZJIT.assert_compiles is enabled
+ if ZJITState::assert_compiles_enabled() {
+ let iseq_location = iseq_get_location(iseq, 0);
+ panic!("Failed to compile: {iseq_location}");
+ }
-/// See [gen_iseq_entry_point_body]. This wrapper is to make sure cb.mark_all_executable()
-/// is called even if gen_iseq_entry_point_body() partially fails and returns a null pointer.
-fn gen_iseq_entry_point(iseq: IseqPtr) -> *const u8 {
- let cb = ZJITState::get_code_block();
- let code_ptr = gen_iseq_entry_point_body(cb, iseq);
+ // For --zjit-stats, generate an entry that just increments exit_compilation_failure and exits
+ if get_option!(stats) {
+ code_ptr = gen_compilation_failure_counter(cb);
+ }
+ }
- // Always mark the code region executable if asm.compile() has been used.
- // We need to do this even if code_ptr is null because, whether gen_entry() or
- // gen_function_stub() fails or not, gen_function() has already used asm.compile().
- cb.mark_all_executable();
+ // Always mark the code region executable if asm.compile() has been used.
+ // We need to do this even if code_ptr is None because, whether gen_entry()
+ // fails or not, gen_iseq() may have already used asm.compile().
+ cb.mark_all_executable();
- code_ptr.map_or(std::ptr::null(), |ptr| ptr.raw_ptr(cb))
+ code_ptr.map_or(std::ptr::null(), |ptr| ptr.raw_ptr(cb))
+ })
}
/// Compile an entry point for a given ISEQ
-fn gen_iseq_entry_point_body(cb: &mut CodeBlock, iseq: IseqPtr) -> Option<CodePtr> {
+fn gen_iseq_entry_point(cb: &mut CodeBlock, iseq: IseqPtr) -> Option<CodePtr> {
// Compile ISEQ into High-level IR
let Some(function) = compile_iseq(iseq) else {
incr_counter!(compilation_failure);
@@ -283,7 +274,6 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function) -> Optio
let insn = function.find(insn_id);
if let Err(last_snapshot) = gen_insn(cb, &mut jit, &mut asm, function, insn_id, &insn) {
debug!("ZJIT: gen_function: Failed to compile insn: {insn_id} {insn}. Generating side-exit.");
- incr_counter!(failed_gen_insn);
gen_side_exit(&mut jit, &mut asm, &SideExitReason::UnhandledInstruction(insn_id), &function.frame_state(last_snapshot));
// Don't bother generating code after a side-exit. We won't run it.
// TODO(max): Generate ud2 or equivalent.
@@ -419,10 +409,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
| &Insn::Throw { state, .. }
| &Insn::ToArray { state, .. }
| &Insn::ToNewArray { state, .. }
- => {
- incr_counter!(failed_gen_insn_unexpected);
- return Err(state);
- }
+ => return Err(state),
};
assert!(insn.has_output(), "Cannot write LIR output of HIR instruction with no output: {insn}");
@@ -1378,6 +1365,15 @@ pub fn local_size_and_idx_to_bp_offset(local_size: usize, local_idx: usize) -> i
/// Convert ISEQ into High-level IR
fn compile_iseq(iseq: IseqPtr) -> Option<Function> {
+ // Reject ISEQs with very large temp stacks.
+ // We cannot encode too large offsets to access locals in arm64.
+ let stack_max = unsafe { rb_get_iseq_body_stack_max(iseq) };
+ if stack_max >= i8::MAX as u32 {
+ debug!("ISEQ stack too large: {stack_max}");
+ incr_counter!(failed_iseq_stack_too_large);
+ return None;
+ }
+
let mut function = match iseq_to_hir(iseq) {
Ok(function) => function,
Err(err) => {
@@ -1508,7 +1504,7 @@ c_callable! {
// Exit to the interpreter
spill_stack(iseq, cfp, sp);
- return ZJITState::get_exit_trampoline().raw_ptr(cb);
+ return ZJITState::get_exit_trampoline_with_counter().raw_ptr(cb);
}
// Otherwise, attempt to compile the ISEQ. We have to mark_all_executable() beyond this point.
@@ -1518,7 +1514,7 @@ c_callable! {
} else {
// Exit to the interpreter
spill_stack(iseq, cfp, sp);
- ZJITState::get_exit_trampoline()
+ ZJITState::get_exit_trampoline_with_counter()
};
cb.mark_all_executable();
code_ptr.raw_ptr(cb)
@@ -1610,6 +1606,20 @@ pub fn gen_exit_trampoline(cb: &mut CodeBlock) -> Option<CodePtr> {
})
}
+/// Generate a trampoline that increments exit_compilation_failure and jumps to exit_trampoline.
+pub fn gen_exit_trampoline_with_counter(cb: &mut CodeBlock, exit_trampoline: CodePtr) -> Option<CodePtr> {
+ let mut asm = Assembler::new();
+
+ asm_comment!(asm, "function stub exit trampoline");
+ gen_incr_counter(&mut asm, exit_compilation_failure);
+ asm.jmp(Target::CodePtr(exit_trampoline));
+
+ asm.compile(cb).map(|(code_ptr, gc_offsets)| {
+ assert_eq!(gc_offsets.len(), 0);
+ code_ptr
+ })
+}
+
fn gen_push_opnds(jit: &mut JITState, asm: &mut Assembler, opnds: &[Opnd]) -> lir::Opnd {
let n = opnds.len();
@@ -1666,6 +1676,18 @@ fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec<Opnd>
result
}
+/// Generate a JIT entry that just increments exit_compilation_failure and exits
+fn gen_compilation_failure_counter(cb: &mut CodeBlock) -> Option<CodePtr> {
+ let mut asm = Assembler::new();
+ gen_incr_counter(&mut asm, exit_compilation_failure);
+ asm.cret(Qundef.into());
+
+ asm.compile(cb).map(|(code_ptr, gc_offsets)| {
+ assert_eq!(0, gc_offsets.len());
+ code_ptr
+ })
+}
+
/// Given the number of spill slots needed for a function, return the number of bytes
/// the function needs to allocate on the stack for the stack frame.
fn aligned_stack_bytes(num_slots: usize) -> usize {
diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs
index 12fc6b91fa..c804ecce86 100644
--- a/zjit/src/cruby_bindings.inc.rs
+++ b/zjit/src/cruby_bindings.inc.rs
@@ -835,7 +835,7 @@ unsafe extern "C" {
elts: *const VALUE,
) -> VALUE;
pub fn rb_vm_top_self() -> VALUE;
- pub static mut rb_vm_insns_count: u64;
+ pub static mut rb_vm_insn_count: u64;
pub fn rb_method_entry_at(obj: VALUE, id: ID) -> *const rb_method_entry_t;
pub fn rb_callable_method_entry(klass: VALUE, id: ID) -> *const rb_callable_method_entry_t;
pub fn rb_callable_method_entry_or_negative(
diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs
index f69605c43d..d269baf884 100644
--- a/zjit/src/hir.rs
+++ b/zjit/src/hir.rs
@@ -2878,9 +2878,9 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> {
let exit_state = state.clone();
profiles.profile_stack(&exit_state);
- // Increment zjit_insns_count for each YARV instruction if --zjit-stats is enabled.
+ // Increment zjit_insn_count for each YARV instruction if --zjit-stats is enabled.
if get_option!(stats) {
- fun.push_insn(block, Insn::IncrCounter(Counter::zjit_insns_count));
+ fun.push_insn(block, Insn::IncrCounter(Counter::zjit_insn_count));
}
// try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes.
diff --git a/zjit/src/state.rs b/zjit/src/state.rs
index 948204a1e6..6608f1ea23 100644
--- a/zjit/src/state.rs
+++ b/zjit/src/state.rs
@@ -1,10 +1,10 @@
-use crate::codegen::{gen_exit_trampoline, gen_function_stub_hit_trampoline};
-use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insns_count, EcPtr, Qnil, VALUE};
+use crate::codegen::{gen_exit_trampoline, gen_exit_trampoline_with_counter, gen_function_stub_hit_trampoline};
+use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insn_count, EcPtr, Qnil, VALUE, VM_INSTRUCTION_SIZE};
use crate::cruby_methods;
use crate::invariants::Invariants;
use crate::asm::CodeBlock;
use crate::options::get_option;
-use crate::stats::Counters;
+use crate::stats::{Counters, ExitCounters};
use crate::virtualmem::CodePtr;
#[allow(non_upper_case_globals)]
@@ -24,6 +24,9 @@ pub struct ZJITState {
/// ZJIT statistics
counters: Counters,
+ /// Side-exit counters
+ exit_counters: ExitCounters,
+
/// Assumptions that require invalidation
invariants: Invariants,
@@ -36,6 +39,9 @@ pub struct ZJITState {
/// Trampoline to side-exit without restoring PC or the stack
exit_trampoline: CodePtr,
+ /// Trampoline to side-exit and increment exit_compilation_failure
+ exit_trampoline_with_counter: CodePtr,
+
/// Trampoline to call function_stub_hit
function_stub_hit_trampoline: CodePtr,
}
@@ -93,13 +99,24 @@ impl ZJITState {
let zjit_state = ZJITState {
code_block: cb,
counters: Counters::default(),
+ exit_counters: [0; VM_INSTRUCTION_SIZE as usize],
invariants: Invariants::default(),
assert_compiles: false,
method_annotations: cruby_methods::init(),
exit_trampoline,
function_stub_hit_trampoline,
+ exit_trampoline_with_counter: exit_trampoline,
};
unsafe { ZJIT_STATE = Some(zjit_state); }
+
+ // With --zjit-stats, use a different trampoline on function stub exits
+ // to count exit_compilation_failure. Note that the trampoline code depends
+ // on the counter, so ZJIT_STATE needs to be initialized first.
+ if get_option!(stats) {
+ let cb = ZJITState::get_code_block();
+ let code_ptr = gen_exit_trampoline_with_counter(cb, exit_trampoline).unwrap();
+ ZJITState::get_instance().exit_trampoline_with_counter = code_ptr;
+ }
}
/// Return true if zjit_state has been initialized
@@ -142,6 +159,11 @@ impl ZJITState {
&mut ZJITState::get_instance().counters
}
+ /// Get a mutable reference to side-exit counters
+ pub fn get_exit_counters() -> &'static mut ExitCounters {
+ &mut ZJITState::get_instance().exit_counters
+ }
+
/// Was --zjit-save-compiled-iseqs specified?
pub fn should_log_compiled_iseqs() -> bool {
get_option!(log_compiled_iseqs).is_some()
@@ -179,6 +201,11 @@ impl ZJITState {
ZJITState::get_instance().exit_trampoline
}
+ /// Return a code pointer to the exit trampoline for function stubs
+ pub fn get_exit_trampoline_with_counter() -> CodePtr {
+ ZJITState::get_instance().exit_trampoline_with_counter
+ }
+
/// Return a code pointer to the function stub hit trampoline
pub fn get_function_stub_hit_trampoline() -> CodePtr {
ZJITState::get_instance().function_stub_hit_trampoline
@@ -199,7 +226,7 @@ pub extern "C" fn rb_zjit_init() {
rb_bug_panic_hook();
// Discard the instruction count for boot which we never compile
- unsafe { rb_vm_insns_count = 0; }
+ unsafe { rb_vm_insn_count = 0; }
// ZJIT enabled and initialized successfully
assert!(unsafe{ !rb_zjit_enabled_p });
diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs
index b17edcb3a0..b754404a66 100644
--- a/zjit/src/stats.rs
+++ b/zjit/src/stats.rs
@@ -1,6 +1,6 @@
use std::time::Instant;
-use crate::{cruby::*, options::get_option, state::zjit_enabled_p};
+use crate::{cruby::*, options::get_option, state::{zjit_enabled_p, ZJITState}};
macro_rules! make_counters {
(
@@ -70,15 +70,17 @@ make_counters! {
}
// The number of times YARV instructions are executed on JIT code
- zjit_insns_count,
+ zjit_insn_count,
// failed_: Compilation failure reasons
+ failed_iseq_stack_too_large,
failed_hir_compile,
failed_hir_compile_validate,
failed_hir_optimize,
- failed_gen_insn,
- failed_gen_insn_unexpected,
failed_asm_compile,
+
+ // exit_: Side exit reasons (ExitCounters shares the same prefix)
+ exit_compilation_failure,
}
/// Increase a counter by a specified amount
@@ -95,6 +97,16 @@ macro_rules! incr_counter {
}
pub(crate) use incr_counter;
+/// The number of side exits from each YARV instruction
+pub type ExitCounters = [u64; VM_INSTRUCTION_SIZE as usize];
+
+/// Return a raw pointer to the exit counter for the YARV instruction at a given PC
+pub fn exit_counter_ptr(pc: *const VALUE) -> *mut u64 {
+ let opcode = unsafe { rb_vm_insn_addr2opcode((*pc).as_ptr()) };
+ let exit_counters = ZJITState::get_exit_counters();
+ unsafe { exit_counters.get_unchecked_mut(opcode as usize) }
+}
+
/// Return a Hash object that contains ZJIT statistics
#[unsafe(no_mangle)]
pub extern "C" fn rb_zjit_stats(_ec: EcPtr, _self: VALUE, target_key: VALUE) -> VALUE {
@@ -134,8 +146,20 @@ pub extern "C" fn rb_zjit_stats(_ec: EcPtr, _self: VALUE, target_key: VALUE) ->
set_stat!(hash, &counter.name(), unsafe { *counter_ptr(counter) });
}
- if unsafe { rb_vm_insns_count } > 0 {
- set_stat!(hash, "vm_insns_count", unsafe { rb_vm_insns_count });
+ // Set side exit stats
+ let exit_counters = ZJITState::get_exit_counters();
+ let mut side_exit_count = 0;
+ for op_idx in 0..VM_INSTRUCTION_SIZE as usize {
+ let op_name = insn_name(op_idx);
+ let key_string = "exit_".to_owned() + &op_name;
+ let count = exit_counters[op_idx];
+ side_exit_count += count;
+ set_stat!(hash, &key_string, count);
+ }
+ set_stat!(hash, "side_exit_count", side_exit_count);
+
+ if unsafe { rb_vm_insn_count } > 0 {
+ set_stat!(hash, "vm_insn_count", unsafe { rb_vm_insn_count });
}
hash