summaryrefslogtreecommitdiff
path: root/zjit
diff options
context:
space:
mode:
author: Max Bernstein <tekknolagi@gmail.com> 2026-03-24 16:45:40 -0400
committer: GitHub <noreply@github.com> 2026-03-24 16:45:40 -0400
commit: 3b8317eb42dd2d11988ecdc603387308ac991f23 (patch)
tree: 9d9e693a79126891489bbd043de3791ff69e84f9 /zjit
parent: e74823a08098ef87c7a2fc3a35647c4c4467ca40 (diff)
ZJIT: Dump side-exit locations in Fuchsia trace format (#16470)
This gives us instant access to all the nice Fuchsia and Perfetto tooling, including zoomable, SQL-queryable browsing for traces:

<img width="1912" height="1185" alt="Screenshot 2026-03-20 at 10 50 57 AM" src="https://github.com/user-attachments/assets/6475bbec-eb55-4886-8e94-13450def2de5" />

Hottest side-exits grouped by exit location using SQL:

```sql
SELECT reason, backtrace, count(*) AS exits
FROM (
  SELECT s.id, s.name AS reason,
         group_concat(a.display_value, ' <- ') AS backtrace
  FROM slice s
  JOIN args a USING(arg_set_id)
  WHERE s.category = 'side_exit'
  GROUP BY s.id
)
GROUP BY reason, backtrace
ORDER BY exits DESC
LIMIT 30
```

<img width="1912" height="1186" alt="Screenshot 2026-03-24 at 3 58 28 PM" src="https://github.com/user-attachments/assets/8195ccd8-aeb6-4396-8c07-e85bbb280a4a" />
Diffstat (limited to 'zjit')
-rw-r--r-- zjit/bindgen/src/main.rs 4
-rw-r--r-- zjit/src/backend/lir.rs 6
-rw-r--r-- zjit/src/codegen.rs 9
-rw-r--r-- zjit/src/cruby.rs 13
-rw-r--r-- zjit/src/cruby_bindings.inc.rs 8
-rw-r--r-- zjit/src/gc.rs 3
-rw-r--r-- zjit/src/hir.rs 6
-rw-r--r-- zjit/src/state.rs 232
-rw-r--r-- zjit/src/stats.rs 205
9 files changed, 255 insertions, 231 deletions
diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs
index 3e82efd8f6..34720e77eb 100644
--- a/zjit/bindgen/src/main.rs
+++ b/zjit/bindgen/src/main.rs
@@ -299,7 +299,9 @@ fn main() {
.allowlist_function("rb_RSTRING_PTR")
.allowlist_function("rb_RSTRING_LEN")
.allowlist_function("rb_ENCODING_GET")
- .allowlist_function("rb_zjit_exit_locations_dict")
+ .allowlist_function("rb_profile_frame_full_label")
+ .allowlist_function("rb_profile_frame_absolute_path")
+ .allowlist_function("rb_profile_frame_path")
.allowlist_function("rb_optimized_call")
.allowlist_function("rb_jit_icache_invalidate")
.allowlist_function("rb_zjit_print_exception")
diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs
index 03dc02c678..00a80b9cf4 100644
--- a/zjit/src/backend/lir.rs
+++ b/zjit/src/backend/lir.rs
@@ -2727,7 +2727,11 @@ impl Assembler
// ccall doesn't clobber caller-saved registers
// holding stack/local operands.
compile_exit_save_state(self, &exit);
- asm_ccall!(self, rb_zjit_record_exit_stack, pc);
+ // Leak a CString with the reason so it's available at runtime
+ let reason_cstr = std::ffi::CString::new(reason.to_string())
+ .unwrap_or_else(|_| std::ffi::CString::new("unknown").unwrap());
+ let reason_ptr = reason_cstr.into_raw() as *const u8;
+ asm_ccall!(self, rb_zjit_record_exit_stack, Opnd::const_ptr(reason_ptr));
compile_exit_return(self);
} else {
// If the side exit has already been compiled, jump to it.
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index 30c99152e2..b473bc69a6 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -467,7 +467,14 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func
if let Err(last_snapshot) = result {
debug!("ZJIT: gen_function: Failed to compile insn: {insn_id} {insn}. Generating side-exit.");
gen_incr_counter(&mut asm, exit_counter_for_unhandled_hir_insn(&insn));
- gen_side_exit(&mut jit, &mut asm, &SideExitReason::UnhandledHIRInsn(insn_id), &function.frame_state(last_snapshot));
+ let reason = match insn {
+ Insn::ArrayMax { .. } => SideExitReason::UnhandledHIRArrayMax,
+ Insn::FixnumDiv { .. } => SideExitReason::UnhandledHIRFixnumDiv,
+ Insn::Throw { .. } => SideExitReason::UnhandledHIRThrow,
+ Insn::InvokeBuiltin { .. } => SideExitReason::UnhandledHIRInvokeBuiltin,
+ _ => SideExitReason::UnhandledHIRUnknown(insn_id),
+ };
+ gen_side_exit(&mut jit, &mut asm, &reason, &function.frame_state(last_snapshot));
// Don't bother generating code after a side-exit. We won't run it.
// TODO(max): Generate ud2 or equivalent.
break;
diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs
index 5c0b99f182..e1343f9b40 100644
--- a/zjit/src/cruby.rs
+++ b/zjit/src/cruby.rs
@@ -902,15 +902,18 @@ pub fn iseq_get_location(iseq: IseqPtr, pos: u32) -> String {
s
}
+pub fn ruby_str_to_rust_string_result(v: VALUE) -> Result<String, std::string::FromUtf8Error> {
+ let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8;
+ let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap();
+ let str_slice: &[u8] = unsafe { std::slice::from_raw_parts(str_ptr, str_len) };
+ String::from_utf8(str_slice.to_vec())
+}
// Convert a CRuby UTF-8-encoded RSTRING into a Rust string.
// This should work fine on ASCII strings and anything else
// that is considered legal UTF-8, including embedded nulls.
-fn ruby_str_to_rust_string(v: VALUE) -> String {
- let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8;
- let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap();
- let str_slice: &[u8] = unsafe { std::slice::from_raw_parts(str_ptr, str_len) };
- String::from_utf8(str_slice.to_vec()).unwrap_or_default()
+pub fn ruby_str_to_rust_string(v: VALUE) -> String {
+ ruby_str_to_rust_string_result(v).unwrap_or_default()
}
pub fn ruby_sym_to_rust_string(v: VALUE) -> String {
diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs
index 41ebdb0f55..5c7ce49fc6 100644
--- a/zjit/src/cruby_bindings.inc.rs
+++ b/zjit/src/cruby_bindings.inc.rs
@@ -2131,12 +2131,10 @@ unsafe extern "C" {
buff: *mut VALUE,
lines: *mut ::std::os::raw::c_int,
) -> ::std::os::raw::c_int;
+ pub fn rb_profile_frame_path(frame: VALUE) -> VALUE;
+ pub fn rb_profile_frame_absolute_path(frame: VALUE) -> VALUE;
+ pub fn rb_profile_frame_full_label(frame: VALUE) -> VALUE;
pub fn rb_jit_cont_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void);
- pub fn rb_zjit_exit_locations_dict(
- zjit_raw_samples: *mut VALUE,
- zjit_line_samples: *mut ::std::os::raw::c_int,
- samples_len: ::std::os::raw::c_int,
- ) -> VALUE;
pub fn rb_zjit_profile_disable(iseq: *const rb_iseq_t);
pub fn rb_vm_base_ptr(cfp: *mut rb_control_frame_struct) -> *mut VALUE;
pub fn rb_zjit_constcache_shareable(ice: *const iseq_inline_constant_cache_entry) -> bool;
diff --git a/zjit/src/gc.rs b/zjit/src/gc.rs
index 40230ccc8d..b79f19837f 100644
--- a/zjit/src/gc.rs
+++ b/zjit/src/gc.rs
@@ -5,7 +5,6 @@ use std::{ffi::c_void, ops::Range};
use crate::{cruby::*, state::ZJITState, stats::with_time_stat, virtualmem::CodePtr};
use crate::payload::{IseqPayload, IseqVersionRef, get_or_create_iseq_payload};
use crate::stats::Counter::gc_time_ns;
-use crate::state::gc_mark_raw_samples;
/// GC callback for marking GC objects in the per-ISEQ payload.
#[unsafe(no_mangle)]
@@ -207,5 +206,5 @@ fn ranges_overlap<T>(left: &Range<T>, right: &Range<T>) -> bool where T: Partial
/// Callback for marking GC objects inside [crate::invariants::Invariants].
#[unsafe(no_mangle)]
pub extern "C" fn rb_zjit_root_mark() {
- gc_mark_raw_samples();
+ // TODO(max): Either add roots to mark or consider removing this callback
}
diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs
index b8e37059eb..306ab7d8cb 100644
--- a/zjit/src/hir.rs
+++ b/zjit/src/hir.rs
@@ -498,7 +498,11 @@ pub enum SideExitReason {
UnhandledNewarraySend(vm_opt_newarray_send_type),
UnhandledDuparraySend(u64),
UnknownSpecialVariable(u64),
- UnhandledHIRInsn(InsnId),
+ UnhandledHIRArrayMax,
+ UnhandledHIRFixnumDiv,
+ UnhandledHIRThrow,
+ UnhandledHIRInvokeBuiltin,
+ UnhandledHIRUnknown(InsnId),
UnhandledYARVInsn(u32),
UnhandledCallType(CallType),
UnhandledBlockArg,
diff --git a/zjit/src/state.rs b/zjit/src/state.rs
index b8dcd70358..b9f8033e7f 100644
--- a/zjit/src/state.rs
+++ b/zjit/src/state.rs
@@ -1,14 +1,14 @@
//! Runtime state of ZJIT.
use crate::codegen::{gen_entry_trampoline, gen_exit_trampoline, gen_exit_trampoline_with_counter, gen_function_stub_hit_trampoline};
-use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insn_count, src_loc, EcPtr, Qnil, Qtrue, rb_vm_insn_addr2opcode, rb_profile_frames, VALUE, VM_INSTRUCTION_SIZE, size_t, rb_gc_mark, with_vm_lock, rust_str_to_id, rb_funcallv, rb_const_get, rb_cRubyVM};
+use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insn_count, src_loc, EcPtr, Qnil, Qtrue, rb_profile_frames, rb_profile_frame_full_label, rb_profile_frame_absolute_path, rb_profile_frame_path, VALUE, VM_INSTRUCTION_SIZE, with_vm_lock, rust_str_to_id, rb_funcallv, rb_const_get, rb_cRubyVM};
use crate::cruby_methods;
-use cruby::{ID, rb_callable_method_entry, get_def_method_serial, rb_gc_register_mark_object};
+use cruby::{ID, rb_callable_method_entry, get_def_method_serial, rb_gc_register_mark_object, ruby_str_to_rust_string_result};
use std::sync::atomic::Ordering;
use crate::invariants::Invariants;
use crate::asm::CodeBlock;
use crate::options::{get_option, rb_zjit_prepare_options};
-use crate::stats::{Counters, InsnCounters, SideExitLocations};
+use crate::stats::{Counters, InsnCounters, PerfettoTracer};
use crate::virtualmem::CodePtr;
use std::sync::atomic::AtomicUsize;
use std::collections::HashMap;
@@ -68,8 +68,8 @@ pub struct ZJITState {
/// Counter pointers for access counts of ISEQs accessed by JIT code
iseq_calls_count_pointers: HashMap<String, Box<u64>>,
- /// Locations of side exists within generated code
- exit_locations: Option<SideExitLocations>,
+ /// Perfetto tracer for --zjit-trace-exits
+ perfetto_tracer: Option<PerfettoTracer>,
}
/// Tracks the initialization progress
@@ -124,8 +124,8 @@ impl ZJITState {
let exit_trampoline = gen_exit_trampoline(&mut cb).unwrap();
let function_stub_hit_trampoline = gen_function_stub_hit_trampoline(&mut cb).unwrap();
- let exit_locations = if get_option!(trace_side_exits).is_some() {
- Some(SideExitLocations::default())
+ let perfetto_tracer = if get_option!(trace_side_exits).is_some() {
+ Some(PerfettoTracer::new())
} else {
None
};
@@ -146,7 +146,7 @@ impl ZJITState {
not_annotated_frame_cfunc_counter_pointers: HashMap::new(),
ccall_counter_pointers: HashMap::new(),
iseq_calls_count_pointers: HashMap::new(),
- exit_locations,
+ perfetto_tracer,
};
unsafe { ZJIT_STATE = Enabled(zjit_state); }
@@ -283,24 +283,9 @@ impl ZJITState {
ZJITState::get_instance().function_stub_hit_trampoline
}
- /// Get a mutable reference to the ZJIT raw samples Vec
- pub fn get_raw_samples() -> Option<&'static mut Vec<VALUE>> {
- ZJITState::get_instance().exit_locations.as_mut().map(|el| &mut el.raw_samples)
- }
-
- /// Get a mutable reference to the ZJIT line samples Vec.
- pub fn get_line_samples() -> Option<&'static mut Vec<i32>> {
- ZJITState::get_instance().exit_locations.as_mut().map(|el| &mut el.line_samples)
- }
-
- /// Get number of skipped samples.
- pub fn get_skipped_samples() -> Option<&'static mut usize> {
- ZJITState::get_instance().exit_locations.as_mut().map(|el| &mut el.skipped_samples)
- }
-
- /// Get number of skipped samples.
- pub fn set_skipped_samples(n: usize) -> Option<()> {
- ZJITState::get_instance().exit_locations.as_mut().map(|el| el.skipped_samples = n)
+ /// Get a mutable reference to the Perfetto tracer
+ pub fn get_tracer() -> Option<&'static mut PerfettoTracer> {
+ ZJITState::get_instance().perfetto_tracer.as_mut()
}
}
@@ -437,177 +422,66 @@ pub extern "C" fn rb_zjit_assert_compiles(_ec: EcPtr, _self: VALUE) -> VALUE {
Qnil
}
-/// Call `rb_profile_frames` and write the result into buffers to be consumed by `rb_zjit_record_exit_stack`.
-fn record_profiling_frames() -> (i32, Vec<VALUE>, Vec<i32>) {
- // Stackprof uses a buffer of length 2048 when collating the frames into statistics.
- // Since eventually the collected information will be used by Stackprof, collect only
- // 2048 frames at a time.
- // https://github.com/tmm1/stackprof/blob/5d832832e4afcb88521292d6dfad4a9af760ef7c/ext/stackprof/stackprof.c#L21
- const BUFF_LEN: usize = 2048;
-
- let mut frames_buffer = vec![VALUE(0_usize); BUFF_LEN];
- let mut lines_buffer = vec![0; BUFF_LEN];
-
- let stack_length = unsafe {
- rb_profile_frames(
- 0,
- BUFF_LEN as i32,
- frames_buffer.as_mut_ptr(),
- lines_buffer.as_mut_ptr(),
- )
- };
-
- // Trim at `stack_length` since anything past it is redundant
- frames_buffer.truncate(stack_length as usize);
- lines_buffer.truncate(stack_length as usize);
-
- (stack_length, frames_buffer, lines_buffer)
-}
-
-/// Write samples in `frames_buffer` and `lines_buffer` from profiling into
-/// `raw_samples` and `line_samples`. Also write opcode, number of frames,
-/// and stack size to be consumed by Stackprof.
-fn write_exit_stack_samples(
- raw_samples: &'static mut Vec<VALUE>,
- line_samples: &'static mut Vec<i32>,
- frames_buffer: &[VALUE],
- lines_buffer: &[i32],
- stack_length: i32,
- exit_pc: *const VALUE,
-) {
- raw_samples.push(VALUE(stack_length as usize));
- line_samples.push(stack_length);
-
- // Push frames and their lines in reverse order.
- for i in (0..stack_length as usize).rev() {
- raw_samples.push(frames_buffer[i]);
- line_samples.push(lines_buffer[i]);
- }
-
- // Get the opcode from instruction handler at exit PC.
- let exit_opcode = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) };
- raw_samples.push(VALUE(exit_opcode as usize));
- // Push a dummy line number since we don't know where this insn is from.
- line_samples.push(0);
+/// Resolve a profile frame VALUE to a human-readable "label (path)" string.
+fn resolve_frame_label(frame: VALUE) -> String {
+ unsafe {
+ let label_str = ruby_str_to_rust_string_result(rb_profile_frame_full_label(frame)).unwrap_or("<unknown>".into());
- // Push number of times seen onto the stack.
- raw_samples.push(VALUE(1usize));
- line_samples.push(1);
-}
+ let path = rb_profile_frame_absolute_path(frame);
+ let path = if path.nil_p() { rb_profile_frame_path(frame) } else { path };
+ let path_str = ruby_str_to_rust_string_result(path).unwrap_or("<unknown>".into());
-fn try_increment_existing_stack(
- raw_samples: &mut [VALUE],
- line_samples: &mut [i32],
- frames_buffer: &[VALUE],
- stack_length: i32,
- samples_length: usize,
-) -> bool {
- let prev_stack_len_index = raw_samples.len() - samples_length;
- let prev_stack_len = i64::from(raw_samples[prev_stack_len_index]);
-
- if prev_stack_len == stack_length as i64 {
- // Check if all stack lengths match and all frames are identical
- let frames_match = (0..stack_length).all(|i| {
- let current_frame = frames_buffer[stack_length as usize - 1 - i as usize];
- let prev_frame = raw_samples[prev_stack_len_index + i as usize + 1];
- current_frame == prev_frame
- });
-
- if frames_match {
- let counter_idx = raw_samples.len() - 1;
- let new_count = i64::from(raw_samples[counter_idx]) + 1;
-
- raw_samples[counter_idx] = VALUE(new_count as usize);
- line_samples[counter_idx] = new_count as i32;
- return true;
- }
+ format!("{label_str} ({path_str})")
}
- false
}
-/// Record a backtrace with ZJIT side exits
+/// Record a backtrace with ZJIT side exits as a Perfetto trace event
#[unsafe(no_mangle)]
-pub extern "C" fn rb_zjit_record_exit_stack(exit_pc: *const VALUE) {
+pub extern "C" fn rb_zjit_record_exit_stack(reason: *const std::ffi::c_char) {
if !zjit_enabled_p() || get_option!(trace_side_exits).is_none() {
return;
}
- // When `trace_side_exits_sample_interval` is zero, then the feature is disabled.
+ let tracer = match ZJITState::get_tracer() {
+ Some(t) => t,
+ None => return,
+ };
+
+ // When `trace_side_exits_sample_interval` is non-zero, apply sampling.
if get_option!(trace_side_exits_sample_interval) != 0 {
- // If `trace_side_exits_sample_interval` is set, then can safely unwrap
- // both `get_skipped_samples` and `set_skipped_samples`.
- let skipped_samples = *ZJITState::get_skipped_samples().unwrap();
- if skipped_samples < get_option!(trace_side_exits_sample_interval) {
- // Skip sample and increment counter.
- ZJITState::set_skipped_samples(skipped_samples + 1).unwrap();
+ if tracer.skipped_samples < get_option!(trace_side_exits_sample_interval) {
+ tracer.skipped_samples += 1;
return;
} else {
- ZJITState::set_skipped_samples(0).unwrap();
+ tracer.skipped_samples = 0;
}
}
- let (stack_length, frames_buffer, lines_buffer) = record_profiling_frames();
-
- // Can safely unwrap since `trace_side_exits` must be true at this point
- let zjit_raw_samples = ZJITState::get_raw_samples().unwrap();
- let zjit_line_samples = ZJITState::get_line_samples().unwrap();
- assert_eq!(zjit_raw_samples.len(), zjit_line_samples.len());
-
- // Represents pushing the stack length, the instruction opcode, and the sample count.
- const SAMPLE_METADATA_SIZE: usize = 3;
- let samples_length = (stack_length as usize) + SAMPLE_METADATA_SIZE;
-
- // If zjit_raw_samples is greater than or equal to the current length of the samples
- // we might have seen this stack trace previously.
- if zjit_raw_samples.len() >= samples_length
- && try_increment_existing_stack(
- zjit_raw_samples,
- zjit_line_samples,
- &frames_buffer,
- stack_length,
- samples_length,
- )
- {
- return;
- }
-
- write_exit_stack_samples(
- zjit_raw_samples,
- zjit_line_samples,
- &frames_buffer,
- &lines_buffer,
- stack_length,
- exit_pc,
- );
-}
-
-/// Mark `raw_samples` so they can be used by rb_zjit_add_frame.
-pub fn gc_mark_raw_samples() {
- // Return if ZJIT is not enabled
- if !zjit_enabled_p() || get_option!(trace_side_exits).is_none() {
- return;
- }
+ // Collect profile frames
+ const BUFF_LEN: usize = 2048;
+ let mut frames_buffer = vec![VALUE(0_usize); BUFF_LEN];
+ let mut lines_buffer = vec![0i32; BUFF_LEN];
- let mut idx: size_t = 0;
- let zjit_raw_samples = ZJITState::get_raw_samples().unwrap();
+ let stack_length = unsafe {
+ rb_profile_frames(
+ 0,
+ BUFF_LEN as i32,
+ frames_buffer.as_mut_ptr(),
+ lines_buffer.as_mut_ptr(),
+ )
+ };
- while idx < zjit_raw_samples.len() as size_t {
- let num = zjit_raw_samples[idx as usize];
- let mut i = 0;
- idx += 1;
+ // Resolve each frame to a human-readable string (top frame first)
+ let frames: Vec<String> = (0..stack_length as usize)
+ .map(|i| resolve_frame_label(frames_buffer[i]))
+ .collect();
- // Mark the zjit_raw_samples at the given index. These represent
- // the data that needs to be GC'd which are the current frames.
- while i < i32::from(num) {
- unsafe { rb_gc_mark(zjit_raw_samples[idx as usize]); }
- i += 1;
- idx += 1;
- }
+ // Get the reason string
+ let reason_str = if reason.is_null() {
+ "unknown"
+ } else {
+ unsafe { std::ffi::CStr::from_ptr(reason).to_str().unwrap_or("unknown") }
+ };
- // Increase index for exit instruction.
- idx += 1;
- // Increase index for bookeeping value (number of times we've seen this
- // row in a stack).
- idx += 1;
- }
+ tracer.write_event(reason_str, &frames);
}
diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs
index 68379e90cb..28bd623893 100644
--- a/zjit/src/stats.rs
+++ b/zjit/src/stats.rs
@@ -573,7 +573,11 @@ pub fn side_exit_counter(reason: crate::hir::SideExitReason) -> Counter {
UnhandledCallType(Splat) => exit_unhandled_splat,
UnhandledCallType(Kwarg) => exit_unhandled_kwarg,
UnknownSpecialVariable(_) => exit_unknown_special_variable,
- UnhandledHIRInsn(_) => exit_unhandled_hir_insn,
+ UnhandledHIRArrayMax => exit_unhandled_hir_insn,
+ UnhandledHIRFixnumDiv => exit_unhandled_hir_insn,
+ UnhandledHIRThrow => exit_unhandled_hir_insn,
+ UnhandledHIRInvokeBuiltin => exit_unhandled_hir_insn,
+ UnhandledHIRUnknown(_) => exit_unhandled_hir_insn,
UnhandledYARVInsn(_) => exit_unhandled_yarv_insn,
UnhandledBlockArg => exit_unhandled_block_arg,
FixnumAddOverflow => exit_fixnum_add_overflow,
@@ -975,15 +979,170 @@ pub fn zjit_alloc_bytes() -> usize {
jit::GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst)
}
-/// Struct of arrays for --zjit-trace-exits.
-#[derive(Default)]
-pub struct SideExitLocations {
- /// Control frames of method entries.
- pub raw_samples: Vec<VALUE>,
- /// Line numbers of the iseq caller.
- pub line_samples: Vec<i32>,
- /// Skipped samples
- pub skipped_samples: usize
+/// Fuchsia Trace Format (FXT) binary writer for --zjit-trace-exits.
+/// Produces .fxt files that can be opened directly in Perfetto UI.
+/// Uses the string table for deduplication of repeated reason/frame strings.
+/// See: <https://fuchsia.dev/fuchsia-src/reference/tracing/trace-format>
+pub struct PerfettoTracer {
+ writer: std::io::BufWriter<std::fs::File>,
+ start_time: std::time::Instant,
+ event_count: usize,
+ pub skipped_samples: usize,
+ /// String table: string content -> interned index (1..32767)
+ string_table: std::collections::HashMap<String, u16>,
+ next_string_index: u16,
+ pid: u32,
+}
+
+impl PerfettoTracer {
+ /// Write a single 64-bit little-endian word.
+ fn write_word(&mut self, val: u64) {
+ use std::io::Write;
+ let _ = self.writer.write_all(&val.to_le_bytes());
+ }
+
+ /// Write bytes padded to 8-byte alignment.
+ fn write_padded_bytes(&mut self, bytes: &[u8]) {
+ use std::io::Write;
+ let _ = self.writer.write_all(bytes);
+ let remainder = bytes.len() % 8;
+ if remainder != 0 {
+ let _ = self.writer.write_all(&[0u8; 7][..8 - remainder]);
+ }
+ }
+
+ /// Number of 8-byte words needed for `len` bytes (rounded up).
+ fn word_count(len: usize) -> u64 {
+ ((len + 7) / 8) as u64
+ }
+
+ pub fn new() -> Self {
+ let pid = std::process::id();
+ let path = format!("/tmp/perfetto-{pid}.fxt");
+ let file = std::fs::File::create(&path)
+ .unwrap_or_else(|e| panic!("ZJIT: failed to create {path}: {e}"));
+ let mut tracer = PerfettoTracer {
+ writer: std::io::BufWriter::new(file),
+ start_time: std::time::Instant::now(),
+ event_count: 0,
+ skipped_samples: 0,
+ string_table: std::collections::HashMap::new(),
+ next_string_index: 1, // index 0 = empty string
+ pid,
+ };
+
+ // Magic number record: metadata type=4 (trace info), trace info type=0,
+ // magic=0x16547846 at bits [24..55]
+ tracer.write_word((1u64 << 4) | (4u64 << 16) | (0x16547846u64 << 24));
+
+ // Initialization record: 1 tick = 1 nanosecond
+ tracer.write_word(1u64 | (2u64 << 4));
+ tracer.write_word(1_000_000_000u64);
+
+ // Register thread at index 1: (process_koid=pid, thread_koid=1)
+ tracer.write_word(3u64 | (3u64 << 4) | (1u64 << 16));
+ tracer.write_word(pid as u64);
+ tracer.write_word(1u64);
+
+ // Pre-intern common strings
+ tracer.intern_string("side_exit");
+ // Pre-intern argument names "0".."14" for per-frame arguments
+ for i in 0..15u32 {
+ tracer.intern_string(&i.to_string());
+ }
+
+ // Flush header immediately so something is written even if process exits abruptly
+ {
+ use std::io::Write;
+ let _ = tracer.writer.flush();
+ }
+
+ eprintln!("ZJIT: writing trace exits to {path}");
+ tracer
+ }
+
+ /// Intern a string into the string table, writing a string record if new.
+ /// Returns the string table index (1..32767). Returns 0 for empty strings
+ /// or if the table is full.
+ fn intern_string(&mut self, s: &str) -> u16 {
+ if s.is_empty() {
+ return 0;
+ }
+ if let Some(&idx) = self.string_table.get(s) {
+ return idx;
+ }
+ if self.next_string_index >= 0x8000 {
+ return 0; // table full
+ }
+
+ let idx = self.next_string_index;
+ let bytes = s.as_bytes();
+ let len = bytes.len().min(0x7FFF); // 15-bit max length
+ let record_words = 1 + Self::word_count(len);
+
+ // String record: type=2, index in [16..30], length in [32..46]
+ let header: u64 = 2u64
+ | (record_words << 4)
+ | ((idx as u64) << 16)
+ | ((len as u64) << 32);
+ self.write_word(header);
+ self.write_padded_bytes(&bytes[..len]);
+
+ self.string_table.insert(s.to_string(), idx);
+ self.next_string_index += 1;
+ idx
+ }
+
+ pub fn write_event(&mut self, reason: &str, frames: &[String]) {
+ let ts_nanos = self.start_time.elapsed().as_nanos() as u64;
+
+ // Intern event metadata strings (may emit string records first)
+ let category_ref = self.intern_string("side_exit");
+ let name_ref = self.intern_string(reason);
+
+ // Intern each frame label and collect refs (max 15 due to 4-bit n_args)
+ let n_args = frames.len().min(15) as u64;
+ let mut frame_refs: Vec<(u16, u16)> = Vec::with_capacity(n_args as usize);
+ for (i, frame) in frames.iter().take(15).enumerate() {
+ let name_ref = self.intern_string(&i.to_string());
+ let value_ref = self.intern_string(frame);
+ frame_refs.push((name_ref, value_ref));
+ }
+
+ // Each fully-interned string argument is exactly 1 word
+ let event_words = 2 + n_args;
+ let header: u64 = 4u64
+ | (event_words << 4)
+ | (n_args << 20) // argument count
+ | (1u64 << 24) // thread_ref = 1
+ | ((category_ref as u64) << 32)
+ | ((name_ref as u64) << 48);
+ self.write_word(header);
+ self.write_word(ts_nanos);
+
+ // One 1-word string argument per frame: type=6, size=1, indexed name, indexed value
+ for (name_ref, value_ref) in frame_refs {
+ let arg_header: u64 = 6u64
+ | (1u64 << 4)
+ | ((name_ref as u64) << 16)
+ | ((value_ref as u64) << 32);
+ self.write_word(arg_header);
+ }
+
+ self.event_count += 1;
+
+ // Flush after every event so buffered data is handed to the OS. Static globals may not be
+ // dropped on process exit, so we can't rely on Drop for flushing.
+ use std::io::Write;
+ let _ = self.writer.flush();
+ }
+}
+
+impl Drop for PerfettoTracer {
+ fn drop(&mut self) {
+ use std::io::Write;
+ let _ = self.writer.flush();
+ }
}
/// Primitive called in zjit.rb
@@ -999,29 +1158,3 @@ pub extern "C" fn rb_zjit_trace_exit_locations_enabled_p(_ec: EcPtr, _ruby_self:
}
}
-/// Call the C function to parse the raw_samples and line_samples
-/// into raw, lines, and frames hash for RubyVM::YJIT.exit_locations.
-#[unsafe(no_mangle)]
-pub extern "C" fn rb_zjit_get_exit_locations(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
- if !zjit_enabled_p() || get_option!(trace_side_exits).is_none() {
- return Qnil;
- }
-
- // Can safely unwrap since `trace_side_exits` must be true at this point
- let zjit_raw_samples = ZJITState::get_raw_samples().unwrap();
- let zjit_line_samples = ZJITState::get_line_samples().unwrap();
-
- assert_eq!(zjit_raw_samples.len(), zjit_line_samples.len());
-
- // zjit_raw_samples and zjit_line_samples are the same length so
- // pass only one of the lengths in the C function.
- let samples_len = zjit_raw_samples.len() as i32;
-
- unsafe {
- rb_zjit_exit_locations_dict(
- zjit_raw_samples.as_mut_ptr(),
- zjit_line_samples.as_mut_ptr(),
- samples_len
- )
- }
-}