diff options
| author | Max Bernstein <tekknolagi@gmail.com> | 2026-03-24 16:45:40 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-03-24 16:45:40 -0400 |
| commit | 3b8317eb42dd2d11988ecdc603387308ac991f23 (patch) | |
| tree | 9d9e693a79126891489bbd043de3791ff69e84f9 /zjit | |
| parent | e74823a08098ef87c7a2fc3a35647c4c4467ca40 (diff) | |
ZJIT: Dump side-exit locations in Fuchsia trace format (#16470)
This gives us instant access to all the nice Fuchsia and Perfetto tooling, including zoomable, SQL-queryable browsing of traces:
<img width="1912" height="1185" alt="Screenshot 2026-03-20 at 10 50 57 AM" src="https://github.com/user-attachments/assets/6475bbec-eb55-4886-8e94-13450def2de5" />
Hottest side-exits grouped by exit location using SQL:
```sql
SELECT reason, backtrace, count(*) AS exits FROM (
SELECT
s.id,
s.name AS reason,
group_concat(a.display_value, ' <- ') AS backtrace
FROM slice s
JOIN args a USING(arg_set_id)
WHERE s.category = 'side_exit'
GROUP BY s.id
)
GROUP BY reason, backtrace
ORDER BY exits DESC
LIMIT 30
```
<img width="1912" height="1186" alt="Screenshot 2026-03-24 at 3 58 28 PM" src="https://github.com/user-attachments/assets/8195ccd8-aeb6-4396-8c07-e85bbb280a4a" />
Diffstat (limited to 'zjit')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | zjit/bindgen/src/main.rs | 4 |
| -rw-r--r-- | zjit/src/backend/lir.rs | 6 |
| -rw-r--r-- | zjit/src/codegen.rs | 9 |
| -rw-r--r-- | zjit/src/cruby.rs | 13 |
| -rw-r--r-- | zjit/src/cruby_bindings.inc.rs | 8 |
| -rw-r--r-- | zjit/src/gc.rs | 3 |
| -rw-r--r-- | zjit/src/hir.rs | 6 |
| -rw-r--r-- | zjit/src/state.rs | 232 |
| -rw-r--r-- | zjit/src/stats.rs | 205 |
9 files changed, 255 insertions, 231 deletions
diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index 3e82efd8f6..34720e77eb 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -299,7 +299,9 @@ fn main() { .allowlist_function("rb_RSTRING_PTR") .allowlist_function("rb_RSTRING_LEN") .allowlist_function("rb_ENCODING_GET") - .allowlist_function("rb_zjit_exit_locations_dict") + .allowlist_function("rb_profile_frame_full_label") + .allowlist_function("rb_profile_frame_absolute_path") + .allowlist_function("rb_profile_frame_path") .allowlist_function("rb_optimized_call") .allowlist_function("rb_jit_icache_invalidate") .allowlist_function("rb_zjit_print_exception") diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index 03dc02c678..00a80b9cf4 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -2727,7 +2727,11 @@ impl Assembler // ccall doesn't clobber caller-saved registers // holding stack/local operands. compile_exit_save_state(self, &exit); - asm_ccall!(self, rb_zjit_record_exit_stack, pc); + // Leak a CString with the reason so it's available at runtime + let reason_cstr = std::ffi::CString::new(reason.to_string()) + .unwrap_or_else(|_| std::ffi::CString::new("unknown").unwrap()); + let reason_ptr = reason_cstr.into_raw() as *const u8; + asm_ccall!(self, rb_zjit_record_exit_stack, Opnd::const_ptr(reason_ptr)); compile_exit_return(self); } else { // If the side exit has already been compiled, jump to it. diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 30c99152e2..b473bc69a6 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -467,7 +467,14 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func if let Err(last_snapshot) = result { debug!("ZJIT: gen_function: Failed to compile insn: {insn_id} {insn}. 
Generating side-exit."); gen_incr_counter(&mut asm, exit_counter_for_unhandled_hir_insn(&insn)); - gen_side_exit(&mut jit, &mut asm, &SideExitReason::UnhandledHIRInsn(insn_id), &function.frame_state(last_snapshot)); + let reason = match insn { + Insn::ArrayMax { .. } => SideExitReason::UnhandledHIRArrayMax, + Insn::FixnumDiv { .. } => SideExitReason::UnhandledHIRFixnumDiv, + Insn::Throw { .. } => SideExitReason::UnhandledHIRThrow, + Insn::InvokeBuiltin { .. } => SideExitReason::UnhandledHIRInvokeBuiltin, + _ => SideExitReason::UnhandledHIRUnknown(insn_id), + }; + gen_side_exit(&mut jit, &mut asm, &reason, &function.frame_state(last_snapshot)); // Don't bother generating code after a side-exit. We won't run it. // TODO(max): Generate ud2 or equivalent. break; diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 5c0b99f182..e1343f9b40 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -902,15 +902,18 @@ pub fn iseq_get_location(iseq: IseqPtr, pos: u32) -> String { s } +pub fn ruby_str_to_rust_string_result(v: VALUE) -> Result<String, std::string::FromUtf8Error> { + let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8; + let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap(); + let str_slice: &[u8] = unsafe { std::slice::from_raw_parts(str_ptr, str_len) }; + String::from_utf8(str_slice.to_vec()) +} // Convert a CRuby UTF-8-encoded RSTRING into a Rust string. // This should work fine on ASCII strings and anything else // that is considered legal UTF-8, including embedded nulls. 
-fn ruby_str_to_rust_string(v: VALUE) -> String { - let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8; - let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap(); - let str_slice: &[u8] = unsafe { std::slice::from_raw_parts(str_ptr, str_len) }; - String::from_utf8(str_slice.to_vec()).unwrap_or_default() +pub fn ruby_str_to_rust_string(v: VALUE) -> String { + ruby_str_to_rust_string_result(v).unwrap_or_default() } pub fn ruby_sym_to_rust_string(v: VALUE) -> String { diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 41ebdb0f55..5c7ce49fc6 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -2131,12 +2131,10 @@ unsafe extern "C" { buff: *mut VALUE, lines: *mut ::std::os::raw::c_int, ) -> ::std::os::raw::c_int; + pub fn rb_profile_frame_path(frame: VALUE) -> VALUE; + pub fn rb_profile_frame_absolute_path(frame: VALUE) -> VALUE; + pub fn rb_profile_frame_full_label(frame: VALUE) -> VALUE; pub fn rb_jit_cont_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void); - pub fn rb_zjit_exit_locations_dict( - zjit_raw_samples: *mut VALUE, - zjit_line_samples: *mut ::std::os::raw::c_int, - samples_len: ::std::os::raw::c_int, - ) -> VALUE; pub fn rb_zjit_profile_disable(iseq: *const rb_iseq_t); pub fn rb_vm_base_ptr(cfp: *mut rb_control_frame_struct) -> *mut VALUE; pub fn rb_zjit_constcache_shareable(ice: *const iseq_inline_constant_cache_entry) -> bool; diff --git a/zjit/src/gc.rs b/zjit/src/gc.rs index 40230ccc8d..b79f19837f 100644 --- a/zjit/src/gc.rs +++ b/zjit/src/gc.rs @@ -5,7 +5,6 @@ use std::{ffi::c_void, ops::Range}; use crate::{cruby::*, state::ZJITState, stats::with_time_stat, virtualmem::CodePtr}; use crate::payload::{IseqPayload, IseqVersionRef, get_or_create_iseq_payload}; use crate::stats::Counter::gc_time_ns; -use crate::state::gc_mark_raw_samples; /// GC callback for marking GC objects in the per-ISEQ payload. 
#[unsafe(no_mangle)] @@ -207,5 +206,5 @@ fn ranges_overlap<T>(left: &Range<T>, right: &Range<T>) -> bool where T: Partial /// Callback for marking GC objects inside [crate::invariants::Invariants]. #[unsafe(no_mangle)] pub extern "C" fn rb_zjit_root_mark() { - gc_mark_raw_samples(); + // TODO(max): Either add roots to mark or consider removing this callback } diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index b8e37059eb..306ab7d8cb 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -498,7 +498,11 @@ pub enum SideExitReason { UnhandledNewarraySend(vm_opt_newarray_send_type), UnhandledDuparraySend(u64), UnknownSpecialVariable(u64), - UnhandledHIRInsn(InsnId), + UnhandledHIRArrayMax, + UnhandledHIRFixnumDiv, + UnhandledHIRThrow, + UnhandledHIRInvokeBuiltin, + UnhandledHIRUnknown(InsnId), UnhandledYARVInsn(u32), UnhandledCallType(CallType), UnhandledBlockArg, diff --git a/zjit/src/state.rs b/zjit/src/state.rs index b8dcd70358..b9f8033e7f 100644 --- a/zjit/src/state.rs +++ b/zjit/src/state.rs @@ -1,14 +1,14 @@ //! Runtime state of ZJIT. 
use crate::codegen::{gen_entry_trampoline, gen_exit_trampoline, gen_exit_trampoline_with_counter, gen_function_stub_hit_trampoline}; -use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insn_count, src_loc, EcPtr, Qnil, Qtrue, rb_vm_insn_addr2opcode, rb_profile_frames, VALUE, VM_INSTRUCTION_SIZE, size_t, rb_gc_mark, with_vm_lock, rust_str_to_id, rb_funcallv, rb_const_get, rb_cRubyVM}; +use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insn_count, src_loc, EcPtr, Qnil, Qtrue, rb_profile_frames, rb_profile_frame_full_label, rb_profile_frame_absolute_path, rb_profile_frame_path, VALUE, VM_INSTRUCTION_SIZE, with_vm_lock, rust_str_to_id, rb_funcallv, rb_const_get, rb_cRubyVM}; use crate::cruby_methods; -use cruby::{ID, rb_callable_method_entry, get_def_method_serial, rb_gc_register_mark_object}; +use cruby::{ID, rb_callable_method_entry, get_def_method_serial, rb_gc_register_mark_object, ruby_str_to_rust_string_result}; use std::sync::atomic::Ordering; use crate::invariants::Invariants; use crate::asm::CodeBlock; use crate::options::{get_option, rb_zjit_prepare_options}; -use crate::stats::{Counters, InsnCounters, SideExitLocations}; +use crate::stats::{Counters, InsnCounters, PerfettoTracer}; use crate::virtualmem::CodePtr; use std::sync::atomic::AtomicUsize; use std::collections::HashMap; @@ -68,8 +68,8 @@ pub struct ZJITState { /// Counter pointers for access counts of ISEQs accessed by JIT code iseq_calls_count_pointers: HashMap<String, Box<u64>>, - /// Locations of side exists within generated code - exit_locations: Option<SideExitLocations>, + /// Perfetto tracer for --zjit-trace-exits + perfetto_tracer: Option<PerfettoTracer>, } /// Tracks the initialization progress @@ -124,8 +124,8 @@ impl ZJITState { let exit_trampoline = gen_exit_trampoline(&mut cb).unwrap(); let function_stub_hit_trampoline = gen_function_stub_hit_trampoline(&mut cb).unwrap(); - let exit_locations = if get_option!(trace_side_exits).is_some() { - Some(SideExitLocations::default()) + let 
perfetto_tracer = if get_option!(trace_side_exits).is_some() { + Some(PerfettoTracer::new()) } else { None }; @@ -146,7 +146,7 @@ impl ZJITState { not_annotated_frame_cfunc_counter_pointers: HashMap::new(), ccall_counter_pointers: HashMap::new(), iseq_calls_count_pointers: HashMap::new(), - exit_locations, + perfetto_tracer, }; unsafe { ZJIT_STATE = Enabled(zjit_state); } @@ -283,24 +283,9 @@ impl ZJITState { ZJITState::get_instance().function_stub_hit_trampoline } - /// Get a mutable reference to the ZJIT raw samples Vec - pub fn get_raw_samples() -> Option<&'static mut Vec<VALUE>> { - ZJITState::get_instance().exit_locations.as_mut().map(|el| &mut el.raw_samples) - } - - /// Get a mutable reference to the ZJIT line samples Vec. - pub fn get_line_samples() -> Option<&'static mut Vec<i32>> { - ZJITState::get_instance().exit_locations.as_mut().map(|el| &mut el.line_samples) - } - - /// Get number of skipped samples. - pub fn get_skipped_samples() -> Option<&'static mut usize> { - ZJITState::get_instance().exit_locations.as_mut().map(|el| &mut el.skipped_samples) - } - - /// Get number of skipped samples. - pub fn set_skipped_samples(n: usize) -> Option<()> { - ZJITState::get_instance().exit_locations.as_mut().map(|el| el.skipped_samples = n) + /// Get a mutable reference to the Perfetto tracer + pub fn get_tracer() -> Option<&'static mut PerfettoTracer> { + ZJITState::get_instance().perfetto_tracer.as_mut() } } @@ -437,177 +422,66 @@ pub extern "C" fn rb_zjit_assert_compiles(_ec: EcPtr, _self: VALUE) -> VALUE { Qnil } -/// Call `rb_profile_frames` and write the result into buffers to be consumed by `rb_zjit_record_exit_stack`. -fn record_profiling_frames() -> (i32, Vec<VALUE>, Vec<i32>) { - // Stackprof uses a buffer of length 2048 when collating the frames into statistics. - // Since eventually the collected information will be used by Stackprof, collect only - // 2048 frames at a time. 
- // https://github.com/tmm1/stackprof/blob/5d832832e4afcb88521292d6dfad4a9af760ef7c/ext/stackprof/stackprof.c#L21 - const BUFF_LEN: usize = 2048; - - let mut frames_buffer = vec![VALUE(0_usize); BUFF_LEN]; - let mut lines_buffer = vec![0; BUFF_LEN]; - - let stack_length = unsafe { - rb_profile_frames( - 0, - BUFF_LEN as i32, - frames_buffer.as_mut_ptr(), - lines_buffer.as_mut_ptr(), - ) - }; - - // Trim at `stack_length` since anything past it is redundant - frames_buffer.truncate(stack_length as usize); - lines_buffer.truncate(stack_length as usize); - - (stack_length, frames_buffer, lines_buffer) -} - -/// Write samples in `frames_buffer` and `lines_buffer` from profiling into -/// `raw_samples` and `line_samples`. Also write opcode, number of frames, -/// and stack size to be consumed by Stackprof. -fn write_exit_stack_samples( - raw_samples: &'static mut Vec<VALUE>, - line_samples: &'static mut Vec<i32>, - frames_buffer: &[VALUE], - lines_buffer: &[i32], - stack_length: i32, - exit_pc: *const VALUE, -) { - raw_samples.push(VALUE(stack_length as usize)); - line_samples.push(stack_length); - - // Push frames and their lines in reverse order. - for i in (0..stack_length as usize).rev() { - raw_samples.push(frames_buffer[i]); - line_samples.push(lines_buffer[i]); - } - - // Get the opcode from instruction handler at exit PC. - let exit_opcode = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) }; - raw_samples.push(VALUE(exit_opcode as usize)); - // Push a dummy line number since we don't know where this insn is from. - line_samples.push(0); +/// Resolve a profile frame VALUE to a human-readable "label (path)" string. +fn resolve_frame_label(frame: VALUE) -> String { + unsafe { + let label_str = ruby_str_to_rust_string_result(rb_profile_frame_full_label(frame)).unwrap_or("<unknown>".into()); - // Push number of times seen onto the stack. 
- raw_samples.push(VALUE(1usize)); - line_samples.push(1); -} + let path = rb_profile_frame_absolute_path(frame); + let path = if path.nil_p() { rb_profile_frame_path(frame) } else { path }; + let path_str = ruby_str_to_rust_string_result(path).unwrap_or("<unknown>".into()); -fn try_increment_existing_stack( - raw_samples: &mut [VALUE], - line_samples: &mut [i32], - frames_buffer: &[VALUE], - stack_length: i32, - samples_length: usize, -) -> bool { - let prev_stack_len_index = raw_samples.len() - samples_length; - let prev_stack_len = i64::from(raw_samples[prev_stack_len_index]); - - if prev_stack_len == stack_length as i64 { - // Check if all stack lengths match and all frames are identical - let frames_match = (0..stack_length).all(|i| { - let current_frame = frames_buffer[stack_length as usize - 1 - i as usize]; - let prev_frame = raw_samples[prev_stack_len_index + i as usize + 1]; - current_frame == prev_frame - }); - - if frames_match { - let counter_idx = raw_samples.len() - 1; - let new_count = i64::from(raw_samples[counter_idx]) + 1; - - raw_samples[counter_idx] = VALUE(new_count as usize); - line_samples[counter_idx] = new_count as i32; - return true; - } + format!("{label_str} ({path_str})") } - false } -/// Record a backtrace with ZJIT side exits +/// Record a backtrace with ZJIT side exits as a Perfetto trace event #[unsafe(no_mangle)] -pub extern "C" fn rb_zjit_record_exit_stack(exit_pc: *const VALUE) { +pub extern "C" fn rb_zjit_record_exit_stack(reason: *const std::ffi::c_char) { if !zjit_enabled_p() || get_option!(trace_side_exits).is_none() { return; } - // When `trace_side_exits_sample_interval` is zero, then the feature is disabled. + let tracer = match ZJITState::get_tracer() { + Some(t) => t, + None => return, + }; + + // When `trace_side_exits_sample_interval` is non-zero, apply sampling. 
if get_option!(trace_side_exits_sample_interval) != 0 { - // If `trace_side_exits_sample_interval` is set, then can safely unwrap - // both `get_skipped_samples` and `set_skipped_samples`. - let skipped_samples = *ZJITState::get_skipped_samples().unwrap(); - if skipped_samples < get_option!(trace_side_exits_sample_interval) { - // Skip sample and increment counter. - ZJITState::set_skipped_samples(skipped_samples + 1).unwrap(); + if tracer.skipped_samples < get_option!(trace_side_exits_sample_interval) { + tracer.skipped_samples += 1; return; } else { - ZJITState::set_skipped_samples(0).unwrap(); + tracer.skipped_samples = 0; } } - let (stack_length, frames_buffer, lines_buffer) = record_profiling_frames(); - - // Can safely unwrap since `trace_side_exits` must be true at this point - let zjit_raw_samples = ZJITState::get_raw_samples().unwrap(); - let zjit_line_samples = ZJITState::get_line_samples().unwrap(); - assert_eq!(zjit_raw_samples.len(), zjit_line_samples.len()); - - // Represents pushing the stack length, the instruction opcode, and the sample count. - const SAMPLE_METADATA_SIZE: usize = 3; - let samples_length = (stack_length as usize) + SAMPLE_METADATA_SIZE; - - // If zjit_raw_samples is greater than or equal to the current length of the samples - // we might have seen this stack trace previously. - if zjit_raw_samples.len() >= samples_length - && try_increment_existing_stack( - zjit_raw_samples, - zjit_line_samples, - &frames_buffer, - stack_length, - samples_length, - ) - { - return; - } - - write_exit_stack_samples( - zjit_raw_samples, - zjit_line_samples, - &frames_buffer, - &lines_buffer, - stack_length, - exit_pc, - ); -} - -/// Mark `raw_samples` so they can be used by rb_zjit_add_frame. 
-pub fn gc_mark_raw_samples() { - // Return if ZJIT is not enabled - if !zjit_enabled_p() || get_option!(trace_side_exits).is_none() { - return; - } + // Collect profile frames + const BUFF_LEN: usize = 2048; + let mut frames_buffer = vec![VALUE(0_usize); BUFF_LEN]; + let mut lines_buffer = vec![0i32; BUFF_LEN]; - let mut idx: size_t = 0; - let zjit_raw_samples = ZJITState::get_raw_samples().unwrap(); + let stack_length = unsafe { + rb_profile_frames( + 0, + BUFF_LEN as i32, + frames_buffer.as_mut_ptr(), + lines_buffer.as_mut_ptr(), + ) + }; - while idx < zjit_raw_samples.len() as size_t { - let num = zjit_raw_samples[idx as usize]; - let mut i = 0; - idx += 1; + // Resolve each frame to a human-readable string (top frame first) + let frames: Vec<String> = (0..stack_length as usize) + .map(|i| resolve_frame_label(frames_buffer[i])) + .collect(); - // Mark the zjit_raw_samples at the given index. These represent - // the data that needs to be GC'd which are the current frames. - while i < i32::from(num) { - unsafe { rb_gc_mark(zjit_raw_samples[idx as usize]); } - i += 1; - idx += 1; - } + // Get the reason string + let reason_str = if reason.is_null() { + "unknown" + } else { + unsafe { std::ffi::CStr::from_ptr(reason).to_str().unwrap_or("unknown") } + }; - // Increase index for exit instruction. - idx += 1; - // Increase index for bookeeping value (number of times we've seen this - // row in a stack). 
- idx += 1; - } + tracer.write_event(reason_str, &frames); } diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index 68379e90cb..28bd623893 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -573,7 +573,11 @@ pub fn side_exit_counter(reason: crate::hir::SideExitReason) -> Counter { UnhandledCallType(Splat) => exit_unhandled_splat, UnhandledCallType(Kwarg) => exit_unhandled_kwarg, UnknownSpecialVariable(_) => exit_unknown_special_variable, - UnhandledHIRInsn(_) => exit_unhandled_hir_insn, + UnhandledHIRArrayMax => exit_unhandled_hir_insn, + UnhandledHIRFixnumDiv => exit_unhandled_hir_insn, + UnhandledHIRThrow => exit_unhandled_hir_insn, + UnhandledHIRInvokeBuiltin => exit_unhandled_hir_insn, + UnhandledHIRUnknown(_) => exit_unhandled_hir_insn, UnhandledYARVInsn(_) => exit_unhandled_yarv_insn, UnhandledBlockArg => exit_unhandled_block_arg, FixnumAddOverflow => exit_fixnum_add_overflow, @@ -975,15 +979,170 @@ pub fn zjit_alloc_bytes() -> usize { jit::GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst) } -/// Struct of arrays for --zjit-trace-exits. -#[derive(Default)] -pub struct SideExitLocations { - /// Control frames of method entries. - pub raw_samples: Vec<VALUE>, - /// Line numbers of the iseq caller. - pub line_samples: Vec<i32>, - /// Skipped samples - pub skipped_samples: usize +/// Fuchsia Trace Format (FXT) binary writer for --zjit-trace-exits. +/// Produces .fxt files that can be opened directly in Perfetto UI. +/// Uses the string table for deduplication of repeated reason/frame strings. +/// See: <https://fuchsia.dev/fuchsia-src/reference/tracing/trace-format> +pub struct PerfettoTracer { + writer: std::io::BufWriter<std::fs::File>, + start_time: std::time::Instant, + event_count: usize, + pub skipped_samples: usize, + /// String table: string content -> interned index (1..32767) + string_table: std::collections::HashMap<String, u16>, + next_string_index: u16, + pid: u32, +} + +impl PerfettoTracer { + /// Write a single 64-bit little-endian word. 
+ fn write_word(&mut self, val: u64) { + use std::io::Write; + let _ = self.writer.write_all(&val.to_le_bytes()); + } + + /// Write bytes padded to 8-byte alignment. + fn write_padded_bytes(&mut self, bytes: &[u8]) { + use std::io::Write; + let _ = self.writer.write_all(bytes); + let remainder = bytes.len() % 8; + if remainder != 0 { + let _ = self.writer.write_all(&[0u8; 7][..8 - remainder]); + } + } + + /// Number of 8-byte words needed for `len` bytes (rounded up). + fn word_count(len: usize) -> u64 { + ((len + 7) / 8) as u64 + } + + pub fn new() -> Self { + let pid = std::process::id(); + let path = format!("/tmp/perfetto-{pid}.fxt"); + let file = std::fs::File::create(&path) + .unwrap_or_else(|e| panic!("ZJIT: failed to create {path}: {e}")); + let mut tracer = PerfettoTracer { + writer: std::io::BufWriter::new(file), + start_time: std::time::Instant::now(), + event_count: 0, + skipped_samples: 0, + string_table: std::collections::HashMap::new(), + next_string_index: 1, // index 0 = empty string + pid, + }; + + // Magic number record: metadata type=4 (trace info), trace info type=0, + // magic=0x16547846 at bits [24..55] + tracer.write_word((1u64 << 4) | (4u64 << 16) | (0x16547846u64 << 24)); + + // Initialization record: 1 tick = 1 nanosecond + tracer.write_word(1u64 | (2u64 << 4)); + tracer.write_word(1_000_000_000u64); + + // Register thread at index 1: (process_koid=pid, thread_koid=1) + tracer.write_word(3u64 | (3u64 << 4) | (1u64 << 16)); + tracer.write_word(pid as u64); + tracer.write_word(1u64); + + // Pre-intern common strings + tracer.intern_string("side_exit"); + // Pre-intern argument names "0".."14" for per-frame arguments + for i in 0..15u32 { + tracer.intern_string(&i.to_string()); + } + + // Flush header immediately so something is written even if process exits abruptly + { + use std::io::Write; + let _ = tracer.writer.flush(); + } + + eprintln!("ZJIT: writing trace exits to {path}"); + tracer + } + + /// Intern a string into the string table, 
writing a string record if new. + /// Returns the string table index (1..32767). Returns 0 for empty strings + /// or if the table is full. + fn intern_string(&mut self, s: &str) -> u16 { + if s.is_empty() { + return 0; + } + if let Some(&idx) = self.string_table.get(s) { + return idx; + } + if self.next_string_index >= 0x8000 { + return 0; // table full + } + + let idx = self.next_string_index; + let bytes = s.as_bytes(); + let len = bytes.len().min(0x7FFF); // 15-bit max length + let record_words = 1 + Self::word_count(len); + + // String record: type=2, index in [16..30], length in [32..46] + let header: u64 = 2u64 + | (record_words << 4) + | ((idx as u64) << 16) + | ((len as u64) << 32); + self.write_word(header); + self.write_padded_bytes(&bytes[..len]); + + self.string_table.insert(s.to_string(), idx); + self.next_string_index += 1; + idx + } + + pub fn write_event(&mut self, reason: &str, frames: &[String]) { + let ts_nanos = self.start_time.elapsed().as_nanos() as u64; + + // Intern event metadata strings (may emit string records first) + let category_ref = self.intern_string("side_exit"); + let name_ref = self.intern_string(reason); + + // Intern each frame label and collect refs (max 15 due to 4-bit n_args) + let n_args = frames.len().min(15) as u64; + let mut frame_refs: Vec<(u16, u16)> = Vec::with_capacity(n_args as usize); + for (i, frame) in frames.iter().take(15).enumerate() { + let name_ref = self.intern_string(&i.to_string()); + let value_ref = self.intern_string(frame); + frame_refs.push((name_ref, value_ref)); + } + + // Each fully-interned string argument is exactly 1 word + let event_words = 2 + n_args; + let header: u64 = 4u64 + | (event_words << 4) + | (n_args << 20) // argument count + | (1u64 << 24) // thread_ref = 1 + | ((category_ref as u64) << 32) + | ((name_ref as u64) << 48); + self.write_word(header); + self.write_word(ts_nanos); + + // One 1-word string argument per frame: type=6, size=1, indexed name, indexed value + for (name_ref, 
value_ref) in frame_refs { + let arg_header: u64 = 6u64 + | (1u64 << 4) + | ((name_ref as u64) << 16) + | ((value_ref as u64) << 32); + self.write_word(arg_header); + } + + self.event_count += 1; + + // Flush to ensure data reaches disk. Static globals may not be + // dropped on process exit, so we can't rely on Drop for flushing. + use std::io::Write; + let _ = self.writer.flush(); + } +} + +impl Drop for PerfettoTracer { + fn drop(&mut self) { + use std::io::Write; + let _ = self.writer.flush(); + } } /// Primitive called in zjit.rb @@ -999,29 +1158,3 @@ pub extern "C" fn rb_zjit_trace_exit_locations_enabled_p(_ec: EcPtr, _ruby_self: } } -/// Call the C function to parse the raw_samples and line_samples -/// into raw, lines, and frames hash for RubyVM::YJIT.exit_locations. -#[unsafe(no_mangle)] -pub extern "C" fn rb_zjit_get_exit_locations(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { - if !zjit_enabled_p() || get_option!(trace_side_exits).is_none() { - return Qnil; - } - - // Can safely unwrap since `trace_side_exits` must be true at this point - let zjit_raw_samples = ZJITState::get_raw_samples().unwrap(); - let zjit_line_samples = ZJITState::get_line_samples().unwrap(); - - assert_eq!(zjit_raw_samples.len(), zjit_line_samples.len()); - - // zjit_raw_samples and zjit_line_samples are the same length so - // pass only one of the lengths in the C function. - let samples_len = zjit_raw_samples.len() as i32; - - unsafe { - rb_zjit_exit_locations_dict( - zjit_raw_samples.as_mut_ptr(), - zjit_line_samples.as_mut_ptr(), - samples_len - ) - } -} |
