diff options
| author | Aiden Fox Ivey <aiden.foxivey@shopify.com> | 2025-09-30 11:55:33 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-30 15:55:33 +0000 |
| commit | 2f1c30cd50e464880e44da670d3ad8ebe00fc899 (patch) | |
| tree | 40176613b3f2587d7f898e21bfa74af84cd5311d | |
| parent | d016595387069677c6b992dffe9322f67dc9bc73 (diff) | |
ZJIT: Add --zjit-trace-exits (#14640)
Add side exit tracing functionality for ZJIT
| -rw-r--r-- | doc/zjit.md | 14 | ||||
| -rw-r--r-- | gc.c | 8 | ||||
| -rw-r--r-- | yjit/src/stats.rs | 4 | ||||
| -rw-r--r-- | zjit.c | 91 | ||||
| -rw-r--r-- | zjit.rb | 114 | ||||
| -rw-r--r-- | zjit/bindgen/src/main.rs | 3 | ||||
| -rw-r--r-- | zjit/src/backend/lir.rs | 12 | ||||
| -rw-r--r-- | zjit/src/cruby_bindings.inc.rs | 5 | ||||
| -rw-r--r-- | zjit/src/gc.rs | 7 | ||||
| -rw-r--r-- | zjit/src/options.rs | 10 | ||||
| -rw-r--r-- | zjit/src/state.rs | 185 | ||||
| -rw-r--r-- | zjit/src/stats.rs | 49 |
12 files changed, 496 insertions, 6 deletions
diff --git a/doc/zjit.md b/doc/zjit.md index 4eedcca3ba..57a95457d3 100644 --- a/doc/zjit.md +++ b/doc/zjit.md @@ -153,6 +153,20 @@ To build with stats support: make -j ``` +### Tracing side exits + +Through [Stackprof](https://github.com/tmm1/stackprof), detailed information about the methods that the JIT side-exits from can be displayed after some execution of a program. Note that the use of `--zjit-trace-exits` must be used alongside `--zjit-stats`. + +```bash +./miniruby --zjit-stats --zjit-trace-exits script.rb +``` + +A file called `zjit_exit_locations.dump` will be created in the same directory as `script.rb`. Viewing the side exited methods can be done with Stackprof: + +```bash +stackprof path/to/zjit_exit_locations.dump +``` + ## ZJIT Glossary This glossary contains terms that are helpful for understanding ZJIT. @@ -3070,6 +3070,14 @@ rb_gc_mark_roots(void *objspace, const char **categoryp) } #endif +#if USE_ZJIT + void rb_zjit_root_mark(void); + if (rb_zjit_enabled_p) { + MARK_CHECKPOINT("ZJIT"); + rb_zjit_root_mark(); + } +#endif + MARK_CHECKPOINT("machine_context"); mark_current_machine_context(ec); diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs index 09971c5b3a..b63e1c3272 100644 --- a/yjit/src/stats.rs +++ b/yjit/src/stats.rs @@ -893,7 +893,7 @@ fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE { /// and line samples. Their length should be the same, however the data stored in /// them is different. #[no_mangle] -pub extern "C" fn rb_yjit_record_exit_stack(_exit_pc: *const VALUE) +pub extern "C" fn rb_yjit_record_exit_stack(exit_pc: *const VALUE) { // Return if YJIT is not enabled if !yjit_enabled_p() { @@ -920,7 +920,7 @@ pub extern "C" fn rb_yjit_record_exit_stack(_exit_pc: *const VALUE) #[cfg(not(test))] { // Get the opcode from the encoded insn handler at this PC - let insn = unsafe { rb_vm_insn_addr2opcode((*_exit_pc).as_ptr()) }; + let insn = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) }; // Use the same buffer size as Stackprof. const BUFF_LEN: usize = 2048; @@ -31,6 +31,95 @@ #include <errno.h> +#define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x))) + +// For a given raw_sample (frame), set the hash with the caller's +// name, file, and line number. Return the hash with collected frame_info. +static void +rb_zjit_add_frame(VALUE hash, VALUE frame) +{ + VALUE frame_id = PTR2NUM(frame); + + if (RTEST(rb_hash_aref(hash, frame_id))) { + return; + } + else { + VALUE frame_info = rb_hash_new(); + // Full label for the frame + VALUE name = rb_profile_frame_full_label(frame); + // Absolute path of the frame from rb_iseq_realpath + VALUE file = rb_profile_frame_absolute_path(frame); + // Line number of the frame + VALUE line = rb_profile_frame_first_lineno(frame); + + // If absolute path isn't available use the rb_iseq_path + if (NIL_P(file)) { + file = rb_profile_frame_path(frame); + } + + rb_hash_aset(frame_info, ID2SYM(rb_intern("name")), name); + rb_hash_aset(frame_info, ID2SYM(rb_intern("file")), file); + + if (line != INT2FIX(0)) { + rb_hash_aset(frame_info, ID2SYM(rb_intern("line")), line); + } + + rb_hash_aset(hash, frame_id, frame_info); + } +} + +// Parses the ZjitExitLocations raw_samples and line_samples collected by +// rb_zjit_record_exit_stack and turns them into 3 hashes (raw, lines, and frames) to +// be used by RubyVM::ZJIT.exit_locations. zjit_raw_samples represents the raw frames information +// (without name, file, and line), and zjit_line_samples represents the line information +// of the iseq caller. +VALUE +rb_zjit_exit_locations_dict(VALUE *zjit_raw_samples, int *zjit_line_samples, int samples_len) +{ + VALUE result = rb_hash_new(); + VALUE raw_samples = rb_ary_new_capa(samples_len); + VALUE line_samples = rb_ary_new_capa(samples_len); + VALUE frames = rb_hash_new(); + int idx = 0; + + // While the index is less than samples_len, parse zjit_raw_samples and + // zjit_line_samples, then add casted values to raw_samples and line_samples array. + while (idx < samples_len) { + int num = (int)zjit_raw_samples[idx]; + int line_num = (int)zjit_line_samples[idx]; + idx++; + + rb_ary_push(raw_samples, SIZET2NUM(num)); + rb_ary_push(line_samples, INT2NUM(line_num)); + + // Loop through the length of samples_len and add data to the + // frames hash. Also push the current value onto the raw_samples + // and line_samples arrary respectively. + for (int o = 0; o < num; o++) { + rb_zjit_add_frame(frames, zjit_raw_samples[idx]); + rb_ary_push(raw_samples, SIZET2NUM(zjit_raw_samples[idx])); + rb_ary_push(line_samples, INT2NUM(zjit_line_samples[idx])); + idx++; + } + + rb_ary_push(raw_samples, SIZET2NUM(zjit_raw_samples[idx])); + rb_ary_push(line_samples, INT2NUM(zjit_line_samples[idx])); + idx++; + + rb_ary_push(raw_samples, SIZET2NUM(zjit_raw_samples[idx])); + rb_ary_push(line_samples, INT2NUM(zjit_line_samples[idx])); + idx++; + } + + // Set add the raw_samples, line_samples, and frames to the results + // hash. + rb_hash_aset(result, ID2SYM(rb_intern("raw")), raw_samples); + rb_hash_aset(result, ID2SYM(rb_intern("lines")), line_samples); + rb_hash_aset(result, ID2SYM(rb_intern("frames")), frames); + + return result; +} + void rb_zjit_profile_disable(const rb_iseq_t *iseq); void @@ -217,6 +306,8 @@ VALUE rb_zjit_stats(rb_execution_context_t *ec, VALUE self, VALUE target_key); VALUE rb_zjit_reset_stats_bang(rb_execution_context_t *ec, VALUE self); VALUE rb_zjit_stats_enabled_p(rb_execution_context_t *ec, VALUE self); VALUE rb_zjit_print_stats_p(rb_execution_context_t *ec, VALUE self); +VALUE rb_zjit_trace_exit_locations_enabled_p(rb_execution_context_t *ec, VALUE self); +VALUE rb_zjit_get_exit_locations(rb_execution_context_t *ec, VALUE self); // Preprocessed zjit.rb generated during build #include "zjit.rbinc" @@ -9,7 +9,10 @@ module RubyVM::ZJIT # Avoid calling a Ruby method here to avoid interfering with compilation tests if Primitive.rb_zjit_print_stats_p - at_exit { print_stats } + at_exit { + print_stats + dump_locations + } end end @@ -19,6 +22,106 @@ class << RubyVM::ZJIT Primitive.cexpr! 'RBOOL(rb_zjit_enabled_p)' end + # Check if `--zjit-trace-exits` is used + def trace_exit_locations_enabled? + Primitive.rb_zjit_trace_exit_locations_enabled_p + end + + # If --zjit-trace-exits is enabled parse the hashes from + # Primitive.rb_zjit_get_exit_locations into a format readable + # by Stackprof. This will allow us to find the exact location of a + # side exit in ZJIT based on the instruction that is exiting. + def exit_locations + return unless trace_exit_locations_enabled? + + results = Primitive.rb_zjit_get_exit_locations + raw_samples = results[:raw].dup + line_samples = results[:lines].dup + frames = results[:frames].dup + samples_count = 0 + + frames.each do |frame_id, frame| + frame[:samples] = 0 + frame[:edges] = {} + end + + # Loop through the instructions and set the frame hash with the data. + # We use nonexistent.def for the file name, otherwise insns.def will be displayed + # and that information isn't useful in this context. + RubyVM::INSTRUCTION_NAMES.each_with_index do |name, frame_id| + frame_hash = { samples: 0, total_samples: 0, edges: {}, name: name, file: "nonexistent.def", line: nil } + results[:frames][frame_id] = frame_hash + frames[frame_id] = frame_hash + end + + # Loop through the raw_samples and build the hashes for StackProf. + # The loop is based off an example in the StackProf documentation and therefore + # this functionality can only work with that library. + while raw_samples.length > 0 + stack_trace = raw_samples.shift(raw_samples.shift + 1) + lines = line_samples.shift(line_samples.shift + 1) + prev_frame_id = nil + + stack_trace.each_with_index do |frame_id, idx| + if prev_frame_id + prev_frame = frames[prev_frame_id] + prev_frame[:edges][frame_id] ||= 0 + prev_frame[:edges][frame_id] += 1 + end + + frame_info = frames[frame_id] + frame_info[:total_samples] ||= 0 + frame_info[:total_samples] += 1 + + frame_info[:lines] ||= {} + frame_info[:lines][lines[idx]] ||= [0, 0] + frame_info[:lines][lines[idx]][0] += 1 + + prev_frame_id = frame_id + end + + top_frame_id = stack_trace.last + top_frame_line = 1 + + frames[top_frame_id][:samples] += 1 + frames[top_frame_id][:lines] ||= {} + frames[top_frame_id][:lines][top_frame_line] ||= [0, 0] + frames[top_frame_id][:lines][top_frame_line][1] += 1 + + samples_count += raw_samples.shift + line_samples.shift + end + + results[:samples] = samples_count + # Set missed_samples and gc_samples to 0 as their values + # don't matter to us in this context. + results[:missed_samples] = 0 + results[:gc_samples] = 0 + results + end + + # Marshal dumps exit locations to the given filename. + # + # Usage: + # + # In a script call: + # + # RubyVM::ZJIT.dump_exit_locations("my_file.dump") + # + # Then run the file with the following options: + # + # ruby --zjit --zjit-stats --zjit-trace-exits test.rb + # + # Once the code is done running, use Stackprof to read the dump file. + # See Stackprof documentation for options. + def dump_exit_locations(filename) + unless trace_exit_locations_enabled? + raise ArgumentError, "--zjit-trace-exits must be enabled to use dump_exit_locations." + end + + File.write(filename, Marshal.dump(RubyVM::ZJIT.exit_locations)) + end + # Check if `--zjit-stats` is used def stats_enabled? Primitive.rb_zjit_stats_enabled_p @@ -148,4 +251,13 @@ class << RubyVM::ZJIT def print_stats $stderr.write stats_string end + + def dump_locations # :nodoc: + return unless trace_exit_locations_enabled? + + filename = "zjit_exit_locations.dump" + dump_exit_locations(filename) + + $stderr.puts("ZJIT exit locations dumped to `#{filename}`.") + end end diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index c4233521cc..e1d19f9442 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -281,6 +281,7 @@ fn main() { .allowlist_function("rb_RSTRING_PTR") .allowlist_function("rb_RSTRING_LEN") .allowlist_function("rb_ENCODING_GET") + .allowlist_function("rb_zjit_exit_locations_dict") .allowlist_function("rb_optimized_call") .allowlist_function("rb_jit_icache_invalidate") .allowlist_function("rb_zjit_print_exception") @@ -327,6 +328,8 @@ fn main() { .allowlist_function("rb_class_new_instance_pass_kw") .allowlist_function("rb_obj_alloc") .allowlist_function("rb_obj_info") + // From include/ruby/debug.h + .allowlist_function("rb_profile_frames") .allowlist_function("ruby_xfree") .allowlist_function("rb_profile_frames") diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index 21adc42cd1..76a53c66d6 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -9,6 +9,7 @@ use crate::cruby::VALUE; use crate::stats::{exit_counter_ptr, exit_counter_ptr_for_opcode, CompileError}; use crate::virtualmem::CodePtr; use crate::asm::{CodeBlock, Label}; +use crate::state::rb_zjit_record_exit_stack; pub use crate::backend::current::{ Reg, @@ -1629,6 +1630,16 @@ impl Assembler } } + if get_option!(trace_side_exits) { + // Use `load_into` with `C_ARG_OPNDS` instead of `opnds` argument for ccall, since `compile_side_exits` + // is after the split pass, which would allow use of `opnds`. + self.load_into(C_ARG_OPNDS[0], Opnd::const_ptr(pc as *const u8)); + self.ccall( + rb_zjit_record_exit_stack as *const u8, + vec![] + ); + } + asm_comment!(self, "exit to the interpreter"); self.frame_teardown(&[]); // matching the setup in :bb0-prologue: self.mov(C_RET_OPND, Opnd::UImm(Qundef.as_u64())); @@ -2080,4 +2091,3 @@ mod tests { asm.load_into(mem, mem); } } - diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 17a2d5a63d..2d8a8eb11e 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -921,6 +921,11 @@ unsafe extern "C" { lines: *mut ::std::os::raw::c_int, ) -> ::std::os::raw::c_int; pub fn rb_jit_cont_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void); + pub fn rb_zjit_exit_locations_dict( + zjit_raw_samples: *mut VALUE, + zjit_line_samples: *mut ::std::os::raw::c_int, + samples_len: ::std::os::raw::c_int, + ) -> VALUE; pub fn rb_zjit_profile_disable(iseq: *const rb_iseq_t); pub fn rb_vm_base_ptr(cfp: *mut rb_control_frame_struct) -> *mut VALUE; pub fn rb_zjit_constcache_shareable(ice: *const iseq_inline_constant_cache_entry) -> bool; diff --git a/zjit/src/gc.rs b/zjit/src/gc.rs index cc08b8fc9e..0974c5bfce 100644 --- a/zjit/src/gc.rs +++ b/zjit/src/gc.rs @@ -5,6 +5,7 @@ use crate::codegen::IseqCallRef; use crate::stats::CompileError; use crate::{cruby::*, profile::IseqProfile, state::ZJITState, stats::with_time_stat, virtualmem::CodePtr}; use crate::stats::Counter::gc_time_ns; +use crate::state::gc_mark_raw_samples; /// This is all the data ZJIT stores on an ISEQ. We mark objects in this struct on GC. #[derive(Debug)] @@ -250,3 +251,9 @@ pub fn remove_gc_offsets(payload_ptr: *mut IseqPayload, removed_range: &Range<Co fn ranges_overlap<T>(left: &Range<T>, right: &Range<T>) -> bool where T: PartialOrd { left.start < right.end && right.start < left.end } + +/// Callback for marking GC objects inside [Invariants]. +#[unsafe(no_mangle)] +pub extern "C" fn rb_zjit_root_mark() { + gc_mark_raw_samples(); +} diff --git a/zjit/src/options.rs b/zjit/src/options.rs index b33d18efff..ab9d1960eb 100644 --- a/zjit/src/options.rs +++ b/zjit/src/options.rs @@ -69,6 +69,9 @@ pub struct Options { /// Dump all compiled machine code. pub dump_disasm: bool, + /// Trace and write side exit source maps to /tmp for stackprof. + pub trace_side_exits: bool, + /// Dump code map to /tmp for performance profilers. pub perf: bool, @@ -94,6 +97,7 @@ impl Default for Options { dump_hir_graphviz: None, dump_lir: false, dump_disasm: false, + trace_side_exits: false, perf: false, allowed_iseqs: None, log_compiled_iseqs: None, @@ -115,6 +119,8 @@ pub const ZJIT_OPTIONS: &[(&str, &str)] = &[ ("--zjit-perf", "Dump ISEQ symbols into /tmp/perf-{}.map for Linux perf."), ("--zjit-log-compiled-iseqs=path", "Log compiled ISEQs to the file. The file will be truncated."), + ("--zjit-trace-exits", + "Record Ruby source location when side-exiting.") ]; #[derive(Clone, Copy, Debug)] @@ -235,6 +241,10 @@ fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { options.print_stats = false; } + ("trace-exits", "") => { + options.trace_side_exits = true; + } + ("debug", "") => options.debug = true, ("disable-hir-opt", "") => options.disable_hir_opt = true, diff --git a/zjit/src/state.rs b/zjit/src/state.rs index 81c05f4986..50c3f4b1c1 100644 --- a/zjit/src/state.rs +++ b/zjit/src/state.rs @@ -1,12 +1,12 @@ //! Runtime state of ZJIT. use crate::codegen::{gen_exit_trampoline, gen_exit_trampoline_with_counter, gen_function_stub_hit_trampoline}; -use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insn_count, EcPtr, Qnil, VALUE, VM_INSTRUCTION_SIZE}; +use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insn_count, EcPtr, Qnil, rb_vm_insn_addr2opcode, rb_profile_frames, VALUE, VM_INSTRUCTION_SIZE, size_t, rb_gc_mark}; use crate::cruby_methods; use crate::invariants::Invariants; use crate::asm::CodeBlock; use crate::options::get_option; -use crate::stats::{Counters, InsnCounters}; +use crate::stats::{Counters, InsnCounters, SideExitLocations}; use crate::virtualmem::CodePtr; use std::collections::HashMap; @@ -53,6 +53,9 @@ pub struct ZJITState { /// Counter pointers for unoptimized C functions unoptimized_cfunc_counter_pointers: HashMap<String, Box<u64>>, + + /// Locations of side exists within generated code + exit_locations: Option<SideExitLocations>, } /// Private singleton instance of the codegen globals @@ -76,6 +79,12 @@ impl ZJITState { let exit_trampoline = gen_exit_trampoline(&mut cb).unwrap(); let function_stub_hit_trampoline = gen_function_stub_hit_trampoline(&mut cb).unwrap(); + let exit_locations = if get_option!(trace_side_exits) { + Some(SideExitLocations::default()) + } else { + None + }; + // Initialize the codegen globals instance let zjit_state = ZJITState { code_block: cb, @@ -89,6 +98,7 @@ impl ZJITState { function_stub_hit_trampoline, exit_trampoline_with_counter: exit_trampoline, unoptimized_cfunc_counter_pointers: HashMap::new(), + exit_locations, }; unsafe { ZJIT_STATE = Some(zjit_state); } @@ -203,6 +213,16 @@ impl ZJITState { pub fn get_function_stub_hit_trampoline() -> CodePtr { ZJITState::get_instance().function_stub_hit_trampoline } + + /// Get a mutable reference to the ZJIT raw samples Vec + pub fn get_raw_samples() -> Option<&'static mut Vec<VALUE>> { + ZJITState::get_instance().exit_locations.as_mut().map(|el| &mut el.raw_samples) + } + + /// Get a mutable reference to the ZJIT line samples Vec. + pub fn get_line_samples() -> Option<&'static mut Vec<i32>> { + ZJITState::get_instance().exit_locations.as_mut().map(|el| &mut el.line_samples) + } } /// Initialize ZJIT @@ -238,3 +258,164 @@ pub extern "C" fn rb_zjit_assert_compiles(_ec: EcPtr, _self: VALUE) -> VALUE { ZJITState::enable_assert_compiles(); Qnil } + +/// Call `rb_profile_frames` and write the result into buffers to be consumed by `rb_zjit_record_exit_stack`. +fn record_profiling_frames() -> (i32, Vec<VALUE>, Vec<i32>) { + // Stackprof uses a buffer of length 2048 when collating the frames into statistics. + // Since eventually the collected information will be used by Stackprof, collect only + // 2048 frames at a time. + // https://github.com/tmm1/stackprof/blob/5d832832e4afcb88521292d6dfad4a9af760ef7c/ext/stackprof/stackprof.c#L21 + const BUFF_LEN: usize = 2048; + + let mut frames_buffer = vec![VALUE(0_usize); BUFF_LEN]; + let mut lines_buffer = vec![0; BUFF_LEN]; + + let stack_length = unsafe { + rb_profile_frames( + 0, + BUFF_LEN as i32, + frames_buffer.as_mut_ptr(), + lines_buffer.as_mut_ptr(), + ) + }; + + // Trim at `stack_length` since anything past it is redundant + frames_buffer.truncate(stack_length as usize); + lines_buffer.truncate(stack_length as usize); + + (stack_length, frames_buffer, lines_buffer) +} + +/// Write samples in `frames_buffer` and `lines_buffer` from profiling into +/// `raw_samples` and `line_samples`. Also write opcode, number of frames, +/// and stack size to be consumed by Stackprof. +fn write_exit_stack_samples( + raw_samples: &'static mut Vec<VALUE>, + line_samples: &'static mut Vec<i32>, + frames_buffer: &[VALUE], + lines_buffer: &[i32], + stack_length: i32, + exit_pc: *const VALUE, +) { + raw_samples.push(VALUE(stack_length as usize)); + line_samples.push(stack_length); + + // Push frames and their lines in reverse order. + for i in (0..stack_length as usize).rev() { + raw_samples.push(frames_buffer[i]); + line_samples.push(lines_buffer[i]); + } + + // Get the opcode from instruction handler at exit PC. + let exit_opcode = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) }; + raw_samples.push(VALUE(exit_opcode as usize)); + // Push a dummy line number since we don't know where this insn is from. + line_samples.push(0); + + // Push number of times seen onto the stack. + raw_samples.push(VALUE(1usize)); + line_samples.push(1); +} + +fn try_increment_existing_stack( + raw_samples: &mut [VALUE], + line_samples: &mut [i32], + frames_buffer: &[VALUE], + stack_length: i32, + samples_length: usize, +) -> bool { + let prev_stack_len_index = raw_samples.len() - samples_length; + let prev_stack_len = i64::from(raw_samples[prev_stack_len_index]); + + if prev_stack_len == stack_length as i64 { + // Check if all stack lengths match and all frames are identical + let frames_match = (0..stack_length).all(|i| { + let current_frame = frames_buffer[stack_length as usize - 1 - i as usize]; + let prev_frame = raw_samples[prev_stack_len_index + i as usize + 1]; + current_frame == prev_frame + }); + + if frames_match { + let counter_idx = raw_samples.len() - 1; + let new_count = i64::from(raw_samples[counter_idx]) + 1; + + raw_samples[counter_idx] = VALUE(new_count as usize); + line_samples[counter_idx] = new_count as i32; + return true; + } + } + false +} + +/// Record a backtrace with ZJIT side exits +#[unsafe(no_mangle)] +pub extern "C" fn rb_zjit_record_exit_stack(exit_pc: *const VALUE) { + if !zjit_enabled_p() || !get_option!(trace_side_exits) { + return; + } + + let (stack_length, frames_buffer, lines_buffer) = record_profiling_frames(); + + // Can safely unwrap since `trace_side_exits` must be true at this point + let zjit_raw_samples = ZJITState::get_raw_samples().unwrap(); + let zjit_line_samples = ZJITState::get_line_samples().unwrap(); + assert_eq!(zjit_raw_samples.len(), zjit_line_samples.len()); + + // Represents pushing the stack length, the instruction opcode, and the sample count. + const SAMPLE_METADATA_SIZE: usize = 3; + let samples_length = (stack_length as usize) + SAMPLE_METADATA_SIZE; + + // If zjit_raw_samples is greater than or equal to the current length of the samples + // we might have seen this stack trace previously. + if zjit_raw_samples.len() >= samples_length + && try_increment_existing_stack( + zjit_raw_samples, + zjit_line_samples, + &frames_buffer, + stack_length, + samples_length, + ) + { + return; + } + + write_exit_stack_samples( + zjit_raw_samples, + zjit_line_samples, + &frames_buffer, + &lines_buffer, + stack_length, + exit_pc, + ); +} + +/// Mark `raw_samples` so they can be used by rb_zjit_add_frame. +pub fn gc_mark_raw_samples() { + // Return if ZJIT is not enabled + if !zjit_enabled_p() || !get_option!(stats) || !get_option!(trace_side_exits) { + return; + } + + let mut idx: size_t = 0; + let zjit_raw_samples = ZJITState::get_raw_samples().unwrap(); + + while idx < zjit_raw_samples.len() as size_t { + let num = zjit_raw_samples[idx as usize]; + let mut i = 0; + idx += 1; + + // Mark the zjit_raw_samples at the given index. These represent + // the data that needs to be GC'd which are the current frames. + while i < i32::from(num) { + unsafe { rb_gc_mark(zjit_raw_samples[idx as usize]); } + i += 1; + idx += 1; + } + + // Increase index for exit instruction. + idx += 1; + // Increase index for bookeeping value (number of times we've seen this + // row in a stack). + idx += 1; + } +} diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index f9f9fb9e37..05ae231dad 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -463,3 +463,52 @@ pub fn with_time_stat<F, R>(counter: Counter, func: F) -> R where F: FnOnce() -> pub fn zjit_alloc_size() -> usize { jit::GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst) } + +/// Struct of arrays for --zjit-trace-exits. +#[derive(Default)] +pub struct SideExitLocations { + /// Control frames of method entries. + pub raw_samples: Vec<VALUE>, + /// Line numbers of the iseq caller. + pub line_samples: Vec<i32>, +} + +/// Primitive called in zjit.rb +/// +/// Check if trace_exits generation is enabled. Requires the stats feature +/// to be enabled. +#[unsafe(no_mangle)] +pub extern "C" fn rb_zjit_trace_exit_locations_enabled_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { + if get_option!(stats) && get_option!(trace_side_exits) { + Qtrue + } else { + Qfalse + } +} + +/// Call the C function to parse the raw_samples and line_samples +/// into raw, lines, and frames hash for RubyVM::YJIT.exit_locations. +#[unsafe(no_mangle)] +pub extern "C" fn rb_zjit_get_exit_locations(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { + if !zjit_enabled_p() || !get_option!(stats) || !get_option!(trace_side_exits) { + return Qnil; + } + + // Can safely unwrap since `trace_side_exits` must be true at this point + let zjit_raw_samples = ZJITState::get_raw_samples().unwrap(); + let zjit_line_samples = ZJITState::get_line_samples().unwrap(); + + assert_eq!(zjit_raw_samples.len(), zjit_line_samples.len()); + + // zjit_raw_samples and zjit_line_samples are the same length so + // pass only one of the lengths in the C function. + let samples_len = zjit_raw_samples.len() as i32; + + unsafe { + rb_zjit_exit_locations_dict( + zjit_raw_samples.as_mut_ptr(), + zjit_line_samples.as_mut_ptr(), + samples_len + ) + } +} |
