summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAiden Fox Ivey <aiden.foxivey@shopify.com>2025-09-30 11:55:33 -0400
committerGitHub <noreply@github.com>2025-09-30 15:55:33 +0000
commit2f1c30cd50e464880e44da670d3ad8ebe00fc899 (patch)
tree40176613b3f2587d7f898e21bfa74af84cd5311d
parentd016595387069677c6b992dffe9322f67dc9bc73 (diff)
ZJIT: Add --zjit-trace-exits (#14640)
Add side exit tracing functionality for ZJIT
-rw-r--r--doc/zjit.md14
-rw-r--r--gc.c8
-rw-r--r--yjit/src/stats.rs4
-rw-r--r--zjit.c91
-rw-r--r--zjit.rb114
-rw-r--r--zjit/bindgen/src/main.rs3
-rw-r--r--zjit/src/backend/lir.rs12
-rw-r--r--zjit/src/cruby_bindings.inc.rs5
-rw-r--r--zjit/src/gc.rs7
-rw-r--r--zjit/src/options.rs10
-rw-r--r--zjit/src/state.rs185
-rw-r--r--zjit/src/stats.rs49
12 files changed, 496 insertions, 6 deletions
diff --git a/doc/zjit.md b/doc/zjit.md
index 4eedcca3ba..57a95457d3 100644
--- a/doc/zjit.md
+++ b/doc/zjit.md
@@ -153,6 +153,20 @@ To build with stats support:
make -j
```
+### Tracing side exits
+
+Through [Stackprof](https://github.com/tmm1/stackprof), detailed information about the methods that the JIT side-exits from can be displayed after some execution of a program. Note that `--zjit-trace-exits` must be used alongside `--zjit-stats`.
+
+```bash
+./miniruby --zjit-stats --zjit-trace-exits script.rb
+```
+
+A file called `zjit_exit_locations.dump` will be created in the current working directory. The side-exited methods can then be viewed with Stackprof:
+
+```bash
+stackprof path/to/zjit_exit_locations.dump
+```
+
## ZJIT Glossary
This glossary contains terms that are helpful for understanding ZJIT.
diff --git a/gc.c b/gc.c
index 8c8887c46b..1961670c54 100644
--- a/gc.c
+++ b/gc.c
@@ -3070,6 +3070,14 @@ rb_gc_mark_roots(void *objspace, const char **categoryp)
}
#endif
+#if USE_ZJIT
+ void rb_zjit_root_mark(void);
+ if (rb_zjit_enabled_p) {
+ MARK_CHECKPOINT("ZJIT");
+ rb_zjit_root_mark();
+ }
+#endif
+
MARK_CHECKPOINT("machine_context");
mark_current_machine_context(ec);
diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs
index 09971c5b3a..b63e1c3272 100644
--- a/yjit/src/stats.rs
+++ b/yjit/src/stats.rs
@@ -893,7 +893,7 @@ fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE {
/// and line samples. Their length should be the same, however the data stored in
/// them is different.
#[no_mangle]
-pub extern "C" fn rb_yjit_record_exit_stack(_exit_pc: *const VALUE)
+pub extern "C" fn rb_yjit_record_exit_stack(exit_pc: *const VALUE)
{
// Return if YJIT is not enabled
if !yjit_enabled_p() {
@@ -920,7 +920,7 @@ pub extern "C" fn rb_yjit_record_exit_stack(_exit_pc: *const VALUE)
#[cfg(not(test))]
{
// Get the opcode from the encoded insn handler at this PC
- let insn = unsafe { rb_vm_insn_addr2opcode((*_exit_pc).as_ptr()) };
+ let insn = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) };
// Use the same buffer size as Stackprof.
const BUFF_LEN: usize = 2048;
diff --git a/zjit.c b/zjit.c
index 37619fd729..4bc27d9fe2 100644
--- a/zjit.c
+++ b/zjit.c
@@ -31,6 +31,95 @@
#include <errno.h>
+#define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x)))
+
+// Describe a single profile frame in `hash`, keyed by the frame value
+// converted to an Integer. The entry is a Hash holding the frame's :name,
+// :file and, when the first line number is non-zero, :line.
+// Frames that already have a truthy entry in `hash` are left untouched.
+static void
+rb_zjit_add_frame(VALUE hash, VALUE frame)
+{
+    VALUE key = PTR2NUM(frame);
+
+    // Already described by an earlier sample; nothing to do.
+    if (RTEST(rb_hash_aref(hash, key))) return;
+
+    VALUE info = rb_hash_new();
+    VALUE label = rb_profile_frame_full_label(frame);
+    VALUE path = rb_profile_frame_absolute_path(frame);
+    VALUE first_line = rb_profile_frame_first_lineno(frame);
+
+    // Fall back to the plain path when no absolute path is available.
+    if (NIL_P(path)) path = rb_profile_frame_path(frame);
+
+    rb_hash_aset(info, ID2SYM(rb_intern("name")), label);
+    rb_hash_aset(info, ID2SYM(rb_intern("file")), path);
+
+    // A first line number of 0 is not recorded.
+    if (first_line != INT2FIX(0)) {
+        rb_hash_aset(info, ID2SYM(rb_intern("line")), first_line);
+    }
+
+    rb_hash_aset(hash, key, info);
+}
+
+// Converts the raw_samples and line_samples buffers collected by
+// rb_zjit_record_exit_stack into a Hash with three entries (:raw and :lines Arrays, :frames Hash)
+// to be used by RubyVM::ZJIT.exit_locations. zjit_raw_samples holds the raw frame values
+// (without name, file, and line), and zjit_line_samples holds the matching line numbers.
+// Each record is laid out as: frame count, the frames, the exit opcode, and a hit count.
+VALUE
+rb_zjit_exit_locations_dict(VALUE *zjit_raw_samples, int *zjit_line_samples, int samples_len)
+{
+ VALUE result = rb_hash_new();
+ VALUE raw_samples = rb_ary_new_capa(samples_len);
+ VALUE line_samples = rb_ary_new_capa(samples_len);
+ VALUE frames = rb_hash_new();
+ int idx = 0;
+
+ // Walk the buffers one record at a time, copying every entry into the
+ // raw_samples and line_samples Ruby arrays as Integers.
+ while (idx < samples_len) {
+ int num = (int)zjit_raw_samples[idx];
+ int line_num = (int)zjit_line_samples[idx];
+ idx++;
+
+ rb_ary_push(raw_samples, SIZET2NUM(num));
+ rb_ary_push(line_samples, INT2NUM(line_num));
+
+ // For each of the record's `num` frames, add its metadata to the
+ // frames hash and push the current values onto the raw_samples
+ // and line_samples arrays respectively.
+ for (int o = 0; o < num; o++) {
+ rb_zjit_add_frame(frames, zjit_raw_samples[idx]);
+ rb_ary_push(raw_samples, SIZET2NUM(zjit_raw_samples[idx]));
+ rb_ary_push(line_samples, INT2NUM(zjit_line_samples[idx]));
+ idx++;
+ }
+
+ rb_ary_push(raw_samples, SIZET2NUM(zjit_raw_samples[idx]));
+ rb_ary_push(line_samples, INT2NUM(zjit_line_samples[idx]));
+ idx++;
+
+ rb_ary_push(raw_samples, SIZET2NUM(zjit_raw_samples[idx]));
+ rb_ary_push(line_samples, INT2NUM(zjit_line_samples[idx]));
+ idx++;
+ }
+
+ // Add the raw_samples, line_samples, and frames to the result
+ // hash.
+ rb_hash_aset(result, ID2SYM(rb_intern("raw")), raw_samples);
+ rb_hash_aset(result, ID2SYM(rb_intern("lines")), line_samples);
+ rb_hash_aset(result, ID2SYM(rb_intern("frames")), frames);
+
+ return result;
+}
+
void rb_zjit_profile_disable(const rb_iseq_t *iseq);
void
@@ -217,6 +306,8 @@ VALUE rb_zjit_stats(rb_execution_context_t *ec, VALUE self, VALUE target_key);
VALUE rb_zjit_reset_stats_bang(rb_execution_context_t *ec, VALUE self);
VALUE rb_zjit_stats_enabled_p(rb_execution_context_t *ec, VALUE self);
VALUE rb_zjit_print_stats_p(rb_execution_context_t *ec, VALUE self);
+VALUE rb_zjit_trace_exit_locations_enabled_p(rb_execution_context_t *ec, VALUE self);
+VALUE rb_zjit_get_exit_locations(rb_execution_context_t *ec, VALUE self);
// Preprocessed zjit.rb generated during build
#include "zjit.rbinc"
diff --git a/zjit.rb b/zjit.rb
index 4438a10c75..8289846c03 100644
--- a/zjit.rb
+++ b/zjit.rb
@@ -9,7 +9,10 @@
module RubyVM::ZJIT
# Avoid calling a Ruby method here to avoid interfering with compilation tests
if Primitive.rb_zjit_print_stats_p
- at_exit { print_stats }
+ at_exit {
+ print_stats
+ dump_locations
+ }
end
end
@@ -19,6 +22,106 @@ class << RubyVM::ZJIT
Primitive.cexpr! 'RBOOL(rb_zjit_enabled_p)'
end
+ # Check if `--zjit-trace-exits` is used
+ def trace_exit_locations_enabled?
+ Primitive.rb_zjit_trace_exit_locations_enabled_p
+ end
+
+ # If --zjit-trace-exits is enabled, convert the hash returned by
+ # Primitive.rb_zjit_get_exit_locations into a format readable
+ # by Stackprof. This will allow us to find the exact location of a
+ # side exit in ZJIT based on the instruction that is exiting. Returns nil otherwise.
+ def exit_locations
+ return unless trace_exit_locations_enabled?
+
+ results = Primitive.rb_zjit_get_exit_locations
+ raw_samples = results[:raw].dup
+ line_samples = results[:lines].dup
+ frames = results[:frames].dup
+ samples_count = 0
+
+ frames.each do |frame_id, frame|
+ frame[:samples] = 0
+ frame[:edges] = {}
+ end
+
+ # Register one pseudo-frame per VM instruction, keyed by the instruction's index.
+ # We use nonexistent.def for the file name, otherwise insns.def will be displayed
+ # and that information isn't useful in this context.
+ RubyVM::INSTRUCTION_NAMES.each_with_index do |name, frame_id|
+ frame_hash = { samples: 0, total_samples: 0, edges: {}, name: name, file: "nonexistent.def", line: nil }
+ results[:frames][frame_id] = frame_hash
+ frames[frame_id] = frame_hash
+ end
+
+ # Consume raw_samples one record at a time and build the hashes for StackProf.
+ # Each record is a frame count, that many frames, the exit instruction, and a hit count.
+ # The loop is based off an example in the StackProf documentation, so it only works with that library.
+ while raw_samples.length > 0
+ stack_trace = raw_samples.shift(raw_samples.shift + 1)
+ lines = line_samples.shift(line_samples.shift + 1)
+ prev_frame_id = nil
+
+ stack_trace.each_with_index do |frame_id, idx|
+ if prev_frame_id
+ prev_frame = frames[prev_frame_id]
+ prev_frame[:edges][frame_id] ||= 0
+ prev_frame[:edges][frame_id] += 1
+ end
+
+ frame_info = frames[frame_id]
+ frame_info[:total_samples] ||= 0
+ frame_info[:total_samples] += 1
+
+ frame_info[:lines] ||= {}
+ frame_info[:lines][lines[idx]] ||= [0, 0]
+ frame_info[:lines][lines[idx]][0] += 1
+
+ prev_frame_id = frame_id
+ end
+
+ top_frame_id = stack_trace.last
+ top_frame_line = 1
+
+ frames[top_frame_id][:samples] += 1
+ frames[top_frame_id][:lines] ||= {}
+ frames[top_frame_id][:lines][top_frame_line] ||= [0, 0]
+ frames[top_frame_id][:lines][top_frame_line][1] += 1
+
+ samples_count += raw_samples.shift
+ line_samples.shift
+ end
+
+ results[:samples] = samples_count
+ # Set missed_samples and gc_samples to 0 as their values
+ # don't matter to us in this context.
+ results[:missed_samples] = 0
+ results[:gc_samples] = 0
+ results
+ end
+
+ # Marshal dumps exit locations to the given filename.
+ #
+ # Usage:
+ #
+ # In a script call:
+ #
+ # RubyVM::ZJIT.dump_exit_locations("my_file.dump")
+ #
+ # Then run the file with the following options:
+ #
+ # ruby --zjit --zjit-stats --zjit-trace-exits test.rb
+ #
+ # Once the code is done running, use Stackprof to read the dump file.
+ # See Stackprof documentation for options.
+ def dump_exit_locations(filename)
+ unless trace_exit_locations_enabled?
+ raise ArgumentError, "--zjit-trace-exits must be enabled to use dump_exit_locations."
+ end
+
+ File.write(filename, Marshal.dump(RubyVM::ZJIT.exit_locations))
+ end
+
# Check if `--zjit-stats` is used
def stats_enabled?
Primitive.rb_zjit_stats_enabled_p
@@ -148,4 +251,13 @@ class << RubyVM::ZJIT
def print_stats
$stderr.write stats_string
end
+
+ def dump_locations # :nodoc:
+ return unless trace_exit_locations_enabled?
+
+ filename = "zjit_exit_locations.dump"
+ dump_exit_locations(filename)
+
+ $stderr.puts("ZJIT exit locations dumped to `#{filename}`.")
+ end
end
diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs
index c4233521cc..e1d19f9442 100644
--- a/zjit/bindgen/src/main.rs
+++ b/zjit/bindgen/src/main.rs
@@ -281,6 +281,7 @@ fn main() {
.allowlist_function("rb_RSTRING_PTR")
.allowlist_function("rb_RSTRING_LEN")
.allowlist_function("rb_ENCODING_GET")
+ .allowlist_function("rb_zjit_exit_locations_dict")
.allowlist_function("rb_optimized_call")
.allowlist_function("rb_jit_icache_invalidate")
.allowlist_function("rb_zjit_print_exception")
@@ -327,6 +328,8 @@ fn main() {
.allowlist_function("rb_class_new_instance_pass_kw")
.allowlist_function("rb_obj_alloc")
.allowlist_function("rb_obj_info")
+ // From include/ruby/debug.h
+ .allowlist_function("rb_profile_frames")
.allowlist_function("ruby_xfree")
.allowlist_function("rb_profile_frames")
diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs
index 21adc42cd1..76a53c66d6 100644
--- a/zjit/src/backend/lir.rs
+++ b/zjit/src/backend/lir.rs
@@ -9,6 +9,7 @@ use crate::cruby::VALUE;
use crate::stats::{exit_counter_ptr, exit_counter_ptr_for_opcode, CompileError};
use crate::virtualmem::CodePtr;
use crate::asm::{CodeBlock, Label};
+use crate::state::rb_zjit_record_exit_stack;
pub use crate::backend::current::{
Reg,
@@ -1629,6 +1630,16 @@ impl Assembler
}
}
+ if get_option!(trace_side_exits) {
+ // Use `load_into` with `C_ARG_OPNDS` instead of `opnds` argument for ccall, since `compile_side_exits`
+ // is after the split pass, which would allow use of `opnds`.
+ self.load_into(C_ARG_OPNDS[0], Opnd::const_ptr(pc as *const u8));
+ self.ccall(
+ rb_zjit_record_exit_stack as *const u8,
+ vec![]
+ );
+ }
+
asm_comment!(self, "exit to the interpreter");
self.frame_teardown(&[]); // matching the setup in :bb0-prologue:
self.mov(C_RET_OPND, Opnd::UImm(Qundef.as_u64()));
@@ -2080,4 +2091,3 @@ mod tests {
asm.load_into(mem, mem);
}
}
-
diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs
index 17a2d5a63d..2d8a8eb11e 100644
--- a/zjit/src/cruby_bindings.inc.rs
+++ b/zjit/src/cruby_bindings.inc.rs
@@ -921,6 +921,11 @@ unsafe extern "C" {
lines: *mut ::std::os::raw::c_int,
) -> ::std::os::raw::c_int;
pub fn rb_jit_cont_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void);
+ pub fn rb_zjit_exit_locations_dict(
+ zjit_raw_samples: *mut VALUE,
+ zjit_line_samples: *mut ::std::os::raw::c_int,
+ samples_len: ::std::os::raw::c_int,
+ ) -> VALUE;
pub fn rb_zjit_profile_disable(iseq: *const rb_iseq_t);
pub fn rb_vm_base_ptr(cfp: *mut rb_control_frame_struct) -> *mut VALUE;
pub fn rb_zjit_constcache_shareable(ice: *const iseq_inline_constant_cache_entry) -> bool;
diff --git a/zjit/src/gc.rs b/zjit/src/gc.rs
index cc08b8fc9e..0974c5bfce 100644
--- a/zjit/src/gc.rs
+++ b/zjit/src/gc.rs
@@ -5,6 +5,7 @@ use crate::codegen::IseqCallRef;
use crate::stats::CompileError;
use crate::{cruby::*, profile::IseqProfile, state::ZJITState, stats::with_time_stat, virtualmem::CodePtr};
use crate::stats::Counter::gc_time_ns;
+use crate::state::gc_mark_raw_samples;
/// This is all the data ZJIT stores on an ISEQ. We mark objects in this struct on GC.
#[derive(Debug)]
@@ -250,3 +251,9 @@ pub fn remove_gc_offsets(payload_ptr: *mut IseqPayload, removed_range: &Range<Co
fn ranges_overlap<T>(left: &Range<T>, right: &Range<T>) -> bool where T: PartialOrd {
left.start < right.end && right.start < left.end
}
+
+/// GC root-marking callback for ZJIT: marks the frames recorded in the side-exit raw samples.
+#[unsafe(no_mangle)]
+pub extern "C" fn rb_zjit_root_mark() {
+ gc_mark_raw_samples();
+}
diff --git a/zjit/src/options.rs b/zjit/src/options.rs
index b33d18efff..ab9d1960eb 100644
--- a/zjit/src/options.rs
+++ b/zjit/src/options.rs
@@ -69,6 +69,9 @@ pub struct Options {
/// Dump all compiled machine code.
pub dump_disasm: bool,
+ /// Trace side exits and record their source locations for Stackprof.
+ pub trace_side_exits: bool,
+
/// Dump code map to /tmp for performance profilers.
pub perf: bool,
@@ -94,6 +97,7 @@ impl Default for Options {
dump_hir_graphviz: None,
dump_lir: false,
dump_disasm: false,
+ trace_side_exits: false,
perf: false,
allowed_iseqs: None,
log_compiled_iseqs: None,
@@ -115,6 +119,8 @@ pub const ZJIT_OPTIONS: &[(&str, &str)] = &[
("--zjit-perf", "Dump ISEQ symbols into /tmp/perf-{}.map for Linux perf."),
("--zjit-log-compiled-iseqs=path",
"Log compiled ISEQs to the file. The file will be truncated."),
+ ("--zjit-trace-exits",
+ "Record Ruby source location when side-exiting.")
];
#[derive(Clone, Copy, Debug)]
@@ -235,6 +241,10 @@ fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
options.print_stats = false;
}
+ ("trace-exits", "") => {
+ options.trace_side_exits = true;
+ }
+
("debug", "") => options.debug = true,
("disable-hir-opt", "") => options.disable_hir_opt = true,
diff --git a/zjit/src/state.rs b/zjit/src/state.rs
index 81c05f4986..50c3f4b1c1 100644
--- a/zjit/src/state.rs
+++ b/zjit/src/state.rs
@@ -1,12 +1,12 @@
//! Runtime state of ZJIT.
use crate::codegen::{gen_exit_trampoline, gen_exit_trampoline_with_counter, gen_function_stub_hit_trampoline};
-use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insn_count, EcPtr, Qnil, VALUE, VM_INSTRUCTION_SIZE};
+use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insn_count, EcPtr, Qnil, rb_vm_insn_addr2opcode, rb_profile_frames, VALUE, VM_INSTRUCTION_SIZE, size_t, rb_gc_mark};
use crate::cruby_methods;
use crate::invariants::Invariants;
use crate::asm::CodeBlock;
use crate::options::get_option;
-use crate::stats::{Counters, InsnCounters};
+use crate::stats::{Counters, InsnCounters, SideExitLocations};
use crate::virtualmem::CodePtr;
use std::collections::HashMap;
@@ -53,6 +53,9 @@ pub struct ZJITState {
/// Counter pointers for unoptimized C functions
unoptimized_cfunc_counter_pointers: HashMap<String, Box<u64>>,
+
+ /// Locations of side exits within generated code
+ exit_locations: Option<SideExitLocations>,
}
/// Private singleton instance of the codegen globals
@@ -76,6 +79,12 @@ impl ZJITState {
let exit_trampoline = gen_exit_trampoline(&mut cb).unwrap();
let function_stub_hit_trampoline = gen_function_stub_hit_trampoline(&mut cb).unwrap();
+ let exit_locations = if get_option!(trace_side_exits) {
+ Some(SideExitLocations::default())
+ } else {
+ None
+ };
+
// Initialize the codegen globals instance
let zjit_state = ZJITState {
code_block: cb,
@@ -89,6 +98,7 @@ impl ZJITState {
function_stub_hit_trampoline,
exit_trampoline_with_counter: exit_trampoline,
unoptimized_cfunc_counter_pointers: HashMap::new(),
+ exit_locations,
};
unsafe { ZJIT_STATE = Some(zjit_state); }
@@ -203,6 +213,16 @@ impl ZJITState {
pub fn get_function_stub_hit_trampoline() -> CodePtr {
ZJITState::get_instance().function_stub_hit_trampoline
}
+
+ /// Get a mutable reference to the ZJIT raw samples Vec
+ pub fn get_raw_samples() -> Option<&'static mut Vec<VALUE>> {
+ ZJITState::get_instance().exit_locations.as_mut().map(|el| &mut el.raw_samples)
+ }
+
+ /// Get a mutable reference to the ZJIT line samples Vec.
+ pub fn get_line_samples() -> Option<&'static mut Vec<i32>> {
+ ZJITState::get_instance().exit_locations.as_mut().map(|el| &mut el.line_samples)
+ }
}
/// Initialize ZJIT
@@ -238,3 +258,164 @@ pub extern "C" fn rb_zjit_assert_compiles(_ec: EcPtr, _self: VALUE) -> VALUE {
ZJITState::enable_assert_compiles();
Qnil
}
+
+/// Capture the current backtrace with `rb_profile_frames`.
+///
+/// Returns the frame count reported by `rb_profile_frames`, followed by the frame values
+/// and their line numbers, each truncated to that many entries.
+fn record_profiling_frames() -> (i32, Vec<VALUE>, Vec<i32>) {
+    // Stackprof collates frames using a buffer of length 2048, and the collected data is
+    // eventually consumed by Stackprof, so capture at most 2048 frames at a time.
+    // https://github.com/tmm1/stackprof/blob/5d832832e4afcb88521292d6dfad4a9af760ef7c/ext/stackprof/stackprof.c#L21
+    const BUFF_LEN: usize = 2048;
+
+    let mut frames = vec![VALUE(0_usize); BUFF_LEN];
+    let mut lines = vec![0; BUFF_LEN];
+
+    let depth = unsafe {
+        rb_profile_frames(0, BUFF_LEN as i32, frames.as_mut_ptr(), lines.as_mut_ptr())
+    };
+
+    // Entries past the reported depth were never filled in; drop them.
+    let used = depth as usize;
+    frames.truncate(used);
+    lines.truncate(used);
+
+    (depth, frames, lines)
+}
+
+/// Append one new record to `raw_samples` and `line_samples`.
+///
+/// The record consists of the frame count, the first `stack_length` entries of
+/// `frames_buffer` / `lines_buffer` in reverse order, the opcode of the instruction at
+/// `exit_pc`, and an initial hit count of 1. Both vectors grow by the same number of entries.
+fn write_exit_stack_samples(
+    raw_samples: &'static mut Vec<VALUE>,
+    line_samples: &'static mut Vec<i32>,
+    frames_buffer: &[VALUE],
+    lines_buffer: &[i32],
+    stack_length: i32,
+    exit_pc: *const VALUE,
+) {
+    let depth = stack_length as usize;
+
+    // Record header: how many frames follow.
+    raw_samples.push(VALUE(depth));
+    line_samples.push(stack_length);
+
+    // Frames and their lines, in reverse buffer order.
+    raw_samples.extend(frames_buffer[..depth].iter().rev().copied());
+    line_samples.extend(lines_buffer[..depth].iter().rev().copied());
+
+    // Opcode of the instruction handler found at the exit PC.
+    let opcode = unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) };
+    raw_samples.push(VALUE(opcode as usize));
+    // Dummy line number, since we don't know where this insn is from.
+    line_samples.push(0);
+
+    // Number of times this record has been seen so far.
+    raw_samples.push(VALUE(1usize));
+    line_samples.push(1);
+}
+
+/// Try to fold the current side exit into the most recently recorded sample.
+///
+/// The trailing `samples_length` entries of `raw_samples` are treated as a record laid out as
+/// `[stack_length, frames..., exit opcode, count]`. The record matches when its stack length
+/// equals `stack_length`, its opcode equals `exit_opcode`, and its frames equal `frames_buffer`
+/// read in reverse. On a match the trailing count is incremented in both `raw_samples` and
+/// `line_samples` and `true` is returned; otherwise nothing is modified and `false` is returned.
+///
+/// The caller must ensure `raw_samples.len() >= samples_length`.
+fn try_increment_existing_stack(
+    raw_samples: &mut [VALUE],
+    line_samples: &mut [i32],
+    frames_buffer: &[VALUE],
+    stack_length: i32,
+    samples_length: usize,
+    exit_opcode: VALUE,
+) -> bool {
+    let prev_stack_len_index = raw_samples.len() - samples_length;
+    let prev_stack_len = i64::from(raw_samples[prev_stack_len_index]);
+
+    if prev_stack_len != stack_length as i64 {
+        return false;
+    }
+
+    // The count is the last entry of a record and the exit opcode sits right before it.
+    let counter_idx = raw_samples.len() - 1;
+    let opcode_idx = counter_idx - 1;
+
+    // Exits from different instructions must stay separate records even when the
+    // backtrace is identical, otherwise they would all be attributed to the first opcode.
+    if raw_samples[opcode_idx] != exit_opcode {
+        return false;
+    }
+
+    // Recorded frames are stored in reverse buffer order.
+    let frames_match = (0..stack_length).all(|i| {
+        let current_frame = frames_buffer[stack_length as usize - 1 - i as usize];
+        let prev_frame = raw_samples[prev_stack_len_index + i as usize + 1];
+        current_frame == prev_frame
+    });
+
+    if !frames_match {
+        return false;
+    }
+
+    let new_count = i64::from(raw_samples[counter_idx]) + 1;
+    raw_samples[counter_idx] = VALUE(new_count as usize);
+    line_samples[counter_idx] = new_count as i32;
+    true
+}
+
+/// Record a backtrace with ZJIT side exits
+///
+/// Called from generated side-exit code with the PC of the exiting instruction. Does nothing
+/// unless ZJIT is enabled and `--zjit-trace-exits` is set. A repeat of the most recent sample
+/// (same backtrace and same exit instruction) only bumps that sample's count.
+#[unsafe(no_mangle)]
+pub extern "C" fn rb_zjit_record_exit_stack(exit_pc: *const VALUE) {
+    if !zjit_enabled_p() || !get_option!(trace_side_exits) {
+        return;
+    }
+
+    let (stack_length, frames_buffer, lines_buffer) = record_profiling_frames();
+
+    // Can safely unwrap since `trace_side_exits` must be true at this point
+    let zjit_raw_samples = ZJITState::get_raw_samples().unwrap();
+    let zjit_line_samples = ZJITState::get_line_samples().unwrap();
+    assert_eq!(zjit_raw_samples.len(), zjit_line_samples.len());
+
+    // Represents pushing the stack length, the instruction opcode, and the sample count.
+    const SAMPLE_METADATA_SIZE: usize = 3;
+    let samples_length = (stack_length as usize) + SAMPLE_METADATA_SIZE;
+
+    // Opcode of the exiting instruction, in the same form it is stored in a record.
+    let exit_opcode = VALUE(unsafe { rb_vm_insn_addr2opcode((*exit_pc).as_ptr()) } as usize);
+
+    // If zjit_raw_samples is at least as long as one record of the current size,
+    // the most recent record might be this same stack trace.
+    if zjit_raw_samples.len() >= samples_length
+        && try_increment_existing_stack(
+            zjit_raw_samples,
+            zjit_line_samples,
+            &frames_buffer,
+            stack_length,
+            samples_length,
+            exit_opcode,
+        )
+    {
+        return;
+    }
+
+    write_exit_stack_samples(
+        zjit_raw_samples,
+        zjit_line_samples,
+        &frames_buffer,
+        &lines_buffer,
+        stack_length,
+        exit_pc,
+    );
+}
+
+/// Mark the frames stored in `raw_samples` so they can be used by rb_zjit_add_frame.
+///
+/// Frames are marked whenever samples are being recorded, matching the condition under which
+/// `rb_zjit_record_exit_stack` stores them, so the buffer never holds unmarked VALUEs.
+/// Does nothing when ZJIT is disabled or no side-exit sample storage exists.
+pub fn gc_mark_raw_samples() {
+    // Return if ZJIT is not enabled
+    if !zjit_enabled_p() {
+        return;
+    }
+
+    // Sample storage only exists when side-exit tracing was requested.
+    let Some(zjit_raw_samples) = ZJITState::get_raw_samples() else {
+        return;
+    };
+
+    let mut idx: size_t = 0;
+
+    while idx < zjit_raw_samples.len() as size_t {
+        // Each record starts with the number of frames it contains.
+        let num = i32::from(zjit_raw_samples[idx as usize]);
+        idx += 1;
+
+        // Mark the record's frames; these are the only entries that are
+        // Ruby objects.
+        for _ in 0..num {
+            unsafe { rb_gc_mark(zjit_raw_samples[idx as usize]); }
+            idx += 1;
+        }
+
+        // Skip the exit instruction and the bookkeeping value (number of
+        // times we've seen this record).
+        idx += 2;
+    }
+}
diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs
index f9f9fb9e37..05ae231dad 100644
--- a/zjit/src/stats.rs
+++ b/zjit/src/stats.rs
@@ -463,3 +463,52 @@ pub fn with_time_stat<F, R>(counter: Counter, func: F) -> R where F: FnOnce() ->
pub fn zjit_alloc_size() -> usize {
jit::GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst)
}
+
+/// Struct of arrays for --zjit-trace-exits.
+#[derive(Default)]
+pub struct SideExitLocations {
+ /// Control frames of method entries.
+ pub raw_samples: Vec<VALUE>,
+ /// Line numbers of the iseq caller.
+ pub line_samples: Vec<i32>,
+}
+
+/// Primitive called in zjit.rb
+///
+/// Check if trace_exits generation is enabled. Requires the stats feature
+/// to be enabled.
+#[unsafe(no_mangle)]
+pub extern "C" fn rb_zjit_trace_exit_locations_enabled_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
+ if get_option!(stats) && get_option!(trace_side_exits) {
+ Qtrue
+ } else {
+ Qfalse
+ }
+}
+
+/// Call the C function to parse the raw_samples and line_samples
+/// into raw, lines, and frames hash for RubyVM::ZJIT.exit_locations.
+#[unsafe(no_mangle)]
+pub extern "C" fn rb_zjit_get_exit_locations(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
+ if !zjit_enabled_p() || !get_option!(stats) || !get_option!(trace_side_exits) {
+ return Qnil;
+ }
+
+ // Can safely unwrap since `trace_side_exits` must be true at this point
+ let zjit_raw_samples = ZJITState::get_raw_samples().unwrap();
+ let zjit_line_samples = ZJITState::get_line_samples().unwrap();
+
+ assert_eq!(zjit_raw_samples.len(), zjit_line_samples.len());
+
+ // zjit_raw_samples and zjit_line_samples are the same length so
+ // pass only one of the lengths in the C function.
+ let samples_len = zjit_raw_samples.len() as i32;
+
+ unsafe {
+ rb_zjit_exit_locations_dict(
+ zjit_raw_samples.as_mut_ptr(),
+ zjit_line_samples.as_mut_ptr(),
+ samples_len
+ )
+ }
+}