From e7b0a01002323d9ed8eed9abca2a4979ebe9ae32 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 9 Feb 2024 14:12:24 -0800 Subject: YJIT: Add top ISEQ call counts to --yjit-stats (#9906) --- yjit.c | 24 +++++++++ yjit.rb | 32 +++++------- yjit/bindgen/src/main.rs | 2 + yjit/src/codegen.rs | 108 +++++++++++++++++++++------------------- yjit/src/cruby_bindings.inc.rs | 2 + yjit/src/stats.rs | 110 ++++++++++++++++++++++++----------------- 6 files changed, 161 insertions(+), 117 deletions(-) diff --git a/yjit.c b/yjit.c index cf6d81092f..4e9fed35ae 100644 --- a/yjit.c +++ b/yjit.c @@ -895,6 +895,30 @@ rb_yjit_dump_iseq_loc(const rb_iseq_t *iseq, uint32_t insn_idx) fprintf(stderr, "%s %.*s:%u\n", __func__, (int)len, ptr, rb_iseq_line_no(iseq, insn_idx)); } +// Get the number of digits required to print an integer +static int +num_digits(int integer) +{ + int num = 1; + while (integer /= 10) { + num++; + } + return num; +} + +// Allocate a C string that formats an ISEQ label like iseq_inspect() +char * +rb_yjit_iseq_inspect(const rb_iseq_t *iseq) +{ + const char *label = RSTRING_PTR(iseq->body->location.label); + const char *path = RSTRING_PTR(rb_iseq_path(iseq)); + int lineno = iseq->body->location.code_location.beg_pos.lineno; + + char *buf = ZALLOC_N(char, strlen(label) + strlen(path) + num_digits(lineno) + 3); + sprintf(buf, "%s@%s:%d", label, path, lineno); + return buf; +} + // The FL_TEST() macro VALUE rb_FL_TEST(VALUE obj, VALUE flags) diff --git a/yjit.rb b/yjit.rb index 13c394ff1e..0a649e0dc7 100644 --- a/yjit.rb +++ b/yjit.rb @@ -315,10 +315,11 @@ module RubyVM::YJIT out.puts "num_send_polymorphic: " + format_number_pct(13, stats[:num_send_polymorphic], stats[:num_send]) out.puts "num_send_megamorphic: " + format_number_pct(13, stats[:send_megamorphic], stats[:num_send]) out.puts "num_send_dynamic: " + format_number_pct(13, stats[:num_send_dynamic], stats[:num_send]) - out.puts "num_send_inline: " + format_number_pct(13, stats[:num_send_inline], stats[:num_send]) - out.puts "num_send_leaf_builtin: " + format_number_pct(13, stats[:num_send_leaf_builtin], stats[:num_send]) out.puts "num_send_cfunc: " + format_number_pct(13, stats[:num_send_cfunc], stats[:num_send]) out.puts "num_send_cfunc_inline: " + format_number_pct(13, stats[:num_send_cfunc_inline], stats[:num_send_cfunc]) + out.puts "num_send_iseq: " + format_number_pct(13, stats[:num_send_iseq], stats[:num_send]) + out.puts "num_send_iseq_leaf: " + format_number_pct(13, stats[:num_send_iseq_leaf], stats[:num_send_iseq]) + out.puts "num_send_iseq_inline: " + format_number_pct(13, stats[:num_send_iseq_inline], stats[:num_send_iseq]) if stats[:num_send_x86_rel32] != 0 || stats[:num_send_x86_reg] != 0 out.puts "num_send_x86_rel32: " + format_number(13, stats[:num_send_x86_rel32]) out.puts "num_send_x86_reg: " + format_number(13, stats[:num_send_x86_reg]) @@ -385,17 +386,12 @@ module RubyVM::YJIT print_sorted_exit_counts(stats, out: out, prefix: "exit_") - print_sorted_cfunc_calls(stats, out:out) + print_sorted_method_calls(stats[:cfunc_calls], stats[:num_send_cfunc], out: out, type: 'C') + print_sorted_method_calls(stats[:iseq_calls], stats[:num_send_iseq], out: out, type: 'ISEQ') end - def print_sorted_cfunc_calls(stats, out:, how_many: 20, left_pad: 4) # :nodoc: - calls = stats[:cfunc_calls] - if calls.empty? - return - end - - # Total number of cfunc calls - num_send_cfunc = stats[:num_send_cfunc] + def print_sorted_method_calls(calls, num_calls, out:, type:, how_many: 20, left_pad: 4) # :nodoc: + return if calls.empty? # Sort calls by decreasing frequency and keep the top N pairs = calls.map { |k,v| [k, v] } @@ -404,16 +400,14 @@ module RubyVM::YJIT pairs = pairs[0...how_many] top_n_total = pairs.sum { |name, count| count } - top_n_pct = 100.0 * top_n_total / num_send_cfunc + top_n_pct = 100.0 * top_n_total / num_calls longest_name_len = pairs.max_by { |name, count| name.length }.first.length - out.puts "Top-#{pairs.size} most frequent C calls (#{"%.1f" % top_n_pct}% of C calls):" + out.puts "Top-#{pairs.size} most frequent #{type} calls (#{"%.1f" % top_n_pct}% of #{type} calls):" pairs.each do |name, count| - padding = longest_name_len + left_pad - padded_name = "%#{padding}s" % name - padded_count = format_number_pct(10, count, num_send_cfunc) - out.puts("#{padded_name}: #{padded_count}") + padded_count = format_number_pct(10, count, num_calls) + out.puts("#{padded_count}: #{name}") end end @@ -437,10 +431,8 @@ module RubyVM::YJIT longest_insn_name_len = exits.max_by { |name, count| name.length }.first.length exits.each do |name, count| - padding = longest_insn_name_len + left_pad - padded_name = "%#{padding}s" % name padded_count = format_number_pct(10, count, total_exits) - out.puts("#{padded_name}: #{padded_count}") + out.puts("#{padded_count}: #{name}") end else out.puts "total_exits: " + format_number(10, total_exits) diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index a09ab068cb..899298dd2e 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -308,6 +308,7 @@ fn main() { .allowlist_function("rb_yjit_mark_unused") .allowlist_function("rb_yjit_get_page_size") .allowlist_function("rb_yjit_iseq_builtin_attrs") + .allowlist_function("rb_yjit_iseq_inspect") .allowlist_function("rb_yjit_builtin_function") .allowlist_function("rb_set_cfp_(pc|sp)") .allowlist_function("rb_yjit_multi_ractor_p") @@ -377,6 +378,7 @@ fn main() { // From gc.h and internal/gc.h .allowlist_function("rb_class_allocate_instance") .allowlist_function("rb_obj_info") + .allowlist_function("ruby_xfree") // From include/ruby/debug.h .allowlist_function("rb_profile_frames") diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 1fc0e07fe2..322553cce1 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -16,6 +16,7 @@ use std::cell::Cell; use std::cmp; use std::cmp::min; use std::collections::HashMap; +use std::ffi::c_void; use std::ffi::CStr; use std::mem; use std::os::raw::c_int; @@ -5966,32 +5967,6 @@ fn gen_send_cfunc( } } - // Log the name of the method we're calling to, - // note that we intentionally don't do this for inlined cfuncs - if get_option!(gen_stats) { - // TODO: extract code to get method name string into its own function - - // Assemble the method name string - let mid = unsafe { vm_ci_mid(ci) }; - let class_name = if let Some(class) = recv_known_class { - unsafe { cstr_to_rust_string(rb_class2name(class)) }.unwrap() - } else { - "Unknown".to_string() - }; - let method_name = if mid != 0 { - unsafe { cstr_to_rust_string(rb_id2name(mid)) }.unwrap() - } else { - "Unknown".to_string() - }; - let name_str = format!("{}#{}", class_name, method_name); - - // Get an index for this cfunc name - let cfunc_idx = get_cfunc_idx(&name_str); - - // Increment the counter for this cfunc - asm.ccall(incr_cfunc_counter as *const u8, vec![cfunc_idx.into()]); - } - // Check for interrupts gen_check_ints(asm, Counter::guard_send_interrupted); @@ -6180,6 +6155,20 @@ fn gen_send_cfunc( let stack_ret = asm.stack_push(Type::Unknown); asm.mov(stack_ret, ret); + // Log the name of the method we're calling to. We intentionally don't do this for inlined cfuncs. + // We also do this after the C call to minimize the impact of spill_temps() on asm.ccall(). + if get_option!(gen_stats) { + // Assemble the method name string + let mid = unsafe { vm_ci_mid(ci) }; + let name_str = get_method_name(recv_known_class, mid); + + // Get an index for this cfunc name + let cfunc_idx = get_cfunc_idx(&name_str); + + // Increment the counter for this cfunc + asm.ccall(incr_cfunc_counter as *const u8, vec![cfunc_idx.into()]); + } + // Pop the stack frame (ec->cfp++) // Instead of recalculating, we can reuse the previous CFP, which is stored in a callee-saved // register @@ -6650,6 +6639,9 @@ fn gen_send_iseq( } } + // Increment total ISEQ send count + gen_counter_incr(asm, Counter::num_send_iseq); + // Shortcut for special `Primitive.attr! :leaf` builtins let builtin_attrs = unsafe { rb_yjit_iseq_builtin_attrs(iseq) }; let builtin_func_raw = unsafe { rb_yjit_builtin_function(iseq) }; @@ -6671,7 +6663,7 @@ fn gen_send_iseq( } asm_comment!(asm, "inlined leaf builtin"); - gen_counter_incr(asm, Counter::num_send_leaf_builtin); + gen_counter_incr(asm, Counter::num_send_iseq_leaf); // The callee may allocate, e.g. Integer#abs on a Bignum. // Save SP for GC, save PC for allocation tracing, and prepare @@ -6705,7 +6697,7 @@ fn gen_send_iseq( // Inline simple ISEQs whose return value is known at compile time if let (Some(value), None, false) = (iseq_get_return_value(iseq), block_arg_type, opt_send_call) { asm_comment!(asm, "inlined simple ISEQ"); - gen_counter_incr(asm, Counter::num_send_inline); + gen_counter_incr(asm, Counter::num_send_iseq_inline); // Pop receiver and arguments asm.stack_pop(argc as usize + if captured_opnd.is_some() { 0 } else { 1 }); @@ -7210,6 +7202,19 @@ fn gen_send_iseq( pc: None, // We are calling into jitted code, which will set the PC as necessary }); + // Log the name of the method we're calling to. We intentionally don't do this for inlined ISEQs. + // We also do this after gen_push_frame() to minimize the impact of spill_temps() on asm.ccall(). + if get_option!(gen_stats) { + // Assemble the ISEQ name string + let name_str = get_iseq_name(iseq); + + // Get an index for this ISEQ name + let iseq_idx = get_iseq_idx(&name_str); + + // Increment the counter for this cfunc + asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]); + } + // No need to set cfp->pc since the callee sets it whenever calling into routines // that could look at it through jit_save_pc(). // mov(cb, REG0, const_ptr_opnd(start_pc)); @@ -7639,16 +7644,7 @@ fn gen_send_general( // Log the name of the method we're calling to #[cfg(feature = "disasm")] - { - let class_name = unsafe { cstr_to_rust_string(rb_class2name(comptime_recv_klass)) }; - let method_name = unsafe { cstr_to_rust_string(rb_id2name(mid)) }; - match (class_name, method_name) { - (Some(class_name), Some(method_name)) => { - asm_comment!(asm, "call to {}#{}", class_name, method_name); - } - _ => {} - } - } + asm_comment!(asm, "call to {}", get_method_name(Some(comptime_recv_klass), mid)); // Gather some statistics about sends gen_counter_incr(asm, Counter::num_send); @@ -8069,6 +8065,27 @@ fn gen_send_general( } } +/// Assemble "{class_name}#{method_name}" from a class pointer and a method ID +fn get_method_name(class: Option, mid: u64) -> String { + let class_name = class.and_then(|class| unsafe { + cstr_to_rust_string(rb_class2name(class)) + }).unwrap_or_else(|| "Unknown".to_string()); + let method_name = if mid != 0 { + unsafe { cstr_to_rust_string(rb_id2name(mid)) } + } else { + None + }.unwrap_or_else(|| "Unknown".to_string()); + format!("{}#{}", class_name, method_name) +} + +/// Assemble "{label}@{iseq_path}:{lineno}" (iseq_inspect() format) from an ISEQ +fn get_iseq_name(iseq: IseqPtr) -> String { + let c_string = unsafe { rb_yjit_iseq_inspect(iseq) }; + let string = unsafe { CStr::from_ptr(c_string) }.to_str() + .unwrap_or_else(|_| "not UTF-8").to_string(); + unsafe { ruby_xfree(c_string as *mut c_void); } + string +} /// Shifts the stack for send in order to remove the name of the method /// Comment below borrow from vm_call_opt_send in vm_insnhelper.c @@ -8239,20 +8256,7 @@ fn gen_invokeblock_specialized( Counter::guard_invokeblock_iseq_block_changed, ); - gen_send_iseq( - jit, - asm, - ocb, - comptime_iseq, - ci, - VM_FRAME_MAGIC_BLOCK, - None, - 0 as _, - None, - flags, - argc, - Some(captured_opnd), - ) + gen_send_iseq(jit, asm, ocb, comptime_iseq, ci, VM_FRAME_MAGIC_BLOCK, None, 0 as _, None, flags, argc, Some(captured_opnd)) } else if comptime_handler.0 & 0x3 == 0x3 { // VM_BH_IFUNC_P // We aren't handling CALLER_SETUP_ARG and CALLER_REMOVE_EMPTY_KW_SPLAT yet. if flags & VM_CALL_ARGS_SPLAT != 0 { diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 4246468cea..dabd3c9681 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -907,6 +907,7 @@ pub const RUBY_OFFSET_RSTRING_LEN: rstring_offsets = 16; pub type rstring_offsets = u32; pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword; extern "C" { + pub fn ruby_xfree(ptr: *mut ::std::os::raw::c_void); pub fn rb_class_attached_object(klass: VALUE) -> VALUE; pub fn rb_singleton_class(obj: VALUE) -> VALUE; pub fn rb_get_alloc_func(klass: VALUE) -> rb_alloc_func_t; @@ -1154,6 +1155,7 @@ extern "C" { pub fn rb_yjit_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE; pub fn rb_yjit_ruby2_keywords_splat_p(obj: VALUE) -> usize; pub fn rb_yjit_dump_iseq_loc(iseq: *const rb_iseq_t, insn_idx: u32); + pub fn rb_yjit_iseq_inspect(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_char; pub fn rb_FL_TEST(obj: VALUE, flags: VALUE) -> VALUE; pub fn rb_FL_TEST_RAW(obj: VALUE, flags: VALUE) -> VALUE; pub fn rb_RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool; diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs index 8214769d9b..395a1b45f9 100644 --- a/yjit/src/stats.rs +++ b/yjit/src/stats.rs @@ -53,56 +53,64 @@ unsafe impl GlobalAlloc for StatsAlloc { } } -/// Mapping of C function name to integer indices +/// Mapping of C function / ISEQ name to integer indices /// This is accessed at compilation time only (protected by a lock) static mut CFUNC_NAME_TO_IDX: Option> = None; +static mut ISEQ_NAME_TO_IDX: Option> = None; -/// Vector of call counts for each C function index +/// Vector of call counts for each C function / ISEQ index /// This is modified (but not resized) by JITted code static mut CFUNC_CALL_COUNT: Option> = None; +static mut ISEQ_CALL_COUNT: Option> = None; /// Assign an index to a given cfunc name string -pub fn get_cfunc_idx(name: &str) -> usize -{ - //println!("{}", name); - - unsafe { - if CFUNC_NAME_TO_IDX.is_none() { - CFUNC_NAME_TO_IDX = Some(HashMap::default()); - } +pub fn get_cfunc_idx(name: &str) -> usize { + unsafe { get_method_idx(name, &mut CFUNC_NAME_TO_IDX, &mut CFUNC_CALL_COUNT) } +} - if CFUNC_CALL_COUNT.is_none() { - CFUNC_CALL_COUNT = Some(Vec::default()); - } +/// Assign an index to a given ISEQ name string +pub fn get_iseq_idx(name: &str) -> usize { + unsafe { get_method_idx(name, &mut ISEQ_NAME_TO_IDX, &mut ISEQ_CALL_COUNT) } +} - let name_to_idx = CFUNC_NAME_TO_IDX.as_mut().unwrap(); +fn get_method_idx( + name: &str, + method_name_to_idx: &mut Option>, + method_call_count: &mut Option>, +) -> usize { + //println!("{}", name); - match name_to_idx.get(name) { - Some(idx) => *idx, - None => { - let idx = name_to_idx.len(); - name_to_idx.insert(name.to_string(), idx); + let name_to_idx = method_name_to_idx.get_or_insert_with(HashMap::default); + let call_count = method_call_count.get_or_insert_with(Vec::default); - // Resize the call count vector - let cfunc_call_count = CFUNC_CALL_COUNT.as_mut().unwrap(); - if idx >= cfunc_call_count.len() { - cfunc_call_count.resize(idx + 1, 0); - } + match name_to_idx.get(name) { + Some(idx) => *idx, + None => { + let idx = name_to_idx.len(); + name_to_idx.insert(name.to_string(), idx); - idx + // Resize the call count vector + if idx >= call_count.len() { + call_count.resize(idx + 1, 0); } + + idx } } } // Increment the counter for a C function -pub extern "C" fn incr_cfunc_counter(idx: usize) -{ - unsafe { - let cfunc_call_count = CFUNC_CALL_COUNT.as_mut().unwrap(); - assert!(idx < cfunc_call_count.len()); - cfunc_call_count[idx] += 1; - } +pub extern "C" fn incr_cfunc_counter(idx: usize) { + let cfunc_call_count = unsafe { CFUNC_CALL_COUNT.as_mut().unwrap() }; + assert!(idx < cfunc_call_count.len()); + cfunc_call_count[idx] += 1; +} + +// Increment the counter for an ISEQ +pub extern "C" fn incr_iseq_counter(idx: usize) { + let iseq_call_count = unsafe { ISEQ_CALL_COUNT.as_mut().unwrap() }; + assert!(idx < iseq_call_count.len()); + iseq_call_count[idx] += 1; } // YJIT exit counts for each instruction type @@ -585,10 +593,11 @@ make_counters! { num_send_x86_rel32, num_send_x86_reg, num_send_dynamic, - num_send_inline, - num_send_leaf_builtin, num_send_cfunc, num_send_cfunc_inline, + num_send_iseq, + num_send_iseq_leaf, + num_send_iseq_inline, num_getivar_megamorphic, num_setivar_megamorphic, @@ -795,19 +804,30 @@ fn rb_yjit_gen_stats_dict(context: bool) -> VALUE { rb_hash_aset(hash, key, value); } - // Create a hash for the cfunc call counts - let calls_hash = rb_hash_new(); - rb_hash_aset(hash, rust_str_to_sym("cfunc_calls"), calls_hash); - if let Some(cfunc_name_to_idx) = CFUNC_NAME_TO_IDX.as_mut() { - let call_counts = CFUNC_CALL_COUNT.as_mut().unwrap(); - - for (name, idx) in cfunc_name_to_idx { - let count = call_counts[*idx]; - let key = rust_str_to_sym(name); - let value = VALUE::fixnum_from_usize(count as usize); - rb_hash_aset(calls_hash, key, value); + fn set_call_counts( + calls_hash: VALUE, + method_name_to_idx: &mut Option>, + method_call_count: &mut Option>, + ) { + if let (Some(name_to_idx), Some(call_counts)) = (method_name_to_idx, method_call_count) { + for (name, idx) in name_to_idx { + let count = call_counts[*idx]; + let key = rust_str_to_sym(name); + let value = VALUE::fixnum_from_usize(count as usize); + unsafe { rb_hash_aset(calls_hash, key, value); } + } } } + + // Create a hash for the cfunc call counts + let cfunc_calls = rb_hash_new(); + rb_hash_aset(hash, rust_str_to_sym("cfunc_calls"), cfunc_calls); + set_call_counts(cfunc_calls, &mut CFUNC_NAME_TO_IDX, &mut CFUNC_CALL_COUNT); + + // Create a hash for the ISEQ call counts + let iseq_calls = rb_hash_new(); + rb_hash_aset(hash, rust_str_to_sym("iseq_calls"), iseq_calls); + set_call_counts(iseq_calls, &mut ISEQ_NAME_TO_IDX, &mut ISEQ_CALL_COUNT); } hash -- cgit v1.2.3