diff options
| author | Max Bernstein <rubybugs@bernsteinbear.com> | 2025-11-05 15:01:17 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-11-05 20:01:17 +0000 |
| commit | 02267417da32bf480f7050ff2ab182076aa0ad83 (patch) | |
| tree | 577d29c67081d3ef636fa33e5abe9efa3b12017a | |
| parent | 4f56abbb0a3e25972b246fa516718520e5cd27e9 (diff) | |
ZJIT: Profile specific objects for invokeblock (#15051)
I made a special kind of `ProfiledType` that looks at specific objects, not just their classes/shapes (https://github.com/ruby/ruby/pull/15051). Then I profiled some of our benchmarks.
For lobsters:
```
Top-6 invokeblock handler (100.0% of total 1,064,155):
megamorphic: 494,931 (46.5%)
monomorphic_iseq: 337,171 (31.7%)
polymorphic: 113,381 (10.7%)
monomorphic_ifunc: 52,260 ( 4.9%)
monomorphic_other: 38,970 ( 3.7%)
no_profiles: 27,442 ( 2.6%)
```
For railsbench:
```
Top-6 invokeblock handler (100.0% of total 2,529,104):
monomorphic_iseq: 834,452 (33.0%)
megamorphic: 818,347 (32.4%)
polymorphic: 632,273 (25.0%)
monomorphic_ifunc: 224,243 ( 8.9%)
monomorphic_other: 19,595 ( 0.8%)
no_profiles: 194 ( 0.0%)
```
For shipit:
```
Top-6 invokeblock handler (100.0% of total 2,104,148):
megamorphic: 1,269,889 (60.4%)
polymorphic: 411,475 (19.6%)
no_profiles: 173,367 ( 8.2%)
monomorphic_other: 118,619 ( 5.6%)
monomorphic_iseq: 84,891 ( 4.0%)
monomorphic_ifunc: 45,907 ( 2.2%)
```
Seems like specializing on the monomorphic case for a specific ISEQ actually isn't a bad way of going about this, at least to start...
| -rw-r--r-- | insns.def | 1 | ||||
| -rw-r--r-- | vm_insnhelper.c | 12 | ||||
| -rw-r--r-- | zjit.c | 4 | ||||
| -rw-r--r-- | zjit.rb | 1 | ||||
| -rw-r--r-- | zjit/bindgen/src/main.rs | 2 | ||||
| -rw-r--r-- | zjit/src/cruby_bindings.inc.rs | 54 | ||||
| -rw-r--r-- | zjit/src/distribution.rs | 8 | ||||
| -rw-r--r-- | zjit/src/hir.rs | 28 | ||||
| -rw-r--r-- | zjit/src/profile.rs | 30 | ||||
| -rw-r--r-- | zjit/src/stats.rs | 7 |
10 files changed, 124 insertions, 23 deletions
@@ -1137,6 +1137,7 @@ invokeblock // attr bool handles_sp = true; // attr rb_snum_t sp_inc = sp_inc_of_invokeblock(cd->ci); // attr rb_snum_t comptime_sp_inc = sp_inc_of_invokeblock(ci); +// attr bool zjit_profile = true; { VALUE bh = VM_BLOCK_HANDLER_NONE; val = vm_sendish(ec, GET_CFP(), cd, bh, mexp_search_invokeblock); diff --git a/vm_insnhelper.c b/vm_insnhelper.c index e3ae25b176..e1ec5e63ec 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -5497,6 +5497,12 @@ vm_invoke_proc_block(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, return vm_invoke_block(ec, reg_cfp, calling, ci, is_lambda, block_handler); } +enum rb_block_handler_type +rb_vm_block_handler_type(VALUE block_handler) +{ + return vm_block_handler_type(block_handler); +} + static inline VALUE vm_invoke_block(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling, const struct rb_callinfo *ci, @@ -6059,6 +6065,12 @@ vm_define_method(const rb_execution_context_t *ec, VALUE obj, ID id, VALUE iseqv } } +VALUE +rb_vm_get_block_handler(rb_control_frame_t *reg_cfp) +{ + return VM_CF_BLOCK_HANDLER(reg_cfp); +} + static VALUE vm_invokeblock_i(struct rb_execution_context_struct *ec, struct rb_control_frame_struct *reg_cfp, @@ -301,6 +301,10 @@ rb_zjit_class_has_default_allocator(VALUE klass) return alloc == rb_class_allocate_instance; } + +VALUE rb_vm_get_block_handler(rb_control_frame_t *reg_cfp); +enum rb_block_handler_type rb_vm_block_handler_type(VALUE block_handler); + // Primitives used by zjit.rb. Don't put other functions below, which wouldn't use them. 
VALUE rb_zjit_assert_compiles(rb_execution_context_t *ec, VALUE self); VALUE rb_zjit_stats(rb_execution_context_t *ec, VALUE self, VALUE target_key); @@ -164,6 +164,7 @@ class << RubyVM::ZJIT print_counters_with_prefix(prefix: 'unspecialized_send_without_block_def_type_', prompt: 'not optimized method types for send_without_block', buf:, stats:, limit: 20) print_counters_with_prefix(prefix: 'not_optimized_yarv_insn_', prompt: 'not optimized instructions', buf:, stats:, limit: 20) print_counters_with_prefix(prefix: 'send_fallback_', prompt: 'send fallback reasons', buf:, stats:, limit: 20) + print_counters_with_prefix(prefix: 'invokeblock_handler_', prompt: 'invokeblock handler', buf:, stats:, limit: 10) # Show most popular unsupported call features. Because each call can # use multiple complex features, a decrease in this number does not diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index 76f04f4369..bbb3b54d6c 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -399,6 +399,8 @@ fn main() { .allowlist_function("rb_yarv_str_eql_internal") .allowlist_function("rb_str_neq_internal") .allowlist_function("rb_yarv_ary_entry_internal") + .allowlist_function("rb_vm_get_block_handler") + .allowlist_function("rb_vm_block_handler_type") .allowlist_function("rb_FL_TEST") .allowlist_function("rb_FL_TEST_RAW") .allowlist_function("rb_RB_TYPE_P") diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index eb8ffcd158..dc9d0d144c 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -571,6 +571,11 @@ pub struct rb_captured_block__bindgen_ty_1 { pub val: __BindgenUnionField<VALUE>, pub bindgen_union_field: u64, } +pub const block_handler_type_iseq: rb_block_handler_type = 0; +pub const block_handler_type_ifunc: rb_block_handler_type = 1; +pub const block_handler_type_symbol: rb_block_handler_type = 2; +pub const block_handler_type_proc: rb_block_handler_type = 3; +pub type 
rb_block_handler_type = u32; pub const block_type_iseq: rb_block_type = 0; pub const block_type_ifunc: rb_block_type = 1; pub const block_type_symbol: rb_block_type = 2; @@ -1047,29 +1052,30 @@ pub const YARVINSN_zjit_send: ruby_vminsn_type = 219; pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 220; pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 221; pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 222; -pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 223; -pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 224; -pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 225; -pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 226; -pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 227; -pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 228; -pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 229; -pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 230; -pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 231; -pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 232; -pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 233; -pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 234; -pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 235; -pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 236; -pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 237; -pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 238; -pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 239; -pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 240; -pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 241; -pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 242; -pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 243; -pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 244; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 245; +pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 223; +pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 224; +pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 225; +pub const 
YARVINSN_zjit_opt_mult: ruby_vminsn_type = 226; +pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 227; +pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 228; +pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 229; +pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 230; +pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 231; +pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 232; +pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 233; +pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 234; +pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 235; +pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 236; +pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 237; +pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 239; +pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 240; +pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 241; +pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 242; +pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 243; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 244; +pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 245; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 246; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), @@ -1333,6 +1339,8 @@ unsafe extern "C" { pub fn rb_zjit_class_initialized_p(klass: VALUE) -> bool; pub fn rb_zjit_class_get_alloc_func(klass: VALUE) -> rb_alloc_func_t; pub fn rb_zjit_class_has_default_allocator(klass: VALUE) -> bool; + pub fn rb_vm_get_block_handler(reg_cfp: *mut rb_control_frame_t) -> VALUE; + pub fn rb_vm_block_handler_type(block_handler: VALUE) -> rb_block_handler_type; pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_iseq_pc_at_idx(iseq: *const rb_iseq_t, insn_idx: u32) -> *mut VALUE; pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: 
*const VALUE) -> ::std::os::raw::c_int; diff --git a/zjit/src/distribution.rs b/zjit/src/distribution.rs index 7a496ffd8d..2c6ffb3ae6 100644 --- a/zjit/src/distribution.rs +++ b/zjit/src/distribution.rs @@ -114,10 +114,18 @@ impl<T: Copy + PartialEq + Default + std::fmt::Debug, const N: usize> Distributi self.kind == DistributionKind::Monomorphic } + pub fn is_polymorphic(&self) -> bool { + self.kind == DistributionKind::Polymorphic + } + pub fn is_skewed_polymorphic(&self) -> bool { self.kind == DistributionKind::SkewedPolymorphic } + pub fn is_megamorphic(&self) -> bool { + self.kind == DistributionKind::Megamorphic + } + pub fn is_skewed_megamorphic(&self) -> bool { self.kind == DistributionKind::SkewedMegamorphic } diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 9762a87dd4..449047d0df 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -4407,6 +4407,34 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> { // profiled cfp->self. if opcode == YARVINSN_getinstancevariable || opcode == YARVINSN_trace_getinstancevariable { profiles.profile_self(&exit_state, self_param); + } else if opcode == YARVINSN_invokeblock || opcode == YARVINSN_trace_invokeblock { + if get_option!(stats) { + let iseq_insn_idx = exit_state.insn_idx; + if let Some(operand_types) = profiles.payload.profile.get_operand_types(iseq_insn_idx) { + if let [self_type_distribution] = &operand_types[..] 
{ + let summary = TypeDistributionSummary::new(&self_type_distribution); + if summary.is_monomorphic() { + let obj = summary.bucket(0).class(); + let bh_type = unsafe { rb_vm_block_handler_type(obj) }; + if bh_type == block_handler_type_iseq { + fun.push_insn(block, Insn::IncrCounter(Counter::invokeblock_handler_monomorphic_iseq)); + } else if bh_type == block_handler_type_ifunc { + fun.push_insn(block, Insn::IncrCounter(Counter::invokeblock_handler_monomorphic_ifunc)); + } else { + fun.push_insn(block, Insn::IncrCounter(Counter::invokeblock_handler_monomorphic_other)); + } + } else if summary.is_skewed_polymorphic() || summary.is_polymorphic() { + fun.push_insn(block, Insn::IncrCounter(Counter::invokeblock_handler_polymorphic)); + } else if summary.is_skewed_megamorphic() || summary.is_megamorphic() { + fun.push_insn(block, Insn::IncrCounter(Counter::invokeblock_handler_megamorphic)); + } else { + fun.push_insn(block, Insn::IncrCounter(Counter::invokeblock_handler_no_profiles)); + } + } else { + fun.push_insn(block, Insn::IncrCounter(Counter::invokeblock_handler_no_profiles)); + } + } + } } else { profiles.profile_stack(&exit_state); } diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs index 08fdf3eb97..c58999668e 100644 --- a/zjit/src/profile.rs +++ b/zjit/src/profile.rs @@ -43,6 +43,10 @@ impl Profiler { fn peek_at_self(&self) -> VALUE { unsafe { rb_get_cfp_self(self.cfp) } } + + fn peek_at_block_handler(&self) -> VALUE { + unsafe { rb_vm_get_block_handler(self.cfp) } + } } /// API called from zjit_* instruction. opcode is the bare (non-zjit_*) instruction. 
@@ -83,6 +87,7 @@ fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) { YARVINSN_opt_length => profile_operands(profiler, profile, 1), YARVINSN_opt_size => profile_operands(profiler, profile, 1), YARVINSN_opt_succ => profile_operands(profiler, profile, 1), + YARVINSN_invokeblock => profile_block_handler(profiler, profile), YARVINSN_opt_send_without_block | YARVINSN_send => { let cd: *const rb_call_data = profiler.insn_opnd(0).as_ptr(); let argc = unsafe { vm_ci_argc((*cd).ci) }; @@ -135,6 +140,17 @@ fn profile_self(profiler: &mut Profiler, profile: &mut IseqProfile) { types[0].observe(ty); } +fn profile_block_handler(profiler: &mut Profiler, profile: &mut IseqProfile) { + let types = &mut profile.opnd_types[profiler.insn_idx]; + if types.is_empty() { + types.resize(1, TypeDistribution::new()); + } + let obj = profiler.peek_at_block_handler(); + let ty = ProfiledType::object(obj); + unsafe { rb_gc_writebarrier(profiler.iseq.into(), ty.class()) }; + types[0].observe(ty); +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Flags(u32); @@ -147,6 +163,8 @@ impl Flags { const IS_T_OBJECT: u32 = 1 << 2; /// Object is a struct with embedded fields const IS_STRUCT_EMBEDDED: u32 = 1 << 3; + /// Set if the ProfiledType is used for profiling specific objects, not just classes/shapes + const IS_OBJECT_PROFILING: u32 = 1 << 4; pub fn none() -> Self { Self(Self::NONE) } @@ -155,6 +173,7 @@ impl Flags { pub fn is_embedded(self) -> bool { (self.0 & Self::IS_EMBEDDED) != 0 } pub fn is_t_object(self) -> bool { (self.0 & Self::IS_T_OBJECT) != 0 } pub fn is_struct_embedded(self) -> bool { (self.0 & Self::IS_STRUCT_EMBEDDED) != 0 } + pub fn is_object_profiling(self) -> bool { (self.0 & Self::IS_OBJECT_PROFILING) != 0 } } /// opt_send_without_block/opt_plus/... 
should store: @@ -182,6 +201,14 @@ impl Default for ProfiledType { } impl ProfiledType { + /// Profile the object itself + fn object(obj: VALUE) -> Self { + let mut flags = Flags::none(); + flags.0 |= Flags::IS_OBJECT_PROFILING; + Self { class: obj, shape: INVALID_SHAPE_ID, flags } + } + + /// Profile the class and shape of the given object fn new(obj: VALUE) -> Self { if obj == Qfalse { return Self { class: unsafe { rb_cFalseClass }, @@ -251,6 +278,9 @@ impl ProfiledType { } pub fn is_string(&self) -> bool { + if self.flags.is_object_profiling() { + panic!("should not call is_string on object-profiled ProfiledType"); + } // Fast paths for immediates and exact-class if self.flags.is_immediate() { return false; diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index 12e6e3aa8d..fbfac7b429 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -287,6 +287,13 @@ make_counters! { // The number of times we ran a dynamic check guard_type_count, guard_shape_count, + + invokeblock_handler_monomorphic_iseq, + invokeblock_handler_monomorphic_ifunc, + invokeblock_handler_monomorphic_other, + invokeblock_handler_polymorphic, + invokeblock_handler_megamorphic, + invokeblock_handler_no_profiles, } /// Increase a counter by a specified amount |
