diff options
| author | Kevin Menard <kevin@nirvdrum.com> | 2026-01-14 19:10:06 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-01-14 19:10:06 -0500 |
| commit | 4a21b83693fdc0e976da209047ba286b2f4084e5 (patch) | |
| tree | d7976abd267ba43eafee2aa24285a67cdd29f6d9 | |
| parent | cdb2b0eed50e1c837adeb85ef8978e533f056327 (diff) | |
ZJIT: Optimize common `invokesuper` cases (#15816)
* ZJIT: Profile `invokesuper` instructions
* ZJIT: Introduce the `InvokeSuperDirect` HIR instruction
The new instruction is an optimized version of `InvokeSuper` when we know the `super` target is an ISEQ.
* ZJIT: Expand definition of unspecializable to more complex cases
* ZJIT: Ensure `invokesuper` optimization works when the inheritance hierarchy is modified
* ZJIT: Simplify `invokesuper` specialization to most common case
Looking at ruby-bench, most `super` calls don't pass a block, which means we can use the already optimized `SendWithoutBlockDirect`.
* ZJIT: Track `super` method entries directly to avoid GC issues
Because the method entry isn't typed as a `VALUE`, we set up barriers on its `VALUE` fields. However, that was insufficient because the method entry itself could be collected in certain cases, resulting in dangling objects. Now we track the method entry as a `VALUE` and can more naturally mark it and its children.
* ZJIT: Optimize `super` calls with simple argument forms
* ZJIT: Report the reason why we can't optimize an `invokesuper` instance
* ZJIT: Revise send fallback reasons for `super` calls
* ZJIT: Assert `super` calls are `FCALL` and don't need visibility checks
| -rw-r--r-- | insns.def | 1 | ||||
| -rw-r--r-- | test/ruby/test_zjit.rb | 477 | ||||
| -rw-r--r-- | yjit/src/cruby_bindings.inc.rs | 49 | ||||
| -rw-r--r-- | zjit.rb | 1 | ||||
| -rw-r--r-- | zjit/src/codegen.rs | 42 | ||||
| -rw-r--r-- | zjit/src/cruby_bindings.inc.rs | 49 | ||||
| -rw-r--r-- | zjit/src/hir.rs | 164 | ||||
| -rw-r--r-- | zjit/src/hir/opt_tests.rs | 341 | ||||
| -rw-r--r-- | zjit/src/profile.rs | 57 | ||||
| -rw-r--r-- | zjit/src/stats.rs | 57 |
10 files changed, 1180 insertions, 58 deletions
@@ -1092,6 +1092,7 @@ invokesuper (VALUE val) // attr rb_snum_t sp_inc = sp_inc_of_sendish(cd->ci); // attr rb_snum_t comptime_sp_inc = sp_inc_of_sendish(ci); +// attr bool zjit_profile = true; { VALUE bh = vm_caller_setup_arg_block(ec, GET_CFP(), cd->ci, blockiseq, true); val = vm_sendish(ec, GET_CFP(), cd, bh, mexp_search_super); diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index cf3c46b3ed..bc4f5f2ae8 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -843,6 +843,483 @@ class TestZJIT < Test::Unit::TestCase } end + def test_invokesuper_to_iseq + assert_compiles '["B", "A"]', %q{ + class A + def foo + "A" + end + end + + class B < A + def foo + ["B", super] + end + end + + def test + B.new.foo + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + def test_invokesuper_with_args + assert_compiles '["B", 11]', %q{ + class A + def foo(x) + x * 2 + end + end + + class B < A + def foo(x) + ["B", super(x) + 1] + end + end + + def test + B.new.foo(5) + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + # Test super with explicit args when callee has rest parameter. + # This should fall back to dynamic dispatch since we can't handle rest params yet. + def test_invokesuper_with_args_to_rest_param + assert_compiles '["B", "a", ["b", "c"]]', %q{ + class A + def foo(x, *rest) + [x, rest] + end + end + + class B < A + def foo(x, y, z) + ["B", *super(x, y, z)] + end + end + + def test + B.new.foo("a", "b", "c") + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + def test_invokesuper_with_block + assert_compiles '["B", "from_block"]', %q{ + class A + def foo + block_given? ? 
yield : "no_block" + end + end + + class B < A + def foo + ["B", super { "from_block" }] + end + end + + def test + B.new.foo + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + def test_invokesuper_to_cfunc + assert_compiles '["MyArray", 3]', %q{ + class MyArray < Array + def length + ["MyArray", super] + end + end + + def test + MyArray.new([1, 2, 3]).length + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + def test_invokesuper_multilevel + assert_compiles '["C", ["B", "A"]]', %q{ + class A + def foo + "A" + end + end + + class B < A + def foo + ["B", super] + end + end + + class C < B + def foo + ["C", super] + end + end + + def test + C.new.foo + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + # Test implicit block forwarding - super without explicit block should forward caller's block + # Note: We call test twice to ensure ZJIT compiles it before the final call that we check + def test_invokesuper_forwards_block_implicitly + assert_compiles '["B", "forwarded_block"]', %q{ + class A + def foo + block_given? ? yield : "no_block" + end + end + + class B < A + def foo + ["B", super] # should forward the block from caller + end + end + + def test + B.new.foo { "forwarded_block" } + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + # Test implicit block forwarding with explicit arguments + def test_invokesuper_forwards_block_implicitly_with_args + assert_compiles '["B", ["arg_value", "forwarded"]]', %q{ + class A + def foo(x) + [x, (block_given? ? 
yield : "no_block")] + end + end + + class B < A + def foo(x) + ["B", super(x)] # explicit args, but block should still be forwarded + end + end + + def test + B.new.foo("arg_value") { "forwarded" } + end + + test # profile + test # compile + run compiled code + }, call_threshold: 2 + end + + # Test implicit block forwarding when no block is given (should not fail) + def test_invokesuper_forwards_block_implicitly_no_block_given + assert_compiles '["B", "no_block"]', %q{ + class A + def foo + block_given? ? yield : "no_block" + end + end + + class B < A + def foo + ["B", super] # no block given by caller + end + end + + def test + B.new.foo # called without a block + end + + test # profile + test # compile + run compiled code + }, call_threshold: 2 + end + + # Test implicit block forwarding through multiple inheritance levels + def test_invokesuper_forwards_block_implicitly_multilevel + assert_compiles '["C", ["B", "deep_block"]]', %q{ + class A + def foo + block_given? ? yield : "no_block" + end + end + + class B < A + def foo + ["B", super] # forwards block to A + end + end + + class C < B + def foo + ["C", super] # forwards block to B, which forwards to A + end + end + + def test + C.new.foo { "deep_block" } + end + + test # profile + test # compile + run compiled code + }, call_threshold: 2 + end + + # Test implicit block forwarding with block parameter syntax + def test_invokesuper_forwards_block_param + assert_compiles '["B", "block_param_forwarded"]', %q{ + class A + def foo + block_given? ? yield : "no_block" + end + end + + class B < A + def foo(&block) + ["B", super] # should forward &block implicitly + end + end + + def test + B.new.foo { "block_param_forwarded" } + end + + test # profile + test # compile + run compiled code + }, call_threshold: 2 + end + + def test_invokesuper_with_blockarg + assert_compiles '["B", "different block"]', %q{ + class A + def foo + block_given? ? 
yield : "no block" + end + end + + class B < A + def foo(&blk) + other_block = proc { "different block" } + ["B", super(&other_block)] + end + end + + def test + B.new.foo { "passed block" } + end + + test # profile + test # compile + run compiled code + }, call_threshold: 2 + end + + def test_invokesuper_with_symbol_to_proc + assert_compiles '["B", [3, 5, 7]]', %q{ + class A + def foo(items, &blk) + items.map(&blk) + end + end + + class B < A + def foo(items) + ["B", super(items, &:succ)] + end + end + + def test + B.new.foo([2, 4, 6]) + end + + test # profile + test # compile + run compiled code + }, call_threshold: 2 + end + + def test_invokesuper_with_splat + assert_compiles '["B", 6]', %q{ + class A + def foo(a, b, c) + a + b + c + end + end + + class B < A + def foo(*args) + ["B", super(*args)] + end + end + + def test + B.new.foo(1, 2, 3) + end + + test # profile + test # compile + run compiled code + }, call_threshold: 2 + end + + def test_invokesuper_with_kwargs + assert_compiles '["B", "x=1, y=2"]', %q{ + class A + def foo(x:, y:) + "x=#{x}, y=#{y}" + end + end + + class B < A + def foo(x:, y:) + ["B", super(x: x, y: y)] + end + end + + def test + B.new.foo(x: 1, y: 2) + end + + test # profile + test # compile + run compiled code + }, call_threshold: 2 + end + + def test_invokesuper_with_kw_splat + assert_compiles '["B", "x=1, y=2"]', %q{ + class A + def foo(x:, y:) + "x=#{x}, y=#{y}" + end + end + + class B < A + def foo(**kwargs) + ["B", super(**kwargs)] + end + end + + def test + B.new.foo(x: 1, y: 2) + end + + test # profile + test # compile + run compiled code + }, call_threshold: 2 + end + + # Test that including a module after compilation correctly changes the super target. + # The included module's method should be called, not the original super target. 
+ def test_invokesuper_with_include + assert_compiles '["B", "M"]', %q{ + class A + def foo + "A" + end + end + + class B < A + def foo + ["B", super] + end + end + + def test + B.new.foo + end + + test # profile invokesuper (super -> A#foo) + test # compile with super -> A#foo + + # Now include a module in B that defines foo - super should go to M#foo instead + module M + def foo + "M" + end + end + B.include(M) + + test # should call M#foo, not A#foo + }, call_threshold: 2 + end + + # Test that prepending a module after compilation correctly changes the super target. + # The prepended module's method should be called, not the original super target. + def test_invokesuper_with_prepend + assert_compiles '["B", "M"]', %q{ + class A + def foo + "A" + end + end + + class B < A + def foo + ["B", super] + end + end + + def test + B.new.foo + end + + test # profile invokesuper (super -> A#foo) + test # compile with super -> A#foo + + # Now prepend a module that defines foo - super should go to M#foo instead + module M + def foo + "M" + end + end + A.prepend(M) + + test # should call M#foo, not A#foo + }, call_threshold: 2 + end + + # Test super with positional and keyword arguments (pattern from chunky_png) + def test_invokesuper_with_keyword_args + assert_compiles '{content: "image data"}', %q{ + class A + def foo(attributes = {}) + @attributes = attributes + end + end + + class B < A + def foo(content = '') + super(content: content) + end + end + + def test + B.new.foo("image data") + end + + test + test + }, call_threshold: 2 + end + def test_invokebuiltin # Not using assert_compiles due to register spill assert_runs '["."]', %q{ diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 61dbf9b5c3..9fbcf2169f 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -924,30 +924,31 @@ pub const YARVINSN_zjit_send: ruby_vminsn_type = 221; pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 222; pub const 
YARVINSN_zjit_objtostring: ruby_vminsn_type = 223; pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 224; -pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 225; -pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 226; -pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 227; -pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 228; -pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 229; -pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 230; -pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 231; -pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 232; -pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 233; -pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 234; -pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 235; -pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 236; -pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 237; -pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 238; -pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 239; -pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 240; -pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 241; -pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 242; -pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 243; -pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 244; -pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 245; -pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 246; -pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 247; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 248; +pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 225; +pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 226; +pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 227; +pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 228; +pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 229; +pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 230; +pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 231; +pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 232; 
+pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 233; +pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 234; +pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 235; +pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 236; +pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 237; +pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 239; +pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 240; +pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 241; +pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 242; +pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 243; +pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 244; +pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 245; +pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 246; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 247; +pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 248; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 249; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), @@ -184,6 +184,7 @@ class << RubyVM::ZJIT # Show fallback counters, ordered by the typical amount of fallbacks for the prefix at the time print_counters_with_prefix(prefix: 'unspecialized_send_def_type_', prompt: 'not optimized method types for send', buf:, stats:, limit: 20) print_counters_with_prefix(prefix: 'unspecialized_send_without_block_def_type_', prompt: 'not optimized method types for send_without_block', buf:, stats:, limit: 20) + print_counters_with_prefix(prefix: 'unspecialized_super_def_type_', prompt: 'not optimized method types for super', buf:, stats:, limit: 20) print_counters_with_prefix(prefix: 'uncategorized_fallback_yarv_insn_', prompt: 'instructions with uncategorized fallback reason', buf:, stats:, limit: 20) print_counters_with_prefix(prefix: 'send_fallback_', prompt: 'send fallback reasons', buf:, 
stats:, limit: 20) print_counters_with_prefix(prefix: 'setivar_fallback_', prompt: 'setivar fallback reasons', buf:, stats:, limit: 5) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 4a186d960c..16ac2573f2 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -15,7 +15,7 @@ use crate::invariants::{ use crate::gc::append_gc_offsets; use crate::payload::{get_or_create_iseq_payload, IseqCodePtrs, IseqVersion, IseqVersionRef, IseqStatus}; use crate::state::ZJITState; -use crate::stats::{CompileError, exit_counter_for_compile_error, exit_counter_for_unhandled_hir_insn, incr_counter, incr_counter_by, send_fallback_counter, send_fallback_counter_for_method_type, send_fallback_counter_ptr_for_opcode, send_without_block_fallback_counter_for_method_type, send_without_block_fallback_counter_for_optimized_method_type}; +use crate::stats::{CompileError, exit_counter_for_compile_error, exit_counter_for_unhandled_hir_insn, incr_counter, incr_counter_by, send_fallback_counter, send_fallback_counter_for_method_type, send_fallback_counter_for_super_method_type, send_fallback_counter_ptr_for_opcode, send_without_block_fallback_counter_for_method_type, send_without_block_fallback_counter_for_optimized_method_type}; use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::{compile_time_ns, exit_compile_error}}; use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr}; use crate::backend::lir::{self, Assembler, C_ARG_OPNDS, C_RET_OPND, CFP, EC, NATIVE_BASE_PTR, NATIVE_STACK_PTR, Opnd, SP, SideExit, Target, asm_ccall, asm_comment}; @@ -401,7 +401,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::Send { cd, blockiseq, state, reason, .. } => gen_send(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::SendForward { cd, blockiseq, state, reason, .. } => gen_send_forward(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::SendWithoutBlock { cd, state, reason, .. 
} => gen_send_without_block(jit, asm, cd, &function.frame_state(state), reason), - Insn::SendWithoutBlockDirect { cme, iseq, recv, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), &function.frame_state(*state)), + Insn::SendWithoutBlockDirect { cme, iseq, recv, args, state, .. } => gen_send_iseq_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), &function.frame_state(*state), None), &Insn::InvokeSuper { cd, blockiseq, state, reason, .. } => gen_invokesuper(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::InvokeBlock { cd, state, reason, .. } => gen_invokeblock(jit, asm, cd, &function.frame_state(state), reason), // Ensure we have enough room fit ec, self, and arguments @@ -453,6 +453,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::GuardNotShared { recv, state } => gen_guard_not_shared(jit, asm, opnd!(recv), &function.frame_state(*state)), &Insn::GuardLess { left, right, state } => gen_guard_less(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)), &Insn::GuardGreaterEq { left, right, state } => gen_guard_greater_eq(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)), + &Insn::GuardSuperMethodEntry { cme, state } => no_output!(gen_guard_super_method_entry(jit, asm, cme, &function.frame_state(state))), + Insn::GetBlockHandler => gen_get_block_handler(jit, asm), Insn::PatchPoint { invariant, state } => no_output!(gen_patch_point(jit, asm, invariant, &function.frame_state(*state))), Insn::CCall { cfunc, recv, args, name, return_type: _, elidable: _ } => gen_ccall(asm, *cfunc, *name, opnd!(recv), opnds!(args)), // Give up CCallWithFrame for 7+ args since asm.ccall() supports at most 6 args (recv + args). @@ -715,6 +717,29 @@ fn gen_guard_greater_eq(jit: &JITState, asm: &mut Assembler, left: Opnd, right: left } +/// Guard that the method entry at ep[VM_ENV_DATA_INDEX_ME_CREF] matches the expected CME. 
+/// This ensures we're calling super from the expected method context. +fn gen_guard_super_method_entry( + jit: &JITState, + asm: &mut Assembler, + cme: *const rb_callable_method_entry_t, + state: &FrameState, +) { + asm_comment!(asm, "guard super method entry"); + let lep = gen_get_lep(jit, asm); + let ep_me_opnd = Opnd::mem(64, lep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF); + let ep_me = asm.load(ep_me_opnd); + asm.cmp(ep_me, Opnd::UImm(cme as u64)); + asm.jne(side_exit(jit, state, SideExitReason::GuardSuperMethodEntry)); +} + +/// Get the block handler from ep[VM_ENV_DATA_INDEX_SPECVAL] at the local EP (LEP). +fn gen_get_block_handler(jit: &JITState, asm: &mut Assembler) -> Opnd { + asm_comment!(asm, "get block handler from LEP"); + let lep = gen_get_lep(jit, asm); + asm.load(Opnd::mem(64, lep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)) +} + fn gen_get_constant_path(jit: &JITState, asm: &mut Assembler, ic: *const iseq_inline_constant_cache, state: &FrameState) -> Opnd { unsafe extern "C" { fn rb_vm_opt_getconstant_path(ec: EcPtr, cfp: CfpPtr, ic: *const iseq_inline_constant_cache) -> VALUE; @@ -1321,8 +1346,10 @@ fn gen_send_without_block( ) } -/// Compile a direct jump to an ISEQ call without block -fn gen_send_without_block_direct( +/// Compile a direct call to an ISEQ method. +/// If `block_handler` is provided, it's used as the specval for the new frame (for forwarding blocks). +/// Otherwise, `VM_BLOCK_HANDLER_NONE` is used. 
+fn gen_send_iseq_direct( cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, @@ -1331,6 +1358,7 @@ fn gen_send_without_block_direct( recv: Opnd, args: Vec<Opnd>, state: &FrameState, + block_handler: Option<Opnd>, ) -> lir::Opnd { gen_incr_counter(asm, Counter::iseq_optimized_send_count); @@ -1357,7 +1385,8 @@ fn gen_send_without_block_direct( let bmethod_specval = (capture.ep.addr() | 1).into(); (bmethod_frame_type, bmethod_specval) } else { - (VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL, VM_BLOCK_HANDLER_NONE.into()) + let specval = block_handler.unwrap_or_else(|| VM_BLOCK_HANDLER_NONE.into()); + (VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL, specval) }; // Set up the new frame @@ -2131,6 +2160,9 @@ fn gen_incr_send_fallback_counter(asm: &mut Assembler, reason: SendFallbackReaso SendNotOptimizedMethodType(method_type) => { gen_incr_counter(asm, send_fallback_counter_for_method_type(method_type)); } + SuperNotOptimizedMethodType(method_type) => { + gen_incr_counter(asm, send_fallback_counter_for_super_method_type(method_type)); + } _ => {} } } diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 5d4fed0c3a..2201bdcffe 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -1713,30 +1713,31 @@ pub const YARVINSN_zjit_send: ruby_vminsn_type = 221; pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 222; pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 223; pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 224; -pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 225; -pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 226; -pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 227; -pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 228; -pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 229; -pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 230; -pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 231; -pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 232; -pub 
const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 233; -pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 234; -pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 235; -pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 236; -pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 237; -pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 238; -pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 239; -pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 240; -pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 241; -pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 242; -pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 243; -pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 244; -pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 245; -pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 246; -pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 247; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 248; +pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 225; +pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 226; +pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 227; +pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 228; +pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 229; +pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 230; +pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 231; +pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 232; +pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 233; +pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 234; +pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 235; +pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 236; +pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 237; +pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 239; +pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 240; +pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 241; +pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 242; +pub 
const YARVINSN_zjit_opt_length: ruby_vminsn_type = 243; +pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 244; +pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 245; +pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 246; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 247; +pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 248; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 249; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index cf0625cdad..48f85c4f23 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -484,6 +484,7 @@ pub enum SideExitReason { UnhandledHIRInsn(InsnId), UnhandledYARVInsn(u32), UnhandledCallType(CallType), + UnhandledBlockArg, TooManyKeywordParameters, FixnumAddOverflow, FixnumSubOverflow, @@ -497,6 +498,7 @@ pub enum SideExitReason { GuardNotShared, GuardLess, GuardGreaterEq, + GuardSuperMethodEntry, PatchPoint(Invariant), CalleeSideExit, ObjToStringFallback, @@ -647,6 +649,22 @@ pub enum SendFallbackReason { /// A singleton class has been seen for the receiver class, so we skip the optimization /// to avoid an invalidation loop. SingletonClassSeen, + /// The super call is passed a block that the optimizer does not support. + SuperCallWithBlock, + /// The profiled super class cannot be found. + SuperClassNotFound, + /// The `super` call uses a complex argument pattern that the optimizer does not support. + SuperComplexArgsPass, + /// The cached target of a `super` call could not be found. + SuperTargetNotFound, + /// Attempted to specialize a `super` call that doesn't have profile data. + SuperNoProfiles, + /// Cannot optimize the `super` call due to the target method. + SuperNotOptimizedMethodType(MethodType), + /// The `super` call is polymorpic. 
+ SuperPolymorphic, + /// The `super` target call uses a complex argument pattern that the optimizer does not support. + SuperTargetComplexArgsPass, /// Initial fallback reason for every instruction, which should be mutated to /// a more actionable reason when an attempt to specialize the instruction fails. Uncategorized(ruby_vminsn_type), @@ -684,6 +702,14 @@ impl Display for SendFallbackReason { ComplexArgPass => write!(f, "Complex argument passing"), UnexpectedKeywordArgs => write!(f, "Unexpected Keyword Args"), SingletonClassSeen => write!(f, "Singleton class previously created for receiver class"), + SuperCallWithBlock => write!(f, "super: call made with a block"), + SuperClassNotFound => write!(f, "super: profiled class cannot be found"), + SuperComplexArgsPass => write!(f, "super: complex argument passing to `super` call"), + SuperNoProfiles => write!(f, "super: no profile data available"), + SuperNotOptimizedMethodType(method_type) => write!(f, "super: unsupported target method type {:?}", method_type), + SuperPolymorphic => write!(f, "super: polymorphic call site"), + SuperTargetNotFound => write!(f, "super: profiled target method cannot be found"), + SuperTargetComplexArgsPass => write!(f, "super: complex argument passing to `super` target call"), Uncategorized(insn) => write!(f, "Uncategorized({})", insn_name(*insn as usize)), } } @@ -975,6 +1001,11 @@ pub enum Insn { GuardGreaterEq { left: InsnId, right: InsnId, state: InsnId }, /// Side-exit if left is not less than right (both operands are C long). GuardLess { left: InsnId, right: InsnId, state: InsnId }, + /// Side-exit if the method entry at ep[VM_ENV_DATA_INDEX_ME_CREF] doesn't match the expected CME. + /// Used to ensure super calls are made from the expected method context. + GuardSuperMethodEntry { cme: *const rb_callable_method_entry_t, state: InsnId }, + /// Get the block handler from ep[VM_ENV_DATA_INDEX_SPECVAL] at the local EP (LEP). 
+ GetBlockHandler, /// Generate no code (or padding if necessary) and insert a patch point /// that can be rewritten to a side exit when the Invariant is broken. @@ -1003,7 +1034,7 @@ impl Insn { | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::SetClassVar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetGlobal { .. } | Insn::SetLocal { .. } | Insn::Throw { .. } | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. } - | Insn::CheckInterrupts { .. } | Insn::GuardBlockParamProxy { .. } + | Insn::CheckInterrupts { .. } | Insn::GuardBlockParamProxy { .. } | Insn::GuardSuperMethodEntry { .. } | Insn::StoreField { .. } | Insn::WriteBarrier { .. } | Insn::HashAset { .. } | Insn::ArrayAset { .. } => false, _ => true, @@ -1353,6 +1384,8 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::GuardNotShared { recv, .. } => write!(f, "GuardNotShared {recv}"), Insn::GuardLess { left, right, .. } => write!(f, "GuardLess {left}, {right}"), Insn::GuardGreaterEq { left, right, .. } => write!(f, "GuardGreaterEq {left}, {right}"), + Insn::GuardSuperMethodEntry { cme, .. } => write!(f, "GuardSuperMethodEntry {:p}", self.ptr_map.map_ptr(cme)), + Insn::GetBlockHandler => write!(f, "GetBlockHandler"), Insn::PatchPoint { invariant, .. } => { write!(f, "PatchPoint {}", invariant.print(self.ptr_map)) }, Insn::GetConstantPath { ic, .. 
} => { write!(f, "GetConstantPath {:p}", self.ptr_map.map_ptr(ic)) }, Insn::IsBlockGiven => { write!(f, "IsBlockGiven") }, @@ -2015,6 +2048,8 @@ impl Function { &GuardNotShared { recv, state } => GuardNotShared { recv: find!(recv), state }, &GuardGreaterEq { left, right, state } => GuardGreaterEq { left: find!(left), right: find!(right), state }, &GuardLess { left, right, state } => GuardLess { left: find!(left), right: find!(right), state }, + &GuardSuperMethodEntry { cme, state } => GuardSuperMethodEntry { cme, state }, + &GetBlockHandler => GetBlockHandler, &FixnumAdd { left, right, state } => FixnumAdd { left: find!(left), right: find!(right), state }, &FixnumSub { left, right, state } => FixnumSub { left: find!(left), right: find!(right), state }, &FixnumMult { left, right, state } => FixnumMult { left: find!(left), right: find!(right), state }, @@ -2187,8 +2222,9 @@ impl Function { Insn::SetGlobal { .. } | Insn::Jump(_) | Insn::EntryPoint { .. } | Insn::IfTrue { .. } | Insn::IfFalse { .. } | Insn::Return { .. } | Insn::Throw { .. } | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::SetClassVar { .. } | Insn::ArrayExtend { .. } - | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetLocal { .. } | Insn::IncrCounter(_) - | Insn::CheckInterrupts { .. } | Insn::GuardBlockParamProxy { .. } | Insn::IncrCounterPtr { .. } + | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetLocal { .. } + | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. } + | Insn::CheckInterrupts { .. } | Insn::GuardBlockParamProxy { .. } | Insn::GuardSuperMethodEntry { .. } | Insn::StoreField { .. } | Insn::WriteBarrier { .. } | Insn::HashAset { .. } | Insn::ArrayAset { .. } => panic!("Cannot infer type of instruction with no output: {}. See Insn::has_output().", self.insns[insn.0]), Insn::Const { val: Const::Value(val) } => Type::from_value(*val), @@ -2296,6 +2332,7 @@ impl Function { Insn::AnyToString { .. } => types::String, Insn::GetLocal { rest_param: true, .. 
} => types::ArrayExact, Insn::GetLocal { .. } => types::BasicObject, + Insn::GetBlockHandler => types::RubyValue, // The type of Snapshot doesn't really matter; it's never materialized. It's used only // as a reference for FrameState, which we use to generate side-exit code. Insn::Snapshot { .. } => types::Any, @@ -3060,6 +3097,120 @@ impl Function { self.push_insn_id(block, insn_id); }; } + Insn::InvokeSuper { recv, cd, blockiseq, args, state, .. } => { + // Don't handle calls with literal blocks (e.g., super { ... }) + if !blockiseq.is_null() { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperCallWithBlock); + continue; + } + + let ci = unsafe { get_call_data_ci(cd) }; + let flags = unsafe { rb_vm_ci_flag(ci) }; + assert!(flags & VM_CALL_FCALL != 0); + + // Reject calls with complex argument handling. + let complex_arg_types = VM_CALL_ARGS_SPLAT + | VM_CALL_KW_SPLAT + | VM_CALL_KWARG + | VM_CALL_ARGS_BLOCKARG + | VM_CALL_FORWARDING; + + if (flags & complex_arg_types) != 0 { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperComplexArgsPass); + continue; + } + + let frame_state = self.frame_state(state); + + // Get the profiled CME from the current method. + let Some(profiles) = self.profiles.as_ref() else { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperNoProfiles); + continue; + }; + + let Some(current_cme) = profiles.payload.profile.get_super_method_entry(frame_state.insn_idx) else { + self.push_insn_id(block, insn_id); + + // The absence of the super CME could be due to a missing profile, but + // if we've made it this far the value would have been deleted, indicating + // that the call is at least polymorphic and possibly megamorphic. + self.set_dynamic_send_reason(insn_id, SuperPolymorphic); + continue; + }; + + // Get defined_class and method ID from the profiled CME. 
+ let current_defined_class = unsafe { (*current_cme).defined_class }; + let mid = unsafe { get_def_original_id((*current_cme).def) }; + + // Compute superclass: RCLASS_SUPER(RCLASS_ORIGIN(defined_class)) + let superclass = unsafe { rb_class_get_superclass(RCLASS_ORIGIN(current_defined_class)) }; + if superclass.nil_p() { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperClassNotFound); + continue; + } + + // Look up the super method. + let super_cme = unsafe { rb_callable_method_entry(superclass, mid) }; + if super_cme.is_null() { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperTargetNotFound); + continue; + } + + // Check if it's an ISEQ method; bail if it isn't. + let def_type = unsafe { get_cme_def_type(super_cme) }; + if def_type != VM_METHOD_TYPE_ISEQ { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::from(def_type))); + continue; + } + + // Check if the super method's parameters support direct send. + // If not, we can't do direct dispatch. + let super_iseq = unsafe { get_def_iseq_ptr((*super_cme).def) }; + if !can_direct_send(self, block, super_iseq, insn_id, args.as_slice()) { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperTargetComplexArgsPass); + continue; + } + + // Add PatchPoint for method redefinition. + self.push_insn(block, Insn::PatchPoint { + invariant: Invariant::MethodRedefined { + klass: unsafe { (*super_cme).defined_class }, + method: mid, + cme: super_cme + }, + state + }); + + // Guard that we're calling `super` from the expected method context. + self.push_insn(block, Insn::GuardSuperMethodEntry { cme: current_cme, state }); + + // Guard that no block is being passed (implicit or explicit). 
+ let block_handler = self.push_insn(block, Insn::GetBlockHandler); + self.push_insn(block, Insn::GuardBitEquals { + val: block_handler, + expected: Const::Value(VALUE(VM_BLOCK_HANDLER_NONE as usize)), + reason: SideExitReason::UnhandledBlockArg, + state + }); + + // Use SendWithoutBlockDirect with the super method's CME and ISEQ. + let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect { + recv, + cd, + cme: super_cme, + iseq: super_iseq, + args, + state + }); + self.make_equal_to(insn_id, send_direct); + } _ => { self.push_insn_id(block, insn_id); } } } @@ -3977,6 +4128,7 @@ impl Function { | &Insn::LoadEC | &Insn::LoadSelf | &Insn::GetLocal { .. } + | &Insn::GetBlockHandler | &Insn::PutSpecialObject { .. } | &Insn::IsBlockGiven | &Insn::IncrCounter(_) @@ -4205,6 +4357,7 @@ impl Function { worklist.push_back(val); } &Insn::GuardBlockParamProxy { state, .. } | + &Insn::GuardSuperMethodEntry { state, .. } | &Insn::GetGlobal { state, .. } | &Insn::GetSpecialSymbol { state, .. } | &Insn::GetSpecialNumber { state, .. } | @@ -4720,6 +4873,8 @@ impl Function { | Insn::Jump { .. } | Insn::EntryPoint { .. } | Insn::GuardBlockParamProxy { .. } + | Insn::GuardSuperMethodEntry { .. } + | Insn::GetBlockHandler | Insn::PatchPoint { .. } | Insn::SideExit { .. } | Insn::IncrCounter { .. 
} @@ -5397,7 +5552,8 @@ fn unspecializable_c_call_type(flags: u32) -> bool { fn unspecializable_call_type(flags: u32) -> bool { ((flags & VM_CALL_ARGS_SPLAT) != 0) || ((flags & VM_CALL_KW_SPLAT) != 0) || - ((flags & VM_CALL_ARGS_BLOCKARG) != 0) + ((flags & VM_CALL_ARGS_BLOCKARG) != 0) || + ((flags & VM_CALL_FORWARDING) != 0) } /// We have IseqPayload, which keeps track of HIR Types in the interpreter, but this is not useful diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 2cf76e5e74..138bbde718 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -10554,4 +10554,345 @@ mod hir_opt_tests { Return v15 "); } + + #[test] + fn test_invokesuper_to_iseq_optimizes_to_direct() { + eval(" + class A + def foo + 'A' + end + end + + class B < A + def foo + super + end + end + + B.new.foo; B.new.foo + "); + + // A Ruby method as the target of `super` should optimize provided no block is given. + let hir = hir_string_proc("B.new.method(:foo)"); + assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to SendWithoutBlockDirect but got:\n{hir}"); + assert!(hir.contains("SendWithoutBlockDirect"), "Should optimize to SendWithoutBlockDirect for call without args or block:\n{hir}"); + + assert_snapshot!(hir, @r" + fn foo@<compiled>:10: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + PatchPoint MethodRedefined(A@0x1000, foo@0x1008, cme:0x1010) + GuardSuperMethodEntry 0x1038 + v18:RubyValue = GetBlockHandler + v19:FalseClass = GuardBitEquals v18, Value(false) + v20:BasicObject = SendWithoutBlockDirect v6, :foo (0x1040) + CheckInterrupts + Return v20 + "); + } + + #[test] + fn test_invokesuper_with_positional_args_optimizes_to_direct() { + eval(" + class A + def foo(x) + x * 2 + end + end + + class B < A + def foo(x) + super(x) + 1 + end + end + + B.new.foo(5); B.new.foo(5) + "); + + let hir = 
hir_string_proc("B.new.method(:foo)"); + assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to SendWithoutBlockDirect but got:\n{hir}"); + assert!(hir.contains("SendWithoutBlockDirect"), "Should optimize to SendWithoutBlockDirect for call without args or block:\n{hir}"); + + assert_snapshot!(hir, @r" + fn foo@<compiled>:10: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :x, l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + PatchPoint MethodRedefined(A@0x1000, foo@0x1008, cme:0x1010) + GuardSuperMethodEntry 0x1038 + v27:RubyValue = GetBlockHandler + v28:FalseClass = GuardBitEquals v27, Value(false) + v29:BasicObject = SendWithoutBlockDirect v8, :foo (0x1040), v9 + v17:Fixnum[1] = Const Value(1) + PatchPoint MethodRedefined(Integer@0x1048, +@0x1050, cme:0x1058) + v32:Fixnum = GuardType v29, Fixnum + v33:Fixnum = FixnumAdd v32, v17 + IncrCounter inline_cfunc_optimized_send_count + CheckInterrupts + Return v33 + "); + } + + #[test] + fn test_invokesuper_with_forwarded_splat_args_remains_invokesuper() { + eval(" + class A + def foo(x) + x * 2 + end + end + + class B < A + def foo(*x) + super + end + end + + B.new.foo(5); B.new.foo(5) + "); + + let hir = hir_string_proc("B.new.method(:foo)"); + assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); + assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for explicit blockarg:\n{hir}"); + + assert_snapshot!(hir, @r" + fn foo@<compiled>:10: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:ArrayExact = GetLocal :x, l0, SP@4, * + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:ArrayExact): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:ArrayExact): + v15:ArrayExact = ToArray v9 + v17:BasicObject = InvokeSuper v8, 0x1000, v15 # SendFallbackReason: super: complex 
argument passing to `super` call + CheckInterrupts + Return v17 + "); + } + + #[test] + fn test_invokesuper_with_block_literal_remains_invokesuper() { + eval(" + class A + def foo + block_given? ? yield : 'no block' + end + end + + class B < A + def foo + super { 'from subclass' } + end + end + + B.new.foo; B.new.foo + "); + + let hir = hir_string_proc("B.new.method(:foo)"); + assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); + assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for block literal:\n{hir}"); + + // With a block, we don't optimize to SendWithoutBlockDirect + assert_snapshot!(hir, @r" + fn foo@<compiled>:10: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v11:BasicObject = InvokeSuper v6, 0x1000 # SendFallbackReason: super: call made with a block + CheckInterrupts + Return v11 + "); + } + + #[test] + fn test_invokesuper_to_cfunc_remains_invokesuper() { + eval(" + class MyArray < Array + def length + super + end + end + + MyArray.new.length; MyArray.new.length + "); + + let hir = hir_string_proc("MyArray.new.method(:length)"); + assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); + assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for CFUNC:\n{hir}"); + + assert_snapshot!(hir, @r" + fn length@<compiled>:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v11:BasicObject = InvokeSuper v6, 0x1000 # SendFallbackReason: super: unsupported target method type Cfunc + CheckInterrupts + Return v11 + "); + } + + #[test] + fn test_invokesuper_with_blockarg_remains_invokesuper() { + eval(" + class A + def foo + block_given? ? 
yield : 'no block' + end + end + + class B < A + def foo(&blk) + other_block = proc { 'different block' } + super(&other_block) + end + end + + B.new.foo { 'passed' }; B.new.foo { 'passed' } + "); + + let hir = hir_string_proc("B.new.method(:foo)"); + assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); + assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for explicit blockarg:\n{hir}"); + + assert_snapshot!(hir, @r" + fn foo@<compiled>:10: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :blk, l0, SP@5 + v3:NilClass = Const Value(nil) + Jump bb2(v1, v2, v3) + bb1(v6:BasicObject, v7:BasicObject): + EntryPoint JIT(0) + v8:NilClass = Const Value(nil) + Jump bb2(v6, v7, v8) + bb2(v10:BasicObject, v11:BasicObject, v12:NilClass): + PatchPoint NoSingletonClass(B@0x1000) + PatchPoint MethodRedefined(B@0x1000, proc@0x1008, cme:0x1010) + v35:HeapObject[class_exact:B] = GuardType v10, HeapObject[class_exact:B] + v36:BasicObject = CCallWithFrame v35, :Kernel#proc@0x1038, block=0x1040 + v18:BasicObject = GetLocal :blk, l0, EP@4 + SetLocal :other_block, l0, EP@3, v36 + v25:BasicObject = GetLocal :other_block, l0, EP@3 + v27:BasicObject = InvokeSuper v10, 0x1048, v25 # SendFallbackReason: super: complex argument passing to `super` call + CheckInterrupts + Return v27 + "); + } + + #[test] + fn test_invokesuper_with_symbol_to_proc_remains_invokesuper() { + eval(" + class A + def foo(items, &blk) + items.map(&blk) + end + end + + class B < A + def foo(items) + super(items, &:succ) + end + end + + B.new.foo([1, 2, 3]); B.new.foo([1, 2, 3]) + "); + + let hir = hir_string_proc("B.new.method(:foo)"); + assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); + assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for symbol-to-proc:\n{hir}"); + + assert_snapshot!(hir, @r" + fn 
foo@<compiled>:10: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :items, l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v15:StaticSymbol[:succ] = Const Value(VALUE(0x1000)) + v17:BasicObject = InvokeSuper v8, 0x1008, v9, v15 # SendFallbackReason: super: complex argument passing to `super` call + CheckInterrupts + Return v17 + "); + } + + #[test] + fn test_invokesuper_with_keyword_args_remains_invokesuper() { + eval(" + class A + def foo(attributes = {}) + @attributes = attributes + end + end + + class B < A + def foo(content = '') + super(content: content) + end + end + + B.new.foo('image data'); B.new.foo('image data') + "); + + let hir = hir_string_proc("B.new.method(:foo)"); + assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); + + assert_snapshot!(hir, @r" + fn foo@<compiled>:9: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :content, l0, SP@4 + v3:CPtr = LoadPC + v4:CPtr[CPtr(0x1000)] = Const CPtr(0x1008) + v5:CBool = IsBitEqual v3, v4 + IfTrue v5, bb2(v1, v2) + Jump bb4(v1, v2) + bb1(v9:BasicObject): + EntryPoint JIT(0) + v10:NilClass = Const Value(nil) + Jump bb2(v9, v10) + bb2(v16:BasicObject, v17:BasicObject): + v20:StringExact[VALUE(0x1010)] = Const Value(VALUE(0x1010)) + v21:StringExact = StringCopy v20 + Jump bb4(v16, v21) + bb3(v13:BasicObject, v14:BasicObject): + EntryPoint JIT(1) + Jump bb4(v13, v14) + bb4(v24:BasicObject, v25:BasicObject): + v31:BasicObject = InvokeSuper v24, 0x1018, v25 # SendFallbackReason: super: complex argument passing to `super` call + CheckInterrupts + Return v31 + "); + } } diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs index 867d97641b..7a584afd6f 100644 --- a/zjit/src/profile.rs +++ b/zjit/src/profile.rs @@ -3,6 +3,7 @@ // We use the YARV bytecode constants which have a CRuby-style name 
#![allow(non_upper_case_globals)] +use std::collections::HashMap; use crate::{cruby::*, payload::get_or_create_iseq_payload, options::{get_option, NumProfiles}}; use crate::distribution::{Distribution, DistributionSummary}; use crate::stats::Counter::profile_time_ns; @@ -90,6 +91,7 @@ fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) { YARVINSN_opt_size => profile_operands(profiler, profile, 1), YARVINSN_opt_succ => profile_operands(profiler, profile, 1), YARVINSN_invokeblock => profile_block_handler(profiler, profile), + YARVINSN_invokesuper => profile_invokesuper(profiler, profile), YARVINSN_opt_send_without_block | YARVINSN_send => { let cd: *const rb_call_data = profiler.insn_opnd(0).as_ptr(); let argc = unsafe { vm_ci_argc((*cd).ci) }; @@ -153,6 +155,37 @@ fn profile_block_handler(profiler: &mut Profiler, profile: &mut IseqProfile) { types[0].observe(ty); } +fn profile_invokesuper(profiler: &mut Profiler, profile: &mut IseqProfile) { + let cme = unsafe { rb_vm_frame_method_entry(profiler.cfp) }; + let cme_value = VALUE(cme as usize); // CME is a T_IMEMO, which is a VALUE + + match profile.super_cme.get(&profiler.insn_idx) { + None => { + // If `None`, then this is our first time looking at `super` for this instruction. + profile.super_cme.insert(profiler.insn_idx, Some(cme_value)); + }, + Some(Some(existing_cme)) => { + // Check if the stored method entry is the same as the current one. If it isn't, then + // mark the call site as polymorphic. + if *existing_cme != cme_value { + profile.super_cme.insert(profiler.insn_idx, None); + } + } + Some(None) => { + // We've visited this instruction and explicitly stored `None` to mark the call site + // as polymorphic. + } + } + + unsafe { rb_gc_writebarrier(profiler.iseq.into(), cme_value) }; + + let cd: *const rb_call_data = profiler.insn_opnd(0).as_ptr(); + let argc = unsafe { vm_ci_argc((*cd).ci) }; + + // Profile all the arguments and self (+1). 
+ profile_operands(profiler, profile, (argc + 1) as usize); +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Flags(u32); @@ -324,6 +357,9 @@ pub struct IseqProfile { /// Number of profiled executions for each YARV instruction, indexed by the instruction index num_profiles: Vec<NumProfiles>, + + /// Method entries for `super` calls (stored as VALUE to be GC-safe) + super_cme: HashMap<usize, Option<VALUE>> } impl IseqProfile { @@ -331,6 +367,7 @@ impl IseqProfile { Self { opnd_types: vec![vec![]; iseq_size as usize], num_profiles: vec![0; iseq_size as usize], + super_cme: HashMap::new(), } } @@ -339,6 +376,11 @@ impl IseqProfile { self.opnd_types.get(insn_idx).map(|v| &**v) } + pub fn get_super_method_entry(&self, insn_idx: usize) -> Option<*const rb_callable_method_entry_t> { + self.super_cme.get(&insn_idx) + .and_then(|opt| opt.map(|v| v.0 as *const rb_callable_method_entry_t)) + } + /// Run a given callback with every object in IseqProfile pub fn each_object(&self, callback: impl Fn(VALUE)) { for operands in &self.opnd_types { @@ -349,9 +391,15 @@ impl IseqProfile { } } } + + for cme_value in self.super_cme.values() { + if let Some(cme) = cme_value { + callback(*cme); + } + } } - /// Run a given callback with a mutable reference to every object in IseqProfile + /// Run a given callback with a mutable reference to every object in IseqProfile. pub fn each_object_mut(&mut self, callback: impl Fn(&mut VALUE)) { for operands in &mut self.opnd_types { for distribution in operands { @@ -361,6 +409,13 @@ impl IseqProfile { } } } + + // Update CME references if they move during compaction. + for cme_value in self.super_cme.values_mut() { + if let Some(cme) = cme_value { + callback(cme); + } + } } } diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index 089037dcd7..506bd82686 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -175,6 +175,7 @@ make_counters! 
{ exit_unhandled_tailcall, exit_unhandled_splat, exit_unhandled_kwarg, + exit_unhandled_block_arg, exit_unknown_special_variable, exit_unhandled_hir_insn, exit_unhandled_yarv_insn, @@ -195,6 +196,7 @@ make_counters! { exit_guard_not_shared_failure, exit_guard_less_failure, exit_guard_greater_eq_failure, + exit_guard_super_method_entry, exit_patchpoint_bop_redefined, exit_patchpoint_method_redefined, exit_patchpoint_stable_constant_names, @@ -247,6 +249,15 @@ make_counters! { send_fallback_obj_to_string_not_string, send_fallback_send_cfunc_variadic, send_fallback_send_cfunc_array_variadic, + send_fallback_super_call_with_block, + send_fallback_super_class_not_found, + send_fallback_super_complex_args_pass, + send_fallback_super_fallback_no_profile, + send_fallback_super_not_optimized_method_type, + send_fallback_super_polymorphic, + send_fallback_super_target_not_found, + send_fallback_super_target_complex_args_pass, + send_fallback_cannot_send_direct, send_fallback_uncategorized, } @@ -357,6 +368,21 @@ make_counters! 
{ unspecialized_send_def_type_refined, unspecialized_send_def_type_null, + // Super call def_type related to send fallback to dynamic dispatch + unspecialized_super_def_type_iseq, + unspecialized_super_def_type_cfunc, + unspecialized_super_def_type_attrset, + unspecialized_super_def_type_ivar, + unspecialized_super_def_type_bmethod, + unspecialized_super_def_type_zsuper, + unspecialized_super_def_type_alias, + unspecialized_super_def_type_undef, + unspecialized_super_def_type_not_implemented, + unspecialized_super_def_type_optimized, + unspecialized_super_def_type_missing, + unspecialized_super_def_type_refined, + unspecialized_super_def_type_null, + // Unsupported parameter features complex_arg_pass_param_rest, complex_arg_pass_param_post, @@ -503,6 +529,7 @@ pub fn side_exit_counter(reason: crate::hir::SideExitReason) -> Counter { UnknownSpecialVariable(_) => exit_unknown_special_variable, UnhandledHIRInsn(_) => exit_unhandled_hir_insn, UnhandledYARVInsn(_) => exit_unhandled_yarv_insn, + UnhandledBlockArg => exit_unhandled_block_arg, FixnumAddOverflow => exit_fixnum_add_overflow, FixnumSubOverflow => exit_fixnum_sub_overflow, FixnumMultOverflow => exit_fixnum_mult_overflow, @@ -518,6 +545,7 @@ pub fn side_exit_counter(reason: crate::hir::SideExitReason) -> Counter { GuardNotShared => exit_guard_not_shared_failure, GuardLess => exit_guard_less_failure, GuardGreaterEq => exit_guard_greater_eq_failure, + GuardSuperMethodEntry => exit_guard_super_method_entry, CalleeSideExit => exit_callee_side_exit, ObjToStringFallback => exit_obj_to_string_fallback, Interrupt => exit_interrupt, @@ -582,6 +610,14 @@ pub fn send_fallback_counter(reason: crate::hir::SendFallbackReason) -> Counter SendNotOptimizedNeedPermission => send_fallback_send_not_optimized_need_permission, CCallWithFrameTooManyArgs => send_fallback_ccall_with_frame_too_many_args, ObjToStringNotString => send_fallback_obj_to_string_not_string, + SuperCallWithBlock => send_fallback_super_call_with_block, + 
SuperClassNotFound => send_fallback_super_class_not_found, + SuperComplexArgsPass => send_fallback_super_complex_args_pass, + SuperNoProfiles => send_fallback_super_fallback_no_profile, + SuperNotOptimizedMethodType(_) => send_fallback_super_not_optimized_method_type, + SuperPolymorphic => send_fallback_super_polymorphic, + SuperTargetNotFound => send_fallback_super_target_not_found, + SuperTargetComplexArgsPass => send_fallback_super_target_complex_args_pass, Uncategorized(_) => send_fallback_uncategorized, } } @@ -641,6 +677,27 @@ pub fn send_fallback_counter_for_method_type(method_type: crate::hir::MethodType } } +pub fn send_fallback_counter_for_super_method_type(method_type: crate::hir::MethodType) -> Counter { + use crate::hir::MethodType::*; + use crate::stats::Counter::*; + + match method_type { + Iseq => unspecialized_super_def_type_iseq, + Cfunc => unspecialized_super_def_type_cfunc, + Attrset => unspecialized_super_def_type_attrset, + Ivar => unspecialized_super_def_type_ivar, + Bmethod => unspecialized_super_def_type_bmethod, + Zsuper => unspecialized_super_def_type_zsuper, + Alias => unspecialized_super_def_type_alias, + Undefined => unspecialized_super_def_type_undef, + NotImplemented => unspecialized_super_def_type_not_implemented, + Optimized => unspecialized_super_def_type_optimized, + Missing => unspecialized_super_def_type_missing, + Refined => unspecialized_super_def_type_refined, + Null => unspecialized_super_def_type_null, + } +} + /// Primitive called in zjit.rb. Zero out all the counters. #[unsafe(no_mangle)] pub extern "C" fn rb_zjit_reset_stats_bang(_ec: EcPtr, _self: VALUE) -> VALUE { |
