summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Menard <kevin@nirvdrum.com>2026-01-14 19:10:06 -0500
committerGitHub <noreply@github.com>2026-01-14 19:10:06 -0500
commit4a21b83693fdc0e976da209047ba286b2f4084e5 (patch)
treed7976abd267ba43eafee2aa24285a67cdd29f6d9
parentcdb2b0eed50e1c837adeb85ef8978e533f056327 (diff)
ZJIT: Optimize common `invokesuper` cases (#15816)
* ZJIT: Profile `invokesuper` instructions * ZJIT: Introduce the `InvokeSuperDirect` HIR instruction The new instruction is an optimized version of `InvokeSuper` when we know the `super` target is an ISEQ. * ZJIT: Expand definition of unspecializable to more complex cases * ZJIT: Ensure `invokesuper` optimization works when the inheritance hierarchy is modified * ZJIT: Simplify `invokesuper` specialization to most common case Looking at ruby-bench, most `super` calls don't pass a block, which means we can use the already optimized `SendWithoutBlockDirect`. * ZJIT: Track `super` method entries directly to avoid GC issues Because the method entry isn't typed as a `VALUE`, we set up barriers on its `VALUE` fields. But, that was insufficient as the method entry itself could be collected in certain cases, resulting in dangling objects. Now we track the method entry as a `VALUE` and can more naturally mark it and its children. * ZJIT: Optimize `super` calls with simple argument forms * ZJIT: Report the reason why we can't optimize an `invokesuper` instance * ZJIT: Revise send fallback reasons for `super` calls * ZJIT: Assert `super` calls are `FCALL` and don't need visibility checks
-rw-r--r--insns.def1
-rw-r--r--test/ruby/test_zjit.rb477
-rw-r--r--yjit/src/cruby_bindings.inc.rs49
-rw-r--r--zjit.rb1
-rw-r--r--zjit/src/codegen.rs42
-rw-r--r--zjit/src/cruby_bindings.inc.rs49
-rw-r--r--zjit/src/hir.rs164
-rw-r--r--zjit/src/hir/opt_tests.rs341
-rw-r--r--zjit/src/profile.rs57
-rw-r--r--zjit/src/stats.rs57
10 files changed, 1180 insertions, 58 deletions
diff --git a/insns.def b/insns.def
index 7df3672615..ceeaf4128e 100644
--- a/insns.def
+++ b/insns.def
@@ -1092,6 +1092,7 @@ invokesuper
(VALUE val)
// attr rb_snum_t sp_inc = sp_inc_of_sendish(cd->ci);
// attr rb_snum_t comptime_sp_inc = sp_inc_of_sendish(ci);
+// attr bool zjit_profile = true;
{
VALUE bh = vm_caller_setup_arg_block(ec, GET_CFP(), cd->ci, blockiseq, true);
val = vm_sendish(ec, GET_CFP(), cd, bh, mexp_search_super);
diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb
index cf3c46b3ed..bc4f5f2ae8 100644
--- a/test/ruby/test_zjit.rb
+++ b/test/ruby/test_zjit.rb
@@ -843,6 +843,483 @@ class TestZJIT < Test::Unit::TestCase
}
end
+ def test_invokesuper_to_iseq
+ assert_compiles '["B", "A"]', %q{
+ class A
+ def foo
+ "A"
+ end
+ end
+
+ class B < A
+ def foo
+ ["B", super]
+ end
+ end
+
+ def test
+ B.new.foo
+ end
+
+ test # profile invokesuper
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ def test_invokesuper_with_args
+ assert_compiles '["B", 11]', %q{
+ class A
+ def foo(x)
+ x * 2
+ end
+ end
+
+ class B < A
+ def foo(x)
+ ["B", super(x) + 1]
+ end
+ end
+
+ def test
+ B.new.foo(5)
+ end
+
+ test # profile invokesuper
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ # Test super with explicit args when callee has rest parameter.
+ # This should fall back to dynamic dispatch since we can't handle rest params yet.
+ def test_invokesuper_with_args_to_rest_param
+ assert_compiles '["B", "a", ["b", "c"]]', %q{
+ class A
+ def foo(x, *rest)
+ [x, rest]
+ end
+ end
+
+ class B < A
+ def foo(x, y, z)
+ ["B", *super(x, y, z)]
+ end
+ end
+
+ def test
+ B.new.foo("a", "b", "c")
+ end
+
+ test # profile invokesuper
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ def test_invokesuper_with_block
+ assert_compiles '["B", "from_block"]', %q{
+ class A
+ def foo
+ block_given? ? yield : "no_block"
+ end
+ end
+
+ class B < A
+ def foo
+ ["B", super { "from_block" }]
+ end
+ end
+
+ def test
+ B.new.foo
+ end
+
+ test # profile invokesuper
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ def test_invokesuper_to_cfunc
+ assert_compiles '["MyArray", 3]', %q{
+ class MyArray < Array
+ def length
+ ["MyArray", super]
+ end
+ end
+
+ def test
+ MyArray.new([1, 2, 3]).length
+ end
+
+ test # profile invokesuper
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ def test_invokesuper_multilevel
+ assert_compiles '["C", ["B", "A"]]', %q{
+ class A
+ def foo
+ "A"
+ end
+ end
+
+ class B < A
+ def foo
+ ["B", super]
+ end
+ end
+
+ class C < B
+ def foo
+ ["C", super]
+ end
+ end
+
+ def test
+ C.new.foo
+ end
+
+ test # profile invokesuper
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ # Test implicit block forwarding - super without explicit block should forward caller's block
+ # Note: We call test twice to ensure ZJIT compiles it before the final call that we check
+ def test_invokesuper_forwards_block_implicitly
+ assert_compiles '["B", "forwarded_block"]', %q{
+ class A
+ def foo
+ block_given? ? yield : "no_block"
+ end
+ end
+
+ class B < A
+ def foo
+ ["B", super] # should forward the block from caller
+ end
+ end
+
+ def test
+ B.new.foo { "forwarded_block" }
+ end
+
+ test # profile invokesuper
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ # Test implicit block forwarding with explicit arguments
+ def test_invokesuper_forwards_block_implicitly_with_args
+ assert_compiles '["B", ["arg_value", "forwarded"]]', %q{
+ class A
+ def foo(x)
+ [x, (block_given? ? yield : "no_block")]
+ end
+ end
+
+ class B < A
+ def foo(x)
+ ["B", super(x)] # explicit args, but block should still be forwarded
+ end
+ end
+
+ def test
+ B.new.foo("arg_value") { "forwarded" }
+ end
+
+ test # profile
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ # Test implicit block forwarding when no block is given (should not fail)
+ def test_invokesuper_forwards_block_implicitly_no_block_given
+ assert_compiles '["B", "no_block"]', %q{
+ class A
+ def foo
+ block_given? ? yield : "no_block"
+ end
+ end
+
+ class B < A
+ def foo
+ ["B", super] # no block given by caller
+ end
+ end
+
+ def test
+ B.new.foo # called without a block
+ end
+
+ test # profile
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ # Test implicit block forwarding through multiple inheritance levels
+ def test_invokesuper_forwards_block_implicitly_multilevel
+ assert_compiles '["C", ["B", "deep_block"]]', %q{
+ class A
+ def foo
+ block_given? ? yield : "no_block"
+ end
+ end
+
+ class B < A
+ def foo
+ ["B", super] # forwards block to A
+ end
+ end
+
+ class C < B
+ def foo
+ ["C", super] # forwards block to B, which forwards to A
+ end
+ end
+
+ def test
+ C.new.foo { "deep_block" }
+ end
+
+ test # profile
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ # Test implicit block forwarding with block parameter syntax
+ def test_invokesuper_forwards_block_param
+ assert_compiles '["B", "block_param_forwarded"]', %q{
+ class A
+ def foo
+ block_given? ? yield : "no_block"
+ end
+ end
+
+ class B < A
+ def foo(&block)
+ ["B", super] # should forward &block implicitly
+ end
+ end
+
+ def test
+ B.new.foo { "block_param_forwarded" }
+ end
+
+ test # profile
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ def test_invokesuper_with_blockarg
+ assert_compiles '["B", "different block"]', %q{
+ class A
+ def foo
+ block_given? ? yield : "no block"
+ end
+ end
+
+ class B < A
+ def foo(&blk)
+ other_block = proc { "different block" }
+ ["B", super(&other_block)]
+ end
+ end
+
+ def test
+ B.new.foo { "passed block" }
+ end
+
+ test # profile
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ def test_invokesuper_with_symbol_to_proc
+ assert_compiles '["B", [3, 5, 7]]', %q{
+ class A
+ def foo(items, &blk)
+ items.map(&blk)
+ end
+ end
+
+ class B < A
+ def foo(items)
+ ["B", super(items, &:succ)]
+ end
+ end
+
+ def test
+ B.new.foo([2, 4, 6])
+ end
+
+ test # profile
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ def test_invokesuper_with_splat
+ assert_compiles '["B", 6]', %q{
+ class A
+ def foo(a, b, c)
+ a + b + c
+ end
+ end
+
+ class B < A
+ def foo(*args)
+ ["B", super(*args)]
+ end
+ end
+
+ def test
+ B.new.foo(1, 2, 3)
+ end
+
+ test # profile
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ def test_invokesuper_with_kwargs
+ assert_compiles '["B", "x=1, y=2"]', %q{
+ class A
+ def foo(x:, y:)
+ "x=#{x}, y=#{y}"
+ end
+ end
+
+ class B < A
+ def foo(x:, y:)
+ ["B", super(x: x, y: y)]
+ end
+ end
+
+ def test
+ B.new.foo(x: 1, y: 2)
+ end
+
+ test # profile
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ def test_invokesuper_with_kw_splat
+ assert_compiles '["B", "x=1, y=2"]', %q{
+ class A
+ def foo(x:, y:)
+ "x=#{x}, y=#{y}"
+ end
+ end
+
+ class B < A
+ def foo(**kwargs)
+ ["B", super(**kwargs)]
+ end
+ end
+
+ def test
+ B.new.foo(x: 1, y: 2)
+ end
+
+ test # profile
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ # Test that including a module after compilation correctly changes the super target.
+ # The included module's method should be called, not the original super target.
+ def test_invokesuper_with_include
+ assert_compiles '["B", "M"]', %q{
+ class A
+ def foo
+ "A"
+ end
+ end
+
+ class B < A
+ def foo
+ ["B", super]
+ end
+ end
+
+ def test
+ B.new.foo
+ end
+
+ test # profile invokesuper (super -> A#foo)
+ test # compile with super -> A#foo
+
+ # Now include a module in B that defines foo - super should go to M#foo instead
+ module M
+ def foo
+ "M"
+ end
+ end
+ B.include(M)
+
+ test # should call M#foo, not A#foo
+ }, call_threshold: 2
+ end
+
+ # Test that prepending a module after compilation correctly changes the super target.
+ # The prepended module's method should be called, not the original super target.
+ def test_invokesuper_with_prepend
+ assert_compiles '["B", "M"]', %q{
+ class A
+ def foo
+ "A"
+ end
+ end
+
+ class B < A
+ def foo
+ ["B", super]
+ end
+ end
+
+ def test
+ B.new.foo
+ end
+
+ test # profile invokesuper (super -> A#foo)
+ test # compile with super -> A#foo
+
+ # Now prepend a module that defines foo - super should go to M#foo instead
+ module M
+ def foo
+ "M"
+ end
+ end
+ A.prepend(M)
+
+ test # should call M#foo, not A#foo
+ }, call_threshold: 2
+ end
+
+ # Test super with positional and keyword arguments (pattern from chunky_png)
+ def test_invokesuper_with_keyword_args
+ assert_compiles '{content: "image data"}', %q{
+ class A
+ def foo(attributes = {})
+ @attributes = attributes
+ end
+ end
+
+ class B < A
+ def foo(content = '')
+ super(content: content)
+ end
+ end
+
+ def test
+ B.new.foo("image data")
+ end
+
+ test
+ test
+ }, call_threshold: 2
+ end
+
def test_invokebuiltin
# Not using assert_compiles due to register spill
assert_runs '["."]', %q{
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
index 61dbf9b5c3..9fbcf2169f 100644
--- a/yjit/src/cruby_bindings.inc.rs
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -924,30 +924,31 @@ pub const YARVINSN_zjit_send: ruby_vminsn_type = 221;
pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 222;
pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 223;
pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 224;
-pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 225;
-pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 226;
-pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 227;
-pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 228;
-pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 229;
-pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 230;
-pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 231;
-pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 232;
-pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 233;
-pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 234;
-pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 235;
-pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 236;
-pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 237;
-pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 238;
-pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 239;
-pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 240;
-pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 241;
-pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 242;
-pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 243;
-pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 244;
-pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 245;
-pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 246;
-pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 247;
-pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 248;
+pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 225;
+pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 226;
+pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 227;
+pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 228;
+pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 229;
+pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 230;
+pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 231;
+pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 232;
+pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 233;
+pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 234;
+pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 235;
+pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 236;
+pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 237;
+pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 238;
+pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 239;
+pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 240;
+pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 241;
+pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 242;
+pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 243;
+pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 244;
+pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 245;
+pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 246;
+pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 247;
+pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 248;
+pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 249;
pub type ruby_vminsn_type = u32;
pub type rb_iseq_callback = ::std::option::Option<
unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void),
diff --git a/zjit.rb b/zjit.rb
index e2aa55f764..0bd6c1b96d 100644
--- a/zjit.rb
+++ b/zjit.rb
@@ -184,6 +184,7 @@ class << RubyVM::ZJIT
# Show fallback counters, ordered by the typical amount of fallbacks for the prefix at the time
print_counters_with_prefix(prefix: 'unspecialized_send_def_type_', prompt: 'not optimized method types for send', buf:, stats:, limit: 20)
print_counters_with_prefix(prefix: 'unspecialized_send_without_block_def_type_', prompt: 'not optimized method types for send_without_block', buf:, stats:, limit: 20)
+ print_counters_with_prefix(prefix: 'unspecialized_super_def_type_', prompt: 'not optimized method types for super', buf:, stats:, limit: 20)
print_counters_with_prefix(prefix: 'uncategorized_fallback_yarv_insn_', prompt: 'instructions with uncategorized fallback reason', buf:, stats:, limit: 20)
print_counters_with_prefix(prefix: 'send_fallback_', prompt: 'send fallback reasons', buf:, stats:, limit: 20)
print_counters_with_prefix(prefix: 'setivar_fallback_', prompt: 'setivar fallback reasons', buf:, stats:, limit: 5)
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index 4a186d960c..16ac2573f2 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -15,7 +15,7 @@ use crate::invariants::{
use crate::gc::append_gc_offsets;
use crate::payload::{get_or_create_iseq_payload, IseqCodePtrs, IseqVersion, IseqVersionRef, IseqStatus};
use crate::state::ZJITState;
-use crate::stats::{CompileError, exit_counter_for_compile_error, exit_counter_for_unhandled_hir_insn, incr_counter, incr_counter_by, send_fallback_counter, send_fallback_counter_for_method_type, send_fallback_counter_ptr_for_opcode, send_without_block_fallback_counter_for_method_type, send_without_block_fallback_counter_for_optimized_method_type};
+use crate::stats::{CompileError, exit_counter_for_compile_error, exit_counter_for_unhandled_hir_insn, incr_counter, incr_counter_by, send_fallback_counter, send_fallback_counter_for_method_type, send_fallback_counter_for_super_method_type, send_fallback_counter_ptr_for_opcode, send_without_block_fallback_counter_for_method_type, send_without_block_fallback_counter_for_optimized_method_type};
use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::{compile_time_ns, exit_compile_error}};
use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr};
use crate::backend::lir::{self, Assembler, C_ARG_OPNDS, C_RET_OPND, CFP, EC, NATIVE_BASE_PTR, NATIVE_STACK_PTR, Opnd, SP, SideExit, Target, asm_ccall, asm_comment};
@@ -401,7 +401,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
&Insn::Send { cd, blockiseq, state, reason, .. } => gen_send(jit, asm, cd, blockiseq, &function.frame_state(state), reason),
&Insn::SendForward { cd, blockiseq, state, reason, .. } => gen_send_forward(jit, asm, cd, blockiseq, &function.frame_state(state), reason),
&Insn::SendWithoutBlock { cd, state, reason, .. } => gen_send_without_block(jit, asm, cd, &function.frame_state(state), reason),
- Insn::SendWithoutBlockDirect { cme, iseq, recv, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), &function.frame_state(*state)),
+ Insn::SendWithoutBlockDirect { cme, iseq, recv, args, state, .. } => gen_send_iseq_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), &function.frame_state(*state), None),
&Insn::InvokeSuper { cd, blockiseq, state, reason, .. } => gen_invokesuper(jit, asm, cd, blockiseq, &function.frame_state(state), reason),
&Insn::InvokeBlock { cd, state, reason, .. } => gen_invokeblock(jit, asm, cd, &function.frame_state(state), reason),
// Ensure we have enough room fit ec, self, and arguments
@@ -453,6 +453,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
Insn::GuardNotShared { recv, state } => gen_guard_not_shared(jit, asm, opnd!(recv), &function.frame_state(*state)),
&Insn::GuardLess { left, right, state } => gen_guard_less(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)),
&Insn::GuardGreaterEq { left, right, state } => gen_guard_greater_eq(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)),
+ &Insn::GuardSuperMethodEntry { cme, state } => no_output!(gen_guard_super_method_entry(jit, asm, cme, &function.frame_state(state))),
+ Insn::GetBlockHandler => gen_get_block_handler(jit, asm),
Insn::PatchPoint { invariant, state } => no_output!(gen_patch_point(jit, asm, invariant, &function.frame_state(*state))),
Insn::CCall { cfunc, recv, args, name, return_type: _, elidable: _ } => gen_ccall(asm, *cfunc, *name, opnd!(recv), opnds!(args)),
// Give up CCallWithFrame for 7+ args since asm.ccall() supports at most 6 args (recv + args).
@@ -715,6 +717,29 @@ fn gen_guard_greater_eq(jit: &JITState, asm: &mut Assembler, left: Opnd, right:
left
}
+/// Guard that the method entry at ep[VM_ENV_DATA_INDEX_ME_CREF] matches the expected CME.
+/// This ensures we're calling super from the expected method context.
+fn gen_guard_super_method_entry(
+ jit: &JITState,
+ asm: &mut Assembler,
+ cme: *const rb_callable_method_entry_t,
+ state: &FrameState,
+) {
+ asm_comment!(asm, "guard super method entry");
+ let lep = gen_get_lep(jit, asm);
+ let ep_me_opnd = Opnd::mem(64, lep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF);
+ let ep_me = asm.load(ep_me_opnd);
+ asm.cmp(ep_me, Opnd::UImm(cme as u64));
+ asm.jne(side_exit(jit, state, SideExitReason::GuardSuperMethodEntry));
+}
+
+/// Get the block handler from ep[VM_ENV_DATA_INDEX_SPECVAL] at the local EP (LEP).
+fn gen_get_block_handler(jit: &JITState, asm: &mut Assembler) -> Opnd {
+ asm_comment!(asm, "get block handler from LEP");
+ let lep = gen_get_lep(jit, asm);
+ asm.load(Opnd::mem(64, lep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL))
+}
+
fn gen_get_constant_path(jit: &JITState, asm: &mut Assembler, ic: *const iseq_inline_constant_cache, state: &FrameState) -> Opnd {
unsafe extern "C" {
fn rb_vm_opt_getconstant_path(ec: EcPtr, cfp: CfpPtr, ic: *const iseq_inline_constant_cache) -> VALUE;
@@ -1321,8 +1346,10 @@ fn gen_send_without_block(
)
}
-/// Compile a direct jump to an ISEQ call without block
-fn gen_send_without_block_direct(
+/// Compile a direct call to an ISEQ method.
+/// If `block_handler` is provided, it's used as the specval for the new frame (for forwarding blocks).
+/// Otherwise, `VM_BLOCK_HANDLER_NONE` is used.
+fn gen_send_iseq_direct(
cb: &mut CodeBlock,
jit: &mut JITState,
asm: &mut Assembler,
@@ -1331,6 +1358,7 @@ fn gen_send_without_block_direct(
recv: Opnd,
args: Vec<Opnd>,
state: &FrameState,
+ block_handler: Option<Opnd>,
) -> lir::Opnd {
gen_incr_counter(asm, Counter::iseq_optimized_send_count);
@@ -1357,7 +1385,8 @@ fn gen_send_without_block_direct(
let bmethod_specval = (capture.ep.addr() | 1).into();
(bmethod_frame_type, bmethod_specval)
} else {
- (VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL, VM_BLOCK_HANDLER_NONE.into())
+ let specval = block_handler.unwrap_or_else(|| VM_BLOCK_HANDLER_NONE.into());
+ (VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL, specval)
};
// Set up the new frame
@@ -2131,6 +2160,9 @@ fn gen_incr_send_fallback_counter(asm: &mut Assembler, reason: SendFallbackReaso
SendNotOptimizedMethodType(method_type) => {
gen_incr_counter(asm, send_fallback_counter_for_method_type(method_type));
}
+ SuperNotOptimizedMethodType(method_type) => {
+ gen_incr_counter(asm, send_fallback_counter_for_super_method_type(method_type));
+ }
_ => {}
}
}
diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs
index 5d4fed0c3a..2201bdcffe 100644
--- a/zjit/src/cruby_bindings.inc.rs
+++ b/zjit/src/cruby_bindings.inc.rs
@@ -1713,30 +1713,31 @@ pub const YARVINSN_zjit_send: ruby_vminsn_type = 221;
pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 222;
pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 223;
pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 224;
-pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 225;
-pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 226;
-pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 227;
-pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 228;
-pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 229;
-pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 230;
-pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 231;
-pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 232;
-pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 233;
-pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 234;
-pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 235;
-pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 236;
-pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 237;
-pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 238;
-pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 239;
-pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 240;
-pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 241;
-pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 242;
-pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 243;
-pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 244;
-pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 245;
-pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 246;
-pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 247;
-pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 248;
+pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 225;
+pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 226;
+pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 227;
+pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 228;
+pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 229;
+pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 230;
+pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 231;
+pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 232;
+pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 233;
+pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 234;
+pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 235;
+pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 236;
+pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 237;
+pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 238;
+pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 239;
+pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 240;
+pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 241;
+pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 242;
+pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 243;
+pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 244;
+pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 245;
+pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 246;
+pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 247;
+pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 248;
+pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 249;
pub type ruby_vminsn_type = u32;
pub type rb_iseq_callback = ::std::option::Option<
unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void),
diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs
index cf0625cdad..48f85c4f23 100644
--- a/zjit/src/hir.rs
+++ b/zjit/src/hir.rs
@@ -484,6 +484,7 @@ pub enum SideExitReason {
UnhandledHIRInsn(InsnId),
UnhandledYARVInsn(u32),
UnhandledCallType(CallType),
+ UnhandledBlockArg,
TooManyKeywordParameters,
FixnumAddOverflow,
FixnumSubOverflow,
@@ -497,6 +498,7 @@ pub enum SideExitReason {
GuardNotShared,
GuardLess,
GuardGreaterEq,
+ GuardSuperMethodEntry,
PatchPoint(Invariant),
CalleeSideExit,
ObjToStringFallback,
@@ -647,6 +649,22 @@ pub enum SendFallbackReason {
/// A singleton class has been seen for the receiver class, so we skip the optimization
/// to avoid an invalidation loop.
SingletonClassSeen,
+ /// The super call is passed a block that the optimizer does not support.
+ SuperCallWithBlock,
+ /// The profiled super class cannot be found.
+ SuperClassNotFound,
+ /// The `super` call uses a complex argument pattern that the optimizer does not support.
+ SuperComplexArgsPass,
+ /// The cached target of a `super` call could not be found.
+ SuperTargetNotFound,
+ /// Attempted to specialize a `super` call that doesn't have profile data.
+ SuperNoProfiles,
+ /// Cannot optimize the `super` call due to the target method.
+ SuperNotOptimizedMethodType(MethodType),
+ /// The `super` call is polymorphic.
+ SuperPolymorphic,
+ /// The `super` target call uses a complex argument pattern that the optimizer does not support.
+ SuperTargetComplexArgsPass,
/// Initial fallback reason for every instruction, which should be mutated to
/// a more actionable reason when an attempt to specialize the instruction fails.
Uncategorized(ruby_vminsn_type),
@@ -684,6 +702,14 @@ impl Display for SendFallbackReason {
ComplexArgPass => write!(f, "Complex argument passing"),
UnexpectedKeywordArgs => write!(f, "Unexpected Keyword Args"),
SingletonClassSeen => write!(f, "Singleton class previously created for receiver class"),
+ SuperCallWithBlock => write!(f, "super: call made with a block"),
+ SuperClassNotFound => write!(f, "super: profiled class cannot be found"),
+ SuperComplexArgsPass => write!(f, "super: complex argument passing to `super` call"),
+ SuperNoProfiles => write!(f, "super: no profile data available"),
+ SuperNotOptimizedMethodType(method_type) => write!(f, "super: unsupported target method type {:?}", method_type),
+ SuperPolymorphic => write!(f, "super: polymorphic call site"),
+ SuperTargetNotFound => write!(f, "super: profiled target method cannot be found"),
+ SuperTargetComplexArgsPass => write!(f, "super: complex argument passing to `super` target call"),
Uncategorized(insn) => write!(f, "Uncategorized({})", insn_name(*insn as usize)),
}
}
@@ -975,6 +1001,11 @@ pub enum Insn {
GuardGreaterEq { left: InsnId, right: InsnId, state: InsnId },
/// Side-exit if left is not less than right (both operands are C long).
GuardLess { left: InsnId, right: InsnId, state: InsnId },
+ /// Side-exit if the method entry at ep[VM_ENV_DATA_INDEX_ME_CREF] doesn't match the expected CME.
+ /// Used to ensure super calls are made from the expected method context.
+ GuardSuperMethodEntry { cme: *const rb_callable_method_entry_t, state: InsnId },
+ /// Get the block handler from ep[VM_ENV_DATA_INDEX_SPECVAL] at the local EP (LEP).
+ GetBlockHandler,
/// Generate no code (or padding if necessary) and insert a patch point
/// that can be rewritten to a side exit when the Invariant is broken.
@@ -1003,7 +1034,7 @@ impl Insn {
| Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::SetClassVar { .. } | Insn::ArrayExtend { .. }
| Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetGlobal { .. }
| Insn::SetLocal { .. } | Insn::Throw { .. } | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. }
- | Insn::CheckInterrupts { .. } | Insn::GuardBlockParamProxy { .. }
+ | Insn::CheckInterrupts { .. } | Insn::GuardBlockParamProxy { .. } | Insn::GuardSuperMethodEntry { .. }
| Insn::StoreField { .. } | Insn::WriteBarrier { .. } | Insn::HashAset { .. }
| Insn::ArrayAset { .. } => false,
_ => true,
@@ -1353,6 +1384,8 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
Insn::GuardNotShared { recv, .. } => write!(f, "GuardNotShared {recv}"),
Insn::GuardLess { left, right, .. } => write!(f, "GuardLess {left}, {right}"),
Insn::GuardGreaterEq { left, right, .. } => write!(f, "GuardGreaterEq {left}, {right}"),
+ Insn::GuardSuperMethodEntry { cme, .. } => write!(f, "GuardSuperMethodEntry {:p}", self.ptr_map.map_ptr(cme)),
+ Insn::GetBlockHandler => write!(f, "GetBlockHandler"),
Insn::PatchPoint { invariant, .. } => { write!(f, "PatchPoint {}", invariant.print(self.ptr_map)) },
Insn::GetConstantPath { ic, .. } => { write!(f, "GetConstantPath {:p}", self.ptr_map.map_ptr(ic)) },
Insn::IsBlockGiven => { write!(f, "IsBlockGiven") },
@@ -2015,6 +2048,8 @@ impl Function {
&GuardNotShared { recv, state } => GuardNotShared { recv: find!(recv), state },
&GuardGreaterEq { left, right, state } => GuardGreaterEq { left: find!(left), right: find!(right), state },
&GuardLess { left, right, state } => GuardLess { left: find!(left), right: find!(right), state },
+ &GuardSuperMethodEntry { cme, state } => GuardSuperMethodEntry { cme, state },
+ &GetBlockHandler => GetBlockHandler,
&FixnumAdd { left, right, state } => FixnumAdd { left: find!(left), right: find!(right), state },
&FixnumSub { left, right, state } => FixnumSub { left: find!(left), right: find!(right), state },
&FixnumMult { left, right, state } => FixnumMult { left: find!(left), right: find!(right), state },
@@ -2187,8 +2222,9 @@ impl Function {
Insn::SetGlobal { .. } | Insn::Jump(_) | Insn::EntryPoint { .. }
| Insn::IfTrue { .. } | Insn::IfFalse { .. } | Insn::Return { .. } | Insn::Throw { .. }
| Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::SetClassVar { .. } | Insn::ArrayExtend { .. }
- | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetLocal { .. } | Insn::IncrCounter(_)
- | Insn::CheckInterrupts { .. } | Insn::GuardBlockParamProxy { .. } | Insn::IncrCounterPtr { .. }
+ | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetLocal { .. }
+ | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. }
+ | Insn::CheckInterrupts { .. } | Insn::GuardBlockParamProxy { .. } | Insn::GuardSuperMethodEntry { .. }
| Insn::StoreField { .. } | Insn::WriteBarrier { .. } | Insn::HashAset { .. } | Insn::ArrayAset { .. } =>
panic!("Cannot infer type of instruction with no output: {}. See Insn::has_output().", self.insns[insn.0]),
Insn::Const { val: Const::Value(val) } => Type::from_value(*val),
@@ -2296,6 +2332,7 @@ impl Function {
Insn::AnyToString { .. } => types::String,
Insn::GetLocal { rest_param: true, .. } => types::ArrayExact,
Insn::GetLocal { .. } => types::BasicObject,
+ Insn::GetBlockHandler => types::RubyValue,
// The type of Snapshot doesn't really matter; it's never materialized. It's used only
// as a reference for FrameState, which we use to generate side-exit code.
Insn::Snapshot { .. } => types::Any,
@@ -3060,6 +3097,120 @@ impl Function {
self.push_insn_id(block, insn_id);
};
}
+ Insn::InvokeSuper { recv, cd, blockiseq, args, state, .. } => {
+ // Don't handle calls with literal blocks (e.g., super { ... })
+ if !blockiseq.is_null() {
+ self.push_insn_id(block, insn_id);
+ self.set_dynamic_send_reason(insn_id, SuperCallWithBlock);
+ continue;
+ }
+
+ let ci = unsafe { get_call_data_ci(cd) };
+ let flags = unsafe { rb_vm_ci_flag(ci) };
+ assert!(flags & VM_CALL_FCALL != 0);
+
+ // Reject calls with complex argument handling.
+ let complex_arg_types = VM_CALL_ARGS_SPLAT
+ | VM_CALL_KW_SPLAT
+ | VM_CALL_KWARG
+ | VM_CALL_ARGS_BLOCKARG
+ | VM_CALL_FORWARDING;
+
+ if (flags & complex_arg_types) != 0 {
+ self.push_insn_id(block, insn_id);
+ self.set_dynamic_send_reason(insn_id, SuperComplexArgsPass);
+ continue;
+ }
+
+ let frame_state = self.frame_state(state);
+
+ // Get the profiled CME from the current method.
+ let Some(profiles) = self.profiles.as_ref() else {
+ self.push_insn_id(block, insn_id);
+ self.set_dynamic_send_reason(insn_id, SuperNoProfiles);
+ continue;
+ };
+
+ let Some(current_cme) = profiles.payload.profile.get_super_method_entry(frame_state.insn_idx) else {
+ self.push_insn_id(block, insn_id);
+
+ // The absence of the super CME could be due to a missing profile, but
+ // if we've made it this far the value would have been deleted, indicating
+ // that the call is at least polymorphic and possibly megamorphic.
+ self.set_dynamic_send_reason(insn_id, SuperPolymorphic);
+ continue;
+ };
+
+ // Get defined_class and method ID from the profiled CME.
+ let current_defined_class = unsafe { (*current_cme).defined_class };
+ let mid = unsafe { get_def_original_id((*current_cme).def) };
+
+ // Compute superclass: RCLASS_SUPER(RCLASS_ORIGIN(defined_class))
+ let superclass = unsafe { rb_class_get_superclass(RCLASS_ORIGIN(current_defined_class)) };
+ if superclass.nil_p() {
+ self.push_insn_id(block, insn_id);
+ self.set_dynamic_send_reason(insn_id, SuperClassNotFound);
+ continue;
+ }
+
+ // Look up the super method.
+ let super_cme = unsafe { rb_callable_method_entry(superclass, mid) };
+ if super_cme.is_null() {
+ self.push_insn_id(block, insn_id);
+ self.set_dynamic_send_reason(insn_id, SuperTargetNotFound);
+ continue;
+ }
+
+ // Check if it's an ISEQ method; bail if it isn't.
+ let def_type = unsafe { get_cme_def_type(super_cme) };
+ if def_type != VM_METHOD_TYPE_ISEQ {
+ self.push_insn_id(block, insn_id);
+ self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::from(def_type)));
+ continue;
+ }
+
+ // Check if the super method's parameters support direct send.
+ // If not, we can't do direct dispatch.
+ let super_iseq = unsafe { get_def_iseq_ptr((*super_cme).def) };
+ if !can_direct_send(self, block, super_iseq, insn_id, args.as_slice()) {
+ self.push_insn_id(block, insn_id);
+ self.set_dynamic_send_reason(insn_id, SuperTargetComplexArgsPass);
+ continue;
+ }
+
+ // Add PatchPoint for method redefinition.
+ self.push_insn(block, Insn::PatchPoint {
+ invariant: Invariant::MethodRedefined {
+ klass: unsafe { (*super_cme).defined_class },
+ method: mid,
+ cme: super_cme
+ },
+ state
+ });
+
+ // Guard that we're calling `super` from the expected method context.
+ self.push_insn(block, Insn::GuardSuperMethodEntry { cme: current_cme, state });
+
+ // Guard that no block is being passed (implicit or explicit).
+ let block_handler = self.push_insn(block, Insn::GetBlockHandler);
+ self.push_insn(block, Insn::GuardBitEquals {
+ val: block_handler,
+ expected: Const::Value(VALUE(VM_BLOCK_HANDLER_NONE as usize)),
+ reason: SideExitReason::UnhandledBlockArg,
+ state
+ });
+
+ // Use SendWithoutBlockDirect with the super method's CME and ISEQ.
+ let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect {
+ recv,
+ cd,
+ cme: super_cme,
+ iseq: super_iseq,
+ args,
+ state
+ });
+ self.make_equal_to(insn_id, send_direct);
+ }
_ => { self.push_insn_id(block, insn_id); }
}
}
@@ -3977,6 +4128,7 @@ impl Function {
| &Insn::LoadEC
| &Insn::LoadSelf
| &Insn::GetLocal { .. }
+ | &Insn::GetBlockHandler
| &Insn::PutSpecialObject { .. }
| &Insn::IsBlockGiven
| &Insn::IncrCounter(_)
@@ -4205,6 +4357,7 @@ impl Function {
worklist.push_back(val);
}
&Insn::GuardBlockParamProxy { state, .. } |
+ &Insn::GuardSuperMethodEntry { state, .. } |
&Insn::GetGlobal { state, .. } |
&Insn::GetSpecialSymbol { state, .. } |
&Insn::GetSpecialNumber { state, .. } |
@@ -4720,6 +4873,8 @@ impl Function {
| Insn::Jump { .. }
| Insn::EntryPoint { .. }
| Insn::GuardBlockParamProxy { .. }
+ | Insn::GuardSuperMethodEntry { .. }
+ | Insn::GetBlockHandler
| Insn::PatchPoint { .. }
| Insn::SideExit { .. }
| Insn::IncrCounter { .. }
@@ -5397,7 +5552,8 @@ fn unspecializable_c_call_type(flags: u32) -> bool {
fn unspecializable_call_type(flags: u32) -> bool {
((flags & VM_CALL_ARGS_SPLAT) != 0) ||
((flags & VM_CALL_KW_SPLAT) != 0) ||
- ((flags & VM_CALL_ARGS_BLOCKARG) != 0)
+ ((flags & VM_CALL_ARGS_BLOCKARG) != 0) ||
+ ((flags & VM_CALL_FORWARDING) != 0)
}
/// We have IseqPayload, which keeps track of HIR Types in the interpreter, but this is not useful
diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs
index 2cf76e5e74..138bbde718 100644
--- a/zjit/src/hir/opt_tests.rs
+++ b/zjit/src/hir/opt_tests.rs
@@ -10554,4 +10554,345 @@ mod hir_opt_tests {
Return v15
");
}
+
+ // A zero-argument `super` from `B#foo` to the Ruby-defined `A#foo`, with no block involved.
+ // The expected HIR has no `InvokeSuper`; the call becomes `SendWithoutBlockDirect`, preceded by a
+ // `MethodRedefined` patch point, a `GuardSuperMethodEntry`, and a `GetBlockHandler` /
+ // `GuardBitEquals` pair.
+ // NOTE(review): `B.new.foo` is evaluated twice before compiling, presumably so the `super` call
+ // site has been profiled -- confirm against the profiling thresholds used by the test harness.
+ #[test]
+ fn test_invokesuper_to_iseq_optimizes_to_direct() {
+ eval("
+ class A
+ def foo
+ 'A'
+ end
+ end
+
+ class B < A
+ def foo
+ super
+ end
+ end
+
+ B.new.foo; B.new.foo
+ ");
+
+ // A Ruby method as the target of `super` should optimize provided no block is given.
+ let hir = hir_string_proc("B.new.method(:foo)");
+ assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to SendWithoutBlockDirect but got:\n{hir}");
+ assert!(hir.contains("SendWithoutBlockDirect"), "Should optimize to SendWithoutBlockDirect for call without args or block:\n{hir}");
+
+ // Full optimized HIR for `B#foo`, pinned as an inline snapshot.
+ assert_snapshot!(hir, @r"
+ fn foo@<compiled>:10:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ Jump bb2(v1)
+ bb1(v4:BasicObject):
+ EntryPoint JIT(0)
+ Jump bb2(v4)
+ bb2(v6:BasicObject):
+ PatchPoint MethodRedefined(A@0x1000, foo@0x1008, cme:0x1010)
+ GuardSuperMethodEntry 0x1038
+ v18:RubyValue = GetBlockHandler
+ v19:FalseClass = GuardBitEquals v18, Value(false)
+ v20:BasicObject = SendWithoutBlockDirect v6, :foo (0x1040)
+ CheckInterrupts
+ Return v20
+ ");
+ }
+
+ // `super(x)` with a single positional argument and no block, targeting the Ruby-defined `A#foo(x)`.
+ // The expected HIR replaces `InvokeSuper` with `SendWithoutBlockDirect` carrying the argument, and
+ // the following `+ 1` is specialized to `FixnumAdd`.
+ #[test]
+ fn test_invokesuper_with_positional_args_optimizes_to_direct() {
+ eval("
+ class A
+ def foo(x)
+ x * 2
+ end
+ end
+
+ class B < A
+ def foo(x)
+ super(x) + 1
+ end
+ end
+
+ B.new.foo(5); B.new.foo(5)
+ ");
+
+ let hir = hir_string_proc("B.new.method(:foo)");
+ assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to SendWithoutBlockDirect but got:\n{hir}");
+ // The failure message names the scenario under test (positional args, no block).
+ assert!(hir.contains("SendWithoutBlockDirect"), "Should optimize to SendWithoutBlockDirect for call with positional args and no block:\n{hir}");
+
+ assert_snapshot!(hir, @r"
+ fn foo@<compiled>:10:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ v2:BasicObject = GetLocal :x, l0, SP@4
+ Jump bb2(v1, v2)
+ bb1(v5:BasicObject, v6:BasicObject):
+ EntryPoint JIT(0)
+ Jump bb2(v5, v6)
+ bb2(v8:BasicObject, v9:BasicObject):
+ PatchPoint MethodRedefined(A@0x1000, foo@0x1008, cme:0x1010)
+ GuardSuperMethodEntry 0x1038
+ v27:RubyValue = GetBlockHandler
+ v28:FalseClass = GuardBitEquals v27, Value(false)
+ v29:BasicObject = SendWithoutBlockDirect v8, :foo (0x1040), v9
+ v17:Fixnum[1] = Const Value(1)
+ PatchPoint MethodRedefined(Integer@0x1048, +@0x1050, cme:0x1058)
+ v32:Fixnum = GuardType v29, Fixnum
+ v33:Fixnum = FixnumAdd v32, v17
+ IncrCounter inline_cfunc_optimized_send_count
+ CheckInterrupts
+ Return v33
+ ");
+ }
+
+ // A bare `super` inside `foo(*x)` implicitly forwards the rest parameter as a splat.
+ // The expected HIR keeps `InvokeSuper`, annotated with the "complex argument passing" fallback
+ // reason, and contains no `SendWithoutBlockDirect`.
+ #[test]
+ fn test_invokesuper_with_forwarded_splat_args_remains_invokesuper() {
+ eval("
+ class A
+ def foo(x)
+ x * 2
+ end
+ end
+
+ class B < A
+ def foo(*x)
+ super
+ end
+ end
+
+ B.new.foo(5); B.new.foo(5)
+ ");
+
+ let hir = hir_string_proc("B.new.method(:foo)");
+ assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}");
+ // The failure message names the scenario under test (forwarded splat, not a block argument).
+ assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for forwarded splat args:\n{hir}");
+
+ assert_snapshot!(hir, @r"
+ fn foo@<compiled>:10:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ v2:ArrayExact = GetLocal :x, l0, SP@4, *
+ Jump bb2(v1, v2)
+ bb1(v5:BasicObject, v6:ArrayExact):
+ EntryPoint JIT(0)
+ Jump bb2(v5, v6)
+ bb2(v8:BasicObject, v9:ArrayExact):
+ v15:ArrayExact = ToArray v9
+ v17:BasicObject = InvokeSuper v8, 0x1000, v15 # SendFallbackReason: super: complex argument passing to `super` call
+ CheckInterrupts
+ Return v17
+ ");
+ }
+
+ // `super { ... }` passes a literal block to the parent method.
+ // The expected HIR keeps `InvokeSuper` with the "call made with a block" fallback reason and
+ // contains no `SendWithoutBlockDirect`.
+ #[test]
+ fn test_invokesuper_with_block_literal_remains_invokesuper() {
+ eval("
+ class A
+ def foo
+ block_given? ? yield : 'no block'
+ end
+ end
+
+ class B < A
+ def foo
+ super { 'from subclass' }
+ end
+ end
+
+ B.new.foo; B.new.foo
+ ");
+
+ let hir = hir_string_proc("B.new.method(:foo)");
+ assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}");
+ assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for block literal:\n{hir}");
+
+ // With a block, we don't optimize to SendWithoutBlockDirect
+ assert_snapshot!(hir, @r"
+ fn foo@<compiled>:10:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ Jump bb2(v1)
+ bb1(v4:BasicObject):
+ EntryPoint JIT(0)
+ Jump bb2(v4)
+ bb2(v6:BasicObject):
+ v11:BasicObject = InvokeSuper v6, 0x1000 # SendFallbackReason: super: call made with a block
+ CheckInterrupts
+ Return v11
+ ");
+ }
+
+ // `super` from `MyArray#length` resolves to `Array#length`, which the expected HIR reports as a
+ // `Cfunc` target. The call stays an `InvokeSuper` with the "unsupported target method type"
+ // fallback reason and no `SendWithoutBlockDirect` is emitted.
+ #[test]
+ fn test_invokesuper_to_cfunc_remains_invokesuper() {
+ eval("
+ class MyArray < Array
+ def length
+ super
+ end
+ end
+
+ MyArray.new.length; MyArray.new.length
+ ");
+
+ let hir = hir_string_proc("MyArray.new.method(:length)");
+ assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}");
+ assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for CFUNC:\n{hir}");
+
+ assert_snapshot!(hir, @r"
+ fn length@<compiled>:4:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ Jump bb2(v1)
+ bb1(v4:BasicObject):
+ EntryPoint JIT(0)
+ Jump bb2(v4)
+ bb2(v6:BasicObject):
+ v11:BasicObject = InvokeSuper v6, 0x1000 # SendFallbackReason: super: unsupported target method type Cfunc
+ CheckInterrupts
+ Return v11
+ ");
+ }
+
+ // `super(&other_block)` passes an explicit block argument that differs from the block the method
+ // itself received. The expected HIR keeps `InvokeSuper` with the "complex argument passing"
+ // fallback reason and contains no `SendWithoutBlockDirect`.
+ #[test]
+ fn test_invokesuper_with_blockarg_remains_invokesuper() {
+ eval("
+ class A
+ def foo
+ block_given? ? yield : 'no block'
+ end
+ end
+
+ class B < A
+ def foo(&blk)
+ other_block = proc { 'different block' }
+ super(&other_block)
+ end
+ end
+
+ B.new.foo { 'passed' }; B.new.foo { 'passed' }
+ ");
+
+ let hir = hir_string_proc("B.new.method(:foo)");
+ assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}");
+ assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for explicit blockarg:\n{hir}");
+
+ assert_snapshot!(hir, @r"
+ fn foo@<compiled>:10:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ v2:BasicObject = GetLocal :blk, l0, SP@5
+ v3:NilClass = Const Value(nil)
+ Jump bb2(v1, v2, v3)
+ bb1(v6:BasicObject, v7:BasicObject):
+ EntryPoint JIT(0)
+ v8:NilClass = Const Value(nil)
+ Jump bb2(v6, v7, v8)
+ bb2(v10:BasicObject, v11:BasicObject, v12:NilClass):
+ PatchPoint NoSingletonClass(B@0x1000)
+ PatchPoint MethodRedefined(B@0x1000, proc@0x1008, cme:0x1010)
+ v35:HeapObject[class_exact:B] = GuardType v10, HeapObject[class_exact:B]
+ v36:BasicObject = CCallWithFrame v35, :Kernel#proc@0x1038, block=0x1040
+ v18:BasicObject = GetLocal :blk, l0, EP@4
+ SetLocal :other_block, l0, EP@3, v36
+ v25:BasicObject = GetLocal :other_block, l0, EP@3
+ v27:BasicObject = InvokeSuper v10, 0x1048, v25 # SendFallbackReason: super: complex argument passing to `super` call
+ CheckInterrupts
+ Return v27
+ ");
+ }
+
+ // `super(items, &:succ)` passes a symbol in block-argument position.
+ // The expected HIR keeps `InvokeSuper` (with the symbol as a trailing operand) and the
+ // "complex argument passing" fallback reason; no `SendWithoutBlockDirect` is emitted.
+ #[test]
+ fn test_invokesuper_with_symbol_to_proc_remains_invokesuper() {
+ eval("
+ class A
+ def foo(items, &blk)
+ items.map(&blk)
+ end
+ end
+
+ class B < A
+ def foo(items)
+ super(items, &:succ)
+ end
+ end
+
+ B.new.foo([1, 2, 3]); B.new.foo([1, 2, 3])
+ ");
+
+ let hir = hir_string_proc("B.new.method(:foo)");
+ assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}");
+ assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for symbol-to-proc:\n{hir}");
+
+ assert_snapshot!(hir, @r"
+ fn foo@<compiled>:10:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ v2:BasicObject = GetLocal :items, l0, SP@4
+ Jump bb2(v1, v2)
+ bb1(v5:BasicObject, v6:BasicObject):
+ EntryPoint JIT(0)
+ Jump bb2(v5, v6)
+ bb2(v8:BasicObject, v9:BasicObject):
+ v15:StaticSymbol[:succ] = Const Value(VALUE(0x1000))
+ v17:BasicObject = InvokeSuper v8, 0x1008, v9, v15 # SendFallbackReason: super: complex argument passing to `super` call
+ CheckInterrupts
+ Return v17
+ ");
+ }
+
+ // `super(content: content)` passes a keyword argument to the parent method.
+ // The expected HIR keeps `InvokeSuper` with the "complex argument passing" fallback reason and
+ // contains no `SendWithoutBlockDirect`.
+ #[test]
+ fn test_invokesuper_with_keyword_args_remains_invokesuper() {
+ eval("
+ class A
+ def foo(attributes = {})
+ @attributes = attributes
+ end
+ end
+
+ class B < A
+ def foo(content = '')
+ super(content: content)
+ end
+ end
+
+ B.new.foo('image data'); B.new.foo('image data')
+ ");
+
+ let hir = hir_string_proc("B.new.method(:foo)");
+ assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}");
+ // Same negative check as the sibling `remains_invokesuper` tests.
+ assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for keyword args:\n{hir}");
+
+ assert_snapshot!(hir, @r"
+ fn foo@<compiled>:9:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ v2:BasicObject = GetLocal :content, l0, SP@4
+ v3:CPtr = LoadPC
+ v4:CPtr[CPtr(0x1000)] = Const CPtr(0x1008)
+ v5:CBool = IsBitEqual v3, v4
+ IfTrue v5, bb2(v1, v2)
+ Jump bb4(v1, v2)
+ bb1(v9:BasicObject):
+ EntryPoint JIT(0)
+ v10:NilClass = Const Value(nil)
+ Jump bb2(v9, v10)
+ bb2(v16:BasicObject, v17:BasicObject):
+ v20:StringExact[VALUE(0x1010)] = Const Value(VALUE(0x1010))
+ v21:StringExact = StringCopy v20
+ Jump bb4(v16, v21)
+ bb3(v13:BasicObject, v14:BasicObject):
+ EntryPoint JIT(1)
+ Jump bb4(v13, v14)
+ bb4(v24:BasicObject, v25:BasicObject):
+ v31:BasicObject = InvokeSuper v24, 0x1018, v25 # SendFallbackReason: super: complex argument passing to `super` call
+ CheckInterrupts
+ Return v31
+ ");
+ }
}
diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs
index 867d97641b..7a584afd6f 100644
--- a/zjit/src/profile.rs
+++ b/zjit/src/profile.rs
@@ -3,6 +3,7 @@
// We use the YARV bytecode constants which have a CRuby-style name
#![allow(non_upper_case_globals)]
+use std::collections::HashMap;
use crate::{cruby::*, payload::get_or_create_iseq_payload, options::{get_option, NumProfiles}};
use crate::distribution::{Distribution, DistributionSummary};
use crate::stats::Counter::profile_time_ns;
@@ -90,6 +91,7 @@ fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) {
YARVINSN_opt_size => profile_operands(profiler, profile, 1),
YARVINSN_opt_succ => profile_operands(profiler, profile, 1),
YARVINSN_invokeblock => profile_block_handler(profiler, profile),
+ YARVINSN_invokesuper => profile_invokesuper(profiler, profile),
YARVINSN_opt_send_without_block | YARVINSN_send => {
let cd: *const rb_call_data = profiler.insn_opnd(0).as_ptr();
let argc = unsafe { vm_ci_argc((*cd).ci) };
@@ -153,6 +155,37 @@ fn profile_block_handler(profiler: &mut Profiler, profile: &mut IseqProfile) {
types[0].observe(ty);
}
+/// Profile an `invokesuper` instruction.
+///
+/// Records the method entry of the frame executing `super`, keyed by instruction index:
+/// - first visit: the observed method entry is stored;
+/// - later visit with a different method entry: the slot is overwritten with `None`, marking the
+///   call site as polymorphic;
+/// - otherwise the slot is left untouched.
+///
+/// Afterwards the receiver and the call's arguments are profiled via `profile_operands`.
+fn profile_invokesuper(profiler: &mut Profiler, profile: &mut IseqProfile) {
+    use std::collections::hash_map::Entry;
+
+    // CME is a T_IMEMO, which is a VALUE
+    let frame_cme = unsafe { rb_vm_frame_method_entry(profiler.cfp) };
+    let observed = VALUE(frame_cme as usize);
+
+    match profile.super_cme.entry(profiler.insn_idx) {
+        // Nothing recorded yet: this is the first time `super` is seen at this instruction.
+        Entry::Vacant(slot) => {
+            slot.insert(Some(observed));
+        }
+        Entry::Occupied(mut slot) => {
+            // A stored entry that differs from the current one makes the site polymorphic.
+            // A slot already holding `None` was marked polymorphic earlier and stays that way.
+            if matches!(slot.get(), Some(seen) if *seen != observed) {
+                slot.insert(None);
+            }
+        }
+    }
+
+    // Issue a GC write barrier from the ISEQ to the observed method entry.
+    unsafe { rb_gc_writebarrier(profiler.iseq.into(), observed) };
+
+    let call_data: *const rb_call_data = profiler.insn_opnd(0).as_ptr();
+    let arg_count = unsafe { vm_ci_argc((*call_data).ci) };
+
+    // Profile all the arguments and self (+1).
+    profile_operands(profiler, profile, (arg_count + 1) as usize);
+}
+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Flags(u32);
@@ -324,6 +357,9 @@ pub struct IseqProfile {
/// Number of profiled executions for each YARV instruction, indexed by the instruction index
num_profiles: Vec<NumProfiles>,
+
+ /// Method entries for `super` calls (stored as VALUE to be GC-safe)
+ super_cme: HashMap<usize, Option<VALUE>>
}
impl IseqProfile {
@@ -331,6 +367,7 @@ impl IseqProfile {
Self {
opnd_types: vec![vec![]; iseq_size as usize],
num_profiles: vec![0; iseq_size as usize],
+ super_cme: HashMap::new(),
}
}
@@ -339,6 +376,11 @@ impl IseqProfile {
self.opnd_types.get(insn_idx).map(|v| &**v)
}
+    /// Look up the method entry recorded for the `super` call at `insn_idx`.
+    ///
+    /// Returns `None` both when the instruction has no record and when its slot holds `None`
+    /// (the marker the profiler stores for a polymorphic call site).
+    pub fn get_super_method_entry(&self, insn_idx: usize) -> Option<*const rb_callable_method_entry_t> {
+        let recorded = self.super_cme.get(&insn_idx).copied().flatten()?;
+        Some(recorded.0 as *const rb_callable_method_entry_t)
+    }
+
/// Run a given callback with every object in IseqProfile
pub fn each_object(&self, callback: impl Fn(VALUE)) {
for operands in &self.opnd_types {
@@ -349,9 +391,15 @@ impl IseqProfile {
}
}
}
+
+ for cme_value in self.super_cme.values() {
+ if let Some(cme) = cme_value {
+ callback(*cme);
+ }
+ }
}
- /// Run a given callback with a mutable reference to every object in IseqProfile
+ /// Run a given callback with a mutable reference to every object in IseqProfile.
pub fn each_object_mut(&mut self, callback: impl Fn(&mut VALUE)) {
for operands in &mut self.opnd_types {
for distribution in operands {
@@ -361,6 +409,13 @@ impl IseqProfile {
}
}
}
+
+ // Update CME references if they move during compaction.
+ for cme_value in self.super_cme.values_mut() {
+ if let Some(cme) = cme_value {
+ callback(cme);
+ }
+ }
}
}
diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs
index 089037dcd7..506bd82686 100644
--- a/zjit/src/stats.rs
+++ b/zjit/src/stats.rs
@@ -175,6 +175,7 @@ make_counters! {
exit_unhandled_tailcall,
exit_unhandled_splat,
exit_unhandled_kwarg,
+ exit_unhandled_block_arg,
exit_unknown_special_variable,
exit_unhandled_hir_insn,
exit_unhandled_yarv_insn,
@@ -195,6 +196,7 @@ make_counters! {
exit_guard_not_shared_failure,
exit_guard_less_failure,
exit_guard_greater_eq_failure,
+ exit_guard_super_method_entry,
exit_patchpoint_bop_redefined,
exit_patchpoint_method_redefined,
exit_patchpoint_stable_constant_names,
@@ -247,6 +249,15 @@ make_counters! {
send_fallback_obj_to_string_not_string,
send_fallback_send_cfunc_variadic,
send_fallback_send_cfunc_array_variadic,
+ send_fallback_super_call_with_block,
+ send_fallback_super_class_not_found,
+ send_fallback_super_complex_args_pass,
+ send_fallback_super_fallback_no_profile,
+ send_fallback_super_not_optimized_method_type,
+ send_fallback_super_polymorphic,
+ send_fallback_super_target_not_found,
+ send_fallback_super_target_complex_args_pass,
+ send_fallback_cannot_send_direct,
send_fallback_uncategorized,
}
@@ -357,6 +368,21 @@ make_counters! {
unspecialized_send_def_type_refined,
unspecialized_send_def_type_null,
+ // Super call def_type related to send fallback to dynamic dispatch
+ unspecialized_super_def_type_iseq,
+ unspecialized_super_def_type_cfunc,
+ unspecialized_super_def_type_attrset,
+ unspecialized_super_def_type_ivar,
+ unspecialized_super_def_type_bmethod,
+ unspecialized_super_def_type_zsuper,
+ unspecialized_super_def_type_alias,
+ unspecialized_super_def_type_undef,
+ unspecialized_super_def_type_not_implemented,
+ unspecialized_super_def_type_optimized,
+ unspecialized_super_def_type_missing,
+ unspecialized_super_def_type_refined,
+ unspecialized_super_def_type_null,
+
// Unsupported parameter features
complex_arg_pass_param_rest,
complex_arg_pass_param_post,
@@ -503,6 +529,7 @@ pub fn side_exit_counter(reason: crate::hir::SideExitReason) -> Counter {
UnknownSpecialVariable(_) => exit_unknown_special_variable,
UnhandledHIRInsn(_) => exit_unhandled_hir_insn,
UnhandledYARVInsn(_) => exit_unhandled_yarv_insn,
+ UnhandledBlockArg => exit_unhandled_block_arg,
FixnumAddOverflow => exit_fixnum_add_overflow,
FixnumSubOverflow => exit_fixnum_sub_overflow,
FixnumMultOverflow => exit_fixnum_mult_overflow,
@@ -518,6 +545,7 @@ pub fn side_exit_counter(reason: crate::hir::SideExitReason) -> Counter {
GuardNotShared => exit_guard_not_shared_failure,
GuardLess => exit_guard_less_failure,
GuardGreaterEq => exit_guard_greater_eq_failure,
+ GuardSuperMethodEntry => exit_guard_super_method_entry,
CalleeSideExit => exit_callee_side_exit,
ObjToStringFallback => exit_obj_to_string_fallback,
Interrupt => exit_interrupt,
@@ -582,6 +610,14 @@ pub fn send_fallback_counter(reason: crate::hir::SendFallbackReason) -> Counter
SendNotOptimizedNeedPermission => send_fallback_send_not_optimized_need_permission,
CCallWithFrameTooManyArgs => send_fallback_ccall_with_frame_too_many_args,
ObjToStringNotString => send_fallback_obj_to_string_not_string,
+ SuperCallWithBlock => send_fallback_super_call_with_block,
+ SuperClassNotFound => send_fallback_super_class_not_found,
+ SuperComplexArgsPass => send_fallback_super_complex_args_pass,
+ SuperNoProfiles => send_fallback_super_fallback_no_profile,
+ SuperNotOptimizedMethodType(_) => send_fallback_super_not_optimized_method_type,
+ SuperPolymorphic => send_fallback_super_polymorphic,
+ SuperTargetNotFound => send_fallback_super_target_not_found,
+ SuperTargetComplexArgsPass => send_fallback_super_target_complex_args_pass,
Uncategorized(_) => send_fallback_uncategorized,
}
}
@@ -641,6 +677,27 @@ pub fn send_fallback_counter_for_method_type(method_type: crate::hir::MethodType
}
}
+/// Map the method type of a `super` target to its `unspecialized_super_def_type_*` counter.
+pub fn send_fallback_counter_for_super_method_type(method_type: crate::hir::MethodType) -> Counter {
+    use crate::hir::MethodType;
+
+    match method_type {
+        MethodType::Iseq => Counter::unspecialized_super_def_type_iseq,
+        MethodType::Cfunc => Counter::unspecialized_super_def_type_cfunc,
+        MethodType::Attrset => Counter::unspecialized_super_def_type_attrset,
+        MethodType::Ivar => Counter::unspecialized_super_def_type_ivar,
+        MethodType::Bmethod => Counter::unspecialized_super_def_type_bmethod,
+        MethodType::Zsuper => Counter::unspecialized_super_def_type_zsuper,
+        MethodType::Alias => Counter::unspecialized_super_def_type_alias,
+        MethodType::Undefined => Counter::unspecialized_super_def_type_undef,
+        MethodType::NotImplemented => Counter::unspecialized_super_def_type_not_implemented,
+        MethodType::Optimized => Counter::unspecialized_super_def_type_optimized,
+        MethodType::Missing => Counter::unspecialized_super_def_type_missing,
+        MethodType::Refined => Counter::unspecialized_super_def_type_refined,
+        MethodType::Null => Counter::unspecialized_super_def_type_null,
+    }
+}
+
/// Primitive called in zjit.rb. Zero out all the counters.
#[unsafe(no_mangle)]
pub extern "C" fn rb_zjit_reset_stats_bang(_ec: EcPtr, _self: VALUE) -> VALUE {