summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Menard <kevin@nirvdrum.com>2026-01-29 18:24:50 -0500
committerGitHub <noreply@github.com>2026-01-29 18:24:50 -0500
commit5fec5456b9cd9dd7fdea18ac9c43b1cf6d4cf4cb (patch)
tree350159ac0896d4fe5f0c736215ebb0b130b2ce35
parent91619f0230c0e5a95c796c1bd4f784c151e15614 (diff)
ZJIT: Optimize `super` calls to C function targets (#15993)
This PR is an extension of the work in #15816. There, we optimized `super` calls where the target method was an ISeq. The code bailed on any other `super` target method type. The discussion for that PR included the ZJIT stats from running the _railsbench_ benchmark in _ruby-bench_. The stats showed the other types of `super` calls we encountered that we didn't process: ``` Top-2 not optimized method types for super (100.0% of total 2,700,015): cfunc: 2,680,044 (99.3%) attrset: 19,971 ( 0.7%) ``` This PR handles most of the cfunc cases. We still only handle simple method signatures and don't handle blocks at all, but if the target function is a cfunc where `argc != -2`, we now optimize to either `Insn::CCallWithFrame` or `Insn::CCallVariadic` as appropriate. This covers 100% of the C func cases we encounter in _railsbench_. <details><summary>Baseline ZJIT stats</summary> <p> ``` Top-20 not inlined C methods (51.1% of total 15,736,824): Hash#key?: 1,260,867 ( 8.0%) Regexp#match?: 970,899 ( 6.2%) Hash#fetch: 898,248 ( 5.7%) Integer#===: 439,075 ( 2.8%) Hash#delete: 405,821 ( 2.6%) Array#any?: 403,598 ( 2.6%) String.new: 401,818 ( 2.6%) String#b: 319,473 ( 2.0%) String#to_sym: 272,868 ( 1.7%) Array#all?: 260,132 ( 1.7%) Fiber.current: 259,588 ( 1.6%) Array#join: 257,125 ( 1.6%) Array#include?: 247,718 ( 1.6%) Kernel#Array: 244,574 ( 1.6%) String#<<: 242,475 ( 1.5%) Symbol#end_with?: 239,977 ( 1.5%) String#force_encoding: 239,520 ( 1.5%) Kernel#dup: 232,701 ( 1.5%) Array#[]: 225,160 ( 1.4%) Kernel#respond_to?: 220,246 ( 1.4%) Top-20 calls to C functions from JIT code (75.3% of total 106,711,108): rb_vm_opt_send_without_block: 22,031,658 (20.6%) rb_hash_aref: 9,335,540 ( 8.7%) rb_vm_env_write: 7,865,750 ( 7.4%) rb_vm_send: 6,836,936 ( 6.4%) rb_zjit_writebarrier_check_immediate: 5,623,383 ( 5.3%) rb_vm_getinstancevariable: 5,012,846 ( 4.7%) rb_ivar_get_at_no_ractor_check: 4,868,219 ( 4.6%) rb_vm_invokesuper: 3,240,208 ( 3.0%) rb_hash_aset: 2,061,526 ( 1.9%) 
rb_obj_is_kind_of: 1,812,573 ( 1.7%) rb_vm_invokeblock: 1,647,238 ( 1.5%) rb_vm_opt_getconstant_path: 1,295,958 ( 1.2%) Hash#key?: 1,260,867 ( 1.2%) rb_class_allocate_instance: 1,190,707 ( 1.1%) rb_hash_new_with_size: 1,150,766 ( 1.1%) rb_vm_setinstancevariable: 1,119,304 ( 1.0%) rb_ec_ary_new_from_values: 1,050,781 ( 1.0%) rb_obj_alloc: 993,445 ( 0.9%) rb_str_concat_literals: 984,558 ( 0.9%) Regexp#match?: 970,899 ( 0.9%) Top-2 not optimized method types for send (100.0% of total 3,423,067): iseq: 3,410,096 (99.6%) optimized: 12,971 ( 0.4%) Top-2 not optimized method types for send_without_block (100.0% of total 319,311): optimized_send: 246,250 (77.1%) null: 73,061 (22.9%) Top-2 not optimized method types for super (100.0% of total 2,680,495): cfunc: 2,660,334 (99.2%) attrset: 20,161 ( 0.8%) Top-4 instructions with uncategorized fallback reason (100.0% of total 2,617,553): invokeblock: 1,647,238 (62.9%) sendforward: 748,101 (28.6%) invokesuperforward: 199,443 ( 7.6%) opt_send_without_block: 22,771 ( 0.9%) Top-20 send fallback reasons (100.0% of total 34,703,584): send_without_block_polymorphic: 12,818,893 (36.9%) send_without_block_no_profiles: 5,442,960 (15.7%) send_not_optimized_method_type: 3,423,067 ( 9.9%) super_not_optimized_method_type: 2,680,495 ( 7.7%) uncategorized: 2,617,553 ( 7.5%) send_no_profiles: 2,083,822 ( 6.0%) one_or_more_complex_arg_pass: 1,663,149 ( 4.8%) send_polymorphic: 1,329,141 ( 3.8%) send_without_block_not_optimized_need_permission: 510,815 ( 1.5%) too_many_args_for_lir: 477,266 ( 1.4%) singleton_class_seen: 441,058 ( 1.3%) super_complex_args_pass: 331,767 ( 1.0%) send_without_block_not_optimized_method_type_optimized: 246,250 ( 0.7%) send_without_block_megamorphic: 228,672 ( 0.7%) super_target_complex_args_pass: 165,855 ( 0.5%) send_without_block_not_optimized_method_type: 73,061 ( 0.2%) obj_to_string_not_string: 67,862 ( 0.2%) super_call_with_block: 40,004 ( 0.1%) send_without_block_direct_keyword_mismatch: 39,783 ( 0.1%) 
super_polymorphic: 22,087 ( 0.1%) Top-3 setivar fallback reasons (100.0% of total 1,119,304): not_monomorphic: 1,077,792 (96.3%) not_t_object: 41,335 ( 3.7%) new_shape_needs_extension: 177 ( 0.0%) Top-1 getivar fallback reasons (100.0% of total 5,012,871): not_monomorphic: 5,012,871 (100.0%) Top-2 definedivar fallback reasons (100.0% of total 142,798): not_monomorphic: 142,711 (99.9%) not_t_object: 87 ( 0.1%) Top-6 invokeblock handler (100.0% of total 1,647,238): monomorphic_iseq: 878,253 (53.3%) polymorphic: 483,612 (29.4%) monomorphic_other: 134,943 ( 8.2%) monomorphic_ifunc: 115,175 ( 7.0%) megamorphic: 34,939 ( 2.1%) no_profiles: 316 ( 0.0%) Top-8 popular complex argument-parameter features not optimized (100.0% of total 2,068,581): param_forwardable: 729,353 (35.3%) param_block: 716,533 (34.6%) param_rest: 327,865 (15.8%) caller_splat: 114,365 ( 5.5%) caller_kw_splat: 99,266 ( 4.8%) param_kwrest: 80,149 ( 3.9%) caller_blockarg: 877 ( 0.0%) caller_kwarg: 173 ( 0.0%) Top-1 compile error reasons (100.0% of total 156,707): exception_handler: 156,707 (100.0%) Top-5 unhandled YARV insns (100.0% of total 201,517): getconstant: 160,920 (79.9%) expandarray: 19,985 ( 9.9%) setblockparam: 19,972 ( 9.9%) checkmatch: 480 ( 0.2%) once: 160 ( 0.1%) Top-2 unhandled HIR insns (100.0% of total 128,647): throw: 93,060 (72.3%) invokebuiltin: 35,587 (27.7%) Top-19 side exit reasons (100.0% of total 3,484,374): guard_shape_failure: 1,042,511 (29.9%) guard_type_failure: 812,342 (23.3%) block_param_proxy_not_iseq_or_ifunc: 795,628 (22.8%) unhandled_yarv_insn: 201,517 ( 5.8%) compile_error: 156,707 ( 4.5%) unhandled_hir_insn: 128,647 ( 3.7%) unhandled_newarray_send_pack: 119,187 ( 3.4%) patchpoint_method_redefined: 80,619 ( 2.3%) unhandled_block_arg: 60,517 ( 1.7%) block_param_proxy_modified: 49,695 ( 1.4%) guard_less_failure: 20,033 ( 0.6%) fixnum_lshift_overflow: 9,985 ( 0.3%) patchpoint_stable_constant_names: 5,752 ( 0.2%) fixnum_mult_overflow: 570 ( 0.0%) obj_to_string_fallback: 
498 ( 0.0%) patchpoint_no_ep_escape: 109 ( 0.0%) interrupt: 43 ( 0.0%) guard_super_method_entry: 8 ( 0.0%) guard_greater_eq_failure: 6 ( 0.0%) send_count: 133,679,714 dynamic_send_count: 34,703,584 (26.0%) optimized_send_count: 98,976,130 (74.0%) dynamic_setivar_count: 1,119,304 ( 0.8%) dynamic_getivar_count: 5,012,871 ( 3.7%) dynamic_definedivar_count: 142,798 ( 0.1%) iseq_optimized_send_count: 38,085,055 (28.5%) inline_cfunc_optimized_send_count: 39,628,908 (29.6%) inline_iseq_optimized_send_count: 3,624,852 ( 2.7%) non_variadic_cfunc_optimized_send_count: 10,434,756 ( 7.8%) variadic_cfunc_optimized_send_count: 7,202,559 ( 5.4%) compiled_iseq_count: 2,868 failed_iseq_count: 0 compile_time: 8,809ms profile_time: 135ms gc_time: 255ms invalidation_time: 21ms vm_write_pc_count: 116,809,164 vm_write_sp_count: 116,809,164 vm_write_locals_count: 111,533,227 vm_write_stack_count: 111,533,227 vm_write_to_parent_iseq_local_count: 521,277 vm_read_from_parent_iseq_local_count: 12,757,231 guard_type_count: 126,653,751 guard_type_exit_ratio: 0.6% guard_shape_count: 44,193,824 guard_shape_exit_ratio: 2.4% code_region_bytes: 14,336,000 zjit_alloc_bytes: 19,282,889 total_mem_bytes: 33,618,889 side_exit_count: 3,484,374 total_insn_count: 697,672,179 vm_insn_count: 52,531,010 zjit_insn_count: 645,141,169 ratio_in_zjit: 92.5% ``` </p> </details> <details><summary>Optimized invokesuper stats</summary> <p> ``` Top-20 not inlined C methods (51.1% of total 15,736,852): Hash#key?: 1,260,867 ( 8.0%) Regexp#match?: 970,900 ( 6.2%) Hash#fetch: 898,248 ( 5.7%) Integer#===: 439,075 ( 2.8%) Hash#delete: 405,825 ( 2.6%) Array#any?: 403,600 ( 2.6%) String.new: 401,818 ( 2.6%) String#b: 319,473 ( 2.0%) String#to_sym: 272,868 ( 1.7%) Array#all?: 260,132 ( 1.7%) Fiber.current: 259,588 ( 1.6%) Array#join: 257,125 ( 1.6%) Array#include?: 247,718 ( 1.6%) Kernel#Array: 244,579 ( 1.6%) String#<<: 242,475 ( 1.5%) Symbol#end_with?: 239,977 ( 1.5%) String#force_encoding: 239,520 ( 1.5%) Kernel#dup: 232,706 
( 1.5%) Array#[]: 225,160 ( 1.4%) Kernel#respond_to?: 220,246 ( 1.4%) Top-20 calls to C functions from JIT code (73.2% of total 106,690,862): rb_vm_opt_send_without_block: 22,031,722 (20.7%) rb_hash_aref: 9,335,543 ( 8.8%) rb_vm_env_write: 7,865,751 ( 7.4%) rb_vm_send: 6,836,939 ( 6.4%) rb_zjit_writebarrier_check_immediate: 5,623,259 ( 5.3%) rb_vm_getinstancevariable: 5,012,844 ( 4.7%) rb_ivar_get_at_no_ractor_check: 4,868,219 ( 4.6%) rb_hash_aset: 2,061,385 ( 1.9%) rb_obj_is_kind_of: 1,812,575 ( 1.7%) rb_vm_invokeblock: 1,647,238 ( 1.5%) rb_vm_opt_getconstant_path: 1,295,958 ( 1.2%) Hash#key?: 1,260,867 ( 1.2%) rb_class_allocate_instance: 1,190,704 ( 1.1%) rb_hash_new_with_size: 1,150,765 ( 1.1%) rb_vm_setinstancevariable: 1,119,304 ( 1.0%) rb_ec_ary_new_from_values: 1,050,780 ( 1.0%) rb_obj_alloc: 993,446 ( 0.9%) rb_str_concat_literals: 984,559 ( 0.9%) Regexp#match?: 970,900 ( 0.9%) rb_obj_as_string_result: 937,751 ( 0.9%) Top-2 not optimized method types for send (100.0% of total 3,423,067): iseq: 3,410,096 (99.6%) optimized: 12,971 ( 0.4%) Top-2 not optimized method types for send_without_block (100.0% of total 319,311): optimized_send: 246,250 (77.1%) null: 73,061 (22.9%) Top-1 not optimized method types for super (100.0% of total 20,161): attrset: 20,161 (100.0%) Top-4 instructions with uncategorized fallback reason (100.0% of total 2,617,553): invokeblock: 1,647,238 (62.9%) sendforward: 748,101 (28.6%) invokesuperforward: 199,443 ( 7.6%) opt_send_without_block: 22,771 ( 0.9%) Top-20 send fallback reasons (100.0% of total 32,043,318): send_without_block_polymorphic: 12,818,949 (40.0%) send_without_block_no_profiles: 5,442,967 (17.0%) send_not_optimized_method_type: 3,423,067 (10.7%) uncategorized: 2,617,553 ( 8.2%) send_no_profiles: 2,083,824 ( 6.5%) one_or_more_complex_arg_pass: 1,663,150 ( 5.2%) send_polymorphic: 1,329,142 ( 4.1%) send_without_block_not_optimized_need_permission: 510,814 ( 1.6%) too_many_args_for_lir: 477,267 ( 1.5%) singleton_class_seen: 
441,058 ( 1.4%) super_complex_args_pass: 331,767 ( 1.0%) send_without_block_not_optimized_method_type_optimized: 246,250 ( 0.8%) send_without_block_megamorphic: 228,672 ( 0.7%) super_target_complex_args_pass: 165,855 ( 0.5%) send_without_block_not_optimized_method_type: 73,061 ( 0.2%) obj_to_string_not_string: 67,862 ( 0.2%) super_call_with_block: 40,004 ( 0.1%) send_without_block_direct_keyword_mismatch: 39,783 ( 0.1%) super_polymorphic: 22,088 ( 0.1%) super_not_optimized_method_type: 20,161 ( 0.1%) Top-3 setivar fallback reasons (100.0% of total 1,119,304): not_monomorphic: 1,077,792 (96.3%) not_t_object: 41,335 ( 3.7%) new_shape_needs_extension: 177 ( 0.0%) Top-1 getivar fallback reasons (100.0% of total 5,012,869): not_monomorphic: 5,012,869 (100.0%) Top-2 definedivar fallback reasons (100.0% of total 142,798): not_monomorphic: 142,711 (99.9%) not_t_object: 87 ( 0.1%) Top-6 invokeblock handler (100.0% of total 1,647,238): monomorphic_iseq: 878,253 (53.3%) polymorphic: 483,612 (29.4%) monomorphic_other: 134,943 ( 8.2%) monomorphic_ifunc: 115,175 ( 7.0%) megamorphic: 34,939 ( 2.1%) no_profiles: 316 ( 0.0%) Top-8 popular complex argument-parameter features not optimized (100.0% of total 2,068,582): param_forwardable: 729,353 (35.3%) param_block: 716,534 (34.6%) param_rest: 327,865 (15.8%) caller_splat: 114,365 ( 5.5%) caller_kw_splat: 99,266 ( 4.8%) param_kwrest: 80,149 ( 3.9%) caller_blockarg: 877 ( 0.0%) caller_kwarg: 173 ( 0.0%) Top-1 compile error reasons (100.0% of total 156,707): exception_handler: 156,707 (100.0%) Top-5 unhandled YARV insns (100.0% of total 201,517): getconstant: 160,920 (79.9%) expandarray: 19,985 ( 9.9%) setblockparam: 19,972 ( 9.9%) checkmatch: 480 ( 0.2%) once: 160 ( 0.1%) Top-2 unhandled HIR insns (100.0% of total 128,646): throw: 93,060 (72.3%) invokebuiltin: 35,586 (27.7%) Top-19 side exit reasons (100.0% of total 3,504,293): guard_shape_failure: 1,042,515 (29.7%) guard_type_failure: 812,249 (23.2%) 
block_param_proxy_not_iseq_or_ifunc: 795,628 (22.7%) unhandled_yarv_insn: 201,517 ( 5.8%) compile_error: 156,707 ( 4.5%) unhandled_hir_insn: 128,646 ( 3.7%) unhandled_newarray_send_pack: 119,187 ( 3.4%) patchpoint_method_redefined: 80,779 ( 2.3%) unhandled_block_arg: 60,517 ( 1.7%) block_param_proxy_modified: 49,695 ( 1.4%) guard_less_failure: 20,033 ( 0.6%) guard_super_method_entry: 19,855 ( 0.6%) fixnum_lshift_overflow: 9,985 ( 0.3%) patchpoint_stable_constant_names: 5,752 ( 0.2%) fixnum_mult_overflow: 569 ( 0.0%) obj_to_string_fallback: 498 ( 0.0%) patchpoint_no_ep_escape: 109 ( 0.0%) interrupt: 46 ( 0.0%) guard_greater_eq_failure: 6 ( 0.0%) send_count: 133,600,402 dynamic_send_count: 32,043,318 (24.0%) optimized_send_count: 101,557,084 (76.0%) dynamic_setivar_count: 1,119,304 ( 0.8%) dynamic_getivar_count: 5,012,869 ( 3.8%) dynamic_definedivar_count: 142,798 ( 0.1%) iseq_optimized_send_count: 38,025,870 (28.5%) inline_cfunc_optimized_send_count: 39,628,762 (29.7%) inline_iseq_optimized_send_count: 3,624,854 ( 2.7%) non_variadic_cfunc_optimized_send_count: 12,631,917 ( 9.5%) variadic_cfunc_optimized_send_count: 7,645,681 ( 5.7%) compiled_iseq_count: 2,870 failed_iseq_count: 0 compile_time: 8,419ms profile_time: 133ms gc_time: 248ms invalidation_time: 20ms vm_write_pc_count: 116,729,857 vm_write_sp_count: 116,729,857 vm_write_locals_count: 111,453,921 vm_write_stack_count: 111,453,921 vm_write_to_parent_iseq_local_count: 521,275 vm_read_from_parent_iseq_local_count: 12,757,225 guard_type_count: 126,594,209 guard_type_exit_ratio: 0.6% guard_shape_count: 44,193,683 guard_shape_exit_ratio: 2.4% code_region_bytes: 14,368,768 zjit_alloc_bytes: 19,581,578 total_mem_bytes: 33,950,346 side_exit_count: 3,504,293 total_insn_count: 697,692,070 vm_insn_count: 52,828,675 zjit_insn_count: 644,863,395 ratio_in_zjit: 92.4% ``` </p> </details>
-rw-r--r--test/ruby/test_zjit.rb62
-rw-r--r--zjit/src/hir.rs209
-rw-r--r--zjit/src/hir/opt_tests.rs67
3 files changed, 260 insertions, 78 deletions
diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb
index 6ad06f9453..7b068e9898 100644
--- a/test/ruby/test_zjit.rb
+++ b/test/ruby/test_zjit.rb
@@ -1096,16 +1096,68 @@ class TestZJIT < Test::Unit::TestCase
}, call_threshold: 2
end
- def test_invokesuper_to_cfunc
- assert_compiles '["MyArray", 3]', %q{
- class MyArray < Array
+ def test_invokesuper_to_cfunc_no_args
+ assert_compiles '["MyString", 3]', %q{
+ class MyString < String
def length
- ["MyArray", super]
+ ["MyString", super]
end
end
def test
- MyArray.new([1, 2, 3]).length
+ MyString.new("abc").length
+ end
+
+ test # profile invokesuper
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ def test_invokesuper_to_cfunc_simple_args
+ assert_compiles '["MyString", true]', %q{
+ class MyString < String
+ def include?(other)
+ ["MyString", super(other)]
+ end
+ end
+
+ def test
+ MyString.new("abc").include?("bc")
+ end
+
+ test # profile invokesuper
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+
+ def test_invokesuper_to_cfunc_with_optional_arg
+ assert_compiles '["MyString", 6]', %q{
+ class MyString < String
+ def byteindex(needle, offset = 0)
+ ["MyString", super(needle, offset)]
+ end
+ end
+
+ def test
+ MyString.new("hello world").byteindex("world")
+ end
+
+ test # profile invokesuper
+ test # compile + run compiled code
+ }, call_threshold: 2
+ end
+
+ def test_invokesuper_to_cfunc_varargs
+ assert_compiles '["MyString", true]', %q{
+ class MyString < String
+ def end_with?(str)
+ ["MyString", super(str)]
+ end
+ end
+
+ def test
+ MyString.new("abc").end_with?("bc")
end
test # profile invokesuper
diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs
index 8a9d5a5bb0..901beffea0 100644
--- a/zjit/src/hir.rs
+++ b/zjit/src/hir.rs
@@ -1297,6 +1297,20 @@ fn get_local_var_name_for_printer(iseq: Option<IseqPtr>, level: u32, ep_offset:
Some(format!(":{}", id.contents_lossy()))
}
+/// Construct a qualified method name for display/debug output.
+/// Returns strings like "Array#length" for instance methods or "Foo.bar" for singleton methods.
+fn qualified_method_name(class: VALUE, method_id: ID) -> String {
+ let method_name = method_id.contents_lossy();
+ // rb_zjit_singleton_class_p also checks if it's a class
+ if unsafe { rb_zjit_singleton_class_p(class) } {
+ let class_name = get_class_name(unsafe { rb_class_attached_object(class) });
+ format!("{class_name}.{method_name}")
+ } else {
+ let class_name = get_class_name(class);
+ format!("{class_name}#{method_name}")
+ }
+}
+
static REGEXP_FLAGS: &[(u32, &str)] = &[
(ONIG_OPTION_MULTILINE, "MULTILINE"),
(ONIG_OPTION_IGNORECASE, "IGNORECASE"),
@@ -3504,6 +3518,40 @@ impl Function {
};
}
Insn::InvokeSuper { recv, cd, blockiseq, args, state, .. } => {
+ // Helper to emit common guards for super call optimization.
+ fn emit_super_call_guards(
+ fun: &mut Function,
+ block: BlockId,
+ super_cme: *const rb_callable_method_entry_t,
+ current_cme: *const rb_callable_method_entry_t,
+ mid: ID,
+ state: InsnId,
+ ) {
+ fun.push_insn(block, Insn::PatchPoint {
+ invariant: Invariant::MethodRedefined {
+ klass: unsafe { (*super_cme).defined_class },
+ method: mid,
+ cme: super_cme
+ },
+ state
+ });
+
+ let lep = fun.push_insn(block, Insn::GetLEP);
+ fun.push_insn(block, Insn::GuardSuperMethodEntry {
+ lep,
+ cme: current_cme,
+ state
+ });
+
+ let block_handler = fun.push_insn(block, Insn::GetBlockHandler { lep });
+ fun.push_insn(block, Insn::GuardBitEquals {
+ val: block_handler,
+ expected: Const::Value(VALUE(VM_BLOCK_HANDLER_NONE as usize)),
+ reason: SideExitReason::UnhandledBlockArg,
+ state
+ });
+ }
+
// Don't handle calls with literal blocks (e.g., super { ... })
if !blockiseq.is_null() {
self.push_insn_id(block, insn_id);
@@ -3567,68 +3615,107 @@ impl Function {
continue;
}
- // Check if it's an ISEQ method; bail if it isn't.
let def_type = unsafe { get_cme_def_type(super_cme) };
- if def_type != VM_METHOD_TYPE_ISEQ {
- self.push_insn_id(block, insn_id);
- self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::from(def_type)));
- continue;
- }
- // Check if the super method's parameters support direct send.
- // If not, we can't do direct dispatch.
- let super_iseq = unsafe { get_def_iseq_ptr((*super_cme).def) };
- // TODO: pass Option<blockiseq> to can_direct_send when we start specializing super { ... }
- if !can_direct_send(self, block, super_iseq, ci, insn_id, args.as_slice(), None) {
- self.push_insn_id(block, insn_id);
- self.set_dynamic_send_reason(insn_id, SuperTargetComplexArgsPass);
- continue;
- }
+ if def_type == VM_METHOD_TYPE_ISEQ {
+ // Check if the super method's parameters support direct send.
+ // If not, we can't do direct dispatch.
+ let super_iseq = unsafe { get_def_iseq_ptr((*super_cme).def) };
+ // TODO: pass Option<blockiseq> to can_direct_send when we start specializing `super { ... }`.
+ if !can_direct_send(self, block, super_iseq, ci, insn_id, args.as_slice(), None) {
+ self.push_insn_id(block, insn_id);
+ self.set_dynamic_send_reason(insn_id, SuperTargetComplexArgsPass);
+ continue;
+ }
- // Add PatchPoint for method redefinition.
- self.push_insn(block, Insn::PatchPoint {
- invariant: Invariant::MethodRedefined {
- klass: unsafe { (*super_cme).defined_class },
- method: mid,
- cme: super_cme
- },
- state
- });
+ let Ok((send_state, processed_args, kw_bits)) = self.prepare_direct_send_args(block, &args, ci, super_iseq, state)
+ .inspect_err(|&reason| self.set_dynamic_send_reason(insn_id, reason)) else {
+ self.push_insn_id(block, insn_id); continue;
+ };
- // Guard that we're calling `super` from the expected method context.
- let lep = self.push_insn(block, Insn::GetLEP);
- self.push_insn(block, Insn::GuardSuperMethodEntry {
- lep,
- cme: current_cme,
- state
- });
+ emit_super_call_guards(self, block, super_cme, current_cme, mid, state);
- // Guard that no block is being passed (implicit or explicit).
- let block_handler = self.push_insn(block, Insn::GetBlockHandler { lep });
- self.push_insn(block, Insn::GuardBitEquals {
- val: block_handler,
- expected: Const::Value(VALUE(VM_BLOCK_HANDLER_NONE as usize)),
- reason: SideExitReason::UnhandledBlockArg,
- state
- });
+ // Use SendDirect with the super method's CME and ISEQ.
+ let send_direct = self.push_insn(block, Insn::SendDirect {
+ recv,
+ cd,
+ cme: super_cme,
+ iseq: super_iseq,
+ args: processed_args,
+ kw_bits,
+ state: send_state,
+ blockiseq: None,
+ });
+ self.make_equal_to(insn_id, send_direct);
- let Ok((send_state, processed_args, kw_bits)) = self.prepare_direct_send_args(block, &args, ci, super_iseq, state)
- .inspect_err(|&reason| self.set_dynamic_send_reason(insn_id, reason)) else {
- self.push_insn_id(block, insn_id); continue;
- };
+ } else if def_type == VM_METHOD_TYPE_CFUNC {
+ let cfunc = unsafe { get_cme_def_body_cfunc(super_cme) };
+ let cfunc_argc = unsafe { get_mct_argc(cfunc) };
+ let cfunc_ptr = unsafe { get_mct_func(cfunc) }.cast();
+
+ match cfunc_argc {
+ // C function with fixed argument count.
+ 0.. => {
+ // Check argc matches
+ if args.len() != cfunc_argc as usize {
+ self.push_insn_id(block, insn_id);
+ self.set_dynamic_send_reason(insn_id, ArgcParamMismatch);
+ continue;
+ }
- // Use SendDirect with the super method's CME and ISEQ.
- let send_direct = self.push_insn(block, Insn::SendDirect {
- recv,
- cd,
- cme: super_cme,
- iseq: super_iseq,
- args: processed_args,
- kw_bits,
- state: send_state,
- blockiseq: None,
- });
- self.make_equal_to(insn_id, send_direct);
+ emit_super_call_guards(self, block, super_cme, current_cme, mid, state);
+
+ // Use CCallWithFrame for the C function.
+ let name = rust_str_to_id(&qualified_method_name(unsafe { (*super_cme).owner }, unsafe { (*super_cme).called_id }));
+ let ccall = self.push_insn(block, Insn::CCallWithFrame {
+ cd,
+ cfunc: cfunc_ptr,
+ recv,
+ args: args.clone(),
+ cme: super_cme,
+ name,
+ state,
+ return_type: types::BasicObject,
+ elidable: false,
+ blockiseq: None,
+ });
+ self.make_equal_to(insn_id, ccall);
+ }
+
+ // Variadic C function: func(int argc, VALUE *argv, VALUE recv)
+ -1 => {
+ emit_super_call_guards(self, block, super_cme, current_cme, mid, state);
+
+ // Use CCallVariadic for the variadic C function.
+ let name = rust_str_to_id(&qualified_method_name(unsafe { (*super_cme).owner }, unsafe { (*super_cme).called_id }));
+ let ccall = self.push_insn(block, Insn::CCallVariadic {
+ cfunc: cfunc_ptr,
+ recv,
+ args: args.clone(),
+ cme: super_cme,
+ name,
+ state,
+ return_type: types::BasicObject,
+ elidable: false,
+ blockiseq: None,
+ });
+ self.make_equal_to(insn_id, ccall);
+ }
+
+ // Array-variadic: (self, args_ruby_array).
+ -2 => {
+ self.push_insn_id(block, insn_id);
+ self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::Cfunc));
+ continue;
+ }
+ _ => unreachable!("unknown cfunc argc: {}", cfunc_argc)
+ }
+ } else {
+ // Other method types (not ISEQ or CFUNC)
+ self.push_insn_id(block, insn_id);
+ self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::from(def_type)));
+ continue;
+ }
}
_ => { self.push_insn_id(block, insn_id); }
}
@@ -4296,18 +4383,6 @@ impl Function {
Err(())
}
- fn qualified_method_name(class: VALUE, method_id: ID) -> String {
- let method_name = method_id.contents_lossy();
- // rb_zjit_singleton_class_p also checks if it's a class
- if unsafe { rb_zjit_singleton_class_p(class) } {
- let class_name = get_class_name(unsafe { rb_class_attached_object(class) });
- format!("{class_name}.{method_name}")
- } else {
- let class_name = get_class_name(class);
- format!("{class_name}#{method_name}")
- }
- }
-
fn count_not_inlined_cfunc(fun: &mut Function, block: BlockId, cme: *const rb_callable_method_entry_t) {
let owner = unsafe { (*cme).owner };
let called_id = unsafe { (*cme).called_id };
diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs
index 29b1e36331..de4e2ec39d 100644
--- a/zjit/src/hir/opt_tests.rs
+++ b/zjit/src/hir/opt_tests.rs
@@ -11406,7 +11406,7 @@ mod hir_opt_tests {
}
#[test]
- fn test_invokesuper_to_cfunc_remains_invokesuper() {
+ fn test_invokesuper_to_cfunc_optimizes_to_ccall() {
eval("
class MyArray < Array
def length
@@ -11418,10 +11418,10 @@ mod hir_opt_tests {
");
let hir = hir_string_proc("MyArray.new.method(:length)");
- assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}");
- assert!(!hir.contains("SendDirect"), "Should not optimize to SendDirect for CFUNC:\n{hir}");
+ assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to CCallWithFrame but got:\n{hir}");
+ assert!(hir.contains("CCallWithFrame"), "Should optimize to CCallWithFrame for non-variadic cfunc:\n{hir}");
- assert_snapshot!(hir, @r"
+ assert_snapshot!(hir, @"
fn length@<compiled>:4:
bb0():
EntryPoint interpreter
@@ -11431,9 +11431,64 @@ mod hir_opt_tests {
EntryPoint JIT(0)
Jump bb2(v4)
bb2(v6:BasicObject):
- v11:BasicObject = InvokeSuper v6, 0x1000 # SendFallbackReason: super: unsupported target method type Cfunc
+ PatchPoint MethodRedefined(Array@0x1000, length@0x1008, cme:0x1010)
+ v17:CPtr = GetLEP
+ GuardSuperMethodEntry v17, 0x1038
+ v19:RubyValue = GetBlockHandler v17
+ v20:FalseClass = GuardBitEquals v19, Value(false)
+ v21:BasicObject = CCallWithFrame v6, :Array#length@0x1040
CheckInterrupts
- Return v11
+ Return v21
+ ");
+ }
+
+ #[test]
+ fn test_invokesuper_to_variadic_cfunc_optimizes_to_ccall() {
+ eval("
+ class MyString < String
+ def byteindex(needle, offset = 0)
+ super(needle, offset)
+ end
+ end
+
+ MyString.new('hello world').byteindex('world', 0); MyString.new('hello world').byteindex('world', 0)
+ ");
+
+ let hir = hir_string_proc("MyString.new('hello world').method(:byteindex)");
+ assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to CCallVariadic but got:\n{hir}");
+ assert!(hir.contains("CCallVariadic"), "Should optimize to CCallVariadic for variadic cfunc:\n{hir}");
+
+ assert_snapshot!(hir, @"
+ fn byteindex@<compiled>:3:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ v2:BasicObject = GetLocal :needle, l0, SP@5
+ v3:BasicObject = GetLocal :offset, l0, SP@4
+ v4:CPtr = LoadPC
+ v5:CPtr[CPtr(0x1000)] = Const CPtr(0x1008)
+ v6:CBool = IsBitEqual v4, v5
+ IfTrue v6, bb2(v1, v2, v3)
+ Jump bb4(v1, v2, v3)
+ bb1(v10:BasicObject, v11:BasicObject):
+ EntryPoint JIT(0)
+ v12:NilClass = Const Value(nil)
+ Jump bb2(v10, v11, v12)
+ bb2(v19:BasicObject, v20:BasicObject, v21:BasicObject):
+ v24:Fixnum[0] = Const Value(0)
+ Jump bb4(v19, v20, v24)
+ bb3(v15:BasicObject, v16:BasicObject, v17:BasicObject):
+ EntryPoint JIT(1)
+ Jump bb4(v15, v16, v17)
+ bb4(v27:BasicObject, v28:BasicObject, v29:BasicObject):
+ PatchPoint MethodRedefined(String@0x1010, byteindex@0x1018, cme:0x1020)
+ v42:CPtr = GetLEP
+ GuardSuperMethodEntry v42, 0x1008
+ v44:RubyValue = GetBlockHandler v42
+ v45:FalseClass = GuardBitEquals v44, Value(false)
+ v46:BasicObject = CCallVariadic v27, :String#byteindex@0x1048, v28, v29
+ CheckInterrupts
+ Return v46
");
}