From 5fec5456b9cd9dd7fdea18ac9c43b1cf6d4cf4cb Mon Sep 17 00:00:00 2001 From: Kevin Menard Date: Thu, 29 Jan 2026 18:24:50 -0500 Subject: ZJIT: Optimize `super` calls to C function targets (#15993) This PR is an extension of the work in #15816. There, we optimized `super` calls where the target method was an ISeq. The code bailed on any other `super` target method type. The discussion for that PR included the ZJIT stats from running the _railsbench_ benchmark in _ruby-bench_. The stats showed the other types of `super` calls we encountered that we didn't process: ``` Top-2 not optimized method types for super (100.0% of total 2,700,015): cfunc: 2,680,044 (99.3%) attrset: 19,971 ( 0.7%) ``` This PR handles most of the cfunc cases. We still only handle simple method signatures and don't handle blocks at all, but if the target function is a cfunc where `argc != -2`, we now optimize to either `Insn::CCallWithFrame` or `Insn::CCallVariadic` as appropriate. This covers 100% of the cfunc cases we encounter in _railsbench_.
Baseline ZJIT stats

``` Top-20 not inlined C methods (51.1% of total 15,736,824): Hash#key?: 1,260,867 ( 8.0%) Regexp#match?: 970,899 ( 6.2%) Hash#fetch: 898,248 ( 5.7%) Integer#===: 439,075 ( 2.8%) Hash#delete: 405,821 ( 2.6%) Array#any?: 403,598 ( 2.6%) String.new: 401,818 ( 2.6%) String#b: 319,473 ( 2.0%) String#to_sym: 272,868 ( 1.7%) Array#all?: 260,132 ( 1.7%) Fiber.current: 259,588 ( 1.6%) Array#join: 257,125 ( 1.6%) Array#include?: 247,718 ( 1.6%) Kernel#Array: 244,574 ( 1.6%) String#<<: 242,475 ( 1.5%) Symbol#end_with?: 239,977 ( 1.5%) String#force_encoding: 239,520 ( 1.5%) Kernel#dup: 232,701 ( 1.5%) Array#[]: 225,160 ( 1.4%) Kernel#respond_to?: 220,246 ( 1.4%) Top-20 calls to C functions from JIT code (75.3% of total 106,711,108): rb_vm_opt_send_without_block: 22,031,658 (20.6%) rb_hash_aref: 9,335,540 ( 8.7%) rb_vm_env_write: 7,865,750 ( 7.4%) rb_vm_send: 6,836,936 ( 6.4%) rb_zjit_writebarrier_check_immediate: 5,623,383 ( 5.3%) rb_vm_getinstancevariable: 5,012,846 ( 4.7%) rb_ivar_get_at_no_ractor_check: 4,868,219 ( 4.6%) rb_vm_invokesuper: 3,240,208 ( 3.0%) rb_hash_aset: 2,061,526 ( 1.9%) rb_obj_is_kind_of: 1,812,573 ( 1.7%) rb_vm_invokeblock: 1,647,238 ( 1.5%) rb_vm_opt_getconstant_path: 1,295,958 ( 1.2%) Hash#key?: 1,260,867 ( 1.2%) rb_class_allocate_instance: 1,190,707 ( 1.1%) rb_hash_new_with_size: 1,150,766 ( 1.1%) rb_vm_setinstancevariable: 1,119,304 ( 1.0%) rb_ec_ary_new_from_values: 1,050,781 ( 1.0%) rb_obj_alloc: 993,445 ( 0.9%) rb_str_concat_literals: 984,558 ( 0.9%) Regexp#match?: 970,899 ( 0.9%) Top-2 not optimized method types for send (100.0% of total 3,423,067): iseq: 3,410,096 (99.6%) optimized: 12,971 ( 0.4%) Top-2 not optimized method types for send_without_block (100.0% of total 319,311): optimized_send: 246,250 (77.1%) null: 73,061 (22.9%) Top-2 not optimized method types for super (100.0% of total 2,680,495): cfunc: 2,660,334 (99.2%) attrset: 20,161 ( 0.8%) Top-4 instructions with uncategorized fallback reason (100.0% of total 2,617,553): invokeblock: 
1,647,238 (62.9%) sendforward: 748,101 (28.6%) invokesuperforward: 199,443 ( 7.6%) opt_send_without_block: 22,771 ( 0.9%) Top-20 send fallback reasons (100.0% of total 34,703,584): send_without_block_polymorphic: 12,818,893 (36.9%) send_without_block_no_profiles: 5,442,960 (15.7%) send_not_optimized_method_type: 3,423,067 ( 9.9%) super_not_optimized_method_type: 2,680,495 ( 7.7%) uncategorized: 2,617,553 ( 7.5%) send_no_profiles: 2,083,822 ( 6.0%) one_or_more_complex_arg_pass: 1,663,149 ( 4.8%) send_polymorphic: 1,329,141 ( 3.8%) send_without_block_not_optimized_need_permission: 510,815 ( 1.5%) too_many_args_for_lir: 477,266 ( 1.4%) singleton_class_seen: 441,058 ( 1.3%) super_complex_args_pass: 331,767 ( 1.0%) send_without_block_not_optimized_method_type_optimized: 246,250 ( 0.7%) send_without_block_megamorphic: 228,672 ( 0.7%) super_target_complex_args_pass: 165,855 ( 0.5%) send_without_block_not_optimized_method_type: 73,061 ( 0.2%) obj_to_string_not_string: 67,862 ( 0.2%) super_call_with_block: 40,004 ( 0.1%) send_without_block_direct_keyword_mismatch: 39,783 ( 0.1%) super_polymorphic: 22,087 ( 0.1%) Top-3 setivar fallback reasons (100.0% of total 1,119,304): not_monomorphic: 1,077,792 (96.3%) not_t_object: 41,335 ( 3.7%) new_shape_needs_extension: 177 ( 0.0%) Top-1 getivar fallback reasons (100.0% of total 5,012,871): not_monomorphic: 5,012,871 (100.0%) Top-2 definedivar fallback reasons (100.0% of total 142,798): not_monomorphic: 142,711 (99.9%) not_t_object: 87 ( 0.1%) Top-6 invokeblock handler (100.0% of total 1,647,238): monomorphic_iseq: 878,253 (53.3%) polymorphic: 483,612 (29.4%) monomorphic_other: 134,943 ( 8.2%) monomorphic_ifunc: 115,175 ( 7.0%) megamorphic: 34,939 ( 2.1%) no_profiles: 316 ( 0.0%) Top-8 popular complex argument-parameter features not optimized (100.0% of total 2,068,581): param_forwardable: 729,353 (35.3%) param_block: 716,533 (34.6%) param_rest: 327,865 (15.8%) caller_splat: 114,365 ( 5.5%) caller_kw_splat: 99,266 ( 4.8%) 
param_kwrest: 80,149 ( 3.9%) caller_blockarg: 877 ( 0.0%) caller_kwarg: 173 ( 0.0%) Top-1 compile error reasons (100.0% of total 156,707): exception_handler: 156,707 (100.0%) Top-5 unhandled YARV insns (100.0% of total 201,517): getconstant: 160,920 (79.9%) expandarray: 19,985 ( 9.9%) setblockparam: 19,972 ( 9.9%) checkmatch: 480 ( 0.2%) once: 160 ( 0.1%) Top-2 unhandled HIR insns (100.0% of total 128,647): throw: 93,060 (72.3%) invokebuiltin: 35,587 (27.7%) Top-19 side exit reasons (100.0% of total 3,484,374): guard_shape_failure: 1,042,511 (29.9%) guard_type_failure: 812,342 (23.3%) block_param_proxy_not_iseq_or_ifunc: 795,628 (22.8%) unhandled_yarv_insn: 201,517 ( 5.8%) compile_error: 156,707 ( 4.5%) unhandled_hir_insn: 128,647 ( 3.7%) unhandled_newarray_send_pack: 119,187 ( 3.4%) patchpoint_method_redefined: 80,619 ( 2.3%) unhandled_block_arg: 60,517 ( 1.7%) block_param_proxy_modified: 49,695 ( 1.4%) guard_less_failure: 20,033 ( 0.6%) fixnum_lshift_overflow: 9,985 ( 0.3%) patchpoint_stable_constant_names: 5,752 ( 0.2%) fixnum_mult_overflow: 570 ( 0.0%) obj_to_string_fallback: 498 ( 0.0%) patchpoint_no_ep_escape: 109 ( 0.0%) interrupt: 43 ( 0.0%) guard_super_method_entry: 8 ( 0.0%) guard_greater_eq_failure: 6 ( 0.0%) send_count: 133,679,714 dynamic_send_count: 34,703,584 (26.0%) optimized_send_count: 98,976,130 (74.0%) dynamic_setivar_count: 1,119,304 ( 0.8%) dynamic_getivar_count: 5,012,871 ( 3.7%) dynamic_definedivar_count: 142,798 ( 0.1%) iseq_optimized_send_count: 38,085,055 (28.5%) inline_cfunc_optimized_send_count: 39,628,908 (29.6%) inline_iseq_optimized_send_count: 3,624,852 ( 2.7%) non_variadic_cfunc_optimized_send_count: 10,434,756 ( 7.8%) variadic_cfunc_optimized_send_count: 7,202,559 ( 5.4%) compiled_iseq_count: 2,868 failed_iseq_count: 0 compile_time: 8,809ms profile_time: 135ms gc_time: 255ms invalidation_time: 21ms vm_write_pc_count: 116,809,164 vm_write_sp_count: 116,809,164 vm_write_locals_count: 111,533,227 vm_write_stack_count: 111,533,227 
vm_write_to_parent_iseq_local_count: 521,277 vm_read_from_parent_iseq_local_count: 12,757,231 guard_type_count: 126,653,751 guard_type_exit_ratio: 0.6% guard_shape_count: 44,193,824 guard_shape_exit_ratio: 2.4% code_region_bytes: 14,336,000 zjit_alloc_bytes: 19,282,889 total_mem_bytes: 33,618,889 side_exit_count: 3,484,374 total_insn_count: 697,672,179 vm_insn_count: 52,531,010 zjit_insn_count: 645,141,169 ratio_in_zjit: 92.5% ```

Optimized invokesuper stats

``` Top-20 not inlined C methods (51.1% of total 15,736,852): Hash#key?: 1,260,867 ( 8.0%) Regexp#match?: 970,900 ( 6.2%) Hash#fetch: 898,248 ( 5.7%) Integer#===: 439,075 ( 2.8%) Hash#delete: 405,825 ( 2.6%) Array#any?: 403,600 ( 2.6%) String.new: 401,818 ( 2.6%) String#b: 319,473 ( 2.0%) String#to_sym: 272,868 ( 1.7%) Array#all?: 260,132 ( 1.7%) Fiber.current: 259,588 ( 1.6%) Array#join: 257,125 ( 1.6%) Array#include?: 247,718 ( 1.6%) Kernel#Array: 244,579 ( 1.6%) String#<<: 242,475 ( 1.5%) Symbol#end_with?: 239,977 ( 1.5%) String#force_encoding: 239,520 ( 1.5%) Kernel#dup: 232,706 ( 1.5%) Array#[]: 225,160 ( 1.4%) Kernel#respond_to?: 220,246 ( 1.4%) Top-20 calls to C functions from JIT code (73.2% of total 106,690,862): rb_vm_opt_send_without_block: 22,031,722 (20.7%) rb_hash_aref: 9,335,543 ( 8.8%) rb_vm_env_write: 7,865,751 ( 7.4%) rb_vm_send: 6,836,939 ( 6.4%) rb_zjit_writebarrier_check_immediate: 5,623,259 ( 5.3%) rb_vm_getinstancevariable: 5,012,844 ( 4.7%) rb_ivar_get_at_no_ractor_check: 4,868,219 ( 4.6%) rb_hash_aset: 2,061,385 ( 1.9%) rb_obj_is_kind_of: 1,812,575 ( 1.7%) rb_vm_invokeblock: 1,647,238 ( 1.5%) rb_vm_opt_getconstant_path: 1,295,958 ( 1.2%) Hash#key?: 1,260,867 ( 1.2%) rb_class_allocate_instance: 1,190,704 ( 1.1%) rb_hash_new_with_size: 1,150,765 ( 1.1%) rb_vm_setinstancevariable: 1,119,304 ( 1.0%) rb_ec_ary_new_from_values: 1,050,780 ( 1.0%) rb_obj_alloc: 993,446 ( 0.9%) rb_str_concat_literals: 984,559 ( 0.9%) Regexp#match?: 970,900 ( 0.9%) rb_obj_as_string_result: 937,751 ( 0.9%) Top-2 not optimized method types for send (100.0% of total 3,423,067): iseq: 3,410,096 (99.6%) optimized: 12,971 ( 0.4%) Top-2 not optimized method types for send_without_block (100.0% of total 319,311): optimized_send: 246,250 (77.1%) null: 73,061 (22.9%) Top-1 not optimized method types for super (100.0% of total 20,161): attrset: 20,161 (100.0%) Top-4 instructions with uncategorized fallback reason (100.0% of total 2,617,553): invokeblock: 1,647,238 (62.9%) 
sendforward: 748,101 (28.6%) invokesuperforward: 199,443 ( 7.6%) opt_send_without_block: 22,771 ( 0.9%) Top-20 send fallback reasons (100.0% of total 32,043,318): send_without_block_polymorphic: 12,818,949 (40.0%) send_without_block_no_profiles: 5,442,967 (17.0%) send_not_optimized_method_type: 3,423,067 (10.7%) uncategorized: 2,617,553 ( 8.2%) send_no_profiles: 2,083,824 ( 6.5%) one_or_more_complex_arg_pass: 1,663,150 ( 5.2%) send_polymorphic: 1,329,142 ( 4.1%) send_without_block_not_optimized_need_permission: 510,814 ( 1.6%) too_many_args_for_lir: 477,267 ( 1.5%) singleton_class_seen: 441,058 ( 1.4%) super_complex_args_pass: 331,767 ( 1.0%) send_without_block_not_optimized_method_type_optimized: 246,250 ( 0.8%) send_without_block_megamorphic: 228,672 ( 0.7%) super_target_complex_args_pass: 165,855 ( 0.5%) send_without_block_not_optimized_method_type: 73,061 ( 0.2%) obj_to_string_not_string: 67,862 ( 0.2%) super_call_with_block: 40,004 ( 0.1%) send_without_block_direct_keyword_mismatch: 39,783 ( 0.1%) super_polymorphic: 22,088 ( 0.1%) super_not_optimized_method_type: 20,161 ( 0.1%) Top-3 setivar fallback reasons (100.0% of total 1,119,304): not_monomorphic: 1,077,792 (96.3%) not_t_object: 41,335 ( 3.7%) new_shape_needs_extension: 177 ( 0.0%) Top-1 getivar fallback reasons (100.0% of total 5,012,869): not_monomorphic: 5,012,869 (100.0%) Top-2 definedivar fallback reasons (100.0% of total 142,798): not_monomorphic: 142,711 (99.9%) not_t_object: 87 ( 0.1%) Top-6 invokeblock handler (100.0% of total 1,647,238): monomorphic_iseq: 878,253 (53.3%) polymorphic: 483,612 (29.4%) monomorphic_other: 134,943 ( 8.2%) monomorphic_ifunc: 115,175 ( 7.0%) megamorphic: 34,939 ( 2.1%) no_profiles: 316 ( 0.0%) Top-8 popular complex argument-parameter features not optimized (100.0% of total 2,068,582): param_forwardable: 729,353 (35.3%) param_block: 716,534 (34.6%) param_rest: 327,865 (15.8%) caller_splat: 114,365 ( 5.5%) caller_kw_splat: 99,266 ( 4.8%) param_kwrest: 80,149 ( 3.9%) 
caller_blockarg: 877 ( 0.0%) caller_kwarg: 173 ( 0.0%) Top-1 compile error reasons (100.0% of total 156,707): exception_handler: 156,707 (100.0%) Top-5 unhandled YARV insns (100.0% of total 201,517): getconstant: 160,920 (79.9%) expandarray: 19,985 ( 9.9%) setblockparam: 19,972 ( 9.9%) checkmatch: 480 ( 0.2%) once: 160 ( 0.1%) Top-2 unhandled HIR insns (100.0% of total 128,646): throw: 93,060 (72.3%) invokebuiltin: 35,586 (27.7%) Top-19 side exit reasons (100.0% of total 3,504,293): guard_shape_failure: 1,042,515 (29.7%) guard_type_failure: 812,249 (23.2%) block_param_proxy_not_iseq_or_ifunc: 795,628 (22.7%) unhandled_yarv_insn: 201,517 ( 5.8%) compile_error: 156,707 ( 4.5%) unhandled_hir_insn: 128,646 ( 3.7%) unhandled_newarray_send_pack: 119,187 ( 3.4%) patchpoint_method_redefined: 80,779 ( 2.3%) unhandled_block_arg: 60,517 ( 1.7%) block_param_proxy_modified: 49,695 ( 1.4%) guard_less_failure: 20,033 ( 0.6%) guard_super_method_entry: 19,855 ( 0.6%) fixnum_lshift_overflow: 9,985 ( 0.3%) patchpoint_stable_constant_names: 5,752 ( 0.2%) fixnum_mult_overflow: 569 ( 0.0%) obj_to_string_fallback: 498 ( 0.0%) patchpoint_no_ep_escape: 109 ( 0.0%) interrupt: 46 ( 0.0%) guard_greater_eq_failure: 6 ( 0.0%) send_count: 133,600,402 dynamic_send_count: 32,043,318 (24.0%) optimized_send_count: 101,557,084 (76.0%) dynamic_setivar_count: 1,119,304 ( 0.8%) dynamic_getivar_count: 5,012,869 ( 3.8%) dynamic_definedivar_count: 142,798 ( 0.1%) iseq_optimized_send_count: 38,025,870 (28.5%) inline_cfunc_optimized_send_count: 39,628,762 (29.7%) inline_iseq_optimized_send_count: 3,624,854 ( 2.7%) non_variadic_cfunc_optimized_send_count: 12,631,917 ( 9.5%) variadic_cfunc_optimized_send_count: 7,645,681 ( 5.7%) compiled_iseq_count: 2,870 failed_iseq_count: 0 compile_time: 8,419ms profile_time: 133ms gc_time: 248ms invalidation_time: 20ms vm_write_pc_count: 116,729,857 vm_write_sp_count: 116,729,857 vm_write_locals_count: 111,453,921 vm_write_stack_count: 111,453,921 
vm_write_to_parent_iseq_local_count: 521,275 vm_read_from_parent_iseq_local_count: 12,757,225 guard_type_count: 126,594,209 guard_type_exit_ratio: 0.6% guard_shape_count: 44,193,683 guard_shape_exit_ratio: 2.4% code_region_bytes: 14,368,768 zjit_alloc_bytes: 19,581,578 total_mem_bytes: 33,950,346 side_exit_count: 3,504,293 total_insn_count: 697,692,070 vm_insn_count: 52,828,675 zjit_insn_count: 644,863,395 ratio_in_zjit: 92.4% ```

--- test/ruby/test_zjit.rb | 62 ++++++++++++-- zjit/src/hir.rs | 209 +++++++++++++++++++++++++++++++--------------- zjit/src/hir/opt_tests.rs | 67 +++++++++++++-- 3 files changed, 260 insertions(+), 78 deletions(-) diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 6ad06f9453..7b068e9898 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1096,16 +1096,68 @@ class TestZJIT < Test::Unit::TestCase }, call_threshold: 2 end - def test_invokesuper_to_cfunc - assert_compiles '["MyArray", 3]', %q{ - class MyArray < Array + def test_invokesuper_to_cfunc_no_args + assert_compiles '["MyString", 3]', %q{ + class MyString < String def length - ["MyArray", super] + ["MyString", super] end end def test - MyArray.new([1, 2, 3]).length + MyString.new("abc").length + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + def test_invokesuper_to_cfunc_simple_args + assert_compiles '["MyString", true]', %q{ + class MyString < String + def include?(other) + ["MyString", super(other)] + end + end + + def test + MyString.new("abc").include?("bc") + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + + def test_invokesuper_to_cfunc_with_optional_arg + assert_compiles '["MyString", 6]', %q{ + class MyString < String + def byteindex(needle, offset = 0) + ["MyString", super(needle, offset)] + end + end + + def test + MyString.new("hello world").byteindex("world") + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + def test_invokesuper_to_cfunc_varargs + assert_compiles '["MyString", true]', %q{ + class MyString < String + def end_with?(str) + ["MyString", super(str)] + end + end + + def test + MyString.new("abc").end_with?("bc") end test # profile invokesuper diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 8a9d5a5bb0..901beffea0 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -1297,6 +1297,20 
@@ fn get_local_var_name_for_printer(iseq: Option, level: u32, ep_offset: Some(format!(":{}", id.contents_lossy())) } +/// Construct a qualified method name for display/debug output. +/// Returns strings like "Array#length" for instance methods or "Foo.bar" for singleton methods. +fn qualified_method_name(class: VALUE, method_id: ID) -> String { + let method_name = method_id.contents_lossy(); + // rb_zjit_singleton_class_p also checks if it's a class + if unsafe { rb_zjit_singleton_class_p(class) } { + let class_name = get_class_name(unsafe { rb_class_attached_object(class) }); + format!("{class_name}.{method_name}") + } else { + let class_name = get_class_name(class); + format!("{class_name}#{method_name}") + } +} + static REGEXP_FLAGS: &[(u32, &str)] = &[ (ONIG_OPTION_MULTILINE, "MULTILINE"), (ONIG_OPTION_IGNORECASE, "IGNORECASE"), @@ -3504,6 +3518,40 @@ impl Function { }; } Insn::InvokeSuper { recv, cd, blockiseq, args, state, .. } => { + // Helper to emit common guards for super call optimization. + fn emit_super_call_guards( + fun: &mut Function, + block: BlockId, + super_cme: *const rb_callable_method_entry_t, + current_cme: *const rb_callable_method_entry_t, + mid: ID, + state: InsnId, + ) { + fun.push_insn(block, Insn::PatchPoint { + invariant: Invariant::MethodRedefined { + klass: unsafe { (*super_cme).defined_class }, + method: mid, + cme: super_cme + }, + state + }); + + let lep = fun.push_insn(block, Insn::GetLEP); + fun.push_insn(block, Insn::GuardSuperMethodEntry { + lep, + cme: current_cme, + state + }); + + let block_handler = fun.push_insn(block, Insn::GetBlockHandler { lep }); + fun.push_insn(block, Insn::GuardBitEquals { + val: block_handler, + expected: Const::Value(VALUE(VM_BLOCK_HANDLER_NONE as usize)), + reason: SideExitReason::UnhandledBlockArg, + state + }); + } + // Don't handle calls with literal blocks (e.g., super { ... 
}) if !blockiseq.is_null() { self.push_insn_id(block, insn_id); @@ -3567,68 +3615,107 @@ impl Function { continue; } - // Check if it's an ISEQ method; bail if it isn't. let def_type = unsafe { get_cme_def_type(super_cme) }; - if def_type != VM_METHOD_TYPE_ISEQ { - self.push_insn_id(block, insn_id); - self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::from(def_type))); - continue; - } - // Check if the super method's parameters support direct send. - // If not, we can't do direct dispatch. - let super_iseq = unsafe { get_def_iseq_ptr((*super_cme).def) }; - // TODO: pass Option to can_direct_send when we start specializing super { ... } - if !can_direct_send(self, block, super_iseq, ci, insn_id, args.as_slice(), None) { - self.push_insn_id(block, insn_id); - self.set_dynamic_send_reason(insn_id, SuperTargetComplexArgsPass); - continue; - } + if def_type == VM_METHOD_TYPE_ISEQ { + // Check if the super method's parameters support direct send. + // If not, we can't do direct dispatch. + let super_iseq = unsafe { get_def_iseq_ptr((*super_cme).def) }; + // TODO: pass Option to can_direct_send when we start specializing `super { ... }`. + if !can_direct_send(self, block, super_iseq, ci, insn_id, args.as_slice(), None) { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperTargetComplexArgsPass); + continue; + } - // Add PatchPoint for method redefinition. - self.push_insn(block, Insn::PatchPoint { - invariant: Invariant::MethodRedefined { - klass: unsafe { (*super_cme).defined_class }, - method: mid, - cme: super_cme - }, - state - }); + let Ok((send_state, processed_args, kw_bits)) = self.prepare_direct_send_args(block, &args, ci, super_iseq, state) + .inspect_err(|&reason| self.set_dynamic_send_reason(insn_id, reason)) else { + self.push_insn_id(block, insn_id); continue; + }; - // Guard that we're calling `super` from the expected method context. 
- let lep = self.push_insn(block, Insn::GetLEP); - self.push_insn(block, Insn::GuardSuperMethodEntry { - lep, - cme: current_cme, - state - }); + emit_super_call_guards(self, block, super_cme, current_cme, mid, state); - // Guard that no block is being passed (implicit or explicit). - let block_handler = self.push_insn(block, Insn::GetBlockHandler { lep }); - self.push_insn(block, Insn::GuardBitEquals { - val: block_handler, - expected: Const::Value(VALUE(VM_BLOCK_HANDLER_NONE as usize)), - reason: SideExitReason::UnhandledBlockArg, - state - }); + // Use SendDirect with the super method's CME and ISEQ. + let send_direct = self.push_insn(block, Insn::SendDirect { + recv, + cd, + cme: super_cme, + iseq: super_iseq, + args: processed_args, + kw_bits, + state: send_state, + blockiseq: None, + }); + self.make_equal_to(insn_id, send_direct); - let Ok((send_state, processed_args, kw_bits)) = self.prepare_direct_send_args(block, &args, ci, super_iseq, state) - .inspect_err(|&reason| self.set_dynamic_send_reason(insn_id, reason)) else { - self.push_insn_id(block, insn_id); continue; - }; + } else if def_type == VM_METHOD_TYPE_CFUNC { + let cfunc = unsafe { get_cme_def_body_cfunc(super_cme) }; + let cfunc_argc = unsafe { get_mct_argc(cfunc) }; + let cfunc_ptr = unsafe { get_mct_func(cfunc) }.cast(); + + match cfunc_argc { + // C function with fixed argument count. + 0.. => { + // Check argc matches + if args.len() != cfunc_argc as usize { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, ArgcParamMismatch); + continue; + } - // Use SendDirect with the super method's CME and ISEQ. - let send_direct = self.push_insn(block, Insn::SendDirect { - recv, - cd, - cme: super_cme, - iseq: super_iseq, - args: processed_args, - kw_bits, - state: send_state, - blockiseq: None, - }); - self.make_equal_to(insn_id, send_direct); + emit_super_call_guards(self, block, super_cme, current_cme, mid, state); + + // Use CCallWithFrame for the C function. 
+ let name = rust_str_to_id(&qualified_method_name(unsafe { (*super_cme).owner }, unsafe { (*super_cme).called_id })); + let ccall = self.push_insn(block, Insn::CCallWithFrame { + cd, + cfunc: cfunc_ptr, + recv, + args: args.clone(), + cme: super_cme, + name, + state, + return_type: types::BasicObject, + elidable: false, + blockiseq: None, + }); + self.make_equal_to(insn_id, ccall); + } + + // Variadic C function: func(int argc, VALUE *argv, VALUE recv) + -1 => { + emit_super_call_guards(self, block, super_cme, current_cme, mid, state); + + // Use CCallVariadic for the variadic C function. + let name = rust_str_to_id(&qualified_method_name(unsafe { (*super_cme).owner }, unsafe { (*super_cme).called_id })); + let ccall = self.push_insn(block, Insn::CCallVariadic { + cfunc: cfunc_ptr, + recv, + args: args.clone(), + cme: super_cme, + name, + state, + return_type: types::BasicObject, + elidable: false, + blockiseq: None, + }); + self.make_equal_to(insn_id, ccall); + } + + // Array-variadic: (self, args_ruby_array). 
+ -2 => { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::Cfunc)); + continue; + } + _ => unreachable!("unknown cfunc argc: {}", cfunc_argc) + } + } else { + // Other method types (not ISEQ or CFUNC) + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::from(def_type))); + continue; + } } _ => { self.push_insn_id(block, insn_id); } } @@ -4296,18 +4383,6 @@ impl Function { Err(()) } - fn qualified_method_name(class: VALUE, method_id: ID) -> String { - let method_name = method_id.contents_lossy(); - // rb_zjit_singleton_class_p also checks if it's a class - if unsafe { rb_zjit_singleton_class_p(class) } { - let class_name = get_class_name(unsafe { rb_class_attached_object(class) }); - format!("{class_name}.{method_name}") - } else { - let class_name = get_class_name(class); - format!("{class_name}#{method_name}") - } - } - fn count_not_inlined_cfunc(fun: &mut Function, block: BlockId, cme: *const rb_callable_method_entry_t) { let owner = unsafe { (*cme).owner }; let called_id = unsafe { (*cme).called_id }; diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 29b1e36331..de4e2ec39d 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -11406,7 +11406,7 @@ mod hir_opt_tests { } #[test] - fn test_invokesuper_to_cfunc_remains_invokesuper() { + fn test_invokesuper_to_cfunc_optimizes_to_ccall() { eval(" class MyArray < Array def length @@ -11418,10 +11418,10 @@ mod hir_opt_tests { "); let hir = hir_string_proc("MyArray.new.method(:length)"); - assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); - assert!(!hir.contains("SendDirect"), "Should not optimize to SendDirect for CFUNC:\n{hir}"); + assert!(!hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); + assert!(hir.contains("CCallWithFrame"), "Should optimize to CCallWithFrame for 
non-variadic cfunc:\n{hir}"); - assert_snapshot!(hir, @r" + assert_snapshot!(hir, @" fn length@:4: bb0(): EntryPoint interpreter @@ -11431,9 +11431,64 @@ mod hir_opt_tests { EntryPoint JIT(0) Jump bb2(v4) bb2(v6:BasicObject): - v11:BasicObject = InvokeSuper v6, 0x1000 # SendFallbackReason: super: unsupported target method type Cfunc + PatchPoint MethodRedefined(Array@0x1000, length@0x1008, cme:0x1010) + v17:CPtr = GetLEP + GuardSuperMethodEntry v17, 0x1038 + v19:RubyValue = GetBlockHandler v17 + v20:FalseClass = GuardBitEquals v19, Value(false) + v21:BasicObject = CCallWithFrame v6, :Array#length@0x1040 CheckInterrupts - Return v11 + Return v21 + "); + } + + #[test] + fn test_invokesuper_to_variadic_cfunc_optimizes_to_ccall() { + eval(" + class MyString < String + def byteindex(needle, offset = 0) + super(needle, offset) + end + end + + MyString.new('hello world').byteindex('world', 0); MyString.new('hello world').byteindex('world', 0) + "); + + let hir = hir_string_proc("MyString.new('hello world').method(:byteindex)"); + assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to CCallVariadic but got:\n{hir}"); + assert!(hir.contains("CCallVariadic"), "Should optimize to CCallVariadic for variadic cfunc:\n{hir}"); + + assert_snapshot!(hir, @" + fn byteindex@:3: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :needle, l0, SP@5 + v3:BasicObject = GetLocal :offset, l0, SP@4 + v4:CPtr = LoadPC + v5:CPtr[CPtr(0x1000)] = Const CPtr(0x1008) + v6:CBool = IsBitEqual v4, v5 + IfTrue v6, bb2(v1, v2, v3) + Jump bb4(v1, v2, v3) + bb1(v10:BasicObject, v11:BasicObject): + EntryPoint JIT(0) + v12:NilClass = Const Value(nil) + Jump bb2(v10, v11, v12) + bb2(v19:BasicObject, v20:BasicObject, v21:BasicObject): + v24:Fixnum[0] = Const Value(0) + Jump bb4(v19, v20, v24) + bb3(v15:BasicObject, v16:BasicObject, v17:BasicObject): + EntryPoint JIT(1) + Jump bb4(v15, v16, v17) + bb4(v27:BasicObject, v28:BasicObject, v29:BasicObject): + 
PatchPoint MethodRedefined(String@0x1010, byteindex@0x1018, cme:0x1020) + v42:CPtr = GetLEP + GuardSuperMethodEntry v42, 0x1008 + v44:RubyValue = GetBlockHandler v42 + v45:FalseClass = GuardBitEquals v44, Value(false) + v46:BasicObject = CCallVariadic v27, :String#byteindex@0x1048, v28, v29 + CheckInterrupts + Return v46 "); } -- cgit v1.2.3