diff options
| author | Kevin Menard <kevin@nirvdrum.com> | 2026-01-29 18:24:50 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-01-29 18:24:50 -0500 |
| commit | 5fec5456b9cd9dd7fdea18ac9c43b1cf6d4cf4cb (patch) | |
| tree | 350159ac0896d4fe5f0c736215ebb0b130b2ce35 | |
| parent | 91619f0230c0e5a95c796c1bd4f784c151e15614 (diff) | |
ZJIT: Optimize `super` calls to C function targets (#15993)
This PR is an extension of the work in #15816. There, we optimized `super` calls where the target method was an ISeq. The code bailed on any other `super` target method type.
The discussion for that PR included the ZJIT stats from running the _railsbench_ benchmark in _ruby-bench_. The stats showed the remaining `super` target method types that we encountered but did not optimize:
```
Top-2 not optimized method types for super (100.0% of total 2,700,015):
cfunc: 2,680,044 (99.3%)
attrset: 19,971 ( 0.7%)
```
This PR handles most of the cfunc cases. We still only handle simple method signatures and don't handle blocks at all, but if the target function is a cfunc where `argc != -2`, we now optimize to either `Insn::CCallWithFrame` (fixed arity, `argc >= 0`) or `Insn::CCallVariadic` (`argc == -1`) as appropriate. This covers 100% of the cfunc cases we encounter in _railsbench_.
<details><summary>Baseline ZJIT stats</summary>
<p>
```
Top-20 not inlined C methods (51.1% of total 15,736,824):
Hash#key?: 1,260,867 ( 8.0%)
Regexp#match?: 970,899 ( 6.2%)
Hash#fetch: 898,248 ( 5.7%)
Integer#===: 439,075 ( 2.8%)
Hash#delete: 405,821 ( 2.6%)
Array#any?: 403,598 ( 2.6%)
String.new: 401,818 ( 2.6%)
String#b: 319,473 ( 2.0%)
String#to_sym: 272,868 ( 1.7%)
Array#all?: 260,132 ( 1.7%)
Fiber.current: 259,588 ( 1.6%)
Array#join: 257,125 ( 1.6%)
Array#include?: 247,718 ( 1.6%)
Kernel#Array: 244,574 ( 1.6%)
String#<<: 242,475 ( 1.5%)
Symbol#end_with?: 239,977 ( 1.5%)
String#force_encoding: 239,520 ( 1.5%)
Kernel#dup: 232,701 ( 1.5%)
Array#[]: 225,160 ( 1.4%)
Kernel#respond_to?: 220,246 ( 1.4%)
Top-20 calls to C functions from JIT code (75.3% of total 106,711,108):
rb_vm_opt_send_without_block: 22,031,658 (20.6%)
rb_hash_aref: 9,335,540 ( 8.7%)
rb_vm_env_write: 7,865,750 ( 7.4%)
rb_vm_send: 6,836,936 ( 6.4%)
rb_zjit_writebarrier_check_immediate: 5,623,383 ( 5.3%)
rb_vm_getinstancevariable: 5,012,846 ( 4.7%)
rb_ivar_get_at_no_ractor_check: 4,868,219 ( 4.6%)
rb_vm_invokesuper: 3,240,208 ( 3.0%)
rb_hash_aset: 2,061,526 ( 1.9%)
rb_obj_is_kind_of: 1,812,573 ( 1.7%)
rb_vm_invokeblock: 1,647,238 ( 1.5%)
rb_vm_opt_getconstant_path: 1,295,958 ( 1.2%)
Hash#key?: 1,260,867 ( 1.2%)
rb_class_allocate_instance: 1,190,707 ( 1.1%)
rb_hash_new_with_size: 1,150,766 ( 1.1%)
rb_vm_setinstancevariable: 1,119,304 ( 1.0%)
rb_ec_ary_new_from_values: 1,050,781 ( 1.0%)
rb_obj_alloc: 993,445 ( 0.9%)
rb_str_concat_literals: 984,558 ( 0.9%)
Regexp#match?: 970,899 ( 0.9%)
Top-2 not optimized method types for send (100.0% of total 3,423,067):
iseq: 3,410,096 (99.6%)
optimized: 12,971 ( 0.4%)
Top-2 not optimized method types for send_without_block (100.0% of total 319,311):
optimized_send: 246,250 (77.1%)
null: 73,061 (22.9%)
Top-2 not optimized method types for super (100.0% of total 2,680,495):
cfunc: 2,660,334 (99.2%)
attrset: 20,161 ( 0.8%)
Top-4 instructions with uncategorized fallback reason (100.0% of total 2,617,553):
invokeblock: 1,647,238 (62.9%)
sendforward: 748,101 (28.6%)
invokesuperforward: 199,443 ( 7.6%)
opt_send_without_block: 22,771 ( 0.9%)
Top-20 send fallback reasons (100.0% of total 34,703,584):
send_without_block_polymorphic: 12,818,893 (36.9%)
send_without_block_no_profiles: 5,442,960 (15.7%)
send_not_optimized_method_type: 3,423,067 ( 9.9%)
super_not_optimized_method_type: 2,680,495 ( 7.7%)
uncategorized: 2,617,553 ( 7.5%)
send_no_profiles: 2,083,822 ( 6.0%)
one_or_more_complex_arg_pass: 1,663,149 ( 4.8%)
send_polymorphic: 1,329,141 ( 3.8%)
send_without_block_not_optimized_need_permission: 510,815 ( 1.5%)
too_many_args_for_lir: 477,266 ( 1.4%)
singleton_class_seen: 441,058 ( 1.3%)
super_complex_args_pass: 331,767 ( 1.0%)
send_without_block_not_optimized_method_type_optimized: 246,250 ( 0.7%)
send_without_block_megamorphic: 228,672 ( 0.7%)
super_target_complex_args_pass: 165,855 ( 0.5%)
send_without_block_not_optimized_method_type: 73,061 ( 0.2%)
obj_to_string_not_string: 67,862 ( 0.2%)
super_call_with_block: 40,004 ( 0.1%)
send_without_block_direct_keyword_mismatch: 39,783 ( 0.1%)
super_polymorphic: 22,087 ( 0.1%)
Top-3 setivar fallback reasons (100.0% of total 1,119,304):
not_monomorphic: 1,077,792 (96.3%)
not_t_object: 41,335 ( 3.7%)
new_shape_needs_extension: 177 ( 0.0%)
Top-1 getivar fallback reasons (100.0% of total 5,012,871):
not_monomorphic: 5,012,871 (100.0%)
Top-2 definedivar fallback reasons (100.0% of total 142,798):
not_monomorphic: 142,711 (99.9%)
not_t_object: 87 ( 0.1%)
Top-6 invokeblock handler (100.0% of total 1,647,238):
monomorphic_iseq: 878,253 (53.3%)
polymorphic: 483,612 (29.4%)
monomorphic_other: 134,943 ( 8.2%)
monomorphic_ifunc: 115,175 ( 7.0%)
megamorphic: 34,939 ( 2.1%)
no_profiles: 316 ( 0.0%)
Top-8 popular complex argument-parameter features not optimized (100.0% of total 2,068,581):
param_forwardable: 729,353 (35.3%)
param_block: 716,533 (34.6%)
param_rest: 327,865 (15.8%)
caller_splat: 114,365 ( 5.5%)
caller_kw_splat: 99,266 ( 4.8%)
param_kwrest: 80,149 ( 3.9%)
caller_blockarg: 877 ( 0.0%)
caller_kwarg: 173 ( 0.0%)
Top-1 compile error reasons (100.0% of total 156,707):
exception_handler: 156,707 (100.0%)
Top-5 unhandled YARV insns (100.0% of total 201,517):
getconstant: 160,920 (79.9%)
expandarray: 19,985 ( 9.9%)
setblockparam: 19,972 ( 9.9%)
checkmatch: 480 ( 0.2%)
once: 160 ( 0.1%)
Top-2 unhandled HIR insns (100.0% of total 128,647):
throw: 93,060 (72.3%)
invokebuiltin: 35,587 (27.7%)
Top-19 side exit reasons (100.0% of total 3,484,374):
guard_shape_failure: 1,042,511 (29.9%)
guard_type_failure: 812,342 (23.3%)
block_param_proxy_not_iseq_or_ifunc: 795,628 (22.8%)
unhandled_yarv_insn: 201,517 ( 5.8%)
compile_error: 156,707 ( 4.5%)
unhandled_hir_insn: 128,647 ( 3.7%)
unhandled_newarray_send_pack: 119,187 ( 3.4%)
patchpoint_method_redefined: 80,619 ( 2.3%)
unhandled_block_arg: 60,517 ( 1.7%)
block_param_proxy_modified: 49,695 ( 1.4%)
guard_less_failure: 20,033 ( 0.6%)
fixnum_lshift_overflow: 9,985 ( 0.3%)
patchpoint_stable_constant_names: 5,752 ( 0.2%)
fixnum_mult_overflow: 570 ( 0.0%)
obj_to_string_fallback: 498 ( 0.0%)
patchpoint_no_ep_escape: 109 ( 0.0%)
interrupt: 43 ( 0.0%)
guard_super_method_entry: 8 ( 0.0%)
guard_greater_eq_failure: 6 ( 0.0%)
send_count: 133,679,714
dynamic_send_count: 34,703,584 (26.0%)
optimized_send_count: 98,976,130 (74.0%)
dynamic_setivar_count: 1,119,304 ( 0.8%)
dynamic_getivar_count: 5,012,871 ( 3.7%)
dynamic_definedivar_count: 142,798 ( 0.1%)
iseq_optimized_send_count: 38,085,055 (28.5%)
inline_cfunc_optimized_send_count: 39,628,908 (29.6%)
inline_iseq_optimized_send_count: 3,624,852 ( 2.7%)
non_variadic_cfunc_optimized_send_count: 10,434,756 ( 7.8%)
variadic_cfunc_optimized_send_count: 7,202,559 ( 5.4%)
compiled_iseq_count: 2,868
failed_iseq_count: 0
compile_time: 8,809ms
profile_time: 135ms
gc_time: 255ms
invalidation_time: 21ms
vm_write_pc_count: 116,809,164
vm_write_sp_count: 116,809,164
vm_write_locals_count: 111,533,227
vm_write_stack_count: 111,533,227
vm_write_to_parent_iseq_local_count: 521,277
vm_read_from_parent_iseq_local_count: 12,757,231
guard_type_count: 126,653,751
guard_type_exit_ratio: 0.6%
guard_shape_count: 44,193,824
guard_shape_exit_ratio: 2.4%
code_region_bytes: 14,336,000
zjit_alloc_bytes: 19,282,889
total_mem_bytes: 33,618,889
side_exit_count: 3,484,374
total_insn_count: 697,672,179
vm_insn_count: 52,531,010
zjit_insn_count: 645,141,169
ratio_in_zjit: 92.5%
```
</p>
</details>
<details><summary>Optimized invokesuper stats</summary>
<p>
```
Top-20 not inlined C methods (51.1% of total 15,736,852):
Hash#key?: 1,260,867 ( 8.0%)
Regexp#match?: 970,900 ( 6.2%)
Hash#fetch: 898,248 ( 5.7%)
Integer#===: 439,075 ( 2.8%)
Hash#delete: 405,825 ( 2.6%)
Array#any?: 403,600 ( 2.6%)
String.new: 401,818 ( 2.6%)
String#b: 319,473 ( 2.0%)
String#to_sym: 272,868 ( 1.7%)
Array#all?: 260,132 ( 1.7%)
Fiber.current: 259,588 ( 1.6%)
Array#join: 257,125 ( 1.6%)
Array#include?: 247,718 ( 1.6%)
Kernel#Array: 244,579 ( 1.6%)
String#<<: 242,475 ( 1.5%)
Symbol#end_with?: 239,977 ( 1.5%)
String#force_encoding: 239,520 ( 1.5%)
Kernel#dup: 232,706 ( 1.5%)
Array#[]: 225,160 ( 1.4%)
Kernel#respond_to?: 220,246 ( 1.4%)
Top-20 calls to C functions from JIT code (73.2% of total 106,690,862):
rb_vm_opt_send_without_block: 22,031,722 (20.7%)
rb_hash_aref: 9,335,543 ( 8.8%)
rb_vm_env_write: 7,865,751 ( 7.4%)
rb_vm_send: 6,836,939 ( 6.4%)
rb_zjit_writebarrier_check_immediate: 5,623,259 ( 5.3%)
rb_vm_getinstancevariable: 5,012,844 ( 4.7%)
rb_ivar_get_at_no_ractor_check: 4,868,219 ( 4.6%)
rb_hash_aset: 2,061,385 ( 1.9%)
rb_obj_is_kind_of: 1,812,575 ( 1.7%)
rb_vm_invokeblock: 1,647,238 ( 1.5%)
rb_vm_opt_getconstant_path: 1,295,958 ( 1.2%)
Hash#key?: 1,260,867 ( 1.2%)
rb_class_allocate_instance: 1,190,704 ( 1.1%)
rb_hash_new_with_size: 1,150,765 ( 1.1%)
rb_vm_setinstancevariable: 1,119,304 ( 1.0%)
rb_ec_ary_new_from_values: 1,050,780 ( 1.0%)
rb_obj_alloc: 993,446 ( 0.9%)
rb_str_concat_literals: 984,559 ( 0.9%)
Regexp#match?: 970,900 ( 0.9%)
rb_obj_as_string_result: 937,751 ( 0.9%)
Top-2 not optimized method types for send (100.0% of total 3,423,067):
iseq: 3,410,096 (99.6%)
optimized: 12,971 ( 0.4%)
Top-2 not optimized method types for send_without_block (100.0% of total 319,311):
optimized_send: 246,250 (77.1%)
null: 73,061 (22.9%)
Top-1 not optimized method types for super (100.0% of total 20,161):
attrset: 20,161 (100.0%)
Top-4 instructions with uncategorized fallback reason (100.0% of total 2,617,553):
invokeblock: 1,647,238 (62.9%)
sendforward: 748,101 (28.6%)
invokesuperforward: 199,443 ( 7.6%)
opt_send_without_block: 22,771 ( 0.9%)
Top-20 send fallback reasons (100.0% of total 32,043,318):
send_without_block_polymorphic: 12,818,949 (40.0%)
send_without_block_no_profiles: 5,442,967 (17.0%)
send_not_optimized_method_type: 3,423,067 (10.7%)
uncategorized: 2,617,553 ( 8.2%)
send_no_profiles: 2,083,824 ( 6.5%)
one_or_more_complex_arg_pass: 1,663,150 ( 5.2%)
send_polymorphic: 1,329,142 ( 4.1%)
send_without_block_not_optimized_need_permission: 510,814 ( 1.6%)
too_many_args_for_lir: 477,267 ( 1.5%)
singleton_class_seen: 441,058 ( 1.4%)
super_complex_args_pass: 331,767 ( 1.0%)
send_without_block_not_optimized_method_type_optimized: 246,250 ( 0.8%)
send_without_block_megamorphic: 228,672 ( 0.7%)
super_target_complex_args_pass: 165,855 ( 0.5%)
send_without_block_not_optimized_method_type: 73,061 ( 0.2%)
obj_to_string_not_string: 67,862 ( 0.2%)
super_call_with_block: 40,004 ( 0.1%)
send_without_block_direct_keyword_mismatch: 39,783 ( 0.1%)
super_polymorphic: 22,088 ( 0.1%)
super_not_optimized_method_type: 20,161 ( 0.1%)
Top-3 setivar fallback reasons (100.0% of total 1,119,304):
not_monomorphic: 1,077,792 (96.3%)
not_t_object: 41,335 ( 3.7%)
new_shape_needs_extension: 177 ( 0.0%)
Top-1 getivar fallback reasons (100.0% of total 5,012,869):
not_monomorphic: 5,012,869 (100.0%)
Top-2 definedivar fallback reasons (100.0% of total 142,798):
not_monomorphic: 142,711 (99.9%)
not_t_object: 87 ( 0.1%)
Top-6 invokeblock handler (100.0% of total 1,647,238):
monomorphic_iseq: 878,253 (53.3%)
polymorphic: 483,612 (29.4%)
monomorphic_other: 134,943 ( 8.2%)
monomorphic_ifunc: 115,175 ( 7.0%)
megamorphic: 34,939 ( 2.1%)
no_profiles: 316 ( 0.0%)
Top-8 popular complex argument-parameter features not optimized (100.0% of total 2,068,582):
param_forwardable: 729,353 (35.3%)
param_block: 716,534 (34.6%)
param_rest: 327,865 (15.8%)
caller_splat: 114,365 ( 5.5%)
caller_kw_splat: 99,266 ( 4.8%)
param_kwrest: 80,149 ( 3.9%)
caller_blockarg: 877 ( 0.0%)
caller_kwarg: 173 ( 0.0%)
Top-1 compile error reasons (100.0% of total 156,707):
exception_handler: 156,707 (100.0%)
Top-5 unhandled YARV insns (100.0% of total 201,517):
getconstant: 160,920 (79.9%)
expandarray: 19,985 ( 9.9%)
setblockparam: 19,972 ( 9.9%)
checkmatch: 480 ( 0.2%)
once: 160 ( 0.1%)
Top-2 unhandled HIR insns (100.0% of total 128,646):
throw: 93,060 (72.3%)
invokebuiltin: 35,586 (27.7%)
Top-19 side exit reasons (100.0% of total 3,504,293):
guard_shape_failure: 1,042,515 (29.7%)
guard_type_failure: 812,249 (23.2%)
block_param_proxy_not_iseq_or_ifunc: 795,628 (22.7%)
unhandled_yarv_insn: 201,517 ( 5.8%)
compile_error: 156,707 ( 4.5%)
unhandled_hir_insn: 128,646 ( 3.7%)
unhandled_newarray_send_pack: 119,187 ( 3.4%)
patchpoint_method_redefined: 80,779 ( 2.3%)
unhandled_block_arg: 60,517 ( 1.7%)
block_param_proxy_modified: 49,695 ( 1.4%)
guard_less_failure: 20,033 ( 0.6%)
guard_super_method_entry: 19,855 ( 0.6%)
fixnum_lshift_overflow: 9,985 ( 0.3%)
patchpoint_stable_constant_names: 5,752 ( 0.2%)
fixnum_mult_overflow: 569 ( 0.0%)
obj_to_string_fallback: 498 ( 0.0%)
patchpoint_no_ep_escape: 109 ( 0.0%)
interrupt: 46 ( 0.0%)
guard_greater_eq_failure: 6 ( 0.0%)
send_count: 133,600,402
dynamic_send_count: 32,043,318 (24.0%)
optimized_send_count: 101,557,084 (76.0%)
dynamic_setivar_count: 1,119,304 ( 0.8%)
dynamic_getivar_count: 5,012,869 ( 3.8%)
dynamic_definedivar_count: 142,798 ( 0.1%)
iseq_optimized_send_count: 38,025,870 (28.5%)
inline_cfunc_optimized_send_count: 39,628,762 (29.7%)
inline_iseq_optimized_send_count: 3,624,854 ( 2.7%)
non_variadic_cfunc_optimized_send_count: 12,631,917 ( 9.5%)
variadic_cfunc_optimized_send_count: 7,645,681 ( 5.7%)
compiled_iseq_count: 2,870
failed_iseq_count: 0
compile_time: 8,419ms
profile_time: 133ms
gc_time: 248ms
invalidation_time: 20ms
vm_write_pc_count: 116,729,857
vm_write_sp_count: 116,729,857
vm_write_locals_count: 111,453,921
vm_write_stack_count: 111,453,921
vm_write_to_parent_iseq_local_count: 521,275
vm_read_from_parent_iseq_local_count: 12,757,225
guard_type_count: 126,594,209
guard_type_exit_ratio: 0.6%
guard_shape_count: 44,193,683
guard_shape_exit_ratio: 2.4%
code_region_bytes: 14,368,768
zjit_alloc_bytes: 19,581,578
total_mem_bytes: 33,950,346
side_exit_count: 3,504,293
total_insn_count: 697,692,070
vm_insn_count: 52,828,675
zjit_insn_count: 644,863,395
ratio_in_zjit: 92.4%
```
</p>
</details>
| -rw-r--r-- | test/ruby/test_zjit.rb | 62 | ||||
| -rw-r--r-- | zjit/src/hir.rs | 209 | ||||
| -rw-r--r-- | zjit/src/hir/opt_tests.rs | 67 |
3 files changed, 260 insertions, 78 deletions
diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 6ad06f9453..7b068e9898 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1096,16 +1096,68 @@ class TestZJIT < Test::Unit::TestCase }, call_threshold: 2 end - def test_invokesuper_to_cfunc - assert_compiles '["MyArray", 3]', %q{ - class MyArray < Array + def test_invokesuper_to_cfunc_no_args + assert_compiles '["MyString", 3]', %q{ + class MyString < String def length - ["MyArray", super] + ["MyString", super] end end def test - MyArray.new([1, 2, 3]).length + MyString.new("abc").length + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + def test_invokesuper_to_cfunc_simple_args + assert_compiles '["MyString", true]', %q{ + class MyString < String + def include?(other) + ["MyString", super(other)] + end + end + + def test + MyString.new("abc").include?("bc") + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + + def test_invokesuper_to_cfunc_with_optional_arg + assert_compiles '["MyString", 6]', %q{ + class MyString < String + def byteindex(needle, offset = 0) + ["MyString", super(needle, offset)] + end + end + + def test + MyString.new("hello world").byteindex("world") + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + def test_invokesuper_to_cfunc_varargs + assert_compiles '["MyString", true]', %q{ + class MyString < String + def end_with?(str) + ["MyString", super(str)] + end + end + + def test + MyString.new("abc").end_with?("bc") end test # profile invokesuper diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 8a9d5a5bb0..901beffea0 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -1297,6 +1297,20 @@ fn get_local_var_name_for_printer(iseq: Option<IseqPtr>, level: u32, ep_offset: Some(format!(":{}", id.contents_lossy())) } +/// Construct a qualified method name for display/debug output. 
+/// Returns strings like "Array#length" for instance methods or "Foo.bar" for singleton methods. +fn qualified_method_name(class: VALUE, method_id: ID) -> String { + let method_name = method_id.contents_lossy(); + // rb_zjit_singleton_class_p also checks if it's a class + if unsafe { rb_zjit_singleton_class_p(class) } { + let class_name = get_class_name(unsafe { rb_class_attached_object(class) }); + format!("{class_name}.{method_name}") + } else { + let class_name = get_class_name(class); + format!("{class_name}#{method_name}") + } +} + static REGEXP_FLAGS: &[(u32, &str)] = &[ (ONIG_OPTION_MULTILINE, "MULTILINE"), (ONIG_OPTION_IGNORECASE, "IGNORECASE"), @@ -3504,6 +3518,40 @@ impl Function { }; } Insn::InvokeSuper { recv, cd, blockiseq, args, state, .. } => { + // Helper to emit common guards for super call optimization. + fn emit_super_call_guards( + fun: &mut Function, + block: BlockId, + super_cme: *const rb_callable_method_entry_t, + current_cme: *const rb_callable_method_entry_t, + mid: ID, + state: InsnId, + ) { + fun.push_insn(block, Insn::PatchPoint { + invariant: Invariant::MethodRedefined { + klass: unsafe { (*super_cme).defined_class }, + method: mid, + cme: super_cme + }, + state + }); + + let lep = fun.push_insn(block, Insn::GetLEP); + fun.push_insn(block, Insn::GuardSuperMethodEntry { + lep, + cme: current_cme, + state + }); + + let block_handler = fun.push_insn(block, Insn::GetBlockHandler { lep }); + fun.push_insn(block, Insn::GuardBitEquals { + val: block_handler, + expected: Const::Value(VALUE(VM_BLOCK_HANDLER_NONE as usize)), + reason: SideExitReason::UnhandledBlockArg, + state + }); + } + // Don't handle calls with literal blocks (e.g., super { ... }) if !blockiseq.is_null() { self.push_insn_id(block, insn_id); @@ -3567,68 +3615,107 @@ impl Function { continue; } - // Check if it's an ISEQ method; bail if it isn't. 
let def_type = unsafe { get_cme_def_type(super_cme) }; - if def_type != VM_METHOD_TYPE_ISEQ { - self.push_insn_id(block, insn_id); - self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::from(def_type))); - continue; - } - // Check if the super method's parameters support direct send. - // If not, we can't do direct dispatch. - let super_iseq = unsafe { get_def_iseq_ptr((*super_cme).def) }; - // TODO: pass Option<blockiseq> to can_direct_send when we start specializing super { ... } - if !can_direct_send(self, block, super_iseq, ci, insn_id, args.as_slice(), None) { - self.push_insn_id(block, insn_id); - self.set_dynamic_send_reason(insn_id, SuperTargetComplexArgsPass); - continue; - } + if def_type == VM_METHOD_TYPE_ISEQ { + // Check if the super method's parameters support direct send. + // If not, we can't do direct dispatch. + let super_iseq = unsafe { get_def_iseq_ptr((*super_cme).def) }; + // TODO: pass Option<blockiseq> to can_direct_send when we start specializing `super { ... }`. + if !can_direct_send(self, block, super_iseq, ci, insn_id, args.as_slice(), None) { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperTargetComplexArgsPass); + continue; + } - // Add PatchPoint for method redefinition. - self.push_insn(block, Insn::PatchPoint { - invariant: Invariant::MethodRedefined { - klass: unsafe { (*super_cme).defined_class }, - method: mid, - cme: super_cme - }, - state - }); + let Ok((send_state, processed_args, kw_bits)) = self.prepare_direct_send_args(block, &args, ci, super_iseq, state) + .inspect_err(|&reason| self.set_dynamic_send_reason(insn_id, reason)) else { + self.push_insn_id(block, insn_id); continue; + }; - // Guard that we're calling `super` from the expected method context. 
- let lep = self.push_insn(block, Insn::GetLEP); - self.push_insn(block, Insn::GuardSuperMethodEntry { - lep, - cme: current_cme, - state - }); + emit_super_call_guards(self, block, super_cme, current_cme, mid, state); - // Guard that no block is being passed (implicit or explicit). - let block_handler = self.push_insn(block, Insn::GetBlockHandler { lep }); - self.push_insn(block, Insn::GuardBitEquals { - val: block_handler, - expected: Const::Value(VALUE(VM_BLOCK_HANDLER_NONE as usize)), - reason: SideExitReason::UnhandledBlockArg, - state - }); + // Use SendDirect with the super method's CME and ISEQ. + let send_direct = self.push_insn(block, Insn::SendDirect { + recv, + cd, + cme: super_cme, + iseq: super_iseq, + args: processed_args, + kw_bits, + state: send_state, + blockiseq: None, + }); + self.make_equal_to(insn_id, send_direct); - let Ok((send_state, processed_args, kw_bits)) = self.prepare_direct_send_args(block, &args, ci, super_iseq, state) - .inspect_err(|&reason| self.set_dynamic_send_reason(insn_id, reason)) else { - self.push_insn_id(block, insn_id); continue; - }; + } else if def_type == VM_METHOD_TYPE_CFUNC { + let cfunc = unsafe { get_cme_def_body_cfunc(super_cme) }; + let cfunc_argc = unsafe { get_mct_argc(cfunc) }; + let cfunc_ptr = unsafe { get_mct_func(cfunc) }.cast(); + + match cfunc_argc { + // C function with fixed argument count. + 0.. => { + // Check argc matches + if args.len() != cfunc_argc as usize { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, ArgcParamMismatch); + continue; + } - // Use SendDirect with the super method's CME and ISEQ. - let send_direct = self.push_insn(block, Insn::SendDirect { - recv, - cd, - cme: super_cme, - iseq: super_iseq, - args: processed_args, - kw_bits, - state: send_state, - blockiseq: None, - }); - self.make_equal_to(insn_id, send_direct); + emit_super_call_guards(self, block, super_cme, current_cme, mid, state); + + // Use CCallWithFrame for the C function. 
+ let name = rust_str_to_id(&qualified_method_name(unsafe { (*super_cme).owner }, unsafe { (*super_cme).called_id })); + let ccall = self.push_insn(block, Insn::CCallWithFrame { + cd, + cfunc: cfunc_ptr, + recv, + args: args.clone(), + cme: super_cme, + name, + state, + return_type: types::BasicObject, + elidable: false, + blockiseq: None, + }); + self.make_equal_to(insn_id, ccall); + } + + // Variadic C function: func(int argc, VALUE *argv, VALUE recv) + -1 => { + emit_super_call_guards(self, block, super_cme, current_cme, mid, state); + + // Use CCallVariadic for the variadic C function. + let name = rust_str_to_id(&qualified_method_name(unsafe { (*super_cme).owner }, unsafe { (*super_cme).called_id })); + let ccall = self.push_insn(block, Insn::CCallVariadic { + cfunc: cfunc_ptr, + recv, + args: args.clone(), + cme: super_cme, + name, + state, + return_type: types::BasicObject, + elidable: false, + blockiseq: None, + }); + self.make_equal_to(insn_id, ccall); + } + + // Array-variadic: (self, args_ruby_array). 
+ -2 => { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::Cfunc)); + continue; + } + _ => unreachable!("unknown cfunc argc: {}", cfunc_argc) + } + } else { + // Other method types (not ISEQ or CFUNC) + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::from(def_type))); + continue; + } } _ => { self.push_insn_id(block, insn_id); } } @@ -4296,18 +4383,6 @@ impl Function { Err(()) } - fn qualified_method_name(class: VALUE, method_id: ID) -> String { - let method_name = method_id.contents_lossy(); - // rb_zjit_singleton_class_p also checks if it's a class - if unsafe { rb_zjit_singleton_class_p(class) } { - let class_name = get_class_name(unsafe { rb_class_attached_object(class) }); - format!("{class_name}.{method_name}") - } else { - let class_name = get_class_name(class); - format!("{class_name}#{method_name}") - } - } - fn count_not_inlined_cfunc(fun: &mut Function, block: BlockId, cme: *const rb_callable_method_entry_t) { let owner = unsafe { (*cme).owner }; let called_id = unsafe { (*cme).called_id }; diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 29b1e36331..de4e2ec39d 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -11406,7 +11406,7 @@ mod hir_opt_tests { } #[test] - fn test_invokesuper_to_cfunc_remains_invokesuper() { + fn test_invokesuper_to_cfunc_optimizes_to_ccall() { eval(" class MyArray < Array def length @@ -11418,10 +11418,10 @@ mod hir_opt_tests { "); let hir = hir_string_proc("MyArray.new.method(:length)"); - assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); - assert!(!hir.contains("SendDirect"), "Should not optimize to SendDirect for CFUNC:\n{hir}"); + assert!(!hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); + assert!(hir.contains("CCallWithFrame"), "Should optimize to CCallWithFrame for 
non-variadic cfunc:\n{hir}"); - assert_snapshot!(hir, @r" + assert_snapshot!(hir, @" fn length@<compiled>:4: bb0(): EntryPoint interpreter @@ -11431,9 +11431,64 @@ mod hir_opt_tests { EntryPoint JIT(0) Jump bb2(v4) bb2(v6:BasicObject): - v11:BasicObject = InvokeSuper v6, 0x1000 # SendFallbackReason: super: unsupported target method type Cfunc + PatchPoint MethodRedefined(Array@0x1000, length@0x1008, cme:0x1010) + v17:CPtr = GetLEP + GuardSuperMethodEntry v17, 0x1038 + v19:RubyValue = GetBlockHandler v17 + v20:FalseClass = GuardBitEquals v19, Value(false) + v21:BasicObject = CCallWithFrame v6, :Array#length@0x1040 CheckInterrupts - Return v11 + Return v21 + "); + } + + #[test] + fn test_invokesuper_to_variadic_cfunc_optimizes_to_ccall() { + eval(" + class MyString < String + def byteindex(needle, offset = 0) + super(needle, offset) + end + end + + MyString.new('hello world').byteindex('world', 0); MyString.new('hello world').byteindex('world', 0) + "); + + let hir = hir_string_proc("MyString.new('hello world').method(:byteindex)"); + assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to CCallVariadic but got:\n{hir}"); + assert!(hir.contains("CCallVariadic"), "Should optimize to CCallVariadic for variadic cfunc:\n{hir}"); + + assert_snapshot!(hir, @" + fn byteindex@<compiled>:3: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :needle, l0, SP@5 + v3:BasicObject = GetLocal :offset, l0, SP@4 + v4:CPtr = LoadPC + v5:CPtr[CPtr(0x1000)] = Const CPtr(0x1008) + v6:CBool = IsBitEqual v4, v5 + IfTrue v6, bb2(v1, v2, v3) + Jump bb4(v1, v2, v3) + bb1(v10:BasicObject, v11:BasicObject): + EntryPoint JIT(0) + v12:NilClass = Const Value(nil) + Jump bb2(v10, v11, v12) + bb2(v19:BasicObject, v20:BasicObject, v21:BasicObject): + v24:Fixnum[0] = Const Value(0) + Jump bb4(v19, v20, v24) + bb3(v15:BasicObject, v16:BasicObject, v17:BasicObject): + EntryPoint JIT(1) + Jump bb4(v15, v16, v17) + bb4(v27:BasicObject, v28:BasicObject, 
v29:BasicObject): + PatchPoint MethodRedefined(String@0x1010, byteindex@0x1018, cme:0x1020) + v42:CPtr = GetLEP + GuardSuperMethodEntry v42, 0x1008 + v44:RubyValue = GetBlockHandler v42 + v45:FalseClass = GuardBitEquals v44, Value(false) + v46:BasicObject = CCallVariadic v27, :String#byteindex@0x1048, v28, v29 + CheckInterrupts + Return v46 "); } |
