diff options
| author | Stan Lo <stan.lo@shopify.com> | 2025-10-20 21:10:25 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-10-20 20:10:25 +0000 |
| commit | e047cea2804c987c32450279a9b8fd78655cfa9d (patch) | |
| tree | b1e9181d51164a2e019c95da03facd40f3a0fde4 | |
| parent | 33f1af6779a22ab84633b43d42dcf273a7e3bbe9 (diff) | |
ZJIT: Optimize send with block into CCallWithFrame (#14863)
Since `Send` has a block iseq, I updated `CCallWithFrame` to take an optional `blockiseq` as well, and now generate `CCallWithFrame` for `Send` when the conditions are right.
## Stats
`liquid-render` Benchmark
| Metric | Before | After | Change |
|----------------------|--------------------|--------------------|--------------------- |
| send_no_profiles | 3,209,418 (34.1%) | 4,119 (0.1%) | -3,205,299 (-99.9%) |
| dynamic_send_count | 9,410,758 (23.1%) | 6,459,678 (15.9%) | -2,951,080 (-31.4%) |
| optimized_send_count | 31,269,388 (76.9%) | 34,220,474 (84.1%) | +2,951,086 (+9.4%) |
`lobsters` Benchmark
| Metric | Before | After | Change |
|----------------------|------------|------------|---------------------|
| send_no_profiles | 10,769,052 | 2,902,865 | -7,866,187 (-73.0%) |
| dynamic_send_count | 45,673,185 | 42,880,160 | -2,793,025 (-6.1%) |
| optimized_send_count | 75,142,407 | 78,378,514 | +3,236,107 (+4.3%) |
### `liquid-render` Before
<details>
```
Average of last 22, non-warmup iters: 262ms
***ZJIT: Printing ZJIT statistics on exit***
Top-20 not inlined C methods (96.9% of total 10,370,809):
Kernel#respond_to?: 5,069,204 (48.9%)
Hash#key?: 2,394,488 (23.1%)
Set#include?: 778,429 ( 7.5%)
String#===: 326,134 ( 3.1%)
String#<<: 203,231 ( 2.0%)
Integer#<<: 166,768 ( 1.6%)
Kernel#is_a?: 164,272 ( 1.6%)
Kernel#format: 124,262 ( 1.2%)
Integer#/: 124,262 ( 1.2%)
Array#<<: 115,325 ( 1.1%)
Regexp.last_match: 94,862 ( 0.9%)
Hash#[]=: 88,485 ( 0.9%)
String#start_with?: 55,933 ( 0.5%)
CGI::EscapeExt#escapeHTML: 55,471 ( 0.5%)
Array#shift: 55,298 ( 0.5%)
Regexp#===: 48,928 ( 0.5%)
String#=~: 48,477 ( 0.5%)
Array#unshift: 47,331 ( 0.5%)
String#empty?: 42,870 ( 0.4%)
Array#push: 41,215 ( 0.4%)
Top-20 not annotated C methods (97.1% of total 10,394,421):
Kernel#respond_to?: 5,069,204 (48.8%)
Hash#key?: 2,394,488 (23.0%)
Set#include?: 778,429 ( 7.5%)
String#===: 326,134 ( 3.1%)
Kernel#is_a?: 208,664 ( 2.0%)
String#<<: 203,231 ( 2.0%)
Integer#<<: 166,768 ( 1.6%)
Integer#/: 124,262 ( 1.2%)
Kernel#format: 124,262 ( 1.2%)
Array#<<: 115,325 ( 1.1%)
Regexp.last_match: 94,862 ( 0.9%)
Hash#[]=: 88,485 ( 0.9%)
String#start_with?: 55,933 ( 0.5%)
CGI::EscapeExt#escapeHTML: 55,471 ( 0.5%)
Array#shift: 55,298 ( 0.5%)
Regexp#===: 48,928 ( 0.5%)
String#=~: 48,477 ( 0.5%)
Array#unshift: 47,331 ( 0.5%)
String#empty?: 42,870 ( 0.4%)
Array#push: 41,215 ( 0.4%)
Top-2 not optimized method types for send (100.0% of total 2,382):
cfunc: 1,196 (50.2%)
iseq: 1,186 (49.8%)
Top-4 not optimized method types for send_without_block (100.0% of total 2,561,006):
iseq: 2,442,091 (95.4%)
optimized: 118,882 ( 4.6%)
alias: 20 ( 0.0%)
null: 13 ( 0.0%)
Top-9 not optimized instructions (100.0% of total 685,128):
invokeblock: 227,376 (33.2%)
opt_neq: 166,471 (24.3%)
opt_and: 166,471 (24.3%)
opt_eq: 66,721 ( 9.7%)
invokesuper: 39,363 ( 5.7%)
opt_le: 16,278 ( 2.4%)
opt_minus: 1,574 ( 0.2%)
opt_send_without_block: 772 ( 0.1%)
opt_or: 102 ( 0.0%)
Top-8 send fallback reasons (100.0% of total 9,410,758):
send_no_profiles: 3,209,418 (34.1%)
send_without_block_polymorphic: 2,858,558 (30.4%)
send_without_block_not_optimized_method_type: 2,561,006 (27.2%)
not_optimized_instruction: 685,128 ( 7.3%)
send_without_block_no_profiles: 91,913 ( 1.0%)
send_not_optimized_method_type: 2,382 ( 0.0%)
obj_to_string_not_string: 2,352 ( 0.0%)
send_without_block_cfunc_array_variadic: 1 ( 0.0%)
Top-3 unhandled YARV insns (100.0% of total 83,682):
getclassvariable: 83,431 (99.7%)
once: 137 ( 0.2%)
getconstant: 114 ( 0.1%)
Top-3 compile error reasons (100.0% of total 5,431,910):
register_spill_on_alloc: 4,665,393 (85.9%)
exception_handler: 766,347 (14.1%)
register_spill_on_ccall: 170 ( 0.0%)
Top-11 side exit reasons (100.0% of total 14,635,508):
compile_error: 5,431,910 (37.1%)
guard_shape_failure: 3,436,341 (23.5%)
guard_type_failure: 2,545,791 (17.4%)
unhandled_splat: 2,162,907 (14.8%)
unhandled_kwarg: 952,568 ( 6.5%)
unhandled_yarv_insn: 83,682 ( 0.6%)
unhandled_hir_insn: 19,112 ( 0.1%)
patchpoint_stable_constant_names: 1,608 ( 0.0%)
obj_to_string_fallback: 902 ( 0.0%)
patchpoint_method_redefined: 599 ( 0.0%)
block_param_proxy_not_iseq_or_ifunc: 88 ( 0.0%)
send_count: 40,680,153
dynamic_send_count: 9,410,758 (23.1%)
optimized_send_count: 31,269,395 (76.9%)
iseq_optimized_send_count: 13,886,902 (34.1%)
inline_cfunc_optimized_send_count: 7,011,684 (17.2%)
non_variadic_cfunc_optimized_send_count: 4,670,333 (11.5%)
variadic_cfunc_optimized_send_count: 5,700,476 (14.0%)
dynamic_getivar_count: 1,144,613
dynamic_setivar_count: 950,830
compiled_iseq_count: 402
failed_iseq_count: 48
compile_time: 976ms
profile_time: 3,223ms
gc_time: 22ms
invalidation_time: 0ms
vm_write_pc_count: 37,744,491
vm_write_sp_count: 37,511,865
vm_write_locals_count: 37,511,865
vm_write_stack_count: 37,511,865
vm_write_to_parent_iseq_local_count: 558,177
vm_read_from_parent_iseq_local_count: 14,317,032
code_region_bytes: 2,211,840
side_exit_count: 14,635,508
total_insn_count: 476,097,972
vm_insn_count: 253,795,154
zjit_insn_count: 222,302,818
ratio_in_zjit: 46.7%
```
</details>
### `liquid-render` After
<details>
```
Average of last 21, non-warmup iters: 272ms
***ZJIT: Printing ZJIT statistics on exit***
Top-20 not inlined C methods (96.8% of total 10,093,966):
Kernel#respond_to?: 4,932,224 (48.9%)
Hash#key?: 2,329,928 (23.1%)
Set#include?: 757,389 ( 7.5%)
String#===: 317,494 ( 3.1%)
String#<<: 197,831 ( 2.0%)
Integer#<<: 162,268 ( 1.6%)
Kernel#is_a?: 159,892 ( 1.6%)
Kernel#format: 120,902 ( 1.2%)
Integer#/: 120,902 ( 1.2%)
Array#<<: 112,225 ( 1.1%)
Regexp.last_match: 92,382 ( 0.9%)
Hash#[]=: 86,145 ( 0.9%)
String#start_with?: 54,953 ( 0.5%)
Array#shift: 54,038 ( 0.5%)
CGI::EscapeExt#escapeHTML: 53,971 ( 0.5%)
Regexp#===: 47,848 ( 0.5%)
String#=~: 47,237 ( 0.5%)
Array#unshift: 46,051 ( 0.5%)
String#empty?: 41,750 ( 0.4%)
Array#push: 40,115 ( 0.4%)
Top-20 not annotated C methods (97.1% of total 10,116,938):
Kernel#respond_to?: 4,932,224 (48.8%)
Hash#key?: 2,329,928 (23.0%)
Set#include?: 757,389 ( 7.5%)
String#===: 317,494 ( 3.1%)
Kernel#is_a?: 203,084 ( 2.0%)
String#<<: 197,831 ( 2.0%)
Integer#<<: 162,268 ( 1.6%)
Kernel#format: 120,902 ( 1.2%)
Integer#/: 120,902 ( 1.2%)
Array#<<: 112,225 ( 1.1%)
Regexp.last_match: 92,382 ( 0.9%)
Hash#[]=: 86,145 ( 0.9%)
String#start_with?: 54,953 ( 0.5%)
Array#shift: 54,038 ( 0.5%)
CGI::EscapeExt#escapeHTML: 53,971 ( 0.5%)
Regexp#===: 47,848 ( 0.5%)
String#=~: 47,237 ( 0.5%)
Array#unshift: 46,051 ( 0.5%)
String#empty?: 41,750 ( 0.4%)
Array#push: 40,115 ( 0.4%)
Top-2 not optimized method types for send (100.0% of total 182,938):
iseq: 178,414 (97.5%)
cfunc: 4,524 ( 2.5%)
Top-4 not optimized method types for send_without_block (100.0% of total 2,492,246):
iseq: 2,376,511 (95.4%)
optimized: 115,702 ( 4.6%)
alias: 20 ( 0.0%)
null: 13 ( 0.0%)
Top-9 not optimized instructions (100.0% of total 667,727):
invokeblock: 221,375 (33.2%)
opt_neq: 161,971 (24.3%)
opt_and: 161,971 (24.3%)
opt_eq: 64,921 ( 9.7%)
invokesuper: 39,243 ( 5.9%)
opt_le: 15,838 ( 2.4%)
opt_minus: 1,534 ( 0.2%)
opt_send_without_block: 772 ( 0.1%)
opt_or: 102 ( 0.0%)
Top-9 send fallback reasons (100.0% of total 6,287,956):
send_without_block_polymorphic: 2,782,058 (44.2%)
send_without_block_not_optimized_method_type: 2,492,246 (39.6%)
not_optimized_instruction: 667,727 (10.6%)
send_not_optimized_method_type: 182,938 ( 2.9%)
send_without_block_no_profiles: 89,613 ( 1.4%)
send_polymorphic: 66,962 ( 1.1%)
send_no_profiles: 4,059 ( 0.1%)
obj_to_string_not_string: 2,352 ( 0.0%)
send_without_block_cfunc_array_variadic: 1 ( 0.0%)
Top-3 unhandled YARV insns (100.0% of total 81,482):
getclassvariable: 81,231 (99.7%)
once: 137 ( 0.2%)
getconstant: 114 ( 0.1%)
Top-3 compile error reasons (100.0% of total 5,286,310):
register_spill_on_alloc: 4,540,413 (85.9%)
exception_handler: 745,727 (14.1%)
register_spill_on_ccall: 170 ( 0.0%)
Top-12 side exit reasons (100.0% of total 14,244,881):
compile_error: 5,286,310 (37.1%)
guard_shape_failure: 3,346,873 (23.5%)
guard_type_failure: 2,477,071 (17.4%)
unhandled_splat: 2,104,447 (14.8%)
unhandled_kwarg: 926,828 ( 6.5%)
unhandled_yarv_insn: 81,482 ( 0.6%)
unhandled_hir_insn: 18,672 ( 0.1%)
patchpoint_stable_constant_names: 1,608 ( 0.0%)
obj_to_string_fallback: 902 ( 0.0%)
patchpoint_method_redefined: 599 ( 0.0%)
block_param_proxy_not_iseq_or_ifunc: 88 ( 0.0%)
interrupt: 1 ( 0.0%)
send_count: 39,591,410
dynamic_send_count: 6,287,956 (15.9%)
optimized_send_count: 33,303,454 (84.1%)
iseq_optimized_send_count: 13,514,283 (34.1%)
inline_cfunc_optimized_send_count: 6,823,745 (17.2%)
non_variadic_cfunc_optimized_send_count: 7,417,432 (18.7%)
variadic_cfunc_optimized_send_count: 5,547,994 (14.0%)
dynamic_getivar_count: 1,110,647
dynamic_setivar_count: 927,309
compiled_iseq_count: 403
failed_iseq_count: 48
compile_time: 968ms
profile_time: 3,547ms
gc_time: 22ms
invalidation_time: 0ms
vm_write_pc_count: 36,735,108
vm_write_sp_count: 36,508,262
vm_write_locals_count: 36,508,262
vm_write_stack_count: 36,508,262
vm_write_to_parent_iseq_local_count: 543,097
vm_read_from_parent_iseq_local_count: 13,930,672
code_region_bytes: 2,228,224
side_exit_count: 14,244,881
total_insn_count: 463,357,969
vm_insn_count: 247,003,727
zjit_insn_count: 216,354,242
ratio_in_zjit: 46.7%
```
</details>
### `lobsters` Before
<details>
```
Average of last 10, non-warmup iters: 898ms
***ZJIT: Printing ZJIT statistics on exit***
Top-20 not inlined C methods (61.3% of total 19,495,906):
String#<<: 1,764,437 ( 9.1%)
Kernel#is_a?: 1,615,120 ( 8.3%)
Hash#[]=: 1,159,455 ( 5.9%)
Regexp#match?: 777,496 ( 4.0%)
String#empty?: 722,953 ( 3.7%)
Hash#key?: 685,258 ( 3.5%)
Kernel#respond_to?: 602,017 ( 3.1%)
TrueClass#===: 447,671 ( 2.3%)
FalseClass#===: 439,276 ( 2.3%)
Array#include?: 426,758 ( 2.2%)
Kernel#block_given?: 405,271 ( 2.1%)
Hash#fetch: 382,302 ( 2.0%)
ObjectSpace::WeakKeyMap#[]: 356,654 ( 1.8%)
String#start_with?: 353,793 ( 1.8%)
Kernel#kind_of?: 340,341 ( 1.7%)
Kernel#dup: 328,162 ( 1.7%)
String.new: 306,667 ( 1.6%)
String#==: 287,549 ( 1.5%)
BasicObject#!=: 284,642 ( 1.5%)
String#length: 256,070 ( 1.3%)
Top-20 not annotated C methods (62.4% of total 19,796,172):
Kernel#is_a?: 1,993,676 (10.1%)
String#<<: 1,764,437 ( 8.9%)
Hash#[]=: 1,159,634 ( 5.9%)
Regexp#match?: 777,496 ( 3.9%)
String#empty?: 738,030 ( 3.7%)
Hash#key?: 685,258 ( 3.5%)
Kernel#respond_to?: 602,017 ( 3.0%)
TrueClass#===: 447,671 ( 2.3%)
FalseClass#===: 439,276 ( 2.2%)
Array#include?: 426,758 ( 2.2%)
Kernel#block_given?: 425,813 ( 2.2%)
Hash#fetch: 382,302 ( 1.9%)
ObjectSpace::WeakKeyMap#[]: 356,654 ( 1.8%)
String#start_with?: 353,793 ( 1.8%)
Kernel#kind_of?: 340,375 ( 1.7%)
Kernel#dup: 328,169 ( 1.7%)
String.new: 306,667 ( 1.5%)
String#==: 293,520 ( 1.5%)
BasicObject#!=: 284,825 ( 1.4%)
String#length: 256,070 ( 1.3%)
Top-2 not optimized method types for send (100.0% of total 115,007):
cfunc: 76,172 (66.2%)
iseq: 38,835 (33.8%)
Top-6 not optimized method types for send_without_block (100.0% of total 8,003,641):
iseq: 3,999,211 (50.0%)
bmethod: 1,750,271 (21.9%)
optimized: 1,653,426 (20.7%)
alias: 591,342 ( 7.4%)
null: 8,174 ( 0.1%)
cfunc: 1,217 ( 0.0%)
Top-13 not optimized instructions (100.0% of total 7,590,826):
invokesuper: 4,335,446 (57.1%)
invokeblock: 1,329,215 (17.5%)
sendforward: 841,463 (11.1%)
opt_eq: 810,614 (10.7%)
opt_plus: 141,773 ( 1.9%)
opt_minus: 52,270 ( 0.7%)
opt_send_without_block: 43,248 ( 0.6%)
opt_neq: 15,047 ( 0.2%)
opt_mult: 13,824 ( 0.2%)
opt_or: 7,451 ( 0.1%)
opt_lt: 348 ( 0.0%)
opt_ge: 91 ( 0.0%)
opt_gt: 36 ( 0.0%)
Top-9 send fallback reasons (100.0% of total 45,673,212):
send_without_block_polymorphic: 17,390,335 (38.1%)
send_no_profiles: 10,769,053 (23.6%)
send_without_block_not_optimized_method_type: 8,003,641 (17.5%)
not_optimized_instruction: 7,590,826 (16.6%)
send_without_block_no_profiles: 1,757,109 ( 3.8%)
send_not_optimized_method_type: 115,007 ( 0.3%)
send_without_block_cfunc_array_variadic: 31,149 ( 0.1%)
obj_to_string_not_string: 15,518 ( 0.0%)
send_without_block_direct_too_many_args: 574 ( 0.0%)
Top-9 unhandled YARV insns (100.0% of total 1,242,228):
expandarray: 622,203 (50.1%)
checkkeyword: 316,111 (25.4%)
getclassvariable: 120,540 ( 9.7%)
getblockparam: 88,480 ( 7.1%)
invokesuperforward: 78,842 ( 6.3%)
opt_duparray_send: 14,149 ( 1.1%)
getconstant: 1,588 ( 0.1%)
checkmatch: 288 ( 0.0%)
once: 27 ( 0.0%)
Top-3 compile error reasons (100.0% of total 6,769,693):
register_spill_on_alloc: 6,188,305 (91.4%)
register_spill_on_ccall: 347,108 ( 5.1%)
exception_handler: 234,280 ( 3.5%)
Top-17 side exit reasons (100.0% of total 20,142,827):
compile_error: 6,769,693 (33.6%)
guard_type_failure: 5,169,050 (25.7%)
guard_shape_failure: 3,726,362 (18.5%)
unhandled_yarv_insn: 1,242,228 ( 6.2%)
block_param_proxy_not_iseq_or_ifunc: 984,480 ( 4.9%)
unhandled_kwarg: 800,154 ( 4.0%)
unknown_newarray_send: 539,317 ( 2.7%)
patchpoint_stable_constant_names: 340,283 ( 1.7%)
unhandled_splat: 229,440 ( 1.1%)
unhandled_hir_insn: 147,351 ( 0.7%)
patchpoint_no_singleton_class: 128,856 ( 0.6%)
patchpoint_method_redefined: 32,718 ( 0.2%)
block_param_proxy_modified: 25,274 ( 0.1%)
patchpoint_no_ep_escape: 7,559 ( 0.0%)
obj_to_string_fallback: 24 ( 0.0%)
guard_type_not_failure: 22 ( 0.0%)
interrupt: 16 ( 0.0%)
send_count: 120,815,640
dynamic_send_count: 45,673,212 (37.8%)
optimized_send_count: 75,142,428 (62.2%)
iseq_optimized_send_count: 32,188,039 (26.6%)
inline_cfunc_optimized_send_count: 23,458,483 (19.4%)
non_variadic_cfunc_optimized_send_count: 14,809,797 (12.3%)
variadic_cfunc_optimized_send_count: 4,686,109 ( 3.9%)
dynamic_getivar_count: 13,023,437
dynamic_setivar_count: 12,311,158
compiled_iseq_count: 4,806
failed_iseq_count: 466
compile_time: 8,943ms
profile_time: 99ms
gc_time: 45ms
invalidation_time: 239ms
vm_write_pc_count: 113,652,291
vm_write_sp_count: 111,209,623
vm_write_locals_count: 111,209,623
vm_write_stack_count: 111,209,623
vm_write_to_parent_iseq_local_count: 516,800
vm_read_from_parent_iseq_local_count: 11,225,587
code_region_bytes: 22,609,920
side_exit_count: 20,142,827
total_insn_count: 926,088,942
vm_insn_count: 297,636,255
zjit_insn_count: 628,452,687
ratio_in_zjit: 67.9%
```
</details>
### `lobsters` After
<details>
```
Average of last 10, non-warmup iters: 919ms
***ZJIT: Printing ZJIT statistics on exit***
Top-20 not inlined C methods (61.3% of total 19,495,868):
String#<<: 1,764,437 ( 9.1%)
Kernel#is_a?: 1,615,110 ( 8.3%)
Hash#[]=: 1,159,455 ( 5.9%)
Regexp#match?: 777,496 ( 4.0%)
String#empty?: 722,953 ( 3.7%)
Hash#key?: 685,258 ( 3.5%)
Kernel#respond_to?: 602,016 ( 3.1%)
TrueClass#===: 447,671 ( 2.3%)
FalseClass#===: 439,276 ( 2.3%)
Array#include?: 426,758 ( 2.2%)
Kernel#block_given?: 405,271 ( 2.1%)
Hash#fetch: 382,302 ( 2.0%)
ObjectSpace::WeakKeyMap#[]: 356,654 ( 1.8%)
String#start_with?: 353,793 ( 1.8%)
Kernel#kind_of?: 340,341 ( 1.7%)
Kernel#dup: 328,162 ( 1.7%)
String.new: 306,667 ( 1.6%)
String#==: 287,545 ( 1.5%)
BasicObject#!=: 284,642 ( 1.5%)
String#length: 256,070 ( 1.3%)
Top-20 not annotated C methods (62.4% of total 19,796,134):
Kernel#is_a?: 1,993,666 (10.1%)
String#<<: 1,764,437 ( 8.9%)
Hash#[]=: 1,159,634 ( 5.9%)
Regexp#match?: 777,496 ( 3.9%)
String#empty?: 738,030 ( 3.7%)
Hash#key?: 685,258 ( 3.5%)
Kernel#respond_to?: 602,016 ( 3.0%)
TrueClass#===: 447,671 ( 2.3%)
FalseClass#===: 439,276 ( 2.2%)
Array#include?: 426,758 ( 2.2%)
Kernel#block_given?: 425,813 ( 2.2%)
Hash#fetch: 382,302 ( 1.9%)
ObjectSpace::WeakKeyMap#[]: 356,654 ( 1.8%)
String#start_with?: 353,793 ( 1.8%)
Kernel#kind_of?: 340,375 ( 1.7%)
Kernel#dup: 328,169 ( 1.7%)
String.new: 306,667 ( 1.5%)
String#==: 293,516 ( 1.5%)
BasicObject#!=: 284,825 ( 1.4%)
String#length: 256,070 ( 1.3%)
Top-4 not optimized method types for send (100.0% of total 4,749,678):
iseq: 2,563,391 (54.0%)
cfunc: 2,064,888 (43.5%)
alias: 118,577 ( 2.5%)
null: 2,822 ( 0.1%)
Top-6 not optimized method types for send_without_block (100.0% of total 8,003,641):
iseq: 3,999,211 (50.0%)
bmethod: 1,750,271 (21.9%)
optimized: 1,653,426 (20.7%)
alias: 591,342 ( 7.4%)
null: 8,174 ( 0.1%)
cfunc: 1,217 ( 0.0%)
Top-13 not optimized instructions (100.0% of total 7,590,818):
invokesuper: 4,335,442 (57.1%)
invokeblock: 1,329,215 (17.5%)
sendforward: 841,463 (11.1%)
opt_eq: 810,610 (10.7%)
opt_plus: 141,773 ( 1.9%)
opt_minus: 52,270 ( 0.7%)
opt_send_without_block: 43,248 ( 0.6%)
opt_neq: 15,047 ( 0.2%)
opt_mult: 13,824 ( 0.2%)
opt_or: 7,451 ( 0.1%)
opt_lt: 348 ( 0.0%)
opt_ge: 91 ( 0.0%)
opt_gt: 36 ( 0.0%)
Top-10 send fallback reasons (100.0% of total 43,152,037):
send_without_block_polymorphic: 17,390,322 (40.3%)
send_without_block_not_optimized_method_type: 8,003,641 (18.5%)
not_optimized_instruction: 7,590,818 (17.6%)
send_not_optimized_method_type: 4,749,678 (11.0%)
send_no_profiles: 2,893,666 ( 6.7%)
send_without_block_no_profiles: 1,757,109 ( 4.1%)
send_polymorphic: 719,562 ( 1.7%)
send_without_block_cfunc_array_variadic: 31,149 ( 0.1%)
obj_to_string_not_string: 15,518 ( 0.0%)
send_without_block_direct_too_many_args: 574 ( 0.0%)
Top-9 unhandled YARV insns (100.0% of total 1,242,215):
expandarray: 622,203 (50.1%)
checkkeyword: 316,111 (25.4%)
getclassvariable: 120,540 ( 9.7%)
getblockparam: 88,467 ( 7.1%)
invokesuperforward: 78,842 ( 6.3%)
opt_duparray_send: 14,149 ( 1.1%)
getconstant: 1,588 ( 0.1%)
checkmatch: 288 ( 0.0%)
once: 27 ( 0.0%)
Top-3 compile error reasons (100.0% of total 6,769,688):
register_spill_on_alloc: 6,188,305 (91.4%)
register_spill_on_ccall: 347,108 ( 5.1%)
exception_handler: 234,275 ( 3.5%)
Top-17 side exit reasons (100.0% of total 20,144,372):
compile_error: 6,769,688 (33.6%)
guard_type_failure: 5,169,204 (25.7%)
guard_shape_failure: 3,726,374 (18.5%)
unhandled_yarv_insn: 1,242,215 ( 6.2%)
block_param_proxy_not_iseq_or_ifunc: 984,480 ( 4.9%)
unhandled_kwarg: 800,154 ( 4.0%)
unknown_newarray_send: 539,317 ( 2.7%)
patchpoint_stable_constant_names: 340,283 ( 1.7%)
unhandled_splat: 229,440 ( 1.1%)
unhandled_hir_insn: 147,351 ( 0.7%)
patchpoint_no_singleton_class: 130,252 ( 0.6%)
patchpoint_method_redefined: 32,716 ( 0.2%)
block_param_proxy_modified: 25,274 ( 0.1%)
patchpoint_no_ep_escape: 7,559 ( 0.0%)
obj_to_string_fallback: 24 ( 0.0%)
guard_type_not_failure: 22 ( 0.0%)
interrupt: 19 ( 0.0%)
send_count: 120,812,030
dynamic_send_count: 43,152,037 (35.7%)
optimized_send_count: 77,659,993 (64.3%)
iseq_optimized_send_count: 32,187,900 (26.6%)
inline_cfunc_optimized_send_count: 23,458,491 (19.4%)
non_variadic_cfunc_optimized_send_count: 17,327,499 (14.3%)
variadic_cfunc_optimized_send_count: 4,686,103 ( 3.9%)
dynamic_getivar_count: 13,023,424
dynamic_setivar_count: 12,310,991
compiled_iseq_count: 4,806
failed_iseq_count: 466
compile_time: 9,012ms
profile_time: 104ms
gc_time: 44ms
invalidation_time: 239ms
vm_write_pc_count: 113,648,665
vm_write_sp_count: 111,205,997
vm_write_locals_count: 111,205,997
vm_write_stack_count: 111,205,997
vm_write_to_parent_iseq_local_count: 516,800
vm_read_from_parent_iseq_local_count: 11,225,587
code_region_bytes: 23,052,288
side_exit_count: 20,144,372
total_insn_count: 926,090,214
vm_insn_count: 297,647,811
zjit_insn_count: 628,442,403
ratio_in_zjit: 67.9%
```
</details>
| -rw-r--r-- | insns.def | 1 | ||||
| -rw-r--r-- | zjit/src/codegen.rs | 43 | ||||
| -rw-r--r-- | zjit/src/cruby_bindings.inc.rs | 53 | ||||
| -rw-r--r-- | zjit/src/hir.rs | 259 | ||||
| -rw-r--r-- | zjit/src/profile.rs | 2 |
5 files changed, 315 insertions, 43 deletions
@@ -846,6 +846,7 @@ send (CALL_DATA cd, ISEQ blockiseq) (...) (VALUE val) +// attr bool zjit_profile = true; // attr rb_snum_t sp_inc = sp_inc_of_sendish(cd->ci); // attr rb_snum_t comptime_sp_inc = sp_inc_of_sendish(ci); { diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 1f04e61dbc..87e0ed907a 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -411,7 +411,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio // Give up CCallWithFrame for 7+ args since asm.ccall() doesn't support it. Insn::CCallWithFrame { cd, state, args, .. } if args.len() > C_ARG_OPNDS.len() => gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::CCallWithFrameTooManyArgs), - Insn::CCallWithFrame { cfunc, args, cme, state, .. } => gen_ccall_with_frame(jit, asm, *cfunc, opnds!(args), *cme, &function.frame_state(*state)), + Insn::CCallWithFrame { cfunc, args, cme, state, blockiseq, .. } => + gen_ccall_with_frame(jit, asm, *cfunc, opnds!(args), *cme, *blockiseq, &function.frame_state(*state)), Insn::CCallVariadic { cfunc, recv, args, name: _, cme, state, return_type: _, elidable: _ } => { gen_ccall_variadic(jit, asm, *cfunc, opnd!(recv), opnds!(args), *cme, &function.frame_state(*state)) } @@ -673,20 +674,36 @@ fn gen_patch_point(jit: &mut JITState, asm: &mut Assembler, invariant: &Invarian } /// Generate code for a C function call that pushes a frame -fn gen_ccall_with_frame(jit: &mut JITState, asm: &mut Assembler, cfunc: *const u8, args: Vec<Opnd>, cme: *const rb_callable_method_entry_t, state: &FrameState) -> lir::Opnd { +fn gen_ccall_with_frame( + jit: &mut JITState, + asm: &mut Assembler, + cfunc: *const u8, + args: Vec<Opnd>, + cme: *const rb_callable_method_entry_t, + blockiseq: Option<IseqPtr>, + state: &FrameState, +) -> lir::Opnd { gen_incr_counter(asm, Counter::non_variadic_cfunc_optimized_send_count); - gen_prepare_non_leaf_call(jit, asm, state); + let caller_stack_size = state.stack_size() - 
args.len(); + + // Can't use gen_prepare_non_leaf_call() because we need to adjust the SP + // to account for the receiver and arguments (and block arguments if any) + gen_prepare_call_with_gc(asm, state, false); + gen_save_sp(asm, caller_stack_size); + gen_spill_stack(jit, asm, state); + gen_spill_locals(jit, asm, state); gen_push_frame(asm, args.len(), state, ControlFrame { recv: args[0], iseq: None, cme, frame_type: VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL, + block_iseq: blockiseq, }); asm_comment!(asm, "switch to new SP register"); - let sp_offset = (state.stack().len() - args.len() + VM_ENV_DATA_SIZE.as_usize()) * SIZEOF_VALUE; + let sp_offset = (caller_stack_size + VM_ENV_DATA_SIZE.as_usize()) * SIZEOF_VALUE; let new_sp = asm.add(SP, sp_offset.into()); asm.mov(SP, new_sp); @@ -738,6 +755,7 @@ fn gen_ccall_variadic( iseq: None, cme, frame_type: VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL, + block_iseq: None, }); asm_comment!(asm, "switch to new SP register"); @@ -1130,6 +1148,7 @@ fn gen_send_without_block_direct( iseq: Some(iseq), cme, frame_type: VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL, + block_iseq: None, }); asm_comment!(asm, "switch to new SP register"); @@ -1719,6 +1738,7 @@ struct ControlFrame { iseq: Option<IseqPtr>, cme: *const rb_callable_method_entry_t, frame_type: u32, + block_iseq: Option<IseqPtr>, } /// Compile an interpreter frame @@ -1735,9 +1755,20 @@ fn gen_push_frame(asm: &mut Assembler, argc: usize, state: &FrameState, frame: C }; let ep_offset = state.stack().len() as i32 + local_size - argc as i32 + VM_ENV_DATA_SIZE as i32 - 1; asm.store(Opnd::mem(64, SP, (ep_offset - 2) * SIZEOF_VALUE_I32), VALUE::from(frame.cme).into()); + + let block_handler_opnd = if let Some(block_iseq) = frame.block_iseq { + // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). + // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases + // with cfp->block_code. 
+ asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(block_iseq).into()); + let cfp_self_addr = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); + asm.or(cfp_self_addr, Opnd::Imm(1)) + } else { + VM_BLOCK_HANDLER_NONE.into() + }; + // ep[-1]: block_handler or prev EP - // block_handler is not supported for now - asm.store(Opnd::mem(64, SP, (ep_offset - 1) * SIZEOF_VALUE_I32), VM_BLOCK_HANDLER_NONE.into()); + asm.store(Opnd::mem(64, SP, (ep_offset - 1) * SIZEOF_VALUE_I32), block_handler_opnd); // ep[0]: ENV_FLAGS asm.store(Opnd::mem(64, SP, ep_offset * SIZEOF_VALUE_I32), frame.frame_type.into()); diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index c67e229a80..af604661b2 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -680,32 +680,33 @@ pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 215; pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 216; pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 217; pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 218; -pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 219; -pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 220; -pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 221; -pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 222; -pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 223; -pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 224; -pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 225; -pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 226; -pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 227; -pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 228; -pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 229; -pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 230; -pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 231; -pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 232; -pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 233; 
-pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 234; -pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 235; -pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 236; -pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 237; -pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 238; -pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 239; -pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 240; -pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 241; -pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 242; -pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 243; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 244; +pub const YARVINSN_zjit_send: ruby_vminsn_type = 219; +pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 220; +pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 221; +pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 222; +pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 223; +pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 224; +pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 225; +pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 226; +pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 227; +pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 228; +pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 229; +pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 230; +pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 231; +pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 232; +pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 233; +pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 234; +pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 235; +pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 236; +pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 237; +pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 239; +pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 240; +pub const YARVINSN_zjit_opt_empty_p: 
ruby_vminsn_type = 241; +pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 242; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 243; +pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 244; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 245; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 370ed56857..1f77f38dc8 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -668,6 +668,7 @@ pub enum Insn { state: InsnId, return_type: Type, elidable: bool, + blockiseq: Option<IseqPtr>, }, /// Call a variadic C function with signature: func(int argc, VALUE *argv, VALUE recv) @@ -1063,11 +1064,14 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { } Ok(()) }, - Insn::CCallWithFrame { cfunc, args, name, .. } => { + Insn::CCallWithFrame { cfunc, args, name, blockiseq, .. } => { write!(f, "CCallWithFrame {}@{:p}", name.contents_lossy(), self.ptr_map.map_ptr(cfunc))?; for arg in args { write!(f, ", {arg}")?; } + if let Some(blockiseq) = blockiseq { + write!(f, ", block={:p}", self.ptr_map.map_ptr(blockiseq))?; + } Ok(()) }, Insn::CCallVariadic { cfunc, recv, args, name, .. 
} => { @@ -1598,7 +1602,17 @@ impl Function { &ObjectAlloc { val, state } => ObjectAlloc { val: find!(val), state }, &ObjectAllocClass { class, state } => ObjectAllocClass { class, state: find!(state) }, &CCall { cfunc, ref args, name, return_type, elidable } => CCall { cfunc, args: find_vec!(args), name, return_type, elidable }, - &CCallWithFrame { cd, cfunc, ref args, cme, name, state, return_type, elidable } => CCallWithFrame { cd, cfunc, args: find_vec!(args), cme, name, state: find!(state), return_type, elidable }, + &CCallWithFrame { cd, cfunc, ref args, cme, name, state, return_type, elidable, blockiseq } => CCallWithFrame { + cd, + cfunc, + args: find_vec!(args), + cme, + name, + state: find!(state), + return_type, + elidable, + blockiseq, + }, &CCallVariadic { cfunc, recv, ref args, cme, name, state, return_type, elidable } => CCallVariadic { cfunc, recv: find!(recv), args: find_vec!(args), cme, name, state, return_type, elidable }, @@ -2134,7 +2148,7 @@ impl Function { } } // This doesn't actually optimize Send yet, just replaces the fallback reason to be more precise. - // TODO: Optimize Send + // The actual optimization is done in reduce_send_to_ccall. Insn::Send { recv, cd, state, .. } => { let frame_state = self.frame_state(state); let klass = if let Some(klass) = self.type_of(recv).runtime_exact_ruby_class() { @@ -2338,8 +2352,111 @@ impl Function { fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass: recv_class, method: method_id, cme: method }, state }); } - // Try to reduce one SendWithoutBlock to a CCall - fn reduce_to_ccall( + // Try to reduce a Send insn to a CCallWithFrame + fn reduce_send_to_ccall( + fun: &mut Function, + block: BlockId, + self_type: Type, + send: Insn, + send_insn_id: InsnId, + ) -> Result<(), ()> { + let Insn::Send { mut recv, cd, blockiseq, mut args, state, .. 
} = send else { + return Err(()); + }; + + let call_info = unsafe { (*cd).ci }; + let argc = unsafe { vm_ci_argc(call_info) }; + let method_id = unsafe { rb_vm_ci_mid(call_info) }; + + // If we have info about the class of the receiver + let (recv_class, profiled_type) = if let Some(class) = self_type.runtime_exact_ruby_class() { + (class, None) + } else { + let iseq_insn_idx = fun.frame_state(state).insn_idx; + let Some(recv_type) = fun.profiled_type_of_at(recv, iseq_insn_idx) else { return Err(()) }; + (recv_type.class(), Some(recv_type)) + }; + + // Do method lookup + let method: *const rb_callable_method_entry_struct = unsafe { rb_callable_method_entry(recv_class, method_id) }; + if method.is_null() { + return Err(()); + } + + // Filter for C methods + let def_type = unsafe { get_cme_def_type(method) }; + if def_type != VM_METHOD_TYPE_CFUNC { + return Err(()); + } + + // Find the `argc` (arity) of the C method, which describes the parameters it expects + let cfunc = unsafe { get_cme_def_body_cfunc(method) }; + let cfunc_argc = unsafe { get_mct_argc(cfunc) }; + match cfunc_argc { + 0.. => { + // (self, arg0, arg1, ..., argc) form + // + // Bail on argc mismatch + if argc != cfunc_argc as u32 { + return Err(()); + } + + let ci_flags = unsafe { vm_ci_flag(call_info) }; + + // When seeing &block argument, fall back to dynamic dispatch for now + // TODO: Support block forwarding + if ci_flags & VM_CALL_ARGS_BLOCKARG != 0 { + return Err(()); + } + + // Commit to the replacement. Put PatchPoint. 
+ gen_patch_points_for_optimized_ccall(fun, block, recv_class, method_id, method, state); + if recv_class.instance_can_have_singleton_class() { + fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::NoSingletonClass { klass: recv_class }, state }); + } + + if let Some(profiled_type) = profiled_type { + // Guard receiver class + recv = fun.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state }); + fun.insn_types[recv.0] = fun.infer_type(recv); + } + + let blockiseq = if blockiseq.is_null() { None } else { Some(blockiseq) }; + + // Emit a call + let cfunc = unsafe { get_mct_func(cfunc) }.cast(); + let mut cfunc_args = vec![recv]; + cfunc_args.append(&mut args); + + let ccall = fun.push_insn(block, Insn::CCallWithFrame { + cd, + cfunc, + args: cfunc_args, + cme: method, + name: method_id, + state, + return_type: types::BasicObject, + elidable: false, + blockiseq, + }); + fun.make_equal_to(send_insn_id, ccall); + return Ok(()); + } + // Variadic method + -1 => { + // func(int argc, VALUE *argv, VALUE recv) + return Err(()); + } + -2 => { + // (self, args_ruby_array) + return Err(()); + } + _ => unreachable!("unknown cfunc kind: argc={argc}") + } + } + + // Try to reduce a SendWithoutBlock insn to a CCall/CCallWithFrame + fn reduce_send_without_block_to_ccall( fun: &mut Function, block: BlockId, self_type: Type, @@ -2440,7 +2557,17 @@ impl Function { if get_option!(stats) { count_not_inlined_cfunc(fun, block, method); } - let ccall = fun.push_insn(block, Insn::CCallWithFrame { cd, cfunc, args: cfunc_args, cme: method, name: method_id, state, return_type, elidable }); + let ccall = fun.push_insn(block, Insn::CCallWithFrame { + cd, + cfunc, + args: cfunc_args, + cme: method, + name: method_id, + state, + return_type, + elidable, + blockiseq: None, + }); fun.make_equal_to(send_insn_id, ccall); } @@ -2555,11 +2682,21 @@ impl Function { let old_insns = std::mem::take(&mut self.blocks[block.0].insns); 
assert!(self.blocks[block.0].insns.is_empty()); for insn_id in old_insns { - if let send @ Insn::SendWithoutBlock { recv, .. } = self.find(insn_id) { - let recv_type = self.type_of(recv); - if reduce_to_ccall(self, block, recv_type, send, insn_id).is_ok() { - continue; + let send = self.find(insn_id); + match send { + send @ Insn::SendWithoutBlock { recv, .. } => { + let recv_type = self.type_of(recv); + if reduce_send_without_block_to_ccall(self, block, recv_type, send, insn_id).is_ok() { + continue; + } + } + send @ Insn::Send { recv, .. } => { + let recv_type = self.type_of(recv); + if reduce_send_to_ccall(self, block, recv_type, send, insn_id).is_ok() { + continue; + } } + _ => {} } self.push_insn_id(block, insn_id); } @@ -12584,6 +12721,108 @@ mod opt_tests { } #[test] + fn test_optimize_send_with_block() { + eval(r#" + def test = [1, 2, 3].map { |x| x * 2 } + test; test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@<compiled>:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v10:ArrayExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) + v12:ArrayExact = ArrayDup v10 + PatchPoint MethodRedefined(Array@0x1008, map@0x1010, cme:0x1018) + PatchPoint NoSingletonClass(Array@0x1008) + v23:BasicObject = CCallWithFrame map@0x1040, v12, block=0x1048 + CheckInterrupts + Return v23 + "); + } + + #[test] + fn test_do_not_optimize_send_variadic_with_block() { + eval(r#" + def test = [1, 2, 3].index { |x| x == 2 } + test; test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@<compiled>:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v10:ArrayExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) + v12:ArrayExact = ArrayDup v10 + v14:BasicObject = Send v12, 0x1008, :index + CheckInterrupts + Return v14 + "); + } + + #[test] + fn 
test_do_not_optimize_send_with_block_forwarding() { + eval(r#" + def test(&block) = [].map(&block) + test; test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@<compiled>:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:ArrayExact = NewArray + GuardBlockParamProxy l0 + v17:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000)) + v19:BasicObject = Send v14, 0x1008, :map, v17 + CheckInterrupts + Return v19 + "); + } + + #[test] + fn test_do_not_optimize_send_to_iseq_method_with_block() { + eval(r#" + def foo + yield 1 + end + + def test = foo {} + test; test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@<compiled>:6: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v11:BasicObject = Send v6, 0x1000, :foo + CheckInterrupts + Return v11 + "); + } + + #[test] fn test_inline_attr_reader_constant() { eval(" class C diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs index e935ec9731..a6c837df5a 100644 --- a/zjit/src/profile.rs +++ b/zjit/src/profile.rs @@ -83,7 +83,7 @@ fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) { YARVINSN_opt_length => profile_operands(profiler, profile, 1), YARVINSN_opt_size => profile_operands(profiler, profile, 1), YARVINSN_opt_succ => profile_operands(profiler, profile, 1), - YARVINSN_opt_send_without_block => { + YARVINSN_opt_send_without_block | YARVINSN_send => { let cd: *const rb_call_data = profiler.insn_opnd(0).as_ptr(); let argc = unsafe { vm_ci_argc((*cd).ci) }; // Profile all the arguments and self (+1). |
