summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Stan Lo <stan.lo@shopify.com> 2025-10-20 21:10:25 +0100
committer: GitHub <noreply@github.com> 2025-10-20 20:10:25 +0000
commit: e047cea2804c987c32450279a9b8fd78655cfa9d (patch)
tree: b1e9181d51164a2e019c95da03facd40f3a0fde4
parent: 33f1af6779a22ab84633b43d42dcf273a7e3bbe9 (diff)
ZJIT: Optimize send with block into CCallWithFrame (#14863)
Since `Send` has a block iseq, I updated `CCallWithFrame` to take an optional `blockiseq` as well, and then generate `CCallWithFrame` for `Send` when the condition is right. ## Stats `liquid-render` Benchmark | Metric | Before | After | Change | |----------------------|--------------------|--------------------|--------------------- | | send_no_profiles | 3,209,418 (34.1%) | 4,119 (0.1%) | -3,205,299 (-99.9%) | | dynamic_send_count | 9,410,758 (23.1%) | 6,459,678 (15.9%) | -2,951,080 (-31.4%) | | optimized_send_count | 31,269,388 (76.9%) | 34,220,474 (84.1%) | +2,951,086 (+9.4%) | `lobsters` Benchmark | Metric | Before | After | Change | |----------------------|------------|------------|---------------------| | send_no_profiles | 10,769,052 | 2,902,865 | -7,866,187 (-73.0%) | | dynamic_send_count | 45,673,185 | 42,880,160 | -2,793,025 (-6.1%) | | optimized_send_count | 75,142,407 | 78,378,514 | +3,236,107 (+4.3%) | ### `liquid-render` Before <details> ``` Average of last 22, non-warmup iters: 262ms ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (96.9% of total 10,370,809): Kernel#respond_to?: 5,069,204 (48.9%) Hash#key?: 2,394,488 (23.1%) Set#include?: 778,429 ( 7.5%) String#===: 326,134 ( 3.1%) String#<<: 203,231 ( 2.0%) Integer#<<: 166,768 ( 1.6%) Kernel#is_a?: 164,272 ( 1.6%) Kernel#format: 124,262 ( 1.2%) Integer#/: 124,262 ( 1.2%) Array#<<: 115,325 ( 1.1%) Regexp.last_match: 94,862 ( 0.9%) Hash#[]=: 88,485 ( 0.9%) String#start_with?: 55,933 ( 0.5%) CGI::EscapeExt#escapeHTML: 55,471 ( 0.5%) Array#shift: 55,298 ( 0.5%) Regexp#===: 48,928 ( 0.5%) String#=~: 48,477 ( 0.5%) Array#unshift: 47,331 ( 0.5%) String#empty?: 42,870 ( 0.4%) Array#push: 41,215 ( 0.4%) Top-20 not annotated C methods (97.1% of total 10,394,421): Kernel#respond_to?: 5,069,204 (48.8%) Hash#key?: 2,394,488 (23.0%) Set#include?: 778,429 ( 7.5%) String#===: 326,134 ( 3.1%) Kernel#is_a?: 208,664 ( 2.0%) String#<<: 203,231 ( 2.0%) Integer#<<: 166,768 ( 1.6%) Integer#/: 
124,262 ( 1.2%) Kernel#format: 124,262 ( 1.2%) Array#<<: 115,325 ( 1.1%) Regexp.last_match: 94,862 ( 0.9%) Hash#[]=: 88,485 ( 0.9%) String#start_with?: 55,933 ( 0.5%) CGI::EscapeExt#escapeHTML: 55,471 ( 0.5%) Array#shift: 55,298 ( 0.5%) Regexp#===: 48,928 ( 0.5%) String#=~: 48,477 ( 0.5%) Array#unshift: 47,331 ( 0.5%) String#empty?: 42,870 ( 0.4%) Array#push: 41,215 ( 0.4%) Top-2 not optimized method types for send (100.0% of total 2,382): cfunc: 1,196 (50.2%) iseq: 1,186 (49.8%) Top-4 not optimized method types for send_without_block (100.0% of total 2,561,006): iseq: 2,442,091 (95.4%) optimized: 118,882 ( 4.6%) alias: 20 ( 0.0%) null: 13 ( 0.0%) Top-9 not optimized instructions (100.0% of total 685,128): invokeblock: 227,376 (33.2%) opt_neq: 166,471 (24.3%) opt_and: 166,471 (24.3%) opt_eq: 66,721 ( 9.7%) invokesuper: 39,363 ( 5.7%) opt_le: 16,278 ( 2.4%) opt_minus: 1,574 ( 0.2%) opt_send_without_block: 772 ( 0.1%) opt_or: 102 ( 0.0%) Top-8 send fallback reasons (100.0% of total 9,410,758): send_no_profiles: 3,209,418 (34.1%) send_without_block_polymorphic: 2,858,558 (30.4%) send_without_block_not_optimized_method_type: 2,561,006 (27.2%) not_optimized_instruction: 685,128 ( 7.3%) send_without_block_no_profiles: 91,913 ( 1.0%) send_not_optimized_method_type: 2,382 ( 0.0%) obj_to_string_not_string: 2,352 ( 0.0%) send_without_block_cfunc_array_variadic: 1 ( 0.0%) Top-3 unhandled YARV insns (100.0% of total 83,682): getclassvariable: 83,431 (99.7%) once: 137 ( 0.2%) getconstant: 114 ( 0.1%) Top-3 compile error reasons (100.0% of total 5,431,910): register_spill_on_alloc: 4,665,393 (85.9%) exception_handler: 766,347 (14.1%) register_spill_on_ccall: 170 ( 0.0%) Top-11 side exit reasons (100.0% of total 14,635,508): compile_error: 5,431,910 (37.1%) guard_shape_failure: 3,436,341 (23.5%) guard_type_failure: 2,545,791 (17.4%) unhandled_splat: 2,162,907 (14.8%) unhandled_kwarg: 952,568 ( 6.5%) unhandled_yarv_insn: 83,682 ( 0.6%) unhandled_hir_insn: 19,112 ( 0.1%) 
patchpoint_stable_constant_names: 1,608 ( 0.0%) obj_to_string_fallback: 902 ( 0.0%) patchpoint_method_redefined: 599 ( 0.0%) block_param_proxy_not_iseq_or_ifunc: 88 ( 0.0%) send_count: 40,680,153 dynamic_send_count: 9,410,758 (23.1%) optimized_send_count: 31,269,395 (76.9%) iseq_optimized_send_count: 13,886,902 (34.1%) inline_cfunc_optimized_send_count: 7,011,684 (17.2%) non_variadic_cfunc_optimized_send_count: 4,670,333 (11.5%) variadic_cfunc_optimized_send_count: 5,700,476 (14.0%) dynamic_getivar_count: 1,144,613 dynamic_setivar_count: 950,830 compiled_iseq_count: 402 failed_iseq_count: 48 compile_time: 976ms profile_time: 3,223ms gc_time: 22ms invalidation_time: 0ms vm_write_pc_count: 37,744,491 vm_write_sp_count: 37,511,865 vm_write_locals_count: 37,511,865 vm_write_stack_count: 37,511,865 vm_write_to_parent_iseq_local_count: 558,177 vm_read_from_parent_iseq_local_count: 14,317,032 code_region_bytes: 2,211,840 side_exit_count: 14,635,508 total_insn_count: 476,097,972 vm_insn_count: 253,795,154 zjit_insn_count: 222,302,818 ratio_in_zjit: 46.7% ``` </details> ### `liquid-render` After <details> ``` Average of last 21, non-warmup iters: 272ms ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (96.8% of total 10,093,966): Kernel#respond_to?: 4,932,224 (48.9%) Hash#key?: 2,329,928 (23.1%) Set#include?: 757,389 ( 7.5%) String#===: 317,494 ( 3.1%) String#<<: 197,831 ( 2.0%) Integer#<<: 162,268 ( 1.6%) Kernel#is_a?: 159,892 ( 1.6%) Kernel#format: 120,902 ( 1.2%) Integer#/: 120,902 ( 1.2%) Array#<<: 112,225 ( 1.1%) Regexp.last_match: 92,382 ( 0.9%) Hash#[]=: 86,145 ( 0.9%) String#start_with?: 54,953 ( 0.5%) Array#shift: 54,038 ( 0.5%) CGI::EscapeExt#escapeHTML: 53,971 ( 0.5%) Regexp#===: 47,848 ( 0.5%) String#=~: 47,237 ( 0.5%) Array#unshift: 46,051 ( 0.5%) String#empty?: 41,750 ( 0.4%) Array#push: 40,115 ( 0.4%) Top-20 not annotated C methods (97.1% of total 10,116,938): Kernel#respond_to?: 4,932,224 (48.8%) Hash#key?: 2,329,928 (23.0%) 
Set#include?: 757,389 ( 7.5%) String#===: 317,494 ( 3.1%) Kernel#is_a?: 203,084 ( 2.0%) String#<<: 197,831 ( 2.0%) Integer#<<: 162,268 ( 1.6%) Kernel#format: 120,902 ( 1.2%) Integer#/: 120,902 ( 1.2%) Array#<<: 112,225 ( 1.1%) Regexp.last_match: 92,382 ( 0.9%) Hash#[]=: 86,145 ( 0.9%) String#start_with?: 54,953 ( 0.5%) Array#shift: 54,038 ( 0.5%) CGI::EscapeExt#escapeHTML: 53,971 ( 0.5%) Regexp#===: 47,848 ( 0.5%) String#=~: 47,237 ( 0.5%) Array#unshift: 46,051 ( 0.5%) String#empty?: 41,750 ( 0.4%) Array#push: 40,115 ( 0.4%) Top-2 not optimized method types for send (100.0% of total 182,938): iseq: 178,414 (97.5%) cfunc: 4,524 ( 2.5%) Top-4 not optimized method types for send_without_block (100.0% of total 2,492,246): iseq: 2,376,511 (95.4%) optimized: 115,702 ( 4.6%) alias: 20 ( 0.0%) null: 13 ( 0.0%) Top-9 not optimized instructions (100.0% of total 667,727): invokeblock: 221,375 (33.2%) opt_neq: 161,971 (24.3%) opt_and: 161,971 (24.3%) opt_eq: 64,921 ( 9.7%) invokesuper: 39,243 ( 5.9%) opt_le: 15,838 ( 2.4%) opt_minus: 1,534 ( 0.2%) opt_send_without_block: 772 ( 0.1%) opt_or: 102 ( 0.0%) Top-9 send fallback reasons (100.0% of total 6,287,956): send_without_block_polymorphic: 2,782,058 (44.2%) send_without_block_not_optimized_method_type: 2,492,246 (39.6%) not_optimized_instruction: 667,727 (10.6%) send_not_optimized_method_type: 182,938 ( 2.9%) send_without_block_no_profiles: 89,613 ( 1.4%) send_polymorphic: 66,962 ( 1.1%) send_no_profiles: 4,059 ( 0.1%) obj_to_string_not_string: 2,352 ( 0.0%) send_without_block_cfunc_array_variadic: 1 ( 0.0%) Top-3 unhandled YARV insns (100.0% of total 81,482): getclassvariable: 81,231 (99.7%) once: 137 ( 0.2%) getconstant: 114 ( 0.1%) Top-3 compile error reasons (100.0% of total 5,286,310): register_spill_on_alloc: 4,540,413 (85.9%) exception_handler: 745,727 (14.1%) register_spill_on_ccall: 170 ( 0.0%) Top-12 side exit reasons (100.0% of total 14,244,881): compile_error: 5,286,310 (37.1%) guard_shape_failure: 3,346,873 
(23.5%) guard_type_failure: 2,477,071 (17.4%) unhandled_splat: 2,104,447 (14.8%) unhandled_kwarg: 926,828 ( 6.5%) unhandled_yarv_insn: 81,482 ( 0.6%) unhandled_hir_insn: 18,672 ( 0.1%) patchpoint_stable_constant_names: 1,608 ( 0.0%) obj_to_string_fallback: 902 ( 0.0%) patchpoint_method_redefined: 599 ( 0.0%) block_param_proxy_not_iseq_or_ifunc: 88 ( 0.0%) interrupt: 1 ( 0.0%) send_count: 39,591,410 dynamic_send_count: 6,287,956 (15.9%) optimized_send_count: 33,303,454 (84.1%) iseq_optimized_send_count: 13,514,283 (34.1%) inline_cfunc_optimized_send_count: 6,823,745 (17.2%) non_variadic_cfunc_optimized_send_count: 7,417,432 (18.7%) variadic_cfunc_optimized_send_count: 5,547,994 (14.0%) dynamic_getivar_count: 1,110,647 dynamic_setivar_count: 927,309 compiled_iseq_count: 403 failed_iseq_count: 48 compile_time: 968ms profile_time: 3,547ms gc_time: 22ms invalidation_time: 0ms vm_write_pc_count: 36,735,108 vm_write_sp_count: 36,508,262 vm_write_locals_count: 36,508,262 vm_write_stack_count: 36,508,262 vm_write_to_parent_iseq_local_count: 543,097 vm_read_from_parent_iseq_local_count: 13,930,672 code_region_bytes: 2,228,224 side_exit_count: 14,244,881 total_insn_count: 463,357,969 vm_insn_count: 247,003,727 zjit_insn_count: 216,354,242 ratio_in_zjit: 46.7% ``` </details> ### `lobsters` Before <details> ``` Average of last 10, non-warmup iters: 898ms ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (61.3% of total 19,495,906): String#<<: 1,764,437 ( 9.1%) Kernel#is_a?: 1,615,120 ( 8.3%) Hash#[]=: 1,159,455 ( 5.9%) Regexp#match?: 777,496 ( 4.0%) String#empty?: 722,953 ( 3.7%) Hash#key?: 685,258 ( 3.5%) Kernel#respond_to?: 602,017 ( 3.1%) TrueClass#===: 447,671 ( 2.3%) FalseClass#===: 439,276 ( 2.3%) Array#include?: 426,758 ( 2.2%) Kernel#block_given?: 405,271 ( 2.1%) Hash#fetch: 382,302 ( 2.0%) ObjectSpace::WeakKeyMap#[]: 356,654 ( 1.8%) String#start_with?: 353,793 ( 1.8%) Kernel#kind_of?: 340,341 ( 1.7%) Kernel#dup: 328,162 ( 1.7%) String.new: 
306,667 ( 1.6%) String#==: 287,549 ( 1.5%) BasicObject#!=: 284,642 ( 1.5%) String#length: 256,070 ( 1.3%) Top-20 not annotated C methods (62.4% of total 19,796,172): Kernel#is_a?: 1,993,676 (10.1%) String#<<: 1,764,437 ( 8.9%) Hash#[]=: 1,159,634 ( 5.9%) Regexp#match?: 777,496 ( 3.9%) String#empty?: 738,030 ( 3.7%) Hash#key?: 685,258 ( 3.5%) Kernel#respond_to?: 602,017 ( 3.0%) TrueClass#===: 447,671 ( 2.3%) FalseClass#===: 439,276 ( 2.2%) Array#include?: 426,758 ( 2.2%) Kernel#block_given?: 425,813 ( 2.2%) Hash#fetch: 382,302 ( 1.9%) ObjectSpace::WeakKeyMap#[]: 356,654 ( 1.8%) String#start_with?: 353,793 ( 1.8%) Kernel#kind_of?: 340,375 ( 1.7%) Kernel#dup: 328,169 ( 1.7%) String.new: 306,667 ( 1.5%) String#==: 293,520 ( 1.5%) BasicObject#!=: 284,825 ( 1.4%) String#length: 256,070 ( 1.3%) Top-2 not optimized method types for send (100.0% of total 115,007): cfunc: 76,172 (66.2%) iseq: 38,835 (33.8%) Top-6 not optimized method types for send_without_block (100.0% of total 8,003,641): iseq: 3,999,211 (50.0%) bmethod: 1,750,271 (21.9%) optimized: 1,653,426 (20.7%) alias: 591,342 ( 7.4%) null: 8,174 ( 0.1%) cfunc: 1,217 ( 0.0%) Top-13 not optimized instructions (100.0% of total 7,590,826): invokesuper: 4,335,446 (57.1%) invokeblock: 1,329,215 (17.5%) sendforward: 841,463 (11.1%) opt_eq: 810,614 (10.7%) opt_plus: 141,773 ( 1.9%) opt_minus: 52,270 ( 0.7%) opt_send_without_block: 43,248 ( 0.6%) opt_neq: 15,047 ( 0.2%) opt_mult: 13,824 ( 0.2%) opt_or: 7,451 ( 0.1%) opt_lt: 348 ( 0.0%) opt_ge: 91 ( 0.0%) opt_gt: 36 ( 0.0%) Top-9 send fallback reasons (100.0% of total 45,673,212): send_without_block_polymorphic: 17,390,335 (38.1%) send_no_profiles: 10,769,053 (23.6%) send_without_block_not_optimized_method_type: 8,003,641 (17.5%) not_optimized_instruction: 7,590,826 (16.6%) send_without_block_no_profiles: 1,757,109 ( 3.8%) send_not_optimized_method_type: 115,007 ( 0.3%) send_without_block_cfunc_array_variadic: 31,149 ( 0.1%) obj_to_string_not_string: 15,518 ( 0.0%) 
send_without_block_direct_too_many_args: 574 ( 0.0%) Top-9 unhandled YARV insns (100.0% of total 1,242,228): expandarray: 622,203 (50.1%) checkkeyword: 316,111 (25.4%) getclassvariable: 120,540 ( 9.7%) getblockparam: 88,480 ( 7.1%) invokesuperforward: 78,842 ( 6.3%) opt_duparray_send: 14,149 ( 1.1%) getconstant: 1,588 ( 0.1%) checkmatch: 288 ( 0.0%) once: 27 ( 0.0%) Top-3 compile error reasons (100.0% of total 6,769,693): register_spill_on_alloc: 6,188,305 (91.4%) register_spill_on_ccall: 347,108 ( 5.1%) exception_handler: 234,280 ( 3.5%) Top-17 side exit reasons (100.0% of total 20,142,827): compile_error: 6,769,693 (33.6%) guard_type_failure: 5,169,050 (25.7%) guard_shape_failure: 3,726,362 (18.5%) unhandled_yarv_insn: 1,242,228 ( 6.2%) block_param_proxy_not_iseq_or_ifunc: 984,480 ( 4.9%) unhandled_kwarg: 800,154 ( 4.0%) unknown_newarray_send: 539,317 ( 2.7%) patchpoint_stable_constant_names: 340,283 ( 1.7%) unhandled_splat: 229,440 ( 1.1%) unhandled_hir_insn: 147,351 ( 0.7%) patchpoint_no_singleton_class: 128,856 ( 0.6%) patchpoint_method_redefined: 32,718 ( 0.2%) block_param_proxy_modified: 25,274 ( 0.1%) patchpoint_no_ep_escape: 7,559 ( 0.0%) obj_to_string_fallback: 24 ( 0.0%) guard_type_not_failure: 22 ( 0.0%) interrupt: 16 ( 0.0%) send_count: 120,815,640 dynamic_send_count: 45,673,212 (37.8%) optimized_send_count: 75,142,428 (62.2%) iseq_optimized_send_count: 32,188,039 (26.6%) inline_cfunc_optimized_send_count: 23,458,483 (19.4%) non_variadic_cfunc_optimized_send_count: 14,809,797 (12.3%) variadic_cfunc_optimized_send_count: 4,686,109 ( 3.9%) dynamic_getivar_count: 13,023,437 dynamic_setivar_count: 12,311,158 compiled_iseq_count: 4,806 failed_iseq_count: 466 compile_time: 8,943ms profile_time: 99ms gc_time: 45ms invalidation_time: 239ms vm_write_pc_count: 113,652,291 vm_write_sp_count: 111,209,623 vm_write_locals_count: 111,209,623 vm_write_stack_count: 111,209,623 vm_write_to_parent_iseq_local_count: 516,800 vm_read_from_parent_iseq_local_count: 11,225,587 
code_region_bytes: 22,609,920 side_exit_count: 20,142,827 total_insn_count: 926,088,942 vm_insn_count: 297,636,255 zjit_insn_count: 628,452,687 ratio_in_zjit: 67.9% ``` </details> ### `lobsters` After <details> ``` Average of last 10, non-warmup iters: 919ms ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (61.3% of total 19,495,868): String#<<: 1,764,437 ( 9.1%) Kernel#is_a?: 1,615,110 ( 8.3%) Hash#[]=: 1,159,455 ( 5.9%) Regexp#match?: 777,496 ( 4.0%) String#empty?: 722,953 ( 3.7%) Hash#key?: 685,258 ( 3.5%) Kernel#respond_to?: 602,016 ( 3.1%) TrueClass#===: 447,671 ( 2.3%) FalseClass#===: 439,276 ( 2.3%) Array#include?: 426,758 ( 2.2%) Kernel#block_given?: 405,271 ( 2.1%) Hash#fetch: 382,302 ( 2.0%) ObjectSpace::WeakKeyMap#[]: 356,654 ( 1.8%) String#start_with?: 353,793 ( 1.8%) Kernel#kind_of?: 340,341 ( 1.7%) Kernel#dup: 328,162 ( 1.7%) String.new: 306,667 ( 1.6%) String#==: 287,545 ( 1.5%) BasicObject#!=: 284,642 ( 1.5%) String#length: 256,070 ( 1.3%) Top-20 not annotated C methods (62.4% of total 19,796,134): Kernel#is_a?: 1,993,666 (10.1%) String#<<: 1,764,437 ( 8.9%) Hash#[]=: 1,159,634 ( 5.9%) Regexp#match?: 777,496 ( 3.9%) String#empty?: 738,030 ( 3.7%) Hash#key?: 685,258 ( 3.5%) Kernel#respond_to?: 602,016 ( 3.0%) TrueClass#===: 447,671 ( 2.3%) FalseClass#===: 439,276 ( 2.2%) Array#include?: 426,758 ( 2.2%) Kernel#block_given?: 425,813 ( 2.2%) Hash#fetch: 382,302 ( 1.9%) ObjectSpace::WeakKeyMap#[]: 356,654 ( 1.8%) String#start_with?: 353,793 ( 1.8%) Kernel#kind_of?: 340,375 ( 1.7%) Kernel#dup: 328,169 ( 1.7%) String.new: 306,667 ( 1.5%) String#==: 293,516 ( 1.5%) BasicObject#!=: 284,825 ( 1.4%) String#length: 256,070 ( 1.3%) Top-4 not optimized method types for send (100.0% of total 4,749,678): iseq: 2,563,391 (54.0%) cfunc: 2,064,888 (43.5%) alias: 118,577 ( 2.5%) null: 2,822 ( 0.1%) Top-6 not optimized method types for send_without_block (100.0% of total 8,003,641): iseq: 3,999,211 (50.0%) bmethod: 1,750,271 (21.9%) optimized: 
1,653,426 (20.7%) alias: 591,342 ( 7.4%) null: 8,174 ( 0.1%) cfunc: 1,217 ( 0.0%) Top-13 not optimized instructions (100.0% of total 7,590,818): invokesuper: 4,335,442 (57.1%) invokeblock: 1,329,215 (17.5%) sendforward: 841,463 (11.1%) opt_eq: 810,610 (10.7%) opt_plus: 141,773 ( 1.9%) opt_minus: 52,270 ( 0.7%) opt_send_without_block: 43,248 ( 0.6%) opt_neq: 15,047 ( 0.2%) opt_mult: 13,824 ( 0.2%) opt_or: 7,451 ( 0.1%) opt_lt: 348 ( 0.0%) opt_ge: 91 ( 0.0%) opt_gt: 36 ( 0.0%) Top-10 send fallback reasons (100.0% of total 43,152,037): send_without_block_polymorphic: 17,390,322 (40.3%) send_without_block_not_optimized_method_type: 8,003,641 (18.5%) not_optimized_instruction: 7,590,818 (17.6%) send_not_optimized_method_type: 4,749,678 (11.0%) send_no_profiles: 2,893,666 ( 6.7%) send_without_block_no_profiles: 1,757,109 ( 4.1%) send_polymorphic: 719,562 ( 1.7%) send_without_block_cfunc_array_variadic: 31,149 ( 0.1%) obj_to_string_not_string: 15,518 ( 0.0%) send_without_block_direct_too_many_args: 574 ( 0.0%) Top-9 unhandled YARV insns (100.0% of total 1,242,215): expandarray: 622,203 (50.1%) checkkeyword: 316,111 (25.4%) getclassvariable: 120,540 ( 9.7%) getblockparam: 88,467 ( 7.1%) invokesuperforward: 78,842 ( 6.3%) opt_duparray_send: 14,149 ( 1.1%) getconstant: 1,588 ( 0.1%) checkmatch: 288 ( 0.0%) once: 27 ( 0.0%) Top-3 compile error reasons (100.0% of total 6,769,688): register_spill_on_alloc: 6,188,305 (91.4%) register_spill_on_ccall: 347,108 ( 5.1%) exception_handler: 234,275 ( 3.5%) Top-17 side exit reasons (100.0% of total 20,144,372): compile_error: 6,769,688 (33.6%) guard_type_failure: 5,169,204 (25.7%) guard_shape_failure: 3,726,374 (18.5%) unhandled_yarv_insn: 1,242,215 ( 6.2%) block_param_proxy_not_iseq_or_ifunc: 984,480 ( 4.9%) unhandled_kwarg: 800,154 ( 4.0%) unknown_newarray_send: 539,317 ( 2.7%) patchpoint_stable_constant_names: 340,283 ( 1.7%) unhandled_splat: 229,440 ( 1.1%) unhandled_hir_insn: 147,351 ( 0.7%) patchpoint_no_singleton_class: 130,252 ( 
0.6%) patchpoint_method_redefined: 32,716 ( 0.2%) block_param_proxy_modified: 25,274 ( 0.1%) patchpoint_no_ep_escape: 7,559 ( 0.0%) obj_to_string_fallback: 24 ( 0.0%) guard_type_not_failure: 22 ( 0.0%) interrupt: 19 ( 0.0%) send_count: 120,812,030 dynamic_send_count: 43,152,037 (35.7%) optimized_send_count: 77,659,993 (64.3%) iseq_optimized_send_count: 32,187,900 (26.6%) inline_cfunc_optimized_send_count: 23,458,491 (19.4%) non_variadic_cfunc_optimized_send_count: 17,327,499 (14.3%) variadic_cfunc_optimized_send_count: 4,686,103 ( 3.9%) dynamic_getivar_count: 13,023,424 dynamic_setivar_count: 12,310,991 compiled_iseq_count: 4,806 failed_iseq_count: 466 compile_time: 9,012ms profile_time: 104ms gc_time: 44ms invalidation_time: 239ms vm_write_pc_count: 113,648,665 vm_write_sp_count: 111,205,997 vm_write_locals_count: 111,205,997 vm_write_stack_count: 111,205,997 vm_write_to_parent_iseq_local_count: 516,800 vm_read_from_parent_iseq_local_count: 11,225,587 code_region_bytes: 23,052,288 side_exit_count: 20,144,372 total_insn_count: 926,090,214 vm_insn_count: 297,647,811 zjit_insn_count: 628,442,403 ratio_in_zjit: 67.9% ``` </details>
-rw-r--r-- insns.def 1
-rw-r--r-- zjit/src/codegen.rs 43
-rw-r--r-- zjit/src/cruby_bindings.inc.rs 53
-rw-r--r-- zjit/src/hir.rs 259
-rw-r--r-- zjit/src/profile.rs 2
5 files changed, 315 insertions, 43 deletions
diff --git a/insns.def b/insns.def
index 8225d1ccea..ce358da285 100644
--- a/insns.def
+++ b/insns.def
@@ -846,6 +846,7 @@ send
(CALL_DATA cd, ISEQ blockiseq)
(...)
(VALUE val)
+// attr bool zjit_profile = true;
// attr rb_snum_t sp_inc = sp_inc_of_sendish(cd->ci);
// attr rb_snum_t comptime_sp_inc = sp_inc_of_sendish(ci);
{
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index 1f04e61dbc..87e0ed907a 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -411,7 +411,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
// Give up CCallWithFrame for 7+ args since asm.ccall() doesn't support it.
Insn::CCallWithFrame { cd, state, args, .. } if args.len() > C_ARG_OPNDS.len() =>
gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::CCallWithFrameTooManyArgs),
- Insn::CCallWithFrame { cfunc, args, cme, state, .. } => gen_ccall_with_frame(jit, asm, *cfunc, opnds!(args), *cme, &function.frame_state(*state)),
+ Insn::CCallWithFrame { cfunc, args, cme, state, blockiseq, .. } =>
+ gen_ccall_with_frame(jit, asm, *cfunc, opnds!(args), *cme, *blockiseq, &function.frame_state(*state)),
Insn::CCallVariadic { cfunc, recv, args, name: _, cme, state, return_type: _, elidable: _ } => {
gen_ccall_variadic(jit, asm, *cfunc, opnd!(recv), opnds!(args), *cme, &function.frame_state(*state))
}
@@ -673,20 +674,36 @@ fn gen_patch_point(jit: &mut JITState, asm: &mut Assembler, invariant: &Invarian
}
/// Generate code for a C function call that pushes a frame
-fn gen_ccall_with_frame(jit: &mut JITState, asm: &mut Assembler, cfunc: *const u8, args: Vec<Opnd>, cme: *const rb_callable_method_entry_t, state: &FrameState) -> lir::Opnd {
+fn gen_ccall_with_frame(
+ jit: &mut JITState,
+ asm: &mut Assembler,
+ cfunc: *const u8,
+ args: Vec<Opnd>,
+ cme: *const rb_callable_method_entry_t,
+ blockiseq: Option<IseqPtr>,
+ state: &FrameState,
+) -> lir::Opnd {
gen_incr_counter(asm, Counter::non_variadic_cfunc_optimized_send_count);
- gen_prepare_non_leaf_call(jit, asm, state);
+ let caller_stack_size = state.stack_size() - args.len();
+
+ // Can't use gen_prepare_non_leaf_call() because we need to adjust the SP
+ // to account for the receiver and arguments (and block arguments if any)
+ gen_prepare_call_with_gc(asm, state, false);
+ gen_save_sp(asm, caller_stack_size);
+ gen_spill_stack(jit, asm, state);
+ gen_spill_locals(jit, asm, state);
gen_push_frame(asm, args.len(), state, ControlFrame {
recv: args[0],
iseq: None,
cme,
frame_type: VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL,
+ block_iseq: blockiseq,
});
asm_comment!(asm, "switch to new SP register");
- let sp_offset = (state.stack().len() - args.len() + VM_ENV_DATA_SIZE.as_usize()) * SIZEOF_VALUE;
+ let sp_offset = (caller_stack_size + VM_ENV_DATA_SIZE.as_usize()) * SIZEOF_VALUE;
let new_sp = asm.add(SP, sp_offset.into());
asm.mov(SP, new_sp);
@@ -738,6 +755,7 @@ fn gen_ccall_variadic(
iseq: None,
cme,
frame_type: VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL,
+ block_iseq: None,
});
asm_comment!(asm, "switch to new SP register");
@@ -1130,6 +1148,7 @@ fn gen_send_without_block_direct(
iseq: Some(iseq),
cme,
frame_type: VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL,
+ block_iseq: None,
});
asm_comment!(asm, "switch to new SP register");
@@ -1719,6 +1738,7 @@ struct ControlFrame {
iseq: Option<IseqPtr>,
cme: *const rb_callable_method_entry_t,
frame_type: u32,
+ block_iseq: Option<IseqPtr>,
}
/// Compile an interpreter frame
@@ -1735,9 +1755,20 @@ fn gen_push_frame(asm: &mut Assembler, argc: usize, state: &FrameState, frame: C
};
let ep_offset = state.stack().len() as i32 + local_size - argc as i32 + VM_ENV_DATA_SIZE as i32 - 1;
asm.store(Opnd::mem(64, SP, (ep_offset - 2) * SIZEOF_VALUE_I32), VALUE::from(frame.cme).into());
+
+ let block_handler_opnd = if let Some(block_iseq) = frame.block_iseq {
+ // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
+ // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
+ // with cfp->block_code.
+ asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(block_iseq).into());
+ let cfp_self_addr = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF));
+ asm.or(cfp_self_addr, Opnd::Imm(1))
+ } else {
+ VM_BLOCK_HANDLER_NONE.into()
+ };
+
// ep[-1]: block_handler or prev EP
- // block_handler is not supported for now
- asm.store(Opnd::mem(64, SP, (ep_offset - 1) * SIZEOF_VALUE_I32), VM_BLOCK_HANDLER_NONE.into());
+ asm.store(Opnd::mem(64, SP, (ep_offset - 1) * SIZEOF_VALUE_I32), block_handler_opnd);
// ep[0]: ENV_FLAGS
asm.store(Opnd::mem(64, SP, ep_offset * SIZEOF_VALUE_I32), frame.frame_type.into());
diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs
index c67e229a80..af604661b2 100644
--- a/zjit/src/cruby_bindings.inc.rs
+++ b/zjit/src/cruby_bindings.inc.rs
@@ -680,32 +680,33 @@ pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 215;
pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 216;
pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 217;
pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 218;
-pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 219;
-pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 220;
-pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 221;
-pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 222;
-pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 223;
-pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 224;
-pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 225;
-pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 226;
-pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 227;
-pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 228;
-pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 229;
-pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 230;
-pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 231;
-pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 232;
-pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 233;
-pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 234;
-pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 235;
-pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 236;
-pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 237;
-pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 238;
-pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 239;
-pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 240;
-pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 241;
-pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 242;
-pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 243;
-pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 244;
+pub const YARVINSN_zjit_send: ruby_vminsn_type = 219;
+pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 220;
+pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 221;
+pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 222;
+pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 223;
+pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 224;
+pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 225;
+pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 226;
+pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 227;
+pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 228;
+pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 229;
+pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 230;
+pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 231;
+pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 232;
+pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 233;
+pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 234;
+pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 235;
+pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 236;
+pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 237;
+pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 238;
+pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 239;
+pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 240;
+pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 241;
+pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 242;
+pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 243;
+pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 244;
+pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 245;
pub type ruby_vminsn_type = u32;
pub type rb_iseq_callback = ::std::option::Option<
unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void),
diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs
index 370ed56857..1f77f38dc8 100644
--- a/zjit/src/hir.rs
+++ b/zjit/src/hir.rs
@@ -668,6 +668,7 @@ pub enum Insn {
state: InsnId,
return_type: Type,
elidable: bool,
+ blockiseq: Option<IseqPtr>,
},
/// Call a variadic C function with signature: func(int argc, VALUE *argv, VALUE recv)
@@ -1063,11 +1064,14 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
}
Ok(())
},
- Insn::CCallWithFrame { cfunc, args, name, .. } => {
+ Insn::CCallWithFrame { cfunc, args, name, blockiseq, .. } => {
write!(f, "CCallWithFrame {}@{:p}", name.contents_lossy(), self.ptr_map.map_ptr(cfunc))?;
for arg in args {
write!(f, ", {arg}")?;
}
+ if let Some(blockiseq) = blockiseq {
+ write!(f, ", block={:p}", self.ptr_map.map_ptr(blockiseq))?;
+ }
Ok(())
},
Insn::CCallVariadic { cfunc, recv, args, name, .. } => {
@@ -1598,7 +1602,17 @@ impl Function {
&ObjectAlloc { val, state } => ObjectAlloc { val: find!(val), state },
&ObjectAllocClass { class, state } => ObjectAllocClass { class, state: find!(state) },
&CCall { cfunc, ref args, name, return_type, elidable } => CCall { cfunc, args: find_vec!(args), name, return_type, elidable },
- &CCallWithFrame { cd, cfunc, ref args, cme, name, state, return_type, elidable } => CCallWithFrame { cd, cfunc, args: find_vec!(args), cme, name, state: find!(state), return_type, elidable },
+ &CCallWithFrame { cd, cfunc, ref args, cme, name, state, return_type, elidable, blockiseq } => CCallWithFrame {
+ cd,
+ cfunc,
+ args: find_vec!(args),
+ cme,
+ name,
+ state: find!(state),
+ return_type,
+ elidable,
+ blockiseq,
+ },
&CCallVariadic { cfunc, recv, ref args, cme, name, state, return_type, elidable } => CCallVariadic {
cfunc, recv: find!(recv), args: find_vec!(args), cme, name, state, return_type, elidable
},
@@ -2134,7 +2148,7 @@ impl Function {
}
}
// This doesn't actually optimize Send yet, just replaces the fallback reason to be more precise.
- // TODO: Optimize Send
+ // The actual optimization is done in reduce_send_to_ccall.
Insn::Send { recv, cd, state, .. } => {
let frame_state = self.frame_state(state);
let klass = if let Some(klass) = self.type_of(recv).runtime_exact_ruby_class() {
@@ -2338,8 +2352,111 @@ impl Function {
fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass: recv_class, method: method_id, cme: method }, state });
}
- // Try to reduce one SendWithoutBlock to a CCall
- fn reduce_to_ccall(
+ // Try to reduce a Send insn to a CCallWithFrame (fixed-arity C methods only, no &block argument). Every Err(()) is returned before anything is pushed to `block`, so the caller can keep the original Send.
+ fn reduce_send_to_ccall(
+ fun: &mut Function,
+ block: BlockId,
+ self_type: Type, // type of the receiver, as computed by the caller
+ send: Insn,
+ send_insn_id: InsnId,
+ ) -> Result<(), ()> {
+ let Insn::Send { mut recv, cd, blockiseq, mut args, state, .. } = send else {
+ return Err(());
+ };
+
+ let call_info = unsafe { (*cd).ci };
+ let argc = unsafe { vm_ci_argc(call_info) };
+ let method_id = unsafe { rb_vm_ci_mid(call_info) };
+
+ // Prefer the exact receiver class known from the type; otherwise use the profiled receiver type, which needs a GuardType below
+ let (recv_class, profiled_type) = if let Some(class) = self_type.runtime_exact_ruby_class() {
+ (class, None)
+ } else {
+ let iseq_insn_idx = fun.frame_state(state).insn_idx;
+ let Some(recv_type) = fun.profiled_type_of_at(recv, iseq_insn_idx) else { return Err(()) };
+ (recv_type.class(), Some(recv_type))
+ };
+
+ // Look up the callable method entry on the receiver class; bail if it cannot be resolved
+ let method: *const rb_callable_method_entry_struct = unsafe { rb_callable_method_entry(recv_class, method_id) };
+ if method.is_null() {
+ return Err(());
+ }
+
+ // Only methods implemented as C functions can become a CCallWithFrame
+ let def_type = unsafe { get_cme_def_type(method) };
+ if def_type != VM_METHOD_TYPE_CFUNC {
+ return Err(());
+ }
+
+ // Find the `argc` (arity) of the C method, which describes the parameters it expects
+ let cfunc = unsafe { get_cme_def_body_cfunc(method) };
+ let cfunc_argc = unsafe { get_mct_argc(cfunc) };
+ match cfunc_argc {
+ 0.. => {
+ // (self, arg0, arg1, ..., argc) form
+ //
+ // Bail on argc mismatch
+ if argc != cfunc_argc as u32 {
+ return Err(());
+ }
+
+ let ci_flags = unsafe { vm_ci_flag(call_info) };
+
+ // When seeing &block argument, fall back to dynamic dispatch for now
+ // TODO: Support block forwarding
+ if ci_flags & VM_CALL_ARGS_BLOCKARG != 0 {
+ return Err(());
+ }
+
+ // Commit to the replacement: all bail-outs of this arm are above. Put PatchPoints first.
+ gen_patch_points_for_optimized_ccall(fun, block, recv_class, method_id, method, state);
+ if recv_class.instance_can_have_singleton_class() {
+ fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::NoSingletonClass { klass: recv_class }, state });
+ }
+
+ if let Some(profiled_type) = profiled_type {
+ // Guard receiver class
+ recv = fun.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state });
+ fun.insn_types[recv.0] = fun.infer_type(recv); // record the type of the newly pushed guard
+ }
+
+ let blockiseq = if blockiseq.is_null() { None } else { Some(blockiseq) }; // a null iseq pointer is represented as None
+
+ // Emit a call
+ let cfunc = unsafe { get_mct_func(cfunc) }.cast();
+ let mut cfunc_args = vec![recv];
+ cfunc_args.append(&mut args);
+
+ let ccall = fun.push_insn(block, Insn::CCallWithFrame {
+ cd,
+ cfunc,
+ args: cfunc_args,
+ cme: method,
+ name: method_id,
+ state,
+ return_type: types::BasicObject,
+ elidable: false,
+ blockiseq,
+ });
+ fun.make_equal_to(send_insn_id, ccall);
+ return Ok(());
+ }
+ // Variadic method
+ -1 => {
+ // func(int argc, VALUE *argv, VALUE recv)
+ return Err(());
+ }
+ -2 => {
+ // (self, args_ruby_array)
+ return Err(());
+ }
+ _ => unreachable!("unknown cfunc kind: argc={cfunc_argc}") // report the cfunc's declared arity, not the call-site argc
+ }
+ }
+
+ // Try to reduce a SendWithoutBlock insn to a CCall/CCallWithFrame
+ fn reduce_send_without_block_to_ccall(
fun: &mut Function,
block: BlockId,
self_type: Type,
@@ -2440,7 +2557,17 @@ impl Function {
if get_option!(stats) {
count_not_inlined_cfunc(fun, block, method);
}
- let ccall = fun.push_insn(block, Insn::CCallWithFrame { cd, cfunc, args: cfunc_args, cme: method, name: method_id, state, return_type, elidable });
+ let ccall = fun.push_insn(block, Insn::CCallWithFrame {
+ cd,
+ cfunc,
+ args: cfunc_args,
+ cme: method,
+ name: method_id,
+ state,
+ return_type,
+ elidable,
+ blockiseq: None,
+ });
fun.make_equal_to(send_insn_id, ccall);
}
@@ -2555,11 +2682,21 @@ impl Function {
let old_insns = std::mem::take(&mut self.blocks[block.0].insns);
assert!(self.blocks[block.0].insns.is_empty());
for insn_id in old_insns {
- if let send @ Insn::SendWithoutBlock { recv, .. } = self.find(insn_id) {
- let recv_type = self.type_of(recv);
- if reduce_to_ccall(self, block, recv_type, send, insn_id).is_ok() {
- continue;
+ let send = self.find(insn_id);
+ match send {
+ send @ Insn::SendWithoutBlock { recv, .. } => {
+ let recv_type = self.type_of(recv);
+ if reduce_send_without_block_to_ccall(self, block, recv_type, send, insn_id).is_ok() {
+ continue;
+ }
+ }
+ send @ Insn::Send { recv, .. } => {
+ let recv_type = self.type_of(recv);
+ if reduce_send_to_ccall(self, block, recv_type, send, insn_id).is_ok() {
+ continue;
+ }
}
+ _ => {}
}
self.push_insn_id(block, insn_id);
}
@@ -12584,6 +12721,108 @@ mod opt_tests {
}
#[test]
+ fn test_optimize_send_with_block() { // literal block on a call: the snapshot expects CCallWithFrame with a block= operand, preceded by the two PatchPoints
+ eval(r#"
+ def test = [1, 2, 3].map { |x| x * 2 }
+ test; test
+ "#);
+ assert_snapshot!(hir_string("test"), @r"
+ fn test@<compiled>:2:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ Jump bb2(v1)
+ bb1(v4:BasicObject):
+ EntryPoint JIT(0)
+ Jump bb2(v4)
+ bb2(v6:BasicObject):
+ v10:ArrayExact[VALUE(0x1000)] = Const Value(VALUE(0x1000))
+ v12:ArrayExact = ArrayDup v10
+ PatchPoint MethodRedefined(Array@0x1008, map@0x1010, cme:0x1018)
+ PatchPoint NoSingletonClass(Array@0x1008)
+ v23:BasicObject = CCallWithFrame map@0x1040, v12, block=0x1048
+ CheckInterrupts
+ Return v23
+ ");
+ }
+
+ #[test]
+ fn test_do_not_optimize_send_variadic_with_block() { // per the test name the callee is variadic; the snapshot expects the Send to be left as-is
+ eval(r#"
+ def test = [1, 2, 3].index { |x| x == 2 }
+ test; test
+ "#);
+ assert_snapshot!(hir_string("test"), @r"
+ fn test@<compiled>:2:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ Jump bb2(v1)
+ bb1(v4:BasicObject):
+ EntryPoint JIT(0)
+ Jump bb2(v4)
+ bb2(v6:BasicObject):
+ v10:ArrayExact[VALUE(0x1000)] = Const Value(VALUE(0x1000))
+ v12:ArrayExact = ArrayDup v10
+ v14:BasicObject = Send v12, 0x1008, :index
+ CheckInterrupts
+ Return v14
+ ");
+ }
+
+ #[test]
+ fn test_do_not_optimize_send_with_block_forwarding() { // the call passes &block along; the snapshot expects the Send to be left as-is
+ eval(r#"
+ def test(&block) = [].map(&block)
+ test; test
+ "#);
+ assert_snapshot!(hir_string("test"), @r"
+ fn test@<compiled>:2:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ v2:BasicObject = GetLocal l0, SP@4
+ Jump bb2(v1, v2)
+ bb1(v5:BasicObject, v6:BasicObject):
+ EntryPoint JIT(0)
+ Jump bb2(v5, v6)
+ bb2(v8:BasicObject, v9:BasicObject):
+ v14:ArrayExact = NewArray
+ GuardBlockParamProxy l0
+ v17:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000))
+ v19:BasicObject = Send v14, 0x1008, :map, v17
+ CheckInterrupts
+ Return v19
+ ");
+ }
+
+ #[test]
+ fn test_do_not_optimize_send_to_iseq_method_with_block() { // `foo` is defined in Ruby rather than as a cfunc; the snapshot expects the Send to be left as-is
+ eval(r#"
+ def foo
+ yield 1
+ end
+
+ def test = foo {}
+ test; test
+ "#);
+ assert_snapshot!(hir_string("test"), @r"
+ fn test@<compiled>:6:
+ bb0():
+ EntryPoint interpreter
+ v1:BasicObject = LoadSelf
+ Jump bb2(v1)
+ bb1(v4:BasicObject):
+ EntryPoint JIT(0)
+ Jump bb2(v4)
+ bb2(v6:BasicObject):
+ v11:BasicObject = Send v6, 0x1000, :foo
+ CheckInterrupts
+ Return v11
+ ");
+ }
+
+ #[test]
fn test_inline_attr_reader_constant() {
eval("
class C
diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs
index e935ec9731..a6c837df5a 100644
--- a/zjit/src/profile.rs
+++ b/zjit/src/profile.rs
@@ -83,7 +83,7 @@ fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) {
YARVINSN_opt_length => profile_operands(profiler, profile, 1),
YARVINSN_opt_size => profile_operands(profiler, profile, 1),
YARVINSN_opt_succ => profile_operands(profiler, profile, 1),
- YARVINSN_opt_send_without_block => {
+ YARVINSN_opt_send_without_block | YARVINSN_send => {
let cd: *const rb_call_data = profiler.insn_opnd(0).as_ptr();
let argc = unsafe { vm_ci_argc((*cd).ci) };
// Profile all the arguments and self (+1).