1 files changed, 417 insertions, 197 deletions
diff --git a/yjit.rb b/yjit.rb
index 09ed3faaf3..eedd00c358 100644
--- a/yjit.rb
+++ b/yjit.rb
@@ -1,252 +1,449 @@
 # frozen_string_literal: true
+# :markup: markdown
 
-# This module allows for introspection of YJIT, CRuby's experimental in-process
-# just-in-time compiler. This module exists only to help develop YJIT, as such,
-# everything in the module is highly implementation specific and comes with no
-# API stability guarantee whatsoever.
+# This module allows for introspection of \YJIT, CRuby's just-in-time compiler.
+# Everything in the module is highly implementation specific and the API might
+# be less stable compared to the standard library.
 #
-# This module may not exist if YJIT does not support the particular platform
-# for which CRuby is built. There is also no API stability guarantee as to in
-# what situations this module is defined.
+# This module may not exist if \YJIT does not support the particular platform
+# for which CRuby is built.
 module RubyVM::YJIT
-  if defined?(Disasm)
-    def self.disasm(iseq, tty: $stdout && $stdout.tty?)
-      iseq = RubyVM::InstructionSequence.of(iseq)
+  # Check if \YJIT is enabled.
+  def self.enabled?
+    Primitive.cexpr! 'RBOOL(rb_yjit_enabled_p)'
+  end
 
-      blocks = blocks_for(iseq)
-      return if blocks.empty?
+  # Check if `--yjit-stats` is used.
+  def self.stats_enabled?
+    Primitive.rb_yjit_stats_enabled_p
+  end
 
-      str = String.new
-      str << iseq.disasm
-      str << "\n"
+  # Check if rb_yjit_trace_exit_locations_enabled_p is enabled.
+  def self.trace_exit_locations_enabled? # :nodoc:
+    Primitive.rb_yjit_trace_exit_locations_enabled_p
+  end
 
-      # Sort the blocks by increasing addresses
-      sorted_blocks = blocks.sort_by(&:address)
+  # Discard statistics collected for `--yjit-stats`.
+  def self.reset_stats!
+    Primitive.rb_yjit_reset_stats_bang
+  end
 
-      highlight = ->(str) {
-        if tty
-          "\x1b[1m#{str}\x1b[0m"
-        else
-          str
-        end
-      }
-
-      cs = Disasm.new
-      sorted_blocks.each_with_index do |block, i|
-        str << "== BLOCK #{i+1}/#{blocks.length}: #{block.code.length} BYTES, ISEQ RANGE [#{block.iseq_start_index},#{block.iseq_end_index}) ".ljust(80, "=")
-        str << "\n"
-
-        comments = comments_for(block.address, block.address + block.code.length)
-        comment_idx = 0
-        cs.disasm(block.code, block.address).each do |i|
-          while (comment = comments[comment_idx]) && comment.address <= i.address
-            str << "  ; #{highlight.call(comment.comment)}\n"
-            comment_idx += 1
-          end
+  # Enable \YJIT compilation. `stats` option decides whether to enable \YJIT stats or not.
+  #
+  # * `false`: Disable stats.
+  # * `true`: Enable stats. Print stats at exit.
+  # * `:quiet`: Enable stats. Do not print stats at exit.
+  def self.enable(stats: false)
+    return false if enabled?
+    at_exit { print_and_dump_stats } if stats
+    Primitive.rb_yjit_enable(stats, stats != :quiet)
+  end
+
+  # If --yjit-trace-exits is enabled parse the hashes from
+  # Primitive.rb_yjit_get_exit_locations into a format readable
+  # by Stackprof. This will allow us to find the exact location of a
+  # side exit in YJIT based on the instruction that is exiting.
+  def self.exit_locations # :nodoc:
+    return unless trace_exit_locations_enabled?
+
+    results = Primitive.rb_yjit_get_exit_locations
+    raw_samples = results[:raw].dup
+    line_samples = results[:lines].dup
+    frames = results[:frames].dup
+    samples_count = 0
+
+    # Loop through the instructions and set the frame hash with the data.
+    # We use nonexistent.def for the file name, otherwise insns.def will be displayed
+    # and that information isn't useful in this context.
+    RubyVM::INSTRUCTION_NAMES.each_with_index do |name, frame_id|
+      frame_hash = { samples: 0, total_samples: 0, edges: {}, name: name, file: "nonexistent.def", line: nil, lines: {} }
+      results[:frames][frame_id] = frame_hash
+      frames[frame_id] = frame_hash
+    end
 
-          str << sprintf(
-            "  %<address>08x:  %<instruction>s\t%<details>s\n",
-            address: i.address,
-            instruction: i.mnemonic,
-            details: i.op_str
-          )
+    # Loop through the raw_samples and build the hashes for StackProf.
+    # The loop is based off an example in the StackProf documentation and therefore
+    # this functionality can only work with that library.
+    #
+    # Raw Samples:
+    # [ length, frame1, frame2, frameN, ..., instruction, count
+    #
+    # Line Samples
+    # [ length, line_1, line_2, line_n, ..., dummy value, count
+    i = 0
+    while i < raw_samples.length
+      stack_length = raw_samples[i]
+      i += 1 # consume the stack length
+
+      sample_count = raw_samples[i + stack_length]
+
+      prev_frame_id = nil
+      stack_length.times do |idx|
+        idx += i
+        frame_id = raw_samples[idx]
+
+        if prev_frame_id
+          prev_frame = frames[prev_frame_id]
+          prev_frame[:edges][frame_id] ||= 0
+          prev_frame[:edges][frame_id] += sample_count
         end
+
+        frame_info = frames[frame_id]
+        frame_info[:total_samples] += sample_count
+
+        frame_info[:lines][line_samples[idx]] ||= [0, 0]
+        frame_info[:lines][line_samples[idx]][0] += sample_count
+
+        prev_frame_id = frame_id
       end
 
-      block_sizes = blocks.map { |block| block.code.length }
-      total_bytes = block_sizes.sum
-      str << "\n"
-      str << "Total code size: #{total_bytes} bytes"
-      str << "\n"
+      i += stack_length # consume the stack
 
-      str
-    end
+      top_frame_id = prev_frame_id
+      top_frame_line = 1
 
-    def self.comments_for(start_address, end_address)
-      Primitive.comments_for(start_address, end_address)
-    end
+      frames[top_frame_id][:samples] += sample_count
+      frames[top_frame_id][:lines] ||= {}
+      frames[top_frame_id][:lines][top_frame_line] ||= [0, 0]
+      frames[top_frame_id][:lines][top_frame_line][1] += sample_count
 
-    def self.graphviz_for(iseq)
-      iseq = RubyVM::InstructionSequence.of(iseq)
-      cs = Disasm.new
-
-      highlight = ->(comment) { "<b>#{comment}</b>" }
-      linebreak = "<br align=\"left\"/>\n"
-
-      buff = +''
-      blocks = blocks_for(iseq).sort_by(&:id)
-      buff << "digraph g {\n"
-
-      # Write the iseq info as a legend
-      buff << "  legend [shape=record fontsize=\"30\" fillcolor=\"lightgrey\" style=\"filled\"];\n"
-      buff << "  legend [label=\"{ Instruction Disassembly For: | {#{iseq.base_label}@#{iseq.absolute_path}:#{iseq.first_lineno}}}\"];\n"
-
-      # Subgraph contains disassembly
-      buff << "  subgraph disasm {\n"
-      buff << "  node [shape=record fontname=\"courier\"];\n"
-      buff << "  edge [fontname=\"courier\" penwidth=3];\n"
-      blocks.each do |block|
-        disasm = disasm_block(cs, block, highlight)
-
-        # convert newlines to breaks that graphviz understands
-        disasm.gsub!(/\n/, linebreak)
-
-        # strip leading whitespace
-        disasm.gsub!(/^\s+/, '')
-
-        buff << "b#{block.id} [label=<#{disasm}>];\n"
-        buff << block.outgoing_ids.map { |id|
-          next_block = blocks.bsearch { |nb| id <=> nb.id }
-          if next_block.address == (block.address + block.code.length)
-            "b#{block.id} -> b#{id}[label=\"Fall\"];"
-          else
-            "b#{block.id} -> b#{id}[label=\"Jump\" style=dashed];"
-          end
-        }.join("\n")
-        buff << "\n"
-      end
-      buff << "  }"
-      buff << "}"
-      buff
+      samples_count += sample_count
+      i += 1
     end
 
-    def self.disasm_block(cs, block, highlight)
-      comments = comments_for(block.address, block.address + block.code.length)
-      comment_idx = 0
-      str = +''
-      cs.disasm(block.code, block.address).each do |i|
-        while (comment = comments[comment_idx]) && comment.address <= i.address
-          str << "  ; #{highlight.call(comment.comment)}\n"
-          comment_idx += 1
-        end
+    results[:samples] = samples_count
+    # Set missed_samples and gc_samples to 0 as their values
+    # don't matter to us in this context.
+    results[:missed_samples] = 0
+    results[:gc_samples] = 0
+    results
+  end
 
-        str << sprintf(
-          "  %<address>08x:  %<instruction>s\t%<details>s\n",
-          address: i.address,
-          instruction: i.mnemonic,
-          details: i.op_str
-        )
-      end
-      str
+  # Marshal dumps exit locations to the given filename.
+  #
+  # Usage:
+  #
+  # If `--yjit-exit-locations` is passed, a file named
+  # "yjit_exit_locations.dump" will automatically be generated.
+  #
+  # If you want to collect traces manually, call `dump_exit_locations`
+  # directly.
+  #
+  # Note that calling this in a script will generate stats after the
+  # dump is created, so the stats data may include exits from the
+  # dump itself.
+  #
+  # In a script call:
+  #
+  #     at_exit do
+  #       RubyVM::YJIT.dump_exit_locations("my_file.dump")
+  #     end
+  #
+  # Then run the file with the following options:
+  #
+  #     ruby --yjit --yjit-trace-exits test.rb
+  #
+  # Once the code is done running, use Stackprof to read the dump file.
+  # See Stackprof documentation for options.
+  def self.dump_exit_locations(filename)
+    unless trace_exit_locations_enabled?
+      raise ArgumentError, "--yjit-trace-exits must be enabled to use dump_exit_locations."
     end
+
+    File.binwrite(filename, Marshal.dump(RubyVM::YJIT.exit_locations))
   end
 
-  # Return a hash for statistics generated for the --yjit-stats command line option.
-  # Return nil when option is not passed or unavailable.
-  def self.runtime_stats
-    # defined in yjit_iface.c
-    Primitive.get_yjit_stats
+  # Return a hash for statistics generated for the `--yjit-stats` command line option.
+  # Return `nil` when option is not passed or unavailable.
+  def self.runtime_stats(context: false)
+    stats = Primitive.rb_yjit_get_stats(context)
+    return stats if stats.nil?
+
+    stats[:object_shape_count] = Primitive.object_shape_count
+    return stats unless Primitive.rb_yjit_stats_enabled_p
+
+    side_exits = total_exit_count(stats)
+    total_exits = side_exits + stats[:leave_interp_return]
+
+    # Number of instructions that finish executing in YJIT.
+    # See :count-placement: about the subtraction.
+    retired_in_yjit = stats[:yjit_insns_count] - side_exits
+
+    # Average length of instruction sequences executed by YJIT
+    avg_len_in_yjit = total_exits > 0 ? retired_in_yjit.to_f / total_exits : 0
+
+    # Proportion of instructions that retire in YJIT
+    total_insns_count = retired_in_yjit + stats[:vm_insns_count]
+    yjit_ratio_pct = 100.0 * retired_in_yjit.to_f / total_insns_count
+    stats[:total_insns_count] = total_insns_count
+    stats[:ratio_in_yjit] = yjit_ratio_pct
+
+    # Make those stats available in RubyVM::YJIT.runtime_stats as well
+    stats[:side_exit_count]  = side_exits
+    stats[:total_exit_count] = total_exits
+    stats[:avg_len_in_yjit]  = avg_len_in_yjit
+
+    stats
   end
 
-  # Discard statistics collected for --yjit-stats.
-  def self.reset_stats!
-    # defined in yjit_iface.c
-    Primitive.reset_stats_bang
+  # Format and print out counters as a String. This returns a non-empty
+  # content only when `--yjit-stats` is enabled.
+  def self.stats_string
+    # Lazily require StringIO to avoid breaking miniruby
+    require 'stringio'
+    strio = StringIO.new
+    _print_stats(out: strio)
+    strio.string
   end
 
-  def self.stats_enabled?
-    Primitive.yjit_stats_enabled_p
+  # Produce disassembly for an iseq. This requires a `--enable-yjit=dev` build.
+  def self.disasm(iseq) # :nodoc:
+    # If a method or proc is passed in, get its iseq
+    iseq = RubyVM::InstructionSequence.of(iseq)
+
+    if self.enabled?
+      # Produce the disassembly string
+      # Include the YARV iseq disasm in the string for additional context
+      iseq.disasm + "\n" + Primitive.rb_yjit_disasm_iseq(iseq)
+    else
+      iseq.disasm
+    end
   end
 
-  def self.enabled?
-    Primitive.cexpr! 'RBOOL(rb_yjit_enabled_p())'
+  # Produce a list of instructions compiled by YJIT for an iseq
+  def self.insns_compiled(iseq) # :nodoc:
+    return nil unless self.enabled?
+
+    # If a method or proc is passed in, get its iseq
+    iseq = RubyVM::InstructionSequence.of(iseq)
+    Primitive.rb_yjit_insns_compiled(iseq)
+  end
+
+  # Discard existing compiled code to reclaim memory
+  # and allow for recompilations in the future.
+  def self.code_gc
+    Primitive.rb_yjit_code_gc
   end
 
-  def self.simulate_oom!
-    Primitive.simulate_oom_bang
+  def self.simulate_oom! # :nodoc:
+    Primitive.rb_yjit_simulate_oom_bang
   end
 
-  # Avoid calling a method here to not interfere with compilation tests
-  if Primitive.yjit_stats_enabled_p
-    at_exit { _print_stats }
+  # Avoid calling a Ruby method here to not interfere with compilation tests
+  if Primitive.rb_yjit_stats_enabled_p
+    at_exit { print_and_dump_stats }
   end
 
   class << self
+    # :stopdoc:
     private
 
-    # Format and print out counters
-    def _print_stats
-      stats = runtime_stats
-      return unless stats
+    # Print stats and dump exit locations
+    def print_and_dump_stats # :nodoc:
+      if Primitive.rb_yjit_print_stats_p
+        _print_stats
+      end
+      _dump_locations
+    end
 
-      $stderr.puts("***YJIT: Printing YJIT statistics on exit***")
+    def _dump_locations # :nodoc:
+      return unless trace_exit_locations_enabled?
 
-      print_counters(stats, prefix: 'send_', prompt: 'method call exit reasons: ')
-      print_counters(stats, prefix: 'invokesuper_', prompt: 'invokesuper exit reasons: ')
-      print_counters(stats, prefix: 'leave_', prompt: 'leave exit reasons: ')
-      print_counters(stats, prefix: 'gbpp_', prompt: 'getblockparamproxy exit reasons: ')
-      print_counters(stats, prefix: 'getivar_', prompt: 'getinstancevariable exit reasons:')
-      print_counters(stats, prefix: 'setivar_', prompt: 'setinstancevariable exit reasons:')
-      print_counters(stats, prefix: 'oaref_', prompt: 'opt_aref exit reasons: ')
-      print_counters(stats, prefix: 'expandarray_', prompt: 'expandarray exit reasons: ')
-      print_counters(stats, prefix: 'opt_getinlinecache_', prompt: 'opt_getinlinecache exit reasons: ')
-      print_counters(stats, prefix: 'invalidate_', prompt: 'invalidation reasons: ')
+      filename = "yjit_exit_locations.dump"
+      dump_exit_locations(filename)
 
-      side_exits = total_exit_count(stats)
-      total_exits = side_exits + stats[:leave_interp_return]
+      $stderr.puts("YJIT exit locations dumped to `#{filename}`.")
+    end
+
+    # Print a summary of reasons for adverse performance events (e.g. exits)
+    def _print_stats_reasons(stats, out) # :nodoc:
+      print_counters(stats, out: out, prefix: 'send_', prompt: 'method call fallback reasons: ')
+      print_counters(stats, out: out, prefix: 'invokeblock_', prompt: 'invokeblock fallback reasons: ')
+      print_counters(stats, out: out, prefix: 'invokesuper_', prompt: 'invokesuper fallback reasons: ')
+      print_counters(stats, out: out, prefix: 'guard_send_', prompt: 'method call exit reasons: ')
+      print_counters(stats, out: out, prefix: 'guard_invokeblock_', prompt: 'invokeblock exit reasons: ')
+      print_counters(stats, out: out, prefix: 'guard_invokesuper_', prompt: 'invokesuper exit reasons: ')
+      print_counters(stats, out: out, prefix: 'gbpp_', prompt: 'getblockparamproxy exit reasons: ')
+      print_counters(stats, out: out, prefix: 'getivar_', prompt: 'getinstancevariable exit reasons:')
+      print_counters(stats, out: out, prefix: 'setivar_', prompt: 'setinstancevariable exit reasons:')
+      %w[
+        branchif
+        branchnil
+        branchunless
+        definedivar
+        expandarray
+        invokebuiltin
+        jump
+        leave
+        objtostring
+        opt_aref
+        opt_aref_with
+        opt_aset
+        opt_case_dispatch
+        opt_div
+        opt_getconstant_path
+        opt_minus
+        opt_mod
+        opt_mult
+        opt_plus
+        opt_succ
+        setlocal
+        splatkw
+      ].each do |insn|
+        print_counters(stats, out: out, prefix: "#{insn}_", prompt: "#{insn} exit reasons:", optional: true)
+      end
+      print_counters(stats, out: out, prefix: 'lshift_', prompt: 'left shift (opt_ltlt) exit reasons: ')
+      print_counters(stats, out: out, prefix: 'rshift_', prompt: 'right shift (>>) exit reasons: ')
+      print_counters(stats, out: out, prefix: 'invalidate_', prompt: 'invalidation reasons: ')
+    end
 
-      # Number of instructions that finish executing in YJIT.
-      # See :count-placement: about the subtraction.
-      retired_in_yjit = stats[:exec_instruction] - side_exits
+    # Format and print out counters
+    def _print_stats(out: $stderr) # :nodoc:
+      stats = runtime_stats(context: true)
+      return unless Primitive.rb_yjit_stats_enabled_p
 
-      # Average length of instruction sequences executed by YJIT
-      avg_len_in_yjit = retired_in_yjit.to_f / total_exits
+      out.puts("***YJIT: Printing YJIT statistics on exit***")
 
-      # Proportion of instructions that retire in YJIT
-      total_insns_count = retired_in_yjit + stats[:vm_insns_count]
-      yjit_ratio_pct = 100.0 * retired_in_yjit.to_f / total_insns_count
+      _print_stats_reasons(stats, out)
 
       # Number of failed compiler invocations
       compilation_failure = stats[:compilation_failure]
 
-      $stderr.puts "bindings_allocations:  " + ("%10d" % stats[:binding_allocations])
-      $stderr.puts "bindings_set:          " + ("%10d" % stats[:binding_set])
-      $stderr.puts "compilation_failure:   " + ("%10d" % compilation_failure) if compilation_failure != 0
-      $stderr.puts "compiled_iseq_count:   " + ("%10d" % stats[:compiled_iseq_count])
-      $stderr.puts "compiled_block_count:  " + ("%10d" % stats[:compiled_block_count])
-      $stderr.puts "invalidation_count:    " + ("%10d" % stats[:invalidation_count])
-      $stderr.puts "constant_state_bumps:  " + ("%10d" % stats[:constant_state_bumps])
-      $stderr.puts "inline_code_size:      " + ("%10d" % stats[:inline_code_size])
-      $stderr.puts "outlined_code_size:    " + ("%10d" % stats[:outlined_code_size])
-
-      $stderr.puts "total_exit_count:      " + ("%10d" % total_exits)
-      $stderr.puts "total_insns_count:     " + ("%10d" % total_insns_count)
-      $stderr.puts "vm_insns_count:        " + ("%10d" % stats[:vm_insns_count])
-      $stderr.puts "yjit_insns_count:      " + ("%10d" % stats[:exec_instruction])
-      $stderr.puts "ratio_in_yjit:         " + ("%9.1f" % yjit_ratio_pct) + "%"
-      $stderr.puts "avg_len_in_yjit:       " + ("%10.1f" % avg_len_in_yjit)
-
-      print_sorted_exit_counts(stats, prefix: "exit_")
+      code_region_overhead = stats[:code_region_size] - (stats[:inline_code_size] + stats[:outlined_code_size])
+
+      out.puts "num_send:              " + format_number(13, stats[:num_send])
+      out.puts "num_send_known_class:  " + format_number_pct(13, stats[:num_send_known_class], stats[:num_send])
+      out.puts "num_send_polymorphic:  " + format_number_pct(13, stats[:num_send_polymorphic], stats[:num_send])
+      out.puts "num_send_megamorphic:  " + format_number_pct(13, stats[:send_megamorphic], stats[:num_send])
+      out.puts "num_send_dynamic:      " + format_number_pct(13, stats[:num_send_dynamic], stats[:num_send])
+      out.puts "num_send_cfunc:        " + format_number_pct(13, stats[:num_send_cfunc], stats[:num_send])
+      out.puts "num_send_cfunc_inline: " + format_number_pct(13, stats[:num_send_cfunc_inline], stats[:num_send_cfunc])
+      out.puts "num_send_iseq:         " + format_number_pct(13, stats[:num_send_iseq], stats[:num_send])
+      out.puts "num_send_iseq_leaf:    " + format_number_pct(13, stats[:num_send_iseq_leaf], stats[:num_send_iseq])
+      out.puts "num_send_iseq_inline:  " + format_number_pct(13, stats[:num_send_iseq_inline], stats[:num_send_iseq])
+      if stats[:num_send_x86_rel32] != 0 || stats[:num_send_x86_reg] != 0
+        out.puts "num_send_x86_rel32:    " + format_number(13,  stats[:num_send_x86_rel32])
+        out.puts "num_send_x86_reg:      " + format_number(13, stats[:num_send_x86_reg])
+      end
+      out.puts "num_getivar_megamorphic: " + format_number(11, stats[:num_getivar_megamorphic])
+      out.puts "num_setivar_megamorphic: " + format_number(11, stats[:num_setivar_megamorphic])
+      out.puts "num_opt_case_megamorphic: " + format_number(10, stats[:num_opt_case_dispatch_megamorphic])
+      out.puts "num_throw:             " + format_number(13, stats[:num_throw])
+      out.puts "num_throw_break:       " + format_number_pct(13, stats[:num_throw_break], stats[:num_throw])
+      out.puts "num_throw_retry:       " + format_number_pct(13, stats[:num_throw_retry], stats[:num_throw])
+      out.puts "num_throw_return:      " + format_number_pct(13, stats[:num_throw_return], stats[:num_throw])
+      out.puts "num_lazy_frame_check:  " + format_number(13, stats[:num_lazy_frame_check])
+      out.puts "num_lazy_frame_push:   " + format_number_pct(13, stats[:num_lazy_frame_push], stats[:num_lazy_frame_check])
+      out.puts "lazy_frame_count:      " + format_number(13, stats[:lazy_frame_count])
+      out.puts "lazy_frame_failure:    " + format_number(13, stats[:lazy_frame_failure])
+
+      out.puts "iseq_stack_too_large:  " + format_number(13, stats[:iseq_stack_too_large])
+      out.puts "iseq_too_long:         " + format_number(13, stats[:iseq_too_long])
+      out.puts "temp_reg_opnd:         " + format_number(13, stats[:temp_reg_opnd])
+      out.puts "temp_mem_opnd:         " + format_number(13, stats[:temp_mem_opnd])
+      out.puts "temp_spill:            " + format_number(13, stats[:temp_spill])
+      out.puts "bindings_allocations:  " + format_number(13, stats[:binding_allocations])
+      out.puts "bindings_set:          " + format_number(13, stats[:binding_set])
+      out.puts "compilation_failure:   " + format_number(13, compilation_failure) if compilation_failure != 0
+      out.puts "live_iseq_count:       " + format_number(13, stats[:live_iseq_count])
+      out.puts "iseq_alloc_count:      " + format_number(13, stats[:iseq_alloc_count])
+      out.puts "compiled_iseq_entry:   " + format_number(13, stats[:compiled_iseq_entry])
+      out.puts "cold_iseq_entry:       " + format_number_pct(13, stats[:cold_iseq_entry], stats[:compiled_iseq_entry] + stats[:cold_iseq_entry])
+      out.puts "compiled_iseq_count:   " + format_number(13, stats[:compiled_iseq_count])
+      out.puts "compiled_blockid_count:" + format_number(13, stats[:compiled_blockid_count])
+      out.puts "compiled_block_count:  " + format_number(13, stats[:compiled_block_count])
+      if stats[:compiled_blockid_count] != 0
+        out.puts "versions_per_block:    " + format_number(13, "%4.3f" % (stats[:compiled_block_count].fdiv(stats[:compiled_blockid_count])))
+      end
+      out.puts "max_inline_versions:   " + format_number(13, stats[:max_inline_versions])
+      out.puts "compiled_branch_count: " + format_number(13, stats[:compiled_branch_count])
+      out.puts "compile_time_ms:       " + format_number(13, stats[:compile_time_ns] / (1000 * 1000))
+      out.puts "block_next_count:      " + format_number(13, stats[:block_next_count])
+      out.puts "defer_count:           " + format_number(13, stats[:defer_count])
+      out.puts "defer_empty_count:     " + format_number(13, stats[:defer_empty_count])
+
+      out.puts "branch_insn_count:     " + format_number(13, stats[:branch_insn_count])
+      out.puts "branch_known_count:    " + format_number_pct(13, stats[:branch_known_count], stats[:branch_insn_count])
+
+      out.puts "freed_iseq_count:      " + format_number(13, stats[:freed_iseq_count])
+      out.puts "invalidation_count:    " + format_number(13, stats[:invalidation_count])
+      out.puts "inline_code_size:      " + format_number(13, stats[:inline_code_size])
+      out.puts "outlined_code_size:    " + format_number(13, stats[:outlined_code_size])
+      out.puts "code_region_size:      " + format_number(13, stats[:code_region_size])
+      out.puts "code_region_overhead:  " + format_number_pct(13, code_region_overhead, stats[:code_region_size])
+
+      out.puts "freed_code_size:       " + format_number(13, stats[:freed_code_size])
+      out.puts "yjit_alloc_size:       " + format_number(13, stats[:yjit_alloc_size]) if stats.key?(:yjit_alloc_size)
+      out.puts "live_context_size:     " + format_number(13, stats[:live_context_size])
+      out.puts "live_context_count:    " + format_number(13, stats[:live_context_count])
+      out.puts "live_page_count:       " + format_number(13, stats[:live_page_count])
+      out.puts "freed_page_count:      " + format_number(13, stats[:freed_page_count])
+      out.puts "code_gc_count:         " + format_number(13, stats[:code_gc_count])
+      out.puts "num_gc_obj_refs:       " + format_number(13, stats[:num_gc_obj_refs])
+      out.puts "object_shape_count:    " + format_number(13, stats[:object_shape_count])
+      out.puts "side_exit_count:       " + format_number(13, stats[:side_exit_count])
+      out.puts "total_exit_count:      " + format_number(13, stats[:total_exit_count])
+      out.puts "total_insns_count:     " + format_number(13, stats[:total_insns_count])
+      out.puts "vm_insns_count:        " + format_number(13, stats[:vm_insns_count])
+      out.puts "yjit_insns_count:      " + format_number(13, stats[:yjit_insns_count])
+      out.puts "ratio_in_yjit:         " + ("%12.1f" % stats[:ratio_in_yjit]) + "%"
+      out.puts "avg_len_in_yjit:       " + ("%13.1f" % stats[:avg_len_in_yjit])
+
+      print_sorted_exit_counts(stats, out: out, prefix: "exit_")
+
+      print_sorted_method_calls(stats[:cfunc_calls], stats[:num_send_cfunc], out: out, type: 'C')
+      print_sorted_method_calls(stats[:iseq_calls], stats[:num_send_iseq], out: out, type: 'ISEQ')
     end
 
-    def print_sorted_exit_counts(stats, prefix:, how_many: 20, left_pad: 4)
-      exits = []
-      stats.each do |k, v|
-        if k.start_with?(prefix)
-          exits.push [k.to_s.delete_prefix(prefix), v]
-        end
+    def print_sorted_method_calls(calls, num_calls, out:, type:, how_many: 20, left_pad: 4) # :nodoc:
+      return if calls.empty?
+
+      # Sort calls by decreasing frequency and keep the top N
+      pairs = calls.map { |k,v| [k, v] }
+      pairs.sort_by! {|pair| -pair[1] }
+      pairs = pairs[0...how_many]
+
+      top_n_total = pairs.sum { |name, count| count }
+      top_n_pct = 100.0 * top_n_total / num_calls
+
+      out.puts "Top-#{pairs.size} most frequent #{type} calls (#{"%.1f" % top_n_pct}% of #{type} calls):"
+
+      count_width = format_number(0, pairs[0][1]).length
+      pairs.each do |name, count|
+        padded_count = format_number_pct(count_width, count, num_calls)
+        out.puts("  #{padded_count}: #{name}")
       end
+    end
 
-      exits = exits.sort_by { |name, count| -count }[0...how_many]
+    def print_sorted_exit_counts(stats, out:, prefix:, how_many: 20, left_pad: 4) # :nodoc:
       total_exits = total_exit_count(stats)
 
-      top_n_total = exits.map { |name, count| count }.sum
-      top_n_exit_pct = 100.0 * top_n_total / total_exits
+      if total_exits > 0
+        exits = []
+        stats.each do |k, v|
+          if k.start_with?(prefix)
+            exits.push [k.to_s.delete_prefix(prefix), v]
+          end
+        end
+
+        exits = exits.select { |_name, count| count > 0 }.max_by(how_many) { |_name, count| count }
 
-      $stderr.puts "Top-#{how_many} most frequent exit ops (#{"%.1f" % top_n_exit_pct}% of exits):"
+        top_n_total = exits.sum { |name, count| count }
+        top_n_exit_pct = 100.0 * top_n_total / total_exits
 
-      longest_insn_name_len = exits.map { |name, count| name.length }.max
-      exits.each do |name, count|
-        padding = longest_insn_name_len + left_pad
-        padded_name = "%#{padding}s" % name
-        padded_count = "%10d" % count
-        percent = 100.0 * count / total_exits
-        formatted_percent = "%.1f" % percent
-        $stderr.puts("#{padded_name}: #{padded_count} (#{formatted_percent}%)" )
+        out.puts "Top-#{exits.size} most frequent exit ops (#{"%.1f" % top_n_exit_pct}% of exits):"
+
+        count_width = format_number(0, exits[0][1]).length
+        exits.each do |name, count|
+          padded_count = format_number_pct(count_width, count, total_exits)
+          out.puts("  #{padded_count}: #{name}")
+        end
+      else
+        out.puts "total_exits:           " + format_number(10, total_exits)
       end
     end
 
-    def total_exit_count(stats, prefix: "exit_")
+    def total_exit_count(stats, prefix: "exit_") # :nodoc:
       total = 0
       stats.each do |k,v|
         total += v if k.start_with?(prefix)
@@ -254,15 +451,19 @@ module RubyVM::YJIT
       total
     end
 
-    def print_counters(counters, prefix:, prompt:)
-      $stderr.puts(prompt)
+    def print_counters(counters, out:, prefix:, prompt:, optional: false) # :nodoc:
       counters = counters.filter { |key, _| key.start_with?(prefix) }
       counters.filter! { |_, value| value != 0 }
       counters.transform_keys! { |key| key.to_s.delete_prefix(prefix) }
 
       if counters.empty?
-        $stderr.puts("    (all relevant counters are zero)")
+        unless optional
+          out.puts(prompt)
+          out.puts("    (all relevant counters are zero)")
+        end
         return
+      else
+        out.puts(prompt)
       end
 
       counters = counters.to_a
@@ -271,9 +472,28 @@ module RubyVM::YJIT
       total = counters.sum { |(_, counter_value)| counter_value }
 
       counters.reverse_each do |(name, value)|
-        percentage = value.fdiv(total) * 100
-        $stderr.printf("    %*s %10d (%4.1f%%)\n", longest_name_length, name, value, percentage);
+        padded_name = name.rjust(longest_name_length, ' ')
+        padded_count = format_number_pct(10, value, total)
+        out.puts("    #{padded_name}: #{padded_count}")
       end
     end
+
+    # Format large numbers with comma separators for readability
+    def format_number(pad, number) # :nodoc:
+      s = number.to_s
+      i = s.index('.') || s.size
+      s.insert(i -= 3, ',') while i > 3
+      s.rjust(pad, ' ')
+    end
+
+    # Format a number along with a percentage over a total value
+    def format_number_pct(pad, number, total) # :nodoc:
+      padded_count = format_number(pad, number)
+      percentage = number.fdiv(total) * 100
+      formatted_pct = "%4.1f%%" % percentage
+      "#{padded_count} (#{formatted_pct})"
+    end
+
+    # :startdoc:
   end
 end