path: root/lib/ruby_vm/rjit/compiler.rb
diff options
Diffstat (limited to 'lib/ruby_vm/rjit/compiler.rb')
1 files changed, 518 insertions, 0 deletions
diff --git a/lib/ruby_vm/rjit/compiler.rb b/lib/ruby_vm/rjit/compiler.rb
new file mode 100644
index 0000000000..e5c3adf0ec
--- /dev/null
+++ b/lib/ruby_vm/rjit/compiler.rb
@@ -0,0 +1,518 @@
+require 'ruby_vm/rjit/assembler'
+require 'ruby_vm/rjit/block'
+require 'ruby_vm/rjit/branch_stub'
+require 'ruby_vm/rjit/code_block'
+require 'ruby_vm/rjit/context'
+require 'ruby_vm/rjit/entry_stub'
+require 'ruby_vm/rjit/exit_compiler'
+require 'ruby_vm/rjit/insn_compiler'
+require 'ruby_vm/rjit/instruction'
+require 'ruby_vm/rjit/invariants'
+require 'ruby_vm/rjit/jit_state'
+require 'ruby_vm/rjit/type'
+module RubyVM::RJIT
+ # Compilation status: what an instruction compiler reports back to Compiler#compile_block
+ KeepCompiling = :KeepCompiling # compile_block advances to the next instruction
+ CantCompile = :CantCompile # compile_block emits a side exit and ends the block
+ EndBlock = :EndBlock # compile_block ends the block without a side exit
+ # Ruby constants, aliased from Fiddle
+ Qtrue = Fiddle::Qtrue
+ Qfalse = Fiddle::Qfalse
+ Qnil = Fiddle::Qnil
+ Qundef = Fiddle::Qundef
+ # Callee-saved registers used by JITed code; compile_prologue pushes each of them on entry
+ # TODO: support using r12/r13 here
+ EC = :r14
+ CFP = :r15
+ SP = :rbx # holds cfp->sp according to the comment in compile_prologue
+ # Scratch registers: rax, rcx, rdx
+ # Mark objects in this Array during GC (rjit_blocks appends each per-ISEQ block table to it)
+ GC_REFS = []
+ # Maximum number of versions per block
+ # 1 means always create generic versions. NOTE(review): the MAX_VERSIONS constant described here is not visible in this listing, yet limit_block_versions reads it - confirm it is defined.
+ class Compiler
+ attr_accessor :write_pos
+ def self.decode_insn(encoded)
+ INSNS.fetch(C.rb_vm_insn_decode(encoded))
+ end
+ def initialize # Maps a memory region via C.mmap, splits it between @cb and @ocb, and creates the exit and instruction compilers.
+ mem_size = C.rjit_opts.exec_mem_size * 1024 * 1024 # exec_mem_size is scaled by 1024 * 1024
+ mem_block = C.mmap(mem_size)
+ @cb = mem_block, mem_size: mem_size / 2) # first half of mem_block. NOTE(review): the start of this expression is missing in this listing
+ @ocb = mem_block + mem_size / 2, mem_size: mem_size / 2, outlined: true) # second half of mem_block, flagged outlined: true; same truncation as above
+ @exit_compiler = # NOTE(review): right-hand side is missing in this listing
+ @insn_compiler =, @ocb, @exit_compiler) # NOTE(review): truncated; the visible arguments are @ocb and @exit_compiler
+ Invariants.initialize(@cb, @ocb, self, @exit_compiler) # hands both buffers, this compiler and the exit compiler to Invariants
+ end
+ # Compile an ISEQ from its entry point: emit the prologue, compile the block at cfp's current pc, and store the written address in iseq.body.jit_entry.
+ # @param iseq `RubyVM::RJIT::CPointer::Struct_rb_iseq_t`
+ # @param cfp `RubyVM::RJIT::CPointer::Struct_rb_control_frame_t`
+ def compile(iseq, cfp)
+ return unless supported_platform? # nothing is compiled on unsupported platforms
+ pc = cfp.pc.to_i
+ jit =, cfp:) # NOTE(review): expression is truncated in this listing
+ asm = # NOTE(review): right-hand side is missing in this listing
+ compile_prologue(asm, iseq, pc)
+ compile_block(asm, jit:, pc:) # ctx: is not passed here, so compile_block must supply a default
+ iseq.body.jit_entry = @cb.write(asm)
+ rescue Exception => e # any exception is printed to STDERR and terminates the process with status 1
+ STDERR.puts "#{e.class}: #{e.message}"
+ STDERR.puts e.backtrace
+ exit 1
+ end
+ # Compile an entry: write a new PC guard for cfp's current pc, reuse or compile the block for that pc, then re-point the previous entry stub's jne at the new guard. Returns block.start_addr.
+ # @param entry_stub [RubyVM::RJIT::EntryStub]
+ def entry_stub_hit(entry_stub, cfp) # cfp supplies the pc and iseq used below
+ # Compile a new entry guard as a next entry
+ pc = cfp.pc.to_i
+ next_entry = do |asm| # NOTE(review): the receiver of this block is missing in this listing
+ compile_entry_chain_guard(asm, cfp.iseq, pc)
+ @cb.write(asm)
+ end
+ # Try to find an existing compiled version of this block
+ ctx = # NOTE(review): right-hand side is missing in this listing
+ block = find_block(cfp.iseq, pc, ctx)
+ if block
+ # If an existing block is found, generate a jump to the block. NOTE(review): no jump is emitted in the lines visible here - a line appears to be missing.
+ asm = # NOTE(review): right-hand side is missing in this listing
+ @cb.write(asm)
+ else
+ # If this block hasn't yet been compiled, generate blocks after the entry guard.
+ asm = # NOTE(review): right-hand side is missing in this listing
+ jit = cfp.iseq, cfp:) # NOTE(review): expression is truncated in this listing
+ compile_block(asm, jit:, pc:, ctx:)
+ @cb.write(asm)
+ block = jit.block # compile_block stores the new block on jit
+ end
+ # Regenerate the previous entry
+ @cb.with_write_addr(entry_stub.start_addr) do
+ # The last instruction of compile_entry_chain_guard is jne
+ asm = # NOTE(review): right-hand side is missing in this listing
+ asm.jne(next_entry)
+ @cb.write(asm)
+ end
+ return block.start_addr
+ rescue Exception => e # any exception is printed to STDERR and terminates the process with status 1
+ STDERR.puts e.full_message
+ exit 1
+ end
+ # Compile a branch stub: resolve the selected target to a reused or newly compiled block, record the stub as incoming to that block, and rewrite the branch code.
+ # @param branch_stub [RubyVM::RJIT::BranchStub]
+ # @param cfp `RubyVM::RJIT::CPointer::Struct_rb_control_frame_t`
+ # @param target0_p [TrueClass,FalseClass] true selects branch_stub.target0, false selects branch_stub.target1
+ # @return [Integer] target.address, i.e. the start address of the block the selected target now points to
+ def branch_stub_hit(branch_stub, cfp, target0_p)
+ # Update cfp->pc for `jit.at_current_insn?`
+ target = target0_p ? branch_stub.target0 : branch_stub.target1
+ cfp.pc = target.pc
+ # Reuse an existing block if it already exists
+ block = find_block(branch_stub.iseq, target.pc, target.ctx)
+ # If the branch stub's jump is the last code, allow overwriting part of
+ # the old branch code with the new block code.
+ fallthrough = block.nil? && @cb.write_addr == branch_stub.end_addr # only when no block exists yet and the buffer ends right after this branch
+ if fallthrough
+ # Move the write position back to the start of the branch and switch the stub
+ # to the Next0/Next1 shape before rewriting it.
+ @cb.set_write_addr(branch_stub.start_addr)
+ branch_stub.shape = target0_p ? Next0 : Next1
+ do |branch_asm| # NOTE(review): the receiver of this block, and possibly a line of its body, is missing in this listing
+ @cb.write(branch_asm)
+ end
+ end
+ # Reuse or generate a block
+ if block
+ target.address = block.start_addr
+ else
+ jit = branch_stub.iseq, cfp:) # NOTE(review): expression is truncated in this listing
+ target.address = do |asm| # NOTE(review): the receiver of this block is missing in this listing
+ compile_block(asm, jit:, pc: target.pc, ctx: target.ctx.dup)
+ @cb.write(asm)
+ end
+ block = jit.block
+ end
+ block.incoming << branch_stub # prepare for invalidate_block, which walks block.incoming
+ # Re-generate the branch code for non-fallthrough cases
+ unless fallthrough
+ @cb.with_write_addr(branch_stub.start_addr) do
+ branch_asm = # NOTE(review): right-hand side is missing in this listing
+ @cb.write(branch_asm)
+ end
+ end
+ return target.address
+ rescue Exception => e # any exception is printed to STDERR and terminates the process with status 1
+ STDERR.puts e.full_message
+ exit 1
+ end
+ # @param iseq `RubyVM::RJIT::CPointer::Struct_rb_iseq_t`
+ # @param pc [Integer]
+ def invalidate_blocks(iseq, pc)
+ list_blocks(iseq, pc).each do |block|
+ invalidate_block(block)
+ end
+ # If they were the ISEQ's first blocks, re-compile RJIT entry as well
+ if iseq.body.iseq_encoded.to_i == pc
+ iseq.body.jit_entry = 0
+ iseq.body.jit_entry_calls = 0
+ end
+ end
+ def invalidate_block(block) # Unregisters block, overwrites its start once, and re-points every incoming branch at a fresh branch stub.
+ iseq = block.iseq
+ # Avoid touching GCed ISEQs. We assume it won't be re-entered.
+ return unless C.imemo_type_p(iseq, C.imemo_iseq)
+ # Remove this block from the version array
+ remove_block(iseq, block)
+ # Invalidate the block with entry exit (done only once per block, guarded by block.invalidated)
+ unless block.invalidated
+ @cb.with_write_addr(block.start_addr) do
+ asm = # NOTE(review): right-hand side is missing in this listing; the code emitted over the block start is not visible
+ asm.comment('invalidate_block')
+ @cb.write(asm)
+ end
+ block.invalidated = true
+ end
+ # Re-stub incoming branches
+ block.incoming.each do |branch_stub|
+ target = [branch_stub.target0, branch_stub.target1].compact.find do |target| # the block parameter shadows the outer `target` being assigned
+ target.pc == block.pc && target.ctx == block.ctx
+ end
+ next if target.nil? # this stub no longer has a target matching the block's pc and ctx
+ # TODO: Could target.address be a stub address? Is invalidation not needed in that case?
+ # If the target being re-generated is currently a fallthrough block,
+ # the fallthrough code must be rewritten with a jump to the stub.
+ if target.address == branch_stub.end_addr
+ branch_stub.shape = Default
+ end
+ target.address = do |ocb_asm| # NOTE(review): the receiver of this block is missing in this listing
+ @exit_compiler.compile_branch_stub(block.ctx, ocb_asm, branch_stub, target == branch_stub.target0)
+ @ocb.write(ocb_asm)
+ end
+ @cb.with_write_addr(branch_stub.start_addr) do
+ branch_asm = # NOTE(review): right-hand side is missing in this listing
+ @cb.write(branch_asm)
+ end
+ end
+ end
+ private
+ # Callee-saved: rbx, rsp, rbp, r12, r13, r14, r15
+ # Caller-saved: rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11
+ # Emits the RJIT entry sequence into asm: saves CFP, EC and SP, sets up the dedicated registers and cfp->jit_return, and adds a PC guard for ISEQs with optional parameters.
+ # @param asm [RubyVM::RJIT::Assembler]
+ def compile_prologue(asm, iseq, pc)
+ asm.comment('RJIT entry point')
+ # Save callee-saved registers used by JITed code
+ asm.push(CFP)
+ asm.push(EC)
+ asm.push(SP)
+ # Move arguments EC and CFP to dedicated registers. NOTE(review): the next two lines are truncated in this listing; only the :rdi and :rsi operands survive.
+, :rdi)
+, :rsi)
+ # Load sp to a dedicated register
+, [CFP, C.rb_control_frame_t.offsetof(:sp)]) # rbx = cfp->sp
+ # Setup cfp->jit_return. NOTE(review): both lines below are truncated; leave_exit and :rax are the visible operands.
+, leave_exit)
+[CFP, C.rb_control_frame_t.offsetof(:jit_return)], :rax)
+ # We're compiling iseqs that we *expect* to start at `pc`. But in
+ # the case of optional parameters, the interpreter can set the pc to a
+ # different location depending on the optional parameters. If an iseq
+ # has optional parameters, we'll add a runtime check that the PC we've
+ # compiled for is the same PC that the interpreter wants us to run with.
+ # If they don't match, the guard emitted by compile_entry_chain_guard jumps to an entry stub.
+ if iseq.body.param.flags.has_opt
+ compile_entry_chain_guard(asm, iseq, pc)
+ end
+ end
+ def compile_entry_chain_guard(asm, iseq, pc) # Emits a guard into asm that compares cfp->pc with pc and jumps to a new entry stub when they differ; iseq is not used in the lines visible here.
+ entry_stub = # NOTE(review): right-hand side is missing in this listing
+ stub_addr = do |ocb_asm| # NOTE(review): the receiver of this block is missing in this listing
+ @exit_compiler.compile_entry_stub(ocb_asm, entry_stub)
+ @ocb.write(ocb_asm) # the stub code goes to @ocb, not to asm
+ end
+ asm.comment('guard expected PC')
+, pc)
+ asm.cmp([CFP, C.rb_control_frame_t.offsetof(:pc)], :rax) # presumably :rax was loaded with pc by the truncated line above - confirm
+ asm.stub(entry_stub) do
+ asm.jne(stub_addr) # entry_stub_hit relies on this jne being the guard's last instruction
+ end
+ end
+ # @param asm [RubyVM::RJIT::Assembler] receives the code of the block
+ # @param jit [RubyVM::RJIT::JITState] jit.block is set to the newly created block
+ # @param ctx [RubyVM::RJIT::Context] starting context; limit_block_versions may replace it with a generic one
+ def compile_block(asm, jit:, pc:, ctx: # Compiles instructions from pc into one block and registers it. NOTE(review): the signature is truncated in this listing; Compiler#compile omits ctx:, so a default value must follow.
+ # Mark the block start address and prepare an exit code storage
+ ctx = limit_block_versions(jit.iseq, pc, ctx)
+ block = jit.iseq, pc:, ctx: ctx.dup) # NOTE(review): expression is truncated in this listing
+ jit.block = block
+ asm.block(block)
+ iseq = jit.iseq
+ asm.comment("Block: #{iseq.body.location.label}@#{C.rb_iseq_path(iseq)}:#{iseq_lineno(iseq, pc)}")
+ # Compile each insn
+ index = (pc - iseq.body.iseq_encoded.to_i) / C.VALUE.size # converts the pc address into an index into iseq_encoded
+ while index < iseq.body.iseq_size
+ # Set the current instruction
+ insn = self.class.decode_insn(iseq.body.iseq_encoded[index])
+ jit.pc = (iseq.body.iseq_encoded + index).to_i
+ jit.stack_size_for_pc = ctx.stack_size # remembered so the CantCompile path can rewind to it
+ jit.side_exit_for_pc.clear
+ # If previous instruction requested to record the boundary
+ if jit.record_boundary_patch_point
+ # Generate an exit to this instruction and record it
+ exit_pos = do |ocb_asm| # NOTE(review): the receiver of this block is missing in this listing
+ @exit_compiler.compile_side_exit(jit.pc, ctx, ocb_asm)
+ @ocb.write(ocb_asm)
+ end
+ Invariants.record_global_inval_patch(asm, exit_pos)
+ jit.record_boundary_patch_point = false
+ end
+ # In debug mode (rjit_opts.verify_ctx), verify our existing assumption
+ if C.rjit_opts.verify_ctx && jit.at_current_insn?
+ verify_ctx(jit, ctx)
+ end
+ case status = @insn_compiler.compile(jit, ctx, asm, insn)
+ when KeepCompiling
+ # For now, reset the chain depth after each instruction as only the
+ # first instruction in the block can concern itself with the depth.
+ ctx.chain_depth = 0
+ index += insn.len
+ when EndBlock
+ # TODO: pad nops if entry exit exists (not needed for x86_64?)
+ break
+ when CantCompile
+ # Rewind stack_size using ctx.with_stack_size to allow stack_size changes
+ # before you return CantCompile.
+ @exit_compiler.compile_side_exit(jit.pc, ctx.with_stack_size(jit.stack_size_for_pc), asm)
+ # If this is the first instruction, this block never needs to be invalidated.
+ if block.pc == iseq.body.iseq_encoded.to_i + index * C.VALUE.size
+ block.invalidated = true # invalidate_block skips the rewrite for blocks already marked invalidated
+ end
+ break
+ else # NOTE(review): the first interpolation in the message below is empty in this listing; the instruction was presumably meant to be named there
+ raise "compiling #{} returned unexpected status: #{status.inspect}"
+ end
+ end
+ incr_counter(:compiled_block_count)
+ add_block(iseq, block)
+ end
+ def leave_exit # Memoized in @leave_exit: the value of the block below, whose last expression is @ocb.write(asm); compile_prologue uses it when setting up cfp->jit_return.
+ @leave_exit ||= do |asm| # NOTE(review): the receiver of this block is missing in this listing
+ @exit_compiler.compile_leave_exit(asm)
+ @ocb.write(asm)
+ end
+ end
+ def incr_counter(name)
+ if C.rjit_opts.stats
+ C.rb_rjit_counters[name][0] += 1
+ end
+ end
+ # Produce a generic context when the block version limit is hit for the block; otherwise return a copy of ctx.
+ def limit_block_versions(iseq, pc, ctx)
+ # Guard chains implement limits separately, do nothing
+ if ctx.chain_depth > 0
+ return ctx.dup
+ end
+ # If this block version we're about to add will hit the version limit
+ if list_blocks(iseq, pc).size + 1 >= MAX_VERSIONS # MAX_VERSIONS is not defined in the part of the file visible here
+ # Produce a generic context that stores no type information,
+ # but still respects the stack_size and sp_offset constraints.
+ # This new context will then match all future requests.
+ generic_ctx = # NOTE(review): right-hand side is missing in this listing
+ generic_ctx.stack_size = ctx.stack_size
+ generic_ctx.sp_offset = ctx.sp_offset
+ if ctx.diff(generic_ctx) == TypeDiff::Incompatible # sanity check: ctx must be usable where the generic context is expected
+ raise 'should substitute a compatible context'
+ end
+ return generic_ctx
+ end
+ return ctx.dup
+ end
+ def list_blocks(iseq, pc)
+ rjit_blocks(iseq)[pc]
+ end
+ # @param [Integer] pc
+ # @param [RubyVM::RJIT::Context] ctx
+ # @return [RubyVM::RJIT::Block,NilClass]
+ def find_block(iseq, pc, ctx)
+ versions = rjit_blocks(iseq)[pc]
+ best_version = nil
+ best_diff = Float::INFINITY
+ versions.each do |block|
+ # Note that we always prefer the first matching
+ # version found because of inline-cache chains
+ case ctx.diff(block.ctx)
+ in TypeDiff::Compatible[diff] if diff < best_diff
+ best_version = block
+ best_diff = diff
+ else
+ end
+ end
+ return best_version
+ end
+ # @param [RubyVM::RJIT::Block] block
+ def add_block(iseq, block)
+ rjit_blocks(iseq)[block.pc] << block
+ end
+ # @param [RubyVM::RJIT::Block] block
+ def remove_block(iseq, block)
+ rjit_blocks(iseq)[block.pc].delete(block)
+ end
+ def rjit_blocks(iseq) # Returns the per-ISEQ table that maps a pc to its list of compiled blocks, creating it on first use.
+ # Guard against ISEQ GC at random moments
+ unless C.imemo_type_p(iseq, C.imemo_iseq)
+ return { |h, k| h[k] = [] } # NOTE(review): the receiver of this block is missing in this listing; the result is a throwaway table not stored on the ISEQ
+ end
+ unless iseq.body.rjit_blocks
+ iseq.body.rjit_blocks = { |blocks, pc| blocks[pc] = [] } # NOTE(review): truncated as above; unknown pcs default to a new empty list that is stored in the table
+ # For some reason, rb_rjit_iseq_mark didn't protect this Hash
+ # from being freed. So we rely on GC_REFS to keep the Hash.
+ GC_REFS << iseq.body.rjit_blocks
+ end
+ iseq.body.rjit_blocks
+ end
+ def iseq_lineno(iseq, pc)
+ C.rb_iseq_line_no(iseq, (pc - iseq.body.iseq_encoded.to_i) / C.VALUE.size)
+ rescue RangeError # bignum too big to convert into `unsigned long long' (RangeError)
+ -1
+ end
+ # Verify the ctx's types and mappings against the compile-time stack, self, and locals.
+ # @param jit [RubyVM::RJIT::JITState]
+ # @param ctx [RubyVM::RJIT::Context]
+ def verify_ctx(jit, ctx)
+ # Only able to check types when at current insn
+ assert(jit.at_current_insn?)
+ self_val = jit.peek_at_self
+ self_val_type = Type.from(self_val)
+ # Verify self operand type
+ assert_compatible(self_val_type, ctx.get_opnd_type(SelfOpnd))
+ # Verify stack operand types
+ [ctx.stack_size, MAX_TEMP_TYPES].min.times do |i|
+ learned_mapping, learned_type = ctx.get_opnd_mapping(StackOpnd[i])
+ stack_val = jit.peek_at_stack(i)
+ val_type = Type.from(stack_val)
+ case learned_mapping
+ in MapToSelf
+ if C.to_value(self_val) != C.to_value(stack_val)
+ raise "verify_ctx: stack value was mapped to self, but values did not match:\n"\
+ "stack: #{stack_val.inspect}, self: #{self_val.inspect}"
+ end
+ in MapToLocal[local_idx]
+ local_val = jit.peek_at_local(local_idx)
+ if C.to_value(local_val) != C.to_value(stack_val)
+ raise "verify_ctx: stack value was mapped to local, but values did not match:\n"\
+ "stack: #{stack_val.inspect}, local: #{local_val.inspect}"
+ end
+ in MapToStack
+ # noop
+ end
+ # If the actual type differs from the learned type
+ assert_compatible(val_type, learned_type)
+ end
+ # Verify local variable types
+ local_table_size = jit.iseq.body.local_table_size
+ [local_table_size, MAX_TEMP_TYPES].min.times do |i|
+ learned_type = ctx.get_local_type(i)
+ local_val = jit.peek_at_local(i)
+ local_type = Type.from(local_val)
+ assert_compatible(local_type, learned_type)
+ end
+ end
+ def assert_compatible(actual_type, ctx_type)
+ if actual_type.diff(ctx_type) == TypeDiff::Incompatible
+ raise "verify_ctx: ctx type (#{ctx_type.type.inspect}) is incompatible with actual type (#{actual_type.type.inspect})"
+ end
+ end
+ def assert(cond)
+ unless cond
+ raise "'#{cond.inspect}' was not true"
+ end
+ end
+ def supported_platform?
+ return @supported_platform if defined?(@supported_platform)
+ @supported_platform = RUBY_PLATFORM.match?(/x86_64/).tap do |supported|
+ warn "warning: RJIT does not support #{RUBY_PLATFORM} yet" unless supported
+ end
+ end
+ end