Diffstat (limited to 'lib/ruby_vm/rjit/compiler.rb')
-rw-r--r-- | lib/ruby_vm/rjit/compiler.rb | 518
1 file changed, 518 insertions, 0 deletions
diff --git a/lib/ruby_vm/rjit/compiler.rb b/lib/ruby_vm/rjit/compiler.rb
new file mode 100644
index 0000000000..e5c3adf0ec
--- /dev/null
+++ b/lib/ruby_vm/rjit/compiler.rb
@@ -0,0 +1,518 @@

require 'ruby_vm/rjit/assembler'
require 'ruby_vm/rjit/block'
require 'ruby_vm/rjit/branch_stub'
require 'ruby_vm/rjit/code_block'
require 'ruby_vm/rjit/context'
require 'ruby_vm/rjit/entry_stub'
require 'ruby_vm/rjit/exit_compiler'
require 'ruby_vm/rjit/insn_compiler'
require 'ruby_vm/rjit/instruction'
require 'ruby_vm/rjit/invariants'
require 'ruby_vm/rjit/jit_state'
require 'ruby_vm/rjit/type'

module RubyVM::RJIT
  # Compilation status
  KeepCompiling = :KeepCompiling
  CantCompile = :CantCompile
  EndBlock = :EndBlock

  # Ruby constants
  Qtrue = Fiddle::Qtrue
  Qfalse = Fiddle::Qfalse
  Qnil = Fiddle::Qnil
  Qundef = Fiddle::Qundef

  # Callee-saved registers
  # TODO: support using r12/r13 here
  EC = :r14
  CFP = :r15
  SP = :rbx

  # Scratch registers: rax, rcx, rdx

  # Mark objects in this Array during GC
  GC_REFS = []

  # Maximum number of versions per block
  # 1 means always create generic versions
  MAX_VERSIONS = 4

  class Compiler
    attr_accessor :write_pos

    def self.decode_insn(encoded)
      INSNS.fetch(C.rb_vm_insn_decode(encoded))
    end

    def initialize
      mem_size = C.rjit_opts.exec_mem_size * 1024 * 1024
      mem_block = C.mmap(mem_size)
      @cb = CodeBlock.new(mem_block: mem_block, mem_size: mem_size / 2)
      @ocb = CodeBlock.new(mem_block: mem_block + mem_size / 2, mem_size: mem_size / 2, outlined: true)
      @exit_compiler = ExitCompiler.new
      @insn_compiler = InsnCompiler.new(@cb, @ocb, @exit_compiler)
      Invariants.initialize(@cb, @ocb, self, @exit_compiler)
    end

    # Compile an ISEQ from its entry point.
    # @param iseq `RubyVM::RJIT::CPointer::Struct_rb_iseq_t`
    # @param cfp `RubyVM::RJIT::CPointer::Struct_rb_control_frame_t`
    def compile(iseq, cfp)
      return unless supported_platform?
      pc = cfp.pc.to_i
      jit = JITState.new(iseq:, cfp:)
      asm = Assembler.new
      compile_prologue(asm, iseq, pc)
      compile_block(asm, jit:, pc:)
      iseq.body.jit_entry = @cb.write(asm)
    rescue Exception => e
      STDERR.puts "#{e.class}: #{e.message}"
      STDERR.puts e.backtrace
      exit 1
    end

    # Compile a new entry when an entry stub is hit, reusing an existing
    # block if one has already been compiled for this PC.
    # @param entry_stub [RubyVM::RJIT::EntryStub]
    # @param cfp `RubyVM::RJIT::CPointer::Struct_rb_control_frame_t`
    def entry_stub_hit(entry_stub, cfp)
      # Compile a new entry guard as a next entry
      pc = cfp.pc.to_i
      next_entry = Assembler.new.then do |asm|
        compile_entry_chain_guard(asm, cfp.iseq, pc)
        @cb.write(asm)
      end

      # Try to find an existing compiled version of this block
      ctx = Context.new
      block = find_block(cfp.iseq, pc, ctx)
      if block
        # If an existing block is found, generate a jump to the block.
        asm = Assembler.new
        asm.jmp(block.start_addr)
        @cb.write(asm)
      else
        # If this block hasn't yet been compiled, generate blocks after the entry guard.
        asm = Assembler.new
        jit = JITState.new(iseq: cfp.iseq, cfp:)
        compile_block(asm, jit:, pc:, ctx:)
        @cb.write(asm)

        block = jit.block
      end

      # Regenerate the previous entry
      @cb.with_write_addr(entry_stub.start_addr) do
        # The last instruction of compile_entry_chain_guard is jne
        asm = Assembler.new
        asm.jne(next_entry)
        @cb.write(asm)
      end

      return block.start_addr
    rescue Exception => e
      STDERR.puts e.full_message
      exit 1
    end
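
    # Illustrative sketch, not part of this commit: entry_stub_hit maintains
    # what is effectively a chain of PC guards, where each compiled entry
    # checks one expected PC and jumps (jne) to the next guard. The
    # hypothetical EntryChain below models that chain with a Hash in plain
    # Ruby: reuse a compiled entry when the PC matches, compile a new one
    # otherwise.
    class EntryChain
      def initialize(&compile_for)
        @entries = {} # pc => compiled entry (stands in for patched machine code)
        @compile_for = compile_for
      end

      # Like entry_stub_hit: find an existing entry for this PC or compile one.
      def enter(pc)
        @entries[pc] ||= @compile_for.call(pc)
      end
    end
    # chain = EntryChain.new { |pc| "code@#{pc}" }
    # chain.enter(0)  #=> "code@0"  (compiled on first hit)
    # chain.enter(16) #=> "code@16" (another entry PC gets its own guard)
    # chain.enter(0)  #=> "code@0"  (reused, like find_block above)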

    # Compile a branch target when its branch stub is hit.
    # @param branch_stub [RubyVM::RJIT::BranchStub]
    # @param cfp `RubyVM::RJIT::CPointer::Struct_rb_control_frame_t`
    # @param target0_p [TrueClass,FalseClass]
    # @return [Integer] The starting address of the compiled branch target
    def branch_stub_hit(branch_stub, cfp, target0_p)
      # Update cfp->pc for `jit.at_current_insn?`
      target = target0_p ? branch_stub.target0 : branch_stub.target1
      cfp.pc = target.pc

      # Reuse an existing block if it already exists
      block = find_block(branch_stub.iseq, target.pc, target.ctx)

      # If the branch stub's jump is the last code, allow overwriting part of
      # the old branch code with the new block code.
      fallthrough = block.nil? && @cb.write_addr == branch_stub.end_addr
      if fallthrough
        # Rewrite the branch in its fallthrough shape so the new block code
        # can be written right after the branch.
        @cb.set_write_addr(branch_stub.start_addr)
        branch_stub.shape = target0_p ? Next0 : Next1
        Assembler.new.tap do |branch_asm|
          branch_stub.compile.call(branch_asm)
          @cb.write(branch_asm)
        end
      end

      # Reuse or generate a block
      if block
        target.address = block.start_addr
      else
        jit = JITState.new(iseq: branch_stub.iseq, cfp:)
        target.address = Assembler.new.then do |asm|
          compile_block(asm, jit:, pc: target.pc, ctx: target.ctx.dup)
          @cb.write(asm)
        end
        block = jit.block
      end
      block.incoming << branch_stub # prepare for invalidate_block

      # Re-generate the branch code for non-fallthrough cases
      unless fallthrough
        @cb.with_write_addr(branch_stub.start_addr) do
          branch_asm = Assembler.new
          branch_stub.compile.call(branch_asm)
          @cb.write(branch_asm)
        end
      end

      return target.address
    rescue Exception => e
      STDERR.puts e.full_message
      exit 1
    end

    # @param iseq `RubyVM::RJIT::CPointer::Struct_rb_iseq_t`
    # @param pc [Integer]
    def invalidate_blocks(iseq, pc)
      list_blocks(iseq, pc).each do |block|
        invalidate_block(block)
      end

      # If they were the ISEQ's first blocks, re-compile RJIT entry as well
      if iseq.body.iseq_encoded.to_i == pc
        iseq.body.jit_entry = 0
        iseq.body.jit_entry_calls = 0
      end
    end

    def invalidate_block(block)
      iseq = block.iseq
      # Avoid touching GCed ISEQs. We assume it won't be re-entered.
      return unless C.imemo_type_p(iseq, C.imemo_iseq)

      # Remove this block from the version array
      remove_block(iseq, block)

      # Invalidate the block with entry exit
      unless block.invalidated
        @cb.with_write_addr(block.start_addr) do
          asm = Assembler.new
          asm.comment('invalidate_block')
          asm.jmp(block.entry_exit)
          @cb.write(asm)
        end
        block.invalidated = true
      end

      # Re-stub incoming branches
      block.incoming.each do |branch_stub|
        target = [branch_stub.target0, branch_stub.target1].compact.find do |target|
          target.pc == block.pc && target.ctx == block.ctx
        end
        next if target.nil?
        # TODO: Could target.address be a stub address? Is invalidation not needed in that case?

        # If the target being re-generated is currently a fallthrough block,
        # the fallthrough code must be rewritten with a jump to the stub.
        if target.address == branch_stub.end_addr
          branch_stub.shape = Default
        end

        target.address = Assembler.new.then do |ocb_asm|
          @exit_compiler.compile_branch_stub(block.ctx, ocb_asm, branch_stub, target == branch_stub.target0)
          @ocb.write(ocb_asm)
        end
        @cb.with_write_addr(branch_stub.start_addr) do
          branch_asm = Assembler.new
          branch_stub.compile.call(branch_asm)
          @cb.write(branch_asm)
        end
      end
    end
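
    # Illustrative sketch, not part of this commit: a branch stub defers
    # compiling each side of a branch until that side is actually taken,
    # then patches itself so later hits never reach the compiler. The
    # hypothetical LazyBranch below models that with a Hash standing in for
    # patched machine code; the real branch_stub_hit additionally rewrites
    # the branch shape and handles invalidation.
    class LazyBranch
      def initialize(then_pc, else_pc, &compile_block)
        @targets = { true => then_pc, false => else_pc } # taken? => target pc
        @compiled = {}                                   # pc => compiled block
        @compile_block = compile_block
      end

      # First hit per side compiles (like branch_stub_hit); later hits reuse.
      def take(taken)
        pc = @targets.fetch(taken)
        @compiled[pc] ||= @compile_block.call(pc)
      end
    end
    # branch = LazyBranch.new(10, 20) { |pc| "block@#{pc}" }
    # branch.take(true)  #=> "block@10" (compiled on first hit)
    # branch.take(true)  #=> "block@10" (patched: no compiler involved)
    # branch.take(false) #=> "block@20"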

    private

    # Callee-saved: rbx, rsp, rbp, r12, r13, r14, r15
    # Caller-saved: rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11
    #
    # @param asm [RubyVM::RJIT::Assembler]
    def compile_prologue(asm, iseq, pc)
      asm.comment('RJIT entry point')

      # Save callee-saved registers used by JITed code
      asm.push(CFP)
      asm.push(EC)
      asm.push(SP)

      # Move arguments EC and CFP to dedicated registers
      asm.mov(EC, :rdi)
      asm.mov(CFP, :rsi)

      # Load sp to a dedicated register
      asm.mov(SP, [CFP, C.rb_control_frame_t.offsetof(:sp)]) # rbx = cfp->sp

      # Set up cfp->jit_return
      asm.mov(:rax, leave_exit)
      asm.mov([CFP, C.rb_control_frame_t.offsetof(:jit_return)], :rax)

      # We're compiling iseqs that we *expect* to start at `insn_idx`. But in
      # the case of optional parameters, the interpreter can set the pc to a
      # different location depending on the optional parameters. If an iseq
      # has optional parameters, we'll add a runtime check that the PC we've
      # compiled for is the same PC that the interpreter wants us to run with.
      # If they don't match, then we'll take a side exit.
      if iseq.body.param.flags.has_opt
        compile_entry_chain_guard(asm, iseq, pc)
      end
    end

    def compile_entry_chain_guard(asm, iseq, pc)
      entry_stub = EntryStub.new
      stub_addr = Assembler.new.then do |ocb_asm|
        @exit_compiler.compile_entry_stub(ocb_asm, entry_stub)
        @ocb.write(ocb_asm)
      end

      asm.comment('guard expected PC')
      asm.mov(:rax, pc)
      asm.cmp([CFP, C.rb_control_frame_t.offsetof(:pc)], :rax)

      asm.stub(entry_stub) do
        asm.jne(stub_addr)
      end
    end
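
    # Illustrative sketch, not part of this commit: why has_opt forces a PC
    # guard. With optional parameters the interpreter enters the same ISEQ
    # at different PCs depending on how many arguments were passed, so code
    # compiled for one entry PC must side-exit when entered at another.
    # `opt_example` is a hypothetical name:
    def self.opt_example(a, b = 42) = a + b
    # opt_example(1)    enters at the PC that evaluates `b = 42`;
    # opt_example(1, 2) enters at a later PC, past the default expression.
    # The cmp/jne pair emitted above distinguishes the two at runtime.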

    # @param asm [RubyVM::RJIT::Assembler]
    # @param jit [RubyVM::RJIT::JITState]
    # @param ctx [RubyVM::RJIT::Context]
    def compile_block(asm, jit:, pc:, ctx: Context.new)
      # Mark the block start address and prepare an exit code storage
      ctx = limit_block_versions(jit.iseq, pc, ctx)
      block = Block.new(iseq: jit.iseq, pc:, ctx: ctx.dup)
      jit.block = block
      asm.block(block)

      iseq = jit.iseq
      asm.comment("Block: #{iseq.body.location.label}@#{C.rb_iseq_path(iseq)}:#{iseq_lineno(iseq, pc)}")

      # Compile each insn
      index = (pc - iseq.body.iseq_encoded.to_i) / C.VALUE.size
      while index < iseq.body.iseq_size
        # Set the current instruction
        insn = self.class.decode_insn(iseq.body.iseq_encoded[index])
        jit.pc = (iseq.body.iseq_encoded + index).to_i
        jit.stack_size_for_pc = ctx.stack_size
        jit.side_exit_for_pc.clear

        # If previous instruction requested to record the boundary
        if jit.record_boundary_patch_point
          # Generate an exit to this instruction and record it
          exit_pos = Assembler.new.then do |ocb_asm|
            @exit_compiler.compile_side_exit(jit.pc, ctx, ocb_asm)
            @ocb.write(ocb_asm)
          end
          Invariants.record_global_inval_patch(asm, exit_pos)
          jit.record_boundary_patch_point = false
        end

        # In debug mode, verify our existing assumption
        if C.rjit_opts.verify_ctx && jit.at_current_insn?
          verify_ctx(jit, ctx)
        end

        case status = @insn_compiler.compile(jit, ctx, asm, insn)
        when KeepCompiling
          # For now, reset the chain depth after each instruction as only the
          # first instruction in the block can concern itself with the depth.
          ctx.chain_depth = 0

          index += insn.len
        when EndBlock
          # TODO: pad nops if entry exit exists (not needed for x86_64?)
          break
        when CantCompile
          # Rewind stack_size using ctx.with_stack_size to allow stack_size changes
          # before you return CantCompile.
          @exit_compiler.compile_side_exit(jit.pc, ctx.with_stack_size(jit.stack_size_for_pc), asm)

          # If this is the first instruction, this block never needs to be invalidated.
          if block.pc == iseq.body.iseq_encoded.to_i + index * C.VALUE.size
            block.invalidated = true
          end

          break
        else
          raise "compiling #{insn.name} returned unexpected status: #{status.inspect}"
        end
      end

      incr_counter(:compiled_block_count)
      add_block(iseq, block)
    end

    def leave_exit
      @leave_exit ||= Assembler.new.then do |asm|
        @exit_compiler.compile_leave_exit(asm)
        @ocb.write(asm)
      end
    end

    def incr_counter(name)
      if C.rjit_opts.stats
        C.rb_rjit_counters[name][0] += 1
      end
    end

    # Produce a generic context when the block version limit is hit for the block
    def limit_block_versions(iseq, pc, ctx)
      # Guard chains implement limits separately, do nothing
      if ctx.chain_depth > 0
        return ctx.dup
      end

      # If this block version we're about to add will hit the version limit
      if list_blocks(iseq, pc).size + 1 >= MAX_VERSIONS
        # Produce a generic context that stores no type information,
        # but still respects the stack_size and sp_offset constraints.
        # This new context will then match all future requests.
        generic_ctx = Context.new
        generic_ctx.stack_size = ctx.stack_size
        generic_ctx.sp_offset = ctx.sp_offset

        if ctx.diff(generic_ctx) == TypeDiff::Incompatible
          raise 'should substitute a compatible context'
        end

        return generic_ctx
      end

      return ctx.dup
    end
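
    # Illustrative sketch, not part of this commit: the version cap from
    # limit_block_versions in plain Ruby. All names here are hypothetical,
    # contexts are simplified to exact Hash keys (the real find_block scores
    # compatibility with ctx.diff), and :generic stands in for the type-free
    # Context built above.
    def self.versioned_compile(versions, ctx, max_versions: MAX_VERSIONS)
      return versions[ctx] if versions.key?(ctx)          # find_block: reuse
      ctx = :generic if versions.size + 1 >= max_versions # stop specializing
      versions[ctx] ||= "block for #{ctx.inspect}"        # compile_block stand-in
    end
    # versions = {}
    # versioned_compile(versions, :int)    # specialized version 1
    # versioned_compile(versions, :string) # specialized version 2
    # versioned_compile(versions, :array)  # specialized version 3
    # versioned_compile(versions, :hash)   # cap hit: compiles :generic instead
    # versioned_compile(versions, :float)  # reuses "block for :generic"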

    def list_blocks(iseq, pc)
      rjit_blocks(iseq)[pc]
    end

    # @param [Integer] pc
    # @param [RubyVM::RJIT::Context] ctx
    # @return [RubyVM::RJIT::Block,NilClass]
    def find_block(iseq, pc, ctx)
      versions = rjit_blocks(iseq)[pc]

      best_version = nil
      best_diff = Float::INFINITY

      versions.each do |block|
        # Note that we always prefer the first matching
        # version found because of inline-cache chains
        case ctx.diff(block.ctx)
        in TypeDiff::Compatible[diff] if diff < best_diff
          best_version = block
          best_diff = diff
        else
        end
      end

      return best_version
    end

    # @param [RubyVM::RJIT::Block] block
    def add_block(iseq, block)
      rjit_blocks(iseq)[block.pc] << block
    end

    # @param [RubyVM::RJIT::Block] block
    def remove_block(iseq, block)
      rjit_blocks(iseq)[block.pc].delete(block)
    end

    def rjit_blocks(iseq)
      # Guard against ISEQ GC at random moments
      unless C.imemo_type_p(iseq, C.imemo_iseq)
        return Hash.new { |h, k| h[k] = [] }
      end

      unless iseq.body.rjit_blocks
        iseq.body.rjit_blocks = Hash.new { |blocks, pc| blocks[pc] = [] }
        # For some reason, rb_rjit_iseq_mark didn't protect this Hash
        # from being freed. So we rely on GC_REFS to keep the Hash.
        GC_REFS << iseq.body.rjit_blocks
      end
      iseq.body.rjit_blocks
    end

    def iseq_lineno(iseq, pc)
      C.rb_iseq_line_no(iseq, (pc - iseq.body.iseq_encoded.to_i) / C.VALUE.size)
    rescue RangeError # bignum too big to convert into `unsigned long long' (RangeError)
      -1
    end

    # Verify the ctx's types and mappings against the compile-time stack, self, and locals.
    # @param jit [RubyVM::RJIT::JITState]
    # @param ctx [RubyVM::RJIT::Context]
    def verify_ctx(jit, ctx)
      # Only able to check types when at current insn
      assert(jit.at_current_insn?)

      self_val = jit.peek_at_self
      self_val_type = Type.from(self_val)

      # Verify self operand type
      assert_compatible(self_val_type, ctx.get_opnd_type(SelfOpnd))

      # Verify stack operand types
      [ctx.stack_size, MAX_TEMP_TYPES].min.times do |i|
        learned_mapping, learned_type = ctx.get_opnd_mapping(StackOpnd[i])
        stack_val = jit.peek_at_stack(i)
        val_type = Type.from(stack_val)

        case learned_mapping
        in MapToSelf
          if C.to_value(self_val) != C.to_value(stack_val)
            raise "verify_ctx: stack value was mapped to self, but values did not match:\n"\
              "stack: #{stack_val.inspect}, self: #{self_val.inspect}"
          end
        in MapToLocal[local_idx]
          local_val = jit.peek_at_local(local_idx)
          if C.to_value(local_val) != C.to_value(stack_val)
            raise "verify_ctx: stack value was mapped to local, but values did not match:\n"\
              "stack: #{stack_val.inspect}, local: #{local_val.inspect}"
          end
        in MapToStack
          # noop
        end

        # If the actual type differs from the learned type
        assert_compatible(val_type, learned_type)
      end

      # Verify local variable types
      local_table_size = jit.iseq.body.local_table_size
      [local_table_size, MAX_TEMP_TYPES].min.times do |i|
        learned_type = ctx.get_local_type(i)
        local_val = jit.peek_at_local(i)
        local_type = Type.from(local_val)

        assert_compatible(local_type, learned_type)
      end
    end

    def assert_compatible(actual_type, ctx_type)
      if actual_type.diff(ctx_type) == TypeDiff::Incompatible
        raise "verify_ctx: ctx type (#{ctx_type.type.inspect}) is incompatible with actual type (#{actual_type.type.inspect})"
      end
    end

    def assert(cond)
      unless cond
        raise "'#{cond.inspect}' was not true"
      end
    end

    def supported_platform?
      return @supported_platform if defined?(@supported_platform)
      @supported_platform = RUBY_PLATFORM.match?(/x86_64/).tap do |supported|
        warn "warning: RJIT does not support #{RUBY_PLATFORM} yet" unless supported
      end
    end
  end
end
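
# Illustrative sketch, not part of this commit: the pointer arithmetic behind
# iseq_lineno and compile_block's `index`. iseq_encoded is an array of
# VALUE-sized slots, so the byte distance from its base address, divided by
# the VALUE size, yields the instruction index. All numbers here are made up.
iseq_encoded = 0x7f30_0000_1000 # hypothetical base address of iseq_encoded
pc           = 0x7f30_0000_1018 # hypothetical current program counter
value_size   = 8                # C.VALUE.size on x86_64
index = (pc - iseq_encoded) / value_size
# index #=> 3, i.e. the pc points at the fourth VALUE slot of iseq_encoded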