#include "vm_core.h" #include "vm_callinfo.h" #include "builtin.h" #include "insns.inc" #include "insns_info.inc" #include "vm_sync.h" #include "ujit_asm.h" #include "ujit_utils.h" #include "ujit_iface.h" #include "ujit_core.h" #include "ujit_codegen.h" // Maximum number of branch instructions we can track #define MAX_BRANCHES 32768 // Table of block versions indexed by (iseq, index) tuples st_table * version_tbl; // Registered branch entries branch_t branch_entries[MAX_BRANCHES]; uint32_t num_branches = 0; /* Get an operand for the adjusted stack pointer address */ x86opnd_t ctx_sp_opnd(ctx_t* ctx, int32_t offset_bytes) { int32_t offset = (ctx->stack_size) * 8 + offset_bytes; return mem_opnd(64, REG_SP, offset); } /* Push one new value on the temp stack Return a pointer to the new stack top */ x86opnd_t ctx_stack_push(ctx_t* ctx, int type) { // Keep track of the type of the value RUBY_ASSERT(type <= RUBY_T_MASK); if (ctx->stack_size < MAX_TEMP_TYPES) ctx->temp_types[ctx->stack_size] = type; ctx->stack_size += 1; // SP points just above the topmost value int32_t offset = (ctx->stack_size - 1) * 8; return mem_opnd(64, REG_SP, offset); } /* Pop N values off the stack Return a pointer to the stack top before the pop operation */ x86opnd_t ctx_stack_pop(ctx_t* ctx, size_t n) { RUBY_ASSERT(n <= ctx->stack_size); // SP points just above the topmost value int32_t offset = (ctx->stack_size - 1) * 8; x86opnd_t top = mem_opnd(64, REG_SP, offset); // Clear the types of the popped values for (size_t i = 0; i < n; ++i) { size_t idx = ctx->stack_size - i - 1; if (idx < MAX_TEMP_TYPES) ctx->temp_types[idx] = T_NONE; } ctx->stack_size -= n; return top; } /** Get an operand pointing to a slot on the temp stack */ x86opnd_t ctx_stack_opnd(ctx_t* ctx, int32_t idx) { // SP points just above the topmost value int32_t offset = (ctx->stack_size - 1 - idx) * 8; x86opnd_t opnd = mem_opnd(64, REG_SP, offset); return opnd; } /** Get the type of the topmost value on the temp stack Returns T_NONE if unknown */ int ctx_get_top_type(ctx_t* ctx) { RUBY_ASSERT(ctx->stack_size > 0); if (ctx->stack_size > MAX_TEMP_TYPES) return T_NONE; return ctx->temp_types[ctx->stack_size - 1]; } // Add an incoming branch for a given block version static void add_incoming(block_t* p_block, uint32_t branch_idx) { // Add this branch to the list of incoming branches for the target uint32_t* new_list = malloc(sizeof(uint32_t) * p_block->num_incoming + 1); memcpy(new_list, p_block->incoming, p_block->num_incoming); new_list[p_block->num_incoming] = branch_idx; p_block->incoming = new_list; p_block->num_incoming += 1; } // Retrieve a basic block version for an (iseq, idx) tuple block_t* find_block_version(blockid_t blockid, const ctx_t* ctx) { // If there exists a version for this block id st_data_t st_version; if (rb_st_lookup(version_tbl, (st_data_t)&blockid, &st_version)) { return (block_t*)st_version; } // // TODO: use the ctx parameter to search existing versions for a match // return NULL; } // Compile a new block version immediately block_t* gen_block_version(blockid_t blockid, const ctx_t* start_ctx) { // Copy the context to avoid mutating it ctx_t ctx_copy = *start_ctx; ctx_t* ctx = &ctx_copy; // Allocate a new block version object block_t* first_block = calloc(1, sizeof(block_t)); first_block->blockid = blockid; memcpy(&first_block->ctx, ctx, sizeof(ctx_t)); // Block that is currently being compiled block_t* block = first_block; // Generate code for the first block ujit_gen_block(ctx, block); // Keep track of the new block version 
// Compile a new block version immediately
block_t* gen_block_version(blockid_t blockid, const ctx_t* start_ctx)
{
    // Copy the context to avoid mutating it
    ctx_t ctx_copy = *start_ctx;
    ctx_t* ctx = &ctx_copy;

    // Allocate a new block version object
    block_t* first_block = calloc(1, sizeof(block_t));
    first_block->blockid = blockid;
    memcpy(&first_block->ctx, ctx, sizeof(ctx_t));

    // Block that is currently being compiled
    block_t* block = first_block;

    // Generate code for the first block
    ujit_gen_block(ctx, block);

    // Keep track of the new block version
    st_insert(version_tbl, (st_data_t)&block->blockid, (st_data_t)block);
    RUBY_ASSERT(find_block_version(blockid, start_ctx) != NULL);

    // For each successor block to compile
    for (;;) {
        // If no branches were generated, stop
        if (num_branches == 0) {
            break;
        }

        // Get the last branch entry
        uint32_t branch_idx = num_branches - 1;
        branch_t* last_branch = &branch_entries[num_branches - 1];

        // If the last branch already has its target addresses resolved,
        // there is no next block to compile, so stop
        if (last_branch->dst_addrs[0] || last_branch->dst_addrs[1]) {
            break;
        }

        if (last_branch->targets[0].iseq == NULL) {
            rb_bug("invalid target for last branch");
        }

        // Allocate a new block version object
        block = calloc(1, sizeof(block_t));
        block->blockid = last_branch->targets[0];
        memcpy(&block->ctx, ctx, sizeof(ctx_t));

        // Generate code for the current block
        ujit_gen_block(ctx, block);

        // Keep track of the new block version
        st_insert(version_tbl, (st_data_t)&block->blockid, (st_data_t)block);

        // Patch the last branch address
        last_branch->dst_addrs[0] = cb_get_ptr(cb, block->start_pos);
        add_incoming(block, branch_idx);
        RUBY_ASSERT(block->start_pos == last_branch->end_pos);
    }

    return first_block;
}

// Generate a block version that is an entry point inserted into an iseq
uint8_t* gen_entry_point(const rb_iseq_t *iseq, uint32_t insn_idx)
{
    // The entry context makes no assumptions about types
    blockid_t blockid = { iseq, insn_idx };
    ctx_t ctx = { { 0 }, 0 };

    // Write the interpreter entry prologue
    uint8_t* code_ptr = ujit_entry_prologue();

    // Try to generate code for the entry block
    block_t* block = gen_block_version(blockid, &ctx);

    // If we couldn't generate any code (the block is empty)
    if (block->end_idx == insn_idx) {
        return NULL;
    }

    return code_ptr;
}

// Called by the generated code when a branch stub is executed
// Triggers compilation of branches and code patching
uint8_t* branch_stub_hit(uint32_t branch_idx, uint32_t target_idx)
{
    uint8_t* dst_addr;

    RB_VM_LOCK_ENTER();

    RUBY_ASSERT(branch_idx < num_branches);
    RUBY_ASSERT(target_idx < 2);
    branch_t *branch = &branch_entries[branch_idx];
    blockid_t target = branch->targets[target_idx];
    ctx_t* target_ctx = &branch->target_ctxs[target_idx];

    //fprintf(stderr, "\nstub hit, branch idx: %d, target idx: %d\n", branch_idx, target_idx);
    //fprintf(stderr, "blockid.iseq=%p, blockid.idx=%d\n", target.iseq, target.idx);

    // If the branch sits at the current end of the code block,
    // the target block can be generated as the fall-through case
    if (cb->write_pos == branch->end_pos) {
        //fprintf(stderr, "target idx %d will be placed next\n", target_idx);
        branch->shape = (uint8_t)target_idx;

        // Rewrite the branch with the new, potentially more compact shape
        cb_set_pos(cb, branch->start_pos);
        branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
        RUBY_ASSERT(cb->write_pos <= branch->end_pos);
    }

    // Try to find a compiled version of this block
    block_t* p_block = find_block_version(target, target_ctx);

    // If this block hasn't yet been compiled
    if (!p_block) {
        p_block = gen_block_version(target, target_ctx);
    }

    // Add this branch to the list of incoming branches for the target
    add_incoming(p_block, branch_idx);

    // Update the branch target address
    dst_addr = cb_get_ptr(cb, p_block->start_pos);
    branch->dst_addrs[target_idx] = dst_addr;

    // Rewrite the branch with the new jump target address
    RUBY_ASSERT(branch->dst_addrs[0] != NULL);
    uint32_t cur_pos = cb->write_pos;
    cb_set_pos(cb, branch->start_pos);
    branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
    RUBY_ASSERT(cb->write_pos <= branch->end_pos);
    branch->end_pos = cb->write_pos;
    cb_set_pos(cb, cur_pos);

    RB_VM_LOCK_LEAVE();

    // Return a pointer to the compiled block version
    return dst_addr;
}
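/*
Example (illustrative sketch): what a branchgen_fn for a conditional
branch could look like. The three shapes let branch_stub_hit() rewrite
the branch in a more compact form when one of its targets ends up laid
out immediately after it. The name gen_branchif_example is an
assumption, as is the availability of jz_ptr()/jnz_ptr() in ujit_asm;
the real branch generators live in ujit_codegen.c.

    static void gen_branchif_example(codeblock_t* cb, uint8_t* target0, uint8_t* target1, uint8_t shape)
    {
        switch (shape)
        {
            case SHAPE_NEXT0:
                // Target 0 is the fall-through case; jump only if false
                jz_ptr(cb, target1);
                break;

            case SHAPE_NEXT1:
                // Target 1 is the fall-through case; jump only if true
                jnz_ptr(cb, target0);
                break;

            case SHAPE_DEFAULT:
                // Neither target is adjacent; emit both jumps
                jnz_ptr(cb, target0);
                jmp_ptr(cb, target1);
                break;
        }
    }
*/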
// Get a version or stub corresponding to a branch target
// TODO: need incoming and target contexts
uint8_t* get_branch_target(
    blockid_t target,
    const ctx_t* ctx,
    uint32_t branch_idx,
    uint32_t target_idx
)
{
    //fprintf(stderr, "get_branch_target, block (%p, %d)\n", target.iseq, target.idx);

    block_t* p_block = find_block_version(target, ctx);

    if (p_block) {
        // Add an incoming branch for this version
        add_incoming(p_block, branch_idx);
        return cb_get_ptr(cb, p_block->start_pos);
    }

    // Generate an outlined stub that will call
    // branch_stub_hit(uint32_t branch_idx, uint32_t target_idx)
    uint8_t* stub_addr = cb_get_ptr(ocb, ocb->write_pos);

    // Save the ujit registers
    // (REG_SP is pushed twice to keep the stack 16-byte aligned for the call)
    push(ocb, REG_CFP);
    push(ocb, REG_EC);
    push(ocb, REG_SP);
    push(ocb, REG_SP);

    mov(ocb, RDI, imm_opnd(branch_idx));
    mov(ocb, RSI, imm_opnd(target_idx));
    call_ptr(ocb, REG0, (void *)&branch_stub_hit);

    // Restore the ujit registers
    pop(ocb, REG_SP);
    pop(ocb, REG_SP);
    pop(ocb, REG_EC);
    pop(ocb, REG_CFP);

    // Jump to the address returned by the branch_stub_hit call
    jmp_rm(ocb, RAX);

    return stub_addr;
}

void gen_branch(
    const ctx_t* src_ctx,
    blockid_t target0,
    const ctx_t* ctx0,
    blockid_t target1,
    const ctx_t* ctx1,
    branchgen_fn gen_fn
)
{
    RUBY_ASSERT(target0.iseq != NULL);
    RUBY_ASSERT(target1.iseq != NULL);
    RUBY_ASSERT(num_branches < MAX_BRANCHES);
    uint32_t branch_idx = num_branches++;

    // Get the branch targets or stubs
    uint8_t* dst_addr0 = get_branch_target(target0, ctx0, branch_idx, 0);
    uint8_t* dst_addr1 = get_branch_target(target1, ctx1, branch_idx, 1);

    // Call the branch generation function
    uint32_t start_pos = cb->write_pos;
    gen_fn(cb, dst_addr0, dst_addr1, SHAPE_DEFAULT);
    uint32_t end_pos = cb->write_pos;

    // Register this branch entry
    branch_t branch_entry = {
        start_pos,
        end_pos,
        *src_ctx,
        { target0, target1 },
        { *ctx0, *ctx1 },
        { dst_addr0, dst_addr1 },
        gen_fn,
        SHAPE_DEFAULT
    };

    branch_entries[branch_idx] = branch_entry;
}

void gen_jump_branch(codeblock_t* cb, uint8_t* target0, uint8_t* target1, uint8_t shape)
{
    switch (shape)
    {
        case SHAPE_NEXT0:
            // The target falls through; no jump needed
            break;

        case SHAPE_NEXT1:
            // A one-target jump can never fall through to target 1
            RUBY_ASSERT(false);
            break;

        case SHAPE_DEFAULT:
            jmp_ptr(cb, target0);
            break;
    }
}

void gen_direct_jump(
    const ctx_t* ctx,
    blockid_t target0
)
{
    RUBY_ASSERT(target0.iseq != NULL);
    RUBY_ASSERT(num_branches < MAX_BRANCHES);
    uint32_t branch_idx = num_branches++;

    // Branch target or stub address
    uint8_t* dst_addr0;

    // Shape of the branch
    uint8_t branch_shape;

    // Branch start and end positions
    uint32_t start_pos;
    uint32_t end_pos;

    block_t* p_block = find_block_version(target0, ctx);

    // If the version already exists
    if (p_block) {
        add_incoming(p_block, branch_idx);
        dst_addr0 = cb_get_ptr(cb, p_block->start_pos);
        branch_shape = SHAPE_DEFAULT;

        // Call the branch generation function
        start_pos = cb->write_pos;
        gen_jump_branch(cb, dst_addr0, NULL, branch_shape);
        end_pos = cb->write_pos;
    }
    else {
        // The target block will follow next
        // It will be compiled in gen_block_version()
        dst_addr0 = NULL;
        branch_shape = SHAPE_NEXT0;
        start_pos = cb->write_pos;
        end_pos = cb->write_pos;
    }

    // Register this branch entry
    branch_t branch_entry = {
        start_pos,
        end_pos,
        *ctx,
        { target0, BLOCKID_NULL },
        { *ctx, *ctx },
        { dst_addr0, NULL },
        gen_jump_branch,
        branch_shape
    };

    branch_entries[branch_idx] = branch_entry;
}
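/*
Example (illustrative sketch): how a codegen routine could use
gen_branch() to wire up a two-way branch. Both successors share the
current context here; gen_branch() records the branch entry so that
stubs and invalidation can later rewrite it. The iseq variable and the
jump_idx/next_idx indices are placeholders for this sketch, and
gen_branchif_example refers to the hypothetical generator sketched
above.

    // Compute the two successor blocks from the instruction's operands
    blockid_t jump_block = { iseq, jump_idx };
    blockid_t next_block = { iseq, next_idx };

    // Emit the test, then generate the branch with a default shape
    gen_branch(ctx, jump_block, ctx, next_block, ctx, gen_branchif_example);
*/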
// Invalidate one specific block version
void invalidate(block_t* block)
{
    fprintf(stderr, "invalidating block (%p, %d)\n", block->blockid.iseq, block->blockid.idx);
    fprintf(stderr, "block=%p\n", block);

    // Find the first version for this blockid
    block_t* first_block = NULL;
    rb_st_lookup(version_tbl, (st_data_t)&block->blockid, (st_data_t*)&first_block);
    RUBY_ASSERT(first_block != NULL);

    // Remove the version object from the map so we can re-generate stubs
    if (first_block == block) {
        st_data_t key = (st_data_t)&block->blockid;
        int success = st_delete(version_tbl, &key, NULL);
        RUBY_ASSERT(success);
    }
    else {
        // Unlink the block from the version list
        bool deleted = false;
        for (block_t* cur = first_block; cur != NULL; cur = cur->next) {
            if (cur->next == block) {
                cur->next = cur->next->next;
                deleted = true;
                break;
            }
        }
        RUBY_ASSERT(deleted);
    }

    // Get a pointer to the generated code for this block
    uint8_t* code_ptr = cb_get_ptr(cb, block->start_pos);

    // For each incoming branch
    for (uint32_t i = 0; i < block->num_incoming; ++i) {
        uint32_t branch_idx = block->incoming[i];
        branch_t* branch = &branch_entries[branch_idx];
        uint32_t target_idx = (branch->dst_addrs[0] == code_ptr)? 0:1;
        //fprintf(stderr, "branch_idx=%d, target_idx=%d\n", branch_idx, target_idx);
        //fprintf(stderr, "blockid.iseq=%p, blockid.idx=%d\n", block->blockid.iseq, block->blockid.idx);

        // Create a stub for this branch target
        branch->dst_addrs[target_idx] = get_branch_target(
            block->blockid,
            &block->ctx,
            branch_idx,
            target_idx
        );

        // Check if the invalidated block immediately follows the branch
        bool target_next = block->start_pos == branch->end_pos;

        if (target_next) {
            // The new block will no longer be adjacent
            branch->shape = SHAPE_DEFAULT;
        }

        // Rewrite the branch with the new jump target address
        RUBY_ASSERT(branch->dst_addrs[0] != NULL);
        uint32_t cur_pos = cb->write_pos;
        cb_set_pos(cb, branch->start_pos);
        branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
        branch->end_pos = cb->write_pos;
        cb_set_pos(cb, cur_pos);

        if (target_next && branch->end_pos > block->end_pos) {
            rb_bug("ujit invalidate rewrote branch past block end");
        }
    }

    // If the block is an entry point, it needs to be unmapped from its iseq
    const rb_iseq_t* iseq = block->blockid.iseq;
    uint32_t idx = block->blockid.idx;
    VALUE* entry_pc = &iseq->body->iseq_encoded[idx];
    int entry_opcode = opcode_at_pc(iseq, entry_pc);

    // TODO: unmap_addr2insn in ujit_iface.c? Maybe we can write a function to encompass this logic?
    // Should check how it's used in exit and side-exit
    const void * const *handler_table = rb_vm_get_insns_address_table();
    void* handler_addr = (void*)handler_table[entry_opcode];
    iseq->body->iseq_encoded[idx] = (VALUE)handler_addr;

    //
    // Optional: may want to recompile a new deoptimized entry point
    //

    // TODO:
    // Call continuation addresses on the stack can also be atomically replaced by jumps going to the stub.
    // For now this isn't an issue

    // Free the old block version object and its incoming branch list
    free(block->incoming);
    free(block);

    fprintf(stderr, "invalidation done\n");
}

int blockid_cmp(st_data_t arg0, st_data_t arg1)
{
    const blockid_t *block0 = (const blockid_t*)arg0;
    const blockid_t *block1 = (const blockid_t*)arg1;
    return (block0->iseq != block1->iseq) || (block0->idx != block1->idx);
}

st_index_t blockid_hash(st_data_t arg)
{
    const blockid_t *blockid = (const blockid_t*)arg;
    st_index_t hash0 = st_numhash((st_data_t)blockid->iseq);
    st_index_t hash1 = st_numhash((st_data_t)(uint64_t)blockid->idx);

    // Use XOR to combine the hashes
    return hash0 ^ hash1;
}

static const struct st_hash_type hashtype_blockid = {
    blockid_cmp,
    blockid_hash,
};

void ujit_init_core(void)
{
    // Initialize the version hash table
    version_tbl = st_init_table(&hashtype_blockid);
}
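/*
Example (illustrative): like strcmp(), the st_hash_type contract is
that the compare function returns 0 for equal keys, so two blockid_t
values with the same (iseq, idx) pair map to the same table entry.
The iseq pointer and index 42 below are placeholders for this sketch:

    blockid_t a = { iseq, 42 };
    blockid_t b = { iseq, 42 };
    assert(blockid_cmp((st_data_t)&a, (st_data_t)&b) == 0);
    assert(blockid_hash((st_data_t)&a) == blockid_hash((st_data_t)&b));
*/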