diff options
author | Alan Wu <XrXr@users.noreply.github.com> | 2021-02-25 15:10:38 -0500 |
---|---|---|
committer | Alan Wu <XrXr@users.noreply.github.com> | 2021-10-20 18:19:30 -0400 |
commit | 57977ba30d35f6f9de3d2802d1894e1f0d23286d (patch) | |
tree | e7f5eba7e70cd10566572072e5938652d459a01f | |
parent | f93f3d6aa164ea5bc01e596c84b7c525c41bb852 (diff) |
uJIT: Implement opt_getinlinecache
* ujit: implement opt_getinlinecache
Aggressively bet that writes to constants don't happen and invalidate
all opt_getinlinecache blocks on any and all constant writes.
Use alignment padding on block_t to track this assumption. No change to
sizeof(block_t).
* Fix compile warnings when not RUBY_DEBUG
* Fix reversed condition
* Switch to st_table to keep track of assumptions
Co-authored-by: Aaron Patterson <aaron.patterson@gmail.com>
Co-authored-by: Maxime Chevalier-Boisvert <maximechevalierb@gmail.com>
-rw-r--r-- | common.mk | 1 | ||||
-rw-r--r-- | ractor.c | 2 | ||||
-rw-r--r-- | ujit.h | 1 | ||||
-rw-r--r-- | ujit_codegen.c | 57 | ||||
-rw-r--r-- | ujit_core.c | 9 | ||||
-rw-r--r-- | ujit_core.h | 9 | ||||
-rw-r--r-- | ujit_iface.c | 123 | ||||
-rw-r--r-- | ujit_iface.h | 7 |
8 files changed, 165 insertions, 44 deletions
@@ -10590,6 +10590,7 @@ ractor.$(OBJEXT): {$(VPATH)}thread.h ractor.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h ractor.$(OBJEXT): {$(VPATH)}thread_native.h ractor.$(OBJEXT): {$(VPATH)}transient_heap.h +ractor.$(OBJEXT): {$(VPATH)}ujit.h ractor.$(OBJEXT): {$(VPATH)}variable.h ractor.$(OBJEXT): {$(VPATH)}vm_core.h ractor.$(OBJEXT): {$(VPATH)}vm_debug.h @@ -16,6 +16,7 @@ #include "variable.h" #include "gc.h" #include "transient_heap.h" +#include "ujit.h" VALUE rb_cRactor; @@ -1604,6 +1605,7 @@ ractor_create(rb_execution_context_t *ec, VALUE self, VALUE loc, VALUE name, VAL r->verbose = cr->verbose; r->debug = cr->debug; + rb_ujit_before_ractor_spawn(); rb_thread_create_ractor(r, args, block); RB_GC_GUARD(rv); @@ -56,5 +56,6 @@ void rb_ujit_constant_state_changed(void); void rb_ujit_iseq_mark(const struct rb_iseq_constant_body *body); void rb_ujit_iseq_update_references(const struct rb_iseq_constant_body *body); void rb_ujit_iseq_free(const struct rb_iseq_constant_body *body); +void rb_ujit_before_ractor_spawn(void); #endif // #ifndef UJIT_H diff --git a/ujit_codegen.c b/ujit_codegen.c index 6ac6dc013c..ad48029a31 100644 --- a/ujit_codegen.c +++ b/ujit_codegen.c @@ -60,19 +60,20 @@ jit_get_arg(jitstate_t* jit, size_t arg_idx) return *(jit->pc + arg_idx + 1); } -// Load a pointer to a GC'd object into a register and keep track of the reference +// Load a VALUE into a register and keep track of the reference if it is on the GC heap. static void jit_mov_gc_ptr(jitstate_t* jit, codeblock_t* cb, x86opnd_t reg, VALUE ptr) { RUBY_ASSERT(reg.type == OPND_REG && reg.num_bits == 64); - RUBY_ASSERT(!SPECIAL_CONST_P(ptr)); mov(cb, reg, const_ptr_opnd((void*)ptr)); // The pointer immediate is encoded as the last part of the mov written out. 
uint32_t ptr_offset = cb->write_pos - sizeof(VALUE); - if (!rb_darray_append(&jit->block->gc_object_offsets, ptr_offset)) { - rb_bug("allocation failed"); + if (!SPECIAL_CONST_P(ptr)) { + if (!rb_darray_append(&jit->block->gc_object_offsets, ptr_offset)) { + rb_bug("allocation failed"); + } } } @@ -252,12 +253,14 @@ ujit_gen_block(ctx_t* ctx, block_t* block) break; } +#if RUBY_DEBUG // Accumulate stats about instructions executed if (rb_ujit_opts.gen_stats) { // Count instructions executed by the JIT mov(cb, REG0, const_ptr_opnd((void *)&rb_ujit_exec_insns_count)); add(cb, mem_opnd(64, REG0, 0), imm_opnd(1)); } +#endif //fprintf(stderr, "compiling %d: %s\n", insn_idx, insn_name(opcode)); //print_str(cb, insn_name(opcode)); @@ -1115,6 +1118,7 @@ gen_oswb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_c // Pointer to the klass field of the receiver &(recv->klass) x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass)); + // FIXME: This leaks when st_insert raises NoMemoryError assume_method_lookup_stable(cd->cc, cme, jit->block); // Bail if receiver class is different from compile-time call cache class @@ -1570,6 +1574,48 @@ gen_leave(jitstate_t* jit, ctx_t* ctx) return true; } +RUBY_EXTERN rb_serial_t ruby_vm_global_constant_state; +static bool +gen_opt_getinlinecache(jitstate_t *jit, ctx_t *ctx) +{ + VALUE jump_offset = jit_get_arg(jit, 0); + VALUE const_cache_as_value = jit_get_arg(jit, 1); + IC ic = (IC)const_cache_as_value; + + // See vm_ic_hit_p(). + struct iseq_inline_constant_cache_entry *ice = ic->entry; + if (!ice) return false; // cache not filled + if (ice->ic_serial != ruby_vm_global_constant_state) { + // Cache miss at compile time. + return false; + } + if (ice->ic_cref) { + // Only compile for caches that don't care about lexical scope. + return false; + } + + // Optimize for single ractor mode. 
+ // FIXME: This leaks when st_insert raises NoMemoryError + if (!assume_single_ractor_mode(jit->block)) return false; + + // Invalidate output code on any and all constant writes + // FIXME: This leaks when st_insert raises NoMemoryError + if (!assume_stable_global_constant_state(jit->block)) return false; + + x86opnd_t stack_top = ctx_stack_push(ctx, T_NONE); + jit_mov_gc_ptr(jit, cb, REG0, ice->value); + mov(cb, stack_top, REG0); + + // Jump over the code for filling the cache + uint32_t jump_idx = jit_next_insn_idx(jit) + (int32_t)jump_offset; + gen_direct_jump( + ctx, + (blockid_t){ .iseq = jit->iseq, .idx = jump_idx } + ); + + return true; +} + void ujit_reg_op(int opcode, codegen_fn gen_fn, bool is_branch) { // Check that the op wasn't previously registered @@ -1620,6 +1666,9 @@ ujit_init_codegen(void) ujit_reg_op(BIN(opt_and), gen_opt_and, false); ujit_reg_op(BIN(opt_minus), gen_opt_minus, false); ujit_reg_op(BIN(opt_plus), gen_opt_plus, false); + + // Map branch instruction opcodes to codegen functions + ujit_reg_op(BIN(opt_getinlinecache), gen_opt_getinlinecache, true); ujit_reg_op(BIN(branchif), gen_branchif, true); ujit_reg_op(BIN(branchunless), gen_branchunless, true); ujit_reg_op(BIN(jump), gen_jump, true); diff --git a/ujit_core.c b/ujit_core.c index 0e6492b4f8..fea49ce7f6 100644 --- a/ujit_core.c +++ b/ujit_core.c @@ -175,8 +175,10 @@ add_block_version(blockid_t blockid, block_t* block) rb_bug("allocation failed"); } +#if RUBY_DEBUG // First block compiled for this iseq rb_compiled_iseq_count++; +#endif } block_t *first_version = get_first_version(iseq, blockid.idx); @@ -199,7 +201,7 @@ add_block_version(blockid_t blockid, block_t* block) RB_OBJ_WRITTEN(iseq, Qundef, block->dependencies.cc); RB_OBJ_WRITTEN(iseq, Qundef, block->dependencies.cme); - // Run write barrier for all objects in generated code. + // Run write barriers for all objects in generated code. 
uint32_t *offset_element; rb_darray_foreach(block->gc_object_offsets, offset_idx, offset_element) { uint32_t offset_to_value = *offset_element; @@ -601,9 +603,12 @@ void ujit_free_block(block_t *block) { ujit_unlink_method_lookup_dependency(block); + ujit_block_assumptions_free(block); + rb_darray_free(block->incoming); - free(block); rb_darray_free(block->gc_object_offsets); + + free(block); } // Invalidate one specific block version diff --git a/ujit_core.h b/ujit_core.h index 64de5ad979..1fe58856fa 100644 --- a/ujit_core.h +++ b/ujit_core.h @@ -107,9 +107,6 @@ typedef struct ujit_block_version // Bytecode sequence (iseq, idx) this is a version of blockid_t blockid; - // Index one past the last instruction in the iseq - uint32_t end_idx; - // Context at the start of the block ctx_t ctx; @@ -120,6 +117,9 @@ typedef struct ujit_block_version // List of incoming branches indices int32_array_t incoming; + // Offsets for GC managed objects in the mainline code block + int32_array_t gc_object_offsets; + // Next block version for this blockid (singly-linked list) struct ujit_block_version *next; @@ -132,6 +132,9 @@ typedef struct ujit_block_version VALUE cme; VALUE iseq; } dependencies; + + // Index one past the last instruction in the iseq + uint32_t end_idx; } block_t; // Context object methods diff --git a/ujit_iface.c b/ujit_iface.c index 64f9fe9a81..f2eb657b3b 100644 --- a/ujit_iface.c +++ b/ujit_iface.c @@ -24,10 +24,12 @@ VALUE cUjitBlock; VALUE cUjitDisasm; VALUE cUjitDisasmInsn; +#if RUBY_DEBUG static int64_t vm_insns_count = 0; int64_t rb_ujit_exec_insns_count = 0; static int64_t exit_op_count[VM_INSTRUCTION_SIZE] = { 0 }; int64_t rb_compiled_iseq_count = 0; +#endif // Machine code blocks (executable memory) extern codeblock_t *cb; @@ -45,7 +47,7 @@ static const rb_data_type_t ujit_block_type = { }; // Write the uJIT entry point pre-call bytes -void +void cb_write_pre_call_bytes(codeblock_t* cb) { for (size_t i = 0; i < sizeof(ujit_with_ec_pre_call_bytes); 
++i) @@ -53,7 +55,7 @@ cb_write_pre_call_bytes(codeblock_t* cb) } // Write the uJIT exit post-call bytes -void +void cb_write_post_call_bytes(codeblock_t* cb) { for (size_t i = 0; i < sizeof(ujit_with_ec_post_call_bytes); ++i) @@ -129,46 +131,74 @@ struct ujit_root_struct { int unused; // empty structs are not legal in C99 }; -// Map cme_or_cc => [[iseq, offset]]. An entry in the map means compiled code at iseq[offset] -// is only valid when cme_or_cc is valid -static st_table *method_lookup_dependency; - -struct compiled_region { - block_t *block; -}; +static void +block_array_shuffle_remove(rb_ujit_block_array_t blocks, block_t *to_remove) { + block_t **elem; + rb_darray_foreach(blocks, i, elem) { + if (*elem == to_remove) { + // Remove the current element by moving the last element here then popping. + *elem = rb_darray_get(blocks, rb_darray_size(blocks) - 1); + rb_darray_pop_back(blocks); + break; + } + } +} -typedef rb_darray(struct compiled_region) block_array_t; +// Map cme_or_cc => [block] +static st_table *method_lookup_dependency; static int add_lookup_dependency_i(st_data_t *key, st_data_t *value, st_data_t data, int existing) { - struct compiled_region *region = (struct compiled_region *)data; + block_t *new_block = (block_t *)data; - block_array_t regions = NULL; + rb_ujit_block_array_t blocks = NULL; if (existing) { - regions = (block_array_t )*value; + blocks = (rb_ujit_block_array_t)*value; } - if (!rb_darray_append(®ions, *region)) { + if (!rb_darray_append(&blocks, new_block)) { rb_bug("ujit: failed to add method lookup dependency"); // TODO: we could bail out of compiling instead } - *value = (st_data_t)regions; + *value = (st_data_t)blocks; return ST_CONTINUE; } -// Remember that the currently compiling region is only valid while cme and cc are valid +// Remember that the currently compiling block is only valid while cme and cc are valid void assume_method_lookup_stable(const struct rb_callcache *cc, const rb_callable_method_entry_t *cme, 
block_t *block) { RUBY_ASSERT(block != NULL); RUBY_ASSERT(block->dependencies.cc == 0 && block->dependencies.cme == 0); - struct compiled_region region = { .block = block }; - st_update(method_lookup_dependency, (st_data_t)cme, add_lookup_dependency_i, (st_data_t)®ion); + st_update(method_lookup_dependency, (st_data_t)cme, add_lookup_dependency_i, (st_data_t)block); block->dependencies.cme = (VALUE)cme; - st_update(method_lookup_dependency, (st_data_t)cc, add_lookup_dependency_i, (st_data_t)®ion); + st_update(method_lookup_dependency, (st_data_t)cc, add_lookup_dependency_i, (st_data_t)block); block->dependencies.cc = (VALUE)cc; } +static st_table *blocks_assuming_single_ractor_mode; + +// Can raise NoMemoryError. +RBIMPL_ATTR_NODISCARD() +bool +assume_single_ractor_mode(block_t *block) { + if (rb_multi_ractor_p()) return false; + + st_insert(blocks_assuming_single_ractor_mode, (st_data_t)block, 1); + return true; +} + +static st_table *blocks_assuming_stable_global_constant_state; + +// Assume that the global constant state has not changed since call to this function. +// Can raise NoMemoryError. 
+RBIMPL_ATTR_NODISCARD() +bool +assume_stable_global_constant_state(block_t *block) { + st_insert(blocks_assuming_stable_global_constant_state, (st_data_t)block, 1); + return true; +} + static int ujit_root_mark_i(st_data_t k, st_data_t v, st_data_t ignore) { @@ -253,11 +283,11 @@ rb_ujit_method_lookup_change(VALUE cme_or_cc) // Invalidate all regions that depend on the cme or cc st_data_t key = (st_data_t)cme_or_cc, image; if (st_delete(method_lookup_dependency, &key, &image)) { - block_array_t array = (void *)image; - struct compiled_region *elem; + rb_ujit_block_array_t array = (void *)image; + block_t **elem; rb_darray_foreach(array, i, elem) { - invalidate_block_version(elem->block); + invalidate_block_version(*elem); } rb_darray_free(array); @@ -272,19 +302,9 @@ remove_method_lookup_dependency(VALUE cc_or_cme, block_t *block) { st_data_t key = (st_data_t)cc_or_cme, image; if (st_lookup(method_lookup_dependency, key, &image)) { - block_array_t array = (void *)image; - struct compiled_region *elem; + rb_ujit_block_array_t array = (void *)image; - // Find the block we are removing - rb_darray_foreach(array, i, elem) { - if (elem->block == block) { - // Remove the current element by moving the last element here. - // Order in the region array doesn't matter. 
- *elem = rb_darray_get(array, rb_darray_size(array) - 1); - rb_darray_pop_back(array); - break; - } - } + block_array_shuffle_remove(array, block); if (rb_darray_size(array) == 0) { st_delete(method_lookup_dependency, &key, NULL); @@ -301,6 +321,19 @@ ujit_unlink_method_lookup_dependency(block_t *block) } void +ujit_block_assumptions_free(block_t *block) +{ + st_data_t as_st_data = (st_data_t)block; + if (blocks_assuming_stable_global_constant_state) { + st_delete(blocks_assuming_stable_global_constant_state, &as_st_data, NULL); + } + + if (blocks_assuming_single_ractor_mode) { + st_delete(blocks_assuming_single_ractor_mode, &as_st_data, NULL); + } +} + +void rb_ujit_compile_iseq(const rb_iseq_t *iseq) { #if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE @@ -411,11 +444,28 @@ rb_ujit_bop_redefined(VALUE klass, const rb_method_entry_t *me, enum ruby_basic_ //fprintf(stderr, "bop redefined\n"); } +static int +block_invalidation_iterator(st_data_t key, st_data_t value, st_data_t data) { + block_t *block = (block_t *)key; + invalidate_block_version(block); // Thankfully, st_table supports deleting while iterating + return ST_CONTINUE; +} + /* Called when the constant state changes */ void rb_ujit_constant_state_changed(void) { - //fprintf(stderr, "bop redefined\n"); + if (blocks_assuming_stable_global_constant_state) { + st_foreach(blocks_assuming_stable_global_constant_state, block_invalidation_iterator, 0); + } +} + +void +rb_ujit_before_ractor_spawn(void) +{ + if (blocks_assuming_single_ractor_mode) { + st_foreach(blocks_assuming_single_ractor_mode, block_invalidation_iterator, 0); + } } #if HAVE_LIBCAPSTONE @@ -651,6 +701,9 @@ rb_ujit_init(struct rb_ujit_options *options) rb_ujit_opts.call_threshold = 2; } + blocks_assuming_stable_global_constant_state = st_init_numtable(); + blocks_assuming_single_ractor_mode = st_init_numtable(); + ujit_init_core(); ujit_init_codegen(); diff --git a/ujit_iface.h b/ujit_iface.h index 620dc57dd1..accaa1a37f 100644 --- 
a/ujit_iface.h +++ b/ujit_iface.h @@ -10,6 +10,7 @@ #include "stdint.h" #include "stdbool.h" #include "internal.h" +#include "ruby/internal/attr/nodiscard.h" #include "vm_core.h" #include "vm_callinfo.h" #include "builtin.h" @@ -32,9 +33,15 @@ int opcode_at_pc(const rb_iseq_t *iseq, const VALUE *pc); void check_cfunc_dispatch(VALUE receiver, struct rb_call_data *cd, void *callee, rb_callable_method_entry_t *compile_time_cme); bool cfunc_needs_frame(const rb_method_cfunc_t *cfunc); + void assume_method_lookup_stable(const struct rb_callcache *cc, const rb_callable_method_entry_t *cme, block_t* block); +RBIMPL_ATTR_NODISCARD() bool assume_single_ractor_mode(block_t *block); +RBIMPL_ATTR_NODISCARD() bool assume_stable_global_constant_state(block_t *block); + // this function *must* return passed exit_pc const VALUE *rb_ujit_count_side_exit_op(const VALUE *exit_pc); + void ujit_unlink_method_lookup_dependency(block_t *block); +void ujit_block_assumptions_free(block_t *block); #endif // #ifndef UJIT_IFACE_H |