summary refs log tree commit diff
diff options
context:
space:
mode:
author Alan Wu <XrXr@users.noreply.github.com> 2021-02-25 15:10:38 -0500
committer Alan Wu <XrXr@users.noreply.github.com> 2021-10-20 18:19:30 -0400
commit 57977ba30d35f6f9de3d2802d1894e1f0d23286d (patch)
tree e7f5eba7e70cd10566572072e5938652d459a01f
parent f93f3d6aa164ea5bc01e596c84b7c525c41bb852 (diff)
uJIT: Implement opt_getinlinecache
* ujit: implement opt_getinlinecache

  Aggressively bet that writes to constants don't happen and invalidate
  all opt_getinlinecache blocks on any and all constant writes.

  Use alignment padding on block_t to track this assumption. No change to
  sizeof(block_t).

* Fix compile warnings when not RUBY_DEBUG

* Fix reversed condition

* Switch to st_table to keep track of assumptions

Co-authored-by: Aaron Patterson <aaron.patterson@gmail.com>
Co-authored-by: Maxime Chevalier-Boisvert <maximechevalierb@gmail.com>
-rw-r--r-- common.mk 1
-rw-r--r-- ractor.c 2
-rw-r--r-- ujit.h 1
-rw-r--r-- ujit_codegen.c 57
-rw-r--r-- ujit_core.c 9
-rw-r--r-- ujit_core.h 9
-rw-r--r-- ujit_iface.c 123
-rw-r--r-- ujit_iface.h 7
8 files changed, 165 insertions, 44 deletions
diff --git a/common.mk b/common.mk
index abe03d487e..cdbbbf3c4f 100644
--- a/common.mk
+++ b/common.mk
@@ -10590,6 +10590,7 @@ ractor.$(OBJEXT): {$(VPATH)}thread.h
ractor.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h
ractor.$(OBJEXT): {$(VPATH)}thread_native.h
ractor.$(OBJEXT): {$(VPATH)}transient_heap.h
+ractor.$(OBJEXT): {$(VPATH)}ujit.h
ractor.$(OBJEXT): {$(VPATH)}variable.h
ractor.$(OBJEXT): {$(VPATH)}vm_core.h
ractor.$(OBJEXT): {$(VPATH)}vm_debug.h
diff --git a/ractor.c b/ractor.c
index bfc61f99fe..fa78d16411 100644
--- a/ractor.c
+++ b/ractor.c
@@ -16,6 +16,7 @@
#include "variable.h"
#include "gc.h"
#include "transient_heap.h"
+#include "ujit.h"
VALUE rb_cRactor;
@@ -1604,6 +1605,7 @@ ractor_create(rb_execution_context_t *ec, VALUE self, VALUE loc, VALUE name, VAL
r->verbose = cr->verbose;
r->debug = cr->debug;
+ rb_ujit_before_ractor_spawn();
rb_thread_create_ractor(r, args, block);
RB_GC_GUARD(rv);
diff --git a/ujit.h b/ujit.h
index 6957e9178f..f3c2bffae6 100644
--- a/ujit.h
+++ b/ujit.h
@@ -56,5 +56,6 @@ void rb_ujit_constant_state_changed(void);
void rb_ujit_iseq_mark(const struct rb_iseq_constant_body *body);
void rb_ujit_iseq_update_references(const struct rb_iseq_constant_body *body);
void rb_ujit_iseq_free(const struct rb_iseq_constant_body *body);
+void rb_ujit_before_ractor_spawn(void);
#endif // #ifndef UJIT_H
diff --git a/ujit_codegen.c b/ujit_codegen.c
index 6ac6dc013c..ad48029a31 100644
--- a/ujit_codegen.c
+++ b/ujit_codegen.c
@@ -60,19 +60,20 @@ jit_get_arg(jitstate_t* jit, size_t arg_idx)
return *(jit->pc + arg_idx + 1);
}
-// Load a pointer to a GC'd object into a register and keep track of the reference
+// Load a VALUE into a register and keep track of the reference if it is on the GC heap.
static void
jit_mov_gc_ptr(jitstate_t* jit, codeblock_t* cb, x86opnd_t reg, VALUE ptr)
{
RUBY_ASSERT(reg.type == OPND_REG && reg.num_bits == 64);
- RUBY_ASSERT(!SPECIAL_CONST_P(ptr));
mov(cb, reg, const_ptr_opnd((void*)ptr));
// The pointer immediate is encoded as the last part of the mov written out.
uint32_t ptr_offset = cb->write_pos - sizeof(VALUE);
- if (!rb_darray_append(&jit->block->gc_object_offsets, ptr_offset)) {
- rb_bug("allocation failed");
+ if (!SPECIAL_CONST_P(ptr)) {
+ if (!rb_darray_append(&jit->block->gc_object_offsets, ptr_offset)) {
+ rb_bug("allocation failed");
+ }
}
}
@@ -252,12 +253,14 @@ ujit_gen_block(ctx_t* ctx, block_t* block)
break;
}
+#if RUBY_DEBUG
// Accumulate stats about instructions executed
if (rb_ujit_opts.gen_stats) {
// Count instructions executed by the JIT
mov(cb, REG0, const_ptr_opnd((void *)&rb_ujit_exec_insns_count));
add(cb, mem_opnd(64, REG0, 0), imm_opnd(1));
}
+#endif
//fprintf(stderr, "compiling %d: %s\n", insn_idx, insn_name(opcode));
//print_str(cb, insn_name(opcode));
@@ -1115,6 +1118,7 @@ gen_oswb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_c
// Pointer to the klass field of the receiver &(recv->klass)
x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass));
+ // FIXME: This leaks when st_update raises NoMemoryError
assume_method_lookup_stable(cd->cc, cme, jit->block);
// Bail if receiver class is different from compile-time call cache class
@@ -1570,6 +1574,48 @@ gen_leave(jitstate_t* jit, ctx_t* ctx)
return true;
}
+RUBY_EXTERN rb_serial_t ruby_vm_global_constant_state;
+static bool
+gen_opt_getinlinecache(jitstate_t *jit, ctx_t *ctx)
+{
+ VALUE jump_offset = jit_get_arg(jit, 0);
+ VALUE const_cache_as_value = jit_get_arg(jit, 1);
+ IC ic = (IC)const_cache_as_value;
+
+ // See vm_ic_hit_p().
+ struct iseq_inline_constant_cache_entry *ice = ic->entry;
+ if (!ice) return false; // cache not filled
+ if (ice->ic_serial != ruby_vm_global_constant_state) {
+ // Cache miss at compile time.
+ return false;
+ }
+ if (ice->ic_cref) {
+ // Only compile for caches that don't care about lexical scope.
+ return false;
+ }
+
+ // Optimize for single ractor mode.
+ // FIXME: This leaks when st_insert raises NoMemoryError
+ if (!assume_single_ractor_mode(jit->block)) return false;
+
+ // Invalidate output code on any and all constant writes
+ // FIXME: This leaks when st_insert raises NoMemoryError
+ if (!assume_stable_global_constant_state(jit->block)) return false;
+
+ x86opnd_t stack_top = ctx_stack_push(ctx, T_NONE);
+ jit_mov_gc_ptr(jit, cb, REG0, ice->value);
+ mov(cb, stack_top, REG0);
+
+ // Jump over the code for filling the cache
+ uint32_t jump_idx = jit_next_insn_idx(jit) + (int32_t)jump_offset;
+ gen_direct_jump(
+ ctx,
+ (blockid_t){ .iseq = jit->iseq, .idx = jump_idx }
+ );
+
+ return true;
+}
+
void ujit_reg_op(int opcode, codegen_fn gen_fn, bool is_branch)
{
// Check that the op wasn't previously registered
@@ -1620,6 +1666,9 @@ ujit_init_codegen(void)
ujit_reg_op(BIN(opt_and), gen_opt_and, false);
ujit_reg_op(BIN(opt_minus), gen_opt_minus, false);
ujit_reg_op(BIN(opt_plus), gen_opt_plus, false);
+
+ // Map branch instruction opcodes to codegen functions
+ ujit_reg_op(BIN(opt_getinlinecache), gen_opt_getinlinecache, true);
ujit_reg_op(BIN(branchif), gen_branchif, true);
ujit_reg_op(BIN(branchunless), gen_branchunless, true);
ujit_reg_op(BIN(jump), gen_jump, true);
diff --git a/ujit_core.c b/ujit_core.c
index 0e6492b4f8..fea49ce7f6 100644
--- a/ujit_core.c
+++ b/ujit_core.c
@@ -175,8 +175,10 @@ add_block_version(blockid_t blockid, block_t* block)
rb_bug("allocation failed");
}
+#if RUBY_DEBUG
// First block compiled for this iseq
rb_compiled_iseq_count++;
+#endif
}
block_t *first_version = get_first_version(iseq, blockid.idx);
@@ -199,7 +201,7 @@ add_block_version(blockid_t blockid, block_t* block)
RB_OBJ_WRITTEN(iseq, Qundef, block->dependencies.cc);
RB_OBJ_WRITTEN(iseq, Qundef, block->dependencies.cme);
- // Run write barrier for all objects in generated code.
+ // Run write barriers for all objects in generated code.
uint32_t *offset_element;
rb_darray_foreach(block->gc_object_offsets, offset_idx, offset_element) {
uint32_t offset_to_value = *offset_element;
@@ -601,9 +603,12 @@ void
ujit_free_block(block_t *block)
{
ujit_unlink_method_lookup_dependency(block);
+ ujit_block_assumptions_free(block);
+
rb_darray_free(block->incoming);
- free(block);
rb_darray_free(block->gc_object_offsets);
+
+ free(block);
}
// Invalidate one specific block version
diff --git a/ujit_core.h b/ujit_core.h
index 64de5ad979..1fe58856fa 100644
--- a/ujit_core.h
+++ b/ujit_core.h
@@ -107,9 +107,6 @@ typedef struct ujit_block_version
// Bytecode sequence (iseq, idx) this is a version of
blockid_t blockid;
- // Index one past the last instruction in the iseq
- uint32_t end_idx;
-
// Context at the start of the block
ctx_t ctx;
@@ -120,6 +117,9 @@ typedef struct ujit_block_version
// List of incoming branches indices
int32_array_t incoming;
+ // Offsets for GC managed objects in the mainline code block
+ int32_array_t gc_object_offsets;
+
// Next block version for this blockid (singly-linked list)
struct ujit_block_version *next;
@@ -132,6 +132,9 @@ typedef struct ujit_block_version
VALUE cme;
VALUE iseq;
} dependencies;
+
+ // Index one past the last instruction in the iseq
+ uint32_t end_idx;
} block_t;
// Context object methods
diff --git a/ujit_iface.c b/ujit_iface.c
index 64f9fe9a81..f2eb657b3b 100644
--- a/ujit_iface.c
+++ b/ujit_iface.c
@@ -24,10 +24,12 @@ VALUE cUjitBlock;
VALUE cUjitDisasm;
VALUE cUjitDisasmInsn;
+#if RUBY_DEBUG
static int64_t vm_insns_count = 0;
int64_t rb_ujit_exec_insns_count = 0;
static int64_t exit_op_count[VM_INSTRUCTION_SIZE] = { 0 };
int64_t rb_compiled_iseq_count = 0;
+#endif
// Machine code blocks (executable memory)
extern codeblock_t *cb;
@@ -45,7 +47,7 @@ static const rb_data_type_t ujit_block_type = {
};
// Write the uJIT entry point pre-call bytes
-void
+void
cb_write_pre_call_bytes(codeblock_t* cb)
{
for (size_t i = 0; i < sizeof(ujit_with_ec_pre_call_bytes); ++i)
@@ -53,7 +55,7 @@ cb_write_pre_call_bytes(codeblock_t* cb)
}
// Write the uJIT exit post-call bytes
-void
+void
cb_write_post_call_bytes(codeblock_t* cb)
{
for (size_t i = 0; i < sizeof(ujit_with_ec_post_call_bytes); ++i)
@@ -129,46 +131,74 @@ struct ujit_root_struct {
int unused; // empty structs are not legal in C99
};
-// Map cme_or_cc => [[iseq, offset]]. An entry in the map means compiled code at iseq[offset]
-// is only valid when cme_or_cc is valid
-static st_table *method_lookup_dependency;
-
-struct compiled_region {
- block_t *block;
-};
+static void
+block_array_shuffle_remove(rb_ujit_block_array_t blocks, block_t *to_remove) {
+ block_t **elem;
+ rb_darray_foreach(blocks, i, elem) {
+ if (*elem == to_remove) {
+ // Remove the current element by moving the last element here then popping.
+ *elem = rb_darray_get(blocks, rb_darray_size(blocks) - 1);
+ rb_darray_pop_back(blocks);
+ break;
+ }
+ }
+}
-typedef rb_darray(struct compiled_region) block_array_t;
+// Map cme_or_cc => [block]
+static st_table *method_lookup_dependency;
static int
add_lookup_dependency_i(st_data_t *key, st_data_t *value, st_data_t data, int existing)
{
- struct compiled_region *region = (struct compiled_region *)data;
+ block_t *new_block = (block_t *)data;
- block_array_t regions = NULL;
+ rb_ujit_block_array_t blocks = NULL;
if (existing) {
- regions = (block_array_t )*value;
+ blocks = (rb_ujit_block_array_t)*value;
}
- if (!rb_darray_append(&regions, *region)) {
+ if (!rb_darray_append(&blocks, new_block)) {
rb_bug("ujit: failed to add method lookup dependency"); // TODO: we could bail out of compiling instead
}
- *value = (st_data_t)regions;
+ *value = (st_data_t)blocks;
return ST_CONTINUE;
}
-// Remember that the currently compiling region is only valid while cme and cc are valid
+// Remember that the currently compiling block is only valid while cme and cc are valid
void
assume_method_lookup_stable(const struct rb_callcache *cc, const rb_callable_method_entry_t *cme, block_t *block)
{
RUBY_ASSERT(block != NULL);
RUBY_ASSERT(block->dependencies.cc == 0 && block->dependencies.cme == 0);
- struct compiled_region region = { .block = block };
- st_update(method_lookup_dependency, (st_data_t)cme, add_lookup_dependency_i, (st_data_t)&region);
+ st_update(method_lookup_dependency, (st_data_t)cme, add_lookup_dependency_i, (st_data_t)block);
block->dependencies.cme = (VALUE)cme;
- st_update(method_lookup_dependency, (st_data_t)cc, add_lookup_dependency_i, (st_data_t)&region);
+ st_update(method_lookup_dependency, (st_data_t)cc, add_lookup_dependency_i, (st_data_t)block);
block->dependencies.cc = (VALUE)cc;
}
+static st_table *blocks_assuming_single_ractor_mode;
+
+// Can raise NoMemoryError.
+RBIMPL_ATTR_NODISCARD()
+bool
+assume_single_ractor_mode(block_t *block) {
+ if (rb_multi_ractor_p()) return false;
+
+ st_insert(blocks_assuming_single_ractor_mode, (st_data_t)block, 1);
+ return true;
+}
+
+static st_table *blocks_assuming_stable_global_constant_state;
+
+// Assume that the global constant state has not changed since the call to this function.
+// Can raise NoMemoryError.
+RBIMPL_ATTR_NODISCARD()
+bool
+assume_stable_global_constant_state(block_t *block) {
+ st_insert(blocks_assuming_stable_global_constant_state, (st_data_t)block, 1);
+ return true;
+}
+
static int
ujit_root_mark_i(st_data_t k, st_data_t v, st_data_t ignore)
{
@@ -253,11 +283,11 @@ rb_ujit_method_lookup_change(VALUE cme_or_cc)
// Invalidate all regions that depend on the cme or cc
st_data_t key = (st_data_t)cme_or_cc, image;
if (st_delete(method_lookup_dependency, &key, &image)) {
- block_array_t array = (void *)image;
- struct compiled_region *elem;
+ rb_ujit_block_array_t array = (void *)image;
+ block_t **elem;
rb_darray_foreach(array, i, elem) {
- invalidate_block_version(elem->block);
+ invalidate_block_version(*elem);
}
rb_darray_free(array);
@@ -272,19 +302,9 @@ remove_method_lookup_dependency(VALUE cc_or_cme, block_t *block)
{
st_data_t key = (st_data_t)cc_or_cme, image;
if (st_lookup(method_lookup_dependency, key, &image)) {
- block_array_t array = (void *)image;
- struct compiled_region *elem;
+ rb_ujit_block_array_t array = (void *)image;
- // Find the block we are removing
- rb_darray_foreach(array, i, elem) {
- if (elem->block == block) {
- // Remove the current element by moving the last element here.
- // Order in the region array doesn't matter.
- *elem = rb_darray_get(array, rb_darray_size(array) - 1);
- rb_darray_pop_back(array);
- break;
- }
- }
+ block_array_shuffle_remove(array, block);
if (rb_darray_size(array) == 0) {
st_delete(method_lookup_dependency, &key, NULL);
@@ -301,6 +321,19 @@ ujit_unlink_method_lookup_dependency(block_t *block)
}
void
+ujit_block_assumptions_free(block_t *block)
+{
+ st_data_t as_st_data = (st_data_t)block;
+ if (blocks_assuming_stable_global_constant_state) {
+ st_delete(blocks_assuming_stable_global_constant_state, &as_st_data, NULL);
+ }
+
+ if (blocks_assuming_single_ractor_mode) {
+ st_delete(blocks_assuming_single_ractor_mode, &as_st_data, NULL);
+ }
+}
+
+void
rb_ujit_compile_iseq(const rb_iseq_t *iseq)
{
#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
@@ -411,11 +444,28 @@ rb_ujit_bop_redefined(VALUE klass, const rb_method_entry_t *me, enum ruby_basic_
//fprintf(stderr, "bop redefined\n");
}
+static int
+block_invalidation_iterator(st_data_t key, st_data_t value, st_data_t data) {
+ block_t *block = (block_t *)key;
+ invalidate_block_version(block); // Thankfully, st_table supports deleting while iterating
+ return ST_CONTINUE;
+}
+
/* Called when the constant state changes */
void
rb_ujit_constant_state_changed(void)
{
- //fprintf(stderr, "bop redefined\n");
+ if (blocks_assuming_stable_global_constant_state) {
+ st_foreach(blocks_assuming_stable_global_constant_state, block_invalidation_iterator, 0);
+ }
+}
+
+void
+rb_ujit_before_ractor_spawn(void)
+{
+ if (blocks_assuming_single_ractor_mode) {
+ st_foreach(blocks_assuming_single_ractor_mode, block_invalidation_iterator, 0);
+ }
}
#if HAVE_LIBCAPSTONE
@@ -651,6 +701,9 @@ rb_ujit_init(struct rb_ujit_options *options)
rb_ujit_opts.call_threshold = 2;
}
+ blocks_assuming_stable_global_constant_state = st_init_numtable();
+ blocks_assuming_single_ractor_mode = st_init_numtable();
+
ujit_init_core();
ujit_init_codegen();
diff --git a/ujit_iface.h b/ujit_iface.h
index 620dc57dd1..accaa1a37f 100644
--- a/ujit_iface.h
+++ b/ujit_iface.h
@@ -10,6 +10,7 @@
#include "stdint.h"
#include "stdbool.h"
#include "internal.h"
+#include "ruby/internal/attr/nodiscard.h"
#include "vm_core.h"
#include "vm_callinfo.h"
#include "builtin.h"
@@ -32,9 +33,15 @@ int opcode_at_pc(const rb_iseq_t *iseq, const VALUE *pc);
void check_cfunc_dispatch(VALUE receiver, struct rb_call_data *cd, void *callee, rb_callable_method_entry_t *compile_time_cme);
bool cfunc_needs_frame(const rb_method_cfunc_t *cfunc);
+
void assume_method_lookup_stable(const struct rb_callcache *cc, const rb_callable_method_entry_t *cme, block_t* block);
+RBIMPL_ATTR_NODISCARD() bool assume_single_ractor_mode(block_t *block);
+RBIMPL_ATTR_NODISCARD() bool assume_stable_global_constant_state(block_t *block);
+
// this function *must* return passed exit_pc
const VALUE *rb_ujit_count_side_exit_op(const VALUE *exit_pc);
+
void ujit_unlink_method_lookup_dependency(block_t *block);
+void ujit_block_assumptions_free(block_t *block);
#endif // #ifndef UJIT_IFACE_H