diff options
author | Kevin Newton <kddnewton@gmail.com> | 2022-03-31 11:04:25 -0400 |
---|---|---|
committer | Alan Wu <XrXr@users.noreply.github.com> | 2022-04-01 14:48:22 -0400 |
commit | 6068da8937d7e4358943f95e7450dae7179a7763 (patch) | |
tree | 68ad7d95ec12f1dec4b1b745725c9579ab2f10ec /iseq.c | |
parent | 20c190f95a28dd4e57cb96f939ff314dfb88b1f4 (diff) |
Finer-grained constant cache invalidation (take 2)
This commit reintroduces finer-grained constant cache invalidation.
After 8008fb7 got merged, it was causing issues on token-threaded
builds (such as on Windows).
The issue was that when you're iterating through instruction sequences
and using the translator functions to get back the instruction structs,
you're either using `rb_vm_insn_null_translator` or
`rb_vm_insn_addr2insn2` depending on whether it's a direct-threading build.
`rb_vm_insn_addr2insn2` does some normalization to always return to
you the non-trace version of whatever instruction you're looking at.
`rb_vm_insn_null_translator` does not do that normalization.
This means that when you're looping through the instructions if you're
trying to do an opcode comparison, it can change depending on the type
of threading that you're using. This can be very confusing. So, this
commit creates a new translator function
`rb_vm_insn_normalizing_translator` to always return the non-trace
version so that opcode comparisons don't have to worry about different
configurations.
[Feature #18589]
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/5716
Diffstat (limited to 'iseq.c')
-rw-r--r-- | iseq.c | 114 |
1 files changed, 114 insertions, 0 deletions
@@ -102,12 +102,77 @@ compile_data_free(struct iseq_compile_data *compile_data) } } +struct iseq_clear_ic_references_data { + IC ic; +}; + +// This iterator is used to walk through the instructions and clean any +// references to ICs that are contained within this ISEQ out of the VM's +// constant cache table. It passes around a struct that holds the current IC +// we're looking for, which can be NULL (if we haven't hit an opt_getinlinecache +// instruction yet) or set to an IC (if we've hit an opt_getinlinecache and +// haven't yet hit the associated opt_setinlinecache). +static bool +iseq_clear_ic_references_i(VALUE *code, VALUE insn, size_t index, void *data) +{ + struct iseq_clear_ic_references_data *ic_data = (struct iseq_clear_ic_references_data *) data; + + switch (insn) { + case BIN(opt_getinlinecache): { + RUBY_ASSERT_ALWAYS(ic_data->ic == NULL); + + ic_data->ic = (IC) code[index + 2]; + return true; + } + case BIN(getconstant): { + if (ic_data->ic != NULL) { + ID id = (ID) code[index + 1]; + rb_vm_t *vm = GET_VM(); + VALUE lookup_result; + + if (rb_id_table_lookup(vm->constant_cache, id, &lookup_result)) { + st_table *ics = (st_table *)lookup_result; + st_data_t ic = (st_data_t)ic_data->ic; + st_delete(ics, &ic, NULL); + + if (ics->num_entries == 0) { + rb_id_table_delete(vm->constant_cache, id); + st_free_table(ics); + } + } + } + + return true; + } + case BIN(opt_setinlinecache): { + RUBY_ASSERT_ALWAYS(ic_data->ic != NULL); + + ic_data->ic = NULL; + return true; + } + default: + return true; + } +} + +// When an ISEQ is being freed, all of its associated ICs are going to go away +// as well. Because of this, we need to walk through the ISEQ, find any +// opt_getinlinecache calls, and clear out the VM's constant cache of associated +// ICs. 
+static void +iseq_clear_ic_references(const rb_iseq_t *iseq) +{ + struct iseq_clear_ic_references_data data = { .ic = NULL }; + rb_iseq_each(iseq, 0, iseq_clear_ic_references_i, (void *) &data); +} + void rb_iseq_free(const rb_iseq_t *iseq) { RUBY_FREE_ENTER("iseq"); if (iseq && ISEQ_BODY(iseq)) { + iseq_clear_ic_references(iseq); struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); mjit_free_iseq(iseq); /* Notify MJIT */ rb_yjit_iseq_free(body); @@ -157,6 +222,22 @@ rb_vm_insn_null_translator(const void *addr) return (VALUE)addr; } +// The translator for OPT_DIRECT_THREADED_CODE and OPT_CALL_THREADED_CODE does +// some normalization to always return the non-trace version of instructions. To +// mirror that behavior in token-threaded environments, we normalize in this +// translator by also returning non-trace opcodes. +static VALUE +rb_vm_insn_normalizing_translator(const void *addr) +{ + VALUE opcode = (VALUE)addr; + VALUE trace_opcode_threshold = (VM_INSTRUCTION_SIZE / 2); + + if (opcode >= trace_opcode_threshold) { + return opcode - trace_opcode_threshold; + } + return opcode; +} + typedef VALUE iseq_value_itr_t(void *ctx, VALUE obj); typedef VALUE rb_vm_insns_translator_t(const void *addr); @@ -250,6 +331,39 @@ rb_iseq_each_value(const rb_iseq_t *iseq, iseq_value_itr_t * func, void *data) } } +// Similar to rb_iseq_each_value, except that this walks through each +// instruction instead of the associated VALUEs. The provided iterator should +// return a boolean that indicates whether or not to continue iterating. +void +rb_iseq_each(const rb_iseq_t *iseq, size_t start_index, rb_iseq_each_i iterator, void *data) +{ + unsigned int size; + VALUE *code; + size_t index; + + rb_vm_insns_translator_t *const translator = +#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE + (FL_TEST((VALUE)iseq, ISEQ_TRANSLATED)) ? rb_vm_insn_addr2insn2 : +#endif + rb_vm_insn_normalizing_translator; // Always pass non-trace opcodes. 
+ + const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq); + + size = body->iseq_size; + code = body->iseq_encoded; + + for (index = start_index; index < size;) { + void *addr = (void *) code[index]; + VALUE insn = translator(addr); + + if (!iterator(code, insn, index, data)) { + break; + } + + index += insn_len(insn); + } +} + static VALUE update_each_insn_value(void *ctx, VALUE obj) { |