| author | Koichi Sasada <ko1@atdot.net> | 2020-01-08 16:14:01 +0900 |
| --- | --- | --- |
| committer | Koichi Sasada <ko1@atdot.net> | 2020-02-22 09:58:59 +0900 |
| commit | b9007b6c548f91e88fd3f2ffa23de740431fa969 (patch) | |
| tree | 1746393d1c5f704e8dc7e0a458198264062273bf /mjit_compile.c | |
| parent | f2286925f08406bc857f7b03ad6779a5d61443ae (diff) | |
Introduce disposable call-cache.
This patch contains several ideas:
(1) Disposable inline method cache (IMC) for race-free inline method caching
* Make the call-cache (CC) an RVALUE (a GC-managed object) and allocate a
new CC on each cache miss.
* Because a published CC is never mutated in place, parallel processing
elements can read it race-free, in the style of RCU (see the sketch below).
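To make the RCU-like idea concrete, here is a minimal sketch of a disposable cache in plain C11, using `malloc` and atomics in place of Ruby's RVALUE allocation and GC. All names here (`cache_entry`, `call_site`, `cache_fetch`, `lookup_target`) are hypothetical stand-ins, not CRuby API:

```c
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

// Immutable once published: a miss never mutates an entry in place,
// it replaces the whole entry (this is what makes it "disposable").
typedef struct {
    int klass;   // stand-in for the receiver's class
    int target;  // stand-in for the resolved method entry
} cache_entry;

typedef struct {
    _Atomic(cache_entry *) cc;  // the call site's current cache
} call_site;

// Stand-in for a full method lookup (the slow path).
static int lookup_target(int klass) { return klass * 100; }

static int
cache_fetch(call_site *site, int klass)
{
    cache_entry *cc = atomic_load(&site->cc);  // readers take a snapshot
    if (cc != NULL && cc->klass == klass)
        return cc->target;                     // hit: no locks, no races

    // Miss: allocate a fresh entry and publish it atomically. The old
    // entry is intentionally not freed here; in CRuby it is an RVALUE,
    // so the GC reclaims it only after no thread (e.g. an MJIT worker
    // still holding a snapshot) can reach it.
    cache_entry *fresh = malloc(sizeof(*fresh));
    fresh->klass = klass;
    fresh->target = lookup_target(klass);
    atomic_store(&site->cc, fresh);
    return fresh->target;
}

int
main(void)
{
    call_site site = { NULL };
    printf("%d\n", cache_fetch(&site, 1)); // miss -> 100
    printf("%d\n", cache_fetch(&site, 1)); // hit  -> 100
    printf("%d\n", cache_fetch(&site, 2)); // miss -> 200, old entry left to "GC"
    return 0;
}
```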
(2) Introduce the per-class method cache (pCMC)
* Unlike the fixed-size global method cache (GMC), the pCMC allows a
flexible, per-class cache size (see the sketch below).
* Caching CCs in the pCMC reduces CC allocations and lets call-sites with
the same call-info (CI) share a CC's fast path.
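As a rough illustration of the per-class idea only (a toy, not CRuby's actual data structure, which caches CCs rather than bare targets), each class can carry its own growable table instead of competing for slots in one fixed-size global array:

```c
#include <stdio.h>
#include <stdlib.h>

// One cache per class, keyed by method id. It grows on demand, unlike
// a global method cache whose fixed slot count is shared by all classes.
typedef struct {
    int *mids;      // cached method ids
    int *targets;   // resolved targets, parallel to mids
    size_t len, cap;
} pcmc_t;

typedef struct {
    const char *name;
    pcmc_t cache;   // every class carries its own method cache
} klass_t;

// Stand-in for the slow path: walking the ancestry to resolve a method.
static int resolve(int mid) { return mid + 1000; }

static int
pcmc_get(klass_t *k, int mid)
{
    pcmc_t *c = &k->cache;
    for (size_t i = 0; i < c->len; i++)
        if (c->mids[i] == mid)
            return c->targets[i];              // hit: reuse the cached result

    if (c->len == c->cap) {                    // miss: grow this class's table
        c->cap = c->cap ? c->cap * 2 : 4;
        c->mids = realloc(c->mids, c->cap * sizeof(*c->mids));
        c->targets = realloc(c->targets, c->cap * sizeof(*c->targets));
    }
    c->mids[c->len] = mid;
    c->targets[c->len] = resolve(mid);
    return c->targets[c->len++];
}

int
main(void)
{
    klass_t str = { "String", { NULL, NULL, 0, 0 } };
    printf("%d\n", pcmc_get(&str, 7)); // miss: resolved, then cached in String
    printf("%d\n", pcmc_get(&str, 7)); // hit: served from String's own table
    return 0;
}
```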
(3) Invalidate an inline method cache by invalidating the corresponding
method entries (MEs)
* Instead of using class serials, we set an "invalidated" flag on the
method entry itself to represent cache invalidation.
* Compared with class serials, the impact of a method modification
(add/overwrite/delete) is small.
* Updating a class serial invalidates all method caches of the class and
its subclasses.
* The proposed approach invalidates only the caches that hold the single
affected ME (see the sketch below).
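A small sketch of the contrast, with hypothetical names (`method_entry`, `call_cache`, `cc_valid_p`); the validity check mirrors the simplified `has_valid_method_type()` in the diff below:

```c
#include <stdbool.h>
#include <stdio.h>

typedef struct {
    int target;
    bool invalidated;  // set when this ME is overwritten/removed
} method_entry;

typedef struct {
    method_entry *cme; // the cached method entry
} call_cache;

static bool
cc_valid_p(const call_cache *cc)
{
    // With a class serial, redefining *any* method bumps the serial and
    // kills every cache under that class and its subclasses. With a
    // per-ME flag, a cache stays usable as long as it still holds a
    // live, non-invalidated method entry.
    return cc->cme != NULL && !cc->cme->invalidated;
}

int
main(void)
{
    method_entry foo = { 1, false }, bar = { 2, false };
    call_cache cc_foo = { &foo }, cc_bar = { &bar };

    foo.invalidated = true;  // redefine only `foo`

    printf("foo cache valid: %d\n", cc_valid_p(&cc_foo)); // 0: must re-resolve
    printf("bar cache valid: %d\n", cc_valid_p(&cc_bar)); // 1: untouched
    return 0;
}
```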
See [Feature #16614] for more details.
Notes:
Merged: https://github.com/ruby/ruby/pull/2888
Diffstat (limited to 'mjit_compile.c')
-rw-r--r-- | mjit_compile.c | 42 |
1 file changed, 20 insertions, 22 deletions
```diff
diff --git a/mjit_compile.c b/mjit_compile.c
index 57037af20c..e4f7cf292a 100644
--- a/mjit_compile.c
+++ b/mjit_compile.c
@@ -41,9 +41,9 @@ call_data_index(CALL_DATA cd, const struct rb_iseq_constant_body *body)
 // For propagating information needed for lazily pushing a frame.
 struct inlined_call_context {
     int orig_argc; // ci->orig_argc
-    VALUE me; // cc->me
-    int param_size; // def_iseq_ptr(cc->me->def)->body->param.size
-    int local_size; // def_iseq_ptr(cc->me->def)->body->local_table_size
+    VALUE me; // vm_cc_cme(cc)
+    int param_size; // def_iseq_ptr(vm_cc_cme(cc)->def)->body->param.size
+    int local_size; // def_iseq_ptr(vm_cc_cme(cc)->def)->body->local_table_size
 };
 
 // Storage to keep compiler's status. This should have information
@@ -57,7 +57,6 @@ struct compile_status {
     bool local_stack_p;
     // Safely-accessible cache entries copied from main thread.
     union iseq_inline_storage_entry *is_entries;
-    struct rb_call_cache *cc_entries;
     // Mutated optimization levels
     struct rb_mjit_compile_info *compile_info;
     // If `inlined_iseqs[pos]` is not NULL, `mjit_compile_body` tries to inline ISeq there.
@@ -79,13 +78,11 @@ struct case_dispatch_var {
     VALUE last_value;
 };
 
-// Returns true if call cache is still not obsoleted and cc->me->def->type is available.
+// Returns true if call cache is still not obsoleted and vm_cc_cme(cc)->def->type is available.
 static bool
 has_valid_method_type(CALL_CACHE cc)
 {
-    extern bool mjit_valid_class_serial_p(rb_serial_t class_serial);
-    return GET_GLOBAL_METHOD_STATE() == cc->method_state
-        && mjit_valid_class_serial_p(cc->class_serial[0]) && cc->me;
+    return vm_cc_cme(cc) != NULL;
 }
 
 // Returns true if iseq can use fastpath for setup, otherwise NULL. This becomes true in the same condition
@@ -276,7 +273,8 @@ compile_cancel_handler(FILE *f, const struct rb_iseq_constant_body *body, struct
     fprintf(f, "    return Qundef;\n");
 }
 
-extern bool mjit_copy_cache_from_main_thread(const rb_iseq_t *iseq, struct rb_call_cache *cc_entries, union iseq_inline_storage_entry *is_entries);
+extern bool mjit_copy_cache_from_main_thread(const rb_iseq_t *iseq,
+                                             union iseq_inline_storage_entry *is_entries);
 
 static bool
 mjit_compile_body(FILE *f, const rb_iseq_t *iseq, struct compile_status *status)
@@ -368,8 +366,6 @@ inlinable_iseq_p(const struct rb_iseq_constant_body *body)
         .stack_size_for_pos = (int *)alloca(sizeof(int) * body->iseq_size), \
         .inlined_iseqs = compile_root_p ? \
             alloca(sizeof(const struct rb_iseq_constant_body *) * body->iseq_size) : NULL, \
-        .cc_entries = body->ci_size > 0 ? \
-            alloca(sizeof(struct rb_call_cache) * body->ci_size) : NULL, \
         .is_entries = (body->is_size > 0) ? \
             alloca(sizeof(union iseq_inline_storage_entry) * body->is_size) : NULL, \
         .compile_info = compile_root_p ? \
@@ -394,17 +390,18 @@ precompile_inlinable_iseqs(FILE *f, const rb_iseq_t *iseq, struct compile_status
 #else
         int insn = (int)body->iseq_encoded[pos];
 #endif
-
         if (insn == BIN(opt_send_without_block)) { // `compile_inlined_cancel_handler` supports only `opt_send_without_block`
             CALL_DATA cd = (CALL_DATA)body->iseq_encoded[pos + 1];
             const struct rb_callinfo *ci = cd->ci;
-            CALL_CACHE cc_copy = status->cc_entries + call_data_index(cd, body); // use copy to avoid race condition
+            const struct rb_callcache *cc = iseq->body->jit_unit->cc_entries[call_data_index(cd, body)]; // use copy to avoid race condition
 
             const rb_iseq_t *child_iseq;
-            if (has_valid_method_type(cc_copy) &&
-                !(vm_ci_flag(ci) & VM_CALL_TAILCALL) && // inlining only non-tailcall path
-                cc_copy->me->def->type == VM_METHOD_TYPE_ISEQ && fastpath_applied_iseq_p(ci, cc_copy, child_iseq = def_iseq_ptr(cc_copy->me->def)) && // CC_SET_FASTPATH in vm_callee_setup_arg
-                inlinable_iseq_p(child_iseq->body)) {
+            if (has_valid_method_type(cc) &&
+                !(vm_ci_flag(ci) & VM_CALL_TAILCALL) && // inlining only non-tailcall path
+                vm_cc_cme(cc)->def->type == VM_METHOD_TYPE_ISEQ &&
+                fastpath_applied_iseq_p(ci, cc, child_iseq = def_iseq_ptr(vm_cc_cme(cc)->def)) &&
+                // CC_SET_FASTPATH in vm_callee_setup_arg
+                inlinable_iseq_p(child_iseq->body)) {
                 status->inlined_iseqs[pos] = child_iseq->body;
 
                 if (mjit_opts.verbose >= 1) // print beforehand because ISeq may be GCed during copy job.
@@ -418,12 +415,12 @@ precompile_inlinable_iseqs(FILE *f, const rb_iseq_t *iseq, struct compile_status
             INIT_COMPILE_STATUS(child_status, child_iseq->body, false);
             child_status.inline_context = (struct inlined_call_context){
                 .orig_argc = vm_ci_argc(ci),
-                .me = (VALUE)cc_copy->me,
+                .me = (VALUE)vm_cc_cme(cc),
                 .param_size = child_iseq->body->param.size,
                 .local_size = child_iseq->body->local_table_size
             };
-            if ((child_status.cc_entries != NULL || child_status.is_entries != NULL)
-                && !mjit_copy_cache_from_main_thread(child_iseq, child_status.cc_entries, child_status.is_entries))
+            if ((child_iseq->body->ci_size > 0 || child_status.is_entries != NULL)
+                && !mjit_copy_cache_from_main_thread(child_iseq, child_status.is_entries))
                 return false;
 
             fprintf(f, "ALWAYS_INLINE(static VALUE _mjit_inlined_%d(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, const VALUE orig_self, const rb_iseq_t *original_iseq));\n", pos);
@@ -454,9 +451,10 @@ mjit_compile(FILE *f, const rb_iseq_t *iseq, const char *funcname)
 
     struct compile_status status;
     INIT_COMPILE_STATUS(status, iseq->body, true);
-    if ((status.cc_entries != NULL || status.is_entries != NULL)
-        && !mjit_copy_cache_from_main_thread(iseq, status.cc_entries, status.is_entries))
+    if ((iseq->body->ci_size > 0 || status.is_entries != NULL)
+        && !mjit_copy_cache_from_main_thread(iseq, status.is_entries)) {
         return false;
+    }
 
     if (!status.compile_info->disable_send_cache && !status.compile_info->disable_inlining) {
         if (!precompile_inlinable_iseqs(f, iseq, &status))
```