diff options
author | k0kubun <k0kubun@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2019-04-16 17:02:16 +0000 |
---|---|---|
committer | k0kubun <k0kubun@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2019-04-16 17:02:16 +0000 |
commit | d71b78575b08f55d35194b2c091719b2c4463ea3 (patch) | |
tree | dd0dba7549e3bc80ddc70c24c7e10120b5acf54e /mjit_compile.c | |
parent | b914166fcdba3faf5d4cb5e3a18d49691546ce21 (diff) |
Introduce frame-omitted method inlining
for ISeq including only leaf and no-handles_sp insns except leaf.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67574 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'mjit_compile.c')
-rw-r--r-- | mjit_compile.c | 127 |
1 files changed, 109 insertions, 18 deletions
diff --git a/mjit_compile.c b/mjit_compile.c index 4f80bfb0da..e2ff5bb954 100644 --- a/mjit_compile.c +++ b/mjit_compile.c @@ -25,6 +25,14 @@ #define NOT_COMPILED_STACK_SIZE -1 #define ALREADY_COMPILED_P(status, pos) (status->stack_size_for_pos[pos] != NOT_COMPILED_STACK_SIZE) +// For propagating information needed for lazily pushing a frame. +struct inlined_call_context { + int orig_argc; // ci->orig_argc + VALUE me; // cc->me + int param_size; // def_iseq_ptr(cc->me->def)->body->param.size + int local_size; // def_iseq_ptr(cc->me->def)->body->local_table_size +}; + // Storage to keep compiler's status. This should have information // which is global during one `mjit_compile` call. Ones conditional // in each branch should be stored in `compile_branch`. @@ -39,8 +47,9 @@ struct compile_status { struct rb_call_cache *cc_entries; // Mutated optimization levels struct rb_mjit_compile_info *compile_info; - // If `iseq_for_pos[pos]` is not NULL, `mjit_compile_body` tries to inline ISeq there. - const struct rb_iseq_constant_body **iseq_for_pos; + // If `inlined_iseqs[pos]` is not NULL, `mjit_compile_body` tries to inline ISeq there. + const struct rb_iseq_constant_body **inlined_iseqs; + struct inlined_call_context inline_context; }; // Storage to keep data which is consistent in each conditional branch. @@ -66,10 +75,10 @@ has_valid_method_type(CALL_CACHE cc) && mjit_valid_class_serial_p(cc->class_serial) && cc->me; } -// Returns true if iseq is inlinable, otherwise NULL. This becomes true in the same condition +// Returns true if iseq can use fastpath for setup, otherwise NULL. This becomes true in the same condition // as CC_SET_FASTPATH (in vm_callee_setup_arg) is called from vm_call_iseq_setup. static bool -inlinable_iseq_p(const CALL_INFO ci, const CALL_CACHE cc, const rb_iseq_t *iseq) +fastpath_applied_iseq_p(const CALL_INFO ci, const CALL_CACHE cc, const rb_iseq_t *iseq) { extern bool rb_simple_iseq_p(const rb_iseq_t *iseq); return iseq != NULL @@ -187,11 +196,48 @@ compile_insns(FILE *f, const struct rb_iseq_constant_body *body, unsigned int st } } +// Print the block to cancel inlined method call. It's supporting only `opt_send_without_block` for now. +static void +compile_inlined_cancel_handler(FILE *f, const struct rb_iseq_constant_body *body, struct inlined_call_context *inline_context) +{ + fprintf(f, "\ncancel:\n"); + fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel);\n"); + + // Swap pc/sp set on cancel with original pc/sp. + fprintf(f, " const VALUE current_pc = reg_cfp->pc;\n"); + fprintf(f, " const VALUE current_sp = reg_cfp->sp;\n"); + fprintf(f, " reg_cfp->pc = orig_pc;\n"); + fprintf(f, " reg_cfp->sp = orig_sp;\n\n"); + + // Lazily push the current call frame. + fprintf(f, " struct rb_calling_info calling;\n"); + fprintf(f, " calling.block_handler = VM_BLOCK_HANDLER_NONE;\n"); // assumes `opt_send_without_block` + fprintf(f, " calling.argc = %d;\n", inline_context->orig_argc); + fprintf(f, " calling.recv = reg_cfp->self;\n"); + fprintf(f, " reg_cfp->self = orig_self;\n"); + fprintf(f, " vm_call_iseq_setup_normal(ec, reg_cfp, &calling, (const rb_callable_method_entry_t *)0x%"PRIxVALUE", 0, %d, %d);\n\n", + inline_context->me, inline_context->param_size, inline_context->local_size); // fastpath_applied_iseq_p checks rb_simple_iseq_p, which ensures has_opt == FALSE + + // Start usual cancel from here. + fprintf(f, " reg_cfp = ec->cfp;\n"); // work on the new frame + fprintf(f, " reg_cfp->pc = current_pc;\n"); + fprintf(f, " reg_cfp->sp = current_sp;\n"); + for (unsigned int i = 0; i < body->stack_max; i++) { // should be always `status->local_stack_p` + fprintf(f, " *(vm_base_ptr(reg_cfp) + %d) = stack[%d];\n", i, i); + } + // We're not just returning Qundef here so that caller's normal cancel handler can + // push back `stack` to `cfp->sp`. + fprintf(f, " return vm_exec(ec, ec->cfp);\n"); +} + // Print the block to cancel JIT execution. static void compile_cancel_handler(FILE *f, const struct rb_iseq_constant_body *body, struct compile_status *status) { - unsigned int i; + if (status->inlined_iseqs == NULL) { // the current ISeq is being inlined + compile_inlined_cancel_handler(f, body, &status->inline_context); + return; + } fprintf(f, "\nsend_cancel:\n"); fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel_send_inline);\n"); @@ -208,7 +254,7 @@ compile_cancel_handler(FILE *f, const struct rb_iseq_constant_body *body, struct fprintf(f, "\ncancel:\n"); fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel);\n"); if (status->local_stack_p) { - for (i = 0; i < body->stack_max; i++) { + for (unsigned int i = 0; i < body->stack_max; i++) { fprintf(f, " *(vm_base_ptr(reg_cfp) + %d) = stack[%d];\n", i, i); } } @@ -253,11 +299,45 @@ mjit_compile_body(FILE *f, const rb_iseq_t *iseq, struct compile_status *status) return status->success; } +// Return true if the ISeq can be inlined without pushing a new control frame. +static bool +inlinable_iseq_p(const struct rb_iseq_constant_body *body) +{ + // 1) If catch_except_p, caller frame should be preserved when callee catches an exception. + // Then we need to wrap `vm_exec()` but then we can't inline the call inside it. + // + // 2) If `body->catch_except_p` is false and `handles_sp?` of an insn is false, + // sp is not moved as we assume `status->local_stack_p = !body->catch_except_p`. + // + // 3) If `body->catch_except_p` is false and `always_leaf?` of an insn is true, + // pc is not moved. + if (body->catch_except_p) + return false; + + unsigned int pos = 0; + while (pos < body->iseq_size) { +#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE + int insn = rb_vm_insn_addr2insn((void *)body->iseq_encoded[pos]); +#else + int insn = (int)body->iseq_encoded[pos]; +#endif + // All insns in the ISeq except `leave` (to be overridden in the inlined code) + // should meet following strong assumptions: + // * Do not require `cfp->sp` motion + // * Do not move `cfp->pc` + // * Do not read any `cfp->pc` + if (insn != BIN(leave) && insn_may_depend_on_sp_or_pc(insn, body->iseq_encoded + (pos + 1))) + return false; + pos += insn_len(insn); + } + return true; +} + // This needs to be macro instead of a function because it's using `alloca`. #define INIT_COMPILE_STATUS(status, body, compile_root_p) do { \ status = (struct compile_status){ \ .stack_size_for_pos = (int *)alloca(sizeof(int) * body->iseq_size), \ - .iseq_for_pos = compile_root_p ? \ + .inlined_iseqs = compile_root_p ? \ alloca(sizeof(const struct rb_iseq_constant_body *) * body->iseq_size) : NULL, \ .cc_entries = (body->ci_size + body->ci_kw_size) > 0 ? \ alloca(sizeof(struct rb_call_cache) * (body->ci_size + body->ci_kw_size)) : NULL, \ @@ -268,15 +348,16 @@ mjit_compile_body(FILE *f, const rb_iseq_t *iseq, struct compile_status *status) }; \ memset(status.stack_size_for_pos, NOT_COMPILED_STACK_SIZE, sizeof(int) * body->iseq_size); \ if (compile_root_p) \ - memset(status.iseq_for_pos, 0, sizeof(const struct rb_iseq_constant_body *) * body->iseq_size); \ + memset(status.inlined_iseqs, 0, sizeof(const struct rb_iseq_constant_body *) * body->iseq_size); \ else \ memset(status.compile_info, 0, sizeof(struct rb_mjit_compile_info)); \ } while (0) // Compile inlinable ISeqs to C code in `f`. It returns true if it succeeds to compile them. static bool -precompile_inlinable_iseqs(FILE *f, const struct rb_iseq_constant_body *body, struct compile_status *status) +precompile_inlinable_iseqs(FILE *f, const rb_iseq_t *iseq, struct compile_status *status) { + const struct rb_iseq_constant_body *body = iseq->body; unsigned int pos = 0; while (pos < body->iseq_size) { #if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE @@ -285,30 +366,40 @@ precompile_inlinable_iseqs(FILE *f, const struct rb_iseq_constant_body *body, st int insn = (int)body->iseq_encoded[pos]; #endif - if (insn == BIN(opt_send_without_block) || insn == BIN(send)) { + if (insn == BIN(opt_send_without_block)) { // `compile_inlined_cancel_handler` supports only `opt_send_without_block` CALL_INFO ci = (CALL_INFO)body->iseq_encoded[pos + 1]; CALL_CACHE cc_copy = status->cc_entries + ((CALL_CACHE)body->iseq_encoded[pos + 2] - body->cc_entries); // use copy to avoid race condition const rb_iseq_t *child_iseq; if (has_valid_method_type(cc_copy) && !(ci->flag & VM_CALL_TAILCALL) && // inlining only non-tailcall path - cc_copy->me->def->type == VM_METHOD_TYPE_ISEQ && inlinable_iseq_p(ci, cc_copy, child_iseq = def_iseq_ptr(cc_copy->me->def)) && // CC_SET_FASTPATH in vm_callee_setup_arg - !child_iseq->body->catch_except_p && // if catch_except_p, caller frame should be preserved when callee catches an exception. - mjit_target_iseq_p(child_iseq->body)) { - status->iseq_for_pos[pos] = child_iseq->body; + cc_copy->me->def->type == VM_METHOD_TYPE_ISEQ && fastpath_applied_iseq_p(ci, cc_copy, child_iseq = def_iseq_ptr(cc_copy->me->def)) && // CC_SET_FASTPATH in vm_callee_setup_arg + inlinable_iseq_p(child_iseq->body)) { + status->inlined_iseqs[pos] = child_iseq->body; if (mjit_opts.verbose >= 1) // print beforehand because ISeq may be GCed during copy job. - fprintf(stderr, "JIT inline: %s@%s:%d\n", RSTRING_PTR(child_iseq->body->location.label), + fprintf(stderr, "JIT inline: %s@%s:%d => %s@%s:%d\n", + RSTRING_PTR(iseq->body->location.label), + RSTRING_PTR(rb_iseq_path(iseq)), FIX2INT(iseq->body->location.first_lineno), + RSTRING_PTR(child_iseq->body->location.label), RSTRING_PTR(rb_iseq_path(child_iseq)), FIX2INT(child_iseq->body->location.first_lineno)); struct compile_status child_status; INIT_COMPILE_STATUS(child_status, child_iseq->body, false); + child_status.inline_context = (struct inlined_call_context){ + .orig_argc = ci->orig_argc, + .me = (VALUE)cc_copy->me, + .param_size = child_iseq->body->param.size, + .local_size = child_iseq->body->local_table_size + }; if ((child_status.cc_entries != NULL || child_status.is_entries != NULL) && !mjit_copy_cache_from_main_thread(child_iseq, child_status.cc_entries, child_status.is_entries)) return false; - fprintf(f, "ALWAYS_INLINE(static VALUE _mjit_inlined_%d(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp));\n", pos); - fprintf(f, "static inline VALUE\n_mjit_inlined_%d(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp)\n{\n", pos); + fprintf(f, "ALWAYS_INLINE(static VALUE _mjit_inlined_%d(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, const VALUE orig_self));\n", pos); + fprintf(f, "static inline VALUE\n_mjit_inlined_%d(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, const VALUE orig_self)\n{\n", pos); + fprintf(f, " const VALUE *orig_pc = reg_cfp->pc;\n"); + fprintf(f, " const VALUE *orig_sp = reg_cfp->sp;\n"); bool success = mjit_compile_body(f, child_iseq, &child_status); fprintf(f, "\n} /* end of _mjit_inlined_%d */\n\n", pos); @@ -337,7 +428,7 @@ mjit_compile(FILE *f, const rb_iseq_t *iseq, const char *funcname) && !mjit_copy_cache_from_main_thread(iseq, status.cc_entries, status.is_entries)) return false; - bool success = precompile_inlinable_iseqs(f, iseq->body, &status); + bool success = precompile_inlinable_iseqs(f, iseq, &status); if (!success) return false; |