From 8a15857a7f283101a3923b76c71a1f05bfe58512 Mon Sep 17 00:00:00 2001 From: k0kubun Date: Sun, 4 Mar 2018 07:04:40 +0000 Subject: mjit_compile.c: use local variables for stack if catch_except_p is FALSE. If catch_except_p is TRUE, stack values should be on VM's stack when exception is thrown and the JIT-ed frame is re-executed by VM's exception handler. If it's FALSE, the JIT-ed frame won't be re-executed and doesn't need to keep values on VM's stack. Using local variables allows us to reduce cfp->sp motion. Moving cfp->sp is needed only for insns whose handles_frame? is false. So it improves performance. _mjit_compile_insn.erb: Prepare `stack_size` variable for GET_SP, STACK_ADDR_FROM_TOP, TOPN macros. Share pc and sp motion partial view. Use cancel handler created in mjit_compile.c. _mjit_compile_send.erb: ditto. Also, when iseq->body->catch_except_p is TRUE, this stops calling mjit_exec directly. I described the reason in vm_insnhelper.h's comment for EXEC_EC_CFP. _mjit_compile_pc_and_sp.erb: Shared logic for moving sp and pc. As you can see from this file, when status->local_stack_p is TRUE and insn.handles_frame? is false, moving sp is skipped. But if insn.handles_frame? is true, values should be rolled back to VM's stack. common.mk: add dependency for the file _mjit_compile_insn_body.erb: Set sp value before canceling JIT on DISPATCH_ORIGINAL_INSN. Replace GET_SP, STACK_ADDR_FROM_TOP, TOPN macros for the case local_stack_p is TRUE and insn.handles_frame? is false. In that case, values are not available on VM's stack and those macros should be replaced. mjit_compile.inc.erb: updated comments of macros which are supported by JIT compiler. All references to `cfp->sp` should be replaced and thus INC_SP, SET_SV, PUSH are no longer supported for now, because they are not used now. vm_exec.h: moved EXEC_EC_CFP definition to vm_insnhelper.h because it's tightly coupled to CALL_METHOD. vm_insnhelper.h: Have revised EXEC_EC_CFP definition moved from vm_exec.h. 
Now it triggers mjit_exec for VM, and has the guard for catch_except_p on JIT-ed code. See comments for details. CALL_METHOD delegates triggering mjit_exec to EXEC_EC_CFP. insns.def: Stopped using EXEC_EC_CFP for the case we don't want to trigger mjit_exec. Those insns (defineclass, opt_call_c_function) are not supported by JIT and it's safe to use RESTORE_REGS(), NEXT_INSN(). expandarray is changed to pass GET_SP() to replace the macro in _mjit_compile_insn_body.erb. vm_insnhelper.c: change to take sp for the above reason. [close https://github.com/ruby/ruby/pull/1828] This patch resurrects the performance which was attached in [Feature #14235]. * Benchmark Optcarrot (with configuration for benchmark_driver.gem) https://github.com/benchmark-driver/optcarrot $ benchmark-driver benchmark.yml --verbose 1 --rbenv 'before;before+JIT::before,--jit;after;after+JIT::after,--jit' --repeat-count 10 before: ruby 2.6.0dev (2018-03-04 trunk 62652) [x86_64-linux] before+JIT: ruby 2.6.0dev (2018-03-04 trunk 62652) +JIT [x86_64-linux] after: ruby 2.6.0dev (2018-03-04 local-variable.. 62652) [x86_64-linux] last_commit=mjit_compile.c: use local variables for stack after+JIT: ruby 2.6.0dev (2018-03-04 local-variable.. 
62652) +JIT [x86_64-linux] last_commit=mjit_compile.c: use local variables for stack Calculating ------------------------------------- before before+JIT after after+JIT optcarrot 53.552 59.680 53.697 63.358 fps Comparison: optcarrot after+JIT: 63.4 fps before+JIT: 59.7 fps - 1.06x slower after: 53.7 fps - 1.18x slower before: 53.6 fps - 1.18x slower git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62655 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- mjit_compile.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'mjit_compile.c') diff --git a/mjit_compile.c b/mjit_compile.c index eaf7c832b2..5bf14a1388 100644 --- a/mjit_compile.c +++ b/mjit_compile.c @@ -20,6 +20,9 @@ struct compile_status { int success; /* has TRUE if compilation has had no issue */ int *compiled_for_pos; /* compiled_for_pos[pos] has TRUE if the pos is compiled */ + /* If TRUE, JIT-ed code will use local variables to store pushed values instead of + using VM's stack and moving stack pointer. */ + int local_stack_p; }; /* Storage to keep data which is consistent in each conditional branch. @@ -151,6 +154,20 @@ compile_insns(FILE *f, const struct rb_iseq_constant_body *body, unsigned int st } } +/* Print the block to cancel JIT execution. */ +static void +compile_cancel_handler(FILE *f, const struct rb_iseq_constant_body *body, struct compile_status *status) +{ + unsigned int i; + fprintf(f, "\ncancel:\n"); + if (status->local_stack_p) { + for (i = 0; i < body->stack_max; i++) { + fprintf(f, " *((VALUE *)reg_cfp->bp + %d) = stack[%d];\n", i + 1, i); + } + } + fprintf(f, " return Qundef;\n"); +} + /* Compile ISeq to C code in F. It returns 1 if it succeeds to compile. 
*/ int mjit_compile(FILE *f, const struct rb_iseq_constant_body *body, const char *funcname) @@ -158,12 +175,18 @@ mjit_compile(FILE *f, const struct rb_iseq_constant_body *body, const char *func struct compile_status status; status.success = TRUE; status.compiled_for_pos = ZALLOC_N(int, body->iseq_size); + status.local_stack_p = !body->catch_except_p; #ifdef _WIN32 fprintf(f, "__declspec(dllexport)\n"); #endif fprintf(f, "VALUE\n%s(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp)\n{\n", funcname); - fprintf(f, " VALUE *stack = reg_cfp->sp;\n"); + if (status.local_stack_p) { + fprintf(f, " VALUE stack[%d];\n", body->stack_max); + } + else { + fprintf(f, " VALUE *stack = reg_cfp->sp;\n"); + } fprintf(f, " static const VALUE *const original_body_iseq = (VALUE *)0x%"PRIxVALUE";\n", (VALUE)body->iseq_encoded); @@ -186,6 +209,7 @@ mjit_compile(FILE *f, const struct rb_iseq_constant_body *body, const char *func fprintf(f, " }\n"); compile_insns(f, body, 0, 0, &status); + compile_cancel_handler(f, body, &status); fprintf(f, "\n} /* end of %s */\n", funcname); xfree(status.compiled_for_pos); -- cgit v1.2.3