From ed935aa5be0e5e6b8d53c3e7d76a9ce395dfa18b Mon Sep 17 00:00:00 2001 From: k0kubun Date: Sun, 4 Feb 2018 11:22:28 +0000 Subject: mjit_compile.c: merge initial JIT compiler which has been developed by Takashi Kokubun as YARV-MJIT. Many of its bugs are fixed by wanabe. This JIT compiler is designed to be a safe migration path to introduce a JIT compiler to MRI. So this commit does not include any bytecode changes or dynamic instruction modifications, which are done in the original MJIT. This commit even strips off some aggressive optimizations from YARV-MJIT, and thus it's slower than YARV-MJIT too. But it's still noticeably faster than Ruby 2.5 in some benchmarks (attached below). Note that this JIT compiler passes `make test`, `make test-all`, `make test-spec` without JIT, and even with JIT. Not only is it perfectly safe with JIT disabled, because it does not replace VM instructions unlike MJIT, but with JIT enabled it also stably runs Ruby applications, including Rails applications. I regard this version as just an "initial" JIT compiler. I have many optimization ideas which are skipped for initial merging, and you may easily replace this JIT compiler with a faster one by just replacing mjit_compile.c. The `mjit_compile` interface is designed for that purpose. common.mk: update dependencies for mjit_compile.c. internal.h: declare `rb_vm_insn_addr2insn` for MJIT. vm.c: exclude some definitions if `-DMJIT_HEADER` is provided to the compiler. This avoids including some functions which take a long time to compile, e.g. vm_exec_core. Part of this is achieved in transform_mjit_header.rb (see `IGNORED_FUNCTIONS`) but others are manually resolved for now. Load mjit_helper.h for MJIT header. mjit_helper.h: New. This is a file used only by JIT-ed code. I'll refactor `mjit_call_cfunc` later. vm_eval.c: add some #ifdef switches to skip compiling some functions like Init_vm_eval. win32/mkexports.rb: export thread/ec functions, which are used by MJIT. 
include/ruby/defines.h: add MJIT_FUNC_EXPORTED macro alias to clarify that a function is exported only for MJIT. array.c: export a function used by MJIT. bignum.c: ditto. class.c: ditto. compile.c: ditto. error.c: ditto. gc.c: ditto. hash.c: ditto. iseq.c: ditto. numeric.c: ditto. object.c: ditto. proc.c: ditto. re.c: ditto. st.c: ditto. string.c: ditto. thread.c: ditto. variable.c: ditto. vm_backtrace.c: ditto. vm_insnhelper.c: ditto. vm_method.c: ditto. I would like to improve maintainability of function exports, but I believe this way is acceptable for the initial merge if we clarify the new exports are for MJIT (so that we can use them as a TODO list to fix) and add unit tests to detect unresolved symbols. I'll add unit tests of JIT compilations in succeeding commits. Author: Takashi Kokubun Contributor: wanabe Part of [Feature #14235] --- * Known issues * Code generated by gcc is faster than clang. Benchmark results may be worse on macOS. The following benchmark results were obtained with gcc on Linux. * Performance is decreased when Google Chrome is running * JIT can work on MinGW, but it doesn't improve performance, at least in short-running benchmarks. * Currently it doesn't perform well with Rails. We'll try to fix this before release. 
--- * Benchmark results Benchmarked with: Intel 4.0GHz i7-4790K with 16GB memory under x86-64 Ubuntu 8 Cores - 2.0.0-p0: Ruby 2.0.0-p0 - r62186: Ruby trunk (early 2.6.0), before MJIT changes - JIT off: On this commit, but without `--jit` option - JIT on: On this commit, and with `--jit` option ** Optcarrot fps Benchmark: https://github.com/mame/optcarrot | |2.0.0-p0 |r62186 |JIT off |JIT on | |:--------|:--------|:--------|:--------|:--------| |fps |37.32 |51.46 |51.31 |58.88 | |vs 2.0.0 |1.00x |1.38x |1.37x |1.58x | ** MJIT benchmarks Benchmark: https://github.com/benchmark-driver/mjit-benchmarks (Original: https://github.com/vnmakarov/ruby/tree/rtl_mjit_branch/MJIT-benchmarks) | |2.0.0-p0 |r62186 |JIT off |JIT on | |:----------|:--------|:--------|:--------|:--------| |aread |1.00 |1.09 |1.07 |2.19 | |aref |1.00 |1.13 |1.11 |2.22 | |aset |1.00 |1.50 |1.45 |2.64 | |awrite |1.00 |1.17 |1.13 |2.20 | |call |1.00 |1.29 |1.26 |2.02 | |const2 |1.00 |1.10 |1.10 |2.19 | |const |1.00 |1.11 |1.10 |2.19 | |fannk |1.00 |1.04 |1.02 |1.00 | |fib |1.00 |1.32 |1.31 |1.84 | |ivread |1.00 |1.13 |1.12 |2.43 | |ivwrite |1.00 |1.23 |1.21 |2.40 | |mandelbrot |1.00 |1.13 |1.16 |1.28 | |meteor |1.00 |2.97 |2.92 |3.17 | |nbody |1.00 |1.17 |1.15 |1.49 | |nest-ntimes|1.00 |1.22 |1.20 |1.39 | |nest-while |1.00 |1.10 |1.10 |1.37 | |norm |1.00 |1.18 |1.16 |1.24 | |nsvb |1.00 |1.16 |1.16 |1.17 | |red-black |1.00 |1.02 |0.99 |1.12 | |sieve |1.00 |1.30 |1.28 |1.62 | |trees |1.00 |1.14 |1.13 |1.19 | |while |1.00 |1.12 |1.11 |2.41 | ** Discourse's script/bench.rb Benchmark: https://github.com/discourse/discourse/blob/v1.8.7/script/bench.rb NOTE: Rails performance was somehow a little degraded with JIT for now. We should fix this. (At least I know opt_aref is performing badly in JIT and I have an idea to fix it. Please wait for the fix.) 
*** JIT off Your Results: (note for timings- percentile is first, duration is second in millisecs) categories_admin: 50: 17 75: 18 90: 22 99: 29 home_admin: 50: 21 75: 21 90: 27 99: 40 topic_admin: 50: 17 75: 18 90: 22 99: 32 categories: 50: 35 75: 41 90: 43 99: 77 home: 50: 39 75: 46 90: 49 99: 95 topic: 50: 46 75: 52 90: 56 99: 101 *** JIT on Your Results: (note for timings- percentile is first, duration is second in millisecs) categories_admin: 50: 19 75: 21 90: 25 99: 33 home_admin: 50: 24 75: 26 90: 30 99: 35 topic_admin: 50: 19 75: 20 90: 25 99: 30 categories: 50: 40 75: 44 90: 48 99: 76 home: 50: 42 75: 48 90: 51 99: 89 topic: 50: 49 75: 55 90: 58 99: 99 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62197 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- mjit_compile.c | 158 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 155 insertions(+), 3 deletions(-) (limited to 'mjit_compile.c') diff --git a/mjit_compile.c b/mjit_compile.c index a48ae84e4c..b323192fe9 100644 --- a/mjit_compile.c +++ b/mjit_compile.c @@ -8,11 +8,163 @@ #include "internal.h" #include "vm_core.h" +#include "vm_exec.h" +#include "mjit.h" +#include "insns.inc" +#include "insns_info.inc" +#include "vm_insnhelper.h" -/* Compile ISeq to C code in F. Return TRUE if it succeeds to compile. */ +/* Storage to keep compiler's status. This should have information + which is global during one `mjit_compile` call. Ones conditional + in each branch should be stored in `compile_branch`. */ +struct compile_status { + int success; /* has TRUE if compilation has had no issue */ + int *compiled_for_pos; /* compiled_for_pos[pos] has TRUE if the pos is compiled */ +}; + +/* Storage to keep data which is consistent in each conditional branch. + This is created and used for one `compile_insns` call and its values + should be copied for extra `compile_insns` call. 
*/ +struct compile_branch { + unsigned int stack_size; /* this simulates sp (stack pointer) of YARV */ + int finish_p; /* if TRUE, compilation in this branch should stop and let another branch to be compiled */ +}; + +struct case_dispatch_var { + FILE *f; + unsigned int base_pos; + VALUE last_value; +}; + +/* Returns iseq from cc if it's available and still not obsoleted. */ +static const rb_iseq_t * +get_iseq_if_available(CALL_CACHE cc) +{ + if (GET_GLOBAL_METHOD_STATE() == cc->method_state + && mjit_valid_class_serial_p(cc->class_serial) + && cc->me && cc->me->def->type == VM_METHOD_TYPE_ISEQ) { + return rb_iseq_check(cc->me->def->body.iseq.iseqptr); + } + return NULL; +} + +/* TODO: move to somewhere shared with vm_args.c */ +#define IS_ARGS_SPLAT(ci) ((ci)->flag & VM_CALL_ARGS_SPLAT) +#define IS_ARGS_KEYWORD(ci) ((ci)->flag & VM_CALL_KWARG) + +/* Returns TRUE if iseq is inlinable, otherwise FALSE. This becomes TRUE in the same condition + as CI_SET_FASTPATH (in vm_callee_setup_arg) is called from vm_call_iseq_setup. 
*/ +static int +inlinable_iseq_p(CALL_INFO ci, CALL_CACHE cc, const rb_iseq_t *iseq) +{ + extern int simple_iseq_p(const rb_iseq_t *iseq); + return iseq != NULL + && simple_iseq_p(iseq) && !(ci->flag & VM_CALL_KW_SPLAT) /* top of vm_callee_setup_arg */ + && (!IS_ARGS_SPLAT(ci) && !IS_ARGS_KEYWORD(ci) && !(METHOD_ENTRY_VISI(cc->me) == METHOD_VISI_PROTECTED)); /* CI_SET_FASTPATH */ +} + +static int +compile_case_dispatch_each(VALUE key, VALUE value, VALUE arg) +{ + struct case_dispatch_var *var = (struct case_dispatch_var *)arg; + unsigned int offset; + + if (var->last_value != value) { + offset = FIX2INT(value); + var->last_value = value; + fprintf(var->f, " case %d:\n", offset); + fprintf(var->f, " goto label_%d;\n", var->base_pos + offset); + fprintf(var->f, " break;\n"); + } + return ST_CONTINUE; +} + +static void compile_insns(FILE *f, const struct rb_iseq_constant_body *body, unsigned int stack_size, + unsigned int pos, struct compile_status *status); + +/* Main function of JIT compilation, vm_exec_core counterpart for JIT. Compile one insn to `f`, may modify + b->stack_size and return next position. + + When you add a new instruction to insns.def, it would be nice to have JIT compilation support here but + it's optional. This JIT compiler just ignores ISeq which includes unknown instruction, and ISeq which + does not have it can be compiled as usual. */ +static unsigned int +compile_insn(FILE *f, const struct rb_iseq_constant_body *body, const int insn, const VALUE *operands, + const unsigned int pos, struct compile_status *status, struct compile_branch *b) +{ + unsigned int next_pos = pos + insn_len(insn); + +/*****************/ + #include "mjit_compile.inc" +/*****************/ + + return next_pos; +} + +/* Compile one conditional branch. If it has branchXXX insn, this should be + called multiple times for each branch. 
*/ +static void +compile_insns(FILE *f, const struct rb_iseq_constant_body *body, unsigned int stack_size, + unsigned int pos, struct compile_status *status) +{ + int insn; + struct compile_branch branch; + + branch.stack_size = stack_size; + branch.finish_p = FALSE; + + while (pos < body->iseq_size && !status->compiled_for_pos[pos] && !branch.finish_p) { +#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE + insn = rb_vm_insn_addr2insn((void *)body->iseq_encoded[pos]); +#else + insn = (int)body->iseq_encoded[pos]; +#endif + status->compiled_for_pos[pos] = TRUE; + + fprintf(f, "\nlabel_%d: /* %s */\n", pos, insn_name(insn)); + pos = compile_insn(f, body, insn, body->iseq_encoded + (pos+1), pos, status, &branch); + if (status->success && branch.stack_size > body->stack_max) { + if (mjit_opts.warnings || mjit_opts.verbose) + fprintf(stderr, "MJIT warning: JIT stack exceeded its max\n"); + status->success = FALSE; + } + if (!status->success) + break; + } +} + +/* Compile ISeq to C code in F. It returns 1 if it succeeds to compile. */ int mjit_compile(FILE *f, const struct rb_iseq_constant_body *body, const char *funcname) { - /* TODO: Write your own JIT compiler here. */ - return FALSE; + struct compile_status status; + status.success = TRUE; + status.compiled_for_pos = ZALLOC_N(int, body->iseq_size); + + fprintf(f, "VALUE %s(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp) {\n", funcname); + fprintf(f, " VALUE *stack = reg_cfp->sp;\n"); + + /* Simulate `opt_pc` in setup_parameters_complex */ + if (body->param.flags.has_opt) { + int i; + fprintf(f, "\n"); + fprintf(f, " switch (reg_cfp->pc - reg_cfp->iseq->body->iseq_encoded) {\n"); + for (i = 0; i <= body->param.opt_num; i++) { + VALUE pc_offset = body->param.opt_table[i]; + fprintf(f, " case %"PRIdVALUE":\n", pc_offset); + fprintf(f, " goto label_%"PRIdVALUE";\n", pc_offset); + } + fprintf(f, " }\n"); + } + + /* ISeq might be used for catch table too. For that usage, this code cancels JIT execution. 
*/ + fprintf(f, " if (reg_cfp->pc != 0x%"PRIxVALUE") {\n", (VALUE)body->iseq_encoded); + fprintf(f, " return Qundef;\n"); + fprintf(f, " }\n"); + + compile_insns(f, body, 0, 0, &status); + fprintf(f, "}\n"); + + xfree(status.compiled_for_pos); + return status.success; } -- cgit v1.2.3