From 8a15857a7f283101a3923b76c71a1f05bfe58512 Mon Sep 17 00:00:00 2001 From: k0kubun Date: Sun, 4 Mar 2018 07:04:40 +0000 Subject: mjit_compile.c: use local variables for stack if catch_except_p is FALSE. If catch_except_p is TRUE, stack values should be on VM's stack when exception is thrown and the JIT-ed frame is re-executed by VM's exception handler. If it's FALSE, the JIT-ed frame won't be re-executed and doesn't need to keep values on VM's stack. Using local variables allows us to reduce cfp->sp motion. Moving cfp->sp is needed only for insns whose handles_frame? is true. So it improves performance. _mjit_compile_insn.erb: Prepare `stack_size` variable for GET_SP, STACK_ADDR_FROM_TOP, TOPN macros. Share pc and sp motion partial view. Use cancel handler created in mjit_compile.c. _mjit_compile_send.erb: ditto. Also, when iseq->body->catch_except_p is TRUE, this stops calling mjit_exec directly. I described the reason in vm_insnhelper.h's comment for EXEC_EC_CFP. _mjit_compile_pc_and_sp.erb: Shared logic for moving sp and pc. As you can see from this file, when status->local_stack_p is TRUE and insn.handles_frame? is false, moving sp is skipped. But if insn.handles_frame? is true, values should be rolled back to VM's stack. common.mk: add dependency for the file. _mjit_compile_insn_body.erb: Set sp value before canceling JIT on DISPATCH_ORIGINAL_INSN. Replace GET_SP, STACK_ADDR_FROM_TOP, TOPN macros for the case local_stack_p is TRUE and insn.handles_frame? is false. In that case, values are not available on VM's stack and those macros should be replaced. mjit_compile.inc.erb: updated comments of macros which are supported by JIT compiler. All references to `cfp->sp` should be replaced and thus INC_SP, SET_SV, PUSH are no longer supported for now, because they are not used now. vm_exec.h: moved EXEC_EC_CFP definition to vm_insnhelper.h because it's tightly coupled to CALL_METHOD. vm_insnhelper.h: Have revised EXEC_EC_CFP definition moved from vm_exec.h. 
Now it triggers mjit_exec for VM, and has the guard for catch_except_p on JIT-ed code. See comments for details. CALL_METHOD delegates triggering mjit_exec to EXEC_EC_CFP. insns.def: Stopped using EXEC_EC_CFP for the case we don't want to trigger mjit_exec. Those insns (defineclass, opt_call_c_function) are not supported by JIT and it's safe to use RESTORE_REGS(), NEXT_INSN(). expandarray is changed to pass GET_SP() to replace the macro in _mjit_compile_insn_body.erb. vm_insnhelper.c: change to take sp for the above reason. [close https://github.com/ruby/ruby/pull/1828] This patch resurrects the performance which was attached in [Feature #14235]. * Benchmark Optcarrot (with configuration for benchmark_driver.gem) https://github.com/benchmark-driver/optcarrot $ benchmark-driver benchmark.yml --verbose 1 --rbenv 'before;before+JIT::before,--jit;after;after+JIT::after,--jit' --repeat-count 10 before: ruby 2.6.0dev (2018-03-04 trunk 62652) [x86_64-linux] before+JIT: ruby 2.6.0dev (2018-03-04 trunk 62652) +JIT [x86_64-linux] after: ruby 2.6.0dev (2018-03-04 local-variable.. 62652) [x86_64-linux] last_commit=mjit_compile.c: use local variables for stack after+JIT: ruby 2.6.0dev (2018-03-04 local-variable.. 
62652) +JIT [x86_64-linux] last_commit=mjit_compile.c: use local variables for stack Calculating ------------------------------------- before before+JIT after after+JIT optcarrot 53.552 59.680 53.697 63.358 fps Comparison: optcarrot after+JIT: 63.4 fps before+JIT: 59.7 fps - 1.06x slower after: 53.7 fps - 1.18x slower before: 53.6 fps - 1.18x slower git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62655 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- tool/ruby_vm/views/_mjit_compile_insn.erb | 15 +++++----- tool/ruby_vm/views/_mjit_compile_insn_body.erb | 38 +++++++++++++++++++++++--- tool/ruby_vm/views/_mjit_compile_pc_and_sp.erb | 38 ++++++++++++++++++++++++++ tool/ruby_vm/views/_mjit_compile_send.erb | 28 +++++++++++++------ tool/ruby_vm/views/mjit_compile.inc.erb | 9 ++---- 5 files changed, 102 insertions(+), 26 deletions(-) create mode 100644 tool/ruby_vm/views/_mjit_compile_pc_and_sp.erb (limited to 'tool') diff --git a/tool/ruby_vm/views/_mjit_compile_insn.erb b/tool/ruby_vm/views/_mjit_compile_insn.erb index 42ee0469a9..5627be4ada 100644 --- a/tool/ruby_vm/views/_mjit_compile_insn.erb +++ b/tool/ruby_vm/views/_mjit_compile_insn.erb @@ -20,6 +20,11 @@ MAYBE_UNUSED(<%= ope.fetch(:decl) %>) = (<%= ope.fetch(:type) %>)operands[<%= i %>]; % end % +% # JIT: Declare stack_size to be used in some macro of _mjit_compile_insn_body.erb + if (status->local_stack_p) { + fprintf(f, " MAYBE_UNUSED(unsigned int) stack_size = %u;\n", b->stack_size); + } +% % # JIT: Declare variables for operands, popped values and return values % ret_decls = insn.rets.map { |r| "MAYBE_UNUSED(#{r.fetch(:type)}) #{r.fetch(:name)}"} # TODO: fix #declarations to return Hash... % insn.declarations.each do |decl| @@ -52,13 +57,7 @@ % end % % # JIT: move sp and pc if necessary -% if insn.handles_frame? 
- fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", next_pos); /* ADD_PC(INSN_ATTR(width)); */ - fprintf(f, " reg_cfp->sp = (VALUE *)reg_cfp->bp + %d;\n", b->stack_size + 1 - <%= insn.pops.size %>); /* POPN(INSN_ATTR(popn)); */ -% else - fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", pos); - fprintf(f, " reg_cfp->sp = (VALUE *)reg_cfp->bp + %d;\n", b->stack_size + 1); -% end +<%= render 'mjit_compile_pc_and_sp', locals: { insn: insn } -%> % % # JIT: Print insn body in insns.def <%= render 'mjit_compile_insn_body', locals: { insn: insn } -%> @@ -75,7 +74,7 @@ % if trace_enablable_insns.include?(insn.name) fprintf(f, " if (UNLIKELY(ruby_vm_event_enabled_flags & ISEQ_TRACE_EVENTS)) {\n"); fprintf(f, " reg_cfp->sp = (VALUE *)reg_cfp->bp + %d;\n", b->stack_size + (int)<%= insn.call_attribute('sp_inc') %> + 1); - fprintf(f, " return Qundef; /* cancel JIT */\n"); + fprintf(f, " goto cancel;\n"); fprintf(f, " }\n"); % end % diff --git a/tool/ruby_vm/views/_mjit_compile_insn_body.erb b/tool/ruby_vm/views/_mjit_compile_insn_body.erb index 24c07d590b..b209786525 100644 --- a/tool/ruby_vm/views/_mjit_compile_insn_body.erb +++ b/tool/ruby_vm/views/_mjit_compile_insn_body.erb @@ -17,9 +17,6 @@ % # % expand_simple_macros = lambda do |arg_expr| % arg_expr.dup.tap do |expr| -% # For `opt_xxx`'s fallbacks. -% expr.gsub!(/\bDISPATCH_ORIGINAL_INSN\([^)]+\);/, 'return Qundef; /* cancel JIT */') -% % # For `leave`. We can't proceed next ISeq in the same JIT function. % expr.gsub!(/^(?\s*)RESTORE_REGS\(\);\n/) do % indent = Regexp.last_match[:indent] @@ -42,7 +39,9 @@ % # % # Expand dynamic macro here (only JUMP for now) % # -% if line =~ /\A\s+JUMP\((?[^)]+)\);\s+\z/ +% # TODO: support combination of following macros in the same line +% case line +% when /\A\s+JUMP\((?[^)]+)\);\s+\z/ % dest = Regexp.last_match[:dest] % % if insn.name == 'opt_case_dispatch' # special case... 
TODO: use another macro to avoid checking name @@ -68,7 +67,38 @@ next_pos = pos + insn_len(insn) + (unsigned int)<%= dest %>; fprintf(f, " goto label_%d;\n", next_pos); % end +% when /\A\s+DISPATCH_ORIGINAL_INSN\([^)]+\);\s+\z/ +% # For `opt_xxx`'s fallbacks. + if (status->local_stack_p) { + fprintf(f, " reg_cfp->sp = (VALUE *)reg_cfp->bp + %d;\n", b->stack_size + 1); + } + fprintf(f, " goto cancel;\n"); % else +% if insn.handles_frame? +% # If insn.handles_frame? is true, cfp->sp might be changed inside insns (like vm_caller_setup_arg_block) +% # and thus we need to use cfp->sp, even when local_stack_p is TRUE. When insn.handles_frame? is true, +% # cfp->sp should be available too because _mjit_compile_pc_and_sp.erb sets it. fprintf(f, <%= to_cstr.call(line) %>); +% else +% # If local_stack_p is TRUE and insn.handles_frame? is false, stack values are only available in local variables +% # for stack. So we need to replace those macros if local_stack_p is TRUE here. +% case line +% when /\bGET_SP\(\)/ +% # reg_cfp->sp + fprintf(f, <%= to_cstr.call(line.sub(/\bGET_SP\(\)/, '%s')) %>, (status->local_stack_p ? "(stack + stack_size)" : "GET_SP()")); +% when /\bSTACK_ADDR_FROM_TOP\((?[^)]+)\)/ +% # #define STACK_ADDR_FROM_TOP(n) (GET_SP()-(n)) +% num = Regexp.last_match[:num] + fprintf(f, <%= to_cstr.call(line.sub(/\bSTACK_ADDR_FROM_TOP\(([^)]+)\)/, '%s')) %>, + (status->local_stack_p ? "stack + (stack_size - (<%= num %>))" : "STACK_ADDR_FROM_TOP(<%= num %>)")); +% when /\bTOPN\((?[^)]+)\)/ +% # #define TOPN(n) (*(GET_SP()-(n)-1)) +% num = Regexp.last_match[:num] + fprintf(f, <%= to_cstr.call(line.sub(/\bTOPN\(([^)]+)\)/, '%s')) %>, + (status->local_stack_p ? 
"*(stack + (stack_size - (<%= num %>) - 1))" : "TOPN(<%= num %>)")); +% else + fprintf(f, <%= to_cstr.call(line) %>); +% end +% end % end % end diff --git a/tool/ruby_vm/views/_mjit_compile_pc_and_sp.erb b/tool/ruby_vm/views/_mjit_compile_pc_and_sp.erb new file mode 100644 index 0000000000..ae142e9ee8 --- /dev/null +++ b/tool/ruby_vm/views/_mjit_compile_pc_and_sp.erb @@ -0,0 +1,38 @@ +% # Copyright (c) 2018 Takashi Kokubun. All rights reserved. +% # +% # This file is a part of the programming language Ruby. Permission is hereby +% # granted, to either redistribute and/or modify this file, provided that the +% # conditions mentioned in the file COPYING are met. Consult the file for +% # details. +% +% # JIT: move pc so that catch table lookup condition is met +% if insn.handles_frame? + fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", next_pos); /* ADD_PC(INSN_ATTR(width)); */ +% else + fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", pos); +% end +% +% # JIT: move sp to use or preserve stack variables + if (status->local_stack_p) { +% # sp motion is optimized away for `handles_frame? #=> false` case. +% # Thus sp should be set properly before `goto cancel`. +% if insn.handles_frame? + fprintf(f, " reg_cfp->sp = (VALUE *)reg_cfp->bp + stack_size + 1 - <%= insn.pops.size %>;\n"); /* POPN(INSN_ATTR(popn)); */ +% +% # JIT-only behavior (pushing JIT's local variables to VM's stack): + { + rb_snum_t i, push_size; + push_size = -<%= insn.call_attribute('sp_inc') %> + <%= insn.rets.size %> - <%= insn.pops.size %>; + for (i = 0; i < push_size; i++) { /* TODO: use memcpy? */ + fprintf(f, " *(reg_cfp->sp + %ld) = stack[%ld];\n", i - push_size, (rb_snum_t)b->stack_size - push_size + i); + } + } +% end + } + else { +% if insn.handles_frame? 
+ fprintf(f, " reg_cfp->sp = (VALUE *)reg_cfp->bp + %d;\n", b->stack_size + 1 - <%= insn.pops.size %>); /* POPN(INSN_ATTR(popn)); */ +% else + fprintf(f, " reg_cfp->sp = (VALUE *)reg_cfp->bp + %d;\n", b->stack_size + 1); +% end + } diff --git a/tool/ruby_vm/views/_mjit_compile_send.erb b/tool/ruby_vm/views/_mjit_compile_send.erb index 8df74a9d83..6781bba696 100644 --- a/tool/ruby_vm/views/_mjit_compile_send.erb +++ b/tool/ruby_vm/views/_mjit_compile_send.erb @@ -22,15 +22,20 @@ if (inlinable_iseq_p(ci, cc, iseq = get_iseq_if_available(cc))) { int param_size = iseq->body->param.size; /* TODO: check calling->argc for argument_arity_error */ + fprintf(f, "{\n"); +% # JIT: Declare stack_size to be used in some macro of _mjit_compile_insn_body.erb + if (status->local_stack_p) { + fprintf(f, " MAYBE_UNUSED(unsigned int) stack_size = %u;\n", b->stack_size); + } + % # JIT: move sp and pc if necessary - fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", next_pos); /* ADD_PC(INSN_ATTR(width)); */ - fprintf(f, " reg_cfp->sp = (VALUE *)reg_cfp->bp + %d;\n", b->stack_size + 1 - <%= insn.pops.size %>); /* POPN(INSN_ATTR(popn)); */ +<%= render 'mjit_compile_pc_and_sp', locals: { insn: insn } -%> % # JIT: Invalidate call cache if it requires vm_search_method. This allows to inline some of following things. 
fprintf(f, " if (UNLIKELY(GET_GLOBAL_METHOD_STATE() != %"PRI_SERIALT_PREFIX"u ||\n", cc->method_state); fprintf(f, " RCLASS_SERIAL(CLASS_OF(stack[%d])) != %"PRI_SERIALT_PREFIX"u)) {\n", b->stack_size - 1 - argc, cc->class_serial); fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", pos); - fprintf(f, " return Qundef; /* cancel JIT */\n"); + fprintf(f, " goto cancel;\n"); fprintf(f, " }\n"); % # JIT: Print insn body in insns.def @@ -52,10 +57,16 @@ fprintf(f, " vm_push_frame(ec, (const rb_iseq_t *)0x%"PRIxVALUE", VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL, calling.recv, " "calling.block_handler, 0x%"PRIxVALUE", (const VALUE *)0x%"PRIxVALUE", argv + %d, %d, %d);\n", (VALUE)iseq, (VALUE)cc->me, (VALUE)iseq->body->iseq_encoded, param_size, iseq->body->local_table_size - param_size, iseq->body->stack_max); - fprintf(f, " if ((v = mjit_exec(ec)) == Qundef) {\n"); - fprintf(f, " VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n"); /* This is vm_call0_body's code after vm_call_iseq_setup */ - fprintf(f, " v = vm_exec(ec, FALSE);\n"); - fprintf(f, " }\n"); + if (iseq->body->catch_except_p) { + fprintf(f, " VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n"); + fprintf(f, " v = vm_exec(ec, TRUE);\n"); + } + else { + fprintf(f, " if ((v = mjit_exec(ec)) == Qundef) {\n"); + fprintf(f, " VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n"); /* This is vm_call0_body's code after vm_call_iseq_setup */ + fprintf(f, " v = vm_exec(ec, FALSE);\n"); + fprintf(f, " }\n"); + } fprintf(f, " stack[%d] = v;\n", b->stack_size - argc - 1); fprintf(f, " }\n"); @@ -64,12 +75,13 @@ % # JIT: We should evaluate ISeq modified for TracePoint if it's enabled. Note: This is slow. 
fprintf(f, " if (UNLIKELY(ruby_vm_event_enabled_flags & ISEQ_TRACE_EVENTS)) {\n"); fprintf(f, " reg_cfp->sp = (VALUE *)reg_cfp->bp + %d;\n", b->stack_size + (int)<%= insn.call_attribute('sp_inc') %> + 1); - fprintf(f, " return Qundef; /* cancel JIT */\n"); + fprintf(f, " goto cancel;\n"); fprintf(f, " }\n"); % # compiler: Move JIT compiler's internal stack pointer b->stack_size += <%= insn.call_attribute('sp_inc') %>; + fprintf(f, "}\n"); break; } } diff --git a/tool/ruby_vm/views/mjit_compile.inc.erb b/tool/ruby_vm/views/mjit_compile.inc.erb index b6788a6593..9961b18453 100644 --- a/tool/ruby_vm/views/mjit_compile.inc.erb +++ b/tool/ruby_vm/views/mjit_compile.inc.erb @@ -31,16 +31,13 @@ % # reg_cfp: the second argument of _mjitXXX % # GET_CFP(): refers to `reg_cfp` % # GET_EP(): refers to `reg_cfp->ep` -% # GET_SP(): refers to `reg_cfp->sp` -% # INC_SP(): refers to `reg_cfp->sp` -% # SET_SV(): refers to `reg_cfp->sp` -% # PUSH(): refers to `SET_SV()`, `INC_SP()` +% # GET_SP(): refers to `reg_cfp->sp`, or `(stack + stack_size)` if local_stack_p % # GET_SELF(): refers to `reg_cfp->self` % # GET_LEP(): refers to `VM_EP_LEP(reg_cfp->ep)` % # EXEC_EC_CFP(): refers to `val = vm_exec(ec, TRUE)` with frame setup % # CALL_METHOD(): using `GET_CFP()` and `EXEC_EC_CFP()` -% # TOPN(): refers to `reg_cfp->sp`, which needs to have correct sp (of course) -% # STACK_ADDR_FROM_TOP(): refers to `reg_cfp->sp`, same problem here +% # TOPN(): refers to `reg_cfp->sp`, or `*(stack + (stack_size - num - 1))` if local_stack_p +% # STACK_ADDR_FROM_TOP(): refers to `reg_cfp->sp`, or `stack + (stack_size - num)` if local_stack_p % # DISPATCH_ORIGINAL_INSN(): expanded in _mjit_compile_insn.erb % # THROW_EXCEPTION(): specially defined for JIT % # RESTORE_REGS(): specially defined for `leave` -- cgit v1.2.3