author     卜部昌平 <shyouhei@ruby-lang.org>  2019-10-07 12:59:57 +0900
committer  卜部昌平 <shyouhei@ruby-lang.org>  2019-11-07 17:41:30 +0900
commit     d45a013a1a3bcc860e6f7f303220b3297e2abdbc (patch)
tree       9dd459d6b41542cdfcd75cede71a96e06981e3e0
parent     3c252651e1ee28d015dbe1648dfdf0140232b733 (diff)
extend rb_call_cache
Prior to this changeset, the majority of inline cache misses resulted
in the same method entry when rb_callable_method_entry() resolved the
method search. Let's not call that function in the first place in such
situations.

In doing so we extend struct rb_call_cache from 44 bytes (on a 64-bit
machine) to 64 bytes, and fill the gap with secondary class serial(s).
The call cache's class serials now behave as an LRU cache.

Calculating -------------------------------------
                           ours        2.7        2.6
vm2_poly_same_method     2.339M     1.744M     1.369M i/s
                       - 6.000M times in 2.565086s 3.441329s 4.381386s

Comparison:
             vm2_poly_same_method
                 ours:   2339103.0 i/s
                  2.7:   1743512.3 i/s - 1.34x slower
                  2.6:   1369429.8 i/s - 1.71x slower
Notes:
    Merged: https://github.com/ruby/ruby/pull/2583
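To make the mechanism concrete, here is a minimal standalone sketch of
the move-to-front lookup that the diff below adds as
vm_cache_check_for_class_serial(). Everything here (toy_cache,
toy_cache_hit_p, NSLOTS) is made up for illustration and is not the
actual Ruby source; NSLOTS is hard-coded to the value the cache-line
arithmetic yields on a typical LP64 build.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint64_t serial_t;          /* toy stand-in for rb_serial_t */
#define NSLOTS 3                    /* slots the 64-byte budget leaves */

struct toy_cache {
    serial_t class_serial[NSLOTS];  /* slot 0 is the most recently used */
};

/* A hit anywhere in the array counts as a cache hit; a hit beyond
 * slot 0 is rotated to the front, like vm_cache_check_for_class_serial()
 * in the diff below. Zero marks an unused slot. */
static bool
toy_cache_hit_p(struct toy_cache *cc, serial_t class_serial)
{
    for (int i = 0; i < NSLOTS; i++) {
        serial_t j = cc->class_serial[i];
        if (!j) {
            break;                  /* empty slot: the rest are unused too */
        }
        else if (j != class_serial) {
            continue;               /* mismatch: try the next slot */
        }
        else if (i == 0) {
            return true;            /* primary hit: nothing to shuffle */
        }
        else {
            /* secondary hit: shift slots 0..i-1 up one, promote i to 0 */
            memmove(&cc->class_serial[1], &cc->class_serial[0],
                    i * sizeof(serial_t));
            cc->class_serial[0] = j;
            return true;
        }
    }
    return false;                   /* genuine miss: caller re-resolves */
}

int
main(void)
{
    struct toy_cache cc = { { 10, 20, 30 } };
    printf("%d\n", toy_cache_hit_p(&cc, 20));  /* 1: secondary hit */
    printf("%d\n", cc.class_serial[0] == 20);  /* 1: promoted to front */
    printf("%d\n", toy_cache_hit_p(&cc, 99));  /* 0: miss */
    return 0;
}

A secondary hit is promoted to slot 0, so a polymorphic call site that
alternates between a few receiver classes keeps all of their serials
resident instead of thrashing a single slot; that is where the
vm2_poly_same_method speedup above comes from.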
-rw-r--r--  internal.h                                 25
-rw-r--r--  mjit_compile.c                              2
-rw-r--r--  tool/ruby_vm/loaders/insns_def.rb           2
-rw-r--r--  tool/ruby_vm/views/_mjit_compile_send.erb   2
-rw-r--r--  vm_eval.c                                   2
-rw-r--r--  vm_insnhelper.c                            61
6 files changed, 75 insertions, 19 deletions
diff --git a/internal.h b/internal.h
index 703dd57699..1b27df0009 100644
--- a/internal.h
+++ b/internal.h
@@ -2357,10 +2357,32 @@ struct rb_execution_context_struct;
struct rb_control_frame_struct;
struct rb_calling_info;
struct rb_call_data;
+/* I have several reasons to choose 64 here:
+ *
+ * - A cache line must be a power-of-two size.
+ * - Setting this to anything less than or equal to 32 boosts nothing.
+ * - I have never seen an architecture that has a 128-byte L1 cache line.
+ * - I know Intel Core and Sparc T4 at least use 64.
+ * - I know jemalloc internally has this exact same `#define CACHE_LINE 64`.
+ * https://github.com/jemalloc/jemalloc/blob/dev/include/jemalloc/internal/jemalloc_internal_types.h
+ */
+#define CACHELINE 64
struct rb_call_cache {
/* inline cache: keys */
rb_serial_t method_state;
- rb_serial_t class_serial;
+ rb_serial_t class_serial[
+     (CACHELINE
+      - sizeof(rb_serial_t) /* method_state */
+      - sizeof(struct rb_callable_method_entry_struct *) /* me */
+      - sizeof(struct rb_callable_method_definition_struct *) /* def */
+      - sizeof(enum method_missing_reason) /* aux */
+      - sizeof(VALUE (*)( /* call */
+            struct rb_execution_context_struct *e,
+            struct rb_control_frame_struct *,
+            struct rb_calling_info *,
+            const struct rb_call_data *)))
+     / sizeof(rb_serial_t)
+ ];
/* inline cache: values */
const struct rb_callable_method_entry_struct *me;
@@ -2377,6 +2399,7 @@ struct rb_call_cache {
int inc_sp; /* used by cfunc */
} aux;
};
+STATIC_ASSERT(cachelined, sizeof(struct rb_call_cache) <= CACHELINE);
struct rb_call_info {
/* fixed at compile time */
ID mid;
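For concreteness, the array-size expression above can be evaluated by
hand. Assuming a typical LP64 build, where rb_serial_t and all pointers
are 8 bytes and the enum is 4 (these sizes are platform-dependent, so
the exact slot count can vary):

    (64 - 8 /* method_state */
        - 8 /* me */
        - 8 /* def */
        - 4 /* aux */
        - 8 /* call */) / 8  =  28 / 8  =  3

So class_serial gets three slots, and the whole struct (60 bytes of
members plus tail padding) satisfies the 64-byte bound checked by the
STATIC_ASSERT above.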
diff --git a/mjit_compile.c b/mjit_compile.c
index 27ea836ef4..bf5143f6ed 100644
--- a/mjit_compile.c
+++ b/mjit_compile.c
@@ -87,7 +87,7 @@ has_valid_method_type(CALL_CACHE cc)
{
extern bool mjit_valid_class_serial_p(rb_serial_t class_serial);
return GET_GLOBAL_METHOD_STATE() == cc->method_state
- && mjit_valid_class_serial_p(cc->class_serial) && cc->me;
+ && mjit_valid_class_serial_p(cc->class_serial[0]) && cc->me;
}
// Returns true if iseq can use fastpath for setup, otherwise NULL. This becomes true in the same condition
diff --git a/tool/ruby_vm/loaders/insns_def.rb b/tool/ruby_vm/loaders/insns_def.rb
index a29d13a661..47e4ba29f5 100644
--- a/tool/ruby_vm/loaders/insns_def.rb
+++ b/tool/ruby_vm/loaders/insns_def.rb
@@ -21,7 +21,7 @@ grammar = %r'
(?<keyword> typedef | extern | static | auto | register |
struct | union | enum ){0}
(?<C> (?: \g<block> | [^{}]+ )* ){0}
- (?<block> \{ \g<ws>* ^ \g<C> $ \g<ws>* \} ){0}
+ (?<block> \{ \g<ws>* \g<C> \g<ws>* \} ){0}
(?<ws> \g<comment> | \s ){0}
(?<ident> [_a-zA-Z] [0-9_a-zA-Z]* ){0}
(?<type> (?: \g<keyword> \g<ws>+ )* \g<ident> ){0}
diff --git a/tool/ruby_vm/views/_mjit_compile_send.erb b/tool/ruby_vm/views/_mjit_compile_send.erb
index 95e7846820..ec8eec5589 100644
--- a/tool/ruby_vm/views/_mjit_compile_send.erb
+++ b/tool/ruby_vm/views/_mjit_compile_send.erb
@@ -36,7 +36,7 @@
% # JIT: Invalidate call cache if it requires vm_search_method. This allows to inline some of following things.
fprintf(f, " if (UNLIKELY(GET_GLOBAL_METHOD_STATE() != %"PRI_SERIALT_PREFIX"u ||\n", cc_copy->method_state);
- fprintf(f, " RCLASS_SERIAL(CLASS_OF(stack[%d])) != %"PRI_SERIALT_PREFIX"u)) {\n", b->stack_size - 1 - argc, cc_copy->class_serial);
+ fprintf(f, " RCLASS_SERIAL(CLASS_OF(stack[%d])) != %"PRI_SERIALT_PREFIX"u)) {\n", b->stack_size - 1 - argc, cc_copy->class_serial[0]);
fprintf(f, " reg_cfp->pc = original_body_iseq + %d;\n", pos);
fprintf(f, " reg_cfp->sp = vm_base_ptr(reg_cfp) + %d;\n", b->stack_size);
fprintf(f, " goto send_cancel;\n");
diff --git a/vm_eval.c b/vm_eval.c
index 230b96b115..4c73d7337e 100644
--- a/vm_eval.c
+++ b/vm_eval.c
@@ -47,7 +47,7 @@ rb_vm_call0(rb_execution_context_t *ec, VALUE recv, ID id, int argc, const VALUE
{
struct rb_calling_info calling = { Qundef, recv, argc, kw_splat, };
struct rb_call_info ci = { id, (kw_splat ? VM_CALL_KW_SPLAT : 0), argc, };
- struct rb_call_cache cc = { 0, 0, me, me->def, vm_call_general, { 0, }, };
+ struct rb_call_cache cc = { 0, { 0, }, me, me->def, vm_call_general, { 0, }, };
struct rb_call_data cd = { cc, ci, };
return vm_call0_body(ec, &calling, &cd, argv);
}
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 5e1cfccf3c..f8be5f6f33 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -1422,16 +1422,58 @@ rb_vm_search_method_slowpath(struct rb_call_data *cd, VALUE klass)
struct rb_call_cache *cc = &cd->cc;
const rb_callable_method_entry_t *me =
rb_callable_method_entry(klass, ci->mid);
- *cc = (struct rb_call_cache) {
+ struct rb_call_cache buf = {
GET_GLOBAL_METHOD_STATE(),
- RCLASS_SERIAL(klass),
+ { RCLASS_SERIAL(klass) },
me,
me ? me->def : NULL,
calccall(cd, me),
};
+ if (buf.call != vm_call_general) {
+ for (int i = 0; i < numberof(cc->class_serial) - 1; i++) {
+ buf.class_serial[i + 1] = cc->class_serial[i];
+ }
+ }
+ MEMCPY(cc, &buf, struct rb_call_cache, 1);
VM_ASSERT(callable_method_entry_p(cc->me));
}
+static inline bool
+vm_cache_check_for_class_serial(struct rb_call_cache *cc, rb_serial_t class_serial)
+{
+ int i;
+ rb_serial_t j;
+
+ for (i = 0; i < numberof(cc->class_serial); i++) {
+ j = cc->class_serial[i];
+
+ if (! j) {
+ break;
+ }
+ else if (j != class_serial) {
+ continue;
+ }
+ else if (! i) {
+ return true;
+ }
+ else {
+ goto hit;
+ }
+ }
+
+ RB_DEBUG_COUNTER_INC(mc_class_serial_miss);
+ return false;
+
+ hit:
+ for (; i > 0; i--) {
+ cc->class_serial[i] = cc->class_serial[i - 1];
+ }
+
+ cc->class_serial[0] = j;
+ MEMZERO(&cc->aux, cc->aux, 1); /* cc->call is valid, but cc->aux might not. */
+ return true;
+}
+
static void
vm_search_method_fastpath(struct rb_call_data *cd, VALUE klass)
{
@@ -1440,8 +1482,7 @@ vm_search_method_fastpath(struct rb_call_data *cd, VALUE klass)
#if OPT_INLINE_METHOD_CACHE
if (LIKELY(RB_DEBUG_COUNTER_INC_UNLESS(mc_global_state_miss,
GET_GLOBAL_METHOD_STATE() == cc->method_state) &&
- RB_DEBUG_COUNTER_INC_UNLESS(mc_class_serial_miss,
- RCLASS_SERIAL(klass) == cc->class_serial))) {
+ vm_cache_check_for_class_serial(cc, RCLASS_SERIAL(klass)))) {
/* cache hit! */
VM_ASSERT(cc->call != NULL);
RB_DEBUG_COUNTER_INC(mc_inline_hit);
@@ -1605,24 +1646,16 @@ opt_eql_func(VALUE recv, VALUE obj, CALL_DATA cd)
VALUE
rb_equal_opt(VALUE obj1, VALUE obj2)
{
- struct rb_call_data cd;
- cd.ci.mid = idEq;
- cd.cc.method_state = 0;
- cd.cc.class_serial = 0;
- cd.cc.me = NULL;
+ struct rb_call_data cd = { .ci = { .mid = idEq, }, };
return opt_eq_func(obj1, obj2, &cd);
}
VALUE
rb_eql_opt(VALUE obj1, VALUE obj2)
{
- struct rb_call_data cd;
- cd.ci.mid = idEqlP;
- cd.cc.method_state = 0;
- cd.cc.class_serial = 0;
- cd.cc.me = NULL;
+ struct rb_call_data cd = { .ci = { .mid = idEqlP, }, };
return opt_eql_func(obj1, obj2, &cd);
}
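One detail the rb_equal_opt/rb_eql_opt hunks rely on: a designated
initializer zero-initializes every member that is not named, including
the widened class_serial array, so the field-by-field zeroing can be
dropped without leaving stale serials behind. A minimal demonstration
with a made-up miniature struct (toy_cd is not the real rb_call_data):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Hypothetical miniature of struct rb_call_data, for illustration only. */
struct toy_ci { unsigned long mid; };
struct toy_cc { uint64_t method_state; uint64_t class_serial[3]; const void *me; };
struct toy_cd { struct toy_cc cc; struct toy_ci ci; };

int
main(void)
{
    /* Only .ci.mid is named; C99 guarantees everything else is zeroed. */
    struct toy_cd cd = { .ci = { .mid = 42, }, };
    assert(cd.cc.method_state == 0);
    assert(cd.cc.class_serial[0] == 0 && cd.cc.class_serial[2] == 0);
    assert(cd.cc.me == NULL);
    return 0;
}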