From d45a013a1a3bcc860e6f7f303220b3297e2abdbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8D=9C=E9=83=A8=E6=98=8C=E5=B9=B3?= Date: Mon, 7 Oct 2019 12:59:57 +0900 Subject: extend rb_call_cache Prior to this changeset, the majority of inline cache mishits resulted in the same method entry when rb_callable_method_entry() resolved a method search. Let's not call the function in the first place in such situations. In doing so we extend the struct rb_call_cache from 44 bytes (on a 64-bit machine) to 64 bytes, and fill the gap with secondary class serial(s). The call cache's class serials now behave as an LRU cache. Calculating ------------------------------------- ours 2.7 2.6 vm2_poly_same_method 2.339M 1.744M 1.369M i/s - 6.000M times in 2.565086s 3.441329s 4.381386s Comparison: vm2_poly_same_method ours: 2339103.0 i/s 2.7: 1743512.3 i/s - 1.34x slower 2.6: 1369429.8 i/s - 1.71x slower --- internal.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'internal.h') diff --git a/internal.h b/internal.h index 703dd57699..1b27df0009 100644 --- a/internal.h +++ b/internal.h @@ -2357,10 +2357,32 @@ struct rb_execution_context_struct; struct rb_control_frame_struct; struct rb_calling_info; struct rb_call_data; +/* I have several reasons to chose 64 here: + * + * - A cache line must be a power-of-two size. + * - Setting this to anything less than or equal to 32 boosts nothing. + * - I have never seen an architecture that has 128 byte L1 cache line. + * - I know Intel Core and Sparc T4 at least uses 64. + * - I know jemalloc internally has this exact same `#define CACHE_LINE 64`. 
+ * https://github.com/jemalloc/jemalloc/blob/dev/include/jemalloc/internal/jemalloc_internal_types.h + */ +#define CACHELINE 64 struct rb_call_cache { /* inline cache: keys */ rb_serial_t method_state; - rb_serial_t class_serial; + rb_serial_t class_serial[ + (CACHELINE + - sizeof(rb_serial_t) /* method_state */ + - sizeof(struct rb_callable_method_entry_struct *) /* me */ + - sizeof(struct rb_callable_method_definition_struct *) /* def */ + - sizeof(enum method_missing_reason) /* aux */ + - sizeof(VALUE (*)( /* call */ + struct rb_execution_context_struct *e, + struct rb_control_frame_struct *, + struct rb_calling_info *, + const struct rb_call_data *))) + / sizeof(rb_serial_t) + ]; /* inline cache: values */ const struct rb_callable_method_entry_struct *me; @@ -2377,6 +2399,7 @@ struct rb_call_cache { int inc_sp; /* used by cfunc */ } aux; }; +STATIC_ASSERT(cachelined, sizeof(struct rb_call_cache) <= CACHELINE); struct rb_call_info { /* fixed at compile time */ ID mid; -- cgit v1.2.3