From 89e7997622038f82115f34dbb4ea382e02bed163 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Tue, 30 Jul 2019 21:36:05 -0400 Subject: Combine call info and cache to speed up method invocation To perform a regular method call, the VM needs two structs, `rb_call_info` and `rb_call_cache`. At the moment, we allocate these two structures in separate buffers. In the worst case, the CPU needs to read 4 cache lines to complete a method call. Putting the two structures together reduces the maximum number of cache line reads to 2. Combining the structures also saves 8 bytes per call site as the current layout uses two separate pointers for the call info and the call cache. This saves about 2 MiB on Discourse. This change improves the Optcarrot benchmark by at least 3%. For more details, see the attached bugs.ruby-lang.org ticket. Complications: - A new instruction attribute `comptime_sp_inc` is introduced to calculate SP increase at compile time without using call caches. At compile time, a `TS_CALLDATA` operand points to a call info struct, but at runtime, the same operand points to a call data struct. Instructions that explicitly define `sp_inc` also need to define `comptime_sp_inc`. - MJIT code for copying call cache becomes slightly more complicated. - This changes the bytecode format, which might break existing tools. 
[Misc #16258] --- vm_core.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'vm_core.h') diff --git a/vm_core.h b/vm_core.h index 685005336b..5c8c91b593 100644 --- a/vm_core.h +++ b/vm_core.h @@ -258,6 +258,16 @@ struct rb_calling_info { int kw_splat; }; +struct rb_call_data { + struct rb_call_cache cc; + struct rb_call_info ci; +}; + +struct rb_kwarg_call_data { + struct rb_call_cache cc; + struct rb_call_info_with_kwarg ci_kw; +}; + struct rb_execution_context_struct; typedef VALUE (*vm_call_handler)(struct rb_execution_context_struct *ec, struct rb_control_frame_struct *cfp, struct rb_calling_info *calling, const struct rb_call_info *ci, struct rb_call_cache *cc); @@ -417,12 +427,12 @@ struct rb_iseq_constant_body { struct rb_iseq_struct *local_iseq; /* local_iseq->flip_cnt can be modified */ union iseq_inline_storage_entry *is_entries; - struct rb_call_info *ci_entries; /* struct rb_call_info ci_entries[ci_size]; - * struct rb_call_info_with_kwarg cikw_entries[ci_kw_size]; - * So that: - * struct rb_call_info_with_kwarg *cikw_entries = &body->ci_entries[ci_size]; - */ - struct rb_call_cache *cc_entries; /* size is ci_size + ci_kw_size */ + struct rb_call_data *call_data; /* A buffer for two arrays: + * struct rb_call_data calls[ci_size]; + * struct rb_kwarg_call_data kw_calls[ci_kw_size]; + * Such that: + * struct rb_kwarg_call_data *kw_calls = &body->call_data[ci_size]; + */ struct { rb_snum_t flip_count; @@ -1121,6 +1131,7 @@ typedef struct iseq_inline_cache_entry *IC; typedef union iseq_inline_storage_entry *ISE; typedef struct rb_call_info *CALL_INFO; typedef struct rb_call_cache *CALL_CACHE; +typedef struct rb_call_data *CALL_DATA; void rb_vm_change_state(void); -- cgit v1.2.3