summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaxime Chevalier-Boisvert <maxime.chevalierboisvert@shopify.com>2023-10-12 10:05:34 -0400
committerGitHub <noreply@github.com>2023-10-12 10:05:34 -0400
commitb2e1ddffa55463f1180af7f9b269a2d89140b131 (patch)
treedf428c260b64f5d84126bbe4b957784d02563dc3
parent0c42c28531ea73a4a3e09dc12abe9f3264b87860 (diff)
YJIT: port call threshold logic from Rust to C for performance (#8628)
* Port call threshold logic from Rust to C for performance * Prefix global/field names with yjit_ * Fix linker error * Fix preprocessor condition for rb_yjit_threshold_hit * Fix third linker issue * Exclude yjit_calls_at_interv from RJIT bindgen --------- Co-authored-by: Takashi Kokubun <takashikkbn@gmail.com>
-rwxr-xr-xtool/rjit/bindgen.rb2
-rw-r--r--vm.c51
-rw-r--r--vm_core.h2
-rw-r--r--yjit.h8
-rw-r--r--yjit.rb2
-rw-r--r--yjit/src/core.rs4
-rw-r--r--yjit/src/options.rs26
-rw-r--r--yjit/src/stats.rs15
-rw-r--r--yjit/src/yjit.rs51
9 files changed, 83 insertions, 78 deletions
diff --git a/tool/rjit/bindgen.rb b/tool/rjit/bindgen.rb
index b66870e1d2..deb989dcfd 100755
--- a/tool/rjit/bindgen.rb
+++ b/tool/rjit/bindgen.rb
@@ -637,7 +637,7 @@ generator = BindingGenerator.new(
skip_fields: {
'rb_execution_context_struct.machine': %w[regs], # differs between macOS and Linux
rb_execution_context_struct: %w[method_missing_reason], # non-leading bit fields not supported
- rb_iseq_constant_body: %w[jit_exception jit_exception_calls yjit_payload], # conditionally defined
+ rb_iseq_constant_body: %w[jit_exception jit_exception_calls yjit_payload yjit_calls_at_interv], # conditionally defined
rb_thread_struct: %w[status has_dedicated_nt to_kill abort_on_exception report_on_exception pending_interrupt_queue_checked],
:'' => %w[is_from_method is_lambda is_isolated], # rb_proc_t
},
diff --git a/vm.c b/vm.c
index 1dc2b3f02c..fd4d7f933a 100644
--- a/vm.c
+++ b/vm.c
@@ -369,6 +369,49 @@ extern VALUE rb_vm_invoke_bmethod(rb_execution_context_t *ec, rb_proc_t *proc, V
const rb_callable_method_entry_t *me);
static VALUE vm_invoke_proc(rb_execution_context_t *ec, rb_proc_t *proc, VALUE self, int argc, const VALUE *argv, int kw_splat, VALUE block_handler);
+#if USE_YJIT
+// Counter to serve as a proxy for execution time, total number of calls
+static uint64_t yjit_total_entry_hits = 0;
+
+// Number of calls used to estimate how hot an ISEQ is
+#define YJIT_CALL_COUNT_INTERV 20u
+
+/// Test whether we are ready to compile an ISEQ or not
+static inline bool
+rb_yjit_threshold_hit(const rb_iseq_t *iseq, uint64_t entry_calls)
+{
+ yjit_total_entry_hits += 1;
+
+ // Record the number of calls at the beginning of the interval
+ if (entry_calls + YJIT_CALL_COUNT_INTERV == rb_yjit_call_threshold) {
+ iseq->body->yjit_calls_at_interv = yjit_total_entry_hits;
+ }
+
+ // Try to estimate the total time taken (total number of calls) to reach 20 calls to this ISEQ
+ // This gives us a ratio of how hot/cold this ISEQ is
+ if (entry_calls == rb_yjit_call_threshold) {
+ // We expect threshold 1 to compile everything immediately
+ if (rb_yjit_call_threshold < YJIT_CALL_COUNT_INTERV) {
+ return true;
+ }
+
+ uint64_t num_calls = yjit_total_entry_hits - iseq->body->yjit_calls_at_interv;
+
+ // Reject ISEQs that don't get called often enough
+ if (num_calls > rb_yjit_cold_threshold) {
+ rb_yjit_incr_counter("cold_iseq_entry");
+ return false;
+ }
+
+ return true;
+ }
+
+ return false;
+}
+#else
+#define rb_yjit_threshold_hit(iseq, entry_calls) false
+#endif
+
#if USE_RJIT || USE_YJIT
// Generate JIT code that supports the following kinds of ISEQ entries:
// * The first ISEQ on vm_exec (e.g. <main>, or Ruby methods/blocks
@@ -396,10 +439,8 @@ jit_compile(rb_execution_context_t *ec)
rb_yjit_compile_iseq(iseq, ec, false);
}
}
- else { // rb_rjit_call_p
- if (body->jit_entry_calls == rb_rjit_call_threshold()) {
- rb_rjit_compile(iseq);
- }
+ else if (body->jit_entry_calls == rb_rjit_call_threshold()) {
+ rb_rjit_compile(iseq);
}
}
return body->jit_entry;
@@ -442,7 +483,7 @@ jit_compile_exception(rb_execution_context_t *ec)
// Increment the ISEQ's call counter and trigger JIT compilation if not compiled
if (body->jit_exception == NULL) {
body->jit_exception_calls++;
- if (body->jit_exception_calls == rb_yjit_call_threshold()) {
+ if (body->jit_exception_calls == rb_yjit_call_threshold) {
rb_yjit_compile_iseq(iseq, ec, true);
}
}
diff --git a/vm_core.h b/vm_core.h
index 6f2fca6537..acad6280be 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -524,6 +524,8 @@ struct rb_iseq_constant_body {
#if USE_YJIT
// YJIT stores some data on each iseq.
void *yjit_payload;
+ // Used to estimate how frequently this ISEQ gets called
+ uint64_t yjit_calls_at_interv;
#endif
};
diff --git a/yjit.h b/yjit.h
index ede2f80a11..84a3655156 100644
--- a/yjit.h
+++ b/yjit.h
@@ -25,10 +25,11 @@
#endif
// Expose these as declarations since we are building YJIT.
+extern uint64_t rb_yjit_call_threshold;
+extern uint64_t rb_yjit_cold_threshold;
+void rb_yjit_incr_counter(const char *counter_name);
bool rb_yjit_enabled_p(void);
bool rb_yjit_compile_new_iseqs(void);
-unsigned long rb_yjit_call_threshold(void);
-bool rb_yjit_threshold_hit(const rb_iseq_t *const iseq, unsigned long total_calls);
void rb_yjit_invalidate_all_method_lookup_assumptions(void);
void rb_yjit_cme_invalidate(rb_callable_method_entry_t *cme);
void rb_yjit_collect_binding_alloc(void);
@@ -49,10 +50,9 @@ void rb_yjit_show_usage(int help, int highlight, unsigned int width, int columns
// !USE_YJIT
// In these builds, YJIT could never be turned on. Provide dummy implementations.
+static inline void rb_yjit_incr_counter(const char *counter_name) {}
static inline bool rb_yjit_enabled_p(void) { return false; }
static inline bool rb_yjit_compile_new_iseqs(void) { return false; }
-static inline unsigned long rb_yjit_call_threshold(void) { return 0; }
-static inline bool rb_yjit_threshold_hit(const rb_iseq_t *const iseq, unsigned long total_calls) { return false; }
static inline void rb_yjit_invalidate_all_method_lookup_assumptions(void) {}
static inline void rb_yjit_cme_invalidate(rb_callable_method_entry_t *cme) {}
static inline void rb_yjit_collect_binding_alloc(void) {}
diff --git a/yjit.rb b/yjit.rb
index b933d680fa..a3c03046b4 100644
--- a/yjit.rb
+++ b/yjit.rb
@@ -317,7 +317,7 @@ module RubyVM::YJIT
out.puts "bindings_set: " + format_number(13, stats[:binding_set])
out.puts "compilation_failure: " + format_number(13, compilation_failure) if compilation_failure != 0
out.puts "compiled_iseq_entry: " + format_number(13, stats[:compiled_iseq_entry])
- out.puts "cold_iseq_entry: " + format_number_pct(13, stats[:cold_iseq_entry], stats[:compiled_iseq_entry])
+ out.puts "cold_iseq_entry: " + format_number_pct(13, stats[:cold_iseq_entry], stats[:compiled_iseq_entry] + stats[:cold_iseq_entry])
out.puts "compiled_iseq_count: " + format_number(13, stats[:compiled_iseq_count])
out.puts "compiled_blockid_count:" + format_number(13, stats[:compiled_blockid_count])
out.puts "compiled_block_count: " + format_number(13, stats[:compiled_block_count])
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
index e493fa5466..8fb1a6a6ba 100644
--- a/yjit/src/core.rs
+++ b/yjit/src/core.rs
@@ -976,7 +976,6 @@ impl fmt::Debug for MutableBranchList {
}
}
-
/// This is all the data YJIT stores on an iseq
/// This will be dynamically allocated by C code
/// C code should pass an &mut IseqPayload to us
@@ -995,9 +994,6 @@ pub struct IseqPayload {
// Blocks that are invalidated but are not yet deallocated.
// The code GC will free them later.
pub dead_blocks: Vec<BlockRef>,
-
- // Used to estimate how frequently this ISEQ gets called
- pub call_count_at_interv: u64,
}
impl IseqPayload {
diff --git a/yjit/src/options.rs b/yjit/src/options.rs
index 455b3806fb..1d8e711a81 100644
--- a/yjit/src/options.rs
+++ b/yjit/src/options.rs
@@ -2,6 +2,18 @@ use std::{ffi::{CStr, CString}, ptr::null};
use crate::backend::current::TEMP_REGS;
use std::os::raw::{c_char, c_int, c_uint};
+// This option is exposed to the C side as a global variable for performance, see vm.c
+// Number of method calls after which to start generating code
+// Threshold==1 means compile on first execution
+#[no_mangle]
+static mut rb_yjit_call_threshold: u64 = 30;
+
+// This option is exposed to the C side as a global variable for performance, see vm.c
+// Number of execution requests after which a method is no longer
+// considered hot. Raising this results in more generated code.
+#[no_mangle]
+static mut rb_yjit_cold_threshold: u64 = 200_000;
+
// Command-line options
#[derive(Clone, PartialEq, Eq, Debug)]
#[repr(C)]
@@ -10,14 +22,6 @@ pub struct Options {
// Note that the command line argument is expressed in MiB and not bytes
pub exec_mem_size: usize,
- // Number of method calls after which to start generating code
- // Threshold==1 means compile on first execution
- pub call_threshold: usize,
-
- // Number of execution requests after which a method is no longer
- // considered hot. Raising this results in more generated code.
- pub cold_threshold: usize,
-
// Generate versions greedily until the limit is hit
pub greedy_versioning: bool,
@@ -63,8 +67,6 @@ pub struct Options {
// Initialize the options to default values
pub static mut OPTIONS: Options = Options {
exec_mem_size: 128 * 1024 * 1024,
- call_threshold: 30,
- cold_threshold: 200_000,
greedy_versioning: false,
no_type_prop: false,
max_versions: 4,
@@ -155,14 +157,14 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
},
("call-threshold", _) => match opt_val.parse() {
- Ok(n) => unsafe { OPTIONS.call_threshold = n },
+ Ok(n) => unsafe { rb_yjit_call_threshold = n },
Err(_) => {
return None;
}
},
("cold-threshold", _) => match opt_val.parse() {
- Ok(n) => unsafe { OPTIONS.cold_threshold = n },
+ Ok(n) => unsafe { rb_yjit_cold_threshold = n },
Err(_) => {
return None;
}
diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs
index 653c832991..c9b21c2ae9 100644
--- a/yjit/src/stats.rs
+++ b/yjit/src/stats.rs
@@ -567,6 +567,21 @@ pub extern "C" fn rb_yjit_get_exit_locations(_ec: EcPtr, _ruby_self: VALUE) -> V
}
}
+/// Increment a counter by name from the CRuby side
+/// Warning: this is not fast because it requires a hash lookup, so don't use in tight loops
+#[no_mangle]
+pub extern "C" fn rb_yjit_incr_counter(counter_name: *const std::os::raw::c_char) {
+ use std::ffi::CStr;
+
+ if !get_option!(gen_stats) {
+ return;
+ }
+
+ let counter_name = unsafe { CStr::from_ptr(counter_name).to_str().unwrap() };
+ let counter_ptr = get_counter_ptr(counter_name);
+ unsafe { *counter_ptr += 1 };
+}
+
/// Export all YJIT statistics as a Ruby hash.
fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
// If YJIT is not enabled, return Qnil
diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs
index 8d44e5ef6e..c83ee883d4 100644
--- a/yjit/src/yjit.rs
+++ b/yjit/src/yjit.rs
@@ -46,57 +46,6 @@ pub fn yjit_enabled_p() -> bool {
YJIT_ENABLED.load(Ordering::Acquire)
}
-/// Make the call threshold available to C
-#[no_mangle]
-pub extern "C" fn rb_yjit_call_threshold() -> raw::c_ulong {
- get_option!(call_threshold) as raw::c_ulong
-}
-
-// Counter to serve as a proxy for execution time, total number of calls
-static mut TOTAL_ENTRY_HITS: u64 = 0;
-
-// Number of calls used to estimate how hot an ISEQ is
-static CALL_COUNT_INTERV: u64 = 20;
-
-/// Test whether we are ready to compile an ISEQ or not
-#[no_mangle]
-pub extern "C" fn rb_yjit_threshold_hit(iseq: IseqPtr, total_calls: u64) -> bool {
-
- let call_threshold = get_option!(call_threshold) as u64;
-
- unsafe { TOTAL_ENTRY_HITS += 1; }
-
- // Record the number of calls at the beginning of the interval
- if total_calls + CALL_COUNT_INTERV == call_threshold {
- let payload = get_or_create_iseq_payload(iseq);
- let call_count = unsafe { TOTAL_ENTRY_HITS };
- payload.call_count_at_interv = call_count;
- }
-
- // Try to estimate the total time taken (total number of calls) to reach 20 calls to this ISEQ
- // This give us a ratio of how hot/cold this ISEQ is
- if total_calls == call_threshold {
- // We expect threshold 1 to compile everything immediately
- if call_threshold < CALL_COUNT_INTERV {
- return true;
- }
-
- let payload = get_or_create_iseq_payload(iseq);
- let call_count = unsafe { TOTAL_ENTRY_HITS };
- let num_calls = call_count - payload.call_count_at_interv;
-
- // Reject ISEQs that don't get called often enough
- if num_calls > get_option!(cold_threshold) as u64 {
- incr_counter!(cold_iseq_entry);
- return false;
- }
-
- return true;
- }
-
- return false;
-}
-
/// This function is called from C code
#[no_mangle]
pub extern "C" fn rb_yjit_init_rust() {