diff options
Diffstat (limited to 'gc/mmtk/src')
| -rw-r--r-- | gc/mmtk/src/abi.rs | 104 | ||||
| -rw-r--r-- | gc/mmtk/src/api.rs | 330 | ||||
| -rw-r--r-- | gc/mmtk/src/binding.rs | 10 | ||||
| -rw-r--r-- | gc/mmtk/src/collection.rs | 61 | ||||
| -rw-r--r-- | gc/mmtk/src/heap/cpu_heap_trigger.rs | 370 | ||||
| -rw-r--r-- | gc/mmtk/src/heap/mod.rs | 9 | ||||
| -rw-r--r-- | gc/mmtk/src/heap/ruby_heap_trigger.rs | 105 | ||||
| -rw-r--r-- | gc/mmtk/src/lib.rs | 36 | ||||
| -rw-r--r-- | gc/mmtk/src/object_model.rs | 53 | ||||
| -rw-r--r-- | gc/mmtk/src/pinning_registry.rs | 187 | ||||
| -rw-r--r-- | gc/mmtk/src/scanning.rs | 72 | ||||
| -rw-r--r-- | gc/mmtk/src/utils.rs | 60 | ||||
| -rw-r--r-- | gc/mmtk/src/weak_proc.rs | 257 |
13 files changed, 1339 insertions, 315 deletions
diff --git a/gc/mmtk/src/abi.rs b/gc/mmtk/src/abi.rs index 5b56d199b2..30890e0853 100644 --- a/gc/mmtk/src/abi.rs +++ b/gc/mmtk/src/abi.rs @@ -1,8 +1,12 @@ use crate::api::RubyMutator; +use crate::extra_assert; use crate::Ruby; use libc::c_int; use mmtk::scheduler::GCWorker; -use mmtk::util::{Address, ObjectReference, VMMutatorThread, VMWorkerThread}; +use mmtk::util::Address; +use mmtk::util::ObjectReference; +use mmtk::util::VMMutatorThread; +use mmtk::util::VMWorkerThread; // For the C binding pub const OBJREF_OFFSET: usize = 8; @@ -10,16 +14,38 @@ pub const MIN_OBJ_ALIGN: usize = 8; // Even on 32-bit machine. A Ruby object is pub const GC_THREAD_KIND_WORKER: libc::c_int = 1; -const HAS_MOVED_GIVTBL: usize = 1 << 63; const HIDDEN_SIZE_MASK: usize = 0x0000FFFFFFFFFFFF; -// Should keep in sync with C code. -const RUBY_FL_EXIVAR: usize = 1 << 10; - // An opaque type for the C counterpart. #[allow(non_camel_case_types)] pub struct st_table; +#[repr(C)] +pub struct HiddenHeader { + pub prefix: usize, +} + +impl HiddenHeader { + #[inline(always)] + pub fn is_sane(&self) -> bool { + self.prefix & !HIDDEN_SIZE_MASK == 0 + } + + #[inline(always)] + fn assert_sane(&self) { + extra_assert!( + self.is_sane(), + "Hidden header is corrupted: {:x}", + self.prefix + ); + } + + pub fn payload_size(&self) -> usize { + self.assert_sane(); + self.prefix & HIDDEN_SIZE_MASK + } +} + /// Provide convenient methods for accessing Ruby objects. /// TODO: Wrap C functions in `RubyUpcalls` as Rust-friendly methods. pub struct RubyObjectAccess { @@ -47,32 +73,17 @@ impl RubyObjectAccess { self.suffix_addr() + Self::suffix_size() } - fn hidden_field(&self) -> Address { - self.obj_start() - } - - fn load_hidden_field(&self) -> usize { - unsafe { self.hidden_field().load::<usize>() } + fn hidden_header(&self) -> &'static HiddenHeader { + unsafe { self.obj_start().as_ref() } } - fn update_hidden_field<F>(&self, f: F) - where - F: FnOnce(usize) -> usize, - { - let old_value = self.load_hidden_field(); - let new_value = f(old_value); - unsafe { - self.hidden_field().store(new_value); - } + #[allow(unused)] // Maybe we need to mutate the hidden header in the future. + fn hidden_header_mut(&self) -> &'static mut HiddenHeader { + unsafe { self.obj_start().as_mut_ref() } } pub fn payload_size(&self) -> usize { - self.load_hidden_field() & HIDDEN_SIZE_MASK - } - - pub fn set_payload_size(&self, size: usize) { - debug_assert!((size & HIDDEN_SIZE_MASK) == size); - self.update_hidden_field(|old| old & !HIDDEN_SIZE_MASK | size & HIDDEN_SIZE_MASK); + self.hidden_header().payload_size() } fn flags_field(&self) -> Address { @@ -83,22 +94,6 @@ impl RubyObjectAccess { unsafe { self.flags_field().load::<usize>() } } - pub fn has_exivar_flag(&self) -> bool { - (self.load_flags() & RUBY_FL_EXIVAR) != 0 - } - - pub fn has_moved_givtbl(&self) -> bool { - (self.load_hidden_field() & HAS_MOVED_GIVTBL) != 0 - } - - pub fn set_has_moved_givtbl(&self) { - self.update_hidden_field(|old| old | HAS_MOVED_GIVTBL) - } - - pub fn clear_has_moved_givtbl(&self) { - self.update_hidden_field(|old| old & !HAS_MOVED_GIVTBL) - } - pub fn prefix_size() -> usize { // Currently, a hidden size field of word size is placed before each object. OBJREF_OFFSET @@ -163,7 +158,7 @@ impl ObjectClosure { F2: 'env + FnOnce() -> T, { debug_assert!( - self.c_function == THE_UNREGISTERED_CLOSURE_FUNC, + std::ptr::fn_addr_eq(self.c_function, THE_UNREGISTERED_CLOSURE_FUNC), "set_temporarily_and_run_code is recursively called." ); self.c_function = Self::c_function_registered::<F1>; @@ -232,7 +227,7 @@ impl GCThreadTLS { /// Has undefined behavior if `ptr` is invalid. pub unsafe fn check_cast(ptr: *mut GCThreadTLS) -> &'static mut GCThreadTLS { assert!(!ptr.is_null()); - let result = &mut *ptr; + let result = unsafe { &mut *ptr }; debug_assert!({ let kind = result.kind; kind == GC_THREAD_KIND_WORKER @@ -247,7 +242,7 @@ impl GCThreadTLS { /// Has undefined behavior if `ptr` is invalid. pub unsafe fn from_vwt_check(vwt: VMWorkerThread) -> &'static mut GCThreadTLS { let ptr = Self::from_vwt(vwt); - Self::check_cast(ptr) + unsafe { Self::check_cast(ptr) } } #[allow(clippy::not_unsafe_ptr_arg_deref)] // `transmute` does not dereference pointer @@ -283,7 +278,7 @@ impl RawVecOfObjRef { /// /// This function turns raw pointer into a Vec without check. pub unsafe fn into_vec(self) -> Vec<ObjectReference> { - Vec::from_raw_parts(self.ptr, self.len, self.capa) + unsafe { Vec::from_raw_parts(self.ptr, self.len, self.capa) } } } @@ -296,7 +291,6 @@ impl From<Vec<ObjectReference>> for RawVecOfObjRef { #[repr(C)] #[derive(Clone)] pub struct RubyBindingOptions { - pub ractor_check_mode: bool, pub suffix_size: usize, } @@ -306,8 +300,10 @@ pub struct RubyUpcalls { pub init_gc_worker_thread: extern "C" fn(gc_worker_tls: *mut GCThreadTLS), pub is_mutator: extern "C" fn() -> bool, pub stop_the_world: extern "C" fn(), - pub resume_mutators: extern "C" fn(), + pub resume_mutators: extern "C" fn(gc_may_move: bool), pub block_for_gc: extern "C" fn(tls: VMMutatorThread), + pub before_updating_jit_code: extern "C" fn(), + pub after_updating_jit_code: extern "C" fn(), pub number_of_mutators: extern "C" fn() -> usize, pub get_mutators: extern "C" fn( visit_mutator: extern "C" fn(*mut RubyMutator, *mut libc::c_void), @@ -315,16 +311,18 @@ pub struct RubyUpcalls { ), pub scan_gc_roots: extern "C" fn(), pub scan_objspace: extern "C" fn(), - pub scan_roots_in_mutator_thread: - extern "C" fn(mutator_tls: VMMutatorThread, worker_tls: VMWorkerThread), - pub scan_object_ruby_style: extern "C" fn(object: ObjectReference), + pub move_obj_during_marking: extern "C" fn(from: ObjectReference, to: ObjectReference), + pub update_object_references: extern "C" fn(object: ObjectReference), pub call_gc_mark_children: extern "C" fn(object: ObjectReference), + pub handle_weak_references: extern "C" fn(object: ObjectReference, moving: bool), pub call_obj_free: extern "C" fn(object: ObjectReference), pub vm_live_bytes: extern "C" fn() -> usize, - pub update_global_tables: extern "C" fn(tbl_idx: c_int), + pub update_global_tables: extern "C" fn(tbl_idx: c_int, moving: bool), pub global_tables_count: extern "C" fn() -> c_int, pub update_finalizer_table: extern "C" fn(), - pub update_obj_id_tables: extern "C" fn(), + pub special_const_p: extern "C" fn(object: ObjectReference) -> bool, + pub mutator_thread_panic_handler: extern "C" fn(), + pub gc_thread_panic_handler: extern "C" fn(), } unsafe impl Sync for RubyUpcalls {} diff --git a/gc/mmtk/src/api.rs b/gc/mmtk/src/api.rs index 91718cead6..c0540fe0c8 100644 --- a/gc/mmtk/src/api.rs +++ b/gc/mmtk/src/api.rs @@ -1,16 +1,28 @@ -use std::sync::atomic::Ordering; +// Functions in this module are unsafe for one reason: +// They are called by C functions and they need to pass raw pointers to Rust. +#![allow(clippy::missing_safety_doc)] + +use mmtk::util::alloc::BumpPointer; +use mmtk::util::alloc::ImmixAllocator; +use mmtk::util::conversions; use mmtk::util::options::PlanSelector; +use std::str::FromStr; +use std::sync::atomic::Ordering; use crate::abi::RawVecOfObjRef; use crate::abi::RubyBindingOptions; use crate::abi::RubyUpcalls; use crate::binding; use crate::binding::RubyBinding; +use crate::heap::CpuHeapTriggerConfig; +use crate::heap::RubyHeapTriggerConfig; +use crate::heap::CPU_HEAP_TRIGGER_CONFIG; +use crate::heap::RUBY_HEAP_TRIGGER_CONFIG; use crate::mmtk; -use crate::Ruby; -use crate::RubySlot; use crate::utils::default_heap_max; use crate::utils::parse_capacity; +use crate::Ruby; +use crate::RubySlot; use mmtk::memory_manager; use mmtk::memory_manager::mmtk_init; use mmtk::util::constants::MIN_OBJECT_SIZE; @@ -37,73 +49,139 @@ pub extern "C" fn mmtk_is_reachable(object: ObjectReference) -> bool { // =============== Bootup =============== -fn mmtk_builder_default_parse_threads() -> usize { - let threads_str = std::env::var("MMTK_THREADS") - .unwrap_or("0".to_string()); - - threads_str - .parse::<usize>() - .unwrap_or_else(|_err| { - eprintln!("[FATAL] Invalid MMTK_THREADS {}", threads_str); +fn parse_env_var_with<T, F: FnOnce(&str) -> Option<T>>(key: &str, parse: F) -> Option<T> { + let val = match std::env::var(key) { + Ok(val) => val, + Err(std::env::VarError::NotPresent) => return None, + Err(std::env::VarError::NotUnicode(os_string)) => { + eprintln!("[FATAL] Invalid {key} {os_string:?}"); std::process::exit(1); - }) -} + } + }; -fn mmtk_builder_default_parse_heap_min() -> usize { - const DEFAULT_HEAP_MIN: usize = 1 << 20; + let parsed = parse(&val).unwrap_or_else(|| { + eprintln!("[FATAL] Invalid {key} {val}"); + std::process::exit(1); + }); - let heap_min_str = std::env::var("MMTK_HEAP_MIN") - .unwrap_or(DEFAULT_HEAP_MIN.to_string()); + Some(parsed) +} - let size = parse_capacity(&heap_min_str, 0); - if size == 0 { - eprintln!("[FATAL] Invalid MMTK_HEAP_MIN {}", heap_min_str); - std::process::exit(1); - } +fn parse_env_var<T: FromStr>(key: &str) -> Option<T> { + parse_env_var_with(key, |s| s.parse().ok()) +} + +fn mmtk_builder_default_parse_threads() -> Option<usize> { + parse_env_var("MMTK_THREADS") +} - size +fn mmtk_builder_default_parse_heap_min() -> usize { + const DEFAULT_HEAP_MIN: usize = 1 << 20; + parse_env_var_with("MMTK_HEAP_MIN", parse_capacity).unwrap_or(DEFAULT_HEAP_MIN) } fn mmtk_builder_default_parse_heap_max() -> usize { - let heap_max_str = std::env::var("MMTK_HEAP_MAX") - .unwrap_or(default_heap_max().to_string()); + parse_env_var_with("MMTK_HEAP_MAX", parse_capacity).unwrap_or_else(default_heap_max) +} - let size = parse_capacity(&heap_max_str, 0); - if size == 0 { - eprintln!("[FATAL] Invalid MMTK_HEAP_MAX {}", heap_max_str); - std::process::exit(1); - } +fn parse_float_env_var(key: &str, default: f64, min: f64, max: f64) -> f64 { + parse_env_var_with(key, |s| { + let mut float = f64::from_str(s).unwrap_or(default); + + if float <= min { + eprintln!( + "{key} has value {float} which must be greater than {min}, using default instead" + ); + float = default; + } - size + if float >= max { + eprintln!( + "{key} has value {float} which must be less than {max}, using default instead" + ); + float = default; + } + + Some(float) + }) + .unwrap_or(default) } fn mmtk_builder_default_parse_heap_mode(heap_min: usize, heap_max: usize) -> GCTriggerSelector { - let heap_mode_str = std::env::var("MMTK_HEAP_MODE") - .unwrap_or("dynamic".to_string()); - - match heap_mode_str.as_str() { - "fixed" => GCTriggerSelector::FixedHeapSize(heap_max), - "dynamic" => GCTriggerSelector::DynamicHeapSize(heap_min, heap_max), - _ => { - eprintln!("[FATAL] Invalid MMTK_HEAP_MODE {}", heap_mode_str); - std::process::exit(1); + let make_fixed = || GCTriggerSelector::FixedHeapSize(heap_max); + let make_dynamic = || GCTriggerSelector::DynamicHeapSize(heap_min, heap_max); + + parse_env_var_with("MMTK_HEAP_MODE", |s| match s { + "fixed" => Some(make_fixed()), + "dynamic" => Some(make_dynamic()), + "ruby" => { + let min_ratio = parse_float_env_var("RUBY_GC_HEAP_FREE_SLOTS_MIN_RATIO", 0.2, 0.0, 1.0); + let goal_ratio = + parse_float_env_var("RUBY_GC_HEAP_FREE_SLOTS_GOAL_RATIO", 0.4, min_ratio, 1.0); + let max_ratio = + parse_float_env_var("RUBY_GC_HEAP_FREE_SLOTS_MAX_RATIO", 0.65, goal_ratio, 1.0); + + crate::heap::RUBY_HEAP_TRIGGER_CONFIG + .set(RubyHeapTriggerConfig { + min_heap_pages: conversions::bytes_to_pages_up(heap_min), + max_heap_pages: conversions::bytes_to_pages_up(heap_max), + heap_pages_min_ratio: min_ratio, + heap_pages_goal_ratio: goal_ratio, + heap_pages_max_ratio: max_ratio, + }) + .unwrap_or_else(|_| panic!("RUBY_HEAP_TRIGGER_CONFIG is already set")); + + Some(GCTriggerSelector::Delegated) } - } + "cpu" => { + // CPU-overhead-driven heap sizing based on Tavakolisomeh et al., + // "Heap Size Adjustment with CPU Control", MPLR '23. + // + // Target is expressed as a percentage (0, 100) via + // `MMTK_GC_CPU_TARGET`. The paper recommends 15 for ZGC (a + // concurrent collector); we default to 5 for MMTk-Ruby. With + // MMTk's stop-the-world Immix, every percent of GC CPU is also + // a percent of wall-clock the mutator is blocked on, so a much + // smaller budget is appropriate. An empirical sweep across + // ruby-bench (railsbench, lobsters, psych-load, liquid-render, + // lee) found target=5 to be Pareto-optimal: ~6% geomean speedup + // vs. the `ruby` heap mode with effectively identical geomean + // peak RSS. + let target_percent = parse_float_env_var("MMTK_GC_CPU_TARGET", 5.0, 0.0, 100.0); + let window_size = parse_env_var::<usize>("MMTK_GC_CPU_WINDOW").unwrap_or(3); + let window_size = window_size.max(1); + + let min_heap_pages = conversions::bytes_to_pages_up(heap_min); + let max_heap_pages = conversions::bytes_to_pages_up(heap_max); + // Start at the min heap size, as the other delegated triggers do. + // The control loop will adjust from here after the first GC cycle. + let initial_heap_pages = min_heap_pages; + + CPU_HEAP_TRIGGER_CONFIG + .set(CpuHeapTriggerConfig { + min_heap_pages, + max_heap_pages, + initial_heap_pages, + target_gc_cpu: target_percent / 100.0, + window_size, + }) + .unwrap_or_else(|_| panic!("CPU_HEAP_TRIGGER_CONFIG is already set")); + + Some(GCTriggerSelector::Delegated) + } + _ => None, + }) + .unwrap_or_else(make_dynamic) } fn mmtk_builder_default_parse_plan() -> PlanSelector { - let plan_str = std::env::var("MMTK_PLAN") - .unwrap_or("MarkSweep".to_string()); - - match plan_str.as_str() { - "NoGC" => PlanSelector::NoGC, - "MarkSweep" => PlanSelector::MarkSweep, - "Immix" => PlanSelector::Immix, - _ => { - eprintln!("[FATAL] Invalid MMTK_PLAN {}", plan_str); - std::process::exit(1); - } - } + parse_env_var_with("MMTK_PLAN", |s| match s { + "NoGC" => Some(PlanSelector::NoGC), + "MarkSweep" => Some(PlanSelector::MarkSweep), + "Immix" => Some(PlanSelector::Immix), + _ => None, + }) + .unwrap_or(PlanSelector::Immix) } #[no_mangle] @@ -111,9 +189,12 @@ pub extern "C" fn mmtk_builder_default() -> *mut MMTKBuilder { let mut builder = MMTKBuilder::new_no_env_vars(); builder.options.no_finalizer.set(true); - let threads = mmtk_builder_default_parse_threads(); - if threads > 0 { - builder.options.threads.set(threads); + if let Some(threads) = mmtk_builder_default_parse_threads() { + if !builder.options.threads.set(threads) { + // MMTk will validate it and reject 0. + eprintln!("[FATAL] Failed to set the number of MMTk threads to {threads}"); + std::process::exit(1); + } } let heap_min = mmtk_builder_default_parse_heap_min(); @@ -121,11 +202,14 @@ pub extern "C" fn mmtk_builder_default() -> *mut MMTKBuilder { let heap_max = mmtk_builder_default_parse_heap_max(); if heap_min >= heap_max { - eprintln!("[FATAL] MMTK_HEAP_MIN({}) >= MMTK_HEAP_MAX({})", heap_min, heap_max); + eprintln!("[FATAL] MMTK_HEAP_MIN({heap_min}) >= MMTK_HEAP_MAX({heap_max})"); std::process::exit(1); } - builder.options.gc_trigger.set(mmtk_builder_default_parse_heap_mode(heap_min, heap_max)); + builder + .options + .gc_trigger + .set(mmtk_builder_default_parse_heap_mode(heap_min, heap_max)); builder.options.plan.set(mmtk_builder_default_parse_plan()); @@ -133,20 +217,26 @@ pub extern "C" fn mmtk_builder_default() -> *mut MMTKBuilder { } #[no_mangle] -pub extern "C" fn mmtk_init_binding( +pub unsafe extern "C" fn mmtk_init_binding( builder: *mut MMTKBuilder, - _binding_options: *const RubyBindingOptions, + binding_options: *const RubyBindingOptions, upcalls: *const RubyUpcalls, - weak_reference_dead_value: ObjectReference, ) { + crate::MUTATOR_THREAD_PANIC_HANDLER + .set((unsafe { (*upcalls).clone() }).mutator_thread_panic_handler) + .unwrap_or_else(|_| panic!("MUTATOR_THREAD_PANIC_HANDLER is already initialized")); + crate::set_panic_hook(); - let builder = unsafe { Box::from_raw(builder) }; - let binding_options = RubyBindingOptions {ractor_check_mode: false, suffix_size: 0}; + let builder: Box<MMTKBuilder> = unsafe { Box::from_raw(builder) }; + let binding_options = unsafe { (*binding_options).clone() }; let mmtk_boxed = mmtk_init(&builder); let mmtk_static = Box::leak(Box::new(mmtk_boxed)); - let binding = RubyBinding::new(mmtk_static, &binding_options, upcalls, weak_reference_dead_value); + let mut binding = RubyBinding::new(mmtk_static, &binding_options, upcalls); + binding + .weak_proc + .init_parallel_obj_free_candidates(memory_manager::num_of_workers(binding.mmtk)); crate::BINDING .set(binding) @@ -164,7 +254,25 @@ pub extern "C" fn mmtk_bind_mutator(tls: VMMutatorThread) -> *mut RubyMutator { } #[no_mangle] -pub extern "C" fn mmtk_destroy_mutator(mutator: *mut RubyMutator) { +pub unsafe extern "C" fn mmtk_get_bump_pointer_allocator(m: *mut RubyMutator) -> *mut BumpPointer { + match *crate::BINDING.get().unwrap().mmtk.get_options().plan { + PlanSelector::Immix => { + let mutator: &mut Mutator<Ruby> = unsafe { &mut *m }; + let allocator = + unsafe { mutator.allocator_mut(mmtk::util::alloc::AllocatorSelector::Immix(0)) }; + + if let Some(immix_allocator) = allocator.downcast_mut::<ImmixAllocator<Ruby>>() { + &mut immix_allocator.bump_pointer as *mut BumpPointer + } else { + panic!("Failed to get bump pointer allocator"); + } + } + _ => std::ptr::null_mut(), + } +} + +#[no_mangle] +pub unsafe extern "C" fn mmtk_destroy_mutator(mutator: *mut RubyMutator) { // notify mmtk-core about destroyed mutator memory_manager::destroy_mutator(unsafe { &mut *mutator }); // turn the ptr back to a box, and let Rust properly reclaim it @@ -174,13 +282,19 @@ pub extern "C" fn mmtk_destroy_mutator(mutator: *mut RubyMutator) { // =============== GC =============== #[no_mangle] -pub extern "C" fn mmtk_handle_user_collection_request(tls: VMMutatorThread) { - memory_manager::handle_user_collection_request::<Ruby>(mmtk(), tls); +pub extern "C" fn mmtk_handle_user_collection_request( + tls: VMMutatorThread, + force: bool, + exhaustive: bool, +) { + crate::mmtk().handle_user_collection_request(tls, force, exhaustive); } #[no_mangle] pub extern "C" fn mmtk_set_gc_enabled(enable: bool) { - crate::CONFIGURATION.gc_enabled.store(enable, Ordering::Relaxed); + crate::CONFIGURATION + .gc_enabled + .store(enable, Ordering::Relaxed); } #[no_mangle] @@ -191,7 +305,7 @@ pub extern "C" fn mmtk_gc_enabled_p() -> bool { // =============== Object allocation =============== #[no_mangle] -pub extern "C" fn mmtk_alloc( +pub unsafe extern "C" fn mmtk_alloc( mutator: *mut RubyMutator, size: usize, align: usize, @@ -209,7 +323,7 @@ pub extern "C" fn mmtk_alloc( } #[no_mangle] -pub extern "C" fn mmtk_post_alloc( +pub unsafe extern "C" fn mmtk_post_alloc( mutator: *mut RubyMutator, refer: ObjectReference, bytes: usize, @@ -218,28 +332,46 @@ pub extern "C" fn mmtk_post_alloc( memory_manager::post_alloc::<Ruby>(unsafe { &mut *mutator }, refer, bytes, semantics) } -// TODO: Replace with buffered mmtk_add_obj_free_candidates #[no_mangle] -pub extern "C" fn mmtk_add_obj_free_candidate(object: ObjectReference) { - binding().weak_proc.add_obj_free_candidate(object) +pub unsafe extern "C" fn mmtk_add_obj_free_candidates( + objects: *const ObjectReference, + count: usize, + can_parallel_free: bool, +) { + let objects = unsafe { std::slice::from_raw_parts(objects, count) }; + binding() + .weak_proc + .add_obj_free_candidates_batch(objects, can_parallel_free) +} + +// =============== Weak references =============== + +#[no_mangle] +pub extern "C" fn mmtk_declare_weak_references(object: ObjectReference) { + binding().weak_proc.add_weak_reference(object); } -// =============== Marking =============== +#[no_mangle] +pub extern "C" fn mmtk_weak_references_alive_p(object: ObjectReference) -> bool { + object.is_reachable() +} #[no_mangle] -pub extern "C" fn mmtk_mark_weak(ptr: &'static mut ObjectReference) { - binding().weak_proc.add_weak_reference(ptr); +pub extern "C" fn mmtk_weak_references_count() -> usize { + binding().weak_proc.weak_references_count() } +// =============== Compaction =============== + #[no_mangle] -pub extern "C" fn mmtk_remove_weak(ptr: &ObjectReference) { - binding().weak_proc.remove_weak_reference(ptr); +pub extern "C" fn mmtk_register_pinning_obj(obj: ObjectReference) { + crate::binding().pinning_registry.register(obj); } // =============== Write barriers =============== #[no_mangle] -pub extern "C" fn mmtk_object_reference_write_post( +pub unsafe extern "C" fn mmtk_object_reference_write_post( mutator: *mut RubyMutator, object: ObjectReference, ) { @@ -343,7 +475,7 @@ pub extern "C" fn mmtk_plan() -> *const u8 { PlanSelector::NoGC => NO_GC.as_ptr(), PlanSelector::MarkSweep => MARK_SWEEP.as_ptr(), PlanSelector::Immix => IMMIX.as_ptr(), - _ => panic!("Unknown plan") + _ => panic!("Unknown plan"), } } @@ -351,11 +483,21 @@ pub extern "C" fn mmtk_plan() -> *const u8 { pub extern "C" fn mmtk_heap_mode() -> *const u8 { static FIXED_HEAP: &[u8] = b"fixed\0"; static DYNAMIC_HEAP: &[u8] = b"dynamic\0"; + static RUBY_HEAP: &[u8] = b"ruby\0"; + static CPU_HEAP: &[u8] = b"cpu\0"; match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger { GCTriggerSelector::FixedHeapSize(_) => FIXED_HEAP.as_ptr(), GCTriggerSelector::DynamicHeapSize(_, _) => DYNAMIC_HEAP.as_ptr(), - _ => panic!("Unknown heap mode") + GCTriggerSelector::Delegated => { + // Two delegated triggers exist; disambiguate via the populated + // config singleton. + if CPU_HEAP_TRIGGER_CONFIG.get().is_some() { + CPU_HEAP.as_ptr() + } else { + RUBY_HEAP.as_ptr() + } + } } } @@ -364,7 +506,18 @@ pub extern "C" fn mmtk_heap_min() -> usize { match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger { GCTriggerSelector::FixedHeapSize(_) => 0, GCTriggerSelector::DynamicHeapSize(min_size, _) => min_size, - _ => panic!("Unknown heap mode") + GCTriggerSelector::Delegated => { + if let Some(cfg) = CPU_HEAP_TRIGGER_CONFIG.get() { + conversions::pages_to_bytes(cfg.min_heap_pages) + } else { + conversions::pages_to_bytes( + RUBY_HEAP_TRIGGER_CONFIG + .get() + .expect("RUBY_HEAP_TRIGGER_CONFIG not set") + .min_heap_pages, + ) + } + } } } @@ -373,7 +526,18 @@ pub extern "C" fn mmtk_heap_max() -> usize { match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger { GCTriggerSelector::FixedHeapSize(max_size) => max_size, GCTriggerSelector::DynamicHeapSize(_, max_size) => max_size, - _ => panic!("Unknown heap mode") + GCTriggerSelector::Delegated => { + if let Some(cfg) = CPU_HEAP_TRIGGER_CONFIG.get() { + conversions::pages_to_bytes(cfg.max_heap_pages) + } else { + conversions::pages_to_bytes( + RUBY_HEAP_TRIGGER_CONFIG + .get() + .expect("RUBY_HEAP_TRIGGER_CONFIG not set") + .max_heap_pages, + ) + } + } } } diff --git a/gc/mmtk/src/binding.rs b/gc/mmtk/src/binding.rs index e0f8640e1c..36d4a992fd 100644 --- a/gc/mmtk/src/binding.rs +++ b/gc/mmtk/src/binding.rs @@ -9,6 +9,7 @@ use mmtk::MMTK; use crate::abi; use crate::abi::RubyBindingOptions; +use crate::pinning_registry::PinningRegistry; use crate::weak_proc::WeakProcessor; use crate::Ruby; @@ -54,10 +55,9 @@ pub struct RubyBinding { pub upcalls: *const abi::RubyUpcalls, pub plan_name: Mutex<Option<CString>>, pub weak_proc: WeakProcessor, + pub pinning_registry: PinningRegistry, pub gc_thread_join_handles: Mutex<Vec<JoinHandle<()>>>, pub wb_unprotected_objects: Mutex<HashSet<ObjectReference>>, - - pub weak_reference_dead_value: ObjectReference, } unsafe impl Sync for RubyBinding {} @@ -68,7 +68,6 @@ impl RubyBinding { mmtk: &'static MMTK<Ruby>, binding_options: &RubyBindingOptions, upcalls: *const abi::RubyUpcalls, - weak_reference_dead_value: ObjectReference, ) -> Self { unsafe { crate::BINDING_FAST.suffix_size = binding_options.suffix_size; @@ -80,10 +79,9 @@ impl RubyBinding { upcalls, plan_name: Mutex::new(None), weak_proc: WeakProcessor::new(), + pinning_registry: PinningRegistry::new(), gc_thread_join_handles: Default::default(), wb_unprotected_objects: Default::default(), - - weak_reference_dead_value } } @@ -119,7 +117,7 @@ impl RubyBinding { } pub fn register_wb_unprotected_object(&self, object: ObjectReference) { - debug!("Registering WB-unprotected object: {}", object); + debug!("Registering WB-unprotected object: {object}"); let mut objects = self.wb_unprotected_objects.lock().unwrap(); objects.insert(object); } diff --git a/gc/mmtk/src/collection.rs b/gc/mmtk/src/collection.rs index 0570b64e3a..648efa4e27 100644 --- a/gc/mmtk/src/collection.rs +++ b/gc/mmtk/src/collection.rs @@ -1,14 +1,26 @@ use crate::abi::GCThreadTLS; use crate::api::RubyMutator; -use crate::{mmtk, upcalls, Ruby}; +use crate::heap::CpuHeapTrigger; +use crate::heap::RubyHeapTrigger; +use crate::heap::CPU_HEAP_TRIGGER_CONFIG; +use crate::mmtk; +use crate::upcalls; +use crate::Ruby; use mmtk::memory_manager; use mmtk::scheduler::*; -use mmtk::util::{VMMutatorThread, VMThread, VMWorkerThread}; -use mmtk::vm::{Collection, GCThreadContext}; +use mmtk::util::heap::GCTriggerPolicy; +use mmtk::util::VMMutatorThread; +use mmtk::util::VMThread; +use mmtk::util::VMWorkerThread; +use mmtk::vm::Collection; +use mmtk::vm::GCThreadContext; +use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; use std::thread; +static CURRENT_GC_MAY_MOVE: AtomicBool = AtomicBool::new(false); + pub struct VMCollection {} impl Collection<Ruby> for VMCollection { @@ -16,11 +28,21 @@ impl Collection<Ruby> for VMCollection { crate::CONFIGURATION.gc_enabled.load(Ordering::Relaxed) } - fn stop_all_mutators<F>(_tls: VMWorkerThread, mut mutator_visitor: F) + fn stop_all_mutators<F>(tls: VMWorkerThread, mut mutator_visitor: F) where F: FnMut(&'static mut mmtk::Mutator<Ruby>), { (upcalls().stop_the_world)(); + + if crate::mmtk().get_plan().current_gc_may_move_object() { + CURRENT_GC_MAY_MOVE.store(true, Ordering::Relaxed); + (upcalls().before_updating_jit_code)(); + } else { + CURRENT_GC_MAY_MOVE.store(false, Ordering::Relaxed); + } + + crate::binding().pinning_registry.pin_children(tls); + (upcalls().get_mutators)( Self::notify_mutator_ready::<F>, &mut mutator_visitor as *mut F as *mut _, @@ -28,7 +50,13 @@ impl Collection<Ruby> for VMCollection { } fn resume_mutators(_tls: VMWorkerThread) { - (upcalls().resume_mutators)(); + let current_gc_may_move = CURRENT_GC_MAY_MOVE.load(Ordering::Relaxed); + + if current_gc_may_move { + (upcalls().after_updating_jit_code)(); + } + + (upcalls().resume_mutators)(current_gc_may_move); } fn block_for_gc(tls: VMMutatorThread) { @@ -41,10 +69,7 @@ impl Collection<Ruby> for VMCollection { .name("MMTk Worker Thread".to_string()) .spawn(move || { let ordinal = worker.ordinal; - debug!( - "Hello! This is MMTk Worker Thread running! ordinal: {}", - ordinal - ); + debug!("Hello! This is MMTk Worker Thread running! ordinal: {ordinal}"); crate::register_gc_thread(thread::current().id()); let ptr_worker = &mut *worker as *mut GCWorker<Ruby>; let gc_thread_tls = @@ -55,10 +80,7 @@ impl Collection<Ruby> for VMCollection { GCThreadTLS::to_vwt(gc_thread_tls), worker, ); - debug!( - "An MMTk Worker Thread is quitting. Good bye! ordinal: {}", - ordinal - ); + debug!("An MMTk Worker Thread is quitting. Good bye! ordinal: {ordinal}"); crate::unregister_gc_thread(thread::current().id()); }) .unwrap(), @@ -73,6 +95,19 @@ impl Collection<Ruby> for VMCollection { fn vm_live_bytes() -> usize { (upcalls().vm_live_bytes)() } + + fn create_gc_trigger() -> Box<dyn GCTriggerPolicy<Ruby>> { + // `GCTriggerSelector::Delegated` is currently used by two different + // heap modes: `ruby` (the Ruby-like free-slot ratio trigger) and `cpu` + // (the CPU-overhead trigger from Tavakolisomeh et al., MPLR '23). + // Which one is active is determined by which `OnceCell` config the + // `MMTK_HEAP_MODE` parser populated. + if CPU_HEAP_TRIGGER_CONFIG.get().is_some() { + Box::new(CpuHeapTrigger::default()) + } else { + Box::new(RubyHeapTrigger::default()) + } + } } impl VMCollection { diff --git a/gc/mmtk/src/heap/cpu_heap_trigger.rs b/gc/mmtk/src/heap/cpu_heap_trigger.rs new file mode 100644 index 0000000000..ef5a79fe9a --- /dev/null +++ b/gc/mmtk/src/heap/cpu_heap_trigger.rs @@ -0,0 +1,370 @@ +//! A GC trigger that adjusts the heap size based on the CPU overhead of GC. +//! +//! This is an implementation of the heap sizing policy described in +//! Tavakolisomeh, Shimchenko, Österlund, Bruno, Ferreira, Wrigstad, +//! "Heap Size Adjustment with CPU Control", MPLR '23. +//! <https://doi.org/10.1145/3617651.3622988> +//! +//! The idea: rather than letting heap size control GC frequency, let a +//! user-supplied *target GC CPU overhead* control the heap size. After each GC +//! cycle, we measure the GC CPU overhead (fraction of process CPU time spent +//! in GC) and compare it to the target. If GC is over budget we grow the heap +//! (reducing GC frequency); if it is under budget we shrink the heap (trading +//! memory for more frequent collections). +//! +//! ## Algorithm +//! +//! After each GC cycle we compute, using an average of the last `n` cycles: +//! +//! ```text +//! GC_CPU = T_GC / T_APP (Eq. 1) +//! overhead_error = GC_CPU - target (Eq. 2) +//! sigmoid_error = 1 / (1 + e^(-overhead_error)) (Eq. 3) +//! adjustment_factor = sigmoid_error + 0.5 (in (0.5, 1.5)) (Eq. 4) +//! new_size = current_size * adjustment_factor (Eq. 5) +//! ``` +//! +//! where: +//! - `T_GC` is the wall-clock duration of each GC cycle. +//! - `T_APP` is process CPU time elapsed between consecutive GC cycles (sum of +//! CPU time over all threads — mutators, GC workers, compilers, etc.), read +//! via `clock_gettime(CLOCK_PROCESS_CPUTIME_ID)`. +//! +//! The final heap size is then clamped to the range +//! `[max(1.1 * used, min_heap_pages), max_heap_pages]`, providing 10% headroom +//! above current live memory to avoid triggering GC on an effectively-empty +//! heap. +//! +//! ## Differences from the paper +//! +//! The paper targets ZGC, a concurrent generational collector. MMTk's Ruby +//! binding currently ships stop-the-world collectors (Immix, MarkSweep). The +//! paper's formula still applies: with a STW collector the process CPU time +//! during GC closely tracks the wall-clock GC time, and mutator CPU time +//! during the mutator phase is correctly attributed. For generational plans +//! we skip nursery-only GCs, consistent with MemBalancer. + +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering; +use std::sync::Mutex; + +use mmtk::util::heap::GCTriggerPolicy; +use mmtk::util::heap::SpaceStats; +use mmtk::Plan; +use mmtk::MMTK; +use once_cell::sync::OnceCell; + +use crate::Ruby; + +pub static CPU_HEAP_TRIGGER_CONFIG: OnceCell<CpuHeapTriggerConfig> = OnceCell::new(); + +/// Configuration for the [`CpuHeapTrigger`]. +pub struct CpuHeapTriggerConfig { + /// Lower bound on heap size (in pages). The trigger will never shrink below + /// this value. + pub min_heap_pages: usize, + /// Upper bound on heap size (in pages). The trigger will never grow above + /// this value. + pub max_heap_pages: usize, + /// Initial heap size (in pages). + pub initial_heap_pages: usize, + /// Target GC CPU overhead as a fraction of total process CPU time. For + /// example, `0.15` means the policy will try to keep GC CPU usage near 15%. + /// Valid range: `(0.0, 1.0)`. + pub target_gc_cpu: f64, + /// Number of recent GC cycles averaged together when computing the CPU + /// overhead signal. Smoothes out short-term fluctuations. The paper uses 3. + pub window_size: usize, +} + +/// A single GC cycle's timing measurements. +#[derive(Clone, Copy, Debug, Default)] +struct GcSample { + /// Wall-clock seconds spent inside this GC cycle. + gc_seconds: f64, + /// Seconds of process CPU time elapsed since the previous GC cycle ended. + /// This covers both mutator time and (on multi-threaded mutators) any + /// mutator CPU time consumed in parallel with the previous GC. + app_cpu_seconds: f64, +} + +struct CpuHeapTriggerState { + /// Ring buffer of the last `window_size` samples. Oldest-first. + samples: Vec<GcSample>, + /// Wall-clock time when the current GC cycle started. `None` when no GC is + /// in progress. + gc_start_wall: Option<std::time::Instant>, + /// Process CPU time (seconds) recorded at the end of the previous GC + /// cycle. `None` until the first cycle completes. + last_gc_end_cpu: Option<f64>, +} + +impl CpuHeapTriggerState { + fn new() -> Self { + Self { + samples: Vec::new(), + gc_start_wall: None, + last_gc_end_cpu: None, + } + } + + /// Pushes a new sample, dropping the oldest when the window is full. + fn push_sample(&mut self, sample: GcSample, window_size: usize) { + if self.samples.len() >= window_size { + self.samples.remove(0); + } + self.samples.push(sample); + } + + /// Returns the arithmetic mean GC CPU overhead across the window, or + /// `None` if we don't yet have a full sample (which happens on the first + /// GC cycle — we have no baseline for `app_cpu_seconds`). + fn mean_gc_cpu(&self) -> Option<f64> { + if self.samples.is_empty() { + return None; + } + let total_gc: f64 = self.samples.iter().map(|s| s.gc_seconds).sum(); + let total_app: f64 = self.samples.iter().map(|s| s.app_cpu_seconds).sum(); + if total_app <= 0.0 { + return None; + } + Some(total_gc / total_app) + } +} + +pub struct CpuHeapTrigger { + /// Target heap size in pages. Updated at the end of each GC cycle. + target_heap_pages: AtomicUsize, + /// Mutable timing state. Wrapped in a `Mutex` because `on_gc_start` and + /// `on_gc_end` are the only mutation sites and they are not on an + /// allocation hot path; avoiding the complexity of lock-free state is + /// worth the trivial contention. + state: Mutex<CpuHeapTriggerState>, +} + +impl Default for CpuHeapTrigger { + fn default() -> Self { + let cfg = Self::get_config(); + Self { + target_heap_pages: AtomicUsize::new(cfg.initial_heap_pages), + state: Mutex::new(CpuHeapTriggerState::new()), + } + } +} + +impl GCTriggerPolicy<Ruby> for CpuHeapTrigger { + fn is_gc_required( + &self, + space_full: bool, + space: Option<SpaceStats<Ruby>>, + plan: &dyn Plan<VM = Ruby>, + ) -> bool { + // Let the plan decide, matching the other triggers. + plan.collection_required(space_full, space) + } + + fn on_gc_start(&self, _mmtk: &'static MMTK<Ruby>) { + let mut state = self.state.lock().unwrap(); + state.gc_start_wall = Some(std::time::Instant::now()); + } + + fn on_gc_end(&self, mmtk: &'static MMTK<Ruby>) { + // Skip nursery-only GCs for generational plans. The heap resizing + // decision is driven by the (much more expensive) full collections + // where the signal-to-noise ratio is high enough to be useful. + if let Some(gen_plan) = mmtk.get_plan().generational() { + if gen_plan.is_current_gc_nursery() { + return; + } + } + + let cfg = Self::get_config(); + let gc_end_cpu = process_cpu_time_seconds(); + + let mut state = self.state.lock().unwrap(); + + // Duration of this GC cycle (wall clock). + let gc_seconds = state + .gc_start_wall + .take() + .map(|start| start.elapsed().as_secs_f64()) + .unwrap_or(0.0); + + // Process CPU time elapsed since the previous GC cycle ended. We + // require at least one previous end timestamp to produce a valid + // sample — without it we cannot compute `T_APP`. + if let (Some(last_end), Some(now)) = (state.last_gc_end_cpu, gc_end_cpu) { + let app_cpu_seconds = (now - last_end).max(0.0); + // Only record non-degenerate samples to avoid poisoning the window + // with zero-time entries from back-to-back GCs. + if app_cpu_seconds > 0.0 { + state.push_sample( + GcSample { + gc_seconds, + app_cpu_seconds, + }, + cfg.window_size, + ); + } + } + state.last_gc_end_cpu = gc_end_cpu; + + // Compute the new heap size only when we have samples to average over. + if let Some(gc_cpu) = state.mean_gc_cpu() { + // Drop the lock before doing the (relatively cheap) math and + // atomic update; nothing below needs the state. + drop(state); + + let overhead_error = gc_cpu - cfg.target_gc_cpu; // Eq. (2) + let sigmoid_error = sigmoid(overhead_error); // Eq. (3) + let adjustment_factor = sigmoid_error + 0.5; // Eq. (4), range (0.5, 1.5) + + let current = self.target_heap_pages.load(Ordering::Relaxed); + let suggested = ((current as f64) * adjustment_factor) as usize; // Eq. (5) + + // Clamp: + // - upper bound: configured max + // - lower bound: max(1.1 * used, min) — 10% headroom above current + // live memory, so we never request a heap so small that GC is + // triggered immediately on return from this one. + let used = mmtk.get_plan().get_used_pages(); + let floor = ((used as f64) * 1.1).ceil() as usize; + let lower = floor.max(cfg.min_heap_pages).min(cfg.max_heap_pages); + let upper = cfg.max_heap_pages; + let new_target = suggested.clamp(lower, upper); + + self.target_heap_pages.store(new_target, Ordering::Relaxed); + + info!( + "CpuHeapTrigger: gc_cpu={:.4} target={:.4} factor={:.4} \ + pages {} -> {} (used={}, clamp=[{}, {}])", + gc_cpu, + cfg.target_gc_cpu, + adjustment_factor, + current, + new_target, + used, + lower, + upper + ); + } + } + + fn is_heap_full(&self, plan: &dyn Plan<VM = Ruby>) -> bool { + plan.get_reserved_pages() > self.target_heap_pages.load(Ordering::Relaxed) + } + + fn get_current_heap_size_in_pages(&self) -> usize { + self.target_heap_pages.load(Ordering::Relaxed) + } + + fn get_max_heap_size_in_pages(&self) -> usize { + Self::get_config().max_heap_pages + } + + fn can_heap_size_grow(&self) -> bool { + self.target_heap_pages.load(Ordering::Relaxed) < Self::get_config().max_heap_pages + } +} + +impl CpuHeapTrigger { + fn get_config<'b>() -> &'b CpuHeapTriggerConfig { + CPU_HEAP_TRIGGER_CONFIG + .get() + .expect("Attempt to use CPU_HEAP_TRIGGER_CONFIG before it is initialized") + } +} + +/// Standard logistic sigmoid. Returns 0.5 when x == 0, asymptotes to 0 and 1. +fn sigmoid(x: f64) -> f64 { + 1.0 / (1.0 + (-x).exp()) +} + +/// Reads the process-wide CPU time as a floating-point number of seconds, +/// summed across all threads of this process. Returns `None` if the clock +/// query fails (which should be essentially impossible on supported +/// platforms). +fn process_cpu_time_seconds() -> Option<f64> { + let mut ts = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + // SAFETY: `clock_gettime` writes exactly `sizeof(timespec)` bytes to the + // pointer we pass, which is a valid local stack allocation. + let rc = unsafe { libc::clock_gettime(libc::CLOCK_PROCESS_CPUTIME_ID, &mut ts) }; + if rc != 0 { + return None; + } + Some((ts.tv_sec as f64) + (ts.tv_nsec as f64) / 1_000_000_000.0) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sigmoid_is_well_behaved() { + assert!((sigmoid(0.0) - 0.5).abs() < 1e-12); + assert!(sigmoid(-100.0) < 1e-9); + assert!(sigmoid(100.0) > 1.0 - 1e-9); + // Monotonic. + assert!(sigmoid(-1.0) < sigmoid(0.0)); + assert!(sigmoid(0.0) < sigmoid(1.0)); + } + + #[test] + fn adjustment_factor_is_within_paper_bounds() { + // Eq. (4): adjustment_factor = sigmoid(e) + 0.5 must lie in (0.5, 1.5). + for e in [-10.0_f64, -1.0, 0.0, 1.0, 10.0] { + let f = sigmoid(e) + 0.5; + assert!(f > 0.5 && f < 1.5, "factor {f} out of range for e={e}"); + } + } + + #[test] + fn mean_gc_cpu_is_total_weighted() { + let mut state = CpuHeapTriggerState::new(); + state.push_sample( + GcSample { + gc_seconds: 1.0, + app_cpu_seconds: 10.0, + }, + 3, + ); + state.push_sample( + GcSample { + gc_seconds: 3.0, + app_cpu_seconds: 10.0, + }, + 3, + ); + // (1 + 3) / (10 + 10) = 0.2 + assert!((state.mean_gc_cpu().unwrap() - 0.2).abs() < 1e-12); + } + + #[test] + fn window_drops_oldest() { + let mut state = CpuHeapTriggerState::new(); + for i in 0..5 { + state.push_sample( + GcSample { + gc_seconds: i as f64, + app_cpu_seconds: 1.0, + }, + 3, + ); + } + assert_eq!(state.samples.len(), 3); + // After pushing 0,1,2,3,4 with window 3, we should have [2,3,4]. + assert_eq!(state.samples[0].gc_seconds, 2.0); + assert_eq!(state.samples[2].gc_seconds, 4.0); + } + + #[test] + fn no_sample_without_prior_gc() { + // First GC cycle cannot produce a sample (no previous end time). The + // push happens only when last_gc_end_cpu is Some. + let state = CpuHeapTriggerState::new(); + assert!(state.mean_gc_cpu().is_none()); + } +} diff --git a/gc/mmtk/src/heap/mod.rs b/gc/mmtk/src/heap/mod.rs new file mode 100644 index 0000000000..05a35efb23 --- /dev/null +++ b/gc/mmtk/src/heap/mod.rs @@ -0,0 +1,9 @@ +mod cpu_heap_trigger; +mod ruby_heap_trigger; + +pub use cpu_heap_trigger::CpuHeapTrigger; +pub use cpu_heap_trigger::CpuHeapTriggerConfig; +pub use cpu_heap_trigger::CPU_HEAP_TRIGGER_CONFIG; +pub use ruby_heap_trigger::RubyHeapTrigger; +pub use ruby_heap_trigger::RubyHeapTriggerConfig; +pub use ruby_heap_trigger::RUBY_HEAP_TRIGGER_CONFIG; diff --git a/gc/mmtk/src/heap/ruby_heap_trigger.rs b/gc/mmtk/src/heap/ruby_heap_trigger.rs new file mode 100644 index 0000000000..fe1130043d --- /dev/null +++ b/gc/mmtk/src/heap/ruby_heap_trigger.rs @@ -0,0 +1,105 @@ +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering; + +use mmtk::util::heap::GCTriggerPolicy; +use mmtk::util::heap::SpaceStats; +use mmtk::Plan; +use mmtk::MMTK; +use once_cell::sync::OnceCell; + +use crate::Ruby; + +pub static RUBY_HEAP_TRIGGER_CONFIG: OnceCell<RubyHeapTriggerConfig> = OnceCell::new(); + +pub struct RubyHeapTriggerConfig { + /// Min heap size + pub min_heap_pages: usize, + /// Max heap size + pub max_heap_pages: usize, + /// Minimum ratio of empty space after a GC before the heap will grow + pub heap_pages_min_ratio: f64, + /// Ratio the heap will grow by + pub heap_pages_goal_ratio: f64, + /// Maximum ratio of empty space after a GC before the heap will shrink + pub heap_pages_max_ratio: f64, +} + +pub struct RubyHeapTrigger { + /// Target number of heap pages + target_heap_pages: AtomicUsize, +} + +impl GCTriggerPolicy<Ruby> for RubyHeapTrigger { + fn is_gc_required( + &self, + space_full: bool, + space: Option<SpaceStats<Ruby>>, + plan: &dyn Plan<VM = Ruby>, + ) -> bool { + // Let the plan decide + plan.collection_required(space_full, space) + } + + fn on_gc_end(&self, mmtk: &'static MMTK<Ruby>) { + if let Some(plan) = mmtk.get_plan().generational() { + if plan.is_current_gc_nursery() { + // Nursery GC + } else { + // Full GC + } + + panic!("TODO: support for generational GC not implemented") + } else { + let used_pages = mmtk.get_plan().get_used_pages(); + + let target_min = + (used_pages as f64 * (1.0 + Self::get_config().heap_pages_min_ratio)) as usize; + let target_max = + (used_pages as f64 * (1.0 + Self::get_config().heap_pages_max_ratio)) as usize; + let new_target = + (((used_pages as f64) * (1.0 + Self::get_config().heap_pages_goal_ratio)) as usize) + .clamp( + Self::get_config().min_heap_pages, + Self::get_config().max_heap_pages, + ); + + if used_pages < target_min || used_pages > target_max { + self.target_heap_pages.store(new_target, Ordering::Relaxed); + } + } + } + + fn is_heap_full(&self, plan: &dyn Plan<VM = Ruby>) -> bool { + plan.get_reserved_pages() > self.target_heap_pages.load(Ordering::Relaxed) + } + + fn get_current_heap_size_in_pages(&self) -> usize { + self.target_heap_pages.load(Ordering::Relaxed) + } + + fn get_max_heap_size_in_pages(&self) -> usize { + Self::get_config().max_heap_pages + } + + fn can_heap_size_grow(&self) -> bool { + self.target_heap_pages.load(Ordering::Relaxed) < Self::get_config().max_heap_pages + } +} + +impl Default for RubyHeapTrigger { + fn default() -> Self { + let min_heap_pages = Self::get_config().min_heap_pages; + + Self { + target_heap_pages: AtomicUsize::new(min_heap_pages), + } + } +} + +impl RubyHeapTrigger { + fn get_config<'b>() -> &'b RubyHeapTriggerConfig { + RUBY_HEAP_TRIGGER_CONFIG + .get() + .expect("Attempt to use RUBY_HEAP_TRIGGER_CONFIG before it is initialized") + } +} diff --git a/gc/mmtk/src/lib.rs b/gc/mmtk/src/lib.rs index 01497e9c42..52dc782051 100644 --- a/gc/mmtk/src/lib.rs +++ b/gc/mmtk/src/lib.rs @@ -1,3 +1,7 @@ +// Warn about unsafe operations in functions that are already marked as unsafe. +// This will become default in Rust 2024 edition. +#![warn(unsafe_op_in_unsafe_fn)] + extern crate libc; extern crate mmtk; #[macro_use] @@ -10,8 +14,11 @@ use std::sync::Mutex; use std::thread::ThreadId; use abi::RubyUpcalls; -use binding::{RubyBinding, RubyBindingFast, RubyConfiguration}; -use mmtk::vm::slot::{SimpleSlot, UnimplementedMemorySlice}; +use binding::RubyBinding; +use binding::RubyBindingFast; +use binding::RubyConfiguration; +use mmtk::vm::slot::SimpleSlot; +use mmtk::vm::slot::UnimplementedMemorySlice; use mmtk::vm::VMBinding; use mmtk::MMTK; use once_cell::sync::OnceCell; @@ -21,7 +28,9 @@ pub mod active_plan; pub mod api; pub mod binding; pub mod collection; +pub mod heap; pub mod object_model; +pub mod pinning_registry; pub mod reference_glue; pub mod scanning; pub mod utils; @@ -51,6 +60,11 @@ impl VMBinding for Ruby { type VMMemorySlice = RubyMemorySlice; } +/// The callback for mutator thread panic handler (which calls rb_bug to output +/// debugging information such as the Ruby backtrace and memory maps). +/// This is set before BINDING is set because mmtk_init could panic. +pub static MUTATOR_THREAD_PANIC_HANDLER: OnceCell<extern "C" fn()> = OnceCell::new(); + /// The singleton object for the Ruby binding itself. pub static BINDING: OnceCell<RubyBinding> = OnceCell::new(); @@ -112,8 +126,6 @@ fn handle_gc_thread_panic(panic_info: &PanicHookInfo) { eprintln!("Unknown backtrace status: {s:?}"); } } - - std::process::abort(); } pub(crate) fn set_panic_hook() { @@ -126,8 +138,24 @@ pub(crate) fn set_panic_hook() { std::panic::set_hook(Box::new(move |panic_info| { if is_gc_thread(std::thread::current().id()) { handle_gc_thread_panic(panic_info); + + (crate::binding().upcalls().gc_thread_panic_handler)(); } else { old_hook(panic_info); + (crate::MUTATOR_THREAD_PANIC_HANDLER + .get() + .expect("MUTATOR_THREAD_PANIC_HANDLER is not set"))(); } })); } + +/// This kind of assertion is enabled if either building in debug mode or the +/// "extra_assert" feature is enabled. +#[macro_export] +macro_rules! extra_assert { + ($($arg:tt)*) => { + if std::cfg!(any(debug_assertions, feature = "extra_assert")) { + std::assert!($($arg)*); + } + }; +} diff --git a/gc/mmtk/src/object_model.rs b/gc/mmtk/src/object_model.rs index abeef1f2b9..d673ca11a0 100644 --- a/gc/mmtk/src/object_model.rs +++ b/gc/mmtk/src/object_model.rs @@ -1,8 +1,15 @@ -use crate::abi::{RubyObjectAccess, OBJREF_OFFSET}; -use crate::{abi, Ruby}; +use std::ptr::copy_nonoverlapping; + +use crate::abi; +use crate::abi::RubyObjectAccess; +use crate::abi::MIN_OBJ_ALIGN; +use crate::abi::OBJREF_OFFSET; +use crate::Ruby; use mmtk::util::constants::BITS_IN_BYTE; -use mmtk::util::copy::{CopySemantics, GCWorkerCopyContext}; -use mmtk::util::{Address, ObjectReference}; +use mmtk::util::copy::CopySemantics; +use mmtk::util::copy::GCWorkerCopyContext; +use mmtk::util::Address; +use mmtk::util::ObjectReference; use mmtk::vm::*; pub struct VMObjectModel {} @@ -36,13 +43,39 @@ impl ObjectModel<Ruby> for VMObjectModel { const NEED_VO_BITS_DURING_TRACING: bool = true; fn copy( - _from: ObjectReference, - _semantics: CopySemantics, - _copy_context: &mut GCWorkerCopyContext<Ruby>, + from: ObjectReference, + semantics: CopySemantics, + copy_context: &mut GCWorkerCopyContext<Ruby>, ) -> ObjectReference { - unimplemented!( - "Copying GC not currently supported" - ) + let from_acc = RubyObjectAccess::from_objref(from); + let from_start = from_acc.obj_start(); + let object_size = from_acc.object_size(); + let to_start = copy_context.alloc_copy(from, object_size, MIN_OBJ_ALIGN, 0, semantics); + debug_assert!(!to_start.is_zero()); + let to_payload = to_start.add(OBJREF_OFFSET); + unsafe { + copy_nonoverlapping::<u8>(from_start.to_ptr(), to_start.to_mut_ptr(), object_size); + } + let to_obj = unsafe { ObjectReference::from_raw_address_unchecked(to_payload) }; + copy_context.post_copy(to_obj, object_size, semantics); + trace!("Copied object from {} to {}", from, to_obj); + + (crate::binding().upcalls().move_obj_during_marking)(from, to_obj); + + #[cfg(feature = "clear_old_copy")] + { + trace!( + "Clearing old copy {} ({}-{})", + from, + from_start, + from_start + object_size + ); + // For debug purpose, we clear the old copy so that if the Ruby VM reads from the old + // copy again, it will likely result in an error. + unsafe { std::ptr::write_bytes::<u8>(from_start.to_mut_ptr(), 0, object_size) } + } + + to_obj } fn copy_to(_from: ObjectReference, _to: ObjectReference, _region: Address) -> Address { diff --git a/gc/mmtk/src/pinning_registry.rs b/gc/mmtk/src/pinning_registry.rs new file mode 100644 index 0000000000..b498b508f1 --- /dev/null +++ b/gc/mmtk/src/pinning_registry.rs @@ -0,0 +1,187 @@ +use std::sync::Mutex; + +use mmtk::memory_manager; +use mmtk::scheduler::GCWork; +use mmtk::scheduler::GCWorker; +use mmtk::scheduler::WorkBucketStage; +use mmtk::util::ObjectReference; +use mmtk::util::VMWorkerThread; +use mmtk::MMTK; + +use crate::abi::GCThreadTLS; +use crate::upcalls; +use crate::Ruby; + +pub struct PinningRegistry { + pinning_objs: Mutex<Vec<ObjectReference>>, + pinned_objs: Mutex<Vec<ObjectReference>>, +} + +impl PinningRegistry { + pub fn new() -> Self { + Self { + pinning_objs: Default::default(), + pinned_objs: Default::default(), + } + } + + pub fn register(&self, object: ObjectReference) { + let mut pinning_objs = self.pinning_objs.lock().unwrap(); + pinning_objs.push(object); + } + + pub fn pin_children(&self, tls: VMWorkerThread) { + if !crate::mmtk().get_plan().current_gc_may_move_object() { + log::debug!("The current GC is non-moving, skipping pinning children."); + return; + } + + let gc_tls = unsafe { GCThreadTLS::from_vwt_check(tls) }; + let worker = gc_tls.worker(); + + let pinning_objs = self + .pinning_objs + .try_lock() + .expect("PinningRegistry should not have races during GC."); + + let packet_size = 512; + let work_packets = pinning_objs + .chunks(packet_size) + .map(|chunk| { + Box::new(PinPinningChildren { + pinning_objs: chunk.to_vec(), + }) as _ + }) + .collect(); + + worker.scheduler().work_buckets[WorkBucketStage::Prepare].bulk_add(work_packets); + } + + pub fn cleanup(&self, worker: &mut GCWorker<Ruby>) { + worker.scheduler().work_buckets[WorkBucketStage::VMRefClosure].add(RemoveDeadPinnings); + if crate::mmtk().get_plan().current_gc_may_move_object() { + let packet = { + let mut pinned_objs = self + .pinned_objs + .try_lock() + .expect("Unexpected contention on pinned_objs"); + UnpinPinnedObjects { + objs: std::mem::take(&mut pinned_objs), + } + }; + + worker.scheduler().work_buckets[WorkBucketStage::VMRefClosure].add(packet); + } else { + debug!("The current GC is non-moving, skipping unpinning objects."); + debug_assert_eq!( + { + let pinned_objs = self + .pinned_objs + .try_lock() + .expect("Unexpected contention on pinned_objs"); + pinned_objs.len() + }, + 0 + ); + } + } +} + +impl Default for PinningRegistry { + fn default() -> Self { + Self::new() + } +} + +struct PinPinningChildren { + pinning_objs: Vec<ObjectReference>, +} + +impl GCWork<Ruby> for PinPinningChildren { + fn do_work(&mut self, worker: &mut GCWorker<Ruby>, _mmtk: &'static MMTK<Ruby>) { + let gc_tls = unsafe { GCThreadTLS::from_vwt_check(worker.tls) }; + let mut pinned_objs = vec![]; + let mut newly_pinned_objs = vec![]; + + let visit_object = |_worker, target_object: ObjectReference, pin| { + log::trace!( + " -> {} {}", + if pin { "(pin)" } else { " " }, + target_object + ); + if pin { + debug_assert!( + target_object.get_forwarded_object().is_none(), + "Trying to pin {target_object} but has been moved" + ); + + pinned_objs.push(target_object); + } + target_object + }; + + gc_tls + .object_closure + .set_temporarily_and_run_code(visit_object, || { + for obj in self.pinning_objs.iter().cloned() { + log::trace!(" Pinning: {}", obj); + (upcalls().call_gc_mark_children)(obj); + } + }); + + for target_object in pinned_objs { + if memory_manager::pin_object(target_object) { + newly_pinned_objs.push(target_object); + } + } + + let mut pinned_objs = crate::binding() + .pinning_registry + .pinned_objs + .lock() + .unwrap(); + pinned_objs.append(&mut newly_pinned_objs); + } +} + +struct RemoveDeadPinnings; + +impl GCWork<Ruby> for RemoveDeadPinnings { + fn do_work(&mut self, _worker: &mut GCWorker<Ruby>, _mmtk: &'static MMTK<Ruby>) { + log::debug!("Removing dead Pinnings..."); + + let registry = &crate::binding().pinning_registry; + { + let mut pinning_objs = registry + .pinning_objs + .try_lock() + .expect("PinningRegistry should not have races during GC."); + + pinning_objs.retain_mut(|obj| { + if obj.is_live() { + let new_obj = obj.get_forwarded_object().unwrap_or(*obj); + *obj = new_obj; + true + } else { + log::trace!(" Dead Pinning removed: {}", *obj); + false + } + }); + } + } +} + +struct UnpinPinnedObjects { + objs: Vec<ObjectReference>, +} + +impl GCWork<Ruby> for UnpinPinnedObjects { + fn do_work(&mut self, _worker: &mut GCWorker<Ruby>, _mmtk: &'static MMTK<Ruby>) { + log::debug!("Unpinning pinned objects..."); + + for obj in self.objs.iter() { + let unpinned = memory_manager::unpin_object(*obj); + debug_assert!(unpinned); + } + } +} diff --git a/gc/mmtk/src/scanning.rs b/gc/mmtk/src/scanning.rs index 33466b9db6..355a2e7759 100644 --- a/gc/mmtk/src/scanning.rs +++ b/gc/mmtk/src/scanning.rs @@ -1,11 +1,20 @@ use crate::abi::GCThreadTLS; +use crate::upcalls; use crate::utils::ChunkedVecCollector; -use crate::{upcalls, Ruby, RubySlot}; -use mmtk::scheduler::{GCWork, GCWorker, WorkBucketStage}; -use mmtk::util::{ObjectReference, VMWorkerThread}; -use mmtk::vm::{ObjectTracer, RootsWorkFactory, Scanning, SlotVisitor}; -use mmtk::{Mutator, MutatorContext}; +use crate::Ruby; +use crate::RubySlot; +use mmtk::memory_manager; +use mmtk::scheduler::GCWork; +use mmtk::scheduler::GCWorker; +use mmtk::scheduler::WorkBucketStage; +use mmtk::util::ObjectReference; +use mmtk::util::VMWorkerThread; +use mmtk::vm::ObjectTracer; +use mmtk::vm::RootsWorkFactory; +use mmtk::vm::Scanning; +use mmtk::vm::SlotVisitor; +use mmtk::Mutator; pub struct VMScanning {} @@ -45,20 +54,33 @@ impl Scanning<Ruby> for VMScanning { mmtk::memory_manager::is_mmtk_object(target_object.to_raw_address()).is_some(), "Destination is not an MMTk object. Src: {object} dst: {target_object}" ); + + debug_assert!( + // If we are in a moving GC, all objects should be pinned by PinningRegistry. + // If it is requested that target_object be pinned but it is not pinned, then + // it is a bug because it could be moved. + if crate::mmtk().get_plan().current_gc_may_move_object() && pin { + memory_manager::is_pinned(target_object) + } else { + true + }, + "Object {object} is trying to pin {target_object}" + ); + let forwarded_target = object_tracer.trace_object(target_object); if forwarded_target != target_object { - trace!( - " Forwarded target {} -> {}", - target_object, - forwarded_target - ); + trace!(" Forwarded target {target_object} -> {forwarded_target}"); } forwarded_target }; gc_tls .object_closure .set_temporarily_and_run_code(visit_object, || { - (upcalls().scan_object_ruby_style)(object); + (upcalls().call_gc_mark_children)(object); + + if crate::mmtk().get_plan().current_gc_may_move_object() { + (upcalls().update_object_references)(object); + } }); } @@ -67,14 +89,13 @@ impl Scanning<Ruby> for VMScanning { } fn scan_roots_in_mutator_thread( - tls: VMWorkerThread, - mutator: &'static mut Mutator<Ruby>, - mut factory: impl RootsWorkFactory<RubySlot>, + _tls: VMWorkerThread, + _mutator: &'static mut Mutator<Ruby>, + mut _factory: impl RootsWorkFactory<RubySlot>, ) { - let gc_tls = unsafe { GCThreadTLS::from_vwt_check(tls) }; - Self::collect_object_roots_in("scan_thread_root", gc_tls, &mut factory, || { - (upcalls().scan_roots_in_mutator_thread)(mutator.get_tls(), tls); - }); + // Do nothing. All stacks (including Ruby stacks and machine stacks) are reachable from + // `rb_vm_t` -> ractor -> thread -> fiber -> stacks. It is part of `ScanGCRoots` which + // calls `rb_gc_mark_roots` -> `rb_vm_mark`. } fn scan_vm_specific_roots(tls: VMWorkerThread, factory: impl RootsWorkFactory<RubySlot>) { @@ -136,6 +157,7 @@ impl Scanning<Ruby> for VMScanning { crate::binding() .weak_proc .process_weak_stuff(worker, tracer_context); + crate::binding().pinning_registry.cleanup(worker); false } @@ -252,15 +274,15 @@ impl<F: RootsWorkFactory<RubySlot>> GCWork<Ruby> for ScanWbUnprotectedRoots<F> { VMScanning::collect_object_roots_in("wb_unprot_roots", gc_tls, &mut self.factory, || { for object in self.objects.iter().copied() { if object.is_reachable() { - debug!( - "[wb_unprot_roots] Visiting WB-unprotected object (parent): {}", - object - ); - (upcalls().scan_object_ruby_style)(object); + debug!("[wb_unprot_roots] Visiting WB-unprotected object (parent): {object}"); + (upcalls().call_gc_mark_children)(object); + + if crate::mmtk().get_plan().current_gc_may_move_object() { + (upcalls().update_object_references)(object); + } } else { debug!( - "[wb_unprot_roots] Skipping young WB-unprotected object (parent): {}", - object + "[wb_unprot_roots] Skipping young WB-unprotected object (parent): {object}" ); } } diff --git a/gc/mmtk/src/utils.rs b/gc/mmtk/src/utils.rs index de929c3952..d1979eaf58 100644 --- a/gc/mmtk/src/utils.rs +++ b/gc/mmtk/src/utils.rs @@ -1,10 +1,13 @@ -use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering; use atomic_refcell::AtomicRefCell; -use mmtk::scheduler::{GCWork, GCWorker, WorkBucketStage}; +use mmtk::scheduler::GCWork; +use mmtk::scheduler::GCWorker; +use mmtk::scheduler::WorkBucketStage; -use sysinfo::System; use crate::Ruby; +use sysinfo::System; pub struct ChunkedVecCollector<T> { vecs: Vec<Vec<T>>, @@ -97,32 +100,29 @@ pub fn default_heap_max() -> usize { .expect("Invalid Memory size") as usize } -pub fn parse_capacity(input: &String, default: usize) -> usize { +pub fn parse_capacity(input: &str) -> Option<usize> { let trimmed = input.trim(); const KIBIBYTE: usize = 1024; const MEBIBYTE: usize = 1024 * KIBIBYTE; const GIBIBYTE: usize = 1024 * MEBIBYTE; - let (val, suffix) = if let Some(pos) = trimmed.find(|c: char| !c.is_numeric()) { - (&trimmed[..pos], &trimmed[pos..]) + let (number, suffix) = if let Some(pos) = trimmed.find(|c: char| !c.is_numeric()) { + trimmed.split_at(pos) } else { (trimmed, "") }; - // 1MiB is the default heap size - match (val, suffix) { - (number, "GiB") => number.parse::<usize>() - .and_then(|v| Ok(v * GIBIBYTE)) - .unwrap_or(default), - (number, "MiB") => number.parse::<usize>() - .and_then(|v| Ok(v * MEBIBYTE)) - .unwrap_or(default), - (number, "KiB") => number.parse::<usize>() - .and_then(|v| Ok(v * KIBIBYTE)) - .unwrap_or(default), - (number, suffix) if suffix.is_empty() => number.parse::<usize>().unwrap_or(default), - (_, _) => default + let Ok(v) = number.parse::<usize>() else { + return None; + }; + + match suffix { + "GiB" => Some(v * GIBIBYTE), + "MiB" => Some(v * MEBIBYTE), + "KiB" => Some(v * KIBIBYTE), + "" => Some(v), + _ => None, } } @@ -132,32 +132,30 @@ mod tests { #[test] fn test_parse_capacity_parses_bare_bytes() { - assert_eq!(1234, parse_capacity(&String::from("1234"), 0)); + assert_eq!(Some(1234), parse_capacity("1234")); } #[test] fn test_parse_capacity_parses_kibibytes() { - assert_eq!(10240, parse_capacity(&String::from("10KiB"), 0)) + assert_eq!(Some(10240), parse_capacity("10KiB")); } #[test] fn test_parse_capacity_parses_mebibytes() { - assert_eq!(10485760, parse_capacity(&String::from("10MiB"), 0)) + assert_eq!(Some(10485760), parse_capacity("10MiB")) } #[test] fn test_parse_capacity_parses_gibibytes() { - assert_eq!(10737418240, parse_capacity(&String::from("10GiB"), 0)) + assert_eq!(Some(10737418240), parse_capacity("10GiB")) } #[test] - fn test_parses_nonsense_value_as_default_max() { - let default = 100; - - assert_eq!(default, parse_capacity(&String::from("notanumber"), default)); - assert_eq!(default, parse_capacity(&String::from("5tartswithanumber"), default)); - assert_eq!(default, parse_capacity(&String::from("number1nthemiddle"), default)); - assert_eq!(default, parse_capacity(&String::from("numberattheend111"), default)); - assert_eq!(default, parse_capacity(&String::from("mult1pl3numb3r5"), default)); + fn test_parse_capacity_parses_nonsense_values() { + assert_eq!(None, parse_capacity("notanumber")); + assert_eq!(None, parse_capacity("5tartswithanumber")); + assert_eq!(None, parse_capacity("number1nthemiddle")); + assert_eq!(None, parse_capacity("numberattheend111")); + assert_eq!(None, parse_capacity("mult1pl3numb3r5")); } } diff --git a/gc/mmtk/src/weak_proc.rs b/gc/mmtk/src/weak_proc.rs index 11f7f5abbf..d38dbe04a4 100644 --- a/gc/mmtk/src/weak_proc.rs +++ b/gc/mmtk/src/weak_proc.rs @@ -1,23 +1,23 @@ use std::sync::Mutex; -use mmtk::{ - scheduler::{GCWork, GCWorker, WorkBucketStage}, - util::ObjectReference, - vm::ObjectTracerContext, -}; - -use crate::{ - abi::GCThreadTLS, - upcalls, - Ruby, -}; +use mmtk::scheduler::GCWork; +use mmtk::scheduler::GCWorker; +use mmtk::scheduler::WorkBucketStage; +use mmtk::util::ObjectReference; +use mmtk::vm::ObjectTracerContext; + +use crate::abi::GCThreadTLS; +use crate::upcalls; +use crate::Ruby; pub struct WeakProcessor { + non_parallel_obj_free_candidates: Mutex<Vec<ObjectReference>>, + parallel_obj_free_candidates: Vec<Mutex<Vec<ObjectReference>>>, + /// Objects that needs `obj_free` called when dying. /// If it is a bottleneck, replace it with a lock-free data structure, /// or add candidates in batch. - obj_free_candidates: Mutex<Vec<ObjectReference>>, - weak_references: Mutex<Vec<&'static mut ObjectReference>>, + weak_references: Mutex<Vec<ObjectReference>>, } impl Default for WeakProcessor { @@ -29,47 +29,74 @@ impl Default for WeakProcessor { impl WeakProcessor { pub fn new() -> Self { Self { - obj_free_candidates: Mutex::new(Vec::new()), + non_parallel_obj_free_candidates: Mutex::new(Vec::new()), + parallel_obj_free_candidates: vec![Mutex::new(Vec::new())], weak_references: Mutex::new(Vec::new()), } } - /// Add an object as a candidate for `obj_free`. - /// - /// Multiple mutators can call it concurrently, so it has `&self`. - pub fn add_obj_free_candidate(&self, object: ObjectReference) { - let mut obj_free_candidates = self.obj_free_candidates.lock().unwrap(); - obj_free_candidates.push(object); + pub fn init_parallel_obj_free_candidates(&mut self, num_workers: usize) { + debug_assert_eq!(self.parallel_obj_free_candidates.len(), 1); + + for _ in 1..num_workers { + self.parallel_obj_free_candidates + .push(Mutex::new(Vec::new())); + } } - /// Add many objects as candidates for `obj_free`. + /// Add a batch of objects as candidates for `obj_free`. /// - /// Multiple mutators can call it concurrently, so it has `&self`. - pub fn add_obj_free_candidates(&self, objects: &[ObjectReference]) { - let mut obj_free_candidates = self.obj_free_candidates.lock().unwrap(); - for object in objects.iter().copied() { - obj_free_candidates.push(object); + /// Amortizes mutex acquisition over the entire batch. Called when a + /// mutator's local buffer is flushed (buffer full or stop-the-world). + pub fn add_obj_free_candidates_batch( + &self, + objects: &[ObjectReference], + can_parallel_free: bool, + ) { + if objects.is_empty() { + return; + } + + if can_parallel_free { + let num_buckets = self.parallel_obj_free_candidates.len(); + for idx in 0..num_buckets { + let mut bucket = self.parallel_obj_free_candidates[idx].lock().unwrap(); + for (i, &obj) in objects.iter().enumerate() { + if i % num_buckets == idx { + bucket.push(obj); + } + } + } + } else { + self.non_parallel_obj_free_candidates + .lock() + .unwrap() + .extend_from_slice(objects); } } pub fn get_all_obj_free_candidates(&self) -> Vec<ObjectReference> { - let mut obj_free_candidates = self.obj_free_candidates.lock().unwrap(); - std::mem::take(obj_free_candidates.as_mut()) + // let mut obj_free_candidates = self.obj_free_candidates.lock().unwrap(); + let mut all_obj_free_candidates = self + .non_parallel_obj_free_candidates + .lock() + .unwrap() + .to_vec(); + + for candidates_mutex in &self.parallel_obj_free_candidates { + all_obj_free_candidates.extend(candidates_mutex.lock().unwrap().to_vec()); + } + + std::mem::take(all_obj_free_candidates.as_mut()) } - pub fn add_weak_reference(&self, ptr: &'static mut ObjectReference) { + pub fn add_weak_reference(&self, object: ObjectReference) { let mut weak_references = self.weak_references.lock().unwrap(); - weak_references.push(ptr); + weak_references.push(object); } - pub fn remove_weak_reference(&self, ptr: &ObjectReference) { - let mut weak_references = self.weak_references.lock().unwrap(); - for (i, curr_ptr) in weak_references.iter().enumerate() { - if *curr_ptr == ptr { - weak_references.swap_remove(i); - break; - } - } + pub fn weak_references_count(&self) -> usize { + self.weak_references.lock().unwrap().len() } pub fn process_weak_stuff( @@ -77,80 +104,133 @@ impl WeakProcessor { worker: &mut GCWorker<Ruby>, _tracer_context: impl ObjectTracerContext<Ruby>, ) { - worker.add_work(WorkBucketStage::VMRefClosure, ProcessObjFreeCandidates); + worker.add_work( + WorkBucketStage::VMRefClosure, + ProcessNonParallelObjFreeCanadidates {}, + ); + + for index in 0..self.parallel_obj_free_candidates.len() { + worker.add_work( + WorkBucketStage::VMRefClosure, + ProcessParallelObjFreeCandidates { index }, + ); + } + worker.add_work(WorkBucketStage::VMRefClosure, ProcessWeakReferences); worker.add_work(WorkBucketStage::Prepare, UpdateFinalizerObjIdTables); let global_tables_count = (crate::upcalls().global_tables_count)(); let work_packets = (0..global_tables_count) - .map(|i| { - Box::new(UpdateGlobalTables { idx: i }) as _ - }) - .collect(); + .map(|i| Box::new(UpdateGlobalTables { idx: i }) as _) + .collect(); worker.scheduler().work_buckets[WorkBucketStage::VMRefClosure].bulk_add(work_packets); - worker.scheduler().work_buckets[WorkBucketStage::VMRefClosure].bulk_add(vec![ - Box::new(UpdateWbUnprotectedObjectsList) as _, - ]); + worker.scheduler().work_buckets[WorkBucketStage::VMRefClosure] + .bulk_add(vec![Box::new(UpdateWbUnprotectedObjectsList) as _]); + } +} + +fn process_obj_free_candidates(obj_free_candidates: &mut Vec<ObjectReference>) { + // Process obj_free + let mut new_candidates = Vec::new(); + + for object in obj_free_candidates.iter().copied() { + if object.is_reachable() { + // Forward and add back to the candidate list. + let new_object = object.forward(); + trace!("Forwarding obj_free candidate: {object} -> {new_object}"); + new_candidates.push(new_object); + } else { + (upcalls().call_obj_free)(object); + } + } + + *obj_free_candidates = new_candidates; +} + +struct ProcessParallelObjFreeCandidates { + index: usize, +} + +impl GCWork<Ruby> for ProcessParallelObjFreeCandidates { + fn do_work(&mut self, _worker: &mut GCWorker<Ruby>, _mmtk: &'static mmtk::MMTK<Ruby>) { + let mut obj_free_candidates = crate::binding().weak_proc.parallel_obj_free_candidates + [self.index] + .try_lock() + .expect("Lock for parallel_obj_free_candidates should not be held"); + + process_obj_free_candidates(&mut obj_free_candidates); } } -struct ProcessObjFreeCandidates; +struct ProcessNonParallelObjFreeCanadidates; -impl GCWork<Ruby> for ProcessObjFreeCandidates { +impl GCWork<Ruby> for ProcessNonParallelObjFreeCanadidates { fn do_work(&mut self, _worker: &mut GCWorker<Ruby>, _mmtk: &'static mmtk::MMTK<Ruby>) { - // If it blocks, it is a bug. let mut obj_free_candidates = crate::binding() .weak_proc - .obj_free_candidates + .non_parallel_obj_free_candidates .try_lock() - .expect("It's GC time. No mutators should hold this lock at this time."); + .expect("Lock for non_parallel_obj_free_candidates should not be held"); - let n_cands = obj_free_candidates.len(); + process_obj_free_candidates(&mut obj_free_candidates); + } +} - debug!("Total: {} candidates", n_cands); +struct ProcessWeakReferences; - // Process obj_free - let mut new_candidates = Vec::new(); +impl GCWork<Ruby> for ProcessWeakReferences { + fn do_work(&mut self, worker: &mut GCWorker<Ruby>, _mmtk: &'static mmtk::MMTK<Ruby>) { + if crate::mmtk().get_plan().current_gc_may_move_object() { + let gc_tls: &mut GCThreadTLS = unsafe { GCThreadTLS::from_vwt_check(worker.tls) }; - for object in obj_free_candidates.iter().copied() { - if object.is_reachable() { - // Forward and add back to the candidate list. - let new_object = object.forward(); - trace!( - "Forwarding obj_free candidate: {} -> {}", - object, - new_object + let visit_object = |_worker, target_object: ObjectReference, _pin| { + debug_assert!( + mmtk::memory_manager::is_mmtk_object(target_object.to_raw_address()).is_some(), + "Destination is not an MMTk object" ); - new_candidates.push(new_object); - } else { - (upcalls().call_obj_free)(object); - } - } - *obj_free_candidates = new_candidates; + target_object + .get_forwarded_object() + .unwrap_or(target_object) + }; + + gc_tls + .object_closure + .set_temporarily_and_run_code(visit_object, || { + self.process_weak_references(true); + }) + } else { + self.process_weak_references(false); + } } } -struct ProcessWeakReferences; - -impl GCWork<Ruby> for ProcessWeakReferences { - fn do_work(&mut self, _worker: &mut GCWorker<Ruby>, _mmtk: &'static mmtk::MMTK<Ruby>) { +impl ProcessWeakReferences { + fn process_weak_references(&mut self, moving_gc: bool) { let mut weak_references = crate::binding() .weak_proc .weak_references .try_lock() .expect("Mutators should not be holding the lock."); - for ptr_ptr in weak_references.iter_mut() { - if !(**ptr_ptr).is_reachable() { - **ptr_ptr = crate::binding().weak_reference_dead_value; - } + weak_references.retain_mut(|object_ptr| { + let object = object_ptr.get_forwarded_object().unwrap_or(*object_ptr); + + if object != *object_ptr { + *object_ptr = object; } - weak_references.clear(); + if object.is_reachable() { + (upcalls().handle_weak_references)(object, moving_gc); + + true + } else { + false + } + }); } } @@ -165,11 +245,10 @@ trait GlobalTableProcessingWork { let forward_object = |_worker, object: ObjectReference, _pin| { debug_assert!( mmtk::memory_manager::is_mmtk_object(object.to_raw_address()).is_some(), - "{} is not an MMTk object", - object + "{object} is not an MMTk object" ); let result = object.forward(); - trace!("Forwarding reference: {} -> {}", object, result); + trace!("Forwarding reference: {object} -> {result}"); result }; @@ -185,7 +264,6 @@ struct UpdateFinalizerObjIdTables; impl GlobalTableProcessingWork for UpdateFinalizerObjIdTables { fn process_table(&mut self) { (crate::upcalls().update_finalizer_table)(); - (crate::upcalls().update_obj_id_tables)(); } } impl GCWork<Ruby> for UpdateFinalizerObjIdTables { @@ -195,11 +273,14 @@ impl GCWork<Ruby> for UpdateFinalizerObjIdTables { } struct UpdateGlobalTables { - idx: i32 + idx: i32, } impl GlobalTableProcessingWork for UpdateGlobalTables { fn process_table(&mut self) { - (crate::upcalls().update_global_tables)(self.idx) + (crate::upcalls().update_global_tables)( + self.idx, + crate::mmtk().get_plan().current_gc_may_move_object(), + ) } } impl GCWork<Ruby> for UpdateGlobalTables { @@ -224,14 +305,10 @@ impl GCWork<Ruby> for UpdateWbUnprotectedObjectsList { if object.is_reachable() { // Forward and add back to the candidate list. let new_object = object.forward(); - trace!( - "Forwarding WB-unprotected object: {} -> {}", - object, - new_object - ); + trace!("Forwarding WB-unprotected object: {object} -> {new_object}"); objects.insert(new_object); } else { - trace!("Removing WB-unprotected object from list: {}", object); + trace!("Removing WB-unprotected object from list: {object}"); } } |
