13 files changed, 1339 insertions, 315 deletions
diff --git a/gc/mmtk/src/abi.rs b/gc/mmtk/src/abi.rs
index 5b56d199b2..30890e0853 100644
--- a/gc/mmtk/src/abi.rs
+++ b/gc/mmtk/src/abi.rs
@@ -1,8 +1,12 @@
 use crate::api::RubyMutator;
+use crate::extra_assert;
 use crate::Ruby;
 use libc::c_int;
 use mmtk::scheduler::GCWorker;
-use mmtk::util::{Address, ObjectReference, VMMutatorThread, VMWorkerThread};
+use mmtk::util::Address;
+use mmtk::util::ObjectReference;
+use mmtk::util::VMMutatorThread;
+use mmtk::util::VMWorkerThread;
 
 // For the C binding
 pub const OBJREF_OFFSET: usize = 8;
@@ -10,16 +14,38 @@ pub const MIN_OBJ_ALIGN: usize = 8; // Even on 32-bit machine.  A Ruby object is
 
 pub const GC_THREAD_KIND_WORKER: libc::c_int = 1;
 
-const HAS_MOVED_GIVTBL: usize = 1 << 63;
 const HIDDEN_SIZE_MASK: usize = 0x0000FFFFFFFFFFFF;
 
-// Should keep in sync with C code.
-const RUBY_FL_EXIVAR: usize = 1 << 10;
-
 // An opaque type for the C counterpart.
 #[allow(non_camel_case_types)]
 pub struct st_table;
 
+#[repr(C)]
+pub struct HiddenHeader {
+    pub prefix: usize,
+}
+
+impl HiddenHeader {
+    #[inline(always)]
+    pub fn is_sane(&self) -> bool {
+        self.prefix & !HIDDEN_SIZE_MASK == 0
+    }
+
+    #[inline(always)]
+    fn assert_sane(&self) {
+        extra_assert!(
+            self.is_sane(),
+            "Hidden header is corrupted: {:x}",
+            self.prefix
+        );
+    }
+
+    pub fn payload_size(&self) -> usize {
+        self.assert_sane();
+        self.prefix & HIDDEN_SIZE_MASK
+    }
+}
+
 /// Provide convenient methods for accessing Ruby objects.
 /// TODO: Wrap C functions in `RubyUpcalls` as Rust-friendly methods.
 pub struct RubyObjectAccess {
@@ -47,32 +73,17 @@ impl RubyObjectAccess {
         self.suffix_addr() + Self::suffix_size()
     }
 
-    fn hidden_field(&self) -> Address {
-        self.obj_start()
-    }
-
-    fn load_hidden_field(&self) -> usize {
-        unsafe { self.hidden_field().load::<usize>() }
+    fn hidden_header(&self) -> &'static HiddenHeader {
+        unsafe { self.obj_start().as_ref() }
     }
 
-    fn update_hidden_field<F>(&self, f: F)
-    where
-        F: FnOnce(usize) -> usize,
-    {
-        let old_value = self.load_hidden_field();
-        let new_value = f(old_value);
-        unsafe {
-            self.hidden_field().store(new_value);
-        }
+    #[allow(unused)] // Maybe we need to mutate the hidden header in the future.
+    fn hidden_header_mut(&self) -> &'static mut HiddenHeader {
+        unsafe { self.obj_start().as_mut_ref() }
     }
 
     pub fn payload_size(&self) -> usize {
-        self.load_hidden_field() & HIDDEN_SIZE_MASK
-    }
-
-    pub fn set_payload_size(&self, size: usize) {
-        debug_assert!((size & HIDDEN_SIZE_MASK) == size);
-        self.update_hidden_field(|old| old & !HIDDEN_SIZE_MASK | size & HIDDEN_SIZE_MASK);
+        self.hidden_header().payload_size()
     }
 
     fn flags_field(&self) -> Address {
@@ -83,22 +94,6 @@ impl RubyObjectAccess {
         unsafe { self.flags_field().load::<usize>() }
     }
 
-    pub fn has_exivar_flag(&self) -> bool {
-        (self.load_flags() & RUBY_FL_EXIVAR) != 0
-    }
-
-    pub fn has_moved_givtbl(&self) -> bool {
-        (self.load_hidden_field() & HAS_MOVED_GIVTBL) != 0
-    }
-
-    pub fn set_has_moved_givtbl(&self) {
-        self.update_hidden_field(|old| old | HAS_MOVED_GIVTBL)
-    }
-
-    pub fn clear_has_moved_givtbl(&self) {
-        self.update_hidden_field(|old| old & !HAS_MOVED_GIVTBL)
-    }
-
     pub fn prefix_size() -> usize {
         // Currently, a hidden size field of word size is placed before each object.
         OBJREF_OFFSET
@@ -163,7 +158,7 @@ impl ObjectClosure {
         F2: 'env + FnOnce() -> T,
     {
         debug_assert!(
-            self.c_function == THE_UNREGISTERED_CLOSURE_FUNC,
+            std::ptr::fn_addr_eq(self.c_function, THE_UNREGISTERED_CLOSURE_FUNC),
             "set_temporarily_and_run_code is recursively called."
         );
         self.c_function = Self::c_function_registered::<F1>;
@@ -232,7 +227,7 @@ impl GCThreadTLS {
     /// Has undefined behavior if `ptr` is invalid.
     pub unsafe fn check_cast(ptr: *mut GCThreadTLS) -> &'static mut GCThreadTLS {
         assert!(!ptr.is_null());
-        let result = &mut *ptr;
+        let result = unsafe { &mut *ptr };
         debug_assert!({
             let kind = result.kind;
             kind == GC_THREAD_KIND_WORKER
@@ -247,7 +242,7 @@ impl GCThreadTLS {
     /// Has undefined behavior if `ptr` is invalid.
     pub unsafe fn from_vwt_check(vwt: VMWorkerThread) -> &'static mut GCThreadTLS {
         let ptr = Self::from_vwt(vwt);
-        Self::check_cast(ptr)
+        unsafe { Self::check_cast(ptr) }
     }
 
     #[allow(clippy::not_unsafe_ptr_arg_deref)] // `transmute` does not dereference pointer
@@ -283,7 +278,7 @@ impl RawVecOfObjRef {
     ///
     /// This function turns raw pointer into a Vec without check.
     pub unsafe fn into_vec(self) -> Vec<ObjectReference> {
-        Vec::from_raw_parts(self.ptr, self.len, self.capa)
+        unsafe { Vec::from_raw_parts(self.ptr, self.len, self.capa) }
     }
 }
 
@@ -296,7 +291,6 @@ impl From<Vec<ObjectReference>> for RawVecOfObjRef {
 #[repr(C)]
 #[derive(Clone)]
 pub struct RubyBindingOptions {
-    pub ractor_check_mode: bool,
     pub suffix_size: usize,
 }
 
@@ -306,8 +300,10 @@ pub struct RubyUpcalls {
     pub init_gc_worker_thread: extern "C" fn(gc_worker_tls: *mut GCThreadTLS),
     pub is_mutator: extern "C" fn() -> bool,
     pub stop_the_world: extern "C" fn(),
-    pub resume_mutators: extern "C" fn(),
+    pub resume_mutators: extern "C" fn(gc_may_move: bool),
     pub block_for_gc: extern "C" fn(tls: VMMutatorThread),
+    pub before_updating_jit_code: extern "C" fn(),
+    pub after_updating_jit_code: extern "C" fn(),
     pub number_of_mutators: extern "C" fn() -> usize,
     pub get_mutators: extern "C" fn(
         visit_mutator: extern "C" fn(*mut RubyMutator, *mut libc::c_void),
@@ -315,16 +311,18 @@ pub struct RubyUpcalls {
     ),
     pub scan_gc_roots: extern "C" fn(),
     pub scan_objspace: extern "C" fn(),
-    pub scan_roots_in_mutator_thread:
-        extern "C" fn(mutator_tls: VMMutatorThread, worker_tls: VMWorkerThread),
-    pub scan_object_ruby_style: extern "C" fn(object: ObjectReference),
+    pub move_obj_during_marking: extern "C" fn(from: ObjectReference, to: ObjectReference),
+    pub update_object_references: extern "C" fn(object: ObjectReference),
     pub call_gc_mark_children: extern "C" fn(object: ObjectReference),
+    pub handle_weak_references: extern "C" fn(object: ObjectReference, moving: bool),
     pub call_obj_free: extern "C" fn(object: ObjectReference),
     pub vm_live_bytes: extern "C" fn() -> usize,
-    pub update_global_tables: extern "C" fn(tbl_idx: c_int),
+    pub update_global_tables: extern "C" fn(tbl_idx: c_int, moving: bool),
     pub global_tables_count: extern "C" fn() -> c_int,
     pub update_finalizer_table: extern "C" fn(),
-    pub update_obj_id_tables: extern "C" fn(),
+    pub special_const_p: extern "C" fn(object: ObjectReference) -> bool,
+    pub mutator_thread_panic_handler: extern "C" fn(),
+    pub gc_thread_panic_handler: extern "C" fn(),
 }
 
 unsafe impl Sync for RubyUpcalls {}
diff --git a/gc/mmtk/src/api.rs b/gc/mmtk/src/api.rs
index 91718cead6..c0540fe0c8 100644
--- a/gc/mmtk/src/api.rs
+++ b/gc/mmtk/src/api.rs
@@ -1,16 +1,28 @@
-use std::sync::atomic::Ordering;
+// Functions in this module are unsafe for one reason:
+// They are called by C functions and they need to pass raw pointers to Rust.
+#![allow(clippy::missing_safety_doc)]
+
+use mmtk::util::alloc::BumpPointer;
+use mmtk::util::alloc::ImmixAllocator;
+use mmtk::util::conversions;
 use mmtk::util::options::PlanSelector;
+use std::str::FromStr;
+use std::sync::atomic::Ordering;
 
 use crate::abi::RawVecOfObjRef;
 use crate::abi::RubyBindingOptions;
 use crate::abi::RubyUpcalls;
 use crate::binding;
 use crate::binding::RubyBinding;
+use crate::heap::CpuHeapTriggerConfig;
+use crate::heap::RubyHeapTriggerConfig;
+use crate::heap::CPU_HEAP_TRIGGER_CONFIG;
+use crate::heap::RUBY_HEAP_TRIGGER_CONFIG;
 use crate::mmtk;
-use crate::Ruby;
-use crate::RubySlot;
 use crate::utils::default_heap_max;
 use crate::utils::parse_capacity;
+use crate::Ruby;
+use crate::RubySlot;
 use mmtk::memory_manager;
 use mmtk::memory_manager::mmtk_init;
 use mmtk::util::constants::MIN_OBJECT_SIZE;
@@ -37,73 +49,139 @@ pub extern "C" fn mmtk_is_reachable(object: ObjectReference) -> bool {
 
 // =============== Bootup ===============
 
-fn mmtk_builder_default_parse_threads() -> usize {
-    let threads_str = std::env::var("MMTK_THREADS")
-        .unwrap_or("0".to_string());
-
-    threads_str
-        .parse::<usize>()
-        .unwrap_or_else(|_err| {
-            eprintln!("[FATAL] Invalid MMTK_THREADS {}", threads_str);
+fn parse_env_var_with<T, F: FnOnce(&str) -> Option<T>>(key: &str, parse: F) -> Option<T> {
+    let val = match std::env::var(key) {
+        Ok(val) => val,
+        Err(std::env::VarError::NotPresent) => return None,
+        Err(std::env::VarError::NotUnicode(os_string)) => {
+            eprintln!("[FATAL] Invalid {key} {os_string:?}");
             std::process::exit(1);
-        })
-}
+        }
+    };
 
-fn mmtk_builder_default_parse_heap_min() -> usize {
-    const DEFAULT_HEAP_MIN: usize = 1 << 20;
+    let parsed = parse(&val).unwrap_or_else(|| {
+        eprintln!("[FATAL] Invalid {key} {val}");
+        std::process::exit(1);
+    });
 
-    let heap_min_str = std::env::var("MMTK_HEAP_MIN")
-        .unwrap_or(DEFAULT_HEAP_MIN.to_string());
+    Some(parsed)
+}
 
-    let size = parse_capacity(&heap_min_str, 0);
-    if size == 0 {
-        eprintln!("[FATAL] Invalid MMTK_HEAP_MIN {}", heap_min_str);
-        std::process::exit(1);
-    }
+fn parse_env_var<T: FromStr>(key: &str) -> Option<T> {
+    parse_env_var_with(key, |s| s.parse().ok())
+}
+
+fn mmtk_builder_default_parse_threads() -> Option<usize> {
+    parse_env_var("MMTK_THREADS")
+}
 
-    size
+fn mmtk_builder_default_parse_heap_min() -> usize {
+    const DEFAULT_HEAP_MIN: usize = 1 << 20;
+    parse_env_var_with("MMTK_HEAP_MIN", parse_capacity).unwrap_or(DEFAULT_HEAP_MIN)
 }
 
 fn mmtk_builder_default_parse_heap_max() -> usize {
-    let heap_max_str = std::env::var("MMTK_HEAP_MAX")
-        .unwrap_or(default_heap_max().to_string());
+    parse_env_var_with("MMTK_HEAP_MAX", parse_capacity).unwrap_or_else(default_heap_max)
+}
 
-    let size = parse_capacity(&heap_max_str, 0);
-    if size == 0 {
-        eprintln!("[FATAL] Invalid MMTK_HEAP_MAX {}", heap_max_str);
-        std::process::exit(1);
-    }
+fn parse_float_env_var(key: &str, default: f64, min: f64, max: f64) -> f64 {
+    parse_env_var_with(key, |s| {
+        let mut float = f64::from_str(s).unwrap_or(default);
+
+        if float <= min {
+            eprintln!(
+                "{key} has value {float} which must be greater than {min}, using default instead"
+            );
+            float = default;
+        }
 
-    size
+        if float >= max {
+            eprintln!(
+                "{key} has value {float} which must be less than {max}, using default instead"
+            );
+            float = default;
+        }
+
+        Some(float)
+    })
+    .unwrap_or(default)
 }
 
 fn mmtk_builder_default_parse_heap_mode(heap_min: usize, heap_max: usize) -> GCTriggerSelector {
-    let heap_mode_str = std::env::var("MMTK_HEAP_MODE")
-        .unwrap_or("dynamic".to_string());
-
-    match heap_mode_str.as_str() {
-        "fixed" => GCTriggerSelector::FixedHeapSize(heap_max),
-        "dynamic" => GCTriggerSelector::DynamicHeapSize(heap_min, heap_max),
-        _ => {
-            eprintln!("[FATAL] Invalid MMTK_HEAP_MODE {}", heap_mode_str);
-            std::process::exit(1);
+    let make_fixed = || GCTriggerSelector::FixedHeapSize(heap_max);
+    let make_dynamic = || GCTriggerSelector::DynamicHeapSize(heap_min, heap_max);
+
+    parse_env_var_with("MMTK_HEAP_MODE", |s| match s {
+        "fixed" => Some(make_fixed()),
+        "dynamic" => Some(make_dynamic()),
+        "ruby" => {
+            let min_ratio = parse_float_env_var("RUBY_GC_HEAP_FREE_SLOTS_MIN_RATIO", 0.2, 0.0, 1.0);
+            let goal_ratio =
+                parse_float_env_var("RUBY_GC_HEAP_FREE_SLOTS_GOAL_RATIO", 0.4, min_ratio, 1.0);
+            let max_ratio =
+                parse_float_env_var("RUBY_GC_HEAP_FREE_SLOTS_MAX_RATIO", 0.65, goal_ratio, 1.0);
+
+            crate::heap::RUBY_HEAP_TRIGGER_CONFIG
+                .set(RubyHeapTriggerConfig {
+                    min_heap_pages: conversions::bytes_to_pages_up(heap_min),
+                    max_heap_pages: conversions::bytes_to_pages_up(heap_max),
+                    heap_pages_min_ratio: min_ratio,
+                    heap_pages_goal_ratio: goal_ratio,
+                    heap_pages_max_ratio: max_ratio,
+                })
+                .unwrap_or_else(|_| panic!("RUBY_HEAP_TRIGGER_CONFIG is already set"));
+
+            Some(GCTriggerSelector::Delegated)
         }
-    }
+        "cpu" => {
+            // CPU-overhead-driven heap sizing based on Tavakolisomeh et al.,
+            // "Heap Size Adjustment with CPU Control", MPLR '23.
+            //
+            // Target is expressed as a percentage (0, 100) via
+            // `MMTK_GC_CPU_TARGET`. The paper recommends 15 for ZGC (a
+            // concurrent collector); we default to 5 for MMTk-Ruby. With
+            // MMTk's stop-the-world Immix, every percent of GC CPU is also
+            // a percent of wall-clock the mutator is blocked on, so a much
+            // smaller budget is appropriate. An empirical sweep across
+            // ruby-bench (railsbench, lobsters, psych-load, liquid-render,
+            // lee) found target=5 to be Pareto-optimal: ~6% geomean speedup
+            // vs. the `ruby` heap mode with effectively identical geomean
+            // peak RSS.
+            let target_percent = parse_float_env_var("MMTK_GC_CPU_TARGET", 5.0, 0.0, 100.0);
+            let window_size = parse_env_var::<usize>("MMTK_GC_CPU_WINDOW").unwrap_or(3);
+            let window_size = window_size.max(1);
+
+            let min_heap_pages = conversions::bytes_to_pages_up(heap_min);
+            let max_heap_pages = conversions::bytes_to_pages_up(heap_max);
+            // Start at the min heap size, as the other delegated triggers do.
+            // The control loop will adjust from here after the first GC cycle.
+            let initial_heap_pages = min_heap_pages;
+
+            CPU_HEAP_TRIGGER_CONFIG
+                .set(CpuHeapTriggerConfig {
+                    min_heap_pages,
+                    max_heap_pages,
+                    initial_heap_pages,
+                    target_gc_cpu: target_percent / 100.0,
+                    window_size,
+                })
+                .unwrap_or_else(|_| panic!("CPU_HEAP_TRIGGER_CONFIG is already set"));
+
+            Some(GCTriggerSelector::Delegated)
+        }
+        _ => None,
+    })
+    .unwrap_or_else(make_dynamic)
 }
 
 fn mmtk_builder_default_parse_plan() -> PlanSelector {
-    let plan_str = std::env::var("MMTK_PLAN")
-        .unwrap_or("MarkSweep".to_string());
-
-    match plan_str.as_str() {
-        "NoGC" => PlanSelector::NoGC,
-        "MarkSweep" => PlanSelector::MarkSweep,
-        "Immix" => PlanSelector::Immix,
-        _ => {
-            eprintln!("[FATAL] Invalid MMTK_PLAN {}", plan_str);
-            std::process::exit(1);
-        }
-    }
+    parse_env_var_with("MMTK_PLAN", |s| match s {
+        "NoGC" => Some(PlanSelector::NoGC),
+        "MarkSweep" => Some(PlanSelector::MarkSweep),
+        "Immix" => Some(PlanSelector::Immix),
+        _ => None,
+    })
+    .unwrap_or(PlanSelector::Immix)
 }
 
 #[no_mangle]
@@ -111,9 +189,12 @@ pub extern "C" fn mmtk_builder_default() -> *mut MMTKBuilder {
     let mut builder = MMTKBuilder::new_no_env_vars();
     builder.options.no_finalizer.set(true);
 
-    let threads = mmtk_builder_default_parse_threads();
-    if threads > 0 {
-        builder.options.threads.set(threads);
+    if let Some(threads) = mmtk_builder_default_parse_threads() {
+        if !builder.options.threads.set(threads) {
+            // MMTk will validate it and reject 0.
+            eprintln!("[FATAL] Failed to set the number of MMTk threads to {threads}");
+            std::process::exit(1);
+        }
     }
 
     let heap_min = mmtk_builder_default_parse_heap_min();
@@ -121,11 +202,14 @@ pub extern "C" fn mmtk_builder_default() -> *mut MMTKBuilder {
     let heap_max = mmtk_builder_default_parse_heap_max();
 
     if heap_min >= heap_max {
-        eprintln!("[FATAL] MMTK_HEAP_MIN({}) >= MMTK_HEAP_MAX({})", heap_min, heap_max);
+        eprintln!("[FATAL] MMTK_HEAP_MIN({heap_min}) >= MMTK_HEAP_MAX({heap_max})");
         std::process::exit(1);
     }
 
-    builder.options.gc_trigger.set(mmtk_builder_default_parse_heap_mode(heap_min, heap_max));
+    builder
+        .options
+        .gc_trigger
+        .set(mmtk_builder_default_parse_heap_mode(heap_min, heap_max));
 
     builder.options.plan.set(mmtk_builder_default_parse_plan());
 
@@ -133,20 +217,26 @@ pub extern "C" fn mmtk_builder_default() -> *mut MMTKBuilder {
 }
 
 #[no_mangle]
-pub extern "C" fn mmtk_init_binding(
+pub unsafe extern "C" fn mmtk_init_binding(
     builder: *mut MMTKBuilder,
-    _binding_options: *const RubyBindingOptions,
+    binding_options: *const RubyBindingOptions,
     upcalls: *const RubyUpcalls,
-    weak_reference_dead_value: ObjectReference,
 ) {
+    crate::MUTATOR_THREAD_PANIC_HANDLER
+        .set((unsafe { (*upcalls).clone() }).mutator_thread_panic_handler)
+        .unwrap_or_else(|_| panic!("MUTATOR_THREAD_PANIC_HANDLER is already initialized"));
+
     crate::set_panic_hook();
 
-    let builder = unsafe { Box::from_raw(builder) };
-    let binding_options = RubyBindingOptions {ractor_check_mode: false, suffix_size: 0};
+    let builder: Box<MMTKBuilder> = unsafe { Box::from_raw(builder) };
+    let binding_options = unsafe { (*binding_options).clone() };
     let mmtk_boxed = mmtk_init(&builder);
     let mmtk_static = Box::leak(Box::new(mmtk_boxed));
 
-    let binding = RubyBinding::new(mmtk_static, &binding_options, upcalls, weak_reference_dead_value);
+    let mut binding = RubyBinding::new(mmtk_static, &binding_options, upcalls);
+    binding
+        .weak_proc
+        .init_parallel_obj_free_candidates(memory_manager::num_of_workers(binding.mmtk));
 
     crate::BINDING
         .set(binding)
@@ -164,7 +254,25 @@ pub extern "C" fn mmtk_bind_mutator(tls: VMMutatorThread) -> *mut RubyMutator {
 }
 
 #[no_mangle]
-pub extern "C" fn mmtk_destroy_mutator(mutator: *mut RubyMutator) {
+pub unsafe extern "C" fn mmtk_get_bump_pointer_allocator(m: *mut RubyMutator) -> *mut BumpPointer {
+    match *crate::BINDING.get().unwrap().mmtk.get_options().plan {
+        PlanSelector::Immix => {
+            let mutator: &mut Mutator<Ruby> = unsafe { &mut *m };
+            let allocator =
+                unsafe { mutator.allocator_mut(mmtk::util::alloc::AllocatorSelector::Immix(0)) };
+
+            if let Some(immix_allocator) = allocator.downcast_mut::<ImmixAllocator<Ruby>>() {
+                &mut immix_allocator.bump_pointer as *mut BumpPointer
+            } else {
+                panic!("Failed to get bump pointer allocator");
+            }
+        }
+        _ => std::ptr::null_mut(),
+    }
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn mmtk_destroy_mutator(mutator: *mut RubyMutator) {
     // notify mmtk-core about destroyed mutator
     memory_manager::destroy_mutator(unsafe { &mut *mutator });
     // turn the ptr back to a box, and let Rust properly reclaim it
@@ -174,13 +282,19 @@ pub extern "C" fn mmtk_destroy_mutator(mutator: *mut RubyMutator) {
 // =============== GC ===============
 
 #[no_mangle]
-pub extern "C" fn mmtk_handle_user_collection_request(tls: VMMutatorThread) {
-    memory_manager::handle_user_collection_request::<Ruby>(mmtk(), tls);
+pub extern "C" fn mmtk_handle_user_collection_request(
+    tls: VMMutatorThread,
+    force: bool,
+    exhaustive: bool,
+) {
+    crate::mmtk().handle_user_collection_request(tls, force, exhaustive);
 }
 
 #[no_mangle]
 pub extern "C" fn mmtk_set_gc_enabled(enable: bool) {
-    crate::CONFIGURATION.gc_enabled.store(enable, Ordering::Relaxed);
+    crate::CONFIGURATION
+        .gc_enabled
+        .store(enable, Ordering::Relaxed);
 }
 
 #[no_mangle]
@@ -191,7 +305,7 @@ pub extern "C" fn mmtk_gc_enabled_p() -> bool {
 // =============== Object allocation ===============
 
 #[no_mangle]
-pub extern "C" fn mmtk_alloc(
+pub unsafe extern "C" fn mmtk_alloc(
     mutator: *mut RubyMutator,
     size: usize,
     align: usize,
@@ -209,7 +323,7 @@ pub extern "C" fn mmtk_alloc(
 }
 
 #[no_mangle]
-pub extern "C" fn mmtk_post_alloc(
+pub unsafe extern "C" fn mmtk_post_alloc(
     mutator: *mut RubyMutator,
     refer: ObjectReference,
     bytes: usize,
@@ -218,28 +332,46 @@ pub extern "C" fn mmtk_post_alloc(
     memory_manager::post_alloc::<Ruby>(unsafe { &mut *mutator }, refer, bytes, semantics)
 }
 
-// TODO: Replace with buffered mmtk_add_obj_free_candidates
 #[no_mangle]
-pub extern "C" fn mmtk_add_obj_free_candidate(object: ObjectReference) {
-    binding().weak_proc.add_obj_free_candidate(object)
+pub unsafe extern "C" fn mmtk_add_obj_free_candidates(
+    objects: *const ObjectReference,
+    count: usize,
+    can_parallel_free: bool,
+) {
+    let objects = unsafe { std::slice::from_raw_parts(objects, count) };
+    binding()
+        .weak_proc
+        .add_obj_free_candidates_batch(objects, can_parallel_free)
+}
+
+// =============== Weak references ===============
+
+#[no_mangle]
+pub extern "C" fn mmtk_declare_weak_references(object: ObjectReference) {
+    binding().weak_proc.add_weak_reference(object);
 }
 
-// =============== Marking ===============
+#[no_mangle]
+pub extern "C" fn mmtk_weak_references_alive_p(object: ObjectReference) -> bool {
+    object.is_reachable()
+}
 
 #[no_mangle]
-pub extern "C" fn mmtk_mark_weak(ptr: &'static mut ObjectReference) {
-    binding().weak_proc.add_weak_reference(ptr);
+pub extern "C" fn mmtk_weak_references_count() -> usize {
+    binding().weak_proc.weak_references_count()
 }
 
+// =============== Compaction ===============
+
 #[no_mangle]
-pub extern "C" fn mmtk_remove_weak(ptr: &ObjectReference) {
-    binding().weak_proc.remove_weak_reference(ptr);
+pub extern "C" fn mmtk_register_pinning_obj(obj: ObjectReference) {
+    crate::binding().pinning_registry.register(obj);
 }
 
 // =============== Write barriers ===============
 
 #[no_mangle]
-pub extern "C" fn mmtk_object_reference_write_post(
+pub unsafe extern "C" fn mmtk_object_reference_write_post(
     mutator: *mut RubyMutator,
     object: ObjectReference,
 ) {
@@ -343,7 +475,7 @@ pub extern "C" fn mmtk_plan() -> *const u8 {
         PlanSelector::NoGC => NO_GC.as_ptr(),
         PlanSelector::MarkSweep => MARK_SWEEP.as_ptr(),
         PlanSelector::Immix => IMMIX.as_ptr(),
-        _ => panic!("Unknown plan")
+        _ => panic!("Unknown plan"),
     }
 }
 
@@ -351,11 +483,21 @@ pub extern "C" fn mmtk_plan() -> *const u8 {
 pub extern "C" fn mmtk_heap_mode() -> *const u8 {
     static FIXED_HEAP: &[u8] = b"fixed\0";
     static DYNAMIC_HEAP: &[u8] = b"dynamic\0";
+    static RUBY_HEAP: &[u8] = b"ruby\0";
+    static CPU_HEAP: &[u8] = b"cpu\0";
 
     match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger {
         GCTriggerSelector::FixedHeapSize(_) => FIXED_HEAP.as_ptr(),
         GCTriggerSelector::DynamicHeapSize(_, _) => DYNAMIC_HEAP.as_ptr(),
-        _ => panic!("Unknown heap mode")
+        GCTriggerSelector::Delegated => {
+            // Two delegated triggers exist; disambiguate via the populated
+            // config singleton.
+            if CPU_HEAP_TRIGGER_CONFIG.get().is_some() {
+                CPU_HEAP.as_ptr()
+            } else {
+                RUBY_HEAP.as_ptr()
+            }
+        }
     }
 }
 
@@ -364,7 +506,18 @@ pub extern "C" fn mmtk_heap_min() -> usize {
     match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger {
         GCTriggerSelector::FixedHeapSize(_) => 0,
         GCTriggerSelector::DynamicHeapSize(min_size, _) => min_size,
-        _ => panic!("Unknown heap mode")
+        GCTriggerSelector::Delegated => {
+            if let Some(cfg) = CPU_HEAP_TRIGGER_CONFIG.get() {
+                conversions::pages_to_bytes(cfg.min_heap_pages)
+            } else {
+                conversions::pages_to_bytes(
+                    RUBY_HEAP_TRIGGER_CONFIG
+                        .get()
+                        .expect("RUBY_HEAP_TRIGGER_CONFIG not set")
+                        .min_heap_pages,
+                )
+            }
+        }
     }
 }
 
@@ -373,7 +526,18 @@ pub extern "C" fn mmtk_heap_max() -> usize {
     match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger {
         GCTriggerSelector::FixedHeapSize(max_size) => max_size,
         GCTriggerSelector::DynamicHeapSize(_, max_size) => max_size,
-        _ => panic!("Unknown heap mode")
+        GCTriggerSelector::Delegated => {
+            if let Some(cfg) = CPU_HEAP_TRIGGER_CONFIG.get() {
+                conversions::pages_to_bytes(cfg.max_heap_pages)
+            } else {
+                conversions::pages_to_bytes(
+                    RUBY_HEAP_TRIGGER_CONFIG
+                        .get()
+                        .expect("RUBY_HEAP_TRIGGER_CONFIG not set")
+                        .max_heap_pages,
+                )
+            }
+        }
     }
 }
 
diff --git a/gc/mmtk/src/binding.rs b/gc/mmtk/src/binding.rs
index e0f8640e1c..36d4a992fd 100644
--- a/gc/mmtk/src/binding.rs
+++ b/gc/mmtk/src/binding.rs
@@ -9,6 +9,7 @@ use mmtk::MMTK;
 
 use crate::abi;
 use crate::abi::RubyBindingOptions;
+use crate::pinning_registry::PinningRegistry;
 use crate::weak_proc::WeakProcessor;
 use crate::Ruby;
 
@@ -54,10 +55,9 @@ pub struct RubyBinding {
     pub upcalls: *const abi::RubyUpcalls,
     pub plan_name: Mutex<Option<CString>>,
     pub weak_proc: WeakProcessor,
+    pub pinning_registry: PinningRegistry,
     pub gc_thread_join_handles: Mutex<Vec<JoinHandle<()>>>,
     pub wb_unprotected_objects: Mutex<HashSet<ObjectReference>>,
-
-    pub weak_reference_dead_value: ObjectReference,
 }
 
 unsafe impl Sync for RubyBinding {}
@@ -68,7 +68,6 @@ impl RubyBinding {
         mmtk: &'static MMTK<Ruby>,
         binding_options: &RubyBindingOptions,
         upcalls: *const abi::RubyUpcalls,
-        weak_reference_dead_value: ObjectReference,
     ) -> Self {
         unsafe {
             crate::BINDING_FAST.suffix_size = binding_options.suffix_size;
@@ -80,10 +79,9 @@ impl RubyBinding {
             upcalls,
             plan_name: Mutex::new(None),
             weak_proc: WeakProcessor::new(),
+            pinning_registry: PinningRegistry::new(),
             gc_thread_join_handles: Default::default(),
             wb_unprotected_objects: Default::default(),
-
-            weak_reference_dead_value
         }
     }
 
@@ -119,7 +117,7 @@ impl RubyBinding {
     }
 
     pub fn register_wb_unprotected_object(&self, object: ObjectReference) {
-        debug!("Registering WB-unprotected object: {}", object);
+        debug!("Registering WB-unprotected object: {object}");
         let mut objects = self.wb_unprotected_objects.lock().unwrap();
         objects.insert(object);
     }
diff --git a/gc/mmtk/src/collection.rs b/gc/mmtk/src/collection.rs
index 0570b64e3a..648efa4e27 100644
--- a/gc/mmtk/src/collection.rs
+++ b/gc/mmtk/src/collection.rs
@@ -1,14 +1,26 @@
 use crate::abi::GCThreadTLS;
 
 use crate::api::RubyMutator;
-use crate::{mmtk, upcalls, Ruby};
+use crate::heap::CpuHeapTrigger;
+use crate::heap::RubyHeapTrigger;
+use crate::heap::CPU_HEAP_TRIGGER_CONFIG;
+use crate::mmtk;
+use crate::upcalls;
+use crate::Ruby;
 use mmtk::memory_manager;
 use mmtk::scheduler::*;
-use mmtk::util::{VMMutatorThread, VMThread, VMWorkerThread};
-use mmtk::vm::{Collection, GCThreadContext};
+use mmtk::util::heap::GCTriggerPolicy;
+use mmtk::util::VMMutatorThread;
+use mmtk::util::VMThread;
+use mmtk::util::VMWorkerThread;
+use mmtk::vm::Collection;
+use mmtk::vm::GCThreadContext;
+use std::sync::atomic::AtomicBool;
 use std::sync::atomic::Ordering;
 use std::thread;
 
+static CURRENT_GC_MAY_MOVE: AtomicBool = AtomicBool::new(false);
+
 pub struct VMCollection {}
 
 impl Collection<Ruby> for VMCollection {
@@ -16,11 +28,21 @@ impl Collection<Ruby> for VMCollection {
         crate::CONFIGURATION.gc_enabled.load(Ordering::Relaxed)
     }
 
-    fn stop_all_mutators<F>(_tls: VMWorkerThread, mut mutator_visitor: F)
+    fn stop_all_mutators<F>(tls: VMWorkerThread, mut mutator_visitor: F)
     where
         F: FnMut(&'static mut mmtk::Mutator<Ruby>),
     {
         (upcalls().stop_the_world)();
+
+        if crate::mmtk().get_plan().current_gc_may_move_object() {
+            CURRENT_GC_MAY_MOVE.store(true, Ordering::Relaxed);
+            (upcalls().before_updating_jit_code)();
+        } else {
+            CURRENT_GC_MAY_MOVE.store(false, Ordering::Relaxed);
+        }
+
+        crate::binding().pinning_registry.pin_children(tls);
+
         (upcalls().get_mutators)(
             Self::notify_mutator_ready::<F>,
             &mut mutator_visitor as *mut F as *mut _,
@@ -28,7 +50,13 @@ impl Collection<Ruby> for VMCollection {
     }
 
     fn resume_mutators(_tls: VMWorkerThread) {
-        (upcalls().resume_mutators)();
+        let current_gc_may_move = CURRENT_GC_MAY_MOVE.load(Ordering::Relaxed);
+
+        if current_gc_may_move {
+            (upcalls().after_updating_jit_code)();
+        }
+
+        (upcalls().resume_mutators)(current_gc_may_move);
     }
 
     fn block_for_gc(tls: VMMutatorThread) {
@@ -41,10 +69,7 @@ impl Collection<Ruby> for VMCollection {
                 .name("MMTk Worker Thread".to_string())
                 .spawn(move || {
                     let ordinal = worker.ordinal;
-                    debug!(
-                        "Hello! This is MMTk Worker Thread running! ordinal: {}",
-                        ordinal
-                    );
+                    debug!("Hello! This is MMTk Worker Thread running! ordinal: {ordinal}");
                     crate::register_gc_thread(thread::current().id());
                     let ptr_worker = &mut *worker as *mut GCWorker<Ruby>;
                     let gc_thread_tls =
@@ -55,10 +80,7 @@ impl Collection<Ruby> for VMCollection {
                         GCThreadTLS::to_vwt(gc_thread_tls),
                         worker,
                     );
-                    debug!(
-                        "An MMTk Worker Thread is quitting. Good bye! ordinal: {}",
-                        ordinal
-                    );
+                    debug!("An MMTk Worker Thread is quitting. Good bye! ordinal: {ordinal}");
                     crate::unregister_gc_thread(thread::current().id());
                 })
                 .unwrap(),
@@ -73,6 +95,19 @@ impl Collection<Ruby> for VMCollection {
     fn vm_live_bytes() -> usize {
         (upcalls().vm_live_bytes)()
     }
+
+    fn create_gc_trigger() -> Box<dyn GCTriggerPolicy<Ruby>> {
+        // `GCTriggerSelector::Delegated` is currently used by two different
+        // heap modes: `ruby` (the Ruby-like free-slot ratio trigger) and `cpu`
+        // (the CPU-overhead trigger from Tavakolisomeh et al., MPLR '23).
+        // Which one is active is determined by which `OnceCell` config the
+        // `MMTK_HEAP_MODE` parser populated.
+        if CPU_HEAP_TRIGGER_CONFIG.get().is_some() {
+            Box::new(CpuHeapTrigger::default())
+        } else {
+            Box::new(RubyHeapTrigger::default())
+        }
+    }
 }
 
 impl VMCollection {
diff --git a/gc/mmtk/src/heap/cpu_heap_trigger.rs b/gc/mmtk/src/heap/cpu_heap_trigger.rs
new file mode 100644
index 0000000000..ef5a79fe9a
--- /dev/null
+++ b/gc/mmtk/src/heap/cpu_heap_trigger.rs
@@ -0,0 +1,370 @@
+//! A GC trigger that adjusts the heap size based on the CPU overhead of GC.
+//!
+//! This is an implementation of the heap sizing policy described in
+//! Tavakolisomeh, Shimchenko, Österlund, Bruno, Ferreira, Wrigstad,
+//! "Heap Size Adjustment with CPU Control", MPLR '23.
+//! <https://doi.org/10.1145/3617651.3622988>
+//!
+//! The idea: rather than letting heap size control GC frequency, let a
+//! user-supplied *target GC CPU overhead* control the heap size. After each GC
+//! cycle, we measure the GC CPU overhead (fraction of process CPU time spent
+//! in GC) and compare it to the target. If GC is over budget we grow the heap
+//! (reducing GC frequency); if it is under budget we shrink the heap (trading
+//! memory for more frequent collections).
+//!
+//! ## Algorithm
+//!
+//! After each GC cycle we compute, using an average of the last `n` cycles:
+//!
+//! ```text
+//! GC_CPU             = T_GC / T_APP                                  (Eq. 1)
+//! overhead_error     = GC_CPU - target                                (Eq. 2)
+//! sigmoid_error      = 1 / (1 + e^(-overhead_error))                  (Eq. 3)
+//! adjustment_factor  = sigmoid_error + 0.5   (in (0.5, 1.5))          (Eq. 4)
+//! new_size           = current_size * adjustment_factor               (Eq. 5)
+//! ```
+//!
+//! where:
+//! - `T_GC` is the wall-clock duration of each GC cycle.
+//! - `T_APP` is process CPU time elapsed between consecutive GC cycles (sum of
+//!   CPU time over all threads — mutators, GC workers, compilers, etc.), read
+//!   via `clock_gettime(CLOCK_PROCESS_CPUTIME_ID)`.
+//!
+//! The final heap size is then clamped to the range
+//! `[max(1.1 * used, min_heap_pages), max_heap_pages]`, providing 10% headroom
+//! above current live memory to avoid triggering GC on an effectively-empty
+//! heap.
+//!
+//! ## Differences from the paper
+//!
+//! The paper targets ZGC, a concurrent generational collector. MMTk's Ruby
+//! binding currently ships stop-the-world collectors (Immix, MarkSweep). The
+//! paper's formula still applies: with a STW collector the process CPU time
+//! during GC closely tracks the wall-clock GC time, and mutator CPU time
+//! during the mutator phase is correctly attributed. For generational plans
+//! we skip nursery-only GCs, consistent with MemBalancer.
+
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+use std::sync::Mutex;
+
+use mmtk::util::heap::GCTriggerPolicy;
+use mmtk::util::heap::SpaceStats;
+use mmtk::Plan;
+use mmtk::MMTK;
+use once_cell::sync::OnceCell;
+
+use crate::Ruby;
+
+pub static CPU_HEAP_TRIGGER_CONFIG: OnceCell<CpuHeapTriggerConfig> = OnceCell::new();
+
+/// Configuration for the [`CpuHeapTrigger`].
+pub struct CpuHeapTriggerConfig {
+    /// Lower bound on heap size (in pages). The trigger will never shrink below
+    /// this value.
+    pub min_heap_pages: usize,
+    /// Upper bound on heap size (in pages). The trigger will never grow above
+    /// this value.
+    pub max_heap_pages: usize,
+    /// Initial heap size (in pages).
+    pub initial_heap_pages: usize,
+    /// Target GC CPU overhead as a fraction of total process CPU time. For
+    /// example, `0.15` means the policy will try to keep GC CPU usage near 15%.
+    /// Valid range: `(0.0, 1.0)`.
+    pub target_gc_cpu: f64,
+    /// Number of recent GC cycles averaged together when computing the CPU
+    /// overhead signal. Smoothes out short-term fluctuations. The paper uses 3.
+    pub window_size: usize,
+}
+
+/// A single GC cycle's timing measurements.
+#[derive(Clone, Copy, Debug, Default)]
+struct GcSample {
+    /// Wall-clock seconds spent inside this GC cycle.
+    gc_seconds: f64,
+    /// Seconds of process CPU time elapsed since the previous GC cycle ended.
+    /// This covers both mutator time and (on multi-threaded mutators) any
+    /// mutator CPU time consumed in parallel with the previous GC.
+    app_cpu_seconds: f64,
+}
+
+struct CpuHeapTriggerState {
+    /// Ring buffer of the last `window_size` samples. Oldest-first.
+    samples: Vec<GcSample>,
+    /// Wall-clock time when the current GC cycle started. `None` when no GC is
+    /// in progress.
+    gc_start_wall: Option<std::time::Instant>,
+    /// Process CPU time (seconds) recorded at the end of the previous GC
+    /// cycle. `None` until the first cycle completes.
+    last_gc_end_cpu: Option<f64>,
+}
+
+impl CpuHeapTriggerState {
+    fn new() -> Self {
+        Self {
+            samples: Vec::new(),
+            gc_start_wall: None,
+            last_gc_end_cpu: None,
+        }
+    }
+
+    /// Pushes a new sample, dropping the oldest when the window is full.
+    fn push_sample(&mut self, sample: GcSample, window_size: usize) {
+        if self.samples.len() >= window_size {
+            self.samples.remove(0);
+        }
+        self.samples.push(sample);
+    }
+
+    /// Returns the arithmetic mean GC CPU overhead across the window, or
+    /// `None` if we don't yet have a full sample (which happens on the first
+    /// GC cycle — we have no baseline for `app_cpu_seconds`).
+    fn mean_gc_cpu(&self) -> Option<f64> {
+        if self.samples.is_empty() {
+            return None;
+        }
+        let total_gc: f64 = self.samples.iter().map(|s| s.gc_seconds).sum();
+        let total_app: f64 = self.samples.iter().map(|s| s.app_cpu_seconds).sum();
+        if total_app <= 0.0 {
+            return None;
+        }
+        Some(total_gc / total_app)
+    }
+}
+
+pub struct CpuHeapTrigger {
+    /// Target heap size in pages. Updated at the end of each GC cycle.
+    target_heap_pages: AtomicUsize,
+    /// Mutable timing state. Wrapped in a `Mutex` because `on_gc_start` and
+    /// `on_gc_end` are the only mutation sites and they are not on an
+    /// allocation hot path; avoiding the complexity of lock-free state is
+    /// worth the trivial contention.
+    state: Mutex<CpuHeapTriggerState>,
+}
+
+impl Default for CpuHeapTrigger {
+    fn default() -> Self {
+        let cfg = Self::get_config();
+        Self {
+            target_heap_pages: AtomicUsize::new(cfg.initial_heap_pages),
+            state: Mutex::new(CpuHeapTriggerState::new()),
+        }
+    }
+}
+
+impl GCTriggerPolicy<Ruby> for CpuHeapTrigger {
+    fn is_gc_required(
+        &self,
+        space_full: bool,
+        space: Option<SpaceStats<Ruby>>,
+        plan: &dyn Plan<VM = Ruby>,
+    ) -> bool {
+        // Let the plan decide, matching the other triggers.
+        plan.collection_required(space_full, space)
+    }
+
+    fn on_gc_start(&self, _mmtk: &'static MMTK<Ruby>) {
+        let mut state = self.state.lock().unwrap();
+        state.gc_start_wall = Some(std::time::Instant::now());
+    }
+
+    fn on_gc_end(&self, mmtk: &'static MMTK<Ruby>) {
+        // Skip nursery-only GCs for generational plans. The heap resizing
+        // decision is driven by the (much more expensive) full collections
+        // where the signal-to-noise ratio is high enough to be useful.
+        if let Some(gen_plan) = mmtk.get_plan().generational() {
+            if gen_plan.is_current_gc_nursery() {
+                return;
+            }
+        }
+
+        let cfg = Self::get_config();
+        let gc_end_cpu = process_cpu_time_seconds();
+
+        let mut state = self.state.lock().unwrap();
+
+        // Duration of this GC cycle (wall clock).
+        let gc_seconds = state
+            .gc_start_wall
+            .take()
+            .map(|start| start.elapsed().as_secs_f64())
+            .unwrap_or(0.0);
+
+        // Process CPU time elapsed since the previous GC cycle ended. We
+        // require at least one previous end timestamp to produce a valid
+        // sample — without it we cannot compute `T_APP`.
+        if let (Some(last_end), Some(now)) = (state.last_gc_end_cpu, gc_end_cpu) {
+            let app_cpu_seconds = (now - last_end).max(0.0);
+            // Only record non-degenerate samples to avoid poisoning the window
+            // with zero-time entries from back-to-back GCs.
+            if app_cpu_seconds > 0.0 {
+                state.push_sample(
+                    GcSample {
+                        gc_seconds,
+                        app_cpu_seconds,
+                    },
+                    cfg.window_size,
+                );
+            }
+        }
+        state.last_gc_end_cpu = gc_end_cpu;
+
+        // Compute the new heap size only when we have samples to average over.
+        if let Some(gc_cpu) = state.mean_gc_cpu() {
+            // Drop the lock before doing the (relatively cheap) math and
+            // atomic update; nothing below needs the state.
+            drop(state);
+
+            let overhead_error = gc_cpu - cfg.target_gc_cpu; // Eq. (2)
+            let sigmoid_error = sigmoid(overhead_error); // Eq. (3)
+            let adjustment_factor = sigmoid_error + 0.5; // Eq. (4), range (0.5, 1.5)
+
+            let current = self.target_heap_pages.load(Ordering::Relaxed);
+            let suggested = ((current as f64) * adjustment_factor) as usize; // Eq. (5)
+
+            // Clamp:
+            // - upper bound: configured max
+            // - lower bound: max(1.1 * used, min) — 10% headroom above current
+            //   live memory, so we never request a heap so small that GC is
+            //   triggered immediately on return from this one.
+            let used = mmtk.get_plan().get_used_pages();
+            let floor = ((used as f64) * 1.1).ceil() as usize;
+            let lower = floor.max(cfg.min_heap_pages).min(cfg.max_heap_pages);
+            let upper = cfg.max_heap_pages;
+            let new_target = suggested.clamp(lower, upper);
+
+            self.target_heap_pages.store(new_target, Ordering::Relaxed);
+
+            info!(
+                "CpuHeapTrigger: gc_cpu={:.4} target={:.4} factor={:.4} \
+                 pages {} -> {} (used={}, clamp=[{}, {}])",
+                gc_cpu,
+                cfg.target_gc_cpu,
+                adjustment_factor,
+                current,
+                new_target,
+                used,
+                lower,
+                upper
+            );
+        }
+    }
+
+    fn is_heap_full(&self, plan: &dyn Plan<VM = Ruby>) -> bool {
+        plan.get_reserved_pages() > self.target_heap_pages.load(Ordering::Relaxed)
+    }
+
+    fn get_current_heap_size_in_pages(&self) -> usize {
+        self.target_heap_pages.load(Ordering::Relaxed)
+    }
+
+    fn get_max_heap_size_in_pages(&self) -> usize {
+        Self::get_config().max_heap_pages
+    }
+
+    fn can_heap_size_grow(&self) -> bool {
+        self.target_heap_pages.load(Ordering::Relaxed) < Self::get_config().max_heap_pages
+    }
+}
+
+impl CpuHeapTrigger {
+    fn get_config<'b>() -> &'b CpuHeapTriggerConfig {
+        CPU_HEAP_TRIGGER_CONFIG
+            .get()
+            .expect("Attempt to use CPU_HEAP_TRIGGER_CONFIG before it is initialized")
+    }
+}
+
+/// Standard logistic sigmoid. Returns 0.5 when x == 0, asymptotes to 0 and 1.
+fn sigmoid(x: f64) -> f64 {
+    1.0 / (1.0 + (-x).exp())
+}
+
+/// Reads the process-wide CPU time as a floating-point number of seconds,
+/// summed across all threads of this process. Returns `None` if the clock
+/// query fails (which should be essentially impossible on supported
+/// platforms).
+fn process_cpu_time_seconds() -> Option<f64> {
+    let mut ts = libc::timespec {
+        tv_sec: 0,
+        tv_nsec: 0,
+    };
+    // SAFETY: `clock_gettime` writes exactly `sizeof(timespec)` bytes to the
+    // pointer we pass, which is a valid local stack allocation.
+    let rc = unsafe { libc::clock_gettime(libc::CLOCK_PROCESS_CPUTIME_ID, &mut ts) };
+    if rc != 0 {
+        return None;
+    }
+    Some((ts.tv_sec as f64) + (ts.tv_nsec as f64) / 1_000_000_000.0)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn sigmoid_is_well_behaved() {
+        assert!((sigmoid(0.0) - 0.5).abs() < 1e-12);
+        assert!(sigmoid(-100.0) < 1e-9);
+        assert!(sigmoid(100.0) > 1.0 - 1e-9);
+        // Monotonic.
+        assert!(sigmoid(-1.0) < sigmoid(0.0));
+        assert!(sigmoid(0.0) < sigmoid(1.0));
+    }
+
+    #[test]
+    fn adjustment_factor_is_within_paper_bounds() {
+        // Eq. (4): adjustment_factor = sigmoid(e) + 0.5 must lie in (0.5, 1.5).
+        for e in [-10.0_f64, -1.0, 0.0, 1.0, 10.0] {
+            let f = sigmoid(e) + 0.5;
+            assert!(f > 0.5 && f < 1.5, "factor {f} out of range for e={e}");
+        }
+    }
+
+    #[test]
+    fn mean_gc_cpu_is_total_weighted() {
+        let mut state = CpuHeapTriggerState::new();
+        state.push_sample(
+            GcSample {
+                gc_seconds: 1.0,
+                app_cpu_seconds: 10.0,
+            },
+            3,
+        );
+        state.push_sample(
+            GcSample {
+                gc_seconds: 3.0,
+                app_cpu_seconds: 10.0,
+            },
+            3,
+        );
+        // (1 + 3) / (10 + 10) = 0.2
+        assert!((state.mean_gc_cpu().unwrap() - 0.2).abs() < 1e-12);
+    }
+
+    #[test]
+    fn window_drops_oldest() {
+        let mut state = CpuHeapTriggerState::new();
+        for i in 0..5 {
+            state.push_sample(
+                GcSample {
+                    gc_seconds: i as f64,
+                    app_cpu_seconds: 1.0,
+                },
+                3,
+            );
+        }
+        assert_eq!(state.samples.len(), 3);
+        // After pushing 0,1,2,3,4 with window 3, we should have [2,3,4].
+        assert_eq!(state.samples[0].gc_seconds, 2.0);
+        assert_eq!(state.samples[2].gc_seconds, 4.0);
+    }
+
+    #[test]
+    fn no_sample_without_prior_gc() {
+        // First GC cycle cannot produce a sample (no previous end time). The
+        // push happens only when last_gc_end_cpu is Some.
+        let state = CpuHeapTriggerState::new();
+        assert!(state.mean_gc_cpu().is_none());
+    }
+}
diff --git a/gc/mmtk/src/heap/mod.rs b/gc/mmtk/src/heap/mod.rs
new file mode 100644
index 0000000000..05a35efb23
--- /dev/null
+++ b/gc/mmtk/src/heap/mod.rs
@@ -0,0 +1,9 @@
+mod cpu_heap_trigger;
+mod ruby_heap_trigger;
+
+pub use cpu_heap_trigger::CpuHeapTrigger;
+pub use cpu_heap_trigger::CpuHeapTriggerConfig;
+pub use cpu_heap_trigger::CPU_HEAP_TRIGGER_CONFIG;
+pub use ruby_heap_trigger::RubyHeapTrigger;
+pub use ruby_heap_trigger::RubyHeapTriggerConfig;
+pub use ruby_heap_trigger::RUBY_HEAP_TRIGGER_CONFIG;
diff --git a/gc/mmtk/src/heap/ruby_heap_trigger.rs b/gc/mmtk/src/heap/ruby_heap_trigger.rs
new file mode 100644
index 0000000000..fe1130043d
--- /dev/null
+++ b/gc/mmtk/src/heap/ruby_heap_trigger.rs
@@ -0,0 +1,105 @@
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+
+use mmtk::util::heap::GCTriggerPolicy;
+use mmtk::util::heap::SpaceStats;
+use mmtk::Plan;
+use mmtk::MMTK;
+use once_cell::sync::OnceCell;
+
+use crate::Ruby;
+
+pub static RUBY_HEAP_TRIGGER_CONFIG: OnceCell<RubyHeapTriggerConfig> = OnceCell::new();
+
+pub struct RubyHeapTriggerConfig {
+    /// Min heap size
+    pub min_heap_pages: usize,
+    /// Max heap size
+    pub max_heap_pages: usize,
+    /// Minimum ratio of empty space after a GC before the heap will grow
+    pub heap_pages_min_ratio: f64,
+    /// Ratio the heap will grow by
+    pub heap_pages_goal_ratio: f64,
+    /// Maximum ratio of empty space after a GC before the heap will shrink
+    pub heap_pages_max_ratio: f64,
+}
+
+pub struct RubyHeapTrigger {
+    /// Target number of heap pages
+    target_heap_pages: AtomicUsize,
+}
+
+impl GCTriggerPolicy<Ruby> for RubyHeapTrigger {
+    fn is_gc_required(
+        &self,
+        space_full: bool,
+        space: Option<SpaceStats<Ruby>>,
+        plan: &dyn Plan<VM = Ruby>,
+    ) -> bool {
+        // Let the plan decide
+        plan.collection_required(space_full, space)
+    }
+
+    fn on_gc_end(&self, mmtk: &'static MMTK<Ruby>) {
+        if let Some(plan) = mmtk.get_plan().generational() {
+            if plan.is_current_gc_nursery() {
+                // Nursery GC
+            } else {
+                // Full GC
+            }
+
+            panic!("TODO: support for generational GC not implemented")
+        } else {
+            let used_pages = mmtk.get_plan().get_used_pages();
+
+            let target_min =
+                (used_pages as f64 * (1.0 + Self::get_config().heap_pages_min_ratio)) as usize;
+            let target_max =
+                (used_pages as f64 * (1.0 + Self::get_config().heap_pages_max_ratio)) as usize;
+            let new_target =
+                (((used_pages as f64) * (1.0 + Self::get_config().heap_pages_goal_ratio)) as usize)
+                    .clamp(
+                        Self::get_config().min_heap_pages,
+                        Self::get_config().max_heap_pages,
+                    );
+
+            if used_pages < target_min || used_pages > target_max {
+                self.target_heap_pages.store(new_target, Ordering::Relaxed);
+            }
+        }
+    }
+
+    fn is_heap_full(&self, plan: &dyn Plan<VM = Ruby>) -> bool {
+        plan.get_reserved_pages() > self.target_heap_pages.load(Ordering::Relaxed)
+    }
+
+    fn get_current_heap_size_in_pages(&self) -> usize {
+        self.target_heap_pages.load(Ordering::Relaxed)
+    }
+
+    fn get_max_heap_size_in_pages(&self) -> usize {
+        Self::get_config().max_heap_pages
+    }
+
+    fn can_heap_size_grow(&self) -> bool {
+        self.target_heap_pages.load(Ordering::Relaxed) < Self::get_config().max_heap_pages
+    }
+}
+
+impl Default for RubyHeapTrigger {
+    fn default() -> Self {
+        let min_heap_pages = Self::get_config().min_heap_pages;
+
+        Self {
+            target_heap_pages: AtomicUsize::new(min_heap_pages),
+        }
+    }
+}
+
+impl RubyHeapTrigger {
+    fn get_config<'b>() -> &'b RubyHeapTriggerConfig {
+        RUBY_HEAP_TRIGGER_CONFIG
+            .get()
+            .expect("Attempt to use RUBY_HEAP_TRIGGER_CONFIG before it is initialized")
+    }
+}
diff --git a/gc/mmtk/src/lib.rs b/gc/mmtk/src/lib.rs
index 01497e9c42..52dc782051 100644
--- a/gc/mmtk/src/lib.rs
+++ b/gc/mmtk/src/lib.rs
@@ -1,3 +1,7 @@
+// Warn about unsafe operations in functions that are already marked as unsafe.
+// This will become default in Rust 2024 edition.
+#![warn(unsafe_op_in_unsafe_fn)]
+
 extern crate libc;
 extern crate mmtk;
 #[macro_use]
@@ -10,8 +14,11 @@ use std::sync::Mutex;
 use std::thread::ThreadId;
 
 use abi::RubyUpcalls;
-use binding::{RubyBinding, RubyBindingFast, RubyConfiguration};
-use mmtk::vm::slot::{SimpleSlot, UnimplementedMemorySlice};
+use binding::RubyBinding;
+use binding::RubyBindingFast;
+use binding::RubyConfiguration;
+use mmtk::vm::slot::SimpleSlot;
+use mmtk::vm::slot::UnimplementedMemorySlice;
 use mmtk::vm::VMBinding;
 use mmtk::MMTK;
 use once_cell::sync::OnceCell;
@@ -21,7 +28,9 @@ pub mod active_plan;
 pub mod api;
 pub mod binding;
 pub mod collection;
+pub mod heap;
 pub mod object_model;
+pub mod pinning_registry;
 pub mod reference_glue;
 pub mod scanning;
 pub mod utils;
@@ -51,6 +60,11 @@ impl VMBinding for Ruby {
     type VMMemorySlice = RubyMemorySlice;
 }
 
+/// The callback for mutator thread panic handler (which calls rb_bug to output
+/// debugging information such as the Ruby backtrace and memory maps).
+/// This is set before BINDING is set because mmtk_init could panic.
+pub static MUTATOR_THREAD_PANIC_HANDLER: OnceCell<extern "C" fn()> = OnceCell::new();
+
 /// The singleton object for the Ruby binding itself.
 pub static BINDING: OnceCell<RubyBinding> = OnceCell::new();
 
@@ -112,8 +126,6 @@ fn handle_gc_thread_panic(panic_info: &PanicHookInfo) {
             eprintln!("Unknown backtrace status: {s:?}");
         }
     }
-
-    std::process::abort();
 }
 
 pub(crate) fn set_panic_hook() {
@@ -126,8 +138,24 @@ pub(crate) fn set_panic_hook() {
     std::panic::set_hook(Box::new(move |panic_info| {
         if is_gc_thread(std::thread::current().id()) {
             handle_gc_thread_panic(panic_info);
+
+            (crate::binding().upcalls().gc_thread_panic_handler)();
         } else {
             old_hook(panic_info);
+            (crate::MUTATOR_THREAD_PANIC_HANDLER
+                .get()
+                .expect("MUTATOR_THREAD_PANIC_HANDLER is not set"))();
         }
     }));
 }
+
+/// This kind of assertion is enabled if either building in debug mode or the
+/// "extra_assert" feature is enabled.
+#[macro_export]
+macro_rules! extra_assert {
+    ($($arg:tt)*) => {
+        if std::cfg!(any(debug_assertions, feature = "extra_assert")) {
+            std::assert!($($arg)*);
+        }
+    };
+}
diff --git a/gc/mmtk/src/object_model.rs b/gc/mmtk/src/object_model.rs
index abeef1f2b9..d673ca11a0 100644
--- a/gc/mmtk/src/object_model.rs
+++ b/gc/mmtk/src/object_model.rs
@@ -1,8 +1,15 @@
-use crate::abi::{RubyObjectAccess, OBJREF_OFFSET};
-use crate::{abi, Ruby};
+use std::ptr::copy_nonoverlapping;
+
+use crate::abi;
+use crate::abi::RubyObjectAccess;
+use crate::abi::MIN_OBJ_ALIGN;
+use crate::abi::OBJREF_OFFSET;
+use crate::Ruby;
 use mmtk::util::constants::BITS_IN_BYTE;
-use mmtk::util::copy::{CopySemantics, GCWorkerCopyContext};
-use mmtk::util::{Address, ObjectReference};
+use mmtk::util::copy::CopySemantics;
+use mmtk::util::copy::GCWorkerCopyContext;
+use mmtk::util::Address;
+use mmtk::util::ObjectReference;
 use mmtk::vm::*;
 
 pub struct VMObjectModel {}
@@ -36,13 +43,39 @@ impl ObjectModel<Ruby> for VMObjectModel {
     const NEED_VO_BITS_DURING_TRACING: bool = true;
 
     fn copy(
-        _from: ObjectReference,
-        _semantics: CopySemantics,
-        _copy_context: &mut GCWorkerCopyContext<Ruby>,
+        from: ObjectReference,
+        semantics: CopySemantics,
+        copy_context: &mut GCWorkerCopyContext<Ruby>,
     ) -> ObjectReference {
-        unimplemented!(
-            "Copying GC not currently supported"
-        )
+        let from_acc = RubyObjectAccess::from_objref(from);
+        let from_start = from_acc.obj_start();
+        let object_size = from_acc.object_size();
+        let to_start = copy_context.alloc_copy(from, object_size, MIN_OBJ_ALIGN, 0, semantics);
+        debug_assert!(!to_start.is_zero());
+        let to_payload = to_start.add(OBJREF_OFFSET);
+        unsafe {
+            copy_nonoverlapping::<u8>(from_start.to_ptr(), to_start.to_mut_ptr(), object_size);
+        }
+        let to_obj = unsafe { ObjectReference::from_raw_address_unchecked(to_payload) };
+        copy_context.post_copy(to_obj, object_size, semantics);
+        trace!("Copied object from {} to {}", from, to_obj);
+
+        (crate::binding().upcalls().move_obj_during_marking)(from, to_obj);
+
+        #[cfg(feature = "clear_old_copy")]
+        {
+            trace!(
+                "Clearing old copy {} ({}-{})",
+                from,
+                from_start,
+                from_start + object_size
+            );
+            // For debug purpose, we clear the old copy so that if the Ruby VM reads from the old
+            // copy again, it will likely result in an error.
+            unsafe { std::ptr::write_bytes::<u8>(from_start.to_mut_ptr(), 0, object_size) }
+        }
+
+        to_obj
     }
 
     fn copy_to(_from: ObjectReference, _to: ObjectReference, _region: Address) -> Address {
diff --git a/gc/mmtk/src/pinning_registry.rs b/gc/mmtk/src/pinning_registry.rs
new file mode 100644
index 0000000000..b498b508f1
--- /dev/null
+++ b/gc/mmtk/src/pinning_registry.rs
@@ -0,0 +1,187 @@
+use std::sync::Mutex;
+
+use mmtk::memory_manager;
+use mmtk::scheduler::GCWork;
+use mmtk::scheduler::GCWorker;
+use mmtk::scheduler::WorkBucketStage;
+use mmtk::util::ObjectReference;
+use mmtk::util::VMWorkerThread;
+use mmtk::MMTK;
+
+use crate::abi::GCThreadTLS;
+use crate::upcalls;
+use crate::Ruby;
+
+pub struct PinningRegistry {
+    pinning_objs: Mutex<Vec<ObjectReference>>,
+    pinned_objs: Mutex<Vec<ObjectReference>>,
+}
+
+impl PinningRegistry {
+    pub fn new() -> Self {
+        Self {
+            pinning_objs: Default::default(),
+            pinned_objs: Default::default(),
+        }
+    }
+
+    pub fn register(&self, object: ObjectReference) {
+        let mut pinning_objs = self.pinning_objs.lock().unwrap();
+        pinning_objs.push(object);
+    }
+
+    pub fn pin_children(&self, tls: VMWorkerThread) {
+        if !crate::mmtk().get_plan().current_gc_may_move_object() {
+            log::debug!("The current GC is non-moving, skipping pinning children.");
+            return;
+        }
+
+        let gc_tls = unsafe { GCThreadTLS::from_vwt_check(tls) };
+        let worker = gc_tls.worker();
+
+        let pinning_objs = self
+            .pinning_objs
+            .try_lock()
+            .expect("PinningRegistry should not have races during GC.");
+
+        let packet_size = 512;
+        let work_packets = pinning_objs
+            .chunks(packet_size)
+            .map(|chunk| {
+                Box::new(PinPinningChildren {
+                    pinning_objs: chunk.to_vec(),
+                }) as _
+            })
+            .collect();
+
+        worker.scheduler().work_buckets[WorkBucketStage::Prepare].bulk_add(work_packets);
+    }
+
+    pub fn cleanup(&self, worker: &mut GCWorker<Ruby>) {
+        worker.scheduler().work_buckets[WorkBucketStage::VMRefClosure].add(RemoveDeadPinnings);
+        if crate::mmtk().get_plan().current_gc_may_move_object() {
+            let packet = {
+                let mut pinned_objs = self
+                    .pinned_objs
+                    .try_lock()
+                    .expect("Unexpected contention on pinned_objs");
+                UnpinPinnedObjects {
+                    objs: std::mem::take(&mut pinned_objs),
+                }
+            };
+
+            worker.scheduler().work_buckets[WorkBucketStage::VMRefClosure].add(packet);
+        } else {
+            debug!("The current GC is non-moving, skipping unpinning objects.");
+            debug_assert_eq!(
+                {
+                    let pinned_objs = self
+                        .pinned_objs
+                        .try_lock()
+                        .expect("Unexpected contention on pinned_objs");
+                    pinned_objs.len()
+                },
+                0
+            );
+        }
+    }
+}
+
+impl Default for PinningRegistry {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+struct PinPinningChildren {
+    pinning_objs: Vec<ObjectReference>,
+}
+
+impl GCWork<Ruby> for PinPinningChildren {
+    fn do_work(&mut self, worker: &mut GCWorker<Ruby>, _mmtk: &'static MMTK<Ruby>) {
+        let gc_tls = unsafe { GCThreadTLS::from_vwt_check(worker.tls) };
+        let mut pinned_objs = vec![];
+        let mut newly_pinned_objs = vec![];
+
+        let visit_object = |_worker, target_object: ObjectReference, pin| {
+            log::trace!(
+                "    -> {} {}",
+                if pin { "(pin)" } else { "     " },
+                target_object
+            );
+            if pin {
+                debug_assert!(
+                    target_object.get_forwarded_object().is_none(),
+                    "Trying to pin {target_object} but has been moved"
+                );
+
+                pinned_objs.push(target_object);
+            }
+            target_object
+        };
+
+        gc_tls
+            .object_closure
+            .set_temporarily_and_run_code(visit_object, || {
+                for obj in self.pinning_objs.iter().cloned() {
+                    log::trace!("  Pinning: {}", obj);
+                    (upcalls().call_gc_mark_children)(obj);
+                }
+            });
+
+        for target_object in pinned_objs {
+            if memory_manager::pin_object(target_object) {
+                newly_pinned_objs.push(target_object);
+            }
+        }
+
+        let mut pinned_objs = crate::binding()
+            .pinning_registry
+            .pinned_objs
+            .lock()
+            .unwrap();
+        pinned_objs.append(&mut newly_pinned_objs);
+    }
+}
+
+struct RemoveDeadPinnings;
+
+impl GCWork<Ruby> for RemoveDeadPinnings {
+    fn do_work(&mut self, _worker: &mut GCWorker<Ruby>, _mmtk: &'static MMTK<Ruby>) {
+        log::debug!("Removing dead Pinnings...");
+
+        let registry = &crate::binding().pinning_registry;
+        {
+            let mut pinning_objs = registry
+                .pinning_objs
+                .try_lock()
+                .expect("PinningRegistry should not have races during GC.");
+
+            pinning_objs.retain_mut(|obj| {
+                if obj.is_live() {
+                    let new_obj = obj.get_forwarded_object().unwrap_or(*obj);
+                    *obj = new_obj;
+                    true
+                } else {
+                    log::trace!("  Dead Pinning removed: {}", *obj);
+                    false
+                }
+            });
+        }
+    }
+}
+
+struct UnpinPinnedObjects {
+    objs: Vec<ObjectReference>,
+}
+
+impl GCWork<Ruby> for UnpinPinnedObjects {
+    fn do_work(&mut self, _worker: &mut GCWorker<Ruby>, _mmtk: &'static MMTK<Ruby>) {
+        log::debug!("Unpinning pinned objects...");
+
+        for obj in self.objs.iter() {
+            let unpinned = memory_manager::unpin_object(*obj);
+            debug_assert!(unpinned);
+        }
+    }
+}
diff --git a/gc/mmtk/src/scanning.rs b/gc/mmtk/src/scanning.rs
index 33466b9db6..355a2e7759 100644
--- a/gc/mmtk/src/scanning.rs
+++ b/gc/mmtk/src/scanning.rs
@@ -1,11 +1,20 @@
 use crate::abi::GCThreadTLS;
 
+use crate::upcalls;
 use crate::utils::ChunkedVecCollector;
-use crate::{upcalls, Ruby, RubySlot};
-use mmtk::scheduler::{GCWork, GCWorker, WorkBucketStage};
-use mmtk::util::{ObjectReference, VMWorkerThread};
-use mmtk::vm::{ObjectTracer, RootsWorkFactory, Scanning, SlotVisitor};
-use mmtk::{Mutator, MutatorContext};
+use crate::Ruby;
+use crate::RubySlot;
+use mmtk::memory_manager;
+use mmtk::scheduler::GCWork;
+use mmtk::scheduler::GCWorker;
+use mmtk::scheduler::WorkBucketStage;
+use mmtk::util::ObjectReference;
+use mmtk::util::VMWorkerThread;
+use mmtk::vm::ObjectTracer;
+use mmtk::vm::RootsWorkFactory;
+use mmtk::vm::Scanning;
+use mmtk::vm::SlotVisitor;
+use mmtk::Mutator;
 
 pub struct VMScanning {}
 
@@ -45,20 +54,33 @@ impl Scanning<Ruby> for VMScanning {
                 mmtk::memory_manager::is_mmtk_object(target_object.to_raw_address()).is_some(),
                 "Destination is not an MMTk object. Src: {object} dst: {target_object}"
             );
+
+            debug_assert!(
+                // If we are in a moving GC, all objects should be pinned by PinningRegistry.
+                // If it is requested that target_object be pinned but it is not pinned, then
+                // it is a bug because it could be moved.
+                if crate::mmtk().get_plan().current_gc_may_move_object() && pin {
+                    memory_manager::is_pinned(target_object)
+                } else {
+                    true
+                },
+                "Object {object} is trying to pin {target_object}"
+            );
+
             let forwarded_target = object_tracer.trace_object(target_object);
             if forwarded_target != target_object {
-                trace!(
-                    "  Forwarded target {} -> {}",
-                    target_object,
-                    forwarded_target
-                );
+                trace!("  Forwarded target {target_object} -> {forwarded_target}");
             }
             forwarded_target
         };
         gc_tls
             .object_closure
             .set_temporarily_and_run_code(visit_object, || {
-                (upcalls().scan_object_ruby_style)(object);
+                (upcalls().call_gc_mark_children)(object);
+
+                if crate::mmtk().get_plan().current_gc_may_move_object() {
+                    (upcalls().update_object_references)(object);
+                }
             });
     }
 
@@ -67,14 +89,13 @@ impl Scanning<Ruby> for VMScanning {
     }
 
     fn scan_roots_in_mutator_thread(
-        tls: VMWorkerThread,
-        mutator: &'static mut Mutator<Ruby>,
-        mut factory: impl RootsWorkFactory<RubySlot>,
+        _tls: VMWorkerThread,
+        _mutator: &'static mut Mutator<Ruby>,
+        mut _factory: impl RootsWorkFactory<RubySlot>,
     ) {
-        let gc_tls = unsafe { GCThreadTLS::from_vwt_check(tls) };
-        Self::collect_object_roots_in("scan_thread_root", gc_tls, &mut factory, || {
-            (upcalls().scan_roots_in_mutator_thread)(mutator.get_tls(), tls);
-        });
+        // Do nothing.  All stacks (including Ruby stacks and machine stacks) are reachable from
+        // `rb_vm_t` -> ractor -> thread -> fiber -> stacks.  It is part of `ScanGCRoots` which
+        // calls `rb_gc_mark_roots` -> `rb_vm_mark`.
     }
 
     fn scan_vm_specific_roots(tls: VMWorkerThread, factory: impl RootsWorkFactory<RubySlot>) {
@@ -136,6 +157,7 @@ impl Scanning<Ruby> for VMScanning {
         crate::binding()
             .weak_proc
             .process_weak_stuff(worker, tracer_context);
+        crate::binding().pinning_registry.cleanup(worker);
         false
     }
 
@@ -252,15 +274,15 @@ impl<F: RootsWorkFactory<RubySlot>> GCWork<Ruby> for ScanWbUnprotectedRoots<F> {
         VMScanning::collect_object_roots_in("wb_unprot_roots", gc_tls, &mut self.factory, || {
             for object in self.objects.iter().copied() {
                 if object.is_reachable() {
-                    debug!(
-                        "[wb_unprot_roots] Visiting WB-unprotected object (parent): {}",
-                        object
-                    );
-                    (upcalls().scan_object_ruby_style)(object);
+                    debug!("[wb_unprot_roots] Visiting WB-unprotected object (parent): {object}");
+                    (upcalls().call_gc_mark_children)(object);
+
+                    if crate::mmtk().get_plan().current_gc_may_move_object() {
+                        (upcalls().update_object_references)(object);
+                    }
                 } else {
                     debug!(
-                        "[wb_unprot_roots] Skipping young WB-unprotected object (parent): {}",
-                        object
+                        "[wb_unprot_roots] Skipping young WB-unprotected object (parent): {object}"
                     );
                 }
             }
diff --git a/gc/mmtk/src/utils.rs b/gc/mmtk/src/utils.rs
index de929c3952..d1979eaf58 100644
--- a/gc/mmtk/src/utils.rs
+++ b/gc/mmtk/src/utils.rs
@@ -1,10 +1,13 @@
-use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
 
 use atomic_refcell::AtomicRefCell;
-use mmtk::scheduler::{GCWork, GCWorker, WorkBucketStage};
+use mmtk::scheduler::GCWork;
+use mmtk::scheduler::GCWorker;
+use mmtk::scheduler::WorkBucketStage;
 
-use sysinfo::System;
 use crate::Ruby;
+use sysinfo::System;
 
 pub struct ChunkedVecCollector<T> {
     vecs: Vec<Vec<T>>,
@@ -97,32 +100,29 @@ pub fn default_heap_max() -> usize {
         .expect("Invalid Memory size") as usize
 }
 
-pub fn parse_capacity(input: &String, default: usize) -> usize {
+pub fn parse_capacity(input: &str) -> Option<usize> {
     let trimmed = input.trim();
 
     const KIBIBYTE: usize = 1024;
     const MEBIBYTE: usize = 1024 * KIBIBYTE;
     const GIBIBYTE: usize = 1024 * MEBIBYTE;
 
-    let (val, suffix) = if let Some(pos) = trimmed.find(|c: char| !c.is_numeric()) {
-        (&trimmed[..pos], &trimmed[pos..])
+    let (number, suffix) = if let Some(pos) = trimmed.find(|c: char| !c.is_numeric()) {
+        trimmed.split_at(pos)
     } else {
         (trimmed, "")
     };
 
-    // 1MiB is the default heap size
-    match (val, suffix) {
-        (number, "GiB") => number.parse::<usize>()
-            .and_then(|v| Ok(v * GIBIBYTE))
-            .unwrap_or(default),
-        (number, "MiB") => number.parse::<usize>()
-            .and_then(|v| Ok(v * MEBIBYTE))
-            .unwrap_or(default),
-        (number, "KiB") => number.parse::<usize>()
-            .and_then(|v| Ok(v * KIBIBYTE))
-            .unwrap_or(default),
-        (number, suffix) if suffix.is_empty() => number.parse::<usize>().unwrap_or(default),
-        (_, _) => default
+    let Ok(v) = number.parse::<usize>() else {
+        return None;
+    };
+
+    match suffix {
+        "GiB" => Some(v * GIBIBYTE),
+        "MiB" => Some(v * MEBIBYTE),
+        "KiB" => Some(v * KIBIBYTE),
+        "" => Some(v),
+        _ => None,
     }
 }
 
@@ -132,32 +132,30 @@ mod tests {
 
     #[test]
     fn test_parse_capacity_parses_bare_bytes() {
-        assert_eq!(1234, parse_capacity(&String::from("1234"), 0));
+        assert_eq!(Some(1234), parse_capacity("1234"));
     }
 
     #[test]
     fn test_parse_capacity_parses_kibibytes() {
-        assert_eq!(10240, parse_capacity(&String::from("10KiB"), 0))
+        assert_eq!(Some(10240), parse_capacity("10KiB"));
     }
 
     #[test]
     fn test_parse_capacity_parses_mebibytes() {
-        assert_eq!(10485760, parse_capacity(&String::from("10MiB"), 0))
+        assert_eq!(Some(10485760), parse_capacity("10MiB"))
     }
 
     #[test]
     fn test_parse_capacity_parses_gibibytes() {
-        assert_eq!(10737418240, parse_capacity(&String::from("10GiB"), 0))
+        assert_eq!(Some(10737418240), parse_capacity("10GiB"))
     }
 
     #[test]
-    fn test_parses_nonsense_value_as_default_max() {
-        let default = 100;
-
-        assert_eq!(default, parse_capacity(&String::from("notanumber"), default));
-        assert_eq!(default, parse_capacity(&String::from("5tartswithanumber"), default));
-        assert_eq!(default, parse_capacity(&String::from("number1nthemiddle"), default));
-        assert_eq!(default, parse_capacity(&String::from("numberattheend111"), default));
-        assert_eq!(default, parse_capacity(&String::from("mult1pl3numb3r5"), default));
+    fn test_parse_capacity_parses_nonsense_values() {
+        assert_eq!(None, parse_capacity("notanumber"));
+        assert_eq!(None, parse_capacity("5tartswithanumber"));
+        assert_eq!(None, parse_capacity("number1nthemiddle"));
+        assert_eq!(None, parse_capacity("numberattheend111"));
+        assert_eq!(None, parse_capacity("mult1pl3numb3r5"));
     }
 }
diff --git a/gc/mmtk/src/weak_proc.rs b/gc/mmtk/src/weak_proc.rs
index 11f7f5abbf..d38dbe04a4 100644
--- a/gc/mmtk/src/weak_proc.rs
+++ b/gc/mmtk/src/weak_proc.rs
@@ -1,23 +1,23 @@
 use std::sync::Mutex;
 
-use mmtk::{
-    scheduler::{GCWork, GCWorker, WorkBucketStage},
-    util::ObjectReference,
-    vm::ObjectTracerContext,
-};
-
-use crate::{
-    abi::GCThreadTLS,
-    upcalls,
-    Ruby,
-};
+use mmtk::scheduler::GCWork;
+use mmtk::scheduler::GCWorker;
+use mmtk::scheduler::WorkBucketStage;
+use mmtk::util::ObjectReference;
+use mmtk::vm::ObjectTracerContext;
+
+use crate::abi::GCThreadTLS;
+use crate::upcalls;
+use crate::Ruby;
 
 pub struct WeakProcessor {
+    non_parallel_obj_free_candidates: Mutex<Vec<ObjectReference>>,
+    parallel_obj_free_candidates: Vec<Mutex<Vec<ObjectReference>>>,
+
     /// Objects that needs `obj_free` called when dying.
     /// If it is a bottleneck, replace it with a lock-free data structure,
     /// or add candidates in batch.
-    obj_free_candidates: Mutex<Vec<ObjectReference>>,
-    weak_references: Mutex<Vec<&'static mut ObjectReference>>,
+    weak_references: Mutex<Vec<ObjectReference>>,
 }
 
 impl Default for WeakProcessor {
@@ -29,47 +29,74 @@ impl Default for WeakProcessor {
 impl WeakProcessor {
     pub fn new() -> Self {
         Self {
-            obj_free_candidates: Mutex::new(Vec::new()),
+            non_parallel_obj_free_candidates: Mutex::new(Vec::new()),
+            parallel_obj_free_candidates: vec![Mutex::new(Vec::new())],
             weak_references: Mutex::new(Vec::new()),
         }
     }
 
-    /// Add an object as a candidate for `obj_free`.
-    ///
-    /// Multiple mutators can call it concurrently, so it has `&self`.
-    pub fn add_obj_free_candidate(&self, object: ObjectReference) {
-        let mut obj_free_candidates = self.obj_free_candidates.lock().unwrap();
-        obj_free_candidates.push(object);
+    pub fn init_parallel_obj_free_candidates(&mut self, num_workers: usize) {
+        debug_assert_eq!(self.parallel_obj_free_candidates.len(), 1);
+
+        for _ in 1..num_workers {
+            self.parallel_obj_free_candidates
+                .push(Mutex::new(Vec::new()));
+        }
     }
 
-    /// Add many objects as candidates for `obj_free`.
+    /// Add a batch of objects as candidates for `obj_free`.
     ///
-    /// Multiple mutators can call it concurrently, so it has `&self`.
-    pub fn add_obj_free_candidates(&self, objects: &[ObjectReference]) {
-        let mut obj_free_candidates = self.obj_free_candidates.lock().unwrap();
-        for object in objects.iter().copied() {
-            obj_free_candidates.push(object);
+    /// Amortizes mutex acquisition over the entire batch. Called when a
+    /// mutator's local buffer is flushed (buffer full or stop-the-world).
+    pub fn add_obj_free_candidates_batch(
+        &self,
+        objects: &[ObjectReference],
+        can_parallel_free: bool,
+    ) {
+        if objects.is_empty() {
+            return;
+        }
+
+        if can_parallel_free {
+            let num_buckets = self.parallel_obj_free_candidates.len();
+            for idx in 0..num_buckets {
+                let mut bucket = self.parallel_obj_free_candidates[idx].lock().unwrap();
+                for (i, &obj) in objects.iter().enumerate() {
+                    if i % num_buckets == idx {
+                        bucket.push(obj);
+                    }
+                }
+            }
+        } else {
+            self.non_parallel_obj_free_candidates
+                .lock()
+                .unwrap()
+                .extend_from_slice(objects);
         }
     }
 
     pub fn get_all_obj_free_candidates(&self) -> Vec<ObjectReference> {
-        let mut obj_free_candidates = self.obj_free_candidates.lock().unwrap();
-        std::mem::take(obj_free_candidates.as_mut())
+        // let mut obj_free_candidates = self.obj_free_candidates.lock().unwrap();
+        let mut all_obj_free_candidates = self
+            .non_parallel_obj_free_candidates
+            .lock()
+            .unwrap()
+            .to_vec();
+
+        for candidates_mutex in &self.parallel_obj_free_candidates {
+            all_obj_free_candidates.extend(candidates_mutex.lock().unwrap().to_vec());
+        }
+
+        std::mem::take(all_obj_free_candidates.as_mut())
     }
 
-    pub fn add_weak_reference(&self, ptr: &'static mut ObjectReference) {
+    pub fn add_weak_reference(&self, object: ObjectReference) {
         let mut weak_references = self.weak_references.lock().unwrap();
-        weak_references.push(ptr);
+        weak_references.push(object);
     }
 
-    pub fn remove_weak_reference(&self, ptr: &ObjectReference) {
-        let mut weak_references = self.weak_references.lock().unwrap();
-        for (i, curr_ptr) in weak_references.iter().enumerate() {
-            if *curr_ptr == ptr {
-                weak_references.swap_remove(i);
-                break;
-            }
-        }
+    pub fn weak_references_count(&self) -> usize {
+        self.weak_references.lock().unwrap().len()
     }
 
     pub fn process_weak_stuff(
@@ -77,80 +104,133 @@ impl WeakProcessor {
         worker: &mut GCWorker<Ruby>,
         _tracer_context: impl ObjectTracerContext<Ruby>,
     ) {
-        worker.add_work(WorkBucketStage::VMRefClosure, ProcessObjFreeCandidates);
+        worker.add_work(
+            WorkBucketStage::VMRefClosure,
+            ProcessNonParallelObjFreeCanadidates {},
+        );
+
+        for index in 0..self.parallel_obj_free_candidates.len() {
+            worker.add_work(
+                WorkBucketStage::VMRefClosure,
+                ProcessParallelObjFreeCandidates { index },
+            );
+        }
+
         worker.add_work(WorkBucketStage::VMRefClosure, ProcessWeakReferences);
 
         worker.add_work(WorkBucketStage::Prepare, UpdateFinalizerObjIdTables);
 
         let global_tables_count = (crate::upcalls().global_tables_count)();
         let work_packets = (0..global_tables_count)
-                .map(|i| {
-                    Box::new(UpdateGlobalTables { idx: i }) as _
-                })
-                .collect();
+            .map(|i| Box::new(UpdateGlobalTables { idx: i }) as _)
+            .collect();
 
         worker.scheduler().work_buckets[WorkBucketStage::VMRefClosure].bulk_add(work_packets);
 
-        worker.scheduler().work_buckets[WorkBucketStage::VMRefClosure].bulk_add(vec![
-            Box::new(UpdateWbUnprotectedObjectsList) as _,
-        ]);
+        worker.scheduler().work_buckets[WorkBucketStage::VMRefClosure]
+            .bulk_add(vec![Box::new(UpdateWbUnprotectedObjectsList) as _]);
+    }
+}
+
+fn process_obj_free_candidates(obj_free_candidates: &mut Vec<ObjectReference>) {
+    // Process obj_free
+    let mut new_candidates = Vec::new();
+
+    for object in obj_free_candidates.iter().copied() {
+        if object.is_reachable() {
+            // Forward and add back to the candidate list.
+            let new_object = object.forward();
+            trace!("Forwarding obj_free candidate: {object} -> {new_object}");
+            new_candidates.push(new_object);
+        } else {
+            (upcalls().call_obj_free)(object);
+        }
+    }
+
+    *obj_free_candidates = new_candidates;
+}
+
+struct ProcessParallelObjFreeCandidates {
+    index: usize,
+}
+
+impl GCWork<Ruby> for ProcessParallelObjFreeCandidates {
+    fn do_work(&mut self, _worker: &mut GCWorker<Ruby>, _mmtk: &'static mmtk::MMTK<Ruby>) {
+        let mut obj_free_candidates = crate::binding().weak_proc.parallel_obj_free_candidates
+            [self.index]
+            .try_lock()
+            .expect("Lock for parallel_obj_free_candidates should not be held");
+
+        process_obj_free_candidates(&mut obj_free_candidates);
     }
 }
 
-struct ProcessObjFreeCandidates;
+struct ProcessNonParallelObjFreeCanadidates;
 
-impl GCWork<Ruby> for ProcessObjFreeCandidates {
+impl GCWork<Ruby> for ProcessNonParallelObjFreeCanadidates {
     fn do_work(&mut self, _worker: &mut GCWorker<Ruby>, _mmtk: &'static mmtk::MMTK<Ruby>) {
-        // If it blocks, it is a bug.
         let mut obj_free_candidates = crate::binding()
             .weak_proc
-            .obj_free_candidates
+            .non_parallel_obj_free_candidates
             .try_lock()
-            .expect("It's GC time.  No mutators should hold this lock at this time.");
+            .expect("Lock for non_parallel_obj_free_candidates should not be held");
 
-        let n_cands = obj_free_candidates.len();
+        process_obj_free_candidates(&mut obj_free_candidates);
+    }
+}
 
-        debug!("Total: {} candidates", n_cands);
+struct ProcessWeakReferences;
 
-        // Process obj_free
-        let mut new_candidates = Vec::new();
+impl GCWork<Ruby> for ProcessWeakReferences {
+    fn do_work(&mut self, worker: &mut GCWorker<Ruby>, _mmtk: &'static mmtk::MMTK<Ruby>) {
+        if crate::mmtk().get_plan().current_gc_may_move_object() {
+            let gc_tls: &mut GCThreadTLS = unsafe { GCThreadTLS::from_vwt_check(worker.tls) };
 
-        for object in obj_free_candidates.iter().copied() {
-            if object.is_reachable() {
-                // Forward and add back to the candidate list.
-                let new_object = object.forward();
-                trace!(
-                    "Forwarding obj_free candidate: {} -> {}",
-                    object,
-                    new_object
+            let visit_object = |_worker, target_object: ObjectReference, _pin| {
+                debug_assert!(
+                    mmtk::memory_manager::is_mmtk_object(target_object.to_raw_address()).is_some(),
+                    "Destination is not an MMTk object"
                 );
-                new_candidates.push(new_object);
-            } else {
-                (upcalls().call_obj_free)(object);
-            }
-        }
 
-        *obj_free_candidates = new_candidates;
+                target_object
+                    .get_forwarded_object()
+                    .unwrap_or(target_object)
+            };
+
+            gc_tls
+                .object_closure
+                .set_temporarily_and_run_code(visit_object, || {
+                    self.process_weak_references(true);
+                })
+        } else {
+            self.process_weak_references(false);
+        }
     }
 }
 
-struct ProcessWeakReferences;
-
-impl GCWork<Ruby> for ProcessWeakReferences {
-    fn do_work(&mut self, _worker: &mut GCWorker<Ruby>, _mmtk: &'static mmtk::MMTK<Ruby>) {
+impl ProcessWeakReferences {
+    fn process_weak_references(&mut self, moving_gc: bool) {
         let mut weak_references = crate::binding()
             .weak_proc
             .weak_references
             .try_lock()
             .expect("Mutators should not be holding the lock.");
 
-            for ptr_ptr in weak_references.iter_mut() {
-                if !(**ptr_ptr).is_reachable() {
-                    **ptr_ptr = crate::binding().weak_reference_dead_value;
-                }
+        weak_references.retain_mut(|object_ptr| {
+            let object = object_ptr.get_forwarded_object().unwrap_or(*object_ptr);
+
+            if object != *object_ptr {
+                *object_ptr = object;
             }
 
-            weak_references.clear();
+            if object.is_reachable() {
+                (upcalls().handle_weak_references)(object, moving_gc);
+
+                true
+            } else {
+                false
+            }
+        });
     }
 }
 
@@ -165,11 +245,10 @@ trait GlobalTableProcessingWork {
         let forward_object = |_worker, object: ObjectReference, _pin| {
             debug_assert!(
                 mmtk::memory_manager::is_mmtk_object(object.to_raw_address()).is_some(),
-                "{} is not an MMTk object",
-                object
+                "{object} is not an MMTk object"
             );
             let result = object.forward();
-            trace!("Forwarding reference: {} -> {}", object, result);
+            trace!("Forwarding reference: {object} -> {result}");
             result
         };
 
@@ -185,7 +264,6 @@ struct UpdateFinalizerObjIdTables;
 impl GlobalTableProcessingWork for UpdateFinalizerObjIdTables {
     fn process_table(&mut self) {
         (crate::upcalls().update_finalizer_table)();
-        (crate::upcalls().update_obj_id_tables)();
     }
 }
 impl GCWork<Ruby> for UpdateFinalizerObjIdTables {
@@ -195,11 +273,14 @@ impl GCWork<Ruby> for UpdateFinalizerObjIdTables {
 }
 
 struct UpdateGlobalTables {
-    idx: i32
+    idx: i32,
 }
 impl GlobalTableProcessingWork for UpdateGlobalTables {
     fn process_table(&mut self) {
-        (crate::upcalls().update_global_tables)(self.idx)
+        (crate::upcalls().update_global_tables)(
+            self.idx,
+            crate::mmtk().get_plan().current_gc_may_move_object(),
+        )
     }
 }
 impl GCWork<Ruby> for UpdateGlobalTables {
@@ -224,14 +305,10 @@ impl GCWork<Ruby> for UpdateWbUnprotectedObjectsList {
             if object.is_reachable() {
                 // Forward and add back to the candidate list.
                 let new_object = object.forward();
-                trace!(
-                    "Forwarding WB-unprotected object: {} -> {}",
-                    object,
-                    new_object
-                );
+                trace!("Forwarding WB-unprotected object: {object} -> {new_object}");
                 objects.insert(new_object);
             } else {
-                trace!("Removing WB-unprotected object from list: {}", object);
+                trace!("Removing WB-unprotected object from list: {object}");
             }
         }