15 files changed, 2955 insertions, 0 deletions
diff --git a/gc/mmtk/src/abi.rs b/gc/mmtk/src/abi.rs
new file mode 100644
index 0000000000..30890e0853
--- /dev/null
+++ b/gc/mmtk/src/abi.rs
@@ -0,0 +1,335 @@
+use crate::api::RubyMutator;
+use crate::extra_assert;
+use crate::Ruby;
+use libc::c_int;
+use mmtk::scheduler::GCWorker;
+use mmtk::util::Address;
+use mmtk::util::ObjectReference;
+use mmtk::util::VMMutatorThread;
+use mmtk::util::VMWorkerThread;
+
+// For the C binding
+pub const OBJREF_OFFSET: usize = 8;
+pub const MIN_OBJ_ALIGN: usize = 8; // Even on 32-bit machine.  A Ruby object is at least 40 bytes large.
+
+pub const GC_THREAD_KIND_WORKER: libc::c_int = 1;
+
+const HIDDEN_SIZE_MASK: usize = 0x0000FFFFFFFFFFFF;
+
+// An opaque type for the C counterpart.
+#[allow(non_camel_case_types)]
+pub struct st_table;
+
+#[repr(C)]
+pub struct HiddenHeader {
+    pub prefix: usize,
+}
+
+impl HiddenHeader {
+    #[inline(always)]
+    pub fn is_sane(&self) -> bool {
+        self.prefix & !HIDDEN_SIZE_MASK == 0
+    }
+
+    #[inline(always)]
+    fn assert_sane(&self) {
+        extra_assert!(
+            self.is_sane(),
+            "Hidden header is corrupted: {:x}",
+            self.prefix
+        );
+    }
+
+    pub fn payload_size(&self) -> usize {
+        self.assert_sane();
+        self.prefix & HIDDEN_SIZE_MASK
+    }
+}
+
+/// Provide convenient methods for accessing Ruby objects.
+/// TODO: Wrap C functions in `RubyUpcalls` as Rust-friendly methods.
+pub struct RubyObjectAccess {
+    objref: ObjectReference,
+}
+
+impl RubyObjectAccess {
+    pub fn from_objref(objref: ObjectReference) -> Self {
+        Self { objref }
+    }
+
+    pub fn obj_start(&self) -> Address {
+        self.objref.to_raw_address().sub(Self::prefix_size())
+    }
+
+    pub fn payload_addr(&self) -> Address {
+        self.objref.to_raw_address()
+    }
+
+    pub fn suffix_addr(&self) -> Address {
+        self.objref.to_raw_address().add(self.payload_size())
+    }
+
+    pub fn obj_end(&self) -> Address {
+        self.suffix_addr() + Self::suffix_size()
+    }
+
+    fn hidden_header(&self) -> &'static HiddenHeader {
+        unsafe { self.obj_start().as_ref() }
+    }
+
+    #[allow(unused)] // Maybe we need to mutate the hidden header in the future.
+    fn hidden_header_mut(&self) -> &'static mut HiddenHeader {
+        unsafe { self.obj_start().as_mut_ref() }
+    }
+
+    pub fn payload_size(&self) -> usize {
+        self.hidden_header().payload_size()
+    }
+
+    fn flags_field(&self) -> Address {
+        self.objref.to_raw_address()
+    }
+
+    pub fn load_flags(&self) -> usize {
+        unsafe { self.flags_field().load::<usize>() }
+    }
+
+    pub fn prefix_size() -> usize {
+        // Currently, a hidden size field of word size is placed before each object.
+        OBJREF_OFFSET
+    }
+
+    pub fn suffix_size() -> usize {
+        // In RACTOR_CHECK_MODE, Ruby hides a field after each object to hold the Ractor ID.
+        unsafe { crate::BINDING_FAST.suffix_size }
+    }
+
+    pub fn object_size(&self) -> usize {
+        Self::prefix_size() + self.payload_size() + Self::suffix_size()
+    }
+}
+
+type ObjectClosureFunction =
+    extern "C" fn(*mut libc::c_void, *mut libc::c_void, ObjectReference, bool) -> ObjectReference;
+
+#[repr(C)]
+pub struct ObjectClosure {
+    /// The function to be called from C.
+    pub c_function: ObjectClosureFunction,
+    /// The pointer to the Rust-level closure object.
+    pub rust_closure: *mut libc::c_void,
+}
+
+impl Default for ObjectClosure {
+    fn default() -> Self {
+        Self {
+            c_function: THE_UNREGISTERED_CLOSURE_FUNC,
+            rust_closure: std::ptr::null_mut(),
+        }
+    }
+}
+
+/// Rust doesn't require function items to have a unique address.
+/// We therefore force using this particular constant.
+///
+/// See: https://rust-lang.github.io/rust-clippy/master/index.html#fn_address_comparisons
+const THE_UNREGISTERED_CLOSURE_FUNC: ObjectClosureFunction = ObjectClosure::c_function_unregistered;
+
+impl ObjectClosure {
+    /// Set this ObjectClosure temporarily to `visit_object`, and execute `f`.  During the execution of
+    /// `f`, the Ruby VM may call this ObjectClosure.  When the Ruby VM calls this ObjectClosure,
+    /// it effectively calls `visit_object`.
+    ///
+    /// This method is intended to run Ruby VM code in `f` with temporarily modified behavior of
+    /// `rb_gc_mark`, `rb_gc_mark_movable` and `rb_gc_location`
+    ///
+    /// Both `f` and `visit_object` may access and modify local variables in the environment where
+    /// `set_temporarily_and_run_code` called.
+    ///
+    /// Note that this function is not reentrant.  Don't call this function in either `callback` or
+    /// `f`.
+    pub fn set_temporarily_and_run_code<'env, T, F1, F2>(
+        &mut self,
+        mut visit_object: F1,
+        f: F2,
+    ) -> T
+    where
+        F1: 'env + FnMut(&'static mut GCWorker<Ruby>, ObjectReference, bool) -> ObjectReference,
+        F2: 'env + FnOnce() -> T,
+    {
+        debug_assert!(
+            std::ptr::fn_addr_eq(self.c_function, THE_UNREGISTERED_CLOSURE_FUNC),
+            "set_temporarily_and_run_code is recursively called."
+        );
+        self.c_function = Self::c_function_registered::<F1>;
+        self.rust_closure = &mut visit_object as *mut F1 as *mut libc::c_void;
+        let result = f();
+        *self = Default::default();
+        result
+    }
+
+    extern "C" fn c_function_registered<F>(
+        rust_closure: *mut libc::c_void,
+        worker: *mut libc::c_void,
+        object: ObjectReference,
+        pin: bool,
+    ) -> ObjectReference
+    where
+        F: FnMut(&'static mut GCWorker<Ruby>, ObjectReference, bool) -> ObjectReference,
+    {
+        let rust_closure = unsafe { &mut *(rust_closure as *mut F) };
+        let worker = unsafe { &mut *(worker as *mut GCWorker<Ruby>) };
+        rust_closure(worker, object, pin)
+    }
+
+    extern "C" fn c_function_unregistered(
+        _rust_closure: *mut libc::c_void,
+        worker: *mut libc::c_void,
+        object: ObjectReference,
+        pin: bool,
+    ) -> ObjectReference {
+        let worker = unsafe { &mut *(worker as *mut GCWorker<Ruby>) };
+        panic!(
+            "object_closure is not set.  worker ordinal: {}, object: {}, pin: {}",
+            worker.ordinal, object, pin
+        );
+    }
+}
+
+#[repr(C)]
+pub struct GCThreadTLS {
+    pub kind: libc::c_int,
+    pub gc_context: *mut libc::c_void,
+    pub object_closure: ObjectClosure,
+}
+
+impl GCThreadTLS {
+    fn new(kind: libc::c_int, gc_context: *mut libc::c_void) -> Self {
+        Self {
+            kind,
+            gc_context,
+            object_closure: Default::default(),
+        }
+    }
+
+    pub fn for_worker(gc_context: *mut GCWorker<Ruby>) -> Self {
+        Self::new(GC_THREAD_KIND_WORKER, gc_context as *mut libc::c_void)
+    }
+
+    pub fn from_vwt(vwt: VMWorkerThread) -> *mut GCThreadTLS {
+        unsafe { std::mem::transmute(vwt) }
+    }
+
+    /// Cast a pointer to `GCThreadTLS` to a ref, with assertion for null pointer.
+    ///
+    /// # Safety
+    ///
+    /// Has undefined behavior if `ptr` is invalid.
+    pub unsafe fn check_cast(ptr: *mut GCThreadTLS) -> &'static mut GCThreadTLS {
+        assert!(!ptr.is_null());
+        let result = unsafe { &mut *ptr };
+        debug_assert!({
+            let kind = result.kind;
+            kind == GC_THREAD_KIND_WORKER
+        });
+        result
+    }
+
+    /// Cast a pointer to `VMWorkerThread` to a ref, with assertion for null pointer.
+    ///
+    /// # Safety
+    ///
+    /// Has undefined behavior if `ptr` is invalid.
+    pub unsafe fn from_vwt_check(vwt: VMWorkerThread) -> &'static mut GCThreadTLS {
+        let ptr = Self::from_vwt(vwt);
+        unsafe { Self::check_cast(ptr) }
+    }
+
+    #[allow(clippy::not_unsafe_ptr_arg_deref)] // `transmute` does not dereference pointer
+    pub fn to_vwt(ptr: *mut Self) -> VMWorkerThread {
+        unsafe { std::mem::transmute(ptr) }
+    }
+
+    pub fn worker<'w>(&mut self) -> &'w mut GCWorker<Ruby> {
+        // NOTE: The returned ref points to the worker which does not have the same lifetime as self.
+        assert!(self.kind == GC_THREAD_KIND_WORKER);
+        unsafe { &mut *(self.gc_context as *mut GCWorker<Ruby>) }
+    }
+}
+
+#[repr(C)]
+#[derive(Clone)]
+pub struct RawVecOfObjRef {
+    pub ptr: *mut ObjectReference,
+    pub len: usize,
+    pub capa: usize,
+}
+
+impl RawVecOfObjRef {
+    pub fn from_vec(vec: Vec<ObjectReference>) -> RawVecOfObjRef {
+        // Note: Vec::into_raw_parts is unstable. We implement it manually.
+        let mut vec = std::mem::ManuallyDrop::new(vec);
+        let (ptr, len, capa) = (vec.as_mut_ptr(), vec.len(), vec.capacity());
+
+        RawVecOfObjRef { ptr, len, capa }
+    }
+
+    /// # Safety
+    ///
+    /// This function turns raw pointer into a Vec without check.
+    pub unsafe fn into_vec(self) -> Vec<ObjectReference> {
+        unsafe { Vec::from_raw_parts(self.ptr, self.len, self.capa) }
+    }
+}
+
+impl From<Vec<ObjectReference>> for RawVecOfObjRef {
+    fn from(v: Vec<ObjectReference>) -> Self {
+        Self::from_vec(v)
+    }
+}
+
+#[repr(C)]
+#[derive(Clone)]
+pub struct RubyBindingOptions {
+    pub suffix_size: usize,
+}
+
+#[repr(C)]
+#[derive(Clone)]
+pub struct RubyUpcalls {
+    pub init_gc_worker_thread: extern "C" fn(gc_worker_tls: *mut GCThreadTLS),
+    pub is_mutator: extern "C" fn() -> bool,
+    pub stop_the_world: extern "C" fn(),
+    pub resume_mutators: extern "C" fn(gc_may_move: bool),
+    pub block_for_gc: extern "C" fn(tls: VMMutatorThread),
+    pub before_updating_jit_code: extern "C" fn(),
+    pub after_updating_jit_code: extern "C" fn(),
+    pub number_of_mutators: extern "C" fn() -> usize,
+    pub get_mutators: extern "C" fn(
+        visit_mutator: extern "C" fn(*mut RubyMutator, *mut libc::c_void),
+        data: *mut libc::c_void,
+    ),
+    pub scan_gc_roots: extern "C" fn(),
+    pub scan_objspace: extern "C" fn(),
+    pub move_obj_during_marking: extern "C" fn(from: ObjectReference, to: ObjectReference),
+    pub update_object_references: extern "C" fn(object: ObjectReference),
+    pub call_gc_mark_children: extern "C" fn(object: ObjectReference),
+    pub handle_weak_references: extern "C" fn(object: ObjectReference, moving: bool),
+    pub call_obj_free: extern "C" fn(object: ObjectReference),
+    pub vm_live_bytes: extern "C" fn() -> usize,
+    pub update_global_tables: extern "C" fn(tbl_idx: c_int, moving: bool),
+    pub global_tables_count: extern "C" fn() -> c_int,
+    pub update_finalizer_table: extern "C" fn(),
+    pub special_const_p: extern "C" fn(object: ObjectReference) -> bool,
+    pub mutator_thread_panic_handler: extern "C" fn(),
+    pub gc_thread_panic_handler: extern "C" fn(),
+}
+
+unsafe impl Sync for RubyUpcalls {}
+
+#[repr(C)]
+#[derive(Clone)]
+pub struct HeapBounds {
+    pub start: *mut libc::c_void,
+    pub end: *mut libc::c_void,
+}
diff --git a/gc/mmtk/src/active_plan.rs b/gc/mmtk/src/active_plan.rs
new file mode 100644
index 0000000000..80372a7576
--- /dev/null
+++ b/gc/mmtk/src/active_plan.rs
@@ -0,0 +1,56 @@
+use std::collections::VecDeque;
+use std::marker::PhantomData;
+
+use crate::mmtk;
+use crate::upcalls;
+use crate::Ruby;
+use mmtk::util::opaque_pointer::*;
+use mmtk::vm::ActivePlan;
+use mmtk::Mutator;
+
+pub struct VMActivePlan {}
+
+impl ActivePlan<Ruby> for VMActivePlan {
+    fn number_of_mutators() -> usize {
+        (upcalls().number_of_mutators)()
+    }
+
+    fn is_mutator(_tls: VMThread) -> bool {
+        (upcalls().is_mutator)()
+    }
+
+    fn mutator(_tls: VMMutatorThread) -> &'static mut Mutator<Ruby> {
+        unimplemented!()
+    }
+
+    fn mutators<'a>() -> Box<dyn Iterator<Item = &'a mut Mutator<Ruby>> + 'a> {
+        let mut mutators = VecDeque::new();
+        (upcalls().get_mutators)(
+            add_mutator_to_vec,
+            &mut mutators as *mut VecDeque<&mut Mutator<Ruby>> as _,
+        );
+
+        Box::new(RubyMutatorIterator {
+            mutators,
+            phantom_data: PhantomData,
+        })
+    }
+}
+
+extern "C" fn add_mutator_to_vec(mutator: *mut Mutator<Ruby>, mutators: *mut libc::c_void) {
+    let mutators = unsafe { &mut *(mutators as *mut VecDeque<*mut Mutator<Ruby>>) };
+    mutators.push_back(unsafe { &mut *mutator });
+}
+
+struct RubyMutatorIterator<'a> {
+    mutators: VecDeque<&'a mut Mutator<Ruby>>,
+    phantom_data: PhantomData<&'a ()>,
+}
+
+impl<'a> Iterator for RubyMutatorIterator<'a> {
+    type Item = &'a mut Mutator<Ruby>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.mutators.pop_front()
+    }
+}
diff --git a/gc/mmtk/src/api.rs b/gc/mmtk/src/api.rs
new file mode 100644
index 0000000000..c0540fe0c8
--- /dev/null
+++ b/gc/mmtk/src/api.rs
@@ -0,0 +1,551 @@
+// Functions in this module are unsafe for one reason:
+// They are called by C functions and they need to pass raw pointers to Rust.
+#![allow(clippy::missing_safety_doc)]
+
+use mmtk::util::alloc::BumpPointer;
+use mmtk::util::alloc::ImmixAllocator;
+use mmtk::util::conversions;
+use mmtk::util::options::PlanSelector;
+use std::str::FromStr;
+use std::sync::atomic::Ordering;
+
+use crate::abi::RawVecOfObjRef;
+use crate::abi::RubyBindingOptions;
+use crate::abi::RubyUpcalls;
+use crate::binding;
+use crate::binding::RubyBinding;
+use crate::heap::CpuHeapTriggerConfig;
+use crate::heap::RubyHeapTriggerConfig;
+use crate::heap::CPU_HEAP_TRIGGER_CONFIG;
+use crate::heap::RUBY_HEAP_TRIGGER_CONFIG;
+use crate::mmtk;
+use crate::utils::default_heap_max;
+use crate::utils::parse_capacity;
+use crate::Ruby;
+use crate::RubySlot;
+use mmtk::memory_manager;
+use mmtk::memory_manager::mmtk_init;
+use mmtk::util::constants::MIN_OBJECT_SIZE;
+use mmtk::util::options::GCTriggerSelector;
+use mmtk::util::Address;
+use mmtk::util::ObjectReference;
+use mmtk::util::VMMutatorThread;
+use mmtk::util::VMThread;
+use mmtk::AllocationSemantics;
+use mmtk::MMTKBuilder;
+use mmtk::Mutator;
+
+pub type RubyMutator = Mutator<Ruby>;
+
+#[no_mangle]
+pub extern "C" fn mmtk_is_live_object(object: ObjectReference) -> bool {
+    memory_manager::is_live_object(object)
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_is_reachable(object: ObjectReference) -> bool {
+    object.is_reachable()
+}
+
+// =============== Bootup ===============
+
+fn parse_env_var_with<T, F: FnOnce(&str) -> Option<T>>(key: &str, parse: F) -> Option<T> {
+    let val = match std::env::var(key) {
+        Ok(val) => val,
+        Err(std::env::VarError::NotPresent) => return None,
+        Err(std::env::VarError::NotUnicode(os_string)) => {
+            eprintln!("[FATAL] Invalid {key} {os_string:?}");
+            std::process::exit(1);
+        }
+    };
+
+    let parsed = parse(&val).unwrap_or_else(|| {
+        eprintln!("[FATAL] Invalid {key} {val}");
+        std::process::exit(1);
+    });
+
+    Some(parsed)
+}
+
+fn parse_env_var<T: FromStr>(key: &str) -> Option<T> {
+    parse_env_var_with(key, |s| s.parse().ok())
+}
+
+fn mmtk_builder_default_parse_threads() -> Option<usize> {
+    parse_env_var("MMTK_THREADS")
+}
+
+fn mmtk_builder_default_parse_heap_min() -> usize {
+    const DEFAULT_HEAP_MIN: usize = 1 << 20;
+    parse_env_var_with("MMTK_HEAP_MIN", parse_capacity).unwrap_or(DEFAULT_HEAP_MIN)
+}
+
+fn mmtk_builder_default_parse_heap_max() -> usize {
+    parse_env_var_with("MMTK_HEAP_MAX", parse_capacity).unwrap_or_else(default_heap_max)
+}
+
+fn parse_float_env_var(key: &str, default: f64, min: f64, max: f64) -> f64 {
+    parse_env_var_with(key, |s| {
+        let mut float = f64::from_str(s).unwrap_or(default);
+
+        if float <= min {
+            eprintln!(
+                "{key} has value {float} which must be greater than {min}, using default instead"
+            );
+            float = default;
+        }
+
+        if float >= max {
+            eprintln!(
+                "{key} has value {float} which must be less than {max}, using default instead"
+            );
+            float = default;
+        }
+
+        Some(float)
+    })
+    .unwrap_or(default)
+}
+
+fn mmtk_builder_default_parse_heap_mode(heap_min: usize, heap_max: usize) -> GCTriggerSelector {
+    let make_fixed = || GCTriggerSelector::FixedHeapSize(heap_max);
+    let make_dynamic = || GCTriggerSelector::DynamicHeapSize(heap_min, heap_max);
+
+    parse_env_var_with("MMTK_HEAP_MODE", |s| match s {
+        "fixed" => Some(make_fixed()),
+        "dynamic" => Some(make_dynamic()),
+        "ruby" => {
+            let min_ratio = parse_float_env_var("RUBY_GC_HEAP_FREE_SLOTS_MIN_RATIO", 0.2, 0.0, 1.0);
+            let goal_ratio =
+                parse_float_env_var("RUBY_GC_HEAP_FREE_SLOTS_GOAL_RATIO", 0.4, min_ratio, 1.0);
+            let max_ratio =
+                parse_float_env_var("RUBY_GC_HEAP_FREE_SLOTS_MAX_RATIO", 0.65, goal_ratio, 1.0);
+
+            crate::heap::RUBY_HEAP_TRIGGER_CONFIG
+                .set(RubyHeapTriggerConfig {
+                    min_heap_pages: conversions::bytes_to_pages_up(heap_min),
+                    max_heap_pages: conversions::bytes_to_pages_up(heap_max),
+                    heap_pages_min_ratio: min_ratio,
+                    heap_pages_goal_ratio: goal_ratio,
+                    heap_pages_max_ratio: max_ratio,
+                })
+                .unwrap_or_else(|_| panic!("RUBY_HEAP_TRIGGER_CONFIG is already set"));
+
+            Some(GCTriggerSelector::Delegated)
+        }
+        "cpu" => {
+            // CPU-overhead-driven heap sizing based on Tavakolisomeh et al.,
+            // "Heap Size Adjustment with CPU Control", MPLR '23.
+            //
+            // Target is expressed as a percentage (0, 100) via
+            // `MMTK_GC_CPU_TARGET`. The paper recommends 15 for ZGC (a
+            // concurrent collector); we default to 5 for MMTk-Ruby. With
+            // MMTk's stop-the-world Immix, every percent of GC CPU is also
+            // a percent of wall-clock the mutator is blocked on, so a much
+            // smaller budget is appropriate. An empirical sweep across
+            // ruby-bench (railsbench, lobsters, psych-load, liquid-render,
+            // lee) found target=5 to be Pareto-optimal: ~6% geomean speedup
+            // vs. the `ruby` heap mode with effectively identical geomean
+            // peak RSS.
+            let target_percent = parse_float_env_var("MMTK_GC_CPU_TARGET", 5.0, 0.0, 100.0);
+            let window_size = parse_env_var::<usize>("MMTK_GC_CPU_WINDOW").unwrap_or(3);
+            let window_size = window_size.max(1);
+
+            let min_heap_pages = conversions::bytes_to_pages_up(heap_min);
+            let max_heap_pages = conversions::bytes_to_pages_up(heap_max);
+            // Start at the min heap size, as the other delegated triggers do.
+            // The control loop will adjust from here after the first GC cycle.
+            let initial_heap_pages = min_heap_pages;
+
+            CPU_HEAP_TRIGGER_CONFIG
+                .set(CpuHeapTriggerConfig {
+                    min_heap_pages,
+                    max_heap_pages,
+                    initial_heap_pages,
+                    target_gc_cpu: target_percent / 100.0,
+                    window_size,
+                })
+                .unwrap_or_else(|_| panic!("CPU_HEAP_TRIGGER_CONFIG is already set"));
+
+            Some(GCTriggerSelector::Delegated)
+        }
+        _ => None,
+    })
+    .unwrap_or_else(make_dynamic)
+}
+
+fn mmtk_builder_default_parse_plan() -> PlanSelector {
+    parse_env_var_with("MMTK_PLAN", |s| match s {
+        "NoGC" => Some(PlanSelector::NoGC),
+        "MarkSweep" => Some(PlanSelector::MarkSweep),
+        "Immix" => Some(PlanSelector::Immix),
+        _ => None,
+    })
+    .unwrap_or(PlanSelector::Immix)
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_builder_default() -> *mut MMTKBuilder {
+    let mut builder = MMTKBuilder::new_no_env_vars();
+    builder.options.no_finalizer.set(true);
+
+    if let Some(threads) = mmtk_builder_default_parse_threads() {
+        if !builder.options.threads.set(threads) {
+            // MMTk will validate it and reject 0.
+            eprintln!("[FATAL] Failed to set the number of MMTk threads to {threads}");
+            std::process::exit(1);
+        }
+    }
+
+    let heap_min = mmtk_builder_default_parse_heap_min();
+
+    let heap_max = mmtk_builder_default_parse_heap_max();
+
+    if heap_min >= heap_max {
+        eprintln!("[FATAL] MMTK_HEAP_MIN({heap_min}) >= MMTK_HEAP_MAX({heap_max})");
+        std::process::exit(1);
+    }
+
+    builder
+        .options
+        .gc_trigger
+        .set(mmtk_builder_default_parse_heap_mode(heap_min, heap_max));
+
+    builder.options.plan.set(mmtk_builder_default_parse_plan());
+
+    Box::into_raw(Box::new(builder))
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn mmtk_init_binding(
+    builder: *mut MMTKBuilder,
+    binding_options: *const RubyBindingOptions,
+    upcalls: *const RubyUpcalls,
+) {
+    crate::MUTATOR_THREAD_PANIC_HANDLER
+        .set((unsafe { (*upcalls).clone() }).mutator_thread_panic_handler)
+        .unwrap_or_else(|_| panic!("MUTATOR_THREAD_PANIC_HANDLER is already initialized"));
+
+    crate::set_panic_hook();
+
+    let builder: Box<MMTKBuilder> = unsafe { Box::from_raw(builder) };
+    let binding_options = unsafe { (*binding_options).clone() };
+    let mmtk_boxed = mmtk_init(&builder);
+    let mmtk_static = Box::leak(Box::new(mmtk_boxed));
+
+    let mut binding = RubyBinding::new(mmtk_static, &binding_options, upcalls);
+    binding
+        .weak_proc
+        .init_parallel_obj_free_candidates(memory_manager::num_of_workers(binding.mmtk));
+
+    crate::BINDING
+        .set(binding)
+        .unwrap_or_else(|_| panic!("Binding is already initialized"));
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_initialize_collection(tls: VMThread) {
+    memory_manager::initialize_collection(mmtk(), tls)
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_bind_mutator(tls: VMMutatorThread) -> *mut RubyMutator {
+    Box::into_raw(memory_manager::bind_mutator(mmtk(), tls))
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn mmtk_get_bump_pointer_allocator(m: *mut RubyMutator) -> *mut BumpPointer {
+    match *crate::BINDING.get().unwrap().mmtk.get_options().plan {
+        PlanSelector::Immix => {
+            let mutator: &mut Mutator<Ruby> = unsafe { &mut *m };
+            let allocator =
+                unsafe { mutator.allocator_mut(mmtk::util::alloc::AllocatorSelector::Immix(0)) };
+
+            if let Some(immix_allocator) = allocator.downcast_mut::<ImmixAllocator<Ruby>>() {
+                &mut immix_allocator.bump_pointer as *mut BumpPointer
+            } else {
+                panic!("Failed to get bump pointer allocator");
+            }
+        }
+        _ => std::ptr::null_mut(),
+    }
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn mmtk_destroy_mutator(mutator: *mut RubyMutator) {
+    // notify mmtk-core about destroyed mutator
+    memory_manager::destroy_mutator(unsafe { &mut *mutator });
+    // turn the ptr back to a box, and let Rust properly reclaim it
+    let _ = unsafe { Box::from_raw(mutator) };
+}
+
+// =============== GC ===============
+
+#[no_mangle]
+pub extern "C" fn mmtk_handle_user_collection_request(
+    tls: VMMutatorThread,
+    force: bool,
+    exhaustive: bool,
+) {
+    crate::mmtk().handle_user_collection_request(tls, force, exhaustive);
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_set_gc_enabled(enable: bool) {
+    crate::CONFIGURATION
+        .gc_enabled
+        .store(enable, Ordering::Relaxed);
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_gc_enabled_p() -> bool {
+    crate::CONFIGURATION.gc_enabled.load(Ordering::Relaxed)
+}
+
+// =============== Object allocation ===============
+
+#[no_mangle]
+pub unsafe extern "C" fn mmtk_alloc(
+    mutator: *mut RubyMutator,
+    size: usize,
+    align: usize,
+    offset: usize,
+    semantics: AllocationSemantics,
+) -> Address {
+    let clamped_size = size.max(MIN_OBJECT_SIZE);
+    memory_manager::alloc::<Ruby>(
+        unsafe { &mut *mutator },
+        clamped_size,
+        align,
+        offset,
+        semantics,
+    )
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn mmtk_post_alloc(
+    mutator: *mut RubyMutator,
+    refer: ObjectReference,
+    bytes: usize,
+    semantics: AllocationSemantics,
+) {
+    memory_manager::post_alloc::<Ruby>(unsafe { &mut *mutator }, refer, bytes, semantics)
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn mmtk_add_obj_free_candidates(
+    objects: *const ObjectReference,
+    count: usize,
+    can_parallel_free: bool,
+) {
+    let objects = unsafe { std::slice::from_raw_parts(objects, count) };
+    binding()
+        .weak_proc
+        .add_obj_free_candidates_batch(objects, can_parallel_free)
+}
+
+// =============== Weak references ===============
+
+#[no_mangle]
+pub extern "C" fn mmtk_declare_weak_references(object: ObjectReference) {
+    binding().weak_proc.add_weak_reference(object);
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_weak_references_alive_p(object: ObjectReference) -> bool {
+    object.is_reachable()
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_weak_references_count() -> usize {
+    binding().weak_proc.weak_references_count()
+}
+
+// =============== Compaction ===============
+
+#[no_mangle]
+pub extern "C" fn mmtk_register_pinning_obj(obj: ObjectReference) {
+    crate::binding().pinning_registry.register(obj);
+}
+
+// =============== Write barriers ===============
+
+#[no_mangle]
+pub unsafe extern "C" fn mmtk_object_reference_write_post(
+    mutator: *mut RubyMutator,
+    object: ObjectReference,
+) {
+    let ignored_slot = RubySlot::from_address(Address::ZERO);
+    let ignored_target = ObjectReference::from_raw_address(Address::ZERO);
+    mmtk::memory_manager::object_reference_write_post(
+        unsafe { &mut *mutator },
+        object,
+        ignored_slot,
+        ignored_target,
+    )
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_register_wb_unprotected_object(object: ObjectReference) {
+    crate::binding().register_wb_unprotected_object(object)
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_object_wb_unprotected_p(object: ObjectReference) -> bool {
+    crate::binding().object_wb_unprotected_p(object)
+}
+
+// =============== Heap walking ===============
+
+#[no_mangle]
+pub extern "C" fn mmtk_enumerate_objects(
+    callback: extern "C" fn(ObjectReference, *mut libc::c_void),
+    data: *mut libc::c_void,
+) {
+    crate::mmtk().enumerate_objects(|object| {
+        callback(object, data);
+    })
+}
+
+// =============== Finalizers ===============
+
+#[no_mangle]
+pub extern "C" fn mmtk_get_all_obj_free_candidates() -> RawVecOfObjRef {
+    let vec = binding().weak_proc.get_all_obj_free_candidates();
+    RawVecOfObjRef::from_vec(vec)
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_free_raw_vec_of_obj_ref(raw_vec: RawVecOfObjRef) {
+    unsafe { raw_vec.into_vec() };
+}
+
+// =============== Forking ===============
+
+#[no_mangle]
+pub extern "C" fn mmtk_before_fork() {
+    mmtk().prepare_to_fork();
+    binding().join_all_gc_threads();
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_after_fork(tls: VMThread) {
+    mmtk().after_fork(tls);
+}
+
+// =============== Statistics ===============
+
+#[no_mangle]
+pub extern "C" fn mmtk_total_bytes() -> usize {
+    memory_manager::total_bytes(mmtk())
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_used_bytes() -> usize {
+    memory_manager::used_bytes(mmtk())
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_free_bytes() -> usize {
+    memory_manager::free_bytes(mmtk())
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_starting_heap_address() -> Address {
+    memory_manager::starting_heap_address()
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_last_heap_address() -> Address {
+    memory_manager::last_heap_address()
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_worker_count() -> usize {
+    memory_manager::num_of_workers(mmtk())
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_plan() -> *const u8 {
+    static NO_GC: &[u8] = b"NoGC\0";
+    static MARK_SWEEP: &[u8] = b"MarkSweep\0";
+    static IMMIX: &[u8] = b"Immix\0";
+
+    match *crate::BINDING.get().unwrap().mmtk.get_options().plan {
+        PlanSelector::NoGC => NO_GC.as_ptr(),
+        PlanSelector::MarkSweep => MARK_SWEEP.as_ptr(),
+        PlanSelector::Immix => IMMIX.as_ptr(),
+        _ => panic!("Unknown plan"),
+    }
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_heap_mode() -> *const u8 {
+    static FIXED_HEAP: &[u8] = b"fixed\0";
+    static DYNAMIC_HEAP: &[u8] = b"dynamic\0";
+    static RUBY_HEAP: &[u8] = b"ruby\0";
+    static CPU_HEAP: &[u8] = b"cpu\0";
+
+    match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger {
+        GCTriggerSelector::FixedHeapSize(_) => FIXED_HEAP.as_ptr(),
+        GCTriggerSelector::DynamicHeapSize(_, _) => DYNAMIC_HEAP.as_ptr(),
+        GCTriggerSelector::Delegated => {
+            // Two delegated triggers exist; disambiguate via the populated
+            // config singleton.
+            if CPU_HEAP_TRIGGER_CONFIG.get().is_some() {
+                CPU_HEAP.as_ptr()
+            } else {
+                RUBY_HEAP.as_ptr()
+            }
+        }
+    }
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_heap_min() -> usize {
+    match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger {
+        GCTriggerSelector::FixedHeapSize(_) => 0,
+        GCTriggerSelector::DynamicHeapSize(min_size, _) => min_size,
+        GCTriggerSelector::Delegated => {
+            if let Some(cfg) = CPU_HEAP_TRIGGER_CONFIG.get() {
+                conversions::pages_to_bytes(cfg.min_heap_pages)
+            } else {
+                conversions::pages_to_bytes(
+                    RUBY_HEAP_TRIGGER_CONFIG
+                        .get()
+                        .expect("RUBY_HEAP_TRIGGER_CONFIG not set")
+                        .min_heap_pages,
+                )
+            }
+        }
+    }
+}
+
+#[no_mangle]
+pub extern "C" fn mmtk_heap_max() -> usize {
+    match *crate::BINDING.get().unwrap().mmtk.get_options().gc_trigger {
+        GCTriggerSelector::FixedHeapSize(max_size) => max_size,
+        GCTriggerSelector::DynamicHeapSize(_, max_size) => max_size,
+        GCTriggerSelector::Delegated => {
+            if let Some(cfg) = CPU_HEAP_TRIGGER_CONFIG.get() {
+                conversions::pages_to_bytes(cfg.max_heap_pages)
+            } else {
+                conversions::pages_to_bytes(
+                    RUBY_HEAP_TRIGGER_CONFIG
+                        .get()
+                        .expect("RUBY_HEAP_TRIGGER_CONFIG not set")
+                        .max_heap_pages,
+                )
+            }
+        }
+    }
+}
+
+// =============== Miscellaneous ===============
+
+#[no_mangle]
+pub extern "C" fn mmtk_is_mmtk_object(addr: Address) -> bool {
+    debug_assert!(!addr.is_zero());
+    debug_assert!(addr.is_aligned_to(mmtk::util::is_mmtk_object::VO_BIT_REGION_SIZE));
+    memory_manager::is_mmtk_object(addr).is_some()
+}
diff --git a/gc/mmtk/src/binding.rs b/gc/mmtk/src/binding.rs
new file mode 100644
index 0000000000..36d4a992fd
--- /dev/null
+++ b/gc/mmtk/src/binding.rs
@@ -0,0 +1,129 @@
+use std::collections::HashSet;
+use std::ffi::CString;
+use std::sync::atomic::AtomicBool;
+use std::sync::Mutex;
+use std::thread::JoinHandle;
+
+use mmtk::util::ObjectReference;
+use mmtk::MMTK;
+
+use crate::abi;
+use crate::abi::RubyBindingOptions;
+use crate::pinning_registry::PinningRegistry;
+use crate::weak_proc::WeakProcessor;
+use crate::Ruby;
+
+pub struct RubyBindingFast {
+    pub suffix_size: usize,
+}
+
+impl Default for RubyBindingFast {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl RubyBindingFast {
+    pub const fn new() -> Self {
+        Self { suffix_size: 0 }
+    }
+}
+
+pub struct RubyConfiguration {
+    pub gc_enabled: AtomicBool,
+}
+
+impl Default for RubyConfiguration {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl RubyConfiguration {
+    pub const fn new() -> Self {
+        Self {
+            // Mimic the old behavior when the gc_enabled flag was in mmtk-core.
+            // We may refactor it so that it is false by default.
+            gc_enabled: AtomicBool::new(true),
+        }
+    }
+}
+
+pub struct RubyBinding {
+    pub mmtk: &'static MMTK<Ruby>,
+    pub options: RubyBindingOptions,
+    pub upcalls: *const abi::RubyUpcalls,
+    pub plan_name: Mutex<Option<CString>>,
+    pub weak_proc: WeakProcessor,
+    pub pinning_registry: PinningRegistry,
+    pub gc_thread_join_handles: Mutex<Vec<JoinHandle<()>>>,
+    pub wb_unprotected_objects: Mutex<HashSet<ObjectReference>>,
+}
+
+unsafe impl Sync for RubyBinding {}
+unsafe impl Send for RubyBinding {}
+
+impl RubyBinding {
+    pub fn new(
+        mmtk: &'static MMTK<Ruby>,
+        binding_options: &RubyBindingOptions,
+        upcalls: *const abi::RubyUpcalls,
+    ) -> Self {
+        unsafe {
+            crate::BINDING_FAST.suffix_size = binding_options.suffix_size;
+        }
+
+        Self {
+            mmtk,
+            options: binding_options.clone(),
+            upcalls,
+            plan_name: Mutex::new(None),
+            weak_proc: WeakProcessor::new(),
+            pinning_registry: PinningRegistry::new(),
+            gc_thread_join_handles: Default::default(),
+            wb_unprotected_objects: Default::default(),
+        }
+    }
+
+    pub fn upcalls(&self) -> &'static abi::RubyUpcalls {
+        unsafe { &*self.upcalls as &'static abi::RubyUpcalls }
+    }
+
+    pub fn get_plan_name_c(&self) -> *const libc::c_char {
+        let mut plan_name = self.plan_name.lock().unwrap();
+        if plan_name.is_none() {
+            let name_string = format!("{:?}", *self.mmtk.get_options().plan);
+            let c_string = CString::new(name_string)
+                .unwrap_or_else(|e| panic!("Failed converting plan name to CString: {e}"));
+            *plan_name = Some(c_string);
+        }
+        plan_name.as_deref().unwrap().as_ptr()
+    }
+
+    pub fn join_all_gc_threads(&self) {
+        let handles = {
+            let mut guard = self.gc_thread_join_handles.lock().unwrap();
+            std::mem::take(&mut *guard)
+        };
+
+        debug!("Joining GC threads...");
+        let total = handles.len();
+        let mut joined = 0;
+        for handle in handles {
+            handle.join().unwrap();
+            joined += 1;
+            debug!("{joined}/{total} GC threads joined.");
+        }
+    }
+
+    pub fn register_wb_unprotected_object(&self, object: ObjectReference) {
+        debug!("Registering WB-unprotected object: {object}");
+        let mut objects = self.wb_unprotected_objects.lock().unwrap();
+        objects.insert(object);
+    }
+
+    pub fn object_wb_unprotected_p(&self, object: ObjectReference) -> bool {
+        let objects = self.wb_unprotected_objects.lock().unwrap();
+        objects.contains(&object)
+    }
+}
diff --git a/gc/mmtk/src/collection.rs b/gc/mmtk/src/collection.rs
new file mode 100644
index 0000000000..648efa4e27
--- /dev/null
+++ b/gc/mmtk/src/collection.rs
@@ -0,0 +1,122 @@
+use crate::abi::GCThreadTLS;
+
+use crate::api::RubyMutator;
+use crate::heap::CpuHeapTrigger;
+use crate::heap::RubyHeapTrigger;
+use crate::heap::CPU_HEAP_TRIGGER_CONFIG;
+use crate::mmtk;
+use crate::upcalls;
+use crate::Ruby;
+use mmtk::memory_manager;
+use mmtk::scheduler::*;
+use mmtk::util::heap::GCTriggerPolicy;
+use mmtk::util::VMMutatorThread;
+use mmtk::util::VMThread;
+use mmtk::util::VMWorkerThread;
+use mmtk::vm::Collection;
+use mmtk::vm::GCThreadContext;
+use std::sync::atomic::AtomicBool;
+use std::sync::atomic::Ordering;
+use std::thread;
+
+static CURRENT_GC_MAY_MOVE: AtomicBool = AtomicBool::new(false);
+
+pub struct VMCollection {}
+
+impl Collection<Ruby> for VMCollection {
+    fn is_collection_enabled() -> bool {
+        crate::CONFIGURATION.gc_enabled.load(Ordering::Relaxed)
+    }
+
+    fn stop_all_mutators<F>(tls: VMWorkerThread, mut mutator_visitor: F)
+    where
+        F: FnMut(&'static mut mmtk::Mutator<Ruby>),
+    {
+        (upcalls().stop_the_world)();
+
+        if crate::mmtk().get_plan().current_gc_may_move_object() {
+            CURRENT_GC_MAY_MOVE.store(true, Ordering::Relaxed);
+            (upcalls().before_updating_jit_code)();
+        } else {
+            CURRENT_GC_MAY_MOVE.store(false, Ordering::Relaxed);
+        }
+
+        crate::binding().pinning_registry.pin_children(tls);
+
+        (upcalls().get_mutators)(
+            Self::notify_mutator_ready::<F>,
+            &mut mutator_visitor as *mut F as *mut _,
+        );
+    }
+
+    fn resume_mutators(_tls: VMWorkerThread) {
+        let current_gc_may_move = CURRENT_GC_MAY_MOVE.load(Ordering::Relaxed);
+
+        if current_gc_may_move {
+            (upcalls().after_updating_jit_code)();
+        }
+
+        (upcalls().resume_mutators)(current_gc_may_move);
+    }
+
+    fn block_for_gc(tls: VMMutatorThread) {
+        (upcalls().block_for_gc)(tls);
+    }
+
+    fn spawn_gc_thread(_tls: VMThread, ctx: GCThreadContext<Ruby>) {
+        let join_handle = match ctx {
+            GCThreadContext::Worker(mut worker) => thread::Builder::new()
+                .name("MMTk Worker Thread".to_string())
+                .spawn(move || {
+                    let ordinal = worker.ordinal;
+                    debug!("Hello! This is MMTk Worker Thread running! ordinal: {ordinal}");
+                    crate::register_gc_thread(thread::current().id());
+                    let ptr_worker = &mut *worker as *mut GCWorker<Ruby>;
+                    let gc_thread_tls =
+                        Box::into_raw(Box::new(GCThreadTLS::for_worker(ptr_worker)));
+                    (upcalls().init_gc_worker_thread)(gc_thread_tls);
+                    memory_manager::start_worker(
+                        mmtk(),
+                        GCThreadTLS::to_vwt(gc_thread_tls),
+                        worker,
+                    );
+                    debug!("An MMTk Worker Thread is quitting. Good bye! ordinal: {ordinal}");
+                    crate::unregister_gc_thread(thread::current().id());
+                })
+                .unwrap(),
+        };
+
+        {
+            let mut handles = crate::binding().gc_thread_join_handles.lock().unwrap();
+            handles.push(join_handle);
+        }
+    }
+
+    fn vm_live_bytes() -> usize {
+        (upcalls().vm_live_bytes)()
+    }
+
+    fn create_gc_trigger() -> Box<dyn GCTriggerPolicy<Ruby>> {
+        // `GCTriggerSelector::Delegated` is currently used by two different
+        // heap modes: `ruby` (the Ruby-like free-slot ratio trigger) and `cpu`
+        // (the CPU-overhead trigger from Tavakolisomeh et al., MPLR '23).
+        // Which one is active is determined by which `OnceCell` config the
+        // `MMTK_HEAP_MODE` parser populated.
+        if CPU_HEAP_TRIGGER_CONFIG.get().is_some() {
+            Box::new(CpuHeapTrigger::default())
+        } else {
+            Box::new(RubyHeapTrigger::default())
+        }
+    }
+}
+
+impl VMCollection {
+    extern "C" fn notify_mutator_ready<F>(mutator_ptr: *mut RubyMutator, data: *mut libc::c_void)
+    where
+        F: FnMut(&'static mut mmtk::Mutator<Ruby>),
+    {
+        let mutator = unsafe { &mut *mutator_ptr };
+        let mutator_visitor = unsafe { &mut *(data as *mut F) };
+        mutator_visitor(mutator);
+    }
+}
diff --git a/gc/mmtk/src/heap/cpu_heap_trigger.rs b/gc/mmtk/src/heap/cpu_heap_trigger.rs
new file mode 100644
index 0000000000..ef5a79fe9a
--- /dev/null
+++ b/gc/mmtk/src/heap/cpu_heap_trigger.rs
@@ -0,0 +1,370 @@
+//! A GC trigger that adjusts the heap size based on the CPU overhead of GC.
+//!
+//! This is an implementation of the heap sizing policy described in
+//! Tavakolisomeh, Shimchenko, Österlund, Bruno, Ferreira, Wrigstad,
+//! "Heap Size Adjustment with CPU Control", MPLR '23.
+//! <https://doi.org/10.1145/3617651.3622988>
+//!
+//! The idea: rather than letting heap size control GC frequency, let a
+//! user-supplied *target GC CPU overhead* control the heap size. After each GC
+//! cycle, we measure the GC CPU overhead (fraction of process CPU time spent
+//! in GC) and compare it to the target. If GC is over budget we grow the heap
+//! (reducing GC frequency); if it is under budget we shrink the heap (trading
+//! memory for more frequent collections).
+//!
+//! ## Algorithm
+//!
+//! After each GC cycle we compute, using an average of the last `n` cycles:
+//!
+//! ```text
+//! GC_CPU             = T_GC / T_APP                                  (Eq. 1)
+//! overhead_error     = GC_CPU - target                                (Eq. 2)
+//! sigmoid_error      = 1 / (1 + e^(-overhead_error))                  (Eq. 3)
+//! adjustment_factor  = sigmoid_error + 0.5   (in (0.5, 1.5))          (Eq. 4)
+//! new_size           = current_size * adjustment_factor               (Eq. 5)
+//! ```
+//!
+//! where:
+//! - `T_GC` is the wall-clock duration of each GC cycle.
+//! - `T_APP` is process CPU time elapsed between consecutive GC cycles (sum of
+//!   CPU time over all threads — mutators, GC workers, compilers, etc.), read
+//!   via `clock_gettime(CLOCK_PROCESS_CPUTIME_ID)`.
+//!
+//! The final heap size is then clamped to the range
+//! `[max(1.1 * used, min_heap_pages), max_heap_pages]`, providing 10% headroom
+//! above current live memory to avoid triggering GC on an effectively-empty
+//! heap.
+//!
+//! ## Differences from the paper
+//!
+//! The paper targets ZGC, a concurrent generational collector. MMTk's Ruby
+//! binding currently ships stop-the-world collectors (Immix, MarkSweep). The
+//! paper's formula still applies: with a STW collector the process CPU time
+//! during GC closely tracks the wall-clock GC time, and mutator CPU time
+//! during the mutator phase is correctly attributed. For generational plans
+//! we skip nursery-only GCs, consistent with MemBalancer.
+
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+use std::sync::Mutex;
+
+use mmtk::util::heap::GCTriggerPolicy;
+use mmtk::util::heap::SpaceStats;
+use mmtk::Plan;
+use mmtk::MMTK;
+use once_cell::sync::OnceCell;
+
+use crate::Ruby;
+
+pub static CPU_HEAP_TRIGGER_CONFIG: OnceCell<CpuHeapTriggerConfig> = OnceCell::new();
+
+/// Configuration for the [`CpuHeapTrigger`].
+pub struct CpuHeapTriggerConfig {
+    /// Lower bound on heap size (in pages). The trigger will never shrink below
+    /// this value.
+    pub min_heap_pages: usize,
+    /// Upper bound on heap size (in pages). The trigger will never grow above
+    /// this value.
+    pub max_heap_pages: usize,
+    /// Initial heap size (in pages).
+    pub initial_heap_pages: usize,
+    /// Target GC CPU overhead as a fraction of total process CPU time. For
+    /// example, `0.15` means the policy will try to keep GC CPU usage near 15%.
+    /// Valid range: `(0.0, 1.0)`.
+    pub target_gc_cpu: f64,
+    /// Number of recent GC cycles averaged together when computing the CPU
+    /// overhead signal. Smoothes out short-term fluctuations. The paper uses 3.
+    pub window_size: usize,
+}
+
+/// A single GC cycle's timing measurements.
+#[derive(Clone, Copy, Debug, Default)]
+struct GcSample {
+    /// Wall-clock seconds spent inside this GC cycle.
+    gc_seconds: f64,
+    /// Seconds of process CPU time elapsed since the previous GC cycle ended.
+    /// This covers both mutator time and (on multi-threaded mutators) any
+    /// mutator CPU time consumed in parallel with the previous GC.
+    app_cpu_seconds: f64,
+}
+
+struct CpuHeapTriggerState {
+    /// Ring buffer of the last `window_size` samples. Oldest-first.
+    samples: Vec<GcSample>,
+    /// Wall-clock time when the current GC cycle started. `None` when no GC is
+    /// in progress.
+    gc_start_wall: Option<std::time::Instant>,
+    /// Process CPU time (seconds) recorded at the end of the previous GC
+    /// cycle. `None` until the first cycle completes.
+    last_gc_end_cpu: Option<f64>,
+}
+
+impl CpuHeapTriggerState {
+    fn new() -> Self {
+        Self {
+            samples: Vec::new(),
+            gc_start_wall: None,
+            last_gc_end_cpu: None,
+        }
+    }
+
+    /// Pushes a new sample, dropping the oldest when the window is full.
+    fn push_sample(&mut self, sample: GcSample, window_size: usize) {
+        if self.samples.len() >= window_size {
+            self.samples.remove(0);
+        }
+        self.samples.push(sample);
+    }
+
+    /// Returns the arithmetic mean GC CPU overhead across the window, or
+    /// `None` if we don't yet have a full sample (which happens on the first
+    /// GC cycle — we have no baseline for `app_cpu_seconds`).
+    fn mean_gc_cpu(&self) -> Option<f64> {
+        if self.samples.is_empty() {
+            return None;
+        }
+        let total_gc: f64 = self.samples.iter().map(|s| s.gc_seconds).sum();
+        let total_app: f64 = self.samples.iter().map(|s| s.app_cpu_seconds).sum();
+        if total_app <= 0.0 {
+            return None;
+        }
+        Some(total_gc / total_app)
+    }
+}
+
+pub struct CpuHeapTrigger {
+    /// Target heap size in pages. Updated at the end of each GC cycle.
+    target_heap_pages: AtomicUsize,
+    /// Mutable timing state. Wrapped in a `Mutex` because `on_gc_start` and
+    /// `on_gc_end` are the only mutation sites and they are not on an
+    /// allocation hot path; avoiding the complexity of lock-free state is
+    /// worth the trivial contention.
+    state: Mutex<CpuHeapTriggerState>,
+}
+
+impl Default for CpuHeapTrigger {
+    fn default() -> Self {
+        let cfg = Self::get_config();
+        Self {
+            target_heap_pages: AtomicUsize::new(cfg.initial_heap_pages),
+            state: Mutex::new(CpuHeapTriggerState::new()),
+        }
+    }
+}
+
+impl GCTriggerPolicy<Ruby> for CpuHeapTrigger {
+    fn is_gc_required(
+        &self,
+        space_full: bool,
+        space: Option<SpaceStats<Ruby>>,
+        plan: &dyn Plan<VM = Ruby>,
+    ) -> bool {
+        // Let the plan decide, matching the other triggers.
+        plan.collection_required(space_full, space)
+    }
+
+    fn on_gc_start(&self, _mmtk: &'static MMTK<Ruby>) {
+        let mut state = self.state.lock().unwrap();
+        state.gc_start_wall = Some(std::time::Instant::now());
+    }
+
+    fn on_gc_end(&self, mmtk: &'static MMTK<Ruby>) {
+        // Skip nursery-only GCs for generational plans. The heap resizing
+        // decision is driven by the (much more expensive) full collections
+        // where the signal-to-noise ratio is high enough to be useful.
+        if let Some(gen_plan) = mmtk.get_plan().generational() {
+            if gen_plan.is_current_gc_nursery() {
+                return;
+            }
+        }
+
+        let cfg = Self::get_config();
+        let gc_end_cpu = process_cpu_time_seconds();
+
+        let mut state = self.state.lock().unwrap();
+
+        // Duration of this GC cycle (wall clock).
+        let gc_seconds = state
+            .gc_start_wall
+            .take()
+            .map(|start| start.elapsed().as_secs_f64())
+            .unwrap_or(0.0);
+
+        // Process CPU time elapsed since the previous GC cycle ended. We
+        // require at least one previous end timestamp to produce a valid
+        // sample — without it we cannot compute `T_APP`.
+        if let (Some(last_end), Some(now)) = (state.last_gc_end_cpu, gc_end_cpu) {
+            let app_cpu_seconds = (now - last_end).max(0.0);
+            // Only record non-degenerate samples to avoid poisoning the window
+            // with zero-time entries from back-to-back GCs.
+            if app_cpu_seconds > 0.0 {
+                state.push_sample(
+                    GcSample {
+                        gc_seconds,
+                        app_cpu_seconds,
+                    },
+                    cfg.window_size,
+                );
+            }
+        }
+        state.last_gc_end_cpu = gc_end_cpu;
+
+        // Compute the new heap size only when we have samples to average over.
+        if let Some(gc_cpu) = state.mean_gc_cpu() {
+            // Drop the lock before doing the (relatively cheap) math and
+            // atomic update; nothing below needs the state.
+            drop(state);
+
+            let overhead_error = gc_cpu - cfg.target_gc_cpu; // Eq. (2)
+            let sigmoid_error = sigmoid(overhead_error); // Eq. (3)
+            let adjustment_factor = sigmoid_error + 0.5; // Eq. (4), range (0.5, 1.5)
+
+            let current = self.target_heap_pages.load(Ordering::Relaxed);
+            let suggested = ((current as f64) * adjustment_factor) as usize; // Eq. (5)
+
+            // Clamp:
+            // - upper bound: configured max
+            // - lower bound: max(1.1 * used, min) — 10% headroom above current
+            //   live memory, so we never request a heap so small that GC is
+            //   triggered immediately on return from this one.
+            let used = mmtk.get_plan().get_used_pages();
+            let floor = ((used as f64) * 1.1).ceil() as usize;
+            let lower = floor.max(cfg.min_heap_pages).min(cfg.max_heap_pages);
+            let upper = cfg.max_heap_pages;
+            let new_target = suggested.clamp(lower, upper);
+
+            self.target_heap_pages.store(new_target, Ordering::Relaxed);
+
+            info!(
+                "CpuHeapTrigger: gc_cpu={:.4} target={:.4} factor={:.4} \
+                 pages {} -> {} (used={}, clamp=[{}, {}])",
+                gc_cpu,
+                cfg.target_gc_cpu,
+                adjustment_factor,
+                current,
+                new_target,
+                used,
+                lower,
+                upper
+            );
+        }
+    }
+
+    fn is_heap_full(&self, plan: &dyn Plan<VM = Ruby>) -> bool {
+        plan.get_reserved_pages() > self.target_heap_pages.load(Ordering::Relaxed)
+    }
+
+    fn get_current_heap_size_in_pages(&self) -> usize {
+        self.target_heap_pages.load(Ordering::Relaxed)
+    }
+
+    fn get_max_heap_size_in_pages(&self) -> usize {
+        Self::get_config().max_heap_pages
+    }
+
+    fn can_heap_size_grow(&self) -> bool {
+        self.target_heap_pages.load(Ordering::Relaxed) < Self::get_config().max_heap_pages
+    }
+}
+
+impl CpuHeapTrigger {
+    fn get_config<'b>() -> &'b CpuHeapTriggerConfig {
+        CPU_HEAP_TRIGGER_CONFIG
+            .get()
+            .expect("Attempt to use CPU_HEAP_TRIGGER_CONFIG before it is initialized")
+    }
+}
+
+/// Standard logistic sigmoid. Returns 0.5 when x == 0, asymptotes to 0 and 1.
+fn sigmoid(x: f64) -> f64 {
+    1.0 / (1.0 + (-x).exp())
+}
+
+/// Reads the process-wide CPU time as a floating-point number of seconds,
+/// summed across all threads of this process. Returns `None` if the clock
+/// query fails (which should be essentially impossible on supported
+/// platforms).
+fn process_cpu_time_seconds() -> Option<f64> {
+    let mut ts = libc::timespec {
+        tv_sec: 0,
+        tv_nsec: 0,
+    };
+    // SAFETY: `clock_gettime` writes exactly `sizeof(timespec)` bytes to the
+    // pointer we pass, which is a valid local stack allocation.
+    let rc = unsafe { libc::clock_gettime(libc::CLOCK_PROCESS_CPUTIME_ID, &mut ts) };
+    if rc != 0 {
+        return None;
+    }
+    Some((ts.tv_sec as f64) + (ts.tv_nsec as f64) / 1_000_000_000.0)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn sigmoid_is_well_behaved() {
+        assert!((sigmoid(0.0) - 0.5).abs() < 1e-12);
+        assert!(sigmoid(-100.0) < 1e-9);
+        assert!(sigmoid(100.0) > 1.0 - 1e-9);
+        // Monotonic.
+        assert!(sigmoid(-1.0) < sigmoid(0.0));
+        assert!(sigmoid(0.0) < sigmoid(1.0));
+    }
+
+    #[test]
+    fn adjustment_factor_is_within_paper_bounds() {
+        // Eq. (4): adjustment_factor = sigmoid(e) + 0.5 must lie in (0.5, 1.5).
+        for e in [-10.0_f64, -1.0, 0.0, 1.0, 10.0] {
+            let f = sigmoid(e) + 0.5;
+            assert!(f > 0.5 && f < 1.5, "factor {f} out of range for e={e}");
+        }
+    }
+
+    #[test]
+    fn mean_gc_cpu_is_total_weighted() {
+        let mut state = CpuHeapTriggerState::new();
+        state.push_sample(
+            GcSample {
+                gc_seconds: 1.0,
+                app_cpu_seconds: 10.0,
+            },
+            3,
+        );
+        state.push_sample(
+            GcSample {
+                gc_seconds: 3.0,
+                app_cpu_seconds: 10.0,
+            },
+            3,
+        );
+        // (1 + 3) / (10 + 10) = 0.2
+        assert!((state.mean_gc_cpu().unwrap() - 0.2).abs() < 1e-12);
+    }
+
+    #[test]
+    fn window_drops_oldest() {
+        let mut state = CpuHeapTriggerState::new();
+        for i in 0..5 {
+            state.push_sample(
+                GcSample {
+                    gc_seconds: i as f64,
+                    app_cpu_seconds: 1.0,
+                },
+                3,
+            );
+        }
+        assert_eq!(state.samples.len(), 3);
+        // After pushing 0,1,2,3,4 with window 3, we should have [2,3,4].
+        assert_eq!(state.samples[0].gc_seconds, 2.0);
+        assert_eq!(state.samples[2].gc_seconds, 4.0);
+    }
+
+    #[test]
+    fn no_sample_without_prior_gc() {
+        // First GC cycle cannot produce a sample (no previous end time). The
+        // push happens only when last_gc_end_cpu is Some.
+        let state = CpuHeapTriggerState::new();
+        assert!(state.mean_gc_cpu().is_none());
+    }
+}
diff --git a/gc/mmtk/src/heap/mod.rs b/gc/mmtk/src/heap/mod.rs
new file mode 100644
index 0000000000..05a35efb23
--- /dev/null
+++ b/gc/mmtk/src/heap/mod.rs
@@ -0,0 +1,9 @@
+mod cpu_heap_trigger;
+mod ruby_heap_trigger;
+
+pub use cpu_heap_trigger::CpuHeapTrigger;
+pub use cpu_heap_trigger::CpuHeapTriggerConfig;
+pub use cpu_heap_trigger::CPU_HEAP_TRIGGER_CONFIG;
+pub use ruby_heap_trigger::RubyHeapTrigger;
+pub use ruby_heap_trigger::RubyHeapTriggerConfig;
+pub use ruby_heap_trigger::RUBY_HEAP_TRIGGER_CONFIG;
diff --git a/gc/mmtk/src/heap/ruby_heap_trigger.rs b/gc/mmtk/src/heap/ruby_heap_trigger.rs
new file mode 100644
index 0000000000..fe1130043d
--- /dev/null
+++ b/gc/mmtk/src/heap/ruby_heap_trigger.rs
@@ -0,0 +1,105 @@
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+
+use mmtk::util::heap::GCTriggerPolicy;
+use mmtk::util::heap::SpaceStats;
+use mmtk::Plan;
+use mmtk::MMTK;
+use once_cell::sync::OnceCell;
+
+use crate::Ruby;
+
+pub static RUBY_HEAP_TRIGGER_CONFIG: OnceCell<RubyHeapTriggerConfig> = OnceCell::new();
+
+pub struct RubyHeapTriggerConfig {
+    /// Min heap size
+    pub min_heap_pages: usize,
+    /// Max heap size
+    pub max_heap_pages: usize,
+    /// Minimum ratio of empty space after a GC before the heap will grow
+    pub heap_pages_min_ratio: f64,
+    /// Ratio the heap will grow by
+    pub heap_pages_goal_ratio: f64,
+    /// Maximum ratio of empty space after a GC before the heap will shrink
+    pub heap_pages_max_ratio: f64,
+}
+
+pub struct RubyHeapTrigger {
+    /// Target number of heap pages
+    target_heap_pages: AtomicUsize,
+}
+
+impl GCTriggerPolicy<Ruby> for RubyHeapTrigger {
+    fn is_gc_required(
+        &self,
+        space_full: bool,
+        space: Option<SpaceStats<Ruby>>,
+        plan: &dyn Plan<VM = Ruby>,
+    ) -> bool {
+        // Let the plan decide
+        plan.collection_required(space_full, space)
+    }
+
+    fn on_gc_end(&self, mmtk: &'static MMTK<Ruby>) {
+        if let Some(plan) = mmtk.get_plan().generational() {
+            if plan.is_current_gc_nursery() {
+                // Nursery GC
+            } else {
+                // Full GC
+            }
+
+            panic!("TODO: support for generational GC not implemented")
+        } else {
+            let used_pages = mmtk.get_plan().get_used_pages();
+
+            let target_min =
+                (used_pages as f64 * (1.0 + Self::get_config().heap_pages_min_ratio)) as usize;
+            let target_max =
+                (used_pages as f64 * (1.0 + Self::get_config().heap_pages_max_ratio)) as usize;
+            let new_target =
+                (((used_pages as f64) * (1.0 + Self::get_config().heap_pages_goal_ratio)) as usize)
+                    .clamp(
+                        Self::get_config().min_heap_pages,
+                        Self::get_config().max_heap_pages,
+                    );
+
+            if used_pages < target_min || used_pages > target_max {
+                self.target_heap_pages.store(new_target, Ordering::Relaxed);
+            }
+        }
+    }
+
+    fn is_heap_full(&self, plan: &dyn Plan<VM = Ruby>) -> bool {
+        plan.get_reserved_pages() > self.target_heap_pages.load(Ordering::Relaxed)
+    }
+
+    fn get_current_heap_size_in_pages(&self) -> usize {
+        self.target_heap_pages.load(Ordering::Relaxed)
+    }
+
+    fn get_max_heap_size_in_pages(&self) -> usize {
+        Self::get_config().max_heap_pages
+    }
+
+    fn can_heap_size_grow(&self) -> bool {
+        self.target_heap_pages.load(Ordering::Relaxed) < Self::get_config().max_heap_pages
+    }
+}
+
+impl Default for RubyHeapTrigger {
+    fn default() -> Self {
+        let min_heap_pages = Self::get_config().min_heap_pages;
+
+        Self {
+            target_heap_pages: AtomicUsize::new(min_heap_pages),
+        }
+    }
+}
+
+impl RubyHeapTrigger {
+    fn get_config<'b>() -> &'b RubyHeapTriggerConfig {
+        RUBY_HEAP_TRIGGER_CONFIG
+            .get()
+            .expect("Attempt to use RUBY_HEAP_TRIGGER_CONFIG before it is initialized")
+    }
+}
diff --git a/gc/mmtk/src/lib.rs b/gc/mmtk/src/lib.rs
new file mode 100644
index 0000000000..52dc782051
--- /dev/null
+++ b/gc/mmtk/src/lib.rs
@@ -0,0 +1,161 @@
+// Warn about unsafe operations in functions that are already marked as unsafe.
+// This will become default in Rust 2024 edition.
+#![warn(unsafe_op_in_unsafe_fn)]
+
+extern crate libc;
+extern crate mmtk;
+#[macro_use]
+extern crate log;
+extern crate probe;
+
+use std::collections::HashSet;
+use std::panic::PanicHookInfo;
+use std::sync::Mutex;
+use std::thread::ThreadId;
+
+use abi::RubyUpcalls;
+use binding::RubyBinding;
+use binding::RubyBindingFast;
+use binding::RubyConfiguration;
+use mmtk::vm::slot::SimpleSlot;
+use mmtk::vm::slot::UnimplementedMemorySlice;
+use mmtk::vm::VMBinding;
+use mmtk::MMTK;
+use once_cell::sync::OnceCell;
+
+pub mod abi;
+pub mod active_plan;
+pub mod api;
+pub mod binding;
+pub mod collection;
+pub mod heap;
+pub mod object_model;
+pub mod pinning_registry;
+pub mod reference_glue;
+pub mod scanning;
+pub mod utils;
+pub mod weak_proc;
+
+#[derive(Default)]
+pub struct Ruby;
+
+/// Ruby slot type, i.e. a slot that holds a VALUE.
+/// Currently we use SimpleSlot.
+/// It doesn't matter, becaues we have not started using slot-enqueuing, yet.
+pub type RubySlot = SimpleSlot;
+
+/// Ruby memory slice, i.e. an array of VALUEs.
+/// It is used by array-copy barriers which is supposed to perform bettern than copying array
+/// elements one by one.  At this moment, we just leave it unimplemented.
+pub type RubyMemorySlice = UnimplementedMemorySlice<RubySlot>;
+
+impl VMBinding for Ruby {
+    type VMObjectModel = object_model::VMObjectModel;
+    type VMScanning = scanning::VMScanning;
+    type VMCollection = collection::VMCollection;
+    type VMActivePlan = active_plan::VMActivePlan;
+    type VMReferenceGlue = reference_glue::VMReferenceGlue;
+
+    type VMSlot = RubySlot;
+    type VMMemorySlice = RubyMemorySlice;
+}
+
+/// The callback for mutator thread panic handler (which calls rb_bug to output
+/// debugging information such as the Ruby backtrace and memory maps).
+/// This is set before BINDING is set because mmtk_init could panic.
+pub static MUTATOR_THREAD_PANIC_HANDLER: OnceCell<extern "C" fn()> = OnceCell::new();
+
+/// The singleton object for the Ruby binding itself.
+pub static BINDING: OnceCell<RubyBinding> = OnceCell::new();
+
+/// Some data needs to be accessed fast.
+/// We sacrifice safety for speed using unsynchronized global variables.
+pub static mut BINDING_FAST: RubyBindingFast = RubyBindingFast::new();
+
+/// Some data needs to be accessed fast.
+pub static CONFIGURATION: RubyConfiguration = RubyConfiguration::new();
+
+pub fn binding<'b>() -> &'b RubyBinding {
+    BINDING
+        .get()
+        .expect("Attempt to use the binding before it is initialization")
+}
+
+pub fn mmtk() -> &'static MMTK<Ruby> {
+    binding().mmtk
+}
+
+pub fn upcalls() -> &'static RubyUpcalls {
+    binding().upcalls()
+}
+
+pub static GC_THREADS: OnceCell<Mutex<HashSet<ThreadId>>> = OnceCell::new();
+
+pub(crate) fn register_gc_thread(thread_id: ThreadId) {
+    let mut gc_threads = GC_THREADS.get().unwrap().lock().unwrap();
+    gc_threads.insert(thread_id);
+}
+
+pub(crate) fn unregister_gc_thread(thread_id: ThreadId) {
+    let mut gc_threads = GC_THREADS.get().unwrap().lock().unwrap();
+    gc_threads.remove(&thread_id);
+}
+
+pub(crate) fn is_gc_thread(thread_id: ThreadId) -> bool {
+    let gc_threads = GC_THREADS.get().unwrap().lock().unwrap();
+    gc_threads.contains(&thread_id)
+}
+
+fn handle_gc_thread_panic(panic_info: &PanicHookInfo) {
+    eprintln!("ERROR: An MMTk GC thread panicked.  This is a bug.");
+    eprintln!("{panic_info}");
+
+    let bt = std::backtrace::Backtrace::capture();
+    match bt.status() {
+        std::backtrace::BacktraceStatus::Unsupported => {
+            eprintln!("Backtrace is unsupported.")
+        }
+        std::backtrace::BacktraceStatus::Disabled => {
+            eprintln!("Backtrace is disabled.");
+            eprintln!("run with `RUST_BACKTRACE=1` environment variable to display a backtrace");
+        }
+        std::backtrace::BacktraceStatus::Captured => {
+            eprintln!("{bt}");
+        }
+        s => {
+            eprintln!("Unknown backtrace status: {s:?}");
+        }
+    }
+}
+
+pub(crate) fn set_panic_hook() {
+    if GC_THREADS.set(Default::default()).is_err() {
+        return;
+    }
+
+    let old_hook = std::panic::take_hook();
+
+    std::panic::set_hook(Box::new(move |panic_info| {
+        if is_gc_thread(std::thread::current().id()) {
+            handle_gc_thread_panic(panic_info);
+
+            (crate::binding().upcalls().gc_thread_panic_handler)();
+        } else {
+            old_hook(panic_info);
+            (crate::MUTATOR_THREAD_PANIC_HANDLER
+                .get()
+                .expect("MUTATOR_THREAD_PANIC_HANDLER is not set"))();
+        }
+    }));
+}
+
+/// This kind of assertion is enabled if either building in debug mode or the
+/// "extra_assert" feature is enabled.
+#[macro_export]
+macro_rules! extra_assert {
+    ($($arg:tt)*) => {
+        if std::cfg!(any(debug_assertions, feature = "extra_assert")) {
+            std::assert!($($arg)*);
+        }
+    };
+}
diff --git a/gc/mmtk/src/object_model.rs b/gc/mmtk/src/object_model.rs
new file mode 100644
index 0000000000..d673ca11a0
--- /dev/null
+++ b/gc/mmtk/src/object_model.rs
@@ -0,0 +1,124 @@
+use std::ptr::copy_nonoverlapping;
+
+use crate::abi;
+use crate::abi::RubyObjectAccess;
+use crate::abi::MIN_OBJ_ALIGN;
+use crate::abi::OBJREF_OFFSET;
+use crate::Ruby;
+use mmtk::util::constants::BITS_IN_BYTE;
+use mmtk::util::copy::CopySemantics;
+use mmtk::util::copy::GCWorkerCopyContext;
+use mmtk::util::Address;
+use mmtk::util::ObjectReference;
+use mmtk::vm::*;
+
+pub struct VMObjectModel {}
+
+impl VMObjectModel {
+    const OBJREF_OFFSET: usize = abi::OBJREF_OFFSET;
+}
+
+impl ObjectModel<Ruby> for VMObjectModel {
+    const GLOBAL_LOG_BIT_SPEC: VMGlobalLogBitSpec = VMGlobalLogBitSpec::side_first();
+
+    // We overwrite the prepended word which were used to hold object sizes.
+    const LOCAL_FORWARDING_POINTER_SPEC: VMLocalForwardingPointerSpec =
+        VMLocalForwardingPointerSpec::in_header(-((OBJREF_OFFSET * BITS_IN_BYTE) as isize));
+
+    const LOCAL_FORWARDING_BITS_SPEC: VMLocalForwardingBitsSpec =
+        VMLocalForwardingBitsSpec::side_first();
+
+    const LOCAL_MARK_BIT_SPEC: VMLocalMarkBitSpec =
+        VMLocalMarkBitSpec::side_after(Self::LOCAL_FORWARDING_BITS_SPEC.as_spec());
+
+    const LOCAL_PINNING_BIT_SPEC: VMLocalPinningBitSpec =
+        VMLocalPinningBitSpec::side_after(Self::LOCAL_MARK_BIT_SPEC.as_spec());
+
+    const LOCAL_LOS_MARK_NURSERY_SPEC: VMLocalLOSMarkNurserySpec =
+        VMLocalLOSMarkNurserySpec::side_after(Self::LOCAL_PINNING_BIT_SPEC.as_spec());
+
+    const UNIFIED_OBJECT_REFERENCE_ADDRESS: bool = false;
+    const OBJECT_REF_OFFSET_LOWER_BOUND: isize = Self::OBJREF_OFFSET as isize;
+
+    const NEED_VO_BITS_DURING_TRACING: bool = true;
+
+    fn copy(
+        from: ObjectReference,
+        semantics: CopySemantics,
+        copy_context: &mut GCWorkerCopyContext<Ruby>,
+    ) -> ObjectReference {
+        let from_acc = RubyObjectAccess::from_objref(from);
+        let from_start = from_acc.obj_start();
+        let object_size = from_acc.object_size();
+        let to_start = copy_context.alloc_copy(from, object_size, MIN_OBJ_ALIGN, 0, semantics);
+        debug_assert!(!to_start.is_zero());
+        let to_payload = to_start.add(OBJREF_OFFSET);
+        unsafe {
+            copy_nonoverlapping::<u8>(from_start.to_ptr(), to_start.to_mut_ptr(), object_size);
+        }
+        let to_obj = unsafe { ObjectReference::from_raw_address_unchecked(to_payload) };
+        copy_context.post_copy(to_obj, object_size, semantics);
+        trace!("Copied object from {} to {}", from, to_obj);
+
+        (crate::binding().upcalls().move_obj_during_marking)(from, to_obj);
+
+        #[cfg(feature = "clear_old_copy")]
+        {
+            trace!(
+                "Clearing old copy {} ({}-{})",
+                from,
+                from_start,
+                from_start + object_size
+            );
+            // For debug purpose, we clear the old copy so that if the Ruby VM reads from the old
+            // copy again, it will likely result in an error.
+            unsafe { std::ptr::write_bytes::<u8>(from_start.to_mut_ptr(), 0, object_size) }
+        }
+
+        to_obj
+    }
+
+    fn copy_to(_from: ObjectReference, _to: ObjectReference, _region: Address) -> Address {
+        unimplemented!(
+            "This function cannot be called because we do not support MarkCompact for Ruby."
+        )
+    }
+
+    fn get_reference_when_copied_to(_from: ObjectReference, _to: Address) -> ObjectReference {
+        unimplemented!(
+            "This function cannot be called because we do not support MarkCompact for Ruby."
+        )
+    }
+
+    fn get_current_size(object: ObjectReference) -> usize {
+        RubyObjectAccess::from_objref(object).object_size()
+    }
+
+    fn get_type_descriptor(_reference: ObjectReference) -> &'static [i8] {
+        todo!()
+    }
+
+    fn ref_to_object_start(object: ObjectReference) -> Address {
+        RubyObjectAccess::from_objref(object).obj_start()
+    }
+
+    fn ref_to_header(object: ObjectReference) -> Address {
+        RubyObjectAccess::from_objref(object).payload_addr()
+    }
+
+    fn get_size_when_copied(object: ObjectReference) -> usize {
+        Self::get_current_size(object)
+    }
+
+    fn get_align_when_copied(_object: ObjectReference) -> usize {
+        todo!()
+    }
+
+    fn get_align_offset_when_copied(_object: ObjectReference) -> usize {
+        todo!()
+    }
+
+    fn dump_object(_object: ObjectReference) {
+        todo!()
+    }
+}
diff --git a/gc/mmtk/src/pinning_registry.rs b/gc/mmtk/src/pinning_registry.rs
new file mode 100644
index 0000000000..b498b508f1
--- /dev/null
+++ b/gc/mmtk/src/pinning_registry.rs
@@ -0,0 +1,187 @@
+use std::sync::Mutex;
+
+use mmtk::memory_manager;
+use mmtk::scheduler::GCWork;
+use mmtk::scheduler::GCWorker;
+use mmtk::scheduler::WorkBucketStage;
+use mmtk::util::ObjectReference;
+use mmtk::util::VMWorkerThread;
+use mmtk::MMTK;
+
+use crate::abi::GCThreadTLS;
+use crate::upcalls;
+use crate::Ruby;
+
+pub struct PinningRegistry {
+    pinning_objs: Mutex<Vec<ObjectReference>>,
+    pinned_objs: Mutex<Vec<ObjectReference>>,
+}
+
+impl PinningRegistry {
+    pub fn new() -> Self {
+        Self {
+            pinning_objs: Default::default(),
+            pinned_objs: Default::default(),
+        }
+    }
+
+    pub fn register(&self, object: ObjectReference) {
+        let mut pinning_objs = self.pinning_objs.lock().unwrap();
+        pinning_objs.push(object);
+    }
+
+    pub fn pin_children(&self, tls: VMWorkerThread) {
+        if !crate::mmtk().get_plan().current_gc_may_move_object() {
+            log::debug!("The current GC is non-moving, skipping pinning children.");
+            return;
+        }
+
+        let gc_tls = unsafe { GCThreadTLS::from_vwt_check(tls) };
+        let worker = gc_tls.worker();
+
+        let pinning_objs = self
+            .pinning_objs
+            .try_lock()
+            .expect("PinningRegistry should not have races during GC.");
+
+        let packet_size = 512;
+        let work_packets = pinning_objs
+            .chunks(packet_size)
+            .map(|chunk| {
+                Box::new(PinPinningChildren {
+                    pinning_objs: chunk.to_vec(),
+                }) as _
+            })
+            .collect();
+
+        worker.scheduler().work_buckets[WorkBucketStage::Prepare].bulk_add(work_packets);
+    }
+
+    pub fn cleanup(&self, worker: &mut GCWorker<Ruby>) {
+        worker.scheduler().work_buckets[WorkBucketStage::VMRefClosure].add(RemoveDeadPinnings);
+        if crate::mmtk().get_plan().current_gc_may_move_object() {
+            let packet = {
+                let mut pinned_objs = self
+                    .pinned_objs
+                    .try_lock()
+                    .expect("Unexpected contention on pinned_objs");
+                UnpinPinnedObjects {
+                    objs: std::mem::take(&mut pinned_objs),
+                }
+            };
+
+            worker.scheduler().work_buckets[WorkBucketStage::VMRefClosure].add(packet);
+        } else {
+            debug!("The current GC is non-moving, skipping unpinning objects.");
+            debug_assert_eq!(
+                {
+                    let pinned_objs = self
+                        .pinned_objs
+                        .try_lock()
+                        .expect("Unexpected contention on pinned_objs");
+                    pinned_objs.len()
+                },
+                0
+            );
+        }
+    }
+}
+
+impl Default for PinningRegistry {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+struct PinPinningChildren {
+    pinning_objs: Vec<ObjectReference>,
+}
+
+impl GCWork<Ruby> for PinPinningChildren {
+    fn do_work(&mut self, worker: &mut GCWorker<Ruby>, _mmtk: &'static MMTK<Ruby>) {
+        let gc_tls = unsafe { GCThreadTLS::from_vwt_check(worker.tls) };
+        let mut pinned_objs = vec![];
+        let mut newly_pinned_objs = vec![];
+
+        let visit_object = |_worker, target_object: ObjectReference, pin| {
+            log::trace!(
+                "    -> {} {}",
+                if pin { "(pin)" } else { "     " },
+                target_object
+            );
+            if pin {
+                debug_assert!(
+                    target_object.get_forwarded_object().is_none(),
+                    "Trying to pin {target_object} but has been moved"
+                );
+
+                pinned_objs.push(target_object);
+            }
+            target_object
+        };
+
+        gc_tls
+            .object_closure
+            .set_temporarily_and_run_code(visit_object, || {
+                for obj in self.pinning_objs.iter().cloned() {
+                    log::trace!("  Pinning: {}", obj);
+                    (upcalls().call_gc_mark_children)(obj);
+                }
+            });
+
+        for target_object in pinned_objs {
+            if memory_manager::pin_object(target_object) {
+                newly_pinned_objs.push(target_object);
+            }
+        }
+
+        let mut pinned_objs = crate::binding()
+            .pinning_registry
+            .pinned_objs
+            .lock()
+            .unwrap();
+        pinned_objs.append(&mut newly_pinned_objs);
+    }
+}
+
+struct RemoveDeadPinnings;
+
+impl GCWork<Ruby> for RemoveDeadPinnings {
+    fn do_work(&mut self, _worker: &mut GCWorker<Ruby>, _mmtk: &'static MMTK<Ruby>) {
+        log::debug!("Removing dead Pinnings...");
+
+        let registry = &crate::binding().pinning_registry;
+        {
+            let mut pinning_objs = registry
+                .pinning_objs
+                .try_lock()
+                .expect("PinningRegistry should not have races during GC.");
+
+            pinning_objs.retain_mut(|obj| {
+                if obj.is_live() {
+                    let new_obj = obj.get_forwarded_object().unwrap_or(*obj);
+                    *obj = new_obj;
+                    true
+                } else {
+                    log::trace!("  Dead Pinning removed: {}", *obj);
+                    false
+                }
+            });
+        }
+    }
+}
+
+struct UnpinPinnedObjects {
+    objs: Vec<ObjectReference>,
+}
+
+impl GCWork<Ruby> for UnpinPinnedObjects {
+    fn do_work(&mut self, _worker: &mut GCWorker<Ruby>, _mmtk: &'static MMTK<Ruby>) {
+        log::debug!("Unpinning pinned objects...");
+
+        for obj in self.objs.iter() {
+            let unpinned = memory_manager::unpin_object(*obj);
+            debug_assert!(unpinned);
+        }
+    }
+}
diff --git a/gc/mmtk/src/reference_glue.rs b/gc/mmtk/src/reference_glue.rs
new file mode 100644
index 0000000000..1272bd54c1
--- /dev/null
+++ b/gc/mmtk/src/reference_glue.rs
@@ -0,0 +1,26 @@
+use crate::Ruby;
+use mmtk::util::ObjectReference;
+use mmtk::util::VMWorkerThread;
+use mmtk::vm::ReferenceGlue;
+
+pub struct VMReferenceGlue {}
+
+impl ReferenceGlue<Ruby> for VMReferenceGlue {
+    type FinalizableType = ObjectReference;
+
+    fn get_referent(_object: ObjectReference) -> Option<ObjectReference> {
+        unimplemented!()
+    }
+
+    fn set_referent(_reff: ObjectReference, _referent: ObjectReference) {
+        unimplemented!()
+    }
+
+    fn enqueue_references(_references: &[ObjectReference], _tls: VMWorkerThread) {
+        unimplemented!()
+    }
+
+    fn clear_referent(_new_reference: ObjectReference) {
+        unimplemented!()
+    }
+}
diff --git a/gc/mmtk/src/scanning.rs b/gc/mmtk/src/scanning.rs
new file mode 100644
index 0000000000..355a2e7759
--- /dev/null
+++ b/gc/mmtk/src/scanning.rs
@@ -0,0 +1,291 @@
+use crate::abi::GCThreadTLS;
+
+use crate::upcalls;
+use crate::utils::ChunkedVecCollector;
+use crate::Ruby;
+use crate::RubySlot;
+use mmtk::memory_manager;
+use mmtk::scheduler::GCWork;
+use mmtk::scheduler::GCWorker;
+use mmtk::scheduler::WorkBucketStage;
+use mmtk::util::ObjectReference;
+use mmtk::util::VMWorkerThread;
+use mmtk::vm::ObjectTracer;
+use mmtk::vm::RootsWorkFactory;
+use mmtk::vm::Scanning;
+use mmtk::vm::SlotVisitor;
+use mmtk::Mutator;
+
+pub struct VMScanning {}
+
+impl Scanning<Ruby> for VMScanning {
+    const UNIQUE_OBJECT_ENQUEUING: bool = true;
+
+    fn support_slot_enqueuing(_tls: VMWorkerThread, _object: ObjectReference) -> bool {
+        false
+    }
+
+    fn scan_object<EV: SlotVisitor<RubySlot>>(
+        _tls: VMWorkerThread,
+        _object: ObjectReference,
+        _slot_visitor: &mut EV,
+    ) {
+        unreachable!("We have not enabled slot enqueuing for any types, yet.");
+    }
+
+    fn scan_object_and_trace_edges<OT: ObjectTracer>(
+        tls: VMWorkerThread,
+        object: ObjectReference,
+        object_tracer: &mut OT,
+    ) {
+        debug_assert!(
+            mmtk::memory_manager::is_mmtk_object(object.to_raw_address()).is_some(),
+            "Not an MMTk object: {object}",
+        );
+        let gc_tls = unsafe { GCThreadTLS::from_vwt_check(tls) };
+        let visit_object = |_worker, target_object: ObjectReference, pin| {
+            trace!(
+                "Tracing edge: {} -> {}{}",
+                object,
+                target_object,
+                if pin { " pin" } else { "" }
+            );
+            debug_assert!(
+                mmtk::memory_manager::is_mmtk_object(target_object.to_raw_address()).is_some(),
+                "Destination is not an MMTk object. Src: {object} dst: {target_object}"
+            );
+
+            debug_assert!(
+                // If we are in a moving GC, all objects should be pinned by PinningRegistry.
+                // If it is requested that target_object be pinned but it is not pinned, then
+                // it is a bug because it could be moved.
+                if crate::mmtk().get_plan().current_gc_may_move_object() && pin {
+                    memory_manager::is_pinned(target_object)
+                } else {
+                    true
+                },
+                "Object {object} is trying to pin {target_object}"
+            );
+
+            let forwarded_target = object_tracer.trace_object(target_object);
+            if forwarded_target != target_object {
+                trace!("  Forwarded target {target_object} -> {forwarded_target}");
+            }
+            forwarded_target
+        };
+        gc_tls
+            .object_closure
+            .set_temporarily_and_run_code(visit_object, || {
+                (upcalls().call_gc_mark_children)(object);
+
+                if crate::mmtk().get_plan().current_gc_may_move_object() {
+                    (upcalls().update_object_references)(object);
+                }
+            });
+    }
+
+    fn notify_initial_thread_scan_complete(_partial_scan: bool, _tls: VMWorkerThread) {
+        // Do nothing
+    }
+
+    fn scan_roots_in_mutator_thread(
+        _tls: VMWorkerThread,
+        _mutator: &'static mut Mutator<Ruby>,
+        mut _factory: impl RootsWorkFactory<RubySlot>,
+    ) {
+        // Do nothing.  All stacks (including Ruby stacks and machine stacks) are reachable from
+        // `rb_vm_t` -> ractor -> thread -> fiber -> stacks.  It is part of `ScanGCRoots` which
+        // calls `rb_gc_mark_roots` -> `rb_vm_mark`.
+    }
+
+    fn scan_vm_specific_roots(tls: VMWorkerThread, factory: impl RootsWorkFactory<RubySlot>) {
+        let gc_tls = unsafe { GCThreadTLS::from_vwt_check(tls) };
+        let root_scanning_work_packets: Vec<Box<dyn GCWork<Ruby>>> = vec![
+            Box::new(ScanGCRoots::new(factory.clone())),
+            Box::new(ScanObjspace::new(factory.clone())),
+        ];
+        gc_tls.worker().scheduler().work_buckets[WorkBucketStage::Prepare]
+            .bulk_add(root_scanning_work_packets);
+
+        // Generate WB-unprotected roots scanning work packets
+
+        'gen_wb_unprotected_work: {
+            let is_nursery_gc = (crate::mmtk().get_plan().generational())
+                .is_some_and(|gen| gen.is_current_gc_nursery());
+            if !is_nursery_gc {
+                break 'gen_wb_unprotected_work;
+            }
+
+            let vecs = {
+                let guard = crate::binding()
+                    .wb_unprotected_objects
+                    .try_lock()
+                    .expect("Someone is holding the lock of wb_unprotected_objects?");
+                if guard.is_empty() {
+                    break 'gen_wb_unprotected_work;
+                }
+
+                let mut collector = ChunkedVecCollector::new(128);
+                collector.extend(guard.iter().copied());
+                collector.into_vecs()
+            };
+
+            let packets = vecs
+                .into_iter()
+                .map(|objects| {
+                    let factory = factory.clone();
+                    Box::new(ScanWbUnprotectedRoots { factory, objects }) as _
+                })
+                .collect::<Vec<_>>();
+
+            gc_tls.worker().scheduler().work_buckets[WorkBucketStage::Prepare].bulk_add(packets);
+        }
+    }
+
+    fn supports_return_barrier() -> bool {
+        false
+    }
+
+    fn prepare_for_roots_re_scanning() {
+        todo!()
+    }
+
+    fn process_weak_refs(
+        worker: &mut GCWorker<Ruby>,
+        tracer_context: impl mmtk::vm::ObjectTracerContext<Ruby>,
+    ) -> bool {
+        crate::binding()
+            .weak_proc
+            .process_weak_stuff(worker, tracer_context);
+        crate::binding().pinning_registry.cleanup(worker);
+        false
+    }
+
+    fn forward_weak_refs(
+        _worker: &mut GCWorker<Ruby>,
+        _tracer_context: impl mmtk::vm::ObjectTracerContext<Ruby>,
+    ) {
+        panic!("We can't use MarkCompact in Ruby.");
+    }
+}
+
+impl VMScanning {
+    const OBJECT_BUFFER_SIZE: usize = 4096;
+
+    fn collect_object_roots_in<F: FnOnce()>(
+        root_scan_kind: &str,
+        gc_tls: &mut GCThreadTLS,
+        factory: &mut impl RootsWorkFactory<RubySlot>,
+        callback: F,
+    ) {
+        let mut buffer: Vec<ObjectReference> = Vec::new();
+        let visit_object = |_, object: ObjectReference, pin| {
+            debug!(
+                "[{}] Visiting object: {}{}",
+                root_scan_kind,
+                object,
+                if pin {
+                    "(unmovable root)"
+                } else {
+                    "(movable, but we pin it anyway)"
+                }
+            );
+            debug_assert!(
+                mmtk::memory_manager::is_mmtk_object(object.to_raw_address()).is_some(),
+                "Root does not point to MMTk object.  object: {object}"
+            );
+            buffer.push(object);
+            if buffer.len() >= Self::OBJECT_BUFFER_SIZE {
+                factory.create_process_pinning_roots_work(std::mem::take(&mut buffer));
+            }
+            object
+        };
+        gc_tls
+            .object_closure
+            .set_temporarily_and_run_code(visit_object, callback);
+
+        if !buffer.is_empty() {
+            factory.create_process_pinning_roots_work(buffer);
+        }
+    }
+}
+
+trait GlobaRootScanningWork {
+    type F: RootsWorkFactory<RubySlot>;
+    const NAME: &'static str;
+
+    fn new(factory: Self::F) -> Self;
+    fn scan_roots();
+    fn roots_work_factory(&mut self) -> &mut Self::F;
+
+    fn do_work(&mut self, worker: &mut GCWorker<Ruby>, _mmtk: &'static mmtk::MMTK<Ruby>) {
+        let gc_tls = unsafe { GCThreadTLS::from_vwt_check(worker.tls) };
+
+        let factory = self.roots_work_factory();
+
+        VMScanning::collect_object_roots_in(Self::NAME, gc_tls, factory, || {
+            Self::scan_roots();
+        });
+    }
+}
+
+macro_rules! define_global_root_scanner {
+    ($name: ident, $code: expr) => {
+        struct $name<F: RootsWorkFactory<RubySlot>> {
+            factory: F,
+        }
+        impl<F: RootsWorkFactory<RubySlot>> GlobaRootScanningWork for $name<F> {
+            type F = F;
+            const NAME: &'static str = stringify!($name);
+            fn new(factory: Self::F) -> Self {
+                Self { factory }
+            }
+            fn scan_roots() {
+                $code
+            }
+            fn roots_work_factory(&mut self) -> &mut Self::F {
+                &mut self.factory
+            }
+        }
+        impl<F: RootsWorkFactory<RubySlot>> GCWork<Ruby> for $name<F> {
+            fn do_work(&mut self, worker: &mut GCWorker<Ruby>, mmtk: &'static mmtk::MMTK<Ruby>) {
+                GlobaRootScanningWork::do_work(self, worker, mmtk);
+            }
+        }
+    };
+}
+
+define_global_root_scanner!(ScanGCRoots, {
+    (crate::upcalls().scan_gc_roots)();
+});
+
+define_global_root_scanner!(ScanObjspace, {
+    (crate::upcalls().scan_objspace)();
+});
+
+struct ScanWbUnprotectedRoots<F: RootsWorkFactory<RubySlot>> {
+    factory: F,
+    objects: Vec<ObjectReference>,
+}
+
+impl<F: RootsWorkFactory<RubySlot>> GCWork<Ruby> for ScanWbUnprotectedRoots<F> {
+    fn do_work(&mut self, worker: &mut GCWorker<Ruby>, _mmtk: &'static mmtk::MMTK<Ruby>) {
+        let gc_tls = unsafe { GCThreadTLS::from_vwt_check(worker.tls) };
+        VMScanning::collect_object_roots_in("wb_unprot_roots", gc_tls, &mut self.factory, || {
+            for object in self.objects.iter().copied() {
+                if object.is_reachable() {
+                    debug!("[wb_unprot_roots] Visiting WB-unprotected object (parent): {object}");
+                    (upcalls().call_gc_mark_children)(object);
+
+                    if crate::mmtk().get_plan().current_gc_may_move_object() {
+                        (upcalls().update_object_references)(object);
+                    }
+                } else {
+                    debug!(
+                        "[wb_unprot_roots] Skipping young WB-unprotected object (parent): {object}"
+                    );
+                }
+            }
+        });
+    }
+}
diff --git a/gc/mmtk/src/utils.rs b/gc/mmtk/src/utils.rs
new file mode 100644
index 0000000000..d1979eaf58
--- /dev/null
+++ b/gc/mmtk/src/utils.rs
@@ -0,0 +1,161 @@
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+
+use atomic_refcell::AtomicRefCell;
+use mmtk::scheduler::GCWork;
+use mmtk::scheduler::GCWorker;
+use mmtk::scheduler::WorkBucketStage;
+
+use crate::Ruby;
+use sysinfo::System;
+
+pub struct ChunkedVecCollector<T> {
+    vecs: Vec<Vec<T>>,
+    current_vec: Vec<T>,
+    chunk_size: usize,
+}
+
+impl<T> ChunkedVecCollector<T> {
+    pub fn new(chunk_size: usize) -> Self {
+        Self {
+            vecs: vec![],
+            current_vec: Vec::with_capacity(chunk_size),
+            chunk_size,
+        }
+    }
+
+    pub fn add(&mut self, item: T) {
+        self.current_vec.push(item);
+        if self.current_vec.len() == self.chunk_size {
+            self.flush();
+        }
+    }
+
+    fn flush(&mut self) {
+        let new_vec = Vec::with_capacity(self.chunk_size);
+        let old_vec = std::mem::replace(&mut self.current_vec, new_vec);
+        self.vecs.push(old_vec);
+    }
+
+    pub fn into_vecs(mut self) -> Vec<Vec<T>> {
+        if !self.current_vec.is_empty() {
+            self.flush();
+        }
+        self.vecs
+    }
+}
+
+impl<A> Extend<A> for ChunkedVecCollector<A> {
+    fn extend<T: IntoIterator<Item = A>>(&mut self, iter: T) {
+        for item in iter {
+            self.add(item);
+        }
+    }
+}
+
+pub struct AfterAll {
+    counter: AtomicUsize,
+    stage: WorkBucketStage,
+    packets: AtomicRefCell<Vec<Box<dyn GCWork<Ruby>>>>,
+}
+
+unsafe impl Sync for AfterAll {}
+
+impl AfterAll {
+    pub fn new(stage: WorkBucketStage) -> Self {
+        Self {
+            counter: AtomicUsize::new(0),
+            stage,
+            packets: AtomicRefCell::new(vec![]),
+        }
+    }
+
+    pub fn add_packets(&self, mut packets: Vec<Box<dyn GCWork<Ruby>>>) {
+        let mut borrow = self.packets.borrow_mut();
+        borrow.append(&mut packets);
+    }
+
+    pub fn count_up(&self, n: usize) {
+        self.counter.fetch_add(n, Ordering::SeqCst);
+    }
+
+    pub fn count_down(&self, worker: &mut GCWorker<Ruby>) {
+        let old = self.counter.fetch_sub(1, Ordering::SeqCst);
+        if old == 1 {
+            let packets = {
+                let mut borrow = self.packets.borrow_mut();
+                std::mem::take(borrow.as_mut())
+            };
+            worker.scheduler().work_buckets[self.stage].bulk_add(packets);
+        }
+    }
+}
+
+pub fn default_heap_max() -> usize {
+    let mut s = System::new();
+    s.refresh_memory();
+    s.total_memory()
+        .checked_mul(80)
+        .and_then(|v| v.checked_div(100))
+        .expect("Invalid Memory size") as usize
+}
+
+pub fn parse_capacity(input: &str) -> Option<usize> {
+    let trimmed = input.trim();
+
+    const KIBIBYTE: usize = 1024;
+    const MEBIBYTE: usize = 1024 * KIBIBYTE;
+    const GIBIBYTE: usize = 1024 * MEBIBYTE;
+
+    let (number, suffix) = if let Some(pos) = trimmed.find(|c: char| !c.is_numeric()) {
+        trimmed.split_at(pos)
+    } else {
+        (trimmed, "")
+    };
+
+    let Ok(v) = number.parse::<usize>() else {
+        return None;
+    };
+
+    match suffix {
+        "GiB" => Some(v * GIBIBYTE),
+        "MiB" => Some(v * MEBIBYTE),
+        "KiB" => Some(v * KIBIBYTE),
+        "" => Some(v),
+        _ => None,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_capacity_parses_bare_bytes() {
+        assert_eq!(Some(1234), parse_capacity("1234"));
+    }
+
+    #[test]
+    fn test_parse_capacity_parses_kibibytes() {
+        assert_eq!(Some(10240), parse_capacity("10KiB"));
+    }
+
+    #[test]
+    fn test_parse_capacity_parses_mebibytes() {
+        assert_eq!(Some(10485760), parse_capacity("10MiB"))
+    }
+
+    #[test]
+    fn test_parse_capacity_parses_gibibytes() {
+        assert_eq!(Some(10737418240), parse_capacity("10GiB"))
+    }
+
+    #[test]
+    fn test_parse_capacity_parses_nonsense_values() {
+        assert_eq!(None, parse_capacity("notanumber"));
+        assert_eq!(None, parse_capacity("5tartswithanumber"));
+        assert_eq!(None, parse_capacity("number1nthemiddle"));
+        assert_eq!(None, parse_capacity("numberattheend111"));
+        assert_eq!(None, parse_capacity("mult1pl3numb3r5"));
+    }
+}
diff --git a/gc/mmtk/src/weak_proc.rs b/gc/mmtk/src/weak_proc.rs
new file mode 100644
index 0000000000..d38dbe04a4
--- /dev/null
+++ b/gc/mmtk/src/weak_proc.rs
@@ -0,0 +1,328 @@
+use std::sync::Mutex;
+
+use mmtk::scheduler::GCWork;
+use mmtk::scheduler::GCWorker;
+use mmtk::scheduler::WorkBucketStage;
+use mmtk::util::ObjectReference;
+use mmtk::vm::ObjectTracerContext;
+
+use crate::abi::GCThreadTLS;
+use crate::upcalls;
+use crate::Ruby;
+
+pub struct WeakProcessor {
+    non_parallel_obj_free_candidates: Mutex<Vec<ObjectReference>>,
+    parallel_obj_free_candidates: Vec<Mutex<Vec<ObjectReference>>>,
+
+    /// Objects that needs `obj_free` called when dying.
+    /// If it is a bottleneck, replace it with a lock-free data structure,
+    /// or add candidates in batch.
+    weak_references: Mutex<Vec<ObjectReference>>,
+}
+
+impl Default for WeakProcessor {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl WeakProcessor {
+    pub fn new() -> Self {
+        Self {
+            non_parallel_obj_free_candidates: Mutex::new(Vec::new()),
+            parallel_obj_free_candidates: vec![Mutex::new(Vec::new())],
+            weak_references: Mutex::new(Vec::new()),
+        }
+    }
+
+    pub fn init_parallel_obj_free_candidates(&mut self, num_workers: usize) {
+        debug_assert_eq!(self.parallel_obj_free_candidates.len(), 1);
+
+        for _ in 1..num_workers {
+            self.parallel_obj_free_candidates
+                .push(Mutex::new(Vec::new()));
+        }
+    }
+
+    /// Add a batch of objects as candidates for `obj_free`.
+    ///
+    /// Amortizes mutex acquisition over the entire batch. Called when a
+    /// mutator's local buffer is flushed (buffer full or stop-the-world).
+    pub fn add_obj_free_candidates_batch(
+        &self,
+        objects: &[ObjectReference],
+        can_parallel_free: bool,
+    ) {
+        if objects.is_empty() {
+            return;
+        }
+
+        if can_parallel_free {
+            let num_buckets = self.parallel_obj_free_candidates.len();
+            for idx in 0..num_buckets {
+                let mut bucket = self.parallel_obj_free_candidates[idx].lock().unwrap();
+                for (i, &obj) in objects.iter().enumerate() {
+                    if i % num_buckets == idx {
+                        bucket.push(obj);
+                    }
+                }
+            }
+        } else {
+            self.non_parallel_obj_free_candidates
+                .lock()
+                .unwrap()
+                .extend_from_slice(objects);
+        }
+    }
+
+    pub fn get_all_obj_free_candidates(&self) -> Vec<ObjectReference> {
+        // let mut obj_free_candidates = self.obj_free_candidates.lock().unwrap();
+        let mut all_obj_free_candidates = self
+            .non_parallel_obj_free_candidates
+            .lock()
+            .unwrap()
+            .to_vec();
+
+        for candidates_mutex in &self.parallel_obj_free_candidates {
+            all_obj_free_candidates.extend(candidates_mutex.lock().unwrap().to_vec());
+        }
+
+        std::mem::take(all_obj_free_candidates.as_mut())
+    }
+
+    pub fn add_weak_reference(&self, object: ObjectReference) {
+        let mut weak_references = self.weak_references.lock().unwrap();
+        weak_references.push(object);
+    }
+
+    pub fn weak_references_count(&self) -> usize {
+        self.weak_references.lock().unwrap().len()
+    }
+
+    pub fn process_weak_stuff(
+        &self,
+        worker: &mut GCWorker<Ruby>,
+        _tracer_context: impl ObjectTracerContext<Ruby>,
+    ) {
+        worker.add_work(
+            WorkBucketStage::VMRefClosure,
+            ProcessNonParallelObjFreeCanadidates {},
+        );
+
+        for index in 0..self.parallel_obj_free_candidates.len() {
+            worker.add_work(
+                WorkBucketStage::VMRefClosure,
+                ProcessParallelObjFreeCandidates { index },
+            );
+        }
+
+        worker.add_work(WorkBucketStage::VMRefClosure, ProcessWeakReferences);
+
+        worker.add_work(WorkBucketStage::Prepare, UpdateFinalizerObjIdTables);
+
+        let global_tables_count = (crate::upcalls().global_tables_count)();
+        let work_packets = (0..global_tables_count)
+            .map(|i| Box::new(UpdateGlobalTables { idx: i }) as _)
+            .collect();
+
+        worker.scheduler().work_buckets[WorkBucketStage::VMRefClosure].bulk_add(work_packets);
+
+        worker.scheduler().work_buckets[WorkBucketStage::VMRefClosure]
+            .bulk_add(vec![Box::new(UpdateWbUnprotectedObjectsList) as _]);
+    }
+}
+
+fn process_obj_free_candidates(obj_free_candidates: &mut Vec<ObjectReference>) {
+    // Process obj_free
+    let mut new_candidates = Vec::new();
+
+    for object in obj_free_candidates.iter().copied() {
+        if object.is_reachable() {
+            // Forward and add back to the candidate list.
+            let new_object = object.forward();
+            trace!("Forwarding obj_free candidate: {object} -> {new_object}");
+            new_candidates.push(new_object);
+        } else {
+            (upcalls().call_obj_free)(object);
+        }
+    }
+
+    *obj_free_candidates = new_candidates;
+}
+
+struct ProcessParallelObjFreeCandidates {
+    index: usize,
+}
+
+impl GCWork<Ruby> for ProcessParallelObjFreeCandidates {
+    fn do_work(&mut self, _worker: &mut GCWorker<Ruby>, _mmtk: &'static mmtk::MMTK<Ruby>) {
+        let mut obj_free_candidates = crate::binding().weak_proc.parallel_obj_free_candidates
+            [self.index]
+            .try_lock()
+            .expect("Lock for parallel_obj_free_candidates should not be held");
+
+        process_obj_free_candidates(&mut obj_free_candidates);
+    }
+}
+
+struct ProcessNonParallelObjFreeCanadidates;
+
+impl GCWork<Ruby> for ProcessNonParallelObjFreeCanadidates {
+    fn do_work(&mut self, _worker: &mut GCWorker<Ruby>, _mmtk: &'static mmtk::MMTK<Ruby>) {
+        let mut obj_free_candidates = crate::binding()
+            .weak_proc
+            .non_parallel_obj_free_candidates
+            .try_lock()
+            .expect("Lock for non_parallel_obj_free_candidates should not be held");
+
+        process_obj_free_candidates(&mut obj_free_candidates);
+    }
+}
+
+struct ProcessWeakReferences;
+
+impl GCWork<Ruby> for ProcessWeakReferences {
+    fn do_work(&mut self, worker: &mut GCWorker<Ruby>, _mmtk: &'static mmtk::MMTK<Ruby>) {
+        if crate::mmtk().get_plan().current_gc_may_move_object() {
+            let gc_tls: &mut GCThreadTLS = unsafe { GCThreadTLS::from_vwt_check(worker.tls) };
+
+            let visit_object = |_worker, target_object: ObjectReference, _pin| {
+                debug_assert!(
+                    mmtk::memory_manager::is_mmtk_object(target_object.to_raw_address()).is_some(),
+                    "Destination is not an MMTk object"
+                );
+
+                target_object
+                    .get_forwarded_object()
+                    .unwrap_or(target_object)
+            };
+
+            gc_tls
+                .object_closure
+                .set_temporarily_and_run_code(visit_object, || {
+                    self.process_weak_references(true);
+                })
+        } else {
+            self.process_weak_references(false);
+        }
+    }
+}
+
+impl ProcessWeakReferences {
+    fn process_weak_references(&mut self, moving_gc: bool) {
+        let mut weak_references = crate::binding()
+            .weak_proc
+            .weak_references
+            .try_lock()
+            .expect("Mutators should not be holding the lock.");
+
+        weak_references.retain_mut(|object_ptr| {
+            let object = object_ptr.get_forwarded_object().unwrap_or(*object_ptr);
+
+            if object != *object_ptr {
+                *object_ptr = object;
+            }
+
+            if object.is_reachable() {
+                (upcalls().handle_weak_references)(object, moving_gc);
+
+                true
+            } else {
+                false
+            }
+        });
+    }
+}
+
+trait GlobalTableProcessingWork {
+    fn process_table(&mut self);
+
+    fn do_work(&mut self, worker: &mut GCWorker<Ruby>, _mmtk: &'static mmtk::MMTK<Ruby>) {
+        let gc_tls = unsafe { GCThreadTLS::from_vwt_check(worker.tls) };
+
+        // `hash_foreach_replace` depends on `gb_object_moved_p` which has to have the semantics
+        // of `trace_object` due to the way it is used in `UPDATE_IF_MOVED`.
+        let forward_object = |_worker, object: ObjectReference, _pin| {
+            debug_assert!(
+                mmtk::memory_manager::is_mmtk_object(object.to_raw_address()).is_some(),
+                "{object} is not an MMTk object"
+            );
+            let result = object.forward();
+            trace!("Forwarding reference: {object} -> {result}");
+            result
+        };
+
+        gc_tls
+            .object_closure
+            .set_temporarily_and_run_code(forward_object, || {
+                self.process_table();
+            });
+    }
+}
+
+struct UpdateFinalizerObjIdTables;
+impl GlobalTableProcessingWork for UpdateFinalizerObjIdTables {
+    fn process_table(&mut self) {
+        (crate::upcalls().update_finalizer_table)();
+    }
+}
+impl GCWork<Ruby> for UpdateFinalizerObjIdTables {
+    fn do_work(&mut self, worker: &mut GCWorker<Ruby>, mmtk: &'static mmtk::MMTK<Ruby>) {
+        GlobalTableProcessingWork::do_work(self, worker, mmtk);
+    }
+}
+
+struct UpdateGlobalTables {
+    idx: i32,
+}
+impl GlobalTableProcessingWork for UpdateGlobalTables {
+    fn process_table(&mut self) {
+        (crate::upcalls().update_global_tables)(
+            self.idx,
+            crate::mmtk().get_plan().current_gc_may_move_object(),
+        )
+    }
+}
+impl GCWork<Ruby> for UpdateGlobalTables {
+    fn do_work(&mut self, worker: &mut GCWorker<Ruby>, mmtk: &'static mmtk::MMTK<Ruby>) {
+        GlobalTableProcessingWork::do_work(self, worker, mmtk);
+    }
+}
+
+struct UpdateWbUnprotectedObjectsList;
+
+impl GCWork<Ruby> for UpdateWbUnprotectedObjectsList {
+    fn do_work(&mut self, _worker: &mut GCWorker<Ruby>, _mmtk: &'static mmtk::MMTK<Ruby>) {
+        let mut objects = crate::binding().wb_unprotected_objects.try_lock().expect(
+            "Someone is holding the lock of wb_unprotected_objects during weak processing phase?",
+        );
+
+        let old_objects = std::mem::take(&mut *objects);
+
+        debug!("Updating {} WB-unprotected objects", old_objects.len());
+
+        for object in old_objects {
+            if object.is_reachable() {
+                // Forward and add back to the candidate list.
+                let new_object = object.forward();
+                trace!("Forwarding WB-unprotected object: {object} -> {new_object}");
+                objects.insert(new_object);
+            } else {
+                trace!("Removing WB-unprotected object from list: {object}");
+            }
+        }
+
+        debug!("Retained {} live WB-unprotected objects.", objects.len());
+    }
+}
+
+// Provide a shorthand `object.forward()`.
+trait Forwardable {
+    fn forward(&self) -> Self;
+}
+
+impl Forwardable for ObjectReference {
+    fn forward(&self) -> Self {
+        self.get_forwarded_object().unwrap_or(*self)
+    }
+}