summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Valentine-House <matt@eightbitraptor.com>2026-02-03 13:51:35 -0500
committerMatt Valentine-House <matt@eightbitraptor.com>2026-02-13 14:29:14 +0000
commit5c71c8c55bd71cff64e670097e4800d9e3d67ae4 (patch)
tree97659aac5f5ff20ab3de00db1ff87aea850d1155
parentc9a49411dcfa6ac42d15ce33858207cb4b11025e (diff)
gc: implement slot-based bitmap indexing with division magic
Replace the BASE_SLOT_SIZE-granularity bitmap scheme with slot-based
indexing where each bit represents one slot regardless of size.

Key changes:
- Add slot_div_magic field to heap_page for fast division
- Use Go-inspired formula: slot_index = (offset * div_magic) >> 32
- Update all bitmap iteration to use one-bit-per-slot scheme
- Remove slot_bits_mask from rb_heap_t (no longer needed)

This enables arbitrary slot sizes (not just power-of-two multiples of
BASE_SLOT_SIZE) by decoupling bitmap indexing from slot size.

Functions updated:
- gc_sweep_plane/gc_sweep_page
- rgengc_rememberset_mark/rgengc_rememberset_mark_plane
- gc_marks_wb_unprotected_objects/gc_marks_wb_unprotected_objects_plane
- gc_compact_plane/gc_compact_page
- invalidate_moved_plane/invalidate_moved_page
- RVALUE_AGE_GET/RVALUE_AGE_SET_BITMAP

Inspired by Go runtime's mbitmap.go divideByElemSize().
-rw-r--r--gc/default/default.c181
1 files changed, 78 insertions, 103 deletions
diff --git a/gc/default/default.c b/gc/default/default.c
index bfddb08ea5..c766521108 100644
--- a/gc/default/default.c
+++ b/gc/default/default.c
@@ -452,7 +452,6 @@ typedef int (*gc_compact_compare_func)(const void *l, const void *r, void *d);
typedef struct rb_heap_struct {
short slot_size;
- bits_t slot_bits_mask;
/* Basic statistics */
size_t total_allocated_pages;
@@ -765,6 +764,7 @@ struct free_slot {
struct heap_page {
unsigned short slot_size;
+ uint32_t slot_div_magic;
unsigned short total_slots;
unsigned short free_slots;
unsigned short final_slots;
@@ -841,17 +841,33 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page *
#define GET_PAGE_HEADER(x) (&GET_PAGE_BODY(x)->header)
#define GET_HEAP_PAGE(x) (GET_PAGE_HEADER(x)->page)
-#define NUM_IN_PAGE(p) (((bits_t)(p) & HEAP_PAGE_ALIGN_MASK) / BASE_SLOT_SIZE)
-#define BITMAP_INDEX(p) (NUM_IN_PAGE(p) / BITS_BITLENGTH )
-#define BITMAP_OFFSET(p) (NUM_IN_PAGE(p) & (BITS_BITLENGTH-1))
-#define BITMAP_BIT(p) ((bits_t)1 << BITMAP_OFFSET(p))
+static inline uint32_t
+compute_slot_div_magic(unsigned short slot_size)
+{
+ return (uint32_t)(UINT32_MAX / slot_size) + 1;
+}
+
+static inline size_t
+slot_index_for_offset(size_t offset, uint32_t div_magic)
+{
+ return (size_t)(((uint64_t)offset * div_magic) >> 32);
+}
+
+#define SLOT_INDEX(page, p) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_div_magic)
+#define SLOT_BITMAP_INDEX(page, p) (SLOT_INDEX(page, p) / BITS_BITLENGTH)
+#define SLOT_BITMAP_OFFSET(page, p) (SLOT_INDEX(page, p) & (BITS_BITLENGTH - 1))
+#define SLOT_BITMAP_BIT(page, p) ((bits_t)1 << SLOT_BITMAP_OFFSET(page, p))
+
+#define _MARKED_IN_BITMAP(bits, page, p) ((bits)[SLOT_BITMAP_INDEX(page, p)] & SLOT_BITMAP_BIT(page, p))
+#define _MARK_IN_BITMAP(bits, page, p) ((bits)[SLOT_BITMAP_INDEX(page, p)] |= SLOT_BITMAP_BIT(page, p))
+#define _CLEAR_IN_BITMAP(bits, page, p) ((bits)[SLOT_BITMAP_INDEX(page, p)] &= ~SLOT_BITMAP_BIT(page, p))
-/* Bitmap Operations */
-#define MARKED_IN_BITMAP(bits, p) ((bits)[BITMAP_INDEX(p)] & BITMAP_BIT(p))
-#define MARK_IN_BITMAP(bits, p) ((bits)[BITMAP_INDEX(p)] = (bits)[BITMAP_INDEX(p)] | BITMAP_BIT(p))
-#define CLEAR_IN_BITMAP(bits, p) ((bits)[BITMAP_INDEX(p)] = (bits)[BITMAP_INDEX(p)] & ~BITMAP_BIT(p))
+#define MARKED_IN_BITMAP(bits, p) _MARKED_IN_BITMAP(bits, GET_HEAP_PAGE(p), p)
+#define MARK_IN_BITMAP(bits, p) _MARK_IN_BITMAP(bits, GET_HEAP_PAGE(p), p)
+#define CLEAR_IN_BITMAP(bits, p) _CLEAR_IN_BITMAP(bits, GET_HEAP_PAGE(p), p)
+
+#define NUM_IN_PAGE(p) (((bits_t)(p) & HEAP_PAGE_ALIGN_MASK) / BASE_SLOT_SIZE)
-/* getting bitmap */
#define GET_HEAP_MARK_BITS(x) (&GET_HEAP_PAGE(x)->mark_bits[0])
#define GET_HEAP_PINNED_BITS(x) (&GET_HEAP_PAGE(x)->pinned_bits[0])
#define GET_HEAP_UNCOLLECTIBLE_BITS(x) (&GET_HEAP_PAGE(x)->uncollectible_bits[0])
@@ -861,9 +877,11 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page *
static int
RVALUE_AGE_GET(VALUE obj)
{
- bits_t *age_bits = GET_HEAP_PAGE(obj)->age_bits;
- int idx = BITMAP_INDEX(obj) * 2;
- int shift = BITMAP_OFFSET(obj);
+ struct heap_page *page = GET_HEAP_PAGE(obj);
+ bits_t *age_bits = page->age_bits;
+ size_t slot_idx = SLOT_INDEX(page, obj);
+ size_t idx = (slot_idx / BITS_BITLENGTH) * 2;
+ int shift = (int)(slot_idx & (BITS_BITLENGTH - 1));
int lo = (age_bits[idx] >> shift) & 1;
int hi = (age_bits[idx + 1] >> shift) & 1;
return lo | (hi << 1);
@@ -873,9 +891,11 @@ static void
RVALUE_AGE_SET_BITMAP(VALUE obj, int age)
{
RUBY_ASSERT(age <= RVALUE_OLD_AGE);
- bits_t *age_bits = GET_HEAP_PAGE(obj)->age_bits;
- int idx = BITMAP_INDEX(obj) * 2;
- int shift = BITMAP_OFFSET(obj);
+ struct heap_page *page = GET_HEAP_PAGE(obj);
+ bits_t *age_bits = page->age_bits;
+ size_t slot_idx = SLOT_INDEX(page, obj);
+ size_t idx = (slot_idx / BITS_BITLENGTH) * 2;
+ int shift = (int)(slot_idx & (BITS_BITLENGTH - 1));
bits_t mask = (bits_t)1 << shift;
age_bits[idx] = (age_bits[idx] & ~mask) | ((bits_t)(age & 1) << shift);
@@ -1986,6 +2006,7 @@ heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page)
page->start = start;
page->total_slots = slot_count;
page->slot_size = heap->slot_size;
+ page->slot_div_magic = compute_slot_div_magic(heap->slot_size);
page->heap = heap;
asan_unlock_freelist(page);
@@ -2598,7 +2619,7 @@ is_pointer_to_heap(rb_objspace_t *objspace, const void *ptr)
else {
if (p < page->start) return FALSE;
if (p >= page->start + (page->total_slots * page->slot_size)) return FALSE;
- if ((NUM_IN_PAGE(p) * BASE_SLOT_SIZE) % page->slot_size != 0) return FALSE;
+ if ((p - page->start) % page->slot_size != 0) return FALSE;
return TRUE;
}
@@ -3489,8 +3510,6 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit
{
struct heap_page *sweep_page = ctx->page;
short slot_size = sweep_page->slot_size;
- short slot_bits = slot_size / BASE_SLOT_SIZE;
- GC_ASSERT(slot_bits > 0);
do {
VALUE vp = (VALUE)p;
@@ -3566,7 +3585,7 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit
}
}
p += slot_size;
- bitset >>= slot_bits;
+ bitset >>= 1;
} while (bitset);
}
@@ -3591,50 +3610,33 @@ gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context
p = (uintptr_t)sweep_page->start;
bits = sweep_page->mark_bits;
+ short slot_size = sweep_page->slot_size;
+ int total_slots = sweep_page->total_slots;
+ int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH);
- int page_rvalue_count = sweep_page->total_slots * (sweep_page->slot_size / BASE_SLOT_SIZE);
- int out_of_range_bits = (NUM_IN_PAGE(p) + page_rvalue_count) % BITS_BITLENGTH;
- if (out_of_range_bits != 0) { // sizeof(RVALUE) == 64
- bits[BITMAP_INDEX(p) + page_rvalue_count / BITS_BITLENGTH] |= ~(((bits_t)1 << out_of_range_bits) - 1);
+ int out_of_range_bits = total_slots % BITS_BITLENGTH;
+ if (out_of_range_bits != 0) {
+ bits[bitmap_plane_count - 1] |= ~(((bits_t)1 << out_of_range_bits) - 1);
}
- /* The last bitmap plane may not be used if the last plane does not
- * have enough space for the slot_size. In that case, the last plane must
- * be skipped since none of the bits will be set. */
- int bitmap_plane_count = CEILDIV(NUM_IN_PAGE(p) + page_rvalue_count, BITS_BITLENGTH);
- GC_ASSERT(bitmap_plane_count == HEAP_PAGE_BITMAP_LIMIT - 1 ||
- bitmap_plane_count == HEAP_PAGE_BITMAP_LIMIT);
-
- bits_t slot_mask = heap->slot_bits_mask;
-
// Clear wb_unprotected and age bits for all unmarked slots
{
bits_t *wb_unprotected_bits = sweep_page->wb_unprotected_bits;
bits_t *age_bits = sweep_page->age_bits;
for (int i = 0; i < bitmap_plane_count; i++) {
- bits_t unmarked = ~bits[i] & slot_mask;
+ bits_t unmarked = ~bits[i];
wb_unprotected_bits[i] &= ~unmarked;
age_bits[i * 2] &= ~unmarked;
age_bits[i * 2 + 1] &= ~unmarked;
}
}
- // Skip out of range slots at the head of the page
- bitset = ~bits[0];
- bitset >>= NUM_IN_PAGE(p);
- bitset &= slot_mask;
- if (bitset) {
- gc_sweep_plane(objspace, heap, p, bitset, ctx);
- }
- p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
-
- for (int i = 1; i < bitmap_plane_count; i++) {
+ for (int i = 0; i < bitmap_plane_count; i++) {
bitset = ~bits[i];
- bitset &= slot_mask;
if (bitset) {
gc_sweep_plane(objspace, heap, p, bitset, ctx);
}
- p += BITS_BITLENGTH * BASE_SLOT_SIZE;
+ p += BITS_BITLENGTH * slot_size;
}
if (!heap->compact_cursor) {
@@ -4086,7 +4088,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_
GC_ASSERT(BUILTIN_TYPE(forwarding_object) != T_NONE);
}
}
- p += BASE_SLOT_SIZE;
+ p += page->slot_size;
bitset >>= 1;
} while (bitset);
}
@@ -4098,25 +4100,21 @@ invalidate_moved_page(rb_objspace_t *objspace, struct heap_page *page)
int i;
bits_t *mark_bits, *pin_bits;
bits_t bitset;
+ short slot_size = page->slot_size;
+ int total_slots = page->total_slots;
+ int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH);
mark_bits = page->mark_bits;
pin_bits = page->pinned_bits;
uintptr_t p = page->start;
- // Skip out of range slots at the head of the page
- bitset = pin_bits[0] & ~mark_bits[0];
- bitset >>= NUM_IN_PAGE(p);
- invalidate_moved_plane(objspace, page, p, bitset);
- p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
-
- for (i=1; i < HEAP_PAGE_BITMAP_LIMIT; i++) {
+ for (i=0; i < bitmap_plane_count; i++) {
/* Moved objects are pinned but never marked. We reuse the pin bits
* to indicate there is a moved object in this slot. */
bitset = pin_bits[i] & ~mark_bits[i];
-
invalidate_moved_plane(objspace, page, p, bitset);
- p += BITS_BITLENGTH * BASE_SLOT_SIZE;
+ p += BITS_BITLENGTH * slot_size;
}
}
#endif
@@ -5310,7 +5308,7 @@ gc_remember_unprotected(rb_objspace_t *objspace, VALUE obj)
}
static inline void
-gc_marks_wb_unprotected_objects_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bits)
+gc_marks_wb_unprotected_objects_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bits, short slot_size)
{
if (bits) {
do {
@@ -5320,7 +5318,7 @@ gc_marks_wb_unprotected_objects_plane(rb_objspace_t *objspace, uintptr_t p, bits
GC_ASSERT(RVALUE_MARKED(objspace, (VALUE)p));
gc_mark_children(objspace, (VALUE)p);
}
- p += BASE_SLOT_SIZE;
+ p += slot_size;
bits >>= 1;
} while (bits);
}
@@ -5335,18 +5333,15 @@ gc_marks_wb_unprotected_objects(rb_objspace_t *objspace, rb_heap_t *heap)
bits_t *mark_bits = page->mark_bits;
bits_t *wbun_bits = page->wb_unprotected_bits;
uintptr_t p = page->start;
+ short slot_size = page->slot_size;
+ int total_slots = page->total_slots;
+ int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH);
size_t j;
- bits_t bits = mark_bits[0] & wbun_bits[0];
- bits >>= NUM_IN_PAGE(p);
- gc_marks_wb_unprotected_objects_plane(objspace, p, bits);
- p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
-
- for (j=1; j<HEAP_PAGE_BITMAP_LIMIT; j++) {
+ for (j=0; j<(size_t)bitmap_plane_count; j++) {
bits_t bits = mark_bits[j] & wbun_bits[j];
-
- gc_marks_wb_unprotected_objects_plane(objspace, p, bits);
- p += BITS_BITLENGTH * BASE_SLOT_SIZE;
+ gc_marks_wb_unprotected_objects_plane(objspace, p, bits, slot_size);
+ p += BITS_BITLENGTH * slot_size;
}
}
@@ -5601,8 +5596,6 @@ static bool
gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct heap_page *page)
{
short slot_size = page->slot_size;
- short slot_bits = slot_size / BASE_SLOT_SIZE;
- GC_ASSERT(slot_bits > 0);
do {
VALUE vp = (VALUE)p;
@@ -5619,7 +5612,7 @@ gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t b
}
}
p += slot_size;
- bitset >>= slot_bits;
+ bitset >>= 1;
} while (bitset);
return true;
@@ -5634,26 +5627,21 @@ gc_compact_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page
bits_t *mark_bits, *pin_bits;
bits_t bitset;
uintptr_t p = page->start;
+ short slot_size = page->slot_size;
+ int total_slots = page->total_slots;
+ int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH);
mark_bits = page->mark_bits;
pin_bits = page->pinned_bits;
- // objects that can be moved are marked and not pinned
- bitset = (mark_bits[0] & ~pin_bits[0]);
- bitset >>= NUM_IN_PAGE(p);
- if (bitset) {
- if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
- return false;
- }
- p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
-
- for (int j = 1; j < HEAP_PAGE_BITMAP_LIMIT; j++) {
+ for (int j = 0; j < bitmap_plane_count; j++) {
+ // objects that can be moved are marked and not pinned
bitset = (mark_bits[j] & ~pin_bits[j]);
if (bitset) {
if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
return false;
}
- p += BITS_BITLENGTH * BASE_SLOT_SIZE;
+ p += BITS_BITLENGTH * slot_size;
}
return true;
@@ -5940,7 +5928,7 @@ rgengc_remember(rb_objspace_t *objspace, VALUE obj)
#endif
static inline void
-rgengc_rememberset_mark_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bitset)
+rgengc_rememberset_mark_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bitset, short slot_size)
{
if (bitset) {
do {
@@ -5956,7 +5944,7 @@ rgengc_rememberset_mark_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bitse
rb_darray_append_without_gc(&objspace->weak_references, obj);
}
}
- p += BASE_SLOT_SIZE;
+ p += slot_size;
bitset >>= 1;
} while (bitset);
}
@@ -5975,6 +5963,9 @@ rgengc_rememberset_mark(rb_objspace_t *objspace, rb_heap_t *heap)
ccan_list_for_each(&heap->pages, page, page_node) {
if (page->flags.has_remembered_objects | page->flags.has_uncollectible_wb_unprotected_objects) {
uintptr_t p = page->start;
+ short slot_size = page->slot_size;
+ int total_slots = page->total_slots;
+ int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH);
bits_t bitset, bits[HEAP_PAGE_BITMAP_LIMIT];
bits_t *remembered_bits = page->remembered_bits;
bits_t *uncollectible_bits = page->uncollectible_bits;
@@ -5984,21 +5975,16 @@ rgengc_rememberset_mark(rb_objspace_t *objspace, rb_heap_t *heap)
else if (page->flags.has_remembered_objects) has_old++;
else if (page->flags.has_uncollectible_wb_unprotected_objects) has_shady++;
#endif
- for (j=0; j<HEAP_PAGE_BITMAP_LIMIT; j++) {
+ for (j=0; j < (size_t)bitmap_plane_count; j++) {
bits[j] = remembered_bits[j] | (uncollectible_bits[j] & wb_unprotected_bits[j]);
remembered_bits[j] = 0;
}
page->flags.has_remembered_objects = FALSE;
- bitset = bits[0];
- bitset >>= NUM_IN_PAGE(p);
- rgengc_rememberset_mark_plane(objspace, p, bitset);
- p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
-
- for (j=1; j < HEAP_PAGE_BITMAP_LIMIT; j++) {
+ for (j=0; j < (size_t)bitmap_plane_count; j++) {
bitset = bits[j];
- rgengc_rememberset_mark_plane(objspace, p, bitset);
- p += BITS_BITLENGTH * BASE_SLOT_SIZE;
+ rgengc_rememberset_mark_plane(objspace, p, bitset, slot_size);
+ p += BITS_BITLENGTH * slot_size;
}
}
#if PROFILE_REMEMBERSET_MARK
@@ -9527,17 +9513,6 @@ rb_gc_impl_objspace_init(void *objspace_ptr)
heap->slot_size = (1 << i) * BASE_SLOT_SIZE;
- // Bitmask with every (1 << i)th bit set, representing aligned slot positions
- static const bits_t slot_bits_masks[] = {
- ~(bits_t)0, // i=0: every 1st bit
- (bits_t)0x5555555555555555ULL, // i=1: every 2nd bit
- (bits_t)0x1111111111111111ULL, // i=2: every 4th bit
- (bits_t)0x0101010101010101ULL, // i=3: every 8th bit
- (bits_t)0x0001000100010001ULL, // i=4: every 16th bit
- };
- GC_ASSERT(HEAP_COUNT == sizeof(slot_bits_masks) / sizeof(slot_bits_masks[0]));
- heap->slot_bits_mask = slot_bits_masks[i];
-
ccan_list_head_init(&heap->pages);
}