| field | value | date |
|---|---|---|
| author | Matt Valentine-House <matt@eightbitraptor.com> | 2026-02-03 13:51:35 -0500 |
| committer | Matt Valentine-House <matt@eightbitraptor.com> | 2026-02-13 14:29:14 +0000 |
| commit | 5c71c8c55bd71cff64e670097e4800d9e3d67ae4 (patch) | |
| tree | 97659aac5f5ff20ab3de00db1ff87aea850d1155 | |
| parent | c9a49411dcfa6ac42d15ce33858207cb4b11025e (diff) | |
gc: implement slot-based bitmap indexing with division magic
Replace the BASE_SLOT_SIZE-granularity bitmap scheme with slot-based
indexing where each bit represents one slot regardless of size.
Key changes:
- Add slot_div_magic field to heap_page for fast division
- Use Go-inspired formula: slot_index = (offset * div_magic) >> 32
- Update all bitmap iteration to use one-bit-per-slot scheme
- Remove slot_bits_mask from rb_heap_t (no longer needed)
This enables arbitrary slot sizes (not just power-of-two multiples of
BASE_SLOT_SIZE) by decoupling bitmap indexing from slot size.
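
As a quick illustration (not part of the commit), the sketch below checks that the
multiply-and-shift above agrees with plain integer division for every byte offset
inside a page. The helper names mirror the ones added by the diff; PAGE_PAYLOAD and
the slot sizes are assumed example values, not constants taken from gc/default/default.c.

```c
#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed usable bytes per heap page; illustrative only. */
#define PAGE_PAYLOAD 16384

/* Mirrors compute_slot_div_magic() from the diff: ceil(2^32 / slot_size). */
static inline uint32_t
compute_slot_div_magic(unsigned short slot_size)
{
    return (uint32_t)(UINT32_MAX / slot_size) + 1;
}

/* Mirrors slot_index_for_offset() from the diff. */
static inline size_t
slot_index_for_offset(size_t offset, uint32_t div_magic)
{
    return (size_t)(((uint64_t)offset * div_magic) >> 32);
}

int
main(void)
{
    /* Example slot sizes; 120 is deliberately not a power-of-two multiple of
     * the smallest size, which the old slot_bits_mask scheme could not express. */
    const unsigned short slot_sizes[] = { 40, 80, 120, 160, 320, 640 };

    for (size_t i = 0; i < sizeof(slot_sizes) / sizeof(slot_sizes[0]); i++) {
        uint32_t magic = compute_slot_div_magic(slot_sizes[i]);

        /* The multiply-and-shift must agree with real division for every
         * byte offset that can occur inside one page. */
        for (size_t offset = 0; offset < PAGE_PAYLOAD; offset++) {
            assert(slot_index_for_offset(offset, magic) == offset / slot_sizes[i]);
        }
        printf("slot_size=%u magic=%" PRIu32 ": all in-page offsets ok\n",
               (unsigned)slot_sizes[i], magic);
    }
    return 0;
}
```

Because the magic constant rounds 2^32 / slot_size up, the rounding error stays far
too small to move any in-page offset across a slot boundary, which is what makes
non-power-of-two slot sizes safe here.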
Functions updated:
- gc_sweep_plane/gc_sweep_page
- rgengc_rememberset_mark/rgengc_rememberset_mark_plane
- gc_marks_wb_unprotected_objects/gc_marks_wb_unprotected_objects_plane
- gc_compact_plane/gc_compact_page
- invalidate_moved_plane/invalidate_moved_page
- RVALUE_AGE_GET/RVALUE_AGE_SET_BITMAP
Inspired by Go runtime's mbitmap.go divideByElemSize().
| -rw-r--r-- | gc/default/default.c | 181 |
1 file changed, 78 insertions, 103 deletions
diff --git a/gc/default/default.c b/gc/default/default.c
index bfddb08ea5..c766521108 100644
--- a/gc/default/default.c
+++ b/gc/default/default.c
@@ -452,7 +452,6 @@ typedef int (*gc_compact_compare_func)(const void *l, const void *r, void *d);
 
 typedef struct rb_heap_struct {
     short slot_size;
-    bits_t slot_bits_mask;
 
     /* Basic statistics */
     size_t total_allocated_pages;
@@ -765,6 +764,7 @@ struct free_slot {
 
 struct heap_page {
     unsigned short slot_size;
+    uint32_t slot_div_magic;
     unsigned short total_slots;
     unsigned short free_slots;
     unsigned short final_slots;
@@ -841,17 +841,33 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page *
 #define GET_PAGE_HEADER(x) (&GET_PAGE_BODY(x)->header)
 #define GET_HEAP_PAGE(x) (GET_PAGE_HEADER(x)->page)
 
-#define NUM_IN_PAGE(p) (((bits_t)(p) & HEAP_PAGE_ALIGN_MASK) / BASE_SLOT_SIZE)
-#define BITMAP_INDEX(p) (NUM_IN_PAGE(p) / BITS_BITLENGTH )
-#define BITMAP_OFFSET(p) (NUM_IN_PAGE(p) & (BITS_BITLENGTH-1))
-#define BITMAP_BIT(p) ((bits_t)1 << BITMAP_OFFSET(p))
+static inline uint32_t
+compute_slot_div_magic(unsigned short slot_size)
+{
+    return (uint32_t)(UINT32_MAX / slot_size) + 1;
+}
+
+static inline size_t
+slot_index_for_offset(size_t offset, uint32_t div_magic)
+{
+    return (size_t)(((uint64_t)offset * div_magic) >> 32);
+}
+
+#define SLOT_INDEX(page, p) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_div_magic)
+#define SLOT_BITMAP_INDEX(page, p) (SLOT_INDEX(page, p) / BITS_BITLENGTH)
+#define SLOT_BITMAP_OFFSET(page, p) (SLOT_INDEX(page, p) & (BITS_BITLENGTH - 1))
+#define SLOT_BITMAP_BIT(page, p) ((bits_t)1 << SLOT_BITMAP_OFFSET(page, p))
+
+#define _MARKED_IN_BITMAP(bits, page, p) ((bits)[SLOT_BITMAP_INDEX(page, p)] & SLOT_BITMAP_BIT(page, p))
+#define _MARK_IN_BITMAP(bits, page, p) ((bits)[SLOT_BITMAP_INDEX(page, p)] |= SLOT_BITMAP_BIT(page, p))
+#define _CLEAR_IN_BITMAP(bits, page, p) ((bits)[SLOT_BITMAP_INDEX(page, p)] &= ~SLOT_BITMAP_BIT(page, p))
 
-/* Bitmap Operations */
-#define MARKED_IN_BITMAP(bits, p) ((bits)[BITMAP_INDEX(p)] & BITMAP_BIT(p))
-#define MARK_IN_BITMAP(bits, p) ((bits)[BITMAP_INDEX(p)] = (bits)[BITMAP_INDEX(p)] | BITMAP_BIT(p))
-#define CLEAR_IN_BITMAP(bits, p) ((bits)[BITMAP_INDEX(p)] = (bits)[BITMAP_INDEX(p)] & ~BITMAP_BIT(p))
+#define MARKED_IN_BITMAP(bits, p) _MARKED_IN_BITMAP(bits, GET_HEAP_PAGE(p), p)
+#define MARK_IN_BITMAP(bits, p) _MARK_IN_BITMAP(bits, GET_HEAP_PAGE(p), p)
+#define CLEAR_IN_BITMAP(bits, p) _CLEAR_IN_BITMAP(bits, GET_HEAP_PAGE(p), p)
+
+#define NUM_IN_PAGE(p) (((bits_t)(p) & HEAP_PAGE_ALIGN_MASK) / BASE_SLOT_SIZE)
 
-/* getting bitmap */
 #define GET_HEAP_MARK_BITS(x) (&GET_HEAP_PAGE(x)->mark_bits[0])
 #define GET_HEAP_PINNED_BITS(x) (&GET_HEAP_PAGE(x)->pinned_bits[0])
 #define GET_HEAP_UNCOLLECTIBLE_BITS(x) (&GET_HEAP_PAGE(x)->uncollectible_bits[0])
@@ -861,9 +877,11 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page *
 static int
 RVALUE_AGE_GET(VALUE obj)
 {
-    bits_t *age_bits = GET_HEAP_PAGE(obj)->age_bits;
-    int idx = BITMAP_INDEX(obj) * 2;
-    int shift = BITMAP_OFFSET(obj);
+    struct heap_page *page = GET_HEAP_PAGE(obj);
+    bits_t *age_bits = page->age_bits;
+    size_t slot_idx = SLOT_INDEX(page, obj);
+    size_t idx = (slot_idx / BITS_BITLENGTH) * 2;
+    int shift = (int)(slot_idx & (BITS_BITLENGTH - 1));
     int lo = (age_bits[idx] >> shift) & 1;
     int hi = (age_bits[idx + 1] >> shift) & 1;
     return lo | (hi << 1);
@@ -873,9 +891,11 @@ static void
 RVALUE_AGE_SET_BITMAP(VALUE obj, int age)
 {
     RUBY_ASSERT(age <= RVALUE_OLD_AGE);
-    bits_t *age_bits = GET_HEAP_PAGE(obj)->age_bits;
-    int idx = BITMAP_INDEX(obj) * 2;
-    int shift = BITMAP_OFFSET(obj);
+    struct heap_page *page = GET_HEAP_PAGE(obj);
+    bits_t *age_bits = page->age_bits;
+    size_t slot_idx = SLOT_INDEX(page, obj);
+    size_t idx = (slot_idx / BITS_BITLENGTH) * 2;
+    int shift = (int)(slot_idx & (BITS_BITLENGTH - 1));
     bits_t mask = (bits_t)1 << shift;
 
     age_bits[idx] = (age_bits[idx] & ~mask) | ((bits_t)(age & 1) << shift);
@@ -1986,6 +2006,7 @@ heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page)
     page->start = start;
     page->total_slots = slot_count;
     page->slot_size = heap->slot_size;
+    page->slot_div_magic = compute_slot_div_magic(heap->slot_size);
     page->heap = heap;
 
     asan_unlock_freelist(page);
@@ -2598,7 +2619,7 @@ is_pointer_to_heap(rb_objspace_t *objspace, const void *ptr)
     else {
         if (p < page->start) return FALSE;
         if (p >= page->start + (page->total_slots * page->slot_size)) return FALSE;
-        if ((NUM_IN_PAGE(p) * BASE_SLOT_SIZE) % page->slot_size != 0) return FALSE;
+        if ((p - page->start) % page->slot_size != 0) return FALSE;
 
         return TRUE;
     }
@@ -3489,8 +3510,6 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit
 {
     struct heap_page *sweep_page = ctx->page;
     short slot_size = sweep_page->slot_size;
-    short slot_bits = slot_size / BASE_SLOT_SIZE;
-    GC_ASSERT(slot_bits > 0);
 
     do {
         VALUE vp = (VALUE)p;
@@ -3566,7 +3585,7 @@
             }
         }
         p += slot_size;
-        bitset >>= slot_bits;
+        bitset >>= 1;
     } while (bitset);
 }
 
@@ -3591,50 +3610,33 @@ gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context
 
     p = (uintptr_t)sweep_page->start;
     bits = sweep_page->mark_bits;
+    short slot_size = sweep_page->slot_size;
+    int total_slots = sweep_page->total_slots;
+    int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH);
 
-    int page_rvalue_count = sweep_page->total_slots * (sweep_page->slot_size / BASE_SLOT_SIZE);
-    int out_of_range_bits = (NUM_IN_PAGE(p) + page_rvalue_count) % BITS_BITLENGTH;
-    if (out_of_range_bits != 0) { // sizeof(RVALUE) == 64
-        bits[BITMAP_INDEX(p) + page_rvalue_count / BITS_BITLENGTH] |= ~(((bits_t)1 << out_of_range_bits) - 1);
+    int out_of_range_bits = total_slots % BITS_BITLENGTH;
+    if (out_of_range_bits != 0) {
+        bits[bitmap_plane_count - 1] |= ~(((bits_t)1 << out_of_range_bits) - 1);
     }
 
-    /* The last bitmap plane may not be used if the last plane does not
-     * have enough space for the slot_size. In that case, the last plane must
-     * be skipped since none of the bits will be set. */
-    int bitmap_plane_count = CEILDIV(NUM_IN_PAGE(p) + page_rvalue_count, BITS_BITLENGTH);
-    GC_ASSERT(bitmap_plane_count == HEAP_PAGE_BITMAP_LIMIT - 1 ||
-              bitmap_plane_count == HEAP_PAGE_BITMAP_LIMIT);
-
-    bits_t slot_mask = heap->slot_bits_mask;
-
     // Clear wb_unprotected and age bits for all unmarked slots
     {
         bits_t *wb_unprotected_bits = sweep_page->wb_unprotected_bits;
         bits_t *age_bits = sweep_page->age_bits;
         for (int i = 0; i < bitmap_plane_count; i++) {
-            bits_t unmarked = ~bits[i] & slot_mask;
+            bits_t unmarked = ~bits[i];
             wb_unprotected_bits[i] &= ~unmarked;
             age_bits[i * 2] &= ~unmarked;
             age_bits[i * 2 + 1] &= ~unmarked;
         }
     }
 
-    // Skip out of range slots at the head of the page
-    bitset = ~bits[0];
-    bitset >>= NUM_IN_PAGE(p);
-    bitset &= slot_mask;
-    if (bitset) {
-        gc_sweep_plane(objspace, heap, p, bitset, ctx);
-    }
-    p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
-
-    for (int i = 1; i < bitmap_plane_count; i++) {
+    for (int i = 0; i < bitmap_plane_count; i++) {
         bitset = ~bits[i];
-        bitset &= slot_mask;
         if (bitset) {
             gc_sweep_plane(objspace, heap, p, bitset, ctx);
         }
-        p += BITS_BITLENGTH * BASE_SLOT_SIZE;
+        p += BITS_BITLENGTH * slot_size;
     }
 
     if (!heap->compact_cursor) {
@@ -4086,7 +4088,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_
                     GC_ASSERT(BUILTIN_TYPE(forwarding_object) != T_NONE);
                 }
             }
-            p += BASE_SLOT_SIZE;
+            p += page->slot_size;
            bitset >>= 1;
         } while (bitset);
     }
@@ -4098,25 +4100,21 @@
     int i;
     bits_t *mark_bits, *pin_bits;
     bits_t bitset;
+    short slot_size = page->slot_size;
+    int total_slots = page->total_slots;
+    int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH);
 
     mark_bits = page->mark_bits;
    pin_bits = page->pinned_bits;
 
     uintptr_t p = page->start;
 
-    // Skip out of range slots at the head of the page
-    bitset = pin_bits[0] & ~mark_bits[0];
-    bitset >>= NUM_IN_PAGE(p);
-    invalidate_moved_plane(objspace, page, p, bitset);
-    p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
-
-    for (i=1; i < HEAP_PAGE_BITMAP_LIMIT; i++) {
+    for (i=0; i < bitmap_plane_count; i++) {
         /* Moved objects are pinned but never marked. We reuse the pin bits
          * to indicate there is a moved object in this slot. */
         bitset = pin_bits[i] & ~mark_bits[i];
-
         invalidate_moved_plane(objspace, page, p, bitset);
-        p += BITS_BITLENGTH * BASE_SLOT_SIZE;
+        p += BITS_BITLENGTH * slot_size;
     }
 }
 #endif
@@ -5310,7 +5308,7 @@ gc_remember_unprotected(rb_objspace_t *objspace, VALUE obj)
 }
 
 static inline void
-gc_marks_wb_unprotected_objects_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bits)
+gc_marks_wb_unprotected_objects_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bits, short slot_size)
 {
     if (bits) {
         do {
@@ -5320,7 +5318,7 @@ gc_marks_wb_unprotected_objects_plane(rb_objspace_t *objspace, uintptr_t p, bits
                 GC_ASSERT(RVALUE_MARKED(objspace, (VALUE)p));
                 gc_mark_children(objspace, (VALUE)p);
             }
-            p += BASE_SLOT_SIZE;
+            p += slot_size;
             bits >>= 1;
         } while (bits);
     }
@@ -5335,18 +5333,15 @@ gc_marks_wb_unprotected_objects(rb_objspace_t *objspace, rb_heap_t *heap)
         bits_t *mark_bits = page->mark_bits;
         bits_t *wbun_bits = page->wb_unprotected_bits;
         uintptr_t p = page->start;
+        short slot_size = page->slot_size;
+        int total_slots = page->total_slots;
+        int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH);
         size_t j;
 
-        bits_t bits = mark_bits[0] & wbun_bits[0];
-        bits >>= NUM_IN_PAGE(p);
-        gc_marks_wb_unprotected_objects_plane(objspace, p, bits);
-        p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
-
-        for (j=1; j<HEAP_PAGE_BITMAP_LIMIT; j++) {
+        for (j=0; j<(size_t)bitmap_plane_count; j++) {
             bits_t bits = mark_bits[j] & wbun_bits[j];
-
-            gc_marks_wb_unprotected_objects_plane(objspace, p, bits);
-            p += BITS_BITLENGTH * BASE_SLOT_SIZE;
+            gc_marks_wb_unprotected_objects_plane(objspace, p, bits, slot_size);
+            p += BITS_BITLENGTH * slot_size;
         }
     }
 
@@ -5601,8 +5596,6 @@ static bool
 gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct heap_page *page)
 {
     short slot_size = page->slot_size;
-    short slot_bits = slot_size / BASE_SLOT_SIZE;
-    GC_ASSERT(slot_bits > 0);
 
     do {
         VALUE vp = (VALUE)p;
@@ -5619,7 +5612,7 @@ gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t b
             }
         }
         p += slot_size;
-        bitset >>= slot_bits;
+        bitset >>= 1;
     } while (bitset);
 
     return true;
@@ -5634,26 +5627,21 @@ gc_compact_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page
     bits_t *mark_bits, *pin_bits;
     bits_t bitset;
     uintptr_t p = page->start;
+    short slot_size = page->slot_size;
+    int total_slots = page->total_slots;
+    int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH);
 
     mark_bits = page->mark_bits;
     pin_bits = page->pinned_bits;
 
-    // objects that can be moved are marked and not pinned
-    bitset = (mark_bits[0] & ~pin_bits[0]);
-    bitset >>= NUM_IN_PAGE(p);
-    if (bitset) {
-        if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
-            return false;
-    }
-    p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
-
-    for (int j = 1; j < HEAP_PAGE_BITMAP_LIMIT; j++) {
+    for (int j = 0; j < bitmap_plane_count; j++) {
+        // objects that can be moved are marked and not pinned
         bitset = (mark_bits[j] & ~pin_bits[j]);
         if (bitset) {
            if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
                 return false;
         }
-        p += BITS_BITLENGTH * BASE_SLOT_SIZE;
+        p += BITS_BITLENGTH * slot_size;
     }
 
     return true;
@@ -5940,7 +5928,7 @@ rgengc_remember(rb_objspace_t *objspace, VALUE obj)
 #endif
 
 static inline void
-rgengc_rememberset_mark_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bitset)
+rgengc_rememberset_mark_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bitset, short slot_size)
 {
     if (bitset) {
         do {
@@ -5956,7 +5944,7 @@ rgengc_rememberset_mark_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bitse
                     rb_darray_append_without_gc(&objspace->weak_references, obj);
                 }
             }
-            p += BASE_SLOT_SIZE;
+            p += slot_size;
             bitset >>= 1;
         } while (bitset);
     }
@@ -5975,6 +5963,9 @@ rgengc_rememberset_mark(rb_objspace_t *objspace, rb_heap_t *heap)
     ccan_list_for_each(&heap->pages, page, page_node) {
         if (page->flags.has_remembered_objects | page->flags.has_uncollectible_wb_unprotected_objects) {
             uintptr_t p = page->start;
+            short slot_size = page->slot_size;
+            int total_slots = page->total_slots;
+            int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH);
             bits_t bitset, bits[HEAP_PAGE_BITMAP_LIMIT];
             bits_t *remembered_bits = page->remembered_bits;
             bits_t *uncollectible_bits = page->uncollectible_bits;
@@ -5984,21 +5975,16 @@ rgengc_rememberset_mark(rb_objspace_t *objspace, rb_heap_t *heap)
             else if (page->flags.has_remembered_objects) has_old++;
             else if (page->flags.has_uncollectible_wb_unprotected_objects) has_shady++;
 #endif
-            for (j=0; j<HEAP_PAGE_BITMAP_LIMIT; j++) {
+            for (j=0; j < (size_t)bitmap_plane_count; j++) {
                 bits[j] = remembered_bits[j] | (uncollectible_bits[j] & wb_unprotected_bits[j]);
                 remembered_bits[j] = 0;
            }
             page->flags.has_remembered_objects = FALSE;
 
-            bitset = bits[0];
-            bitset >>= NUM_IN_PAGE(p);
-            rgengc_rememberset_mark_plane(objspace, p, bitset);
-            p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
-
-            for (j=1; j < HEAP_PAGE_BITMAP_LIMIT; j++) {
+            for (j=0; j < (size_t)bitmap_plane_count; j++) {
                 bitset = bits[j];
-                rgengc_rememberset_mark_plane(objspace, p, bitset);
-                p += BITS_BITLENGTH * BASE_SLOT_SIZE;
+                rgengc_rememberset_mark_plane(objspace, p, bitset, slot_size);
+                p += BITS_BITLENGTH * slot_size;
             }
         }
 #if PROFILE_REMEMBERSET_MARK
@@ -9527,17 +9513,6 @@ rb_gc_impl_objspace_init(void *objspace_ptr)
 
         heap->slot_size = (1 << i) * BASE_SLOT_SIZE;
 
-        // Bitmask with every (1 << i)th bit set, representing aligned slot positions
-        static const bits_t slot_bits_masks[] = {
-            ~(bits_t)0,                     // i=0: every 1st bit
-            (bits_t)0x5555555555555555ULL,  // i=1: every 2nd bit
-            (bits_t)0x1111111111111111ULL,  // i=2: every 4th bit
-            (bits_t)0x0101010101010101ULL,  // i=3: every 8th bit
-            (bits_t)0x0001000100010001ULL,  // i=4: every 16th bit
-        };
-        GC_ASSERT(HEAP_COUNT == sizeof(slot_bits_masks) / sizeof(slot_bits_masks[0]));
-        heap->slot_bits_mask = slot_bits_masks[i];
-
         ccan_list_head_init(&heap->pages);
     }
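
For reference, here is a self-contained sketch (not from the commit) of the one-bit-per-slot
iteration that the hunks above install in gc_sweep_page, gc_compact_page, invalidate_moved_page
and the rememberset walkers: the bitset now shifts by one per slot and the cursor advances by
slot_size, and because SLOT_INDEX is measured from page->start, bit 0 always maps to the first
slot, which is why the old "skip out of range slots at the head of the page" blocks disappear.
bits_t, BITS_BITLENGTH, CEILDIV and the visitor callback below are local stand-ins, not the
definitions from gc/default/default.c.

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t bits_t;
#define BITS_BITLENGTH ((int)(sizeof(bits_t) * 8))
#define CEILDIV(i, mod) (((i) + (mod) - 1) / (mod))

static void
visit_slot(uintptr_t p)
{
    printf("set bit -> slot at %p\n", (void *)p);
}

/* Walk every set bit: bit i of the bitmap is slot i, so the cursor advances by
 * slot_size per bit and by BITS_BITLENGTH * slot_size per plane.  Before the
 * commit, the cursor advanced by BASE_SLOT_SIZE and the bitset was shifted by
 * slot_size / BASE_SLOT_SIZE; now both steps are the same for every size. */
static void
for_each_set_slot(const bits_t *bits, uintptr_t start,
                  int total_slots, short slot_size)
{
    int plane_count = CEILDIV(total_slots, BITS_BITLENGTH);
    uintptr_t p = start;

    for (int i = 0; i < plane_count; i++) {
        bits_t bitset = bits[i];
        uintptr_t q = p;            /* per-plane cursor, like the *_plane helpers */
        while (bitset) {
            if (bitset & 1) visit_slot(q);
            q += slot_size;
            bitset >>= 1;
        }
        p += BITS_BITLENGTH * slot_size;
    }
}

int
main(void)
{
    /* A toy "page": 100 slots of 120 bytes starting at a fake base address.
     * 0x5 sets bits 0 and 2; the second plane's 0x1 sets slot 64. */
    bits_t marks[2] = { 0x5, 0x1 };
    for_each_set_slot(marks, 0x10000, 100, 120);
    return 0;
}
```

Under the old scheme the same walk stepped by BASE_SLOT_SIZE per bit and, together with the
slot_bits_mask patterns removed in the last hunk, that tied slot sizes to power-of-two
multiples of BASE_SLOT_SIZE; the uniform per-slot step removes that constraint.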
