diff options
| author | Matt Valentine-House <matt@eightbitraptor.com> | 2026-03-31 13:02:18 +0100 |
|---|---|---|
| committer | Matt Valentine-House <matt@eightbitraptor.com> | 2026-04-09 13:24:09 +0100 |
| commit | c9b70883640e59bdd1f2f6a538957119cb558fac (patch) | |
| tree | 7505b82e5db724c5c3b89eab90aae6341027a294 | |
| parent | 2567e76ec376fee3b6d6e98e9578bcbb44e23034 (diff) | |
Introduce power-of-two size pools
Replace the RVALUE_SLOT_SIZE-multiplier based pool sizes with explicit
power-of-two (and near-power-of-two) slot sizes. On 64-bit this gives
12 heaps (32, 40, 64, 80, 96, 128, 160, 256, 512, 640, 768, 1024)
instead of 5, providing finer granularity and less internal
fragmentation. On 32-bit the layout is 5 heaps (32, 64, 128, 256, 512).
| -rw-r--r-- | gc.rb | 35 | ||||
| -rw-r--r-- | gc/default/default.c | 84 | ||||
| -rw-r--r-- | gc/mmtk/mmtk.c | 18 | ||||
| -rw-r--r-- | internal/class.h | 4 | ||||
| -rw-r--r-- | test/.excludes-mmtk/TestObjSpace.rb | 1 |
5 files changed, 96 insertions, 46 deletions
@@ -269,7 +269,16 @@ module GC # GC.stat_heap # # => # {0 => - # {slot_size: 40, + # {slot_size: 32, + # heap_eden_pages: 24, + # heap_eden_slots: 12288, + # total_allocated_pages: 24, + # force_major_gc_count: 0, + # force_incremental_marking_finish_count: 0, + # total_allocated_objects: 8450, + # total_freed_objects: 3120}, + # 1 => + # {slot_size: 64, # heap_eden_pages: 246, # heap_eden_slots: 402802, # total_allocated_pages: 246, @@ -277,8 +286,8 @@ module GC # force_incremental_marking_finish_count: 1, # total_allocated_objects: 33867152, # total_freed_objects: 33520523}, - # 1 => - # {slot_size: 80, + # 2 => + # {slot_size: 128, # heap_eden_pages: 84, # heap_eden_slots: 68746, # total_allocated_pages: 84, @@ -286,8 +295,8 @@ module GC # force_incremental_marking_finish_count: 4, # total_allocated_objects: 147491, # total_freed_objects: 90699}, - # 2 => - # {slot_size: 160, + # 3 => + # {slot_size: 256, # heap_eden_pages: 157, # heap_eden_slots: 64182, # total_allocated_pages: 157, @@ -295,8 +304,8 @@ module GC # force_incremental_marking_finish_count: 0, # total_allocated_objects: 211460, # total_freed_objects: 190075}, - # 3 => - # {slot_size: 320, + # 4 => + # {slot_size: 512, # heap_eden_pages: 8, # heap_eden_slots: 1631, # total_allocated_pages: 8, @@ -304,8 +313,8 @@ module GC # force_incremental_marking_finish_count: 0, # total_allocated_objects: 1422, # total_freed_objects: 700}, - # 4 => - # {slot_size: 640, + # 5 => + # {slot_size: 1024, # heap_eden_pages: 16, # heap_eden_slots: 1628, # total_allocated_pages: 16, @@ -316,7 +325,7 @@ module GC # # In the example above, the keys in the outer hash are the heap identifiers: # - # GC.stat_heap.keys # => [0, 1, 2, 3, 4] + # GC.stat_heap.keys # => [0, 1, 2, 3, 4, 5] # # On CRuby, each heap identifier is an integer; # on other implementations, a heap identifier may be a string. @@ -324,9 +333,9 @@ module GC # With only argument +heap_id+ given, # returns statistics for the given heap identifier: # - # GC.stat_heap(2) + # GC.stat_heap(3) # # => - # {slot_size: 160, + # {slot_size: 256, # heap_eden_pages: 157, # heap_eden_slots: 64182, # total_allocated_pages: 157, @@ -338,7 +347,7 @@ module GC # With arguments +heap_id+ and +key+ given, # returns the value for the given key in the given heap: # - # GC.stat_heap(2, :slot_size) # => 160 + # GC.stat_heap(3, :slot_size) # => 256 # # With arguments +nil+ and +hash+ given, # merges the statistics for all heaps into the given hash: diff --git a/gc/default/default.c b/gc/default/default.c index 3230e80dd5..1338d9f1f0 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -187,9 +187,41 @@ static RB_THREAD_LOCAL_SPECIFIER int malloc_increase_local; #define USE_TICK_T (PRINT_ENTER_EXIT_TICK || PRINT_ROOT_TICKS) #ifndef HEAP_COUNT -# define HEAP_COUNT 5 +# if SIZEOF_VALUE >= 8 +# define HEAP_COUNT 12 +# else +# define HEAP_COUNT 5 +# endif #endif +/* Precomputed reciprocals for fast slot index calculation. + * For slot size d: reciprocal = ceil(2^48 / d). + * Then offset / d == (uint32_t)((offset * reciprocal) >> 48) + * for all offset < HEAP_PAGE_SIZE. */ +#define SLOT_RECIPROCAL_SHIFT 48 + +static const uint64_t heap_slot_reciprocal_table[HEAP_COUNT] = { +#if SIZEOF_VALUE >= 8 + /* 32 */ (1ULL << 48) / 32, + /* 40 */ (1ULL << 48) / 40 + 1, + /* 64 */ (1ULL << 48) / 64, + /* 80 */ (1ULL << 48) / 80 + 1, + /* 96 */ (1ULL << 48) / 96 + 1, + /* 128 */ (1ULL << 48) / 128, + /* 160 */ (1ULL << 48) / 160 + 1, + /* 256 */ (1ULL << 48) / 256, + /* 512 */ (1ULL << 48) / 512, + /* 640 */ (1ULL << 48) / 640 + 1, + /* 768 */ (1ULL << 48) / 768 + 1, + /* 1024 */ (1ULL << 48) / 1024, +#else + /* 32 */ (1ULL << 48) / 32, + /* 64 */ (1ULL << 48) / 64, + /* 128 */ (1ULL << 48) / 128, + /* 256 */ (1ULL << 48) / 256, + /* 512 */ (1ULL << 48) / 512, +#endif +}; typedef struct ractor_newobj_heap_cache { struct free_slot *freelist; struct heap_page *using_page; @@ -689,15 +721,17 @@ size_t rb_gc_impl_obj_slot_size(VALUE obj); #define RVALUE_SLOT_SIZE (sizeof(struct RBasic) + sizeof(VALUE[RBIMPL_RVALUE_EMBED_LEN_MAX]) + RVALUE_OVERHEAD) +#if SIZEOF_VALUE >= 8 static const size_t pool_slot_sizes[HEAP_COUNT] = { - RVALUE_SLOT_SIZE, - RVALUE_SLOT_SIZE * 2, - RVALUE_SLOT_SIZE * 4, - RVALUE_SLOT_SIZE * 8, - RVALUE_SLOT_SIZE * 16, + 32, 40, 64, 80, 96, 128, 160, 256, 512, 640, 768, 1024, }; - -static uint8_t size_to_heap_idx[RVALUE_SLOT_SIZE * (1 << (HEAP_COUNT - 1)) / 8 + 1]; +static uint8_t size_to_heap_idx[1024 / 8 + 1]; +#else +static const size_t pool_slot_sizes[HEAP_COUNT] = { + 32, 64, 128, 256, 512, +}; +static uint8_t size_to_heap_idx[512 / 8 + 1]; +#endif #ifndef MAX # define MAX(a, b) (((a) > (b)) ? (a) : (b)) @@ -707,11 +741,12 @@ static uint8_t size_to_heap_idx[RVALUE_SLOT_SIZE * (1 << (HEAP_COUNT - 1)) / 8 + #endif #define roomof(x, y) (((x) + (y) - 1) / (y)) #define CEILDIV(i, mod) roomof(i, mod) +#define MIN_POOL_SLOT_SIZE 32 enum { HEAP_PAGE_ALIGN = (1UL << HEAP_PAGE_ALIGN_LOG), HEAP_PAGE_ALIGN_MASK = (~(~0UL << HEAP_PAGE_ALIGN_LOG)), HEAP_PAGE_SIZE = HEAP_PAGE_ALIGN, - HEAP_PAGE_BITMAP_LIMIT = CEILDIV(CEILDIV(HEAP_PAGE_SIZE, RVALUE_SLOT_SIZE), BITS_BITLENGTH), + HEAP_PAGE_BITMAP_LIMIT = CEILDIV(CEILDIV(HEAP_PAGE_SIZE, MIN_POOL_SLOT_SIZE), BITS_BITLENGTH), HEAP_PAGE_BITMAP_SIZE = (BITS_SIZE * HEAP_PAGE_BITMAP_LIMIT), }; #define HEAP_PAGE_ALIGN (1 << HEAP_PAGE_ALIGN_LOG) @@ -773,8 +808,11 @@ struct free_slot { }; struct heap_page { + /* Cache line 0: allocation fast path + SLOT_INDEX */ + struct free_slot *freelist; + uintptr_t start; + uint64_t slot_size_reciprocal; unsigned short slot_size; - uint32_t slot_div_magic; unsigned short total_slots; unsigned short free_slots; unsigned short final_slots; @@ -789,8 +827,6 @@ struct heap_page { struct heap_page *free_next; struct heap_page_body *body; - uintptr_t start; - struct free_slot *freelist; struct ccan_list_node page_node; bits_t wb_unprotected_bits[HEAP_PAGE_BITMAP_LIMIT]; @@ -851,15 +887,13 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page * #define GET_PAGE_HEADER(x) (&GET_PAGE_BODY(x)->header) #define GET_HEAP_PAGE(x) (GET_PAGE_HEADER(x)->page) -static uint32_t slot_div_magics[HEAP_COUNT]; - static inline size_t -slot_index_for_offset(size_t offset, uint32_t div_magic) +slot_index_for_offset(size_t offset, uint64_t reciprocal) { - return (size_t)(((uint64_t)offset * div_magic) >> 32); + return (uint32_t)(((uint64_t)offset * reciprocal) >> SLOT_RECIPROCAL_SHIFT); } -#define SLOT_INDEX(page, p) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_div_magic) +#define SLOT_INDEX(page, p) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_size_reciprocal) #define SLOT_BITMAP_INDEX(page, p) (SLOT_INDEX(page, p) / BITS_BITLENGTH) #define SLOT_BITMAP_OFFSET(page, p) (SLOT_INDEX(page, p) & (BITS_BITLENGTH - 1)) #define SLOT_BITMAP_BIT(page, p) ((bits_t)1 << SLOT_BITMAP_OFFSET(page, p)) @@ -1990,19 +2024,17 @@ heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page) GC_ASSERT(!heap->sweeping_page); GC_ASSERT(heap_page_in_global_empty_pages_pool(objspace, page)); - /* Align start to the first slot_size boundary after the page header */ + /* Align start to slot_size boundary */ uintptr_t start = (uintptr_t)page->body + sizeof(struct heap_page_header); - size_t remainder = start % heap->slot_size; - if (remainder != 0) { - start += heap->slot_size - remainder; - } + uintptr_t rem = start % heap->slot_size; + if (rem) start += heap->slot_size - rem; int slot_count = (int)((HEAP_PAGE_SIZE - (start - (uintptr_t)page->body))/heap->slot_size); page->start = start; page->total_slots = slot_count; page->slot_size = heap->slot_size; - page->slot_div_magic = slot_div_magics[heap - heaps]; + page->slot_size_reciprocal = heap_slot_reciprocal_table[heap - heaps]; page->heap = heap; memset(&page->wb_unprotected_bits[0], 0, HEAP_PAGE_BITMAP_SIZE); @@ -9521,11 +9553,15 @@ rb_gc_impl_objspace_init(void *objspace_ptr) rb_bug("Could not preregister postponed job for GC"); } + /* A standard RVALUE (RBasic + embedded VALUEs + debug overhead) must fit + * in at least one pool. In debug builds RVALUE_OVERHEAD can push this + * beyond the 48-byte pool into the 64-byte pool, which is fine. */ + GC_ASSERT(rb_gc_impl_size_allocatable_p(sizeof(struct RBasic) + sizeof(VALUE[RBIMPL_RVALUE_EMBED_LEN_MAX]))); + for (int i = 0; i < HEAP_COUNT; i++) { rb_heap_t *heap = &heaps[i]; heap->slot_size = pool_slot_sizes[i]; - slot_div_magics[i] = (uint32_t)((uint64_t)UINT32_MAX / heap->slot_size + 1); ccan_list_head_init(&heap->pages); } diff --git a/gc/mmtk/mmtk.c b/gc/mmtk/mmtk.c index 1dacd95ab5..f83079f3ab 100644 --- a/gc/mmtk/mmtk.c +++ b/gc/mmtk/mmtk.c @@ -635,12 +635,19 @@ void rb_gc_impl_set_params(void *objspace_ptr) { } static VALUE gc_verify_internal_consistency(VALUE self) { return Qnil; } -#define MMTK_HEAP_COUNT 6 -#define MMTK_MAX_OBJ_SIZE 640 - +#if SIZEOF_VALUE >= 8 +#define MMTK_HEAP_COUNT 12 +#define MMTK_MAX_OBJ_SIZE 1024 static size_t heap_sizes[MMTK_HEAP_COUNT + 1] = { - 32, 40, 80, 160, 320, MMTK_MAX_OBJ_SIZE, 0 + 32, 40, 64, 80, 96, 128, 160, 256, 512, 640, 768, MMTK_MAX_OBJ_SIZE, 0 }; +#else +#define MMTK_HEAP_COUNT 5 +#define MMTK_MAX_OBJ_SIZE 512 +static size_t heap_sizes[MMTK_HEAP_COUNT + 1] = { + 32, 64, 128, 256, MMTK_MAX_OBJ_SIZE, 0 +}; +#endif void rb_gc_impl_init(void) @@ -649,8 +656,7 @@ rb_gc_impl_init(void) rb_hash_aset(gc_constants, ID2SYM(rb_intern("RBASIC_SIZE")), SIZET2NUM(sizeof(struct RBasic))); rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVALUE_OVERHEAD")), INT2NUM(0)); rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVARGC_MAX_ALLOCATE_SIZE")), LONG2FIX(MMTK_MAX_OBJ_SIZE)); - // Pretend we have 5 size pools - rb_hash_aset(gc_constants, ID2SYM(rb_intern("SIZE_POOL_COUNT")), LONG2FIX(MMTK_HEAP_COUNT)); + rb_hash_aset(gc_constants, ID2SYM(rb_intern("HEAP_COUNT")), LONG2FIX(MMTK_HEAP_COUNT)); // TODO: correctly set RVALUE_OLD_AGE when we have generational GC support rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVALUE_OLD_AGE")), INT2FIX(0)); OBJ_FREEZE(gc_constants); diff --git a/internal/class.h b/internal/class.h index 80bb8cb4f7..c08aa03755 100644 --- a/internal/class.h +++ b/internal/class.h @@ -111,9 +111,9 @@ struct RClass_and_rb_classext_t { }; #if SIZEOF_VALUE >= SIZEOF_LONG_LONG -// Assert that classes can be embedded in heaps[2] (which has 160B slot size) +// Assert that classes can be embedded in heaps[3] (256B slot size on 64-bit). // On 32bit platforms there is no variable width allocation so it doesn't matter. -STATIC_ASSERT(sizeof_rb_classext_t, sizeof(struct RClass_and_rb_classext_t) <= 4 * RVALUE_SIZE); +STATIC_ASSERT(sizeof_rb_classext_t, sizeof(struct RClass_and_rb_classext_t) <= 256); #endif struct RClass_boxable { diff --git a/test/.excludes-mmtk/TestObjSpace.rb b/test/.excludes-mmtk/TestObjSpace.rb index 82858b256f..94eb2c436d 100644 --- a/test/.excludes-mmtk/TestObjSpace.rb +++ b/test/.excludes-mmtk/TestObjSpace.rb @@ -1,5 +1,4 @@ exclude(:test_dump_all_full, "testing behaviour specific to default GC") exclude(:test_dump_flag_age, "testing behaviour specific to default GC") exclude(:test_dump_flags, "testing behaviour specific to default GC") -exclude(:test_dump_includes_slot_size, "can be removed when pool 0 slot size is 32 bytes") exclude(:test_dump_objects_dumps_page_slot_sizes, "testing behaviour specific to default GC") |
