summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Valentine-House <matt@eightbitraptor.com>2026-03-31 13:02:18 +0100
committerMatt Valentine-House <matt@eightbitraptor.com>2026-04-09 13:24:09 +0100
commitc9b70883640e59bdd1f2f6a538957119cb558fac (patch)
tree7505b82e5db724c5c3b89eab90aae6341027a294
parent2567e76ec376fee3b6d6e98e9578bcbb44e23034 (diff)
Introduce power-of-two size pools
Replace the RVALUE_SLOT_SIZE-multiplier based pool sizes with explicit power-of-two (and near-power-of-two) slot sizes. On 64-bit this gives 12 heaps (32, 40, 64, 80, 96, 128, 160, 256, 512, 640, 768, 1024) instead of 5, providing finer granularity and less internal fragmentation. On 32-bit the layout is 5 heaps (32, 64, 128, 256, 512).
-rw-r--r--gc.rb35
-rw-r--r--gc/default/default.c84
-rw-r--r--gc/mmtk/mmtk.c18
-rw-r--r--internal/class.h4
-rw-r--r--test/.excludes-mmtk/TestObjSpace.rb1
5 files changed, 96 insertions, 46 deletions
diff --git a/gc.rb b/gc.rb
index 01d798addb..48bed27880 100644
--- a/gc.rb
+++ b/gc.rb
@@ -269,7 +269,16 @@ module GC
# GC.stat_heap
# # =>
# {0 =>
- # {slot_size: 40,
+ # {slot_size: 32,
+ # heap_eden_pages: 24,
+ # heap_eden_slots: 12288,
+ # total_allocated_pages: 24,
+ # force_major_gc_count: 0,
+ # force_incremental_marking_finish_count: 0,
+ # total_allocated_objects: 8450,
+ # total_freed_objects: 3120},
+ # 1 =>
+ # {slot_size: 40,
# heap_eden_pages: 246,
# heap_eden_slots: 402802,
# total_allocated_pages: 246,
@@ -277,8 +286,8 @@ module GC
# force_incremental_marking_finish_count: 1,
# total_allocated_objects: 33867152,
# total_freed_objects: 33520523},
- # 1 =>
- # {slot_size: 80,
+ # 2 =>
+ # {slot_size: 64,
# heap_eden_pages: 84,
# heap_eden_slots: 68746,
# total_allocated_pages: 84,
@@ -286,8 +295,8 @@ module GC
# force_incremental_marking_finish_count: 4,
# total_allocated_objects: 147491,
# total_freed_objects: 90699},
- # 2 =>
- # {slot_size: 160,
+ # 3 =>
+ # {slot_size: 80,
# heap_eden_pages: 157,
# heap_eden_slots: 64182,
# total_allocated_pages: 157,
@@ -295,8 +304,8 @@ module GC
# force_incremental_marking_finish_count: 0,
# total_allocated_objects: 211460,
# total_freed_objects: 190075},
- # 3 =>
- # {slot_size: 320,
+ # 4 =>
+ # {slot_size: 96,
# heap_eden_pages: 8,
# heap_eden_slots: 1631,
# total_allocated_pages: 8,
@@ -304,8 +313,8 @@ module GC
# force_incremental_marking_finish_count: 0,
# total_allocated_objects: 1422,
# total_freed_objects: 700},
- # 4 =>
- # {slot_size: 640,
+ # 5 =>
+ # {slot_size: 128,
# heap_eden_pages: 16,
# heap_eden_slots: 1628,
# total_allocated_pages: 16,
@@ -316,7 +325,7 @@ module GC
#
# In the example above, the keys in the outer hash are the heap identifiers:
#
- # GC.stat_heap.keys # => [0, 1, 2, 3, 4]
+ # GC.stat_heap.keys # => [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
#
# On CRuby, each heap identifier is an integer;
# on other implementations, a heap identifier may be a string.
@@ -324,9 +333,9 @@ module GC
# With only argument +heap_id+ given,
# returns statistics for the given heap identifier:
#
- # GC.stat_heap(2)
+ # GC.stat_heap(3)
# # =>
- # {slot_size: 160,
+ # {slot_size: 80,
# heap_eden_pages: 157,
# heap_eden_slots: 64182,
# total_allocated_pages: 157,
@@ -338,7 +347,7 @@ module GC
# With arguments +heap_id+ and +key+ given,
# returns the value for the given key in the given heap:
#
- # GC.stat_heap(2, :slot_size) # => 160
+ # GC.stat_heap(3, :slot_size) # => 80
#
# With arguments +nil+ and +hash+ given,
# merges the statistics for all heaps into the given hash:
diff --git a/gc/default/default.c b/gc/default/default.c
index 3230e80dd5..1338d9f1f0 100644
--- a/gc/default/default.c
+++ b/gc/default/default.c
@@ -187,9 +187,41 @@ static RB_THREAD_LOCAL_SPECIFIER int malloc_increase_local;
#define USE_TICK_T (PRINT_ENTER_EXIT_TICK || PRINT_ROOT_TICKS)
#ifndef HEAP_COUNT
-# define HEAP_COUNT 5
+# if SIZEOF_VALUE >= 8
+# define HEAP_COUNT 12
+# else
+# define HEAP_COUNT 5
+# endif
#endif
+/* Precomputed reciprocals for fast slot index calculation.
+ * For slot size d: reciprocal = ceil(2^48 / d).
+ * Then offset / d == (uint32_t)((offset * reciprocal) >> 48)
+ * for all offset < HEAP_PAGE_SIZE. */
+#define SLOT_RECIPROCAL_SHIFT 48
+
+static const uint64_t heap_slot_reciprocal_table[HEAP_COUNT] = {
+#if SIZEOF_VALUE >= 8
+ /* 32 */ (1ULL << 48) / 32,
+ /* 40 */ (1ULL << 48) / 40 + 1,
+ /* 64 */ (1ULL << 48) / 64,
+ /* 80 */ (1ULL << 48) / 80 + 1,
+ /* 96 */ (1ULL << 48) / 96 + 1,
+ /* 128 */ (1ULL << 48) / 128,
+ /* 160 */ (1ULL << 48) / 160 + 1,
+ /* 256 */ (1ULL << 48) / 256,
+ /* 512 */ (1ULL << 48) / 512,
+ /* 640 */ (1ULL << 48) / 640 + 1,
+ /* 768 */ (1ULL << 48) / 768 + 1,
+ /* 1024 */ (1ULL << 48) / 1024,
+#else
+ /* 32 */ (1ULL << 48) / 32,
+ /* 64 */ (1ULL << 48) / 64,
+ /* 128 */ (1ULL << 48) / 128,
+ /* 256 */ (1ULL << 48) / 256,
+ /* 512 */ (1ULL << 48) / 512,
+#endif
+};
typedef struct ractor_newobj_heap_cache {
struct free_slot *freelist;
struct heap_page *using_page;
@@ -689,15 +721,17 @@ size_t rb_gc_impl_obj_slot_size(VALUE obj);
#define RVALUE_SLOT_SIZE (sizeof(struct RBasic) + sizeof(VALUE[RBIMPL_RVALUE_EMBED_LEN_MAX]) + RVALUE_OVERHEAD)
+#if SIZEOF_VALUE >= 8
static const size_t pool_slot_sizes[HEAP_COUNT] = {
- RVALUE_SLOT_SIZE,
- RVALUE_SLOT_SIZE * 2,
- RVALUE_SLOT_SIZE * 4,
- RVALUE_SLOT_SIZE * 8,
- RVALUE_SLOT_SIZE * 16,
+ 32, 40, 64, 80, 96, 128, 160, 256, 512, 640, 768, 1024,
};
-
-static uint8_t size_to_heap_idx[RVALUE_SLOT_SIZE * (1 << (HEAP_COUNT - 1)) / 8 + 1];
+static uint8_t size_to_heap_idx[1024 / 8 + 1];
+#else
+static const size_t pool_slot_sizes[HEAP_COUNT] = {
+ 32, 64, 128, 256, 512,
+};
+static uint8_t size_to_heap_idx[512 / 8 + 1];
+#endif
#ifndef MAX
# define MAX(a, b) (((a) > (b)) ? (a) : (b))
@@ -707,11 +741,12 @@ static uint8_t size_to_heap_idx[RVALUE_SLOT_SIZE * (1 << (HEAP_COUNT - 1)) / 8 +
#endif
#define roomof(x, y) (((x) + (y) - 1) / (y))
#define CEILDIV(i, mod) roomof(i, mod)
+#define MIN_POOL_SLOT_SIZE 32
enum {
HEAP_PAGE_ALIGN = (1UL << HEAP_PAGE_ALIGN_LOG),
HEAP_PAGE_ALIGN_MASK = (~(~0UL << HEAP_PAGE_ALIGN_LOG)),
HEAP_PAGE_SIZE = HEAP_PAGE_ALIGN,
- HEAP_PAGE_BITMAP_LIMIT = CEILDIV(CEILDIV(HEAP_PAGE_SIZE, RVALUE_SLOT_SIZE), BITS_BITLENGTH),
+ HEAP_PAGE_BITMAP_LIMIT = CEILDIV(CEILDIV(HEAP_PAGE_SIZE, MIN_POOL_SLOT_SIZE), BITS_BITLENGTH),
HEAP_PAGE_BITMAP_SIZE = (BITS_SIZE * HEAP_PAGE_BITMAP_LIMIT),
};
#define HEAP_PAGE_ALIGN (1 << HEAP_PAGE_ALIGN_LOG)
@@ -773,8 +808,11 @@ struct free_slot {
};
struct heap_page {
+ /* Cache line 0: allocation fast path + SLOT_INDEX */
+ struct free_slot *freelist;
+ uintptr_t start;
+ uint64_t slot_size_reciprocal;
unsigned short slot_size;
- uint32_t slot_div_magic;
unsigned short total_slots;
unsigned short free_slots;
unsigned short final_slots;
@@ -789,8 +827,6 @@ struct heap_page {
struct heap_page *free_next;
struct heap_page_body *body;
- uintptr_t start;
- struct free_slot *freelist;
struct ccan_list_node page_node;
bits_t wb_unprotected_bits[HEAP_PAGE_BITMAP_LIMIT];
@@ -851,15 +887,13 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page *
#define GET_PAGE_HEADER(x) (&GET_PAGE_BODY(x)->header)
#define GET_HEAP_PAGE(x) (GET_PAGE_HEADER(x)->page)
-static uint32_t slot_div_magics[HEAP_COUNT];
-
static inline size_t
-slot_index_for_offset(size_t offset, uint32_t div_magic)
+slot_index_for_offset(size_t offset, uint64_t reciprocal)
{
- return (size_t)(((uint64_t)offset * div_magic) >> 32);
+ return (uint32_t)(((uint64_t)offset * reciprocal) >> SLOT_RECIPROCAL_SHIFT);
}
-#define SLOT_INDEX(page, p) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_div_magic)
+#define SLOT_INDEX(page, p) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_size_reciprocal)
#define SLOT_BITMAP_INDEX(page, p) (SLOT_INDEX(page, p) / BITS_BITLENGTH)
#define SLOT_BITMAP_OFFSET(page, p) (SLOT_INDEX(page, p) & (BITS_BITLENGTH - 1))
#define SLOT_BITMAP_BIT(page, p) ((bits_t)1 << SLOT_BITMAP_OFFSET(page, p))
@@ -1990,19 +2024,17 @@ heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page)
GC_ASSERT(!heap->sweeping_page);
GC_ASSERT(heap_page_in_global_empty_pages_pool(objspace, page));
- /* Align start to the first slot_size boundary after the page header */
+ /* Align start to slot_size boundary */
uintptr_t start = (uintptr_t)page->body + sizeof(struct heap_page_header);
- size_t remainder = start % heap->slot_size;
- if (remainder != 0) {
- start += heap->slot_size - remainder;
- }
+ uintptr_t rem = start % heap->slot_size;
+ if (rem) start += heap->slot_size - rem;
int slot_count = (int)((HEAP_PAGE_SIZE - (start - (uintptr_t)page->body))/heap->slot_size);
page->start = start;
page->total_slots = slot_count;
page->slot_size = heap->slot_size;
- page->slot_div_magic = slot_div_magics[heap - heaps];
+ page->slot_size_reciprocal = heap_slot_reciprocal_table[heap - heaps];
page->heap = heap;
memset(&page->wb_unprotected_bits[0], 0, HEAP_PAGE_BITMAP_SIZE);
@@ -9521,11 +9553,15 @@ rb_gc_impl_objspace_init(void *objspace_ptr)
rb_bug("Could not preregister postponed job for GC");
}
+ /* A standard RVALUE (RBasic + embedded VALUEs + debug overhead) must fit
+ * in at least one pool. In debug builds RVALUE_OVERHEAD can push this
+ * beyond the 40-byte pool into the 64-byte pool, which is fine. */
+ GC_ASSERT(rb_gc_impl_size_allocatable_p(sizeof(struct RBasic) + sizeof(VALUE[RBIMPL_RVALUE_EMBED_LEN_MAX])));
+
for (int i = 0; i < HEAP_COUNT; i++) {
rb_heap_t *heap = &heaps[i];
heap->slot_size = pool_slot_sizes[i];
- slot_div_magics[i] = (uint32_t)((uint64_t)UINT32_MAX / heap->slot_size + 1);
ccan_list_head_init(&heap->pages);
}
diff --git a/gc/mmtk/mmtk.c b/gc/mmtk/mmtk.c
index 1dacd95ab5..f83079f3ab 100644
--- a/gc/mmtk/mmtk.c
+++ b/gc/mmtk/mmtk.c
@@ -635,12 +635,19 @@ void rb_gc_impl_set_params(void *objspace_ptr) { }
static VALUE gc_verify_internal_consistency(VALUE self) { return Qnil; }
-#define MMTK_HEAP_COUNT 6
-#define MMTK_MAX_OBJ_SIZE 640
-
+#if SIZEOF_VALUE >= 8
+#define MMTK_HEAP_COUNT 12
+#define MMTK_MAX_OBJ_SIZE 1024
static size_t heap_sizes[MMTK_HEAP_COUNT + 1] = {
- 32, 40, 80, 160, 320, MMTK_MAX_OBJ_SIZE, 0
+ 32, 40, 64, 80, 96, 128, 160, 256, 512, 640, 768, MMTK_MAX_OBJ_SIZE, 0
};
+#else
+#define MMTK_HEAP_COUNT 5
+#define MMTK_MAX_OBJ_SIZE 512
+static size_t heap_sizes[MMTK_HEAP_COUNT + 1] = {
+ 32, 64, 128, 256, MMTK_MAX_OBJ_SIZE, 0
+};
+#endif
void
rb_gc_impl_init(void)
@@ -649,8 +656,7 @@ rb_gc_impl_init(void)
rb_hash_aset(gc_constants, ID2SYM(rb_intern("RBASIC_SIZE")), SIZET2NUM(sizeof(struct RBasic)));
rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVALUE_OVERHEAD")), INT2NUM(0));
rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVARGC_MAX_ALLOCATE_SIZE")), LONG2FIX(MMTK_MAX_OBJ_SIZE));
- // Pretend we have 5 size pools
- rb_hash_aset(gc_constants, ID2SYM(rb_intern("SIZE_POOL_COUNT")), LONG2FIX(MMTK_HEAP_COUNT));
+ rb_hash_aset(gc_constants, ID2SYM(rb_intern("HEAP_COUNT")), LONG2FIX(MMTK_HEAP_COUNT));
// TODO: correctly set RVALUE_OLD_AGE when we have generational GC support
rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVALUE_OLD_AGE")), INT2FIX(0));
OBJ_FREEZE(gc_constants);
diff --git a/internal/class.h b/internal/class.h
index 80bb8cb4f7..c08aa03755 100644
--- a/internal/class.h
+++ b/internal/class.h
@@ -111,9 +111,9 @@ struct RClass_and_rb_classext_t {
};
#if SIZEOF_VALUE >= SIZEOF_LONG_LONG
-// Assert that classes can be embedded in heaps[2] (which has 160B slot size)
+// Assert that classes can be embedded in heaps[7] (256B slot size on 64-bit).
// On 32bit platforms there is no variable width allocation so it doesn't matter.
-STATIC_ASSERT(sizeof_rb_classext_t, sizeof(struct RClass_and_rb_classext_t) <= 4 * RVALUE_SIZE);
+STATIC_ASSERT(sizeof_rb_classext_t, sizeof(struct RClass_and_rb_classext_t) <= 256);
#endif
struct RClass_boxable {
diff --git a/test/.excludes-mmtk/TestObjSpace.rb b/test/.excludes-mmtk/TestObjSpace.rb
index 82858b256f..94eb2c436d 100644
--- a/test/.excludes-mmtk/TestObjSpace.rb
+++ b/test/.excludes-mmtk/TestObjSpace.rb
@@ -1,5 +1,4 @@
exclude(:test_dump_all_full, "testing behaviour specific to default GC")
exclude(:test_dump_flag_age, "testing behaviour specific to default GC")
exclude(:test_dump_flags, "testing behaviour specific to default GC")
-exclude(:test_dump_includes_slot_size, "can be removed when pool 0 slot size is 32 bytes")
exclude(:test_dump_objects_dumps_page_slot_sizes, "testing behaviour specific to default GC")