path: root/gc.c
author    Jean Boussier <byroot@ruby-lang.org>    2023-10-10 15:32:12 +0200
committer Jean Boussier <jean.boussier@gmail.com>    2023-10-23 09:33:15 +0200
commit    e5364ea496a193560c0dc339f82a67674d26fee2 (patch)
tree      c680de57367698155a75d9c4219b3243d97ba1b1 /gc.c
parent    e7d845b1d0154651962f34eefb37ffb0ac0c4e0a (diff)
rb_shape_transition_shape_capa: use optimal sizes transitions
Previously the growth was 3(embed), 6, 12, 24, ... With this change it's now 3(embed), 8, 16, 32, 64, ... by default.

However, since a power of two isn't the best size for all allocators, if `malloc_usable_size` is available, we use it to discover the best offset. On Linux/glibc 2.35 for instance, the growth will be 3(embed), 7, 15, 31 to avoid wasting 8B per object.

Test program:

```c
size_t test(size_t slots)
{
    size_t allocated = slots * VALUE_SIZE;
    void *test_ptr = malloc(allocated);
    size_t wasted = malloc_usable_size(test_ptr) - allocated;
    free(test_ptr);
    fprintf(stderr, "slots = %lu, wasted_bytes = %lu\n", slots, wasted);
    return wasted;
}

int main(int argc, char *argv[])
{
    size_t best_padding = 0;
    size_t padding = 0;
    for (padding = 0; padding <= 2; padding++) {
        size_t wasted = test(8 - padding);
        if (wasted == 0) {
            best_padding = padding;
            break;
        }
    }

    size_t index = 0;
    fprintf(stderr, "=============== naive ================\n");
    size_t list_size = 4;
    for (index = 0; index < 10; index++) {
        test(list_size);
        list_size *= 2;
    }

    fprintf(stderr, "=============== auto-padded (-%lu) ================\n", best_padding);
    list_size = 4;
    for (index = 0; index < 10; index++) {
        test(list_size - best_padding);
        list_size *= 2;
    }

    fprintf(stderr, "\n\n");
    return 0;
}
```

```
===== glibc ======
slots = 8, wasted_bytes = 8
slots = 7, wasted_bytes = 0
=============== naive ================
slots = 4, wasted_bytes = 8
slots = 8, wasted_bytes = 8
slots = 16, wasted_bytes = 8
slots = 32, wasted_bytes = 8
slots = 64, wasted_bytes = 8
slots = 128, wasted_bytes = 8
slots = 256, wasted_bytes = 8
slots = 512, wasted_bytes = 8
slots = 1024, wasted_bytes = 8
slots = 2048, wasted_bytes = 8
=============== auto-padded (-1) ================
slots = 3, wasted_bytes = 0
slots = 7, wasted_bytes = 0
slots = 15, wasted_bytes = 0
slots = 31, wasted_bytes = 0
slots = 63, wasted_bytes = 0
slots = 127, wasted_bytes = 0
slots = 255, wasted_bytes = 0
slots = 511, wasted_bytes = 0
slots = 1023, wasted_bytes = 0
slots = 2047, wasted_bytes = 0
```

```
========== jemalloc =======
slots = 8, wasted_bytes = 0
=============== naive ================
slots = 4, wasted_bytes = 0
slots = 8, wasted_bytes = 0
slots = 16, wasted_bytes = 0
slots = 32, wasted_bytes = 0
slots = 64, wasted_bytes = 0
slots = 128, wasted_bytes = 0
slots = 256, wasted_bytes = 0
slots = 512, wasted_bytes = 0
slots = 1024, wasted_bytes = 0
slots = 2048, wasted_bytes = 0
=============== auto-padded (-0) ================
slots = 4, wasted_bytes = 0
slots = 8, wasted_bytes = 0
slots = 16, wasted_bytes = 0
slots = 32, wasted_bytes = 0
slots = 64, wasted_bytes = 0
slots = 128, wasted_bytes = 0
slots = 256, wasted_bytes = 0
slots = 512, wasted_bytes = 0
slots = 1024, wasted_bytes = 0
slots = 2048, wasted_bytes = 0
```
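To make the rule concrete, here is a minimal standalone sketch of the growth logic described above. It is illustrative only, not the committed code: `grow_capa`, the fixed `VALUE_SIZE` of 8, and the hard-coded 8B glibc offset are assumptions.

```c
#include <stdio.h>
#include <stddef.h>

#define VALUE_SIZE 8

// Sketch of the growth rule: double the byte size, round up to the next
// power of 2, then subtract the allocator offset so the request lands
// exactly on a size class.
static size_t
grow_capa(size_t current, size_t malloc_offset)
{
    size_t bytes = (current < 4 ? 4 : current) * VALUE_SIZE;
    size_t new_bytes = bytes * 2;

    // Round up to the next power of 2 if it's not already one.
    if ((new_bytes & (new_bytes - 1)) != 0) {
        size_t p = 1;
        while (p < new_bytes) p <<= 1;
        new_bytes = p;
    }

    return (new_bytes - malloc_offset) / VALUE_SIZE;
}

int main(void)
{
    size_t capa = 3; // embedded capacity
    for (int i = 0; i < 5; i++) {
        capa = grow_capa(capa, 8); // 8B of glibc metadata overhead
        printf("%zu ", capa);      // prints: 7 15 31 63 127
    }
    printf("\n");
    return 0;
}
```

With an offset of 0 (the jemalloc case, or when `malloc_usable_size` is unavailable), the same rule produces the default 8, 16, 32, 64, ... sequence.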
Diffstat (limited to 'gc.c')
-rw-r--r--  gc.c  64
1 file changed, 64 insertions, 0 deletions
diff --git a/gc.c b/gc.c
index d1fc9085bc..a9167e2ede 100644
--- a/gc.c
+++ b/gc.c
@@ -157,6 +157,68 @@
 #define MAP_ANONYMOUS MAP_ANON
 #endif
+
+static size_t malloc_offset = 0;
+#if defined(HAVE_MALLOC_USABLE_SIZE)
+static size_t
+gc_compute_malloc_offset(void)
+{
+    // Different allocators use different metadata storage strategies which result in different
+    // ideal sizes.
+    // For instance malloc(64) will waste 8B with glibc, but waste 0B with jemalloc.
+    // But malloc(56) will waste 0B with glibc, but waste 8B with jemalloc.
+    // So we try allocating 64, 56 and 48 bytes and select the first offset that doesn't
+    // waste memory.
+    // This was tested on Linux with glibc 2.35 and jemalloc 5, and for both it results in
+    // no wasted memory.
+    size_t offset = 0;
+    for (offset = 0; offset <= 16; offset += 8) {
+        size_t allocated = (64 - offset);
+        void *test_ptr = malloc(allocated);
+        size_t wasted = malloc_usable_size(test_ptr) - allocated;
+        free(test_ptr);
+
+        if (wasted == 0) {
+            return offset;
+        }
+    }
+    return 0;
+}
+#else
+static size_t
+gc_compute_malloc_offset(void)
+{
+    // If we don't have malloc_usable_size, we use powers of 2.
+    return 0;
+}
+#endif
+
+size_t
+rb_malloc_grow_capa(size_t current, size_t type_size)
+{
+    size_t current_capacity = current;
+    if (current_capacity < 4) {
+        current_capacity = 4;
+    }
+    current_capacity *= type_size;
+
+    // We double the current capacity.
+    size_t new_capacity = (current_capacity * 2);
+
+    // And round up to the next power of 2 if it's not already one.
+    if (rb_popcount64(new_capacity) != 1) {
+        new_capacity = (size_t)1 << (64 - nlz_int64(new_capacity));
+    }
+
+    new_capacity -= malloc_offset;
+    new_capacity /= type_size;
+    if (current > new_capacity) {
+        rb_bug("rb_malloc_grow_capa: current=%zu, new_capacity=%zu, malloc_offset=%zu", current, new_capacity, malloc_offset);
+    }
+    RUBY_ASSERT(new_capacity > current);
+    return new_capacity;
+}
+
 static inline struct rbimpl_size_mul_overflow_tag
 size_add_overflow(size_t x, size_t y)
 {
@@ -13979,6 +14041,8 @@ void
 Init_GC(void)
 {
 #undef rb_intern
+    malloc_offset = gc_compute_malloc_offset();
+
     VALUE rb_mObjSpace;
     VALUE rb_mProfiler;
     VALUE gc_constants;
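For context, a call site that grows a `VALUE` buffer would ask rb_malloc_grow_capa() for the next capacity. The caller below is a hypothetical sketch, not part of this commit; it assumes Ruby's internal declarations (`VALUE`, `xrealloc`, and the new `rb_malloc_grow_capa`) are in scope.

```c
// Hypothetical caller, for illustration only: grow `ptr` from `*capa`
// slots to the allocator-friendly capacity suggested by
// rb_malloc_grow_capa(), using Ruby's checked xrealloc().
static VALUE *
values_grow(VALUE *ptr, size_t *capa)
{
    size_t new_capa = rb_malloc_grow_capa(*capa, sizeof(VALUE));
    ptr = xrealloc(ptr, new_capa * sizeof(VALUE));
    *capa = new_capa;
    return ptr;
}
```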