From 9aded89f4071a8afb79326701789241f1da12f82 Mon Sep 17 00:00:00 2001
From: Peter Zhu
Date: Fri, 19 Nov 2021 14:51:58 -0500
Subject: Speed up Ractors for Variable Width Allocation

This commit adds a Ractor cache for every size pool. Previously, all
VWA-allocated objects used the slowpath and locked the VM.

On a micro-benchmark that benchmarks String allocation (user, system,
total, and real times, in seconds):

VWA turned off:
  29.196591   0.889709  30.086300 (  9.434059)

VWA before this commit:
  29.279486  41.477869  70.757355 ( 12.527379)

VWA after this commit:
  16.782903   0.557117  17.340020 (  4.255603)
---
 class.c       |   4 +-
 gc.c          | 152 +++++++++++++++++++++++-----------------------------------
 internal/gc.h |  12 ++++-
 string.c      |  14 ++++--
 4 files changed, 82 insertions(+), 100 deletions(-)
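A note for readers (not part of the commit; git-am ignores text between the
diffstat and the first diff): the patch combines two ideas. An allocation
size is mapped to a size-pool index, ceil(log2(slot_count)) as in
size_pool_idx_for_size below, and the allocation fast path then pops a free
slot from that pool's per-ractor cache instead of taking the VM lock; the
lock is only needed on the slow path, when an empty cache is refilled from
a heap page (ractor_cache_slots). The self-contained C sketch below models
both steps. free_slot_t, cache_alloc, and the 40-byte RVALUE size are
illustrative assumptions, not Ruby internals.

    /* sketch.c: a minimal model of per-ractor, per-size-pool caches. */
    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    #define SIZE_POOL_COUNT 4
    #define RVALUE_SIZE 40  /* assumed sizeof(RVALUE) on a 64-bit build */

    typedef struct free_slot { struct free_slot *next; } free_slot_t;

    typedef struct {
        free_slot_t *freelist;  /* slots owned by one ractor; popped lock-free */
        void *using_page;       /* heap page the slots were taken from */
    } size_pool_cache_t;

    typedef struct {
        size_pool_cache_t size_pool_caches[SIZE_POOL_COUNT];
    } newobj_cache_t;

    /* Pool i serves slots of RVALUE_SIZE << i bytes, so the index for a
     * request is ceil(log2(slot_count)), as in size_pool_idx_for_size. */
    static size_t
    size_pool_idx_for_size(size_t size)
    {
        size_t slot_count = (size + RVALUE_SIZE - 1) / RVALUE_SIZE; /* CEILDIV */
        size_t idx = 0;
        while (((size_t)1 << idx) < slot_count) idx++;
        assert(idx < SIZE_POOL_COUNT && "allocation size too large");
        return idx;
    }

    /* Fast path: pop one slot from the calling ractor's cache for the
     * right pool.  NULL plays the role of Qfalse in the patch: the cache
     * is empty, so the caller must lock the VM and refill it from a
     * heap page before retrying. */
    static void *
    cache_alloc(newobj_cache_t *cache, size_t size)
    {
        size_pool_cache_t *c = &cache->size_pool_caches[size_pool_idx_for_size(size)];
        free_slot_t *p = c->freelist;
        if (p == NULL) return NULL;
        c->freelist = p->next;
        return p;
    }

    int
    main(void)
    {
        newobj_cache_t cache = {0};

        /* An empty cache forces the (not modeled) slow path. */
        assert(cache_alloc(&cache, RVALUE_SIZE) == NULL);

        /* Sizes up to 4 * RVALUE_SIZE map onto the 4 pools. */
        for (size_t size = 1; size <= 4 * RVALUE_SIZE; size += RVALUE_SIZE - 1) {
            printf("size %3zu -> pool %zu\n", size, size_pool_idx_for_size(size));
        }
        return 0;
    }

Because each ractor owns one cache per size pool, allocating a VWA object
of any supported size touches only ractor-local state in the common case,
which is what removes the VM lock (and the system time visible in the
benchmark above) from the hot path.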
diff --git a/class.c b/class.c
index 6ad64a6efe..f1e8953f81 100644
--- a/class.c
+++ b/class.c
@@ -203,7 +203,9 @@ class_alloc(VALUE flags, VALUE klass)
     RVARGC_NEWOBJ_OF(obj, struct RClass, klass,
                      (flags & T_MASK) | FL_PROMOTED1 /* start from age == 2 */ | (RGENGC_WB_PROTECTED_CLASS ? FL_WB_PROTECTED : 0), alloc_size);
 
-#if !USE_RVARGC
+#if USE_RVARGC
+    memset(RCLASS_EXT(obj), 0, sizeof(rb_classext_t));
+#else
     obj->ptr = ZALLOC(rb_classext_t);
 #endif
 
diff --git a/gc.c b/gc.c
index b2c5ca0a80..e0ba25860c 100644
--- a/gc.c
+++ b/gc.c
@@ -659,11 +659,6 @@ typedef struct mark_stack {
     size_t unused_cache_size;
 } mark_stack_t;
 
-#if USE_RVARGC
-#define SIZE_POOL_COUNT 4
-#else
-#define SIZE_POOL_COUNT 1
-#endif
 #define SIZE_POOL_EDEN_HEAP(size_pool) (&(size_pool)->eden_heap)
 #define SIZE_POOL_TOMB_HEAP(size_pool) (&(size_pool)->tomb_heap)
 
@@ -681,11 +676,6 @@ typedef struct rb_heap_struct {
 } rb_heap_t;
 
 typedef struct rb_size_pool_struct {
-#if USE_RVARGC
-    RVALUE *freelist;
-    struct heap_page *using_page;
-#endif
-
     short slot_size;
 
     size_t allocatable_pages;
@@ -2325,7 +2315,7 @@ newobj_init(VALUE klass, VALUE flags, int wb_protected, rb_objspace_t *objspace,
 
 static inline void heap_add_freepage(rb_heap_t *heap, struct heap_page *page);
 static struct heap_page *heap_next_freepage(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap);
-static inline void ractor_set_cache(rb_ractor_t *cr, struct heap_page *page);
+static inline void ractor_set_cache(rb_ractor_t *cr, struct heap_page *page, size_t size_pool_idx);
 
 size_t
 rb_gc_obj_slot_size(VALUE obj)
@@ -2355,17 +2345,14 @@ rb_gc_size_allocatable_p(size_t size)
 }
 
 static inline VALUE
-ractor_cached_free_region(rb_objspace_t *objspace, rb_ractor_t *cr, size_t size)
+ractor_cached_free_region(rb_objspace_t *objspace, rb_ractor_t *cr, size_t size_pool_idx)
 {
-    if (size > sizeof(RVALUE)) {
-        return Qfalse;
-    }
-
-    RVALUE *p = cr->newobj_cache.freelist;
+    rb_ractor_newobj_size_pool_cache_t *cache = &cr->newobj_cache.size_pool_caches[size_pool_idx];
+    RVALUE *p = cache->freelist;
 
     if (p) {
         VALUE obj = (VALUE)p;
-        cr->newobj_cache.freelist = p->as.free.next;
+        cache->freelist = p->as.free.next;
         asan_unpoison_object(obj, true);
         return obj;
     }
@@ -2396,28 +2383,31 @@ heap_next_freepage(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t
 }
 
 static inline void
-ractor_set_cache(rb_ractor_t *cr, struct heap_page *page)
+ractor_set_cache(rb_ractor_t *cr, struct heap_page *page, size_t size_pool_idx)
 {
     gc_report(3, &rb_objspace, "ractor_set_cache: Using page %p\n", (void *)GET_PAGE_BODY(page->start));
-    cr->newobj_cache.using_page = page;
-    cr->newobj_cache.freelist = page->freelist;
+
+    rb_ractor_newobj_size_pool_cache_t *cache = &cr->newobj_cache.size_pool_caches[size_pool_idx];
+
+    cache->using_page = page;
+    cache->freelist = page->freelist;
     page->free_slots = 0;
     page->freelist = NULL;
 
-    asan_unpoison_object((VALUE)cr->newobj_cache.freelist, false);
-    GC_ASSERT(RB_TYPE_P((VALUE)cr->newobj_cache.freelist, T_NONE));
-    asan_poison_object((VALUE)cr->newobj_cache.freelist);
+    asan_unpoison_object((VALUE)cache->freelist, false);
+    GC_ASSERT(RB_TYPE_P((VALUE)cache->freelist, T_NONE));
+    asan_poison_object((VALUE)cache->freelist);
 }
 
 static inline void
-ractor_cache_slots(rb_objspace_t *objspace, rb_ractor_t *cr)
+ractor_cache_slots(rb_objspace_t *objspace, rb_ractor_t *cr, size_t size_pool_idx)
 {
     ASSERT_vm_locking();
 
-    rb_size_pool_t *size_pool = &size_pools[0];
+    rb_size_pool_t *size_pool = &size_pools[size_pool_idx];
    struct heap_page *page = heap_next_freepage(objspace, size_pool, SIZE_POOL_EDEN_HEAP(size_pool));
 
-    ractor_set_cache(cr, page);
+    ractor_set_cache(cr, page, size_pool_idx);
 }
 
 static inline VALUE
@@ -2430,10 +2420,10 @@ newobj_fill(VALUE obj, VALUE v1, VALUE v2, VALUE v3)
     return obj;
 }
 
-#if USE_RVARGC
-static inline rb_size_pool_t *
-size_pool_for_size(rb_objspace_t *objspace, size_t size)
+static inline size_t
+size_pool_idx_for_size(size_t size)
 {
+#if USE_RVARGC
     size_t slot_count = CEILDIV(size, sizeof(RVALUE));
 
     /* size_pool_idx is ceil(log2(slot_count)) */
@@ -2442,41 +2432,31 @@ size_pool_for_size(rb_objspace_t *objspace, size_t size)
         rb_bug("size_pool_for_size: allocation size too large");
     }
 
-    rb_size_pool_t *size_pool = &size_pools[size_pool_idx];
-    GC_ASSERT(size_pool->slot_size >= (short)size);
-    GC_ASSERT(size_pool_idx == 0 || size_pools[size_pool_idx - 1].slot_size < (short)size);
-
-    return size_pool;
+    return size_pool_idx;
+#else
+    GC_ASSERT(size <= sizeof(RVALUE));
+    return 0;
+#endif
 }
-
-static inline VALUE
-heap_get_freeobj(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap)
+
+#if USE_RVARGC
+static inline rb_size_pool_t *
+size_pool_for_size(rb_objspace_t *objspace, size_t size)
 {
-    RVALUE *p = size_pool->freelist;
-
-    if (UNLIKELY(p == NULL)) {
-        struct heap_page *page = heap_next_freepage(objspace, size_pool, heap);
-        size_pool->using_page = page;
-
-        asan_unpoison_memory_region(&page->freelist, sizeof(RVALUE*), false);
-        p = page->freelist;
-        page->freelist = NULL;
-        asan_poison_memory_region(&page->freelist, sizeof(RVALUE*));
-        page->free_slots = 0;
-    }
+    size_t size_pool_idx = size_pool_idx_for_size(size);
 
-    asan_unpoison_object((VALUE)p, true);
-    size_pool->freelist = p->as.free.next;
+    rb_size_pool_t *size_pool = &size_pools[size_pool_idx];
+    GC_ASSERT(size_pool->slot_size >= (short)size);
+    GC_ASSERT(size_pool_idx == 0 || size_pools[size_pool_idx - 1].slot_size < (short)size);
 
-    return (VALUE)p;
+    return size_pool;
 }
 #endif
 
-ALWAYS_INLINE(static VALUE newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, int wb_protected, size_t alloc_size));
+ALWAYS_INLINE(static VALUE newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, int wb_protected, size_t size_pool_idx));
 
 static inline VALUE
-newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, int wb_protected, size_t alloc_size)
+newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, int wb_protected, size_t size_pool_idx)
 {
     VALUE obj;
     unsigned int lev;
@@ -2497,22 +2477,9 @@ newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *
         }
     }
 
-    if (alloc_size <= sizeof(RVALUE)) {
-        // allocate new slot
-        while ((obj = ractor_cached_free_region(objspace, cr, alloc_size)) == Qfalse) {
-            ractor_cache_slots(objspace, cr);
-        }
-    }
-    else {
-#if USE_RVARGC
-        rb_size_pool_t *size_pool = size_pool_for_size(objspace, alloc_size);
-
-        obj = heap_get_freeobj(objspace, size_pool, SIZE_POOL_EDEN_HEAP(size_pool));
-
-        memset((void *)obj, 0, size_pool->slot_size);
-#else
-        rb_bug("unreachable when not using rvargc");
-#endif
+    // allocate new slot
+    while ((obj = ractor_cached_free_region(objspace, cr, size_pool_idx)) == Qfalse) {
+        ractor_cache_slots(objspace, cr, size_pool_idx);
     }
     GC_ASSERT(obj != 0);
     newobj_init(klass, flags, wb_protected, objspace, obj);
@@ -2525,20 +2492,20 @@ newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *
 }
 
 NOINLINE(static VALUE newobj_slowpath_wb_protected(VALUE klass, VALUE flags,
-                                                   rb_objspace_t *objspace, rb_ractor_t *cr, size_t alloc_size));
+                                                   rb_objspace_t *objspace, rb_ractor_t *cr, size_t size_pool_idx));
 NOINLINE(static VALUE newobj_slowpath_wb_unprotected(VALUE klass, VALUE flags,
-                                                     rb_objspace_t *objspace, rb_ractor_t *cr, size_t alloc_size));
+                                                     rb_objspace_t *objspace, rb_ractor_t *cr, size_t size_pool_idx));
 
 static VALUE
-newobj_slowpath_wb_protected(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, size_t alloc_size)
+newobj_slowpath_wb_protected(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, size_t size_pool_idx)
 {
-    return newobj_slowpath(klass, flags, objspace, cr, TRUE, alloc_size);
+    return newobj_slowpath(klass, flags, objspace, cr, TRUE, size_pool_idx);
 }
 
 static VALUE
-newobj_slowpath_wb_unprotected(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, size_t alloc_size)
+newobj_slowpath_wb_unprotected(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, size_t size_pool_idx)
 {
-    return newobj_slowpath(klass, flags, objspace, cr, FALSE, alloc_size);
+    return newobj_slowpath(klass, flags, objspace, cr, FALSE, size_pool_idx);
 }
 
 static inline VALUE
@@ -2559,11 +2526,13 @@ newobj_of0(VALUE klass, VALUE flags, int wb_protected, rb_ractor_t *cr, size_t a
     }
 #endif
 
+    size_t size_pool_idx = size_pool_idx_for_size(alloc_size);
+
     if ((!UNLIKELY(during_gc ||
                    ruby_gc_stressful ||
                    gc_event_hook_available_p(objspace)) &&
         wb_protected &&
-        (obj = ractor_cached_free_region(objspace, cr, alloc_size)) != Qfalse)) {
+        (obj = ractor_cached_free_region(objspace, cr, size_pool_idx)) != Qfalse)) {
 
         newobj_init(klass, flags, wb_protected, objspace, obj);
     }
@@ -2571,8 +2540,8 @@ newobj_of0(VALUE klass, VALUE flags, int wb_protected, rb_ractor_t *cr, size_t a
         RB_DEBUG_COUNTER_INC(obj_newobj_slowpath);
 
         obj = wb_protected ?
-          newobj_slowpath_wb_protected(klass, flags, objspace, cr, alloc_size) :
-          newobj_slowpath_wb_unprotected(klass, flags, objspace, cr, alloc_size);
+          newobj_slowpath_wb_protected(klass, flags, objspace, cr, size_pool_idx) :
+          newobj_slowpath_wb_unprotected(klass, flags, objspace, cr, size_pool_idx);
     }
 
     return obj;
@@ -5585,13 +5554,6 @@ gc_sweep_start(rb_objspace_t *objspace)
     for (int i = 0; i < SIZE_POOL_COUNT; i++) {
         rb_size_pool_t *size_pool = &size_pools[i];
 
-#if USE_RVARGC
-        heap_page_freelist_append(size_pool->using_page, size_pool->freelist);
-
-        size_pool->using_page = NULL;
-        size_pool->freelist = NULL;
-#endif
-
         gc_sweep_start_heap(objspace, SIZE_POOL_EDEN_HEAP(size_pool));
     }
 
@@ -8698,14 +8660,18 @@ rb_obj_gc_flags(VALUE obj, ID* flags, size_t max)
 void
 rb_gc_ractor_newobj_cache_clear(rb_ractor_newobj_cache_t *newobj_cache)
 {
-    struct heap_page *page = newobj_cache->using_page;
-    RVALUE *freelist = newobj_cache->freelist;
-    RUBY_DEBUG_LOG("ractor using_page:%p freelist:%p", (void *)page, (void *)freelist);
+    for (size_t size_pool_idx = 0; size_pool_idx < SIZE_POOL_COUNT; size_pool_idx++) {
+        rb_ractor_newobj_size_pool_cache_t *cache = &newobj_cache->size_pool_caches[size_pool_idx];
+
+        struct heap_page *page = cache->using_page;
+        RVALUE *freelist = cache->freelist;
+        RUBY_DEBUG_LOG("ractor using_page:%p freelist:%p", (void *)page, (void *)freelist);
 
-    heap_page_freelist_append(page, freelist);
+        heap_page_freelist_append(page, freelist);
 
-    newobj_cache->using_page = NULL;
-    newobj_cache->freelist = NULL;
+        cache->using_page = NULL;
+        cache->freelist = NULL;
+    }
 }
 
 void
diff --git a/internal/gc.h b/internal/gc.h
index b7b29214cf..baf4f36a10 100644
--- a/internal/gc.h
+++ b/internal/gc.h
@@ -67,9 +67,19 @@ struct rb_objspace; /* in vm_core.h */
     rb_obj_write((VALUE)(a), UNALIGNED_MEMBER_ACCESS((VALUE *)(slot)), \
                  (VALUE)(b), __FILE__, __LINE__)
 
-typedef struct ractor_newobj_cache {
+#if USE_RVARGC
+# define SIZE_POOL_COUNT 4
+#else
+# define SIZE_POOL_COUNT 1
+#endif
+
+typedef struct ractor_newobj_size_pool_cache {
     struct RVALUE *freelist;
     struct heap_page *using_page;
+} rb_ractor_newobj_size_pool_cache_t;
+
+typedef struct ractor_newobj_cache {
+    rb_ractor_newobj_size_pool_cache_t size_pool_caches[SIZE_POOL_COUNT];
 } rb_ractor_newobj_cache_t;
 
 /* gc.c */
diff --git a/string.c b/string.c
index 2746d882f1..8201124409 100644
--- a/string.c
+++ b/string.c
@@ -868,7 +868,9 @@ static inline VALUE
 empty_str_alloc(VALUE klass)
 {
     RUBY_DTRACE_CREATE_HOOK(STRING, 0);
-    return str_alloc_embed(klass, 0);
+    VALUE str = str_alloc_embed(klass, 0);
+    memset(RSTRING(str)->as.embed.ary, 0, str_embed_capa(str));
+    return str;
 }
 
 static VALUE
@@ -1732,10 +1734,11 @@ str_duplicate_setup(VALUE klass, VALUE str, VALUE dup)
     VALUE flags = FL_TEST_RAW(str, flag_mask);
     int encidx = 0;
     if (STR_EMBED_P(str)) {
-        assert(str_embed_capa(dup) >= RSTRING_EMBED_LEN(str));
-        STR_SET_EMBED_LEN(dup, RSTRING_EMBED_LEN(str));
-        MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary,
-               char, RSTRING_EMBED_LEN(str));
+        long len = RSTRING_EMBED_LEN(str);
+
+        assert(str_embed_capa(dup) >= len + 1);
+        STR_SET_EMBED_LEN(dup, len);
+        MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary, char, len + 1);
         flags &= ~RSTRING_NOEMBED;
     }
     else {
@@ -2321,6 +2324,7 @@ rb_str_times(VALUE str, VALUE times)
     if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
         if (STR_EMBEDDABLE_P(len, 1)) {
             str2 = str_alloc_embed(rb_cString, len + 1);
+            memset(RSTRING_PTR(str2), 0, len + 1);
         }
         else {
             str2 = str_alloc_heap(rb_cString);
-- 
cgit v1.2.3