diff options
author | Peter Zhu <peter@peterzhu.ca> | 2023-07-24 15:26:10 -0400 |
---|---|---|
committer | Peter Zhu <peter@peterzhu.ca> | 2023-08-25 09:01:21 -0400 |
commit | ee9cc8e32e04597a4a86b391df2c19aed9fde3e5 (patch) | |
tree | 6775a480714642571c8e30336fc2a1abab44815a /weakmap.c | |
parent | 2091bf94931bddcf23b196fc8fe108ebd7ec1648 (diff) |
Implement WeakMap using weak references
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/8113
Diffstat (limited to 'weakmap.c')
-rw-r--r-- | weakmap.c | 497 |
1 files changed, 209 insertions, 288 deletions
@@ -6,68 +6,78 @@ #include "ruby/st.h" #include "ruby/st.h" +/* ===== WeakMap ===== + * + * WeakMap contains one ST table which contains a pointer to the object as the + * key and a pointer to the object as the value. This means that the key and + * value of the table are both of the type `VALUE *`. + * + * The objects are not directly stored as keys and values in the table because + * `rb_gc_mark_weak` requires a pointer to the memory location to overwrite + * when the object is reclaimed. Using a pointer into the ST table entry is not + * safe because the pointer can change when the ST table is resized. + * + * WeakMap hashes and compares using the pointer address of the object. + * + * For performance and memory efficiency reasons, the key and value + * are allocated at the same time and adjacent to each other. + * + * During GC and while iterating, reclaimed entries (i.e. either the key or + * value points to `Qundef`) are removed from the ST table. + */ + struct weakmap { - st_table *obj2wmap; /* obj -> [ref,...] */ - st_table *wmap2obj; /* ref -> obj */ - VALUE final; + st_table *table; }; -static int -wmap_replace_ref(st_data_t *key, st_data_t *value, st_data_t _argp, int existing) +static bool +wmap_live_p(VALUE obj) { - *key = rb_gc_location((VALUE)*key); - - VALUE *values = (VALUE *)*value; - VALUE size = values[0]; + return !UNDEF_P(obj); +} - for (VALUE index = 1; index <= size; index++) { - values[index] = rb_gc_location(values[index]); - } +static void +wmap_free_entry(VALUE *key, VALUE *val) +{ + assert(key + 1 == val); - return ST_CONTINUE; + /* We only need to free key because val is allocated beside key on in the + * same malloc call. */ + ruby_sized_xfree(key, sizeof(VALUE) * 2); } static int -wmap_foreach_replace(st_data_t key, st_data_t value, st_data_t _argp, int error) +wmap_mark_weak_table_i(st_data_t key, st_data_t val, st_data_t _) { - if (rb_gc_location((VALUE)key) != (VALUE)key) { - return ST_REPLACE; - } + VALUE key_obj = *(VALUE *)key; + VALUE val_obj = *(VALUE *)val; - VALUE *values = (VALUE *)value; - VALUE size = values[0]; + if (wmap_live_p(key_obj) && wmap_live_p(val_obj)) { + rb_gc_mark_weak((VALUE *)key); + rb_gc_mark_weak((VALUE *)val); - for (VALUE index = 1; index <= size; index++) { - VALUE val = values[index]; - if (rb_gc_location(val) != val) { - return ST_REPLACE; - } + return ST_CONTINUE; } + else { + wmap_free_entry((VALUE *)key, (VALUE *)val); - return ST_CONTINUE; -} - -static void -wmap_compact(void *ptr) -{ - struct weakmap *w = ptr; - if (w->wmap2obj) rb_gc_update_tbl_refs(w->wmap2obj); - if (w->obj2wmap) st_foreach_with_replace(w->obj2wmap, wmap_foreach_replace, wmap_replace_ref, (st_data_t)NULL); - w->final = rb_gc_location(w->final); + return ST_DELETE; + } } static void wmap_mark(void *ptr) { struct weakmap *w = ptr; - rb_gc_mark_movable(w->final); + if (w->table) { + st_foreach(w->table, wmap_mark_weak_table_i, (st_data_t)0); + } } static int -wmap_free_map(st_data_t key, st_data_t val, st_data_t arg) +wmap_free_table_i(st_data_t key, st_data_t val, st_data_t arg) { - VALUE *ptr = (VALUE *)val; - ruby_sized_xfree(ptr, (ptr[0] + 1) * sizeof(VALUE)); + wmap_free_entry((VALUE *)key, (VALUE *)val); return ST_CONTINUE; } @@ -75,30 +85,63 @@ static void wmap_free(void *ptr) { struct weakmap *w = ptr; - st_foreach(w->obj2wmap, wmap_free_map, 0); - st_free_table(w->obj2wmap); - st_free_table(w->wmap2obj); + + st_foreach(w->table, wmap_free_table_i, 0); + st_free_table(w->table); xfree(w); } +static size_t +wmap_memsize(const void *ptr) +{ + const struct weakmap *w = ptr; + + size_t size = sizeof(*w); + size += st_memsize(w->table); + /* The key and value of the table each take sizeof(VALUE) in size. */ + size += st_table_size(w->table) * (2 * sizeof(VALUE)); + + return size; +} + static int -wmap_memsize_map(st_data_t key, st_data_t val, st_data_t arg) +wmap_compact_table_i(st_data_t key, st_data_t val, st_data_t data) { - VALUE *ptr = (VALUE *)val; - *(size_t *)arg += (ptr[0] + 1) * sizeof(VALUE); + st_table *table = (st_table *)data; + + VALUE key_obj = *(VALUE *)key; + VALUE val_obj = *(VALUE *)val; + + if (wmap_live_p(key_obj) && wmap_live_p(val_obj)) { + VALUE new_key_obj = rb_gc_location(key_obj); + + *(VALUE *)val = rb_gc_location(val_obj); + + /* If the key object moves, then we must reinsert because the hash is + * based on the pointer rather than the object itself. */ + if (key_obj != new_key_obj) { + *(VALUE *)key = new_key_obj; + + st_insert(table, key, val); + + return ST_DELETE; + } + } + else { + wmap_free_entry((VALUE *)key, (VALUE *)val); + + return ST_DELETE; + } + return ST_CONTINUE; } -static size_t -wmap_memsize(const void *ptr) +static void +wmap_compact(void *ptr) { - size_t size; - const struct weakmap *w = ptr; - size = sizeof(*w); - size += st_memsize(w->obj2wmap); - size += st_memsize(w->wmap2obj); - st_foreach(w->obj2wmap, wmap_memsize_map, (st_data_t)&size); - return size; + struct weakmap *w = ptr; + + st_foreach(w->table, wmap_compact_table_i, (st_data_t)w->table); } static const rb_data_type_t weakmap_type = { @@ -112,111 +155,68 @@ static const rb_data_type_t weakmap_type = { 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED }; -static VALUE wmap_finalize(RB_BLOCK_CALL_FUNC_ARGLIST(objid, self)); +static int +wmap_cmp(st_data_t x, st_data_t y) +{ + return *(VALUE *)x != *(VALUE *)y; +} + +static st_index_t +wmap_hash(st_data_t n) +{ + return st_numhash(*(VALUE *)n); +} + +static const struct st_hash_type wmap_hash_type = { + wmap_cmp, + wmap_hash, +}; static VALUE wmap_allocate(VALUE klass) { struct weakmap *w; VALUE obj = TypedData_Make_Struct(klass, struct weakmap, &weakmap_type, w); - w->obj2wmap = rb_init_identtable(); - w->wmap2obj = rb_init_identtable(); - RB_OBJ_WRITE(obj, &w->final, rb_func_lambda_new(wmap_finalize, obj, 1, 1)); + w->table = st_init_table(&wmap_hash_type); return obj; } -static int -wmap_live_p(VALUE obj) -{ - if (SPECIAL_CONST_P(obj)) return TRUE; - /* If rb_gc_is_ptr_to_obj returns false, the page could be in the tomb heap - * or have already been freed. */ - if (!rb_gc_is_ptr_to_obj((void *)obj)) return FALSE; - - void *poisoned = asan_poisoned_object_p(obj); - asan_unpoison_object(obj, false); - - enum ruby_value_type t = BUILTIN_TYPE(obj); - int ret = (!(t == T_NONE || t >= T_FIXNUM || t == T_ICLASS) && - !rb_objspace_garbage_object_p(obj)); - - if (poisoned) { - asan_poison_object(obj); - } - - return ret; -} +struct wmap_foreach_data { + struct weakmap *w; + void (*func)(VALUE, VALUE, st_data_t); + st_data_t arg; +}; static int -wmap_remove_inverse_ref(st_data_t *key, st_data_t *val, st_data_t arg, int existing) +wmap_foreach_i(st_data_t key, st_data_t val, st_data_t arg) { - if (!existing) return ST_STOP; - - VALUE old_ref = (VALUE)arg; + struct wmap_foreach_data *data = (struct wmap_foreach_data *)arg; - VALUE *values = (VALUE *)*val; - VALUE size = values[0]; - - if (size == 1) { - // fast path, we only had one backref - RUBY_ASSERT(values[1] == old_ref); - ruby_sized_xfree(values, 2 * sizeof(VALUE)); - return ST_DELETE; - } + VALUE key_obj = *(VALUE *)key; + VALUE val_obj = *(VALUE *)val; - bool found = false; - VALUE index = 1; - for (; index <= size; index++) { - if (values[index] == old_ref) { - found = true; - break; - } + if (wmap_live_p(key_obj) && wmap_live_p(val_obj)) { + data->func(key_obj, val_obj, data->arg); } - if (!found) return ST_STOP; + else { + wmap_free_entry((VALUE *)key, (VALUE *)val); - if (size > index) { - MEMMOVE(&values[index], &values[index + 1], VALUE, size - index); + return ST_DELETE; } - size -= 1; - values[0] = size; - SIZED_REALLOC_N(values, VALUE, size + 1, size + 2); - *val = (st_data_t)values; return ST_CONTINUE; } -/* :nodoc: */ -static VALUE -wmap_finalize(RB_BLOCK_CALL_FUNC_ARGLIST(objid, self)) +static void +wmap_foreach(struct weakmap *w, void (*func)(VALUE, VALUE, st_data_t), st_data_t arg) { - st_data_t orig, wmap, data; - VALUE obj, *rids, i, size; - struct weakmap *w; - - TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); - /* Get reference from object id. */ - if (UNDEF_P(obj = rb_gc_id2ref_obj_tbl(objid))) { - rb_bug("wmap_finalize: objid is not found."); - } - - /* obj is original referenced object and/or weak reference. */ - orig = (st_data_t)obj; - if (st_delete(w->obj2wmap, &orig, &data)) { - rids = (VALUE *)data; - size = *rids++; - for (i = 0; i < size; ++i) { - wmap = (st_data_t)rids[i]; - st_delete(w->wmap2obj, &wmap, NULL); - } - ruby_sized_xfree((VALUE *)data, (size + 1) * sizeof(VALUE)); - } + struct wmap_foreach_data foreach_data = { + .w = w, + .func = func, + .arg = arg, + }; - wmap = (st_data_t)obj; - if (st_delete(w->wmap2obj, &wmap, &orig)) { - wmap = (st_data_t)obj; - st_update(w->obj2wmap, orig, wmap_remove_inverse_ref, wmap); - } - return self; + st_foreach(w->table, wmap_foreach_i, (st_data_t)&foreach_data); } static VALUE @@ -225,19 +225,15 @@ wmap_inspect_append(VALUE str, VALUE obj) if (SPECIAL_CONST_P(obj)) { return rb_str_append(str, rb_inspect(obj)); } - else if (wmap_live_p(obj)) { - return rb_str_append(str, rb_any_to_s(obj)); - } else { - return rb_str_catf(str, "#<collected:%p>", (void*)obj); + return rb_str_append(str, rb_any_to_s(obj)); } } -static int -wmap_inspect_i(st_data_t key, st_data_t val, st_data_t arg) +static void +wmap_inspect_i(VALUE key, VALUE val, st_data_t data) { - VALUE str = (VALUE)arg; - VALUE k = (VALUE)key, v = (VALUE)val; + VALUE str = (VALUE)data; if (RSTRING_PTR(str)[0] == '#') { rb_str_cat2(str, ", "); @@ -246,11 +242,10 @@ wmap_inspect_i(st_data_t key, st_data_t val, st_data_t arg) rb_str_cat2(str, ": "); RSTRING_PTR(str)[0] = '#'; } - wmap_inspect_append(str, k); - rb_str_cat2(str, " => "); - wmap_inspect_append(str, v); - return ST_CONTINUE; + wmap_inspect_append(str, key); + rb_str_cat2(str, " => "); + wmap_inspect_append(str, val); } static VALUE @@ -261,30 +256,19 @@ wmap_inspect(VALUE self) TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); VALUE str = rb_sprintf("-<%"PRIsVALUE":%p", c, (void *)self); - if (w->wmap2obj) { - st_foreach(w->wmap2obj, wmap_inspect_i, (st_data_t)str); - } + + wmap_foreach(w, wmap_inspect_i, (st_data_t)str); + RSTRING_PTR(str)[0] = '#'; rb_str_cat2(str, ">"); - return str; -} -static inline bool -wmap_live_entry_p(st_data_t key, st_data_t val) -{ - return wmap_live_p((VALUE)key) && wmap_live_p((VALUE)val); + return str; } -static int -wmap_each_i(st_data_t key, st_data_t val, st_data_t _) +static void +wmap_each_i(VALUE key, VALUE val, st_data_t _) { - if (wmap_live_entry_p(key, val)) { - rb_yield_values(2, (VALUE)key, (VALUE)val); - return ST_CONTINUE; - } - else { - return ST_DELETE; - } + rb_yield_values(2, key, val); } /* Iterates over keys and objects in a weakly referenced object */ @@ -292,22 +276,17 @@ static VALUE wmap_each(VALUE self) { struct weakmap *w; - TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); - st_foreach(w->wmap2obj, wmap_each_i, (st_data_t)0); + + wmap_foreach(w, wmap_each_i, (st_data_t)0); + return self; } -static int -wmap_each_key_i(st_data_t key, st_data_t val, st_data_t arg) +static void +wmap_each_key_i(VALUE key, VALUE _val, st_data_t _data) { - if (wmap_live_entry_p(key, val)) { - rb_yield((VALUE)key); - return ST_CONTINUE; - } - else { - return ST_DELETE; - } + rb_yield(key); } /* Iterates over keys and objects in a weakly referenced object */ @@ -315,22 +294,17 @@ static VALUE wmap_each_key(VALUE self) { struct weakmap *w; - TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); - st_foreach(w->wmap2obj, wmap_each_key_i, (st_data_t)0); + + wmap_foreach(w, wmap_each_key_i, (st_data_t)0); + return self; } -static int -wmap_each_value_i(st_data_t key, st_data_t val, st_data_t arg) +static void +wmap_each_value_i(VALUE _key, VALUE val, st_data_t _data) { - if (wmap_live_entry_p(key, val)) { - rb_yield((VALUE)val); - return ST_CONTINUE; - } - else { - return ST_DELETE; - } + rb_yield(val); } /* Iterates over keys and objects in a weakly referenced object */ @@ -338,24 +312,19 @@ static VALUE wmap_each_value(VALUE self) { struct weakmap *w; - TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); - st_foreach(w->wmap2obj, wmap_each_value_i, (st_data_t)0); + + wmap_foreach(w, wmap_each_value_i, (st_data_t)0); + return self; } -static int -wmap_keys_i(st_data_t key, st_data_t val, st_data_t arg) +static void +wmap_keys_i(st_data_t key, st_data_t _, st_data_t arg) { VALUE ary = (VALUE)arg; - if (wmap_live_entry_p(key, val)) { - rb_ary_push(ary, (VALUE)key); - return ST_CONTINUE; - } - else { - return ST_DELETE; - } + rb_ary_push(ary, key); } /* Iterates over keys and objects in a weakly referenced object */ @@ -366,22 +335,17 @@ wmap_keys(VALUE self) TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); VALUE ary = rb_ary_new(); - st_foreach(w->wmap2obj, wmap_keys_i, (st_data_t)ary); + wmap_foreach(w, wmap_keys_i, (st_data_t)ary); + return ary; } -static int +static void wmap_values_i(st_data_t key, st_data_t val, st_data_t arg) { VALUE ary = (VALUE)arg; - if (wmap_live_entry_p(key, val)) { - rb_ary_push(ary, (VALUE)val); - return ST_CONTINUE; - } - else { - return ST_DELETE; - } + rb_ary_push(ary, (VALUE)val); } /* Iterates over values and objects in a weakly referenced object */ @@ -392,37 +356,9 @@ wmap_values(VALUE self) TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); VALUE ary = rb_ary_new(); - st_foreach(w->wmap2obj, wmap_values_i, (st_data_t)ary); - return ary; -} - -static int -wmap_aset_update(st_data_t *key, st_data_t *val, st_data_t arg, int existing) -{ - VALUE size, *ptr, *optr; - if (existing) { - size = (ptr = optr = (VALUE *)*val)[0]; - - for (VALUE index = 1; index <= size; index++) { - if (ptr[index] == (VALUE)arg) { - // The reference was already registered. - return ST_STOP; - } - } + wmap_foreach(w, wmap_values_i, (st_data_t)ary); - ++size; - SIZED_REALLOC_N(ptr, VALUE, size + 1, size); - } - else { - optr = 0; - size = 1; - ptr = ruby_xmalloc(2 * sizeof(VALUE)); - } - ptr[0] = size; - ptr[size] = (VALUE)arg; - if (ptr == optr) return ST_STOP; - *val = (st_data_t)ptr; - return ST_CONTINUE; + return ary; } static VALUE @@ -437,57 +373,39 @@ nonspecial_obj_id(VALUE obj) #endif } -struct wmap_aset_replace_args { - VALUE new_value; - VALUE old_value; -}; - static int -wmap_aset_replace_value(st_data_t *key, st_data_t *val, st_data_t _args, int existing) +wmap_aset_replace(st_data_t *key, st_data_t *val, st_data_t new_key, int existing) { - struct wmap_aset_replace_args *args = (struct wmap_aset_replace_args *)_args; - if (existing) { - args->old_value = *val; + VALUE *orig_pair = ((VALUE *)*key); + assert(orig_pair[0] == *(VALUE *)new_key); + + wmap_free_entry(orig_pair, orig_pair + 1); } - *val = (st_data_t)args->new_value; + + *key = new_key; + *val = (st_data_t)(((VALUE *)new_key) + 1); + return ST_CONTINUE; } /* Creates a weak reference from the given key to the given value */ static VALUE -wmap_aset(VALUE self, VALUE key, VALUE value) +wmap_aset(VALUE self, VALUE key, VALUE val) { struct weakmap *w; - TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); - if (FL_ABLE(value)) { - rb_define_finalizer_no_check(value, w->final); - } - if (FL_ABLE(key)) { - rb_define_finalizer_no_check(key, w->final); - } - struct wmap_aset_replace_args aset_args = { - .new_value = value, - .old_value = Qundef, - }; - st_update(w->wmap2obj, (st_data_t)key, wmap_aset_replace_value, (st_data_t)&aset_args); + VALUE *pair = xmalloc(sizeof(VALUE) * 2); + pair[0] = key; + pair[1] = val; - // If the value is unchanged, we have nothing to do. - if (value != aset_args.old_value) { - if (!UNDEF_P(aset_args.old_value) && FL_ABLE(aset_args.old_value)) { - // That key existed and had an inverse reference, we need to clear the outdated inverse reference. - st_update(w->obj2wmap, (st_data_t)aset_args.old_value, wmap_remove_inverse_ref, key); - } + st_update(w->table, (st_data_t)pair, wmap_aset_replace, (st_data_t)pair); - if (FL_ABLE(value)) { - // If the value has no finalizer, we don't need to keep the inverse reference - st_update(w->obj2wmap, (st_data_t)value, wmap_aset_update, key); - } - } + RB_OBJ_WRITTEN(self, Qundef, key); + RB_OBJ_WRITTEN(self, Qundef, val); - return nonspecial_obj_id(value); + return nonspecial_obj_id(val); } /* Retrieves a weakly referenced object with the given key */ @@ -500,11 +418,11 @@ wmap_lookup(VALUE self, VALUE key) TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); st_data_t data; - if (!st_lookup(w->wmap2obj, (st_data_t)key, &data)) return Qundef; + if (!st_lookup(w->table, (st_data_t)&key, &data)) return Qundef; - VALUE obj = (VALUE)data; - if (!wmap_live_p(obj)) return Qundef; - return obj; + if (!wmap_live_p(*(VALUE *)data)) return Qundef; + + return *(VALUE *)data; } /* Retrieves a weakly referenced object with the given key */ @@ -519,20 +437,23 @@ wmap_aref(VALUE self, VALUE key) static VALUE wmap_delete(VALUE self, VALUE key) { - assert(wmap_live_p(key)); - struct weakmap *w; TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); - VALUE old_value = Qnil; - if (st_delete(w->wmap2obj, (st_data_t *)&key, (st_data_t *)&old_value)) { - if (FL_ABLE(old_value)) { - // That key existed and had an inverse reference, we need to clear the outdated inverse reference. - st_update(w->obj2wmap, (st_data_t)old_value, wmap_remove_inverse_ref, key); + VALUE orig_key = key; + st_data_t orig_key_data = (st_data_t)&orig_key; + st_data_t orig_val_data; + if (st_delete(w->table, &orig_key_data, &orig_val_data)) { + VALUE orig_val = *(VALUE *)orig_val_data; + + wmap_free_entry((VALUE *)orig_key_data, (VALUE *)orig_val_data); + + if (wmap_live_p(orig_val)) { + return orig_val; } - return old_value; } - else if (rb_block_given_p()) { + + if (rb_block_given_p()) { return rb_yield(key); } else { @@ -552,10 +473,10 @@ static VALUE wmap_size(VALUE self) { struct weakmap *w; - st_index_t n; - TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w); - n = w->wmap2obj->num_entries; + + st_index_t n = st_table_size(w->table); + #if SIZEOF_ST_INDEX_T <= SIZEOF_LONG return ULONG2NUM(n); #else |