path: root/weakmap.c
author    Peter Zhu <peter@peterzhu.ca>    2023-07-24 15:26:10 -0400
committer Peter Zhu <peter@peterzhu.ca>    2023-08-25 09:01:21 -0400
commit    ee9cc8e32e04597a4a86b391df2c19aed9fde3e5 (patch)
tree      6775a480714642571c8e30336fc2a1abab44815a /weakmap.c
parent    2091bf94931bddcf23b196fc8fe108ebd7ec1648 (diff)
Implement WeakMap using weak references
Notes: Merged: https://github.com/ruby/ruby/pull/8113
Diffstat (limited to 'weakmap.c')
-rw-r--r--    weakmap.c    497
1 file changed, 209 insertions, 288 deletions
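
The patch replaces the old finalizer-based bookkeeping (two identity tables plus a per-value array of back-references) with a single ST table whose keys and values are pointers into a two-`VALUE` heap pair; `rb_gc_mark_weak` is handed the addresses of those slots so the GC can overwrite a slot with `Qundef` when the referenced object is collected, and any entry with a dead slot is dropped during marking, compaction, or iteration. The plain-C sketch below models that layout outside of Ruby as an aid to reading the diff; `pair_alloc`, `fake_gc_reclaim`, and `WMAP_DEAD` are illustrative stand-ins, not functions from this patch or from the Ruby GC.

/* Self-contained model of the WeakMap entry layout: key and value live in one
 * two-slot allocation, and the "GC" is given the addresses of those slots so
 * it can overwrite them with a sentinel (Qundef in the real code) when an
 * object dies. All names here are illustrative. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

typedef unsigned long VALUE;     /* stand-in for Ruby's VALUE */
#define WMAP_DEAD ((VALUE)0)     /* stand-in for Qundef */

static VALUE *
pair_alloc(VALUE key, VALUE val)
{
    /* One malloc holds both slots, so freeing the key pointer also frees the
     * value slot -- the invariant wmap_free_entry relies on. */
    VALUE *pair = malloc(2 * sizeof(VALUE));
    assert(pair != NULL);
    pair[0] = key;
    pair[1] = val;
    return pair;
}

static void
fake_gc_reclaim(VALUE *slot)
{
    /* The real GC does this through rb_gc_mark_weak: it remembers the slot
     * address and overwrites it once the referenced object is collected. */
    *slot = WMAP_DEAD;
}

int
main(void)
{
    VALUE *entry = pair_alloc(0x1001, 0x2002);

    fake_gc_reclaim(&entry[1]);  /* the value object "dies" */

    /* Iteration sees a dead slot and drops the whole entry, mirroring
     * wmap_mark_weak_table_i / wmap_foreach_i returning ST_DELETE. */
    if (entry[0] == WMAP_DEAD || entry[1] == WMAP_DEAD) {
        free(entry);             /* one free releases both slots */
        entry = NULL;
    }

    printf("entry %s\n", entry ? "kept" : "dropped");
    return 0;
}

The real table additionally hashes and compares entries by dereferencing the stored pointer (`wmap_hash` / `wmap_cmp` in the diff), which is why compaction must reinsert an entry whenever the key object moves.
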
diff --git a/weakmap.c b/weakmap.c
index ef859f89e8..bb65b1690b 100644
--- a/weakmap.c
+++ b/weakmap.c
@@ -6,68 +6,78 @@
#include "ruby/st.h"
#include "ruby/st.h"
+/* ===== WeakMap =====
+ *
+ * WeakMap contains one ST table which contains a pointer to the object as the
+ * key and a pointer to the object as the value. This means that the key and
+ * value of the table are both of the type `VALUE *`.
+ *
+ * The objects are not directly stored as keys and values in the table because
+ * `rb_gc_mark_weak` requires a pointer to the memory location to overwrite
+ * when the object is reclaimed. Using a pointer into the ST table entry is not
+ * safe because the pointer can change when the ST table is resized.
+ *
+ * WeakMap hashes and compares using the pointer address of the object.
+ *
+ * For performance and memory efficiency reasons, the key and value
+ * are allocated at the same time and adjacent to each other.
+ *
+ * During GC and while iterating, reclaimed entries (i.e. either the key or
+ * value points to `Qundef`) are removed from the ST table.
+ */
+
struct weakmap {
- st_table *obj2wmap; /* obj -> [ref,...] */
- st_table *wmap2obj; /* ref -> obj */
- VALUE final;
+ st_table *table;
};
-static int
-wmap_replace_ref(st_data_t *key, st_data_t *value, st_data_t _argp, int existing)
+static bool
+wmap_live_p(VALUE obj)
{
- *key = rb_gc_location((VALUE)*key);
-
- VALUE *values = (VALUE *)*value;
- VALUE size = values[0];
+ return !UNDEF_P(obj);
+}
- for (VALUE index = 1; index <= size; index++) {
- values[index] = rb_gc_location(values[index]);
- }
+static void
+wmap_free_entry(VALUE *key, VALUE *val)
+{
+ assert(key + 1 == val);
- return ST_CONTINUE;
+ /* We only need to free key because val is allocated beside key in the
+ * same malloc call. */
+ ruby_sized_xfree(key, sizeof(VALUE) * 2);
}
static int
-wmap_foreach_replace(st_data_t key, st_data_t value, st_data_t _argp, int error)
+wmap_mark_weak_table_i(st_data_t key, st_data_t val, st_data_t _)
{
- if (rb_gc_location((VALUE)key) != (VALUE)key) {
- return ST_REPLACE;
- }
+ VALUE key_obj = *(VALUE *)key;
+ VALUE val_obj = *(VALUE *)val;
- VALUE *values = (VALUE *)value;
- VALUE size = values[0];
+ if (wmap_live_p(key_obj) && wmap_live_p(val_obj)) {
+ rb_gc_mark_weak((VALUE *)key);
+ rb_gc_mark_weak((VALUE *)val);
- for (VALUE index = 1; index <= size; index++) {
- VALUE val = values[index];
- if (rb_gc_location(val) != val) {
- return ST_REPLACE;
- }
+ return ST_CONTINUE;
}
+ else {
+ wmap_free_entry((VALUE *)key, (VALUE *)val);
- return ST_CONTINUE;
-}
-
-static void
-wmap_compact(void *ptr)
-{
- struct weakmap *w = ptr;
- if (w->wmap2obj) rb_gc_update_tbl_refs(w->wmap2obj);
- if (w->obj2wmap) st_foreach_with_replace(w->obj2wmap, wmap_foreach_replace, wmap_replace_ref, (st_data_t)NULL);
- w->final = rb_gc_location(w->final);
+ return ST_DELETE;
+ }
}
static void
wmap_mark(void *ptr)
{
struct weakmap *w = ptr;
- rb_gc_mark_movable(w->final);
+ if (w->table) {
+ st_foreach(w->table, wmap_mark_weak_table_i, (st_data_t)0);
+ }
}
static int
-wmap_free_map(st_data_t key, st_data_t val, st_data_t arg)
+wmap_free_table_i(st_data_t key, st_data_t val, st_data_t arg)
{
- VALUE *ptr = (VALUE *)val;
- ruby_sized_xfree(ptr, (ptr[0] + 1) * sizeof(VALUE));
+ wmap_free_entry((VALUE *)key, (VALUE *)val);
return ST_CONTINUE;
}
@@ -75,30 +85,63 @@ static void
wmap_free(void *ptr)
{
struct weakmap *w = ptr;
- st_foreach(w->obj2wmap, wmap_free_map, 0);
- st_free_table(w->obj2wmap);
- st_free_table(w->wmap2obj);
+
+ st_foreach(w->table, wmap_free_table_i, 0);
+ st_free_table(w->table);
xfree(w);
}
+static size_t
+wmap_memsize(const void *ptr)
+{
+ const struct weakmap *w = ptr;
+
+ size_t size = sizeof(*w);
+ size += st_memsize(w->table);
+ /* The key and value of the table each take sizeof(VALUE) in size. */
+ size += st_table_size(w->table) * (2 * sizeof(VALUE));
+
+ return size;
+}
+
static int
-wmap_memsize_map(st_data_t key, st_data_t val, st_data_t arg)
+wmap_compact_table_i(st_data_t key, st_data_t val, st_data_t data)
{
- VALUE *ptr = (VALUE *)val;
- *(size_t *)arg += (ptr[0] + 1) * sizeof(VALUE);
+ st_table *table = (st_table *)data;
+
+ VALUE key_obj = *(VALUE *)key;
+ VALUE val_obj = *(VALUE *)val;
+
+ if (wmap_live_p(key_obj) && wmap_live_p(val_obj)) {
+ VALUE new_key_obj = rb_gc_location(key_obj);
+
+ *(VALUE *)val = rb_gc_location(val_obj);
+
+ /* If the key object moves, then we must reinsert because the hash is
+ * based on the pointer rather than the object itself. */
+ if (key_obj != new_key_obj) {
+ *(VALUE *)key = new_key_obj;
+
+ st_insert(table, key, val);
+
+ return ST_DELETE;
+ }
+ }
+ else {
+ wmap_free_entry((VALUE *)key, (VALUE *)val);
+
+ return ST_DELETE;
+ }
+
return ST_CONTINUE;
}
-static size_t
-wmap_memsize(const void *ptr)
+static void
+wmap_compact(void *ptr)
{
- size_t size;
- const struct weakmap *w = ptr;
- size = sizeof(*w);
- size += st_memsize(w->obj2wmap);
- size += st_memsize(w->wmap2obj);
- st_foreach(w->obj2wmap, wmap_memsize_map, (st_data_t)&size);
- return size;
+ struct weakmap *w = ptr;
+
+ st_foreach(w->table, wmap_compact_table_i, (st_data_t)w->table);
}
static const rb_data_type_t weakmap_type = {
@@ -112,111 +155,68 @@ static const rb_data_type_t weakmap_type = {
0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
};
-static VALUE wmap_finalize(RB_BLOCK_CALL_FUNC_ARGLIST(objid, self));
+static int
+wmap_cmp(st_data_t x, st_data_t y)
+{
+ return *(VALUE *)x != *(VALUE *)y;
+}
+
+static st_index_t
+wmap_hash(st_data_t n)
+{
+ return st_numhash(*(VALUE *)n);
+}
+
+static const struct st_hash_type wmap_hash_type = {
+ wmap_cmp,
+ wmap_hash,
+};
static VALUE
wmap_allocate(VALUE klass)
{
struct weakmap *w;
VALUE obj = TypedData_Make_Struct(klass, struct weakmap, &weakmap_type, w);
- w->obj2wmap = rb_init_identtable();
- w->wmap2obj = rb_init_identtable();
- RB_OBJ_WRITE(obj, &w->final, rb_func_lambda_new(wmap_finalize, obj, 1, 1));
+ w->table = st_init_table(&wmap_hash_type);
return obj;
}
-static int
-wmap_live_p(VALUE obj)
-{
- if (SPECIAL_CONST_P(obj)) return TRUE;
- /* If rb_gc_is_ptr_to_obj returns false, the page could be in the tomb heap
- * or have already been freed. */
- if (!rb_gc_is_ptr_to_obj((void *)obj)) return FALSE;
-
- void *poisoned = asan_poisoned_object_p(obj);
- asan_unpoison_object(obj, false);
-
- enum ruby_value_type t = BUILTIN_TYPE(obj);
- int ret = (!(t == T_NONE || t >= T_FIXNUM || t == T_ICLASS) &&
- !rb_objspace_garbage_object_p(obj));
-
- if (poisoned) {
- asan_poison_object(obj);
- }
-
- return ret;
-}
+struct wmap_foreach_data {
+ struct weakmap *w;
+ void (*func)(VALUE, VALUE, st_data_t);
+ st_data_t arg;
+};
static int
-wmap_remove_inverse_ref(st_data_t *key, st_data_t *val, st_data_t arg, int existing)
+wmap_foreach_i(st_data_t key, st_data_t val, st_data_t arg)
{
- if (!existing) return ST_STOP;
-
- VALUE old_ref = (VALUE)arg;
+ struct wmap_foreach_data *data = (struct wmap_foreach_data *)arg;
- VALUE *values = (VALUE *)*val;
- VALUE size = values[0];
-
- if (size == 1) {
- // fast path, we only had one backref
- RUBY_ASSERT(values[1] == old_ref);
- ruby_sized_xfree(values, 2 * sizeof(VALUE));
- return ST_DELETE;
- }
+ VALUE key_obj = *(VALUE *)key;
+ VALUE val_obj = *(VALUE *)val;
- bool found = false;
- VALUE index = 1;
- for (; index <= size; index++) {
- if (values[index] == old_ref) {
- found = true;
- break;
- }
+ if (wmap_live_p(key_obj) && wmap_live_p(val_obj)) {
+ data->func(key_obj, val_obj, data->arg);
}
- if (!found) return ST_STOP;
+ else {
+ wmap_free_entry((VALUE *)key, (VALUE *)val);
- if (size > index) {
- MEMMOVE(&values[index], &values[index + 1], VALUE, size - index);
+ return ST_DELETE;
}
- size -= 1;
- values[0] = size;
- SIZED_REALLOC_N(values, VALUE, size + 1, size + 2);
- *val = (st_data_t)values;
return ST_CONTINUE;
}
-/* :nodoc: */
-static VALUE
-wmap_finalize(RB_BLOCK_CALL_FUNC_ARGLIST(objid, self))
+static void
+wmap_foreach(struct weakmap *w, void (*func)(VALUE, VALUE, st_data_t), st_data_t arg)
{
- st_data_t orig, wmap, data;
- VALUE obj, *rids, i, size;
- struct weakmap *w;
-
- TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
- /* Get reference from object id. */
- if (UNDEF_P(obj = rb_gc_id2ref_obj_tbl(objid))) {
- rb_bug("wmap_finalize: objid is not found.");
- }
-
- /* obj is original referenced object and/or weak reference. */
- orig = (st_data_t)obj;
- if (st_delete(w->obj2wmap, &orig, &data)) {
- rids = (VALUE *)data;
- size = *rids++;
- for (i = 0; i < size; ++i) {
- wmap = (st_data_t)rids[i];
- st_delete(w->wmap2obj, &wmap, NULL);
- }
- ruby_sized_xfree((VALUE *)data, (size + 1) * sizeof(VALUE));
- }
+ struct wmap_foreach_data foreach_data = {
+ .w = w,
+ .func = func,
+ .arg = arg,
+ };
- wmap = (st_data_t)obj;
- if (st_delete(w->wmap2obj, &wmap, &orig)) {
- wmap = (st_data_t)obj;
- st_update(w->obj2wmap, orig, wmap_remove_inverse_ref, wmap);
- }
- return self;
+ st_foreach(w->table, wmap_foreach_i, (st_data_t)&foreach_data);
}
static VALUE
@@ -225,19 +225,15 @@ wmap_inspect_append(VALUE str, VALUE obj)
if (SPECIAL_CONST_P(obj)) {
return rb_str_append(str, rb_inspect(obj));
}
- else if (wmap_live_p(obj)) {
- return rb_str_append(str, rb_any_to_s(obj));
- }
else {
- return rb_str_catf(str, "#<collected:%p>", (void*)obj);
+ return rb_str_append(str, rb_any_to_s(obj));
}
}
-static int
-wmap_inspect_i(st_data_t key, st_data_t val, st_data_t arg)
+static void
+wmap_inspect_i(VALUE key, VALUE val, st_data_t data)
{
- VALUE str = (VALUE)arg;
- VALUE k = (VALUE)key, v = (VALUE)val;
+ VALUE str = (VALUE)data;
if (RSTRING_PTR(str)[0] == '#') {
rb_str_cat2(str, ", ");
@@ -246,11 +242,10 @@ wmap_inspect_i(st_data_t key, st_data_t val, st_data_t arg)
rb_str_cat2(str, ": ");
RSTRING_PTR(str)[0] = '#';
}
- wmap_inspect_append(str, k);
- rb_str_cat2(str, " => ");
- wmap_inspect_append(str, v);
- return ST_CONTINUE;
+ wmap_inspect_append(str, key);
+ rb_str_cat2(str, " => ");
+ wmap_inspect_append(str, val);
}
static VALUE
@@ -261,30 +256,19 @@ wmap_inspect(VALUE self)
TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
VALUE str = rb_sprintf("-<%"PRIsVALUE":%p", c, (void *)self);
- if (w->wmap2obj) {
- st_foreach(w->wmap2obj, wmap_inspect_i, (st_data_t)str);
- }
+
+ wmap_foreach(w, wmap_inspect_i, (st_data_t)str);
+
RSTRING_PTR(str)[0] = '#';
rb_str_cat2(str, ">");
- return str;
-}
-static inline bool
-wmap_live_entry_p(st_data_t key, st_data_t val)
-{
- return wmap_live_p((VALUE)key) && wmap_live_p((VALUE)val);
+ return str;
}
-static int
-wmap_each_i(st_data_t key, st_data_t val, st_data_t _)
+static void
+wmap_each_i(VALUE key, VALUE val, st_data_t _)
{
- if (wmap_live_entry_p(key, val)) {
- rb_yield_values(2, (VALUE)key, (VALUE)val);
- return ST_CONTINUE;
- }
- else {
- return ST_DELETE;
- }
+ rb_yield_values(2, key, val);
}
/* Iterates over keys and objects in a weakly referenced object */
@@ -292,22 +276,17 @@ static VALUE
wmap_each(VALUE self)
{
struct weakmap *w;
-
TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
- st_foreach(w->wmap2obj, wmap_each_i, (st_data_t)0);
+
+ wmap_foreach(w, wmap_each_i, (st_data_t)0);
+
return self;
}
-static int
-wmap_each_key_i(st_data_t key, st_data_t val, st_data_t arg)
+static void
+wmap_each_key_i(VALUE key, VALUE _val, st_data_t _data)
{
- if (wmap_live_entry_p(key, val)) {
- rb_yield((VALUE)key);
- return ST_CONTINUE;
- }
- else {
- return ST_DELETE;
- }
+ rb_yield(key);
}
/* Iterates over keys and objects in a weakly referenced object */
@@ -315,22 +294,17 @@ static VALUE
wmap_each_key(VALUE self)
{
struct weakmap *w;
-
TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
- st_foreach(w->wmap2obj, wmap_each_key_i, (st_data_t)0);
+
+ wmap_foreach(w, wmap_each_key_i, (st_data_t)0);
+
return self;
}
-static int
-wmap_each_value_i(st_data_t key, st_data_t val, st_data_t arg)
+static void
+wmap_each_value_i(VALUE _key, VALUE val, st_data_t _data)
{
- if (wmap_live_entry_p(key, val)) {
- rb_yield((VALUE)val);
- return ST_CONTINUE;
- }
- else {
- return ST_DELETE;
- }
+ rb_yield(val);
}
/* Iterates over keys and objects in a weakly referenced object */
@@ -338,24 +312,19 @@ static VALUE
wmap_each_value(VALUE self)
{
struct weakmap *w;
-
TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
- st_foreach(w->wmap2obj, wmap_each_value_i, (st_data_t)0);
+
+ wmap_foreach(w, wmap_each_value_i, (st_data_t)0);
+
return self;
}
-static int
-wmap_keys_i(st_data_t key, st_data_t val, st_data_t arg)
+static void
+wmap_keys_i(st_data_t key, st_data_t _, st_data_t arg)
{
VALUE ary = (VALUE)arg;
- if (wmap_live_entry_p(key, val)) {
- rb_ary_push(ary, (VALUE)key);
- return ST_CONTINUE;
- }
- else {
- return ST_DELETE;
- }
+ rb_ary_push(ary, key);
}
/* Iterates over keys and objects in a weakly referenced object */
@@ -366,22 +335,17 @@ wmap_keys(VALUE self)
TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
VALUE ary = rb_ary_new();
- st_foreach(w->wmap2obj, wmap_keys_i, (st_data_t)ary);
+ wmap_foreach(w, wmap_keys_i, (st_data_t)ary);
+
return ary;
}
-static int
+static void
wmap_values_i(st_data_t key, st_data_t val, st_data_t arg)
{
VALUE ary = (VALUE)arg;
- if (wmap_live_entry_p(key, val)) {
- rb_ary_push(ary, (VALUE)val);
- return ST_CONTINUE;
- }
- else {
- return ST_DELETE;
- }
+ rb_ary_push(ary, (VALUE)val);
}
/* Iterates over values and objects in a weakly referenced object */
@@ -392,37 +356,9 @@ wmap_values(VALUE self)
TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
VALUE ary = rb_ary_new();
- st_foreach(w->wmap2obj, wmap_values_i, (st_data_t)ary);
- return ary;
-}
-
-static int
-wmap_aset_update(st_data_t *key, st_data_t *val, st_data_t arg, int existing)
-{
- VALUE size, *ptr, *optr;
- if (existing) {
- size = (ptr = optr = (VALUE *)*val)[0];
-
- for (VALUE index = 1; index <= size; index++) {
- if (ptr[index] == (VALUE)arg) {
- // The reference was already registered.
- return ST_STOP;
- }
- }
+ wmap_foreach(w, wmap_values_i, (st_data_t)ary);
- ++size;
- SIZED_REALLOC_N(ptr, VALUE, size + 1, size);
- }
- else {
- optr = 0;
- size = 1;
- ptr = ruby_xmalloc(2 * sizeof(VALUE));
- }
- ptr[0] = size;
- ptr[size] = (VALUE)arg;
- if (ptr == optr) return ST_STOP;
- *val = (st_data_t)ptr;
- return ST_CONTINUE;
+ return ary;
}
static VALUE
@@ -437,57 +373,39 @@ nonspecial_obj_id(VALUE obj)
#endif
}
-struct wmap_aset_replace_args {
- VALUE new_value;
- VALUE old_value;
-};
-
static int
-wmap_aset_replace_value(st_data_t *key, st_data_t *val, st_data_t _args, int existing)
+wmap_aset_replace(st_data_t *key, st_data_t *val, st_data_t new_key, int existing)
{
- struct wmap_aset_replace_args *args = (struct wmap_aset_replace_args *)_args;
-
if (existing) {
- args->old_value = *val;
+ VALUE *orig_pair = ((VALUE *)*key);
+ assert(orig_pair[0] == *(VALUE *)new_key);
+
+ wmap_free_entry(orig_pair, orig_pair + 1);
}
- *val = (st_data_t)args->new_value;
+
+ *key = new_key;
+ *val = (st_data_t)(((VALUE *)new_key) + 1);
+
return ST_CONTINUE;
}
/* Creates a weak reference from the given key to the given value */
static VALUE
-wmap_aset(VALUE self, VALUE key, VALUE value)
+wmap_aset(VALUE self, VALUE key, VALUE val)
{
struct weakmap *w;
-
TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
- if (FL_ABLE(value)) {
- rb_define_finalizer_no_check(value, w->final);
- }
- if (FL_ABLE(key)) {
- rb_define_finalizer_no_check(key, w->final);
- }
- struct wmap_aset_replace_args aset_args = {
- .new_value = value,
- .old_value = Qundef,
- };
- st_update(w->wmap2obj, (st_data_t)key, wmap_aset_replace_value, (st_data_t)&aset_args);
+ VALUE *pair = xmalloc(sizeof(VALUE) * 2);
+ pair[0] = key;
+ pair[1] = val;
- // If the value is unchanged, we have nothing to do.
- if (value != aset_args.old_value) {
- if (!UNDEF_P(aset_args.old_value) && FL_ABLE(aset_args.old_value)) {
- // That key existed and had an inverse reference, we need to clear the outdated inverse reference.
- st_update(w->obj2wmap, (st_data_t)aset_args.old_value, wmap_remove_inverse_ref, key);
- }
+ st_update(w->table, (st_data_t)pair, wmap_aset_replace, (st_data_t)pair);
- if (FL_ABLE(value)) {
- // If the value has no finalizer, we don't need to keep the inverse reference
- st_update(w->obj2wmap, (st_data_t)value, wmap_aset_update, key);
- }
- }
+ RB_OBJ_WRITTEN(self, Qundef, key);
+ RB_OBJ_WRITTEN(self, Qundef, val);
- return nonspecial_obj_id(value);
+ return nonspecial_obj_id(val);
}
/* Retrieves a weakly referenced object with the given key */
@@ -500,11 +418,11 @@ wmap_lookup(VALUE self, VALUE key)
TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
st_data_t data;
- if (!st_lookup(w->wmap2obj, (st_data_t)key, &data)) return Qundef;
+ if (!st_lookup(w->table, (st_data_t)&key, &data)) return Qundef;
- VALUE obj = (VALUE)data;
- if (!wmap_live_p(obj)) return Qundef;
- return obj;
+ if (!wmap_live_p(*(VALUE *)data)) return Qundef;
+
+ return *(VALUE *)data;
}
/* Retrieves a weakly referenced object with the given key */
@@ -519,20 +437,23 @@ wmap_aref(VALUE self, VALUE key)
static VALUE
wmap_delete(VALUE self, VALUE key)
{
- assert(wmap_live_p(key));
-
struct weakmap *w;
TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
- VALUE old_value = Qnil;
- if (st_delete(w->wmap2obj, (st_data_t *)&key, (st_data_t *)&old_value)) {
- if (FL_ABLE(old_value)) {
- // That key existed and had an inverse reference, we need to clear the outdated inverse reference.
- st_update(w->obj2wmap, (st_data_t)old_value, wmap_remove_inverse_ref, key);
+ VALUE orig_key = key;
+ st_data_t orig_key_data = (st_data_t)&orig_key;
+ st_data_t orig_val_data;
+ if (st_delete(w->table, &orig_key_data, &orig_val_data)) {
+ VALUE orig_val = *(VALUE *)orig_val_data;
+
+ wmap_free_entry((VALUE *)orig_key_data, (VALUE *)orig_val_data);
+
+ if (wmap_live_p(orig_val)) {
+ return orig_val;
}
- return old_value;
}
- else if (rb_block_given_p()) {
+
+ if (rb_block_given_p()) {
return rb_yield(key);
}
else {
@@ -552,10 +473,10 @@ static VALUE
wmap_size(VALUE self)
{
struct weakmap *w;
- st_index_t n;
-
TypedData_Get_Struct(self, struct weakmap, &weakmap_type, w);
- n = w->wmap2obj->num_entries;
+
+ st_index_t n = st_table_size(w->table);
+
#if SIZEOF_ST_INDEX_T <= SIZEOF_LONG
return ULONG2NUM(n);
#else