From bd2b314a05ae9192b3143e1e678a37c370d8a9ce Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Thu, 31 Oct 2019 17:21:01 -0700 Subject: Use a monotonically increasing number for object_id This changes object_id from being based on the objects location in memory (or a nearby memory location in the case of a conflict) to be based on an always increasing number. This number is a Ruby Integer which allows it to overflow the size of a pointer without issue (very unlikely to happen in real programs especially on 64-bit, but a nice guarantee). This changes obj_to_id_tbl and id_to_obj_tbl to both be maps of Ruby objects to Ruby objects (previously they were Ruby object to C integer) which simplifies updating them after compaction as we can run them through gc_update_table_refs. Co-authored-by: Aaron Patterson --- bootstraptest/test_objectspace.rb | 9 +++ gc.c | 138 +++++++++++++++++++------------------- hash.c | 8 ++- test/ruby/test_gc.rb | 8 +++ test/ruby/test_gc_compact.rb | 90 ------------------------- 5 files changed, 91 insertions(+), 162 deletions(-) diff --git a/bootstraptest/test_objectspace.rb b/bootstraptest/test_objectspace.rb index 24a1a0ce2c..63a8d99322 100644 --- a/bootstraptest/test_objectspace.rb +++ b/bootstraptest/test_objectspace.rb @@ -44,3 +44,12 @@ assert_normal_exit %q{ Thread.new {} end }, '[ruby-core:37858]' + +assert_equal 'ok', %q{ + objects_and_ids = 1000.times.map { o = Object.new; [o, o.object_id] } + objects_and_ids.each { |expected, id| + actual = ObjectSpace._id2ref(id) + raise "expected #{expected.inspect}, got #{actual.inspect}" unless actual.equal?(expected) + } + 'ok' +} diff --git a/gc.c b/gc.c index 5a4f72d9ff..a92a875c99 100644 --- a/gc.c +++ b/gc.c @@ -700,6 +700,7 @@ typedef struct rb_objspace { rb_event_flag_t hook_events; size_t total_allocated_objects; + VALUE next_object_id; rb_heap_t eden_heap; rb_heap_t tomb_heap; /* heap for zombies and ghosts */ @@ -747,7 +748,6 @@ typedef struct rb_objspace { #if USE_RGENGC size_t minor_gc_count; size_t major_gc_count; - size_t object_id_collisions; #if RGENGC_PROFILE > 0 size_t total_generated_normal_object_count; size_t total_generated_shady_object_count; @@ -2846,12 +2846,41 @@ obj_free(rb_objspace_t *objspace, VALUE obj) } } + +#define OBJ_ID_INCREMENT (sizeof(RVALUE) / 2) +#define OBJ_ID_INITIAL (OBJ_ID_INCREMENT * 2) + +static int +object_id_cmp(st_data_t x, st_data_t y) +{ + if (RB_TYPE_P(x, T_BIGNUM)) { + return !rb_big_eql(x, y); + } else { + return x != y; + } +} + +static st_index_t +object_id_hash(st_data_t n) +{ + if (RB_TYPE_P(n, T_BIGNUM)) { + return FIX2LONG(rb_big_hash(n)); + } else { + return st_numhash(n); + } +} +static const struct st_hash_type object_id_hash_type = { + object_id_cmp, + object_id_hash, +}; + void Init_heap(void) { rb_objspace_t *objspace = &rb_objspace; - objspace->id_to_obj_tbl = st_init_numtable(); + objspace->next_object_id = INT2FIX(OBJ_ID_INITIAL); + objspace->id_to_obj_tbl = st_init_table(&object_id_hash_type); objspace->obj_to_id_tbl = st_init_numtable(); #if RGENGC_ESTIMATE_OLDMALLOC @@ -3576,37 +3605,33 @@ id2ref(VALUE obj, VALUE objid) VALUE orig; void *p0; - ptr = NUM2PTR(objid); - p0 = (void *)ptr; - - if (ptr == Qtrue) return Qtrue; - if (ptr == Qfalse) return Qfalse; - if (ptr == Qnil) return Qnil; - if (FIXNUM_P(ptr)) return (VALUE)ptr; - if (FLONUM_P(ptr)) return (VALUE)ptr; - ptr = obj_id_to_ref(objid); - - if (st_lookup(objspace->id_to_obj_tbl, ptr, &orig)) { - return orig; + if (FIXNUM_P(objid) || rb_big_size(objid) <= SIZEOF_VOIDP) { + ptr = NUM2PTR(objid); + if (ptr == Qtrue) return Qtrue; + if (ptr == Qfalse) return Qfalse; + if (ptr == Qnil) return Qnil; + if (FIXNUM_P(ptr)) return (VALUE)ptr; + if (FLONUM_P(ptr)) return (VALUE)ptr; + + ptr = obj_id_to_ref(objid); + if ((ptr % sizeof(RVALUE)) == (4 << 2)) { + ID symid = ptr / sizeof(RVALUE); + p0 = (void *)ptr; + if (rb_id2str(symid) == 0) + rb_raise(rb_eRangeError, "%p is not symbol id value", p0); + return ID2SYM(symid); + } } - if ((ptr % sizeof(RVALUE)) == (4 << 2)) { - ID symid = ptr / sizeof(RVALUE); - if (rb_id2str(symid) == 0) - rb_raise(rb_eRangeError, "%p is not symbol id value", p0); - return ID2SYM(symid); + if (st_lookup(objspace->id_to_obj_tbl, objid, &orig)) { + return orig; } - if (!is_id_value(objspace, ptr)) { - rb_raise(rb_eRangeError, "%p is not id value", p0); - } - if (!is_live_object(objspace, ptr)) { - rb_raise(rb_eRangeError, "%p is recycled object", p0); - } - if (RBASIC(ptr)->klass == 0) { - rb_raise(rb_eRangeError, "%p is internal object", p0); + if (rb_int_ge(objid, objspace->next_object_id)) { + rb_raise(rb_eRangeError, "%+"PRIsVALUE" is not id value", rb_int2str(objid, 10)); + } else { + rb_raise(rb_eRangeError, "%+"PRIsVALUE" is recycled object", rb_int2str(objid, 10)); } - return (VALUE)ptr; } static VALUE @@ -3637,27 +3662,20 @@ cached_object_id(VALUE obj) if (st_lookup(objspace->obj_to_id_tbl, (st_data_t)obj, &id)) { GC_ASSERT(FL_TEST(obj, FL_SEEN_OBJ_ID)); - return nonspecial_obj_id(id); + return id; } else { GC_ASSERT(!FL_TEST(obj, FL_SEEN_OBJ_ID)); - id = obj; - while (1) { - /* id is the object id */ - if (st_is_member(objspace->id_to_obj_tbl, (st_data_t)id)) { - objspace->profile.object_id_collisions++; - id += sizeof(VALUE); - } - else { - st_insert(objspace->obj_to_id_tbl, (st_data_t)obj, (st_data_t)id); - st_insert(objspace->id_to_obj_tbl, (st_data_t)id, (st_data_t)obj); - FL_SET(obj, FL_SEEN_OBJ_ID); - return nonspecial_obj_id(id); - } - } + id = objspace->next_object_id; + objspace->next_object_id = rb_int_plus(id, INT2FIX(OBJ_ID_INCREMENT)); + + st_insert(objspace->obj_to_id_tbl, (st_data_t)obj, (st_data_t)id); + st_insert(objspace->id_to_obj_tbl, (st_data_t)id, (st_data_t)obj); + FL_SET(obj, FL_SEEN_OBJ_ID); + + return id; } - return nonspecial_obj_id(obj); } static VALUE @@ -5566,6 +5584,10 @@ gc_mark_roots(rb_objspace_t *objspace, const char **categoryp) MARK_CHECKPOINT("global_tbl"); rb_gc_mark_global_tbl(); + MARK_CHECKPOINT("object_id"); + rb_gc_mark(objspace->next_object_id); + mark_tbl_no_pin(objspace, objspace->obj_to_id_tbl); /* Only mark ids */ + if (stress_to_class) rb_gc_mark(stress_to_class); MARK_CHECKPOINT("finish"); @@ -7551,18 +7573,6 @@ gc_is_moveable_obj(rb_objspace_t *objspace, VALUE obj) return FALSE; } -static int -update_id_to_obj(st_data_t *key, st_data_t *value, st_data_t arg, int exists) -{ - if (exists) { - *value = arg; - return ST_CONTINUE; - } - else { - return ST_STOP; - } -} - static VALUE gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, VALUE moved_list) { @@ -7596,17 +7606,6 @@ gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, VALUE moved_list) rb_mv_generic_ivar((VALUE)src, (VALUE)dest); } - VALUE id; - - /* If the source object's object_id has been seen, we need to update - * the object to object id mapping. */ - if (st_lookup(objspace->obj_to_id_tbl, (VALUE)src, &id)) { - gc_report(4, objspace, "Moving object with seen id: %p -> %p\n", (void *)src, (void *)dest); - st_delete(objspace->obj_to_id_tbl, (st_data_t *)&src, 0); - st_insert(objspace->obj_to_id_tbl, (VALUE)dest, id); - st_update(objspace->id_to_obj_tbl, (st_data_t)id, update_id_to_obj, (st_data_t)dest); - } - /* Move the object */ memcpy(dest, src, sizeof(RVALUE)); memset(src, 0, sizeof(RVALUE)); @@ -8428,6 +8427,8 @@ gc_update_references(rb_objspace_t * objspace) rb_transient_heap_update_references(); global_symbols.ids = rb_gc_location(global_symbols.ids); global_symbols.dsymbol_fstr_hash = rb_gc_location(global_symbols.dsymbol_fstr_hash); + gc_update_table_refs(objspace, objspace->obj_to_id_tbl); + gc_update_table_refs(objspace, objspace->id_to_obj_tbl); gc_update_table_refs(objspace, global_symbols.str_sym); gc_update_table_refs(objspace, finalizer_table); } @@ -8885,7 +8886,6 @@ enum gc_stat_sym { #if USE_RGENGC gc_stat_sym_minor_gc_count, gc_stat_sym_major_gc_count, - gc_stat_sym_object_id_collisions, gc_stat_sym_remembered_wb_unprotected_objects, gc_stat_sym_remembered_wb_unprotected_objects_limit, gc_stat_sym_old_objects, @@ -8961,7 +8961,6 @@ setup_gc_stat_symbols(void) S(malloc_increase_bytes_limit); #if USE_RGENGC S(minor_gc_count); - S(object_id_collisions); S(major_gc_count); S(remembered_wb_unprotected_objects); S(remembered_wb_unprotected_objects_limit); @@ -9134,7 +9133,6 @@ gc_stat_internal(VALUE hash_or_sym) SET(malloc_increase_bytes_limit, malloc_limit); #if USE_RGENGC SET(minor_gc_count, objspace->profile.minor_gc_count); - SET(object_id_collisions, objspace->profile.object_id_collisions); SET(major_gc_count, objspace->profile.major_gc_count); SET(remembered_wb_unprotected_objects, objspace->rgengc.uncollectible_wb_unprotected_objects); SET(remembered_wb_unprotected_objects_limit, objspace->rgengc.uncollectible_wb_unprotected_objects_limit); diff --git a/hash.c b/hash.c index 0f5d418534..a5de9fde07 100644 --- a/hash.c +++ b/hash.c @@ -273,10 +273,14 @@ rb_objid_hash(st_index_t index) static st_index_t objid_hash(VALUE obj) { + VALUE object_id = rb_obj_id(obj); + if (!FIXNUM_P(object_id)) + object_id = rb_big_hash(object_id); + #if SIZEOF_LONG == SIZEOF_VOIDP - return (st_index_t)st_index_hash((st_index_t)NUM2LONG(rb_obj_id(obj))); + return (st_index_t)st_index_hash((st_index_t)NUM2LONG(object_id)); #elif SIZEOF_LONG_LONG == SIZEOF_VOIDP - return (st_index_t)st_index_hash((st_index_t)NUM2LL(rb_obj_id(obj))); + return (st_index_t)st_index_hash((st_index_t)NUM2LL(object_id)); #endif } diff --git a/test/ruby/test_gc.rb b/test/ruby/test_gc.rb index 1511ea3011..f42e098863 100644 --- a/test/ruby/test_gc.rb +++ b/test/ruby/test_gc.rb @@ -461,4 +461,12 @@ class TestGc < Test::Unit::TestCase skip "finalizers did not get run" if @result.empty? assert_equal([:c1, :c2], @result) end + + def test_object_ids_never_repeat + GC.start + a = 1000.times.map { Object.new.object_id } + GC.start + b = 1000.times.map { Object.new.object_id } + assert_empty(a & b) + end end diff --git a/test/ruby/test_gc_compact.rb b/test/ruby/test_gc_compact.rb index eaffccda08..bc26897386 100644 --- a/test/ruby/test_gc_compact.rb +++ b/test/ruby/test_gc_compact.rb @@ -7,13 +7,6 @@ class TestGCCompact < Test::Unit::TestCase (Fiddle.dlwrap(obj) >> 1) end - def assert_object_ids(list) - same_count = list.find_all { |obj| - memory_location(obj) == obj.object_id - }.count - list.count - same_count - end - def big_list(level = 10) if level > 0 big_list(level - 1) @@ -41,89 +34,6 @@ class TestGCCompact < Test::Unit::TestCase new_object end - def try_to_move_objects - 10.times do - list_of_objects = big_list - - ids = list_of_objects.map(&:object_id) # store id in map - addresses = list_of_objects.map(&self.:memory_location) - - assert_equal ids, addresses - - # All object ids should be equal - assert_equal 0, assert_object_ids(list_of_objects) # should be 0 - - GC.verify_compaction_references(toward: :empty) - - # Some should have moved - id_count = assert_object_ids(list_of_objects) - skip "couldn't get objects to move" if id_count == 0 - assert_operator id_count, :>, 0 - - new_ids = list_of_objects.map(&:object_id) - - # Object ids should not change after compaction - assert_equal ids, new_ids - - new_tenant = find_object_in_recycled_slot(addresses) - return [list_of_objects, addresses, new_tenant] if new_tenant - end - - flunk "Couldn't get objects to move" - end - - def test_find_collided_object - skip "figure out how to guarantee move" - - list_of_objects, addresses, new_tenant = try_to_move_objects - - # This is the object that used to be in new_object's position - loc = memory_location(new_tenant) - assert loc, "should have a memory location" - - if (ENV['TRAVIS'] && RUBY_PLATFORM =~ /darwin/) - skip "tests are failing on Travis osx / Wercker from here" - end - - address_idx = addresses.index(loc) - assert address_idx, "should have an address index" - - previous_tenant = list_of_objects[address_idx] - assert previous_tenant, "should have a previous tenant" - - assert_not_equal previous_tenant.object_id, new_tenant.object_id - - # Should be able to look up object by object_id - assert_equal new_tenant, ObjectSpace._id2ref(new_tenant.object_id) - - # Should be able to look up object by object_id - assert_equal previous_tenant, ObjectSpace._id2ref(previous_tenant.object_id) - - int = (new_tenant.object_id >> 1) - # These two should be the same! but they are not :( - assert_equal int, ObjectSpace._id2ref(int.object_id) - end - - def test_many_collisions - list_of_objects = big_list - ids = list_of_objects.map(&:object_id) - addresses = list_of_objects.map(&self.:memory_location) - - GC.verify_compaction_references(toward: :empty) - - skip "time consuming" - - new_tenants = 10.times.map { - find_object_in_recycled_slot(addresses) - } - - collisions = GC.stat(:object_id_collisions) - skip "couldn't get objects to collide" if collisions == 0 - assert_operator collisions, :>, 0 - ids.clear - new_tenants.clear - end - def test_complex_hash_keys list_of_objects = big_list hash = list_of_objects.hash -- cgit v1.2.3