 debug_counter.h                |   1
 ext/objspace/depend            |   1
 ext/objspace/objspace_dump.c   |   8
 gc.c                           |  52
 id_table.h                     |   5
 internal/gc.h                  |   7
 internal/variable.h            |   1
 object.c                       |  14
 ractor.c                       |  83
 shape.c                        | 156
 shape.h                        |  45
 test/ruby/test_shapes.rb       | 186
 transient_heap.c               |   1
 variable.c                     | 112
 vm_insnhelper.c                |  43
 yjit/bindgen/src/main.rs       |   2
 yjit/src/codegen.rs            |  50
 yjit/src/cruby.rs              |   4
 yjit/src/cruby_bindings.inc.rs |   2
 19 files changed, 650 insertions(+), 123 deletions(-)
diff --git a/debug_counter.h b/debug_counter.h index b0047685f0..6e0b8dee60 100644 --- a/debug_counter.h +++ b/debug_counter.h @@ -243,6 +243,7 @@ RB_DEBUG_COUNTER(obj_wb_unprotect) RB_DEBUG_COUNTER(obj_obj_embed) RB_DEBUG_COUNTER(obj_obj_transient) RB_DEBUG_COUNTER(obj_obj_ptr) +RB_DEBUG_COUNTER(obj_obj_too_complex) RB_DEBUG_COUNTER(obj_str_ptr) RB_DEBUG_COUNTER(obj_str_embed) diff --git a/ext/objspace/depend b/ext/objspace/depend index f83607236a..de5fa6c6a3 100644 --- a/ext/objspace/depend +++ b/ext/objspace/depend @@ -540,6 +540,7 @@ objspace_dump.o: $(top_srcdir)/id_table.h objspace_dump.o: $(top_srcdir)/internal.h objspace_dump.o: $(top_srcdir)/internal/array.h objspace_dump.o: $(top_srcdir)/internal/basic_operators.h +objspace_dump.o: $(top_srcdir)/internal/class.h objspace_dump.o: $(top_srcdir)/internal/compilers.h objspace_dump.o: $(top_srcdir)/internal/gc.h objspace_dump.o: $(top_srcdir)/internal/hash.h diff --git a/ext/objspace/objspace_dump.c b/ext/objspace/objspace_dump.c index 4c261a7a35..228ed2fa7c 100644 --- a/ext/objspace/objspace_dump.c +++ b/ext/objspace/objspace_dump.c @@ -13,6 +13,7 @@ **********************************************************************/ #include "gc.h" +#include "id_table.h" #include "internal.h" #include "internal/array.h" #include "internal/class.h" @@ -546,7 +547,7 @@ dump_object(VALUE obj, struct dump_config *dc) case T_OBJECT: dump_append(dc, ", \"ivars\":"); - dump_append_lu(dc, ROBJECT_IV_CAPACITY(obj)); + dump_append_lu(dc, ROBJECT_IV_COUNT(obj)); break; case T_FILE: @@ -735,7 +736,7 @@ shape_i(rb_shape_t *shape, void *data) dump_append_sizet(dc, rb_shape_depth(shape)); dump_append(dc, ", \"shape_type\":"); - switch(shape->type) { + switch((enum shape_type)shape->type) { case SHAPE_ROOT: dump_append(dc, "\"ROOT\""); break; @@ -762,6 +763,9 @@ shape_i(rb_shape_t *shape, void *data) case SHAPE_T_OBJECT: dump_append(dc, "\"T_OBJECT\""); break; + case SHAPE_OBJ_TOO_COMPLEX: + dump_append(dc, "\"OBJ_TOO_COMPLEX\""); + break; default: rb_bug("[objspace] unexpected shape type"); } @@ -2965,6 +2965,7 @@ rb_class_instance_allocate_internal(VALUE klass, VALUE flags, bool wb_protected) ROBJECT_SET_SHAPE_ID(obj, ROBJECT_SHAPE_ID(obj) + SIZE_POOL_COUNT); #if RUBY_DEBUG + RUBY_ASSERT(!rb_shape_obj_too_complex(obj)); VALUE *ptr = ROBJECT_IVPTR(obj); for (size_t i = 0; i < ROBJECT_IV_CAPACITY(obj); i++) { ptr[i] = Qundef; @@ -3451,7 +3452,11 @@ obj_free(rb_objspace_t *objspace, VALUE obj) switch (BUILTIN_TYPE(obj)) { case T_OBJECT: - if (RANY(obj)->as.basic.flags & ROBJECT_EMBED) { + if (rb_shape_obj_too_complex(obj)) { + RB_DEBUG_COUNTER_INC(obj_obj_too_complex); + rb_id_table_free(ROBJECT_IV_HASH(obj)); + } + else if (RANY(obj)->as.basic.flags & ROBJECT_EMBED) { RB_DEBUG_COUNTER_INC(obj_obj_embed); } else if (ROBJ_TRANSIENT_P(obj)) { @@ -4875,7 +4880,10 @@ obj_memsize_of(VALUE obj, int use_all_types) switch (BUILTIN_TYPE(obj)) { case T_OBJECT: - if (!(RBASIC(obj)->flags & ROBJECT_EMBED)) { + if (rb_shape_obj_too_complex(obj)) { + size += rb_id_table_memsize(ROBJECT_IV_HASH(obj)); + } + else if (!(RBASIC(obj)->flags & ROBJECT_EMBED)) { size += ROBJECT_IV_CAPACITY(obj) * sizeof(VALUE); } break; @@ -7297,14 +7305,23 @@ gc_mark_children(rb_objspace_t *objspace, VALUE obj) case T_OBJECT: { - const VALUE * const ptr = ROBJECT_IVPTR(obj); - - uint32_t i, len = ROBJECT_IV_COUNT(obj); - for (i = 0; i < len; i++) { - gc_mark(objspace, ptr[i]); + rb_shape_t *shape = rb_shape_get_shape_by_id(ROBJECT_SHAPE_ID(obj)); + if (rb_shape_obj_too_complex(obj)) { + 
mark_m_tbl(objspace, ROBJECT_IV_HASH(obj)); } + else { + const VALUE * const ptr = ROBJECT_IVPTR(obj); - rb_shape_t *shape = rb_shape_get_shape_by_id(ROBJECT_SHAPE_ID(obj)); + uint32_t i, len = ROBJECT_IV_COUNT(obj); + for (i = 0; i < len; i++) { + gc_mark(objspace, ptr[i]); + } + + if (LIKELY(during_gc) && + ROBJ_TRANSIENT_P(obj)) { + rb_transient_heap_mark(obj, ptr); + } + } if (shape) { VALUE klass = RBASIC_CLASS(obj); @@ -7314,11 +7331,6 @@ gc_mark_children(rb_objspace_t *objspace, VALUE obj) RCLASS_EXT(klass)->max_iv_count = num_of_ivs; } } - - if (LIKELY(during_gc) && - ROBJ_TRANSIENT_P(obj)) { - rb_transient_heap_mark(obj, ptr); - } } break; @@ -8426,7 +8438,12 @@ gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, V break; case T_OBJECT: - obj_size = rb_obj_embedded_size(ROBJECT_IV_CAPACITY(src)); + if (rb_shape_obj_too_complex(src)) { + return &size_pools[0]; + } + else { + obj_size = rb_obj_embedded_size(ROBJECT_IV_CAPACITY(src)); + } break; case T_STRING: @@ -10038,11 +10055,18 @@ gc_ref_update_array(rb_objspace_t * objspace, VALUE v) } } +static void update_m_tbl(rb_objspace_t *objspace, struct rb_id_table *tbl); + static void gc_ref_update_object(rb_objspace_t *objspace, VALUE v) { VALUE *ptr = ROBJECT_IVPTR(v); + if (rb_shape_obj_too_complex(v)) { + update_m_tbl(objspace, ROBJECT_IV_HASH(v)); + return; + } + #if USE_RVARGC uint32_t numiv = ROBJECT_IV_CAPACITY(v); diff --git a/id_table.h b/id_table.h index 9d9eb5648e..f72e2d1d92 100644 --- a/id_table.h +++ b/id_table.h @@ -19,7 +19,6 @@ struct rb_id_table *rb_id_table_create(size_t size); void rb_id_table_free(struct rb_id_table *tbl); void rb_id_table_clear(struct rb_id_table *tbl); -size_t rb_id_table_size(const struct rb_id_table *tbl); size_t rb_id_table_memsize(const struct rb_id_table *tbl); int rb_id_table_insert(struct rb_id_table *tbl, ID id, VALUE val); @@ -33,4 +32,8 @@ void rb_id_table_foreach(struct rb_id_table *tbl, rb_id_table_foreach_func_t *fu void rb_id_table_foreach_values(struct rb_id_table *tbl, rb_id_table_foreach_values_func_t *func, void *data); void rb_id_table_foreach_values_with_replace(struct rb_id_table *tbl, rb_id_table_foreach_values_func_t *func, rb_id_table_update_value_callback_func_t *replace, void *data); +RUBY_SYMBOL_EXPORT_BEGIN +size_t rb_id_table_size(const struct rb_id_table *tbl); +RUBY_SYMBOL_EXPORT_END + #endif /* RUBY_ID_TABLE_H */ diff --git a/internal/gc.h b/internal/gc.h index 5b2b9e8f70..d43eb29578 100644 --- a/internal/gc.h +++ b/internal/gc.h @@ -14,7 +14,6 @@ #include "internal/compilers.h" /* for __has_attribute */ #include "ruby/ruby.h" /* for rb_event_flag_t */ -#include "shape.h" struct rb_execution_context_struct; /* in vm_core.h */ struct rb_objspace; /* in vm_core.h */ @@ -68,11 +67,7 @@ struct rb_objspace; /* in vm_core.h */ rb_obj_write((VALUE)(a), UNALIGNED_MEMBER_ACCESS((VALUE *)(slot)), \ (VALUE)(b), __FILE__, __LINE__) -#if USE_RVARGC && SHAPE_IN_BASIC_FLAGS -# define SIZE_POOL_COUNT 5 -#else -# define SIZE_POOL_COUNT 1 -#endif +#include "shape.h" #define RCLASS_EXT_EMBEDDED (SIZE_POOL_COUNT > 1) diff --git a/internal/variable.h b/internal/variable.h index e59a0f1924..3933279633 100644 --- a/internal/variable.h +++ b/internal/variable.h @@ -38,6 +38,7 @@ static inline void ROBJ_TRANSIENT_UNSET(VALUE obj); struct gen_ivtbl; int rb_gen_ivtbl_get(VALUE obj, ID id, struct gen_ivtbl **ivtbl); +int rb_obj_evacuate_ivs_to_hash_table(ID key, VALUE val, st_data_t arg); RUBY_SYMBOL_EXPORT_BEGIN /* variable.c (export) */ @@ -272,8 +272,20 @@ 
rb_obj_copy_ivar(VALUE dest, VALUE obj) RUBY_ASSERT(!RB_TYPE_P(obj, T_CLASS) && !RB_TYPE_P(obj, T_MODULE)); RUBY_ASSERT(BUILTIN_TYPE(dest) == BUILTIN_TYPE(obj)); - uint32_t src_num_ivs = RBASIC_IV_COUNT(obj); rb_shape_t * src_shape = rb_shape_get_shape(obj); + + if (rb_shape_id(src_shape) == OBJ_TOO_COMPLEX_SHAPE_ID) { + struct rb_id_table * table = rb_id_table_create(rb_id_table_size(ROBJECT_IV_HASH(obj))); + + rb_ivar_foreach(obj, rb_obj_evacuate_ivs_to_hash_table, (st_data_t)table); + rb_shape_set_too_complex(dest); + + ROBJECT(dest)->as.heap.ivptr = (VALUE *)table; + + return; + } + + uint32_t src_num_ivs = RBASIC_IV_COUNT(obj); rb_shape_t * shape_to_set_on_dest = src_shape; VALUE * src_buf; VALUE * dest_buf; @@ -2248,6 +2248,19 @@ obj_hash_traverse_i(VALUE key, VALUE val, VALUE ptr) return ST_CONTINUE; } +static enum rb_id_table_iterator_result +obj_hash_iv_traverse_i(VALUE val, void *ptr) +{ + struct obj_traverse_callback_data *d = (struct obj_traverse_callback_data *)ptr; + + if (obj_traverse_i(val, d->data)) { + d->stop = true; + return ID_TABLE_STOP; + } + + return ID_TABLE_CONTINUE; +} + static void obj_traverse_reachable_i(VALUE obj, void *ptr) { @@ -2306,12 +2319,22 @@ obj_traverse_i(VALUE obj, struct obj_traverse_data *data) case T_OBJECT: { - uint32_t len = ROBJECT_IV_COUNT(obj); - VALUE *ptr = ROBJECT_IVPTR(obj); + if (rb_shape_obj_too_complex(obj)) { + struct obj_traverse_callback_data d = { + .stop = false, + .data = data, + }; + rb_id_table_foreach_values(ROBJECT_IV_HASH(obj), obj_hash_iv_traverse_i, &d); + if (d.stop) return 1; + } + else { + uint32_t len = ROBJECT_IV_COUNT(obj); + VALUE *ptr = ROBJECT_IVPTR(obj); - for (uint32_t i=0; i<len; i++) { - VALUE val = ptr[i]; - if (!UNDEF_P(val) && obj_traverse_i(val, data)) return 1; + for (uint32_t i=0; i<len; i++) { + VALUE val = ptr[i]; + if (!UNDEF_P(val) && obj_traverse_i(val, data)) return 1; + } } } break; @@ -2656,6 +2679,30 @@ obj_hash_traverse_replace_i(st_data_t *key, st_data_t *val, st_data_t ptr, int e return ST_CONTINUE; } +static enum rb_id_table_iterator_result +obj_iv_hash_traverse_replace_foreach_i(VALUE val, void *data) +{ + return ID_TABLE_REPLACE; +} + +static enum rb_id_table_iterator_result +obj_iv_hash_traverse_replace_i(VALUE *val, void *ptr, int exists) +{ + struct obj_traverse_replace_callback_data *d = (struct obj_traverse_replace_callback_data *)ptr; + struct obj_traverse_replace_data *data = d->data; + + if (obj_traverse_replace_i(*val, data)) { + d->stop = true; + return ID_TABLE_STOP; + } + else if (*val != data->replacement) { + VALUE v = *val = data->replacement; + RB_OBJ_WRITTEN(d->src, Qundef, v); + } + + return ID_TABLE_CONTINUE; +} + static struct st_table * obj_traverse_replace_rec(struct obj_traverse_replace_data *data) { @@ -2756,16 +2803,30 @@ obj_traverse_replace_i(VALUE obj, struct obj_traverse_replace_data *data) case T_OBJECT: { + if (rb_shape_obj_too_complex(obj)) { + struct rb_id_table * table = ROBJECT_IV_HASH(obj); + struct obj_traverse_replace_callback_data d = { + .stop = false, + .data = data, + .src = obj, + }; + rb_id_table_foreach_values_with_replace(table, + obj_iv_hash_traverse_replace_foreach_i, + obj_iv_hash_traverse_replace_i, + (void *)&d); + } + else { #if USE_TRANSIENT_HEAP - if (data->move) rb_obj_transient_heap_evacuate(obj, TRUE); + if (data->move) rb_obj_transient_heap_evacuate(obj, TRUE); #endif - uint32_t len = ROBJECT_IV_COUNT(obj); - VALUE *ptr = ROBJECT_IVPTR(obj); + uint32_t len = ROBJECT_IV_COUNT(obj); + VALUE *ptr = ROBJECT_IVPTR(obj); - for (uint32_t 
i=0; i<len; i++) { - if (!UNDEF_P(ptr[i])) { - CHECK_AND_REPLACE(ptr[i]); + for (uint32_t i=0; i<len; i++) { + if (!UNDEF_P(ptr[i])) { + CHECK_AND_REPLACE(ptr[i]); + } } } } @@ -99,13 +99,13 @@ rb_shape_get_shape_id(VALUE obj) #else switch (BUILTIN_TYPE(obj)) { case T_OBJECT: - return ROBJECT_SHAPE_ID(obj); - break; + return ROBJECT_SHAPE_ID(obj); + break; case T_CLASS: case T_MODULE: - return RCLASS_SHAPE_ID(obj); + return RCLASS_SHAPE_ID(obj); default: - return rb_generic_shape_id(obj); + return rb_generic_shape_id(obj); } #endif } @@ -130,50 +130,57 @@ rb_shape_get_shape(VALUE obj) } static rb_shape_t* -get_next_shape_internal(rb_shape_t * shape, ID id, enum shape_type shape_type, bool * variation_created) +get_next_shape_internal(rb_shape_t * shape, ID id, enum shape_type shape_type, bool * variation_created, bool new_shapes_allowed) { rb_shape_t *res = NULL; - RB_VM_LOCK_ENTER(); - { - bool had_edges = !!shape->edges; - - *variation_created = false; - - if (!shape->edges) { - shape->edges = rb_id_table_create(0); - } - // Lookup the shape in edges - if there's already an edge and a corresponding shape for it, - // we can return that. Otherwise, we'll need to get a new shape - if (!rb_id_table_lookup(shape->edges, id, (VALUE *)&res)) { - *variation_created = had_edges; + // There should never be outgoing edges from "too complex" + RUBY_ASSERT(rb_shape_id(shape) != OBJ_TOO_COMPLEX_SHAPE_ID); - rb_shape_t * new_shape = rb_shape_alloc(id, shape); + *variation_created = false; - new_shape->type = (uint8_t)shape_type; - new_shape->capacity = shape->capacity; + if (new_shapes_allowed) { + RB_VM_LOCK_ENTER(); + { + bool had_edges = !!shape->edges; - switch (shape_type) { - case SHAPE_IVAR: - new_shape->next_iv_index = shape->next_iv_index + 1; - break; - case SHAPE_CAPACITY_CHANGE: - case SHAPE_FROZEN: - case SHAPE_T_OBJECT: - new_shape->next_iv_index = shape->next_iv_index; - break; - case SHAPE_INITIAL_CAPACITY: - case SHAPE_ROOT: - rb_bug("Unreachable"); - break; + if (!shape->edges) { + shape->edges = rb_id_table_create(0); } - rb_id_table_insert(shape->edges, id, (VALUE)new_shape); + // Lookup the shape in edges - if there's already an edge and a corresponding shape for it, + // we can return that. Otherwise, we'll need to get a new shape + if (!rb_id_table_lookup(shape->edges, id, (VALUE *)&res)) { + *variation_created = had_edges; + + rb_shape_t * new_shape = rb_shape_alloc(id, shape); + + new_shape->type = (uint8_t)shape_type; + new_shape->capacity = shape->capacity; + + switch (shape_type) { + case SHAPE_IVAR: + new_shape->next_iv_index = shape->next_iv_index + 1; + break; + case SHAPE_CAPACITY_CHANGE: + case SHAPE_FROZEN: + case SHAPE_T_OBJECT: + new_shape->next_iv_index = shape->next_iv_index; + break; + case SHAPE_OBJ_TOO_COMPLEX: + case SHAPE_INITIAL_CAPACITY: + case SHAPE_ROOT: + rb_bug("Unreachable"); + break; + } + + rb_id_table_insert(shape->edges, id, (VALUE)new_shape); - res = new_shape; + res = new_shape; + } } + RB_VM_LOCK_LEAVE(); } - RB_VM_LOCK_LEAVE(); return res; } @@ -192,6 +199,7 @@ move_iv(VALUE obj, ID id, attr_index_t from, attr_index_t to) RCLASS_IVPTR(obj)[to] = RCLASS_IVPTR(obj)[from]; break; case T_OBJECT: + RUBY_ASSERT(!rb_shape_obj_too_complex(obj)); ROBJECT_IVPTR(obj)[to] = ROBJECT_IVPTR(obj)[from]; break; default: { @@ -242,7 +250,7 @@ remove_shape_recursive(VALUE obj, ID id, rb_shape_t * shape, VALUE * removed) // has the same attributes as this shape. 
         if (new_parent) {
             bool dont_care;
-            rb_shape_t * new_child = get_next_shape_internal(new_parent, shape->edge_name, shape->type, &dont_care);
+            rb_shape_t * new_child = get_next_shape_internal(new_parent, shape->edge_name, shape->type, &dont_care, true);
             new_child->capacity = shape->capacity;
             if (new_child->type == SHAPE_IVAR) {
                 move_iv(obj, id, shape->next_iv_index - 1, new_child->next_iv_index - 1);
@@ -275,7 +283,7 @@ rb_shape_transition_shape_frozen(VALUE obj)
     RUBY_ASSERT(shape);
     RUBY_ASSERT(RB_OBJ_FROZEN(obj));
 
-    if (rb_shape_frozen_shape_p(shape)) {
+    if (rb_shape_frozen_shape_p(shape) || rb_shape_obj_too_complex(obj)) {
         return;
     }
@@ -287,7 +295,7 @@ rb_shape_transition_shape_frozen(VALUE obj)
     }
 
     bool dont_care;
-    next_shape = get_next_shape_internal(shape, (ID)id_frozen, SHAPE_FROZEN, &dont_care);
+    next_shape = get_next_shape_internal(shape, (ID)id_frozen, SHAPE_FROZEN, &dont_care, true);
 
     RUBY_ASSERT(next_shape);
     rb_shape_set_shape(obj, next_shape);
@@ -302,7 +310,7 @@ rb_shape_get_next_iv_shape(rb_shape_t* shape, ID id)
 {
     RUBY_ASSERT(!is_instance_id(id) || RTEST(rb_sym2str(ID2SYM(id))));
     bool dont_care;
-    return get_next_shape_internal(shape, id, SHAPE_IVAR, &dont_care);
+    return get_next_shape_internal(shape, id, SHAPE_IVAR, &dont_care, true);
 }
 
 rb_shape_t *
@@ -310,8 +318,20 @@ rb_shape_get_next(rb_shape_t* shape, VALUE obj, ID id)
 {
     RUBY_ASSERT(!is_instance_id(id) || RTEST(rb_sym2str(ID2SYM(id))));
 
-    bool variation_created;
-    rb_shape_t * new_shape = get_next_shape_internal(shape, id, SHAPE_IVAR, &variation_created);
+    bool allow_new_shape = true;
+
+    if (BUILTIN_TYPE(obj) == T_OBJECT) {
+        VALUE klass = rb_obj_class(obj);
+        allow_new_shape = RCLASS_EXT(klass)->variation_count < SHAPE_MAX_VARIATIONS;
+    }
+
+    bool variation_created = false;
+    rb_shape_t * new_shape = get_next_shape_internal(shape, id, SHAPE_IVAR, &variation_created, allow_new_shape);
+
+    if (!new_shape) {
+        RUBY_ASSERT(BUILTIN_TYPE(obj) == T_OBJECT);
+        new_shape = rb_shape_get_shape_by_id(OBJ_TOO_COMPLEX_SHAPE_ID);
+    }
 
     // Check if we should update max_iv_count on the object's class
@@ -333,7 +353,7 @@ rb_shape_transition_shape_capa(rb_shape_t* shape, uint32_t new_capacity)
 {
     ID edge_name = rb_make_temporary_id(new_capacity);
     bool dont_care;
-    rb_shape_t * new_shape = get_next_shape_internal(shape, edge_name, SHAPE_CAPACITY_CHANGE, &dont_care);
+    rb_shape_t * new_shape = get_next_shape_internal(shape, edge_name, SHAPE_CAPACITY_CHANGE, &dont_care, true);
     new_shape->capacity = new_capacity;
     return new_shape;
 }
@@ -341,6 +361,10 @@ rb_shape_transition_shape_capa(rb_shape_t* shape, uint32_t new_capacity)
 bool
 rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t *value)
 {
+    // It doesn't make sense to ask for the index of an IV that's stored
+    // on an object that is "too complex" as it uses a hash for storing IVs
+    RUBY_ASSERT(rb_shape_id(shape) != OBJ_TOO_COMPLEX_SHAPE_ID);
+
     while (shape->parent_id != INVALID_SHAPE_ID) {
         if (shape->edge_name == id) {
             enum shape_type shape_type;
@@ -356,6 +380,7 @@ rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t *value)
           case SHAPE_INITIAL_CAPACITY:
          case SHAPE_T_OBJECT:
            return false;
+          case SHAPE_OBJ_TOO_COMPLEX:
          case SHAPE_FROZEN:
            rb_bug("Ivar should not exist on transition\n");
         }
@@ -448,11 +473,28 @@ rb_shape_rebuild_shape(rb_shape_t * initial_shape, rb_shape_t * dest_shape)
       case SHAPE_INITIAL_CAPACITY:
       case SHAPE_T_OBJECT:
         break;
+      case SHAPE_OBJ_TOO_COMPLEX:
+        rb_bug("Unreachable\n");
+        break;
     }
 
     return midway_shape;
 }
 
+bool
+rb_shape_obj_too_complex(VALUE obj)
+{
+    return rb_shape_get_shape_id(obj) == OBJ_TOO_COMPLEX_SHAPE_ID;
+}
+
+void
+rb_shape_set_too_complex(VALUE obj)
+{
+    RUBY_ASSERT(BUILTIN_TYPE(obj) == T_OBJECT);
+    RUBY_ASSERT(!rb_shape_obj_too_complex(obj));
+    rb_shape_set_shape_id(obj, OBJ_TOO_COMPLEX_SHAPE_ID);
+}
+
 size_t
 rb_shape_edges_count(rb_shape_t *shape)
 {
@@ -519,6 +561,19 @@ rb_shape_capacity(VALUE self)
 }
 
 static VALUE
+rb_shape_too_complex(VALUE self)
+{
+    rb_shape_t * shape;
+    TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape);
+    if (rb_shape_id(shape) == OBJ_TOO_COMPLEX_SHAPE_ID) {
+        return Qtrue;
+    }
+    else {
+        return Qfalse;
+    }
+}
+
+static VALUE
 rb_shape_parent_id(VALUE self)
 {
     rb_shape_t * shape;
@@ -730,7 +785,7 @@ Init_default_shapes(void)
         rb_shape_t * shape = rb_shape_get_shape_by_id(i);
         bool dont_care;
         rb_shape_t * t_object_shape =
-            get_next_shape_internal(shape, id_t_object, SHAPE_T_OBJECT, &dont_care);
+            get_next_shape_internal(shape, id_t_object, SHAPE_T_OBJECT, &dont_care, true);
         t_object_shape->edges = rb_id_table_create(0);
         RUBY_ASSERT(rb_shape_id(t_object_shape) == (shape_id_t)(i + SIZE_POOL_COUNT));
     }
@@ -740,10 +795,16 @@ Init_default_shapes(void)
 #if RUBY_DEBUG
     rb_shape_t * special_const_shape =
 #endif
-        get_next_shape_internal(root, (ID)id_frozen, SHAPE_FROZEN, &dont_care);
+        get_next_shape_internal(root, (ID)id_frozen, SHAPE_FROZEN, &dont_care, true);
     RUBY_ASSERT(rb_shape_id(special_const_shape) == SPECIAL_CONST_SHAPE_ID);
     RUBY_ASSERT(SPECIAL_CONST_SHAPE_ID == (GET_VM()->next_shape_id - 1));
     RUBY_ASSERT(rb_shape_frozen_shape_p(special_const_shape));
+
+    rb_shape_t * hash_fallback_shape = rb_shape_alloc_with_parent_id(0, ROOT_SHAPE_ID);
+    hash_fallback_shape->type = SHAPE_OBJ_TOO_COMPLEX;
+    hash_fallback_shape->size_pool_index = 0;
+    RUBY_ASSERT(OBJ_TOO_COMPLEX_SHAPE_ID == (GET_VM()->next_shape_id - 1));
+    RUBY_ASSERT(rb_shape_id(hash_fallback_shape) == OBJ_TOO_COMPLEX_SHAPE_ID);
 }
 
 void
@@ -763,6 +824,7 @@ Init_shape(void)
     rb_define_method(rb_cShape, "id", rb_wrapped_shape_id, 0);
     rb_define_method(rb_cShape, "type", rb_shape_type, 0);
     rb_define_method(rb_cShape, "capacity", rb_shape_capacity, 0);
+    rb_define_method(rb_cShape, "too_complex?", rb_shape_too_complex, 0);
     rb_define_const(rb_cShape, "SHAPE_ROOT", INT2NUM(SHAPE_ROOT));
     rb_define_const(rb_cShape, "SHAPE_IVAR", INT2NUM(SHAPE_IVAR));
     rb_define_const(rb_cShape, "SHAPE_T_OBJECT", INT2NUM(SHAPE_T_OBJECT));
@@ -770,6 +832,8 @@ Init_shape(void)
     rb_define_const(rb_cShape, "SHAPE_ID_NUM_BITS", INT2NUM(SHAPE_ID_NUM_BITS));
     rb_define_const(rb_cShape, "SHAPE_FLAG_SHIFT", INT2NUM(SHAPE_FLAG_SHIFT));
     rb_define_const(rb_cShape, "SPECIAL_CONST_SHAPE_ID", INT2NUM(SPECIAL_CONST_SHAPE_ID));
+    rb_define_const(rb_cShape, "OBJ_TOO_COMPLEX_SHAPE_ID", INT2NUM(OBJ_TOO_COMPLEX_SHAPE_ID));
+    rb_define_const(rb_cShape, "SHAPE_MAX_VARIATIONS", INT2NUM(SHAPE_MAX_VARIATIONS));
     rb_define_singleton_method(rb_cShape, "transition_tree", shape_transition_tree, 0);
     rb_define_singleton_method(rb_cShape, "find_by_id", rb_shape_find_by_id, 1);
diff --git a/shape.h b/shape.h
@@ -27,12 +27,22 @@ typedef uint16_t shape_id_t;
 
 # define SHAPE_BITMAP_SIZE 16384
 
+# define SHAPE_MAX_VARIATIONS 8
+
 # define MAX_SHAPE_ID (SHAPE_MASK - 1)
 # define INVALID_SHAPE_ID SHAPE_MASK
 # define ROOT_SHAPE_ID 0x0
+
 // We use SIZE_POOL_COUNT number of shape IDs for transitions out of different size pools
 // The next available shapd ID will be the SPECIAL_CONST_SHAPE_ID
+#if USE_RVARGC && (SIZEOF_UINT64_T == SIZEOF_VALUE)
+# define SIZE_POOL_COUNT 5
+#else
+# define SIZE_POOL_COUNT 1
+#endif
+
 # define SPECIAL_CONST_SHAPE_ID (SIZE_POOL_COUNT * 2)
+# define OBJ_TOO_COMPLEX_SHAPE_ID (SPECIAL_CONST_SHAPE_ID + 1)
 
 struct rb_shape {
     struct rb_id_table * edges; // id_table from ID (ivar) to next shape
@@ -53,6 +63,7 @@ enum shape_type {
     SHAPE_CAPACITY_CHANGE,
     SHAPE_INITIAL_CAPACITY,
     SHAPE_T_OBJECT,
+    SHAPE_OBJ_TOO_COMPLEX,
 };
 
 #if SHAPE_IN_BASIC_FLAGS
@@ -141,6 +152,7 @@ rb_shape_t * rb_shape_get_next_iv_shape(rb_shape_t * shape, ID id);
 rb_shape_t* rb_shape_get_next(rb_shape_t* shape, VALUE obj, ID id);
 bool rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t * value);
 shape_id_t rb_shape_id(rb_shape_t * shape);
+bool rb_shape_obj_too_complex(VALUE obj);
 MJIT_SYMBOL_EXPORT_END
 
 rb_shape_t * rb_shape_rebuild_shape(rb_shape_t * initial_shape, rb_shape_t * dest_shape);
@@ -149,15 +161,41 @@ static inline uint32_t
 ROBJECT_IV_CAPACITY(VALUE obj)
 {
     RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
+    // Asking for capacity doesn't make sense when the object is using
+    // a hash table for storing instance variables
+    RUBY_ASSERT(ROBJECT_SHAPE_ID(obj) != OBJ_TOO_COMPLEX_SHAPE_ID);
     return rb_shape_get_shape_by_id(ROBJECT_SHAPE_ID(obj))->capacity;
 }
 
+static inline struct rb_id_table *
+ROBJECT_IV_HASH(VALUE obj)
+{
+    RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
+    RUBY_ASSERT(ROBJECT_SHAPE_ID(obj) == OBJ_TOO_COMPLEX_SHAPE_ID);
+    return (struct rb_id_table *)ROBJECT(obj)->as.heap.ivptr;
+}
+
+static inline void
+ROBJECT_SET_IV_HASH(VALUE obj, const struct rb_id_table *tbl)
+{
+    RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
+    RUBY_ASSERT(ROBJECT_SHAPE_ID(obj) == OBJ_TOO_COMPLEX_SHAPE_ID);
+    ROBJECT(obj)->as.heap.ivptr = (VALUE *)tbl;
+}
+
+size_t rb_id_table_size(const struct rb_id_table *tbl);
+
 static inline uint32_t
 ROBJECT_IV_COUNT(VALUE obj)
 {
-    RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
-    uint32_t ivc = rb_shape_get_shape_by_id(ROBJECT_SHAPE_ID(obj))->next_iv_index;
-    return ivc;
+    if (ROBJECT_SHAPE_ID(obj) == OBJ_TOO_COMPLEX_SHAPE_ID) {
+        return (uint32_t)rb_id_table_size(ROBJECT_IV_HASH(obj));
+    }
+    else {
+        RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
+        RUBY_ASSERT(ROBJECT_SHAPE_ID(obj) != OBJ_TOO_COMPLEX_SHAPE_ID);
+        return rb_shape_get_shape_by_id(ROBJECT_SHAPE_ID(obj))->next_iv_index;
+    }
 }
 
 static inline uint32_t
@@ -182,6 +220,7 @@
 bool rb_shape_set_shape_id(VALUE obj, shape_id_t shape_id);
 VALUE rb_obj_debug_shape(VALUE self, VALUE obj);
 VALUE rb_shape_flags_mask(void);
+void rb_shape_set_too_complex(VALUE obj);
 
 RUBY_SYMBOL_EXPORT_BEGIN
 typedef void each_shape_callback(rb_shape_t * shape, void *data);
diff --git a/test/ruby/test_shapes.rb b/test/ruby/test_shapes.rb
index b19238950b..358657d360 100644
--- a/test/ruby/test_shapes.rb
+++ b/test/ruby/test_shapes.rb
@@ -37,6 +37,35 @@ class TestShapes < Test::Unit::TestCase
     end
   end
 
+  class TooComplex
+    attr_reader :hopefully_unique_name, :b
+
+    def initialize
+      @hopefully_unique_name = "a"
+      @b = "b"
+    end
+
+    # Make enough lazily defined accessors to allow us to force
+    # polymorphism
+    class_eval (RubyVM::Shape::SHAPE_MAX_VARIATIONS + 1).times.map {
+      "def a#{_1}_m; @a#{_1} ||= #{_1}; end"
+    }.join(" ; ")
+
+    class_eval "attr_accessor " + (RubyVM::Shape::SHAPE_MAX_VARIATIONS + 1).times.map {
+      ":a#{_1}"
+    }.join(", ")
+
+    def iv_not_defined; @not_defined; end
+
+    def write_iv_method
+      self.a3 = 12345
+    end
+
+    def write_iv
+      @a3 = 12345
+    end
+  end
+
   # RubyVM::Shape.of returns new instances of shape objects for
   # each call. This helper method allows us to define equality for
   # shapes
@@ -51,6 +80,156 @@ class TestShapes < Test::Unit::TestCase
     refute_equal(shape1.id, shape2.id)
   end
 
+  def test_too_complex
+    ensure_complex
+
+    tc = TooComplex.new
+    tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+    assert_predicate RubyVM::Shape.of(tc), :too_complex?
+  end
+
+  def test_too_complex_ractor
+    assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
+    begin;
+      $VERBOSE = nil
+      class TooComplex
+        attr_reader :very_unique
+      end
+
+      RubyVM::Shape::SHAPE_MAX_VARIATIONS.times do
+        TooComplex.new.instance_variable_set(:"@unique_#{_1}", Object.new)
+      end
+
+      tc = TooComplex.new
+      tc.instance_variable_set(:"@very_unique", 3)
+
+      assert_predicate RubyVM::Shape.of(tc), :too_complex?
+      assert_equal 3, tc.very_unique
+      assert_equal 3, Ractor.new(tc) { |x| Ractor.yield(x.very_unique) }.take
+      assert_equal tc.instance_variables.sort, Ractor.new(tc) { |x| Ractor.yield(x.instance_variables) }.take.sort
+    end;
+  end
+
+  def test_too_complex_ractor_shareable
+    assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
+    begin;
+      $VERBOSE = nil
+      class TooComplex
+        attr_reader :very_unique
+      end
+
+      RubyVM::Shape::SHAPE_MAX_VARIATIONS.times do
+        TooComplex.new.instance_variable_set(:"@unique_#{_1}", Object.new)
+      end
+
+      tc = TooComplex.new
+      tc.instance_variable_set(:"@very_unique", 3)
+
+      assert_predicate RubyVM::Shape.of(tc), :too_complex?
+      assert_equal 3, tc.very_unique
+      assert_equal 3, Ractor.make_shareable(tc).very_unique
+    end;
+  end
+
+  def test_read_iv_after_complex
+    ensure_complex
+
+    tc = TooComplex.new
+    tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+    assert_predicate RubyVM::Shape.of(tc), :too_complex?
+    assert_equal 3, tc.a3_m
+  end
+
+  def test_read_method_after_complex
+    ensure_complex
+
+    tc = TooComplex.new
+    tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+    assert_predicate RubyVM::Shape.of(tc), :too_complex?
+    assert_equal 3, tc.a3_m
+    assert_equal 3, tc.a3
+  end
+
+  def test_write_method_after_complex
+    ensure_complex
+
+    tc = TooComplex.new
+    tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+    assert_predicate RubyVM::Shape.of(tc), :too_complex?
+    tc.write_iv_method
+    tc.write_iv_method
+    assert_equal 12345, tc.a3_m
+    assert_equal 12345, tc.a3
+  end
+
+  def test_write_iv_after_complex
+    ensure_complex
+
+    tc = TooComplex.new
+    tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+    assert_predicate RubyVM::Shape.of(tc), :too_complex?
+    tc.write_iv
+    tc.write_iv
+    assert_equal 12345, tc.a3_m
+    assert_equal 12345, tc.a3
+  end
+
+  def test_iv_read_via_method_after_complex
+    ensure_complex
+
+    tc = TooComplex.new
+    tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+    assert_predicate RubyVM::Shape.of(tc), :too_complex?
+    assert_equal 3, tc.a3_m
+    assert_equal 3, tc.instance_variable_get(:@a3)
+  end
+
+  def test_delete_iv_after_complex
+    ensure_complex
+
+    tc = TooComplex.new
+    tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+    assert_predicate RubyVM::Shape.of(tc), :too_complex?
+
+    assert_equal 3, tc.a3_m # make sure IV is initialized
+    assert tc.instance_variable_defined?(:@a3)
+    tc.remove_instance_variable(:@a3)
+    assert_nil tc.a3
+  end
+
+  def test_delete_undefined_after_complex
+    ensure_complex
+
+    tc = TooComplex.new
+    tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+    assert_predicate RubyVM::Shape.of(tc), :too_complex?
+ + refute tc.instance_variable_defined?(:@a3) + assert_raise(NameError) do + tc.remove_instance_variable(:@a3) + end + assert_nil tc.a3 + end + + def test_freeze_after_complex + ensure_complex + + tc = TooComplex.new + tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m") + assert_predicate RubyVM::Shape.of(tc), :too_complex? + tc.freeze + assert_raise(FrozenError) { tc.a3_m } + end + + def test_read_undefined_iv_after_complex + ensure_complex + + tc = TooComplex.new + tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m") + assert_predicate RubyVM::Shape.of(tc), :too_complex? + assert_equal nil, tc.iv_not_defined + end + def test_shape_order bar = ShapeOrder.new # 0 => 1 bar.set_c # 1 => 2 @@ -218,4 +397,11 @@ class TestShapes < Test::Unit::TestCase RubyVM::Shape.find_by_id(-1) end end + + def ensure_complex + RubyVM::Shape::SHAPE_MAX_VARIATIONS.times do + tc = TooComplex.new + tc.send("a#{_1}_m") + end + end end if defined?(RubyVM::Shape) diff --git a/transient_heap.c b/transient_heap.c index 9dfd980e64..14e941a41a 100644 --- a/transient_heap.c +++ b/transient_heap.c @@ -599,6 +599,7 @@ transient_heap_ptr(VALUE obj, int error) break; case T_OBJECT: if (ROBJ_TRANSIENT_P(obj)) { + RUBY_ASSERT(!rb_shape_obj_too_complex(obj)); ptr = ROBJECT_IVPTR(obj); } break; diff --git a/variable.c b/variable.c index 42a73b5953..0c283738aa 100644 --- a/variable.c +++ b/variable.c @@ -1172,6 +1172,18 @@ rb_ivar_lookup(VALUE obj, ID id, VALUE undef) #if !SHAPE_IN_BASIC_FLAGS shape_id = ROBJECT_SHAPE_ID(obj); #endif + if (rb_shape_obj_too_complex(obj)) { + struct rb_id_table * iv_table = ROBJECT_IV_HASH(obj); + VALUE val; + if (rb_id_table_lookup(iv_table, id, &val)) { + return val; + } + else { + return undef; + } + } + + RUBY_ASSERT(!rb_shape_obj_too_complex(obj)); ivar_list = ROBJECT_IVPTR(obj); break; } @@ -1334,6 +1346,7 @@ rb_obj_transient_heap_evacuate(VALUE obj, int promote) assert(!RB_FL_TEST_RAW(obj, ROBJECT_EMBED)); uint32_t len = ROBJECT_IV_CAPACITY(obj); + RUBY_ASSERT(!rb_shape_obj_too_complex(obj)); const VALUE *old_ptr = ROBJECT_IVPTR(obj); VALUE *new_ptr; @@ -1353,6 +1366,7 @@ rb_obj_transient_heap_evacuate(VALUE obj, int promote) void rb_ensure_iv_list_size(VALUE obj, uint32_t current_capacity, uint32_t new_capacity) { + RUBY_ASSERT(!rb_shape_obj_too_complex(obj)); VALUE *ptr = ROBJECT_IVPTR(obj); VALUE *newptr; @@ -1402,21 +1416,36 @@ rb_grow_iv_list(VALUE obj) return res; } +int +rb_obj_evacuate_ivs_to_hash_table(ID key, VALUE val, st_data_t arg) +{ + rb_id_table_insert((struct rb_id_table *)arg, key, val); + return ST_CONTINUE; +} + attr_index_t rb_obj_ivar_set(VALUE obj, ID id, VALUE val) { attr_index_t index; - shape_id_t next_shape_id = ROBJECT_SHAPE_ID(obj); - rb_shape_t *shape = rb_shape_get_shape_by_id(next_shape_id); + rb_shape_t *shape = rb_shape_get_shape(obj); uint32_t num_iv = shape->capacity; + if (rb_shape_obj_too_complex(obj)) { + struct rb_id_table * table = ROBJECT_IV_HASH(obj); + rb_id_table_insert(table, id, val); + RB_OBJ_WRITTEN(obj, Qundef, val); + return 0; + } + if (!rb_shape_get_iv_index(shape, id, &index)) { index = shape->next_iv_index; if (index >= MAX_IVARS) { rb_raise(rb_eArgError, "too many instance variables"); } + RUBY_ASSERT(!rb_shape_obj_too_complex(obj)); + if (UNLIKELY(shape->next_iv_index >= num_iv)) { RUBY_ASSERT(shape->next_iv_index == num_iv); @@ -1425,13 +1454,39 @@ rb_obj_ivar_set(VALUE obj, ID id, VALUE val) } rb_shape_t *next_shape = rb_shape_get_next(shape, obj, id); - RUBY_ASSERT(next_shape->type == SHAPE_IVAR); - RUBY_ASSERT(index == 
(next_shape->next_iv_index - 1)); - next_shape_id = rb_shape_id(next_shape); - rb_shape_set_shape(obj, next_shape); + if (next_shape->type == SHAPE_OBJ_TOO_COMPLEX) { + struct rb_id_table * table = rb_id_table_create(shape->next_iv_index); + + // Evacuate all previous values from shape into id_table + rb_ivar_foreach(obj, rb_obj_evacuate_ivs_to_hash_table, (st_data_t)table); + + // Insert new value too + rb_id_table_insert(table, id, val); + RB_OBJ_WRITTEN(obj, Qundef, val); + + rb_shape_set_too_complex(obj); + RUBY_ASSERT(rb_shape_obj_too_complex(obj)); + + if (ROBJ_TRANSIENT_P(obj)) { + ROBJ_TRANSIENT_UNSET(obj); + } + else if (!(RBASIC(obj)->flags & ROBJECT_EMBED)) { + xfree(ROBJECT(obj)->as.heap.ivptr); + } + + ROBJECT(obj)->as.heap.ivptr = (VALUE *)table; + + return 0; + } + else { + rb_shape_set_shape(obj, next_shape); + RUBY_ASSERT(next_shape->type == SHAPE_IVAR); + RUBY_ASSERT(index == (next_shape->next_iv_index - 1)); + } } + RUBY_ASSERT(!rb_shape_obj_too_complex(obj)); RB_OBJ_WRITE(obj, &ROBJECT_IVPTR(obj)[index], val); return index; @@ -1554,7 +1609,17 @@ rb_ivar_defined(VALUE obj, ID id) attr_index_t index; if (SPECIAL_CONST_P(obj)) return Qfalse; - return RBOOL(rb_shape_get_iv_index(rb_shape_get_shape(obj), id, &index)); + if (rb_shape_obj_too_complex(obj)) { + VALUE idx; + if (!rb_id_table_lookup(ROBJECT_IV_HASH(obj), id, &idx)) { + return Qfalse; + } + + return Qtrue; + } + else { + return RBOOL(rb_shape_get_iv_index(rb_shape_get_shape(obj), id, &index)); + } } typedef int rb_ivar_foreach_callback_func(ID key, VALUE val, st_data_t arg); @@ -1564,6 +1629,7 @@ struct iv_itr_data { VALUE obj; struct gen_ivtbl * ivtbl; st_data_t arg; + rb_ivar_foreach_callback_func *func; }; static void @@ -1577,6 +1643,7 @@ iterate_over_shapes_with_callback(rb_shape_t *shape, rb_ivar_foreach_callback_fu VALUE * iv_list; switch (BUILTIN_TYPE(itr_data->obj)) { case T_OBJECT: + RUBY_ASSERT(!rb_shape_obj_too_complex(itr_data->obj)); iv_list = ROBJECT_IVPTR(itr_data->obj); break; case T_CLASS: @@ -1598,9 +1665,19 @@ iterate_over_shapes_with_callback(rb_shape_t *shape, rb_ivar_foreach_callback_fu case SHAPE_T_OBJECT: iterate_over_shapes_with_callback(rb_shape_get_parent(shape), callback, itr_data); return; + case SHAPE_OBJ_TOO_COMPLEX: + rb_bug("Unreachable\n"); } } +static enum rb_id_table_iterator_result +each_hash_iv(ID id, VALUE val, void *data) +{ + struct iv_itr_data * itr_data = (struct iv_itr_data *)data; + rb_ivar_foreach_callback_func *callback = itr_data->func; + return callback(id, val, itr_data->arg); +} + static void obj_ivar_each(VALUE obj, rb_ivar_foreach_callback_func *func, st_data_t arg) { @@ -1608,7 +1685,13 @@ obj_ivar_each(VALUE obj, rb_ivar_foreach_callback_func *func, st_data_t arg) struct iv_itr_data itr_data; itr_data.obj = obj; itr_data.arg = arg; - iterate_over_shapes_with_callback(shape, func, &itr_data); + itr_data.func = func; + if (rb_shape_obj_too_complex(obj)) { + rb_id_table_foreach(ROBJECT_IV_HASH(obj), each_hash_iv, &itr_data); + } + else { + iterate_over_shapes_with_callback(shape, func, &itr_data); + } } static void @@ -1742,6 +1825,10 @@ rb_ivar_count(VALUE obj) switch (BUILTIN_TYPE(obj)) { case T_OBJECT: + if (rb_shape_obj_too_complex(obj)) { + return ROBJECT_IV_COUNT(obj); + } + if (rb_shape_get_shape(obj)->next_iv_index > 0) { st_index_t i, count, num = ROBJECT_IV_COUNT(obj); const VALUE *const ivptr = ROBJECT_IVPTR(obj); @@ -1893,7 +1980,14 @@ rb_obj_remove_instance_variable(VALUE obj, VALUE name) rb_shape_transition_shape_remove_ivar(obj, id, shape, &val); 
break; case T_OBJECT: { - rb_shape_transition_shape_remove_ivar(obj, id, shape, &val); + if (rb_shape_obj_too_complex(obj)) { + if (rb_id_table_lookup(ROBJECT_IV_HASH(obj), id, &val)) { + rb_id_table_delete(ROBJECT_IV_HASH(obj), id); + } + } + else { + rb_shape_transition_shape_remove_ivar(obj, id, shape, &val); + } break; } default: { diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 84fe32c1b7..68b8d89abb 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -1213,6 +1213,8 @@ vm_getivar(VALUE obj, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_call } if (LIKELY(cached_id == shape_id)) { + RUBY_ASSERT(cached_id != OBJ_TOO_COMPLEX_SHAPE_ID); + if (index == ATTR_INDEX_NOT_SET) { return Qnil; } @@ -1242,24 +1244,31 @@ vm_getivar(VALUE obj, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_call rb_shape_t *shape = rb_shape_get_shape_by_id(shape_id); - if (rb_shape_get_iv_index(shape, id, &index)) { - // This fills in the cache with the shared cache object. - // "ent" is the shared cache object - fill_ivar_cache(iseq, ic, cc, is_attr, index, shape_id); - - // We fetched the ivar list above - val = ivar_list[index]; - RUBY_ASSERT(!UNDEF_P(val)); + if (shape_id == OBJ_TOO_COMPLEX_SHAPE_ID) { + if (!rb_id_table_lookup(ROBJECT_IV_HASH(obj), id, &val)) { + val = Qnil; + } } else { - if (is_attr) { - vm_cc_attr_index_initialize(cc, shape_id); + if (rb_shape_get_iv_index(shape, id, &index)) { + // This fills in the cache with the shared cache object. + // "ent" is the shared cache object + fill_ivar_cache(iseq, ic, cc, is_attr, index, shape_id); + + // We fetched the ivar list above + val = ivar_list[index]; + RUBY_ASSERT(!UNDEF_P(val)); } else { - vm_ic_attr_index_initialize(ic, shape_id); - } + if (is_attr) { + vm_cc_attr_index_initialize(cc, shape_id); + } + else { + vm_ic_attr_index_initialize(ic, shape_id); + } - val = Qnil; + val = Qnil; + } } } @@ -1283,6 +1292,8 @@ general_path: static void populate_cache(attr_index_t index, shape_id_t next_shape_id, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, bool is_attr) { + RUBY_ASSERT(next_shape_id != OBJ_TOO_COMPLEX_SHAPE_ID); + // Cache population code if (is_attr) { vm_cc_attr_index_set(cc, index, next_shape_id); @@ -1309,7 +1320,9 @@ vm_setivar_slowpath(VALUE obj, ID id, VALUE val, const rb_iseq_t *iseq, IVC ic, shape_id_t next_shape_id = ROBJECT_SHAPE_ID(obj); - populate_cache(index, next_shape_id, id, iseq, ic, cc, is_attr); + if (next_shape_id != OBJ_TOO_COMPLEX_SHAPE_ID) { + populate_cache(index, next_shape_id, id, iseq, ic, cc, is_attr); + } RB_DEBUG_COUNTER_INC(ivar_set_ic_miss_iv_hit); return val; @@ -1413,6 +1426,7 @@ vm_setivar(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t i VM_ASSERT(!rb_ractor_shareable_p(obj) || rb_obj_frozen_p(obj)); shape_id_t shape_id = ROBJECT_SHAPE_ID(obj); + RUBY_ASSERT(dest_shape_id != OBJ_TOO_COMPLEX_SHAPE_ID); if (LIKELY(shape_id == dest_shape_id)) { RUBY_ASSERT(dest_shape_id != INVALID_SHAPE_ID && shape_id != INVALID_SHAPE_ID); @@ -1440,6 +1454,7 @@ vm_setivar(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t i VALUE *ptr = ROBJECT_IVPTR(obj); + RUBY_ASSERT(!rb_shape_obj_too_complex(obj)); RB_OBJ_WRITE(obj, &ptr[index], val); RB_DEBUG_COUNTER_INC(ivar_set_ic_hit); diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index e33b0d5dba..996d7fa1c0 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -93,7 +93,9 @@ fn main() { .allowlist_function("rb_shape_get_next") .allowlist_function("rb_shape_id") 
.allowlist_function("rb_shape_transition_shape_capa") + .allowlist_function("rb_shape_obj_too_complex") .allowlist_var("SHAPE_ID_NUM_BITS") + .allowlist_var("OBJ_TOO_COMPLEX_SHAPE_ID") // From ruby/internal/intern/object.h .allowlist_function("rb_obj_is_kind_of") diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index ab87038c67..cec5c4671b 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -1963,6 +1963,11 @@ fn gen_get_ivar( recv_opnd: YARVOpnd, side_exit: CodePtr, ) -> CodegenStatus { + // If the object has a too complex shape, we exit + if comptime_receiver.shape_too_complex() { + return CantCompile; + } + let comptime_val_klass = comptime_receiver.class_of(); let starting_context = ctx.clone(); // make a copy for use with jit_chain_guard @@ -2192,7 +2197,8 @@ fn gen_setinstancevariable( // If the comptime receiver is frozen, writing an IV will raise an exception // and we don't want to JIT code to deal with that situation. - if comptime_receiver.is_frozen() { + // If the object has a too complex shape, we will also exit + if comptime_receiver.is_frozen() || comptime_receiver.shape_too_complex() { return CantCompile; } @@ -2281,39 +2287,53 @@ fn gen_setinstancevariable( megamorphic_side_exit, ); - let write_val = ctx.stack_pop(1); + let write_val; match ivar_index { // If we don't have an instance variable index, then we need to // transition out of the current shape. None => { - let mut shape = comptime_receiver.shape_of(); + let shape = comptime_receiver.shape_of(); + + let current_capacity = unsafe { (*shape).capacity }; + let new_capacity = current_capacity * 2; // If the object doesn't have the capacity to store the IV, // then we'll need to allocate it. - let needs_extension = unsafe { (*shape).next_iv_index >= (*shape).capacity }; + let needs_extension = unsafe { (*shape).next_iv_index >= current_capacity }; // We can write to the object, but we need to transition the shape let ivar_index = unsafe { (*shape).next_iv_index } as usize; - if needs_extension { - let current_capacity = unsafe { (*shape).capacity }; - let newsize = current_capacity * 2; - + let capa_shape = if needs_extension { // We need to add an extended table to the object // First, create an outgoing transition that increases the // capacity - shape = unsafe { - rb_shape_transition_shape_capa(shape, newsize) - }; + Some(unsafe { rb_shape_transition_shape_capa(shape, new_capacity) }) + } else { + None + }; + + let dest_shape = if capa_shape.is_none() { + unsafe { rb_shape_get_next(shape, comptime_receiver, ivar_name) } + } else { + unsafe { rb_shape_get_next(capa_shape.unwrap(), comptime_receiver, ivar_name) } + }; + + let new_shape_id = unsafe { rb_shape_id(dest_shape) }; + if new_shape_id == OBJ_TOO_COMPLEX_SHAPE_ID { + return CantCompile; + } + + if needs_extension { // Generate the C call so that runtime code will increase // the capacity and set the buffer. asm.ccall(rb_ensure_iv_list_size as *const u8, vec![ recv, Opnd::UImm(current_capacity.into()), - Opnd::UImm(newsize.into()) + Opnd::UImm(new_capacity.into()) ] ); @@ -2321,10 +2341,7 @@ fn gen_setinstancevariable( recv = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)) } - let new_shape_id = unsafe { - rb_shape_id(rb_shape_get_next(shape, comptime_receiver, ivar_name)) - }; - + write_val = ctx.stack_pop(1); gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, needs_extension); asm.comment("write shape"); @@ -2342,6 +2359,7 @@ fn gen_setinstancevariable( // the iv index by searching up the shape tree. 
If we've // made the transition already, then there's no reason to // update the shape on the object. Just set the IV. + write_val = ctx.stack_pop(1); gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, false); }, } diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index b6228fe64b..ba09d4119f 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -398,6 +398,10 @@ impl VALUE { unsafe { rb_obj_frozen_p(self) != VALUE(0) } } + pub fn shape_too_complex(self) -> bool { + unsafe { rb_shape_obj_too_complex(self) } + } + pub fn shape_id_of(self) -> u32 { unsafe { rb_shape_get_shape_id(self) } } diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 3e00aa3689..af77747861 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -124,6 +124,7 @@ impl<T> ::std::cmp::PartialEq for __BindgenUnionField<T> { } impl<T> ::std::cmp::Eq for __BindgenUnionField<T> {} pub const SHAPE_ID_NUM_BITS: u32 = 32; +pub const OBJ_TOO_COMPLEX_SHAPE_ID: u32 = 11; pub const INTEGER_REDEFINED_OP_FLAG: u32 = 1; pub const FLOAT_REDEFINED_OP_FLAG: u32 = 2; pub const STRING_REDEFINED_OP_FLAG: u32 = 4; @@ -1112,6 +1113,7 @@ extern "C" { pub fn rb_shape_get_next(shape: *mut rb_shape_t, obj: VALUE, id: ID) -> *mut rb_shape_t; pub fn rb_shape_get_iv_index(shape: *mut rb_shape_t, id: ID, value: *mut attr_index_t) -> bool; pub fn rb_shape_id(shape: *mut rb_shape_t) -> shape_id_t; + pub fn rb_shape_obj_too_complex(obj: VALUE) -> bool; pub fn rb_ary_tmp_new_from_values( arg1: VALUE, arg2: ::std::os::raw::c_long, |
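Not part of the commit: a minimal sketch of the behavior the new tests exercise, modeled on test_too_complex_ractor above. It assumes a build where the debug-only RubyVM::Shape API is available, and the Sketch class name is made up for illustration.

    # Illustrative only. After a class has used up its SHAPE_MAX_VARIATIONS (8)
    # shape variations, an instance that adds yet another unseen ivar transitions
    # to the OBJ_TOO_COMPLEX shape and stores its ivars in an id_table instead of
    # the usual flat ivar array.
    class Sketch; end

    RubyVM::Shape::SHAPE_MAX_VARIATIONS.times do |i|
      Sketch.new.instance_variable_set(:"@unique_#{i}", Object.new)
    end

    obj = Sketch.new
    obj.instance_variable_set(:@one_more, 42)

    p RubyVM::Shape.of(obj).too_complex?    # => true
    p obj.instance_variable_get(:@one_more) # => 42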