authorJemma Issroff <jemmaissroff@gmail.com>2022-12-08 17:16:52 -0500
committerAaron Patterson <aaron.patterson@gmail.com>2022-12-15 10:06:04 -0800
commitc1ab6ddc9a6fa228caa5d26b118b54855051279c (patch)
treea3361c22480e38d798dfa975bdabf47a832a9fb0
parenta3d552aedd190b0f21a4f6479f0ef1d2ce90189b (diff)
Transition complex objects to "too complex" shape
When an object becomes "too complex" (in other words it has too many variations in the shape tree), we transition it to use a "too complex" shape and use a hash for storing instance variables. Without this patch, there were rare cases where shape tree growth could "explode" and cause performance degradation on what would otherwise have been cached fast paths. This patch puts a limit on shape tree growth, and gracefully degrades in the rare case where there could be a factorial growth in the shape tree. For example: ```ruby class NG; end HUGE_NUMBER.times do NG.new.instance_variable_set(:"@unique_ivar_#{_1}", 1) end ``` We consider objects to be "too complex" when the object's class has more than SHAPE_MAX_VARIATIONS (currently 8) leaf nodes in the shape tree and the object introduces a new variation (a new leaf node) associated with that class. For example, new variations on instances of the following class would be considered "too complex" because those instances create more than 8 leaves in the shape tree: ```ruby class Foo; end 9.times { Foo.new.instance_variable_set(":@uniq_#{_1}", 1) } ``` However, the following class is *not* too complex because it only has one leaf in the shape tree: ```ruby class Foo def initialize @a = @b = @c = @d = @e = @f = @g = @h = @i = nil end end 9.times { Foo.new } `` This case is rare, so we don't expect this change to impact performance of most applications, but it needs to be handled. Co-Authored-By: Aaron Patterson <tenderlove@ruby-lang.org>
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/6931
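To make the threshold concrete, here is a minimal sketch of the behavior described above, modeled on the tests this patch adds. It assumes a build where the debug-only RubyVM::Shape API is defined (the test suite guards on `defined?(RubyVM::Shape)`); the class and ivar names are illustrative.

```ruby
class Foo; end

# Each unique ivar set on a fresh instance adds a new leaf (variation)
# under Foo's T_OBJECT shape, consuming the per-class budget.
RubyVM::Shape::SHAPE_MAX_VARIATIONS.times do |i|
  Foo.new.instance_variable_set(:"@unique_#{i}", 1)
end

# The next unique ivar is denied a new shape and transitions this
# instance to the shared "too complex" shape; its ivars now live in a
# hash, but reads and writes behave the same.
tc = Foo.new
tc.instance_variable_set(:@one_more, 3)

p RubyVM::Shape.of(tc).too_complex?    # => true
p tc.instance_variable_get(:@one_more) # => 3
```

Instances that stay on existing shape paths, like the single-leaf `Foo` in the message above, never hit this fallback.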
-rw-r--r--debug_counter.h1
-rw-r--r--ext/objspace/depend1
-rw-r--r--ext/objspace/objspace_dump.c8
-rw-r--r--gc.c52
-rw-r--r--id_table.h5
-rw-r--r--internal/gc.h7
-rw-r--r--internal/variable.h1
-rw-r--r--object.c14
-rw-r--r--ractor.c83
-rw-r--r--shape.c156
-rw-r--r--shape.h45
-rw-r--r--test/ruby/test_shapes.rb186
-rw-r--r--transient_heap.c1
-rw-r--r--variable.c112
-rw-r--r--vm_insnhelper.c43
-rw-r--r--yjit/bindgen/src/main.rs2
-rw-r--r--yjit/src/codegen.rs50
-rw-r--r--yjit/src/cruby.rs4
-rw-r--r--yjit/src/cruby_bindings.inc.rs2
19 files changed, 650 insertions, 123 deletions
diff --git a/debug_counter.h b/debug_counter.h
index b0047685f0..6e0b8dee60 100644
--- a/debug_counter.h
+++ b/debug_counter.h
@@ -243,6 +243,7 @@ RB_DEBUG_COUNTER(obj_wb_unprotect)
RB_DEBUG_COUNTER(obj_obj_embed)
RB_DEBUG_COUNTER(obj_obj_transient)
RB_DEBUG_COUNTER(obj_obj_ptr)
+RB_DEBUG_COUNTER(obj_obj_too_complex)
RB_DEBUG_COUNTER(obj_str_ptr)
RB_DEBUG_COUNTER(obj_str_embed)
diff --git a/ext/objspace/depend b/ext/objspace/depend
index f83607236a..de5fa6c6a3 100644
--- a/ext/objspace/depend
+++ b/ext/objspace/depend
@@ -540,6 +540,7 @@ objspace_dump.o: $(top_srcdir)/id_table.h
objspace_dump.o: $(top_srcdir)/internal.h
objspace_dump.o: $(top_srcdir)/internal/array.h
objspace_dump.o: $(top_srcdir)/internal/basic_operators.h
+objspace_dump.o: $(top_srcdir)/internal/class.h
objspace_dump.o: $(top_srcdir)/internal/compilers.h
objspace_dump.o: $(top_srcdir)/internal/gc.h
objspace_dump.o: $(top_srcdir)/internal/hash.h
diff --git a/ext/objspace/objspace_dump.c b/ext/objspace/objspace_dump.c
index 4c261a7a35..228ed2fa7c 100644
--- a/ext/objspace/objspace_dump.c
+++ b/ext/objspace/objspace_dump.c
@@ -13,6 +13,7 @@
**********************************************************************/
#include "gc.h"
+#include "id_table.h"
#include "internal.h"
#include "internal/array.h"
#include "internal/class.h"
@@ -546,7 +547,7 @@ dump_object(VALUE obj, struct dump_config *dc)
case T_OBJECT:
dump_append(dc, ", \"ivars\":");
- dump_append_lu(dc, ROBJECT_IV_CAPACITY(obj));
+ dump_append_lu(dc, ROBJECT_IV_COUNT(obj));
break;
case T_FILE:
@@ -735,7 +736,7 @@ shape_i(rb_shape_t *shape, void *data)
dump_append_sizet(dc, rb_shape_depth(shape));
dump_append(dc, ", \"shape_type\":");
- switch(shape->type) {
+ switch((enum shape_type)shape->type) {
case SHAPE_ROOT:
dump_append(dc, "\"ROOT\"");
break;
@@ -762,6 +763,9 @@ shape_i(rb_shape_t *shape, void *data)
case SHAPE_T_OBJECT:
dump_append(dc, "\"T_OBJECT\"");
break;
+ case SHAPE_OBJ_TOO_COMPLEX:
+ dump_append(dc, "\"OBJ_TOO_COMPLEX\"");
+ break;
default:
rb_bug("[objspace] unexpected shape type");
}
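At the Ruby level, this hunk changes what the objspace dump reports. A small sketch, assuming the bundled objspace extension:

```ruby
require "objspace"

o = Object.new
o.instance_variable_set(:@a, 1)

# "ivars" in the dump is now ROBJECT_IV_COUNT, the number of ivars
# actually set (1 here) rather than the allocated capacity, and it stays
# meaningful once an object's ivars move into a hash. Shape dumps can
# also now report the new "OBJ_TOO_COMPLEX" shape type.
puts ObjectSpace.dump(o)
```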
diff --git a/gc.c b/gc.c
index a10a2193e4..1a7f3ab095 100644
--- a/gc.c
+++ b/gc.c
@@ -2965,6 +2965,7 @@ rb_class_instance_allocate_internal(VALUE klass, VALUE flags, bool wb_protected)
ROBJECT_SET_SHAPE_ID(obj, ROBJECT_SHAPE_ID(obj) + SIZE_POOL_COUNT);
#if RUBY_DEBUG
+ RUBY_ASSERT(!rb_shape_obj_too_complex(obj));
VALUE *ptr = ROBJECT_IVPTR(obj);
for (size_t i = 0; i < ROBJECT_IV_CAPACITY(obj); i++) {
ptr[i] = Qundef;
@@ -3451,7 +3452,11 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
switch (BUILTIN_TYPE(obj)) {
case T_OBJECT:
- if (RANY(obj)->as.basic.flags & ROBJECT_EMBED) {
+ if (rb_shape_obj_too_complex(obj)) {
+ RB_DEBUG_COUNTER_INC(obj_obj_too_complex);
+ rb_id_table_free(ROBJECT_IV_HASH(obj));
+ }
+ else if (RANY(obj)->as.basic.flags & ROBJECT_EMBED) {
RB_DEBUG_COUNTER_INC(obj_obj_embed);
}
else if (ROBJ_TRANSIENT_P(obj)) {
@@ -4875,7 +4880,10 @@ obj_memsize_of(VALUE obj, int use_all_types)
switch (BUILTIN_TYPE(obj)) {
case T_OBJECT:
- if (!(RBASIC(obj)->flags & ROBJECT_EMBED)) {
+ if (rb_shape_obj_too_complex(obj)) {
+ size += rb_id_table_memsize(ROBJECT_IV_HASH(obj));
+ }
+ else if (!(RBASIC(obj)->flags & ROBJECT_EMBED)) {
size += ROBJECT_IV_CAPACITY(obj) * sizeof(VALUE);
}
break;
@@ -7297,14 +7305,23 @@ gc_mark_children(rb_objspace_t *objspace, VALUE obj)
case T_OBJECT:
{
- const VALUE * const ptr = ROBJECT_IVPTR(obj);
-
- uint32_t i, len = ROBJECT_IV_COUNT(obj);
- for (i = 0; i < len; i++) {
- gc_mark(objspace, ptr[i]);
+ rb_shape_t *shape = rb_shape_get_shape_by_id(ROBJECT_SHAPE_ID(obj));
+ if (rb_shape_obj_too_complex(obj)) {
+ mark_m_tbl(objspace, ROBJECT_IV_HASH(obj));
}
+ else {
+ const VALUE * const ptr = ROBJECT_IVPTR(obj);
- rb_shape_t *shape = rb_shape_get_shape_by_id(ROBJECT_SHAPE_ID(obj));
+ uint32_t i, len = ROBJECT_IV_COUNT(obj);
+ for (i = 0; i < len; i++) {
+ gc_mark(objspace, ptr[i]);
+ }
+
+ if (LIKELY(during_gc) &&
+ ROBJ_TRANSIENT_P(obj)) {
+ rb_transient_heap_mark(obj, ptr);
+ }
+ }
if (shape) {
VALUE klass = RBASIC_CLASS(obj);
@@ -7314,11 +7331,6 @@ gc_mark_children(rb_objspace_t *objspace, VALUE obj)
RCLASS_EXT(klass)->max_iv_count = num_of_ivs;
}
}
-
- if (LIKELY(during_gc) &&
- ROBJ_TRANSIENT_P(obj)) {
- rb_transient_heap_mark(obj, ptr);
- }
}
break;
@@ -8426,7 +8438,12 @@ gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, V
break;
case T_OBJECT:
- obj_size = rb_obj_embedded_size(ROBJECT_IV_CAPACITY(src));
+ if (rb_shape_obj_too_complex(src)) {
+ return &size_pools[0];
+ }
+ else {
+ obj_size = rb_obj_embedded_size(ROBJECT_IV_CAPACITY(src));
+ }
break;
case T_STRING:
@@ -10038,11 +10055,18 @@ gc_ref_update_array(rb_objspace_t * objspace, VALUE v)
}
}
+static void update_m_tbl(rb_objspace_t *objspace, struct rb_id_table *tbl);
+
static void
gc_ref_update_object(rb_objspace_t *objspace, VALUE v)
{
VALUE *ptr = ROBJECT_IVPTR(v);
+ if (rb_shape_obj_too_complex(v)) {
+ update_m_tbl(objspace, ROBJECT_IV_HASH(v));
+ return;
+ }
+
#if USE_RVARGC
uint32_t numiv = ROBJECT_IV_CAPACITY(v);
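The memsize hunk above is observable from Ruby: for a hash-backed object, `ObjectSpace.memsize_of` now accounts for the id table rather than `capacity * sizeof(VALUE)`. A rough sketch (illustrative names; RubyVM::Shape requires a debug build):

```ruby
require "objspace"

class Bar; end
RubyVM::Shape::SHAPE_MAX_VARIATIONS.times { |i| Bar.new.instance_variable_set(:"@u#{i}", 1) }
tc = Bar.new
tc.instance_variable_set(:@extra, 1) # tc's ivars are now in an id table

# Includes rb_id_table_memsize of the backing table; during compaction,
# gc_compact_destination_pool also keeps such objects in the first size
# pool instead of sizing them by ivar capacity.
p ObjectSpace.memsize_of(tc)
```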
diff --git a/id_table.h b/id_table.h
index 9d9eb5648e..f72e2d1d92 100644
--- a/id_table.h
+++ b/id_table.h
@@ -19,7 +19,6 @@ struct rb_id_table *rb_id_table_create(size_t size);
void rb_id_table_free(struct rb_id_table *tbl);
void rb_id_table_clear(struct rb_id_table *tbl);
-size_t rb_id_table_size(const struct rb_id_table *tbl);
size_t rb_id_table_memsize(const struct rb_id_table *tbl);
int rb_id_table_insert(struct rb_id_table *tbl, ID id, VALUE val);
@@ -33,4 +32,8 @@ void rb_id_table_foreach(struct rb_id_table *tbl, rb_id_table_foreach_func_t *fu
void rb_id_table_foreach_values(struct rb_id_table *tbl, rb_id_table_foreach_values_func_t *func, void *data);
void rb_id_table_foreach_values_with_replace(struct rb_id_table *tbl, rb_id_table_foreach_values_func_t *func, rb_id_table_update_value_callback_func_t *replace, void *data);
+RUBY_SYMBOL_EXPORT_BEGIN
+size_t rb_id_table_size(const struct rb_id_table *tbl);
+RUBY_SYMBOL_EXPORT_END
+
#endif /* RUBY_ID_TABLE_H */
diff --git a/internal/gc.h b/internal/gc.h
index 5b2b9e8f70..d43eb29578 100644
--- a/internal/gc.h
+++ b/internal/gc.h
@@ -14,7 +14,6 @@
#include "internal/compilers.h" /* for __has_attribute */
#include "ruby/ruby.h" /* for rb_event_flag_t */
-#include "shape.h"
struct rb_execution_context_struct; /* in vm_core.h */
struct rb_objspace; /* in vm_core.h */
@@ -68,11 +67,7 @@ struct rb_objspace; /* in vm_core.h */
rb_obj_write((VALUE)(a), UNALIGNED_MEMBER_ACCESS((VALUE *)(slot)), \
(VALUE)(b), __FILE__, __LINE__)
-#if USE_RVARGC && SHAPE_IN_BASIC_FLAGS
-# define SIZE_POOL_COUNT 5
-#else
-# define SIZE_POOL_COUNT 1
-#endif
+#include "shape.h"
#define RCLASS_EXT_EMBEDDED (SIZE_POOL_COUNT > 1)
diff --git a/internal/variable.h b/internal/variable.h
index e59a0f1924..3933279633 100644
--- a/internal/variable.h
+++ b/internal/variable.h
@@ -38,6 +38,7 @@ static inline void ROBJ_TRANSIENT_UNSET(VALUE obj);
struct gen_ivtbl;
int rb_gen_ivtbl_get(VALUE obj, ID id, struct gen_ivtbl **ivtbl);
+int rb_obj_evacuate_ivs_to_hash_table(ID key, VALUE val, st_data_t arg);
RUBY_SYMBOL_EXPORT_BEGIN
/* variable.c (export) */
diff --git a/object.c b/object.c
index 73528fe935..cd50960aba 100644
--- a/object.c
+++ b/object.c
@@ -272,8 +272,20 @@ rb_obj_copy_ivar(VALUE dest, VALUE obj)
RUBY_ASSERT(!RB_TYPE_P(obj, T_CLASS) && !RB_TYPE_P(obj, T_MODULE));
RUBY_ASSERT(BUILTIN_TYPE(dest) == BUILTIN_TYPE(obj));
- uint32_t src_num_ivs = RBASIC_IV_COUNT(obj);
rb_shape_t * src_shape = rb_shape_get_shape(obj);
+
+ if (rb_shape_id(src_shape) == OBJ_TOO_COMPLEX_SHAPE_ID) {
+ struct rb_id_table * table = rb_id_table_create(rb_id_table_size(ROBJECT_IV_HASH(obj)));
+
+ rb_ivar_foreach(obj, rb_obj_evacuate_ivs_to_hash_table, (st_data_t)table);
+ rb_shape_set_too_complex(dest);
+
+ ROBJECT(dest)->as.heap.ivptr = (VALUE *)table;
+
+ return;
+ }
+
+ uint32_t src_num_ivs = RBASIC_IV_COUNT(obj);
rb_shape_t * shape_to_set_on_dest = src_shape;
VALUE * src_buf;
VALUE * dest_buf;
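At the Ruby level this covers `dup` and `clone`, which go through `rb_obj_copy_ivar`: for a hash-backed source, the ivars are evacuated into a fresh id table and the copy is marked "too complex" as well. A minimal sketch (illustrative names):

```ruby
class Baz; end
RubyVM::Shape::SHAPE_MAX_VARIATIONS.times { |i| Baz.new.instance_variable_set(:"@u#{i}", 1) }
tc = Baz.new
tc.instance_variable_set(:@extra, 42)

copy = tc.dup
p copy.instance_variable_get(:@extra) # => 42, copied into a fresh table
p RubyVM::Shape.of(copy).too_complex? # => true, the copy is hash-backed too
```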
diff --git a/ractor.c b/ractor.c
index 3605da74e6..fddcec382b 100644
--- a/ractor.c
+++ b/ractor.c
@@ -2248,6 +2248,19 @@ obj_hash_traverse_i(VALUE key, VALUE val, VALUE ptr)
return ST_CONTINUE;
}
+static enum rb_id_table_iterator_result
+obj_hash_iv_traverse_i(VALUE val, void *ptr)
+{
+ struct obj_traverse_callback_data *d = (struct obj_traverse_callback_data *)ptr;
+
+ if (obj_traverse_i(val, d->data)) {
+ d->stop = true;
+ return ID_TABLE_STOP;
+ }
+
+ return ID_TABLE_CONTINUE;
+}
+
static void
obj_traverse_reachable_i(VALUE obj, void *ptr)
{
@@ -2306,12 +2319,22 @@ obj_traverse_i(VALUE obj, struct obj_traverse_data *data)
case T_OBJECT:
{
- uint32_t len = ROBJECT_IV_COUNT(obj);
- VALUE *ptr = ROBJECT_IVPTR(obj);
+ if (rb_shape_obj_too_complex(obj)) {
+ struct obj_traverse_callback_data d = {
+ .stop = false,
+ .data = data,
+ };
+ rb_id_table_foreach_values(ROBJECT_IV_HASH(obj), obj_hash_iv_traverse_i, &d);
+ if (d.stop) return 1;
+ }
+ else {
+ uint32_t len = ROBJECT_IV_COUNT(obj);
+ VALUE *ptr = ROBJECT_IVPTR(obj);
- for (uint32_t i=0; i<len; i++) {
- VALUE val = ptr[i];
- if (!UNDEF_P(val) && obj_traverse_i(val, data)) return 1;
+ for (uint32_t i=0; i<len; i++) {
+ VALUE val = ptr[i];
+ if (!UNDEF_P(val) && obj_traverse_i(val, data)) return 1;
+ }
}
}
break;
@@ -2656,6 +2679,30 @@ obj_hash_traverse_replace_i(st_data_t *key, st_data_t *val, st_data_t ptr, int e
return ST_CONTINUE;
}
+static enum rb_id_table_iterator_result
+obj_iv_hash_traverse_replace_foreach_i(VALUE val, void *data)
+{
+ return ID_TABLE_REPLACE;
+}
+
+static enum rb_id_table_iterator_result
+obj_iv_hash_traverse_replace_i(VALUE *val, void *ptr, int exists)
+{
+ struct obj_traverse_replace_callback_data *d = (struct obj_traverse_replace_callback_data *)ptr;
+ struct obj_traverse_replace_data *data = d->data;
+
+ if (obj_traverse_replace_i(*val, data)) {
+ d->stop = true;
+ return ID_TABLE_STOP;
+ }
+ else if (*val != data->replacement) {
+ VALUE v = *val = data->replacement;
+ RB_OBJ_WRITTEN(d->src, Qundef, v);
+ }
+
+ return ID_TABLE_CONTINUE;
+}
+
static struct st_table *
obj_traverse_replace_rec(struct obj_traverse_replace_data *data)
{
@@ -2756,16 +2803,30 @@ obj_traverse_replace_i(VALUE obj, struct obj_traverse_replace_data *data)
case T_OBJECT:
{
+ if (rb_shape_obj_too_complex(obj)) {
+ struct rb_id_table * table = ROBJECT_IV_HASH(obj);
+ struct obj_traverse_replace_callback_data d = {
+ .stop = false,
+ .data = data,
+ .src = obj,
+ };
+ rb_id_table_foreach_values_with_replace(table,
+ obj_iv_hash_traverse_replace_foreach_i,
+ obj_iv_hash_traverse_replace_i,
+ (void *)&d);
+ }
+ else {
#if USE_TRANSIENT_HEAP
- if (data->move) rb_obj_transient_heap_evacuate(obj, TRUE);
+ if (data->move) rb_obj_transient_heap_evacuate(obj, TRUE);
#endif
- uint32_t len = ROBJECT_IV_COUNT(obj);
- VALUE *ptr = ROBJECT_IVPTR(obj);
+ uint32_t len = ROBJECT_IV_COUNT(obj);
+ VALUE *ptr = ROBJECT_IVPTR(obj);
- for (uint32_t i=0; i<len; i++) {
- if (!UNDEF_P(ptr[i])) {
- CHECK_AND_REPLACE(ptr[i]);
+ for (uint32_t i=0; i<len; i++) {
+ if (!UNDEF_P(ptr[i])) {
+ CHECK_AND_REPLACE(ptr[i]);
+ }
}
}
}
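These traversal hooks are what keep Ractor semantics intact for hash-backed objects; the new tests below exercise both paths. A condensed sketch (illustrative names; Ractors emit an "experimental" warning):

```ruby
class Qux; end
RubyVM::Shape::SHAPE_MAX_VARIATIONS.times { |i| Qux.new.instance_variable_set(:"@u#{i}", 1) }
tc = Qux.new
tc.instance_variable_set(:@extra, 3)

# Sending a non-shareable too-complex object deep-copies it, walking the
# ivar hash via the new replace callbacks:
p Ractor.new(tc) { |x| x.instance_variable_get(:@extra) }.take # => 3

# Deep-freezing walks it via the new traverse callback:
p Ractor.make_shareable(tc).instance_variable_get(:@extra)     # => 3
```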
diff --git a/shape.c b/shape.c
index 60bb427b44..ac9a016c00 100644
--- a/shape.c
+++ b/shape.c
@@ -99,13 +99,13 @@ rb_shape_get_shape_id(VALUE obj)
#else
switch (BUILTIN_TYPE(obj)) {
case T_OBJECT:
- return ROBJECT_SHAPE_ID(obj);
- break;
+ return ROBJECT_SHAPE_ID(obj);
+ break;
case T_CLASS:
case T_MODULE:
- return RCLASS_SHAPE_ID(obj);
+ return RCLASS_SHAPE_ID(obj);
default:
- return rb_generic_shape_id(obj);
+ return rb_generic_shape_id(obj);
}
#endif
}
@@ -130,50 +130,57 @@ rb_shape_get_shape(VALUE obj)
}
static rb_shape_t*
-get_next_shape_internal(rb_shape_t * shape, ID id, enum shape_type shape_type, bool * variation_created)
+get_next_shape_internal(rb_shape_t * shape, ID id, enum shape_type shape_type, bool * variation_created, bool new_shapes_allowed)
{
rb_shape_t *res = NULL;
- RB_VM_LOCK_ENTER();
- {
- bool had_edges = !!shape->edges;
-
- *variation_created = false;
-
- if (!shape->edges) {
- shape->edges = rb_id_table_create(0);
- }
- // Lookup the shape in edges - if there's already an edge and a corresponding shape for it,
- // we can return that. Otherwise, we'll need to get a new shape
- if (!rb_id_table_lookup(shape->edges, id, (VALUE *)&res)) {
- *variation_created = had_edges;
+ // There should never be outgoing edges from "too complex"
+ RUBY_ASSERT(rb_shape_id(shape) != OBJ_TOO_COMPLEX_SHAPE_ID);
- rb_shape_t * new_shape = rb_shape_alloc(id, shape);
+ *variation_created = false;
- new_shape->type = (uint8_t)shape_type;
- new_shape->capacity = shape->capacity;
+ if (new_shapes_allowed) {
+ RB_VM_LOCK_ENTER();
+ {
+ bool had_edges = !!shape->edges;
- switch (shape_type) {
- case SHAPE_IVAR:
- new_shape->next_iv_index = shape->next_iv_index + 1;
- break;
- case SHAPE_CAPACITY_CHANGE:
- case SHAPE_FROZEN:
- case SHAPE_T_OBJECT:
- new_shape->next_iv_index = shape->next_iv_index;
- break;
- case SHAPE_INITIAL_CAPACITY:
- case SHAPE_ROOT:
- rb_bug("Unreachable");
- break;
+ if (!shape->edges) {
+ shape->edges = rb_id_table_create(0);
}
- rb_id_table_insert(shape->edges, id, (VALUE)new_shape);
+ // Lookup the shape in edges - if there's already an edge and a corresponding shape for it,
+ // we can return that. Otherwise, we'll need to get a new shape
+ if (!rb_id_table_lookup(shape->edges, id, (VALUE *)&res)) {
+ *variation_created = had_edges;
+
+ rb_shape_t * new_shape = rb_shape_alloc(id, shape);
+
+ new_shape->type = (uint8_t)shape_type;
+ new_shape->capacity = shape->capacity;
+
+ switch (shape_type) {
+ case SHAPE_IVAR:
+ new_shape->next_iv_index = shape->next_iv_index + 1;
+ break;
+ case SHAPE_CAPACITY_CHANGE:
+ case SHAPE_FROZEN:
+ case SHAPE_T_OBJECT:
+ new_shape->next_iv_index = shape->next_iv_index;
+ break;
+ case SHAPE_OBJ_TOO_COMPLEX:
+ case SHAPE_INITIAL_CAPACITY:
+ case SHAPE_ROOT:
+ rb_bug("Unreachable");
+ break;
+ }
+
+ rb_id_table_insert(shape->edges, id, (VALUE)new_shape);
- res = new_shape;
+ res = new_shape;
+ }
}
+ RB_VM_LOCK_LEAVE();
}
- RB_VM_LOCK_LEAVE();
return res;
}
@@ -192,6 +199,7 @@ move_iv(VALUE obj, ID id, attr_index_t from, attr_index_t to)
RCLASS_IVPTR(obj)[to] = RCLASS_IVPTR(obj)[from];
break;
case T_OBJECT:
+ RUBY_ASSERT(!rb_shape_obj_too_complex(obj));
ROBJECT_IVPTR(obj)[to] = ROBJECT_IVPTR(obj)[from];
break;
default: {
@@ -242,7 +250,7 @@ remove_shape_recursive(VALUE obj, ID id, rb_shape_t * shape, VALUE * removed)
// has the same attributes as this shape.
if (new_parent) {
bool dont_care;
- rb_shape_t * new_child = get_next_shape_internal(new_parent, shape->edge_name, shape->type, &dont_care);
+ rb_shape_t * new_child = get_next_shape_internal(new_parent, shape->edge_name, shape->type, &dont_care, true);
new_child->capacity = shape->capacity;
if (new_child->type == SHAPE_IVAR) {
move_iv(obj, id, shape->next_iv_index - 1, new_child->next_iv_index - 1);
@@ -275,7 +283,7 @@ rb_shape_transition_shape_frozen(VALUE obj)
RUBY_ASSERT(shape);
RUBY_ASSERT(RB_OBJ_FROZEN(obj));
- if (rb_shape_frozen_shape_p(shape)) {
+ if (rb_shape_frozen_shape_p(shape) || rb_shape_obj_too_complex(obj)) {
return;
}
@@ -287,7 +295,7 @@ rb_shape_transition_shape_frozen(VALUE obj)
}
bool dont_care;
- next_shape = get_next_shape_internal(shape, (ID)id_frozen, SHAPE_FROZEN, &dont_care);
+ next_shape = get_next_shape_internal(shape, (ID)id_frozen, SHAPE_FROZEN, &dont_care, true);
RUBY_ASSERT(next_shape);
rb_shape_set_shape(obj, next_shape);
@@ -302,7 +310,7 @@ rb_shape_get_next_iv_shape(rb_shape_t* shape, ID id)
{
RUBY_ASSERT(!is_instance_id(id) || RTEST(rb_sym2str(ID2SYM(id))));
bool dont_care;
- return get_next_shape_internal(shape, id, SHAPE_IVAR, &dont_care);
+ return get_next_shape_internal(shape, id, SHAPE_IVAR, &dont_care, true);
}
rb_shape_t *
@@ -310,8 +318,20 @@ rb_shape_get_next(rb_shape_t* shape, VALUE obj, ID id)
{
RUBY_ASSERT(!is_instance_id(id) || RTEST(rb_sym2str(ID2SYM(id))));
- bool variation_created;
- rb_shape_t * new_shape = get_next_shape_internal(shape, id, SHAPE_IVAR, &variation_created);
+ bool allow_new_shape = true;
+
+ if (BUILTIN_TYPE(obj) == T_OBJECT) {
+ VALUE klass = rb_obj_class(obj);
+ allow_new_shape = RCLASS_EXT(klass)->variation_count < SHAPE_MAX_VARIATIONS;
+ }
+
+ bool variation_created = false;
+ rb_shape_t * new_shape = get_next_shape_internal(shape, id, SHAPE_IVAR, &variation_created, allow_new_shape);
+
+ if (!new_shape) {
+ RUBY_ASSERT(BUILTIN_TYPE(obj) == T_OBJECT);
+ new_shape = rb_shape_get_shape_by_id(OBJ_TOO_COMPLEX_SHAPE_ID);
+ }
// Check if we should update max_iv_count on the object's class
if (BUILTIN_TYPE(obj) == T_OBJECT) {
@@ -333,7 +353,7 @@ rb_shape_transition_shape_capa(rb_shape_t* shape, uint32_t new_capacity)
{
ID edge_name = rb_make_temporary_id(new_capacity);
bool dont_care;
- rb_shape_t * new_shape = get_next_shape_internal(shape, edge_name, SHAPE_CAPACITY_CHANGE, &dont_care);
+ rb_shape_t * new_shape = get_next_shape_internal(shape, edge_name, SHAPE_CAPACITY_CHANGE, &dont_care, true);
new_shape->capacity = new_capacity;
return new_shape;
}
@@ -341,6 +361,10 @@ rb_shape_transition_shape_capa(rb_shape_t* shape, uint32_t new_capacity)
bool
rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t *value)
{
+ // It doesn't make sense to ask for the index of an IV that's stored
+ // on an object that is "too complex" as it uses a hash for storing IVs
+ RUBY_ASSERT(rb_shape_id(shape) != OBJ_TOO_COMPLEX_SHAPE_ID);
+
while (shape->parent_id != INVALID_SHAPE_ID) {
if (shape->edge_name == id) {
enum shape_type shape_type;
@@ -356,6 +380,7 @@ rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t *value)
case SHAPE_INITIAL_CAPACITY:
case SHAPE_T_OBJECT:
return false;
+ case SHAPE_OBJ_TOO_COMPLEX:
case SHAPE_FROZEN:
rb_bug("Ivar should not exist on transition\n");
}
@@ -448,11 +473,28 @@ rb_shape_rebuild_shape(rb_shape_t * initial_shape, rb_shape_t * dest_shape)
case SHAPE_INITIAL_CAPACITY:
case SHAPE_T_OBJECT:
break;
+ case SHAPE_OBJ_TOO_COMPLEX:
+ rb_bug("Unreachable\n");
+ break;
}
return midway_shape;
}
+bool
+rb_shape_obj_too_complex(VALUE obj)
+{
+ return rb_shape_get_shape_id(obj) == OBJ_TOO_COMPLEX_SHAPE_ID;
+}
+
+void
+rb_shape_set_too_complex(VALUE obj)
+{
+ RUBY_ASSERT(BUILTIN_TYPE(obj) == T_OBJECT);
+ RUBY_ASSERT(!rb_shape_obj_too_complex(obj));
+ rb_shape_set_shape_id(obj, OBJ_TOO_COMPLEX_SHAPE_ID);
+}
+
size_t
rb_shape_edges_count(rb_shape_t *shape)
{
@@ -519,6 +561,19 @@ rb_shape_capacity(VALUE self)
}
static VALUE
+rb_shape_too_complex(VALUE self)
+{
+ rb_shape_t * shape;
+ TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape);
+ if (rb_shape_id(shape) == OBJ_TOO_COMPLEX_SHAPE_ID) {
+ return Qtrue;
+ }
+ else {
+ return Qfalse;
+ }
+}
+
+static VALUE
rb_shape_parent_id(VALUE self)
{
rb_shape_t * shape;
@@ -730,7 +785,7 @@ Init_default_shapes(void)
rb_shape_t * shape = rb_shape_get_shape_by_id(i);
bool dont_care;
rb_shape_t * t_object_shape =
- get_next_shape_internal(shape, id_t_object, SHAPE_T_OBJECT, &dont_care);
+ get_next_shape_internal(shape, id_t_object, SHAPE_T_OBJECT, &dont_care, true);
t_object_shape->edges = rb_id_table_create(0);
RUBY_ASSERT(rb_shape_id(t_object_shape) == (shape_id_t)(i + SIZE_POOL_COUNT));
}
@@ -740,10 +795,16 @@ Init_default_shapes(void)
#if RUBY_DEBUG
rb_shape_t * special_const_shape =
#endif
- get_next_shape_internal(root, (ID)id_frozen, SHAPE_FROZEN, &dont_care);
+ get_next_shape_internal(root, (ID)id_frozen, SHAPE_FROZEN, &dont_care, true);
RUBY_ASSERT(rb_shape_id(special_const_shape) == SPECIAL_CONST_SHAPE_ID);
RUBY_ASSERT(SPECIAL_CONST_SHAPE_ID == (GET_VM()->next_shape_id - 1));
RUBY_ASSERT(rb_shape_frozen_shape_p(special_const_shape));
+
+ rb_shape_t * hash_fallback_shape = rb_shape_alloc_with_parent_id(0, ROOT_SHAPE_ID);
+ hash_fallback_shape->type = SHAPE_OBJ_TOO_COMPLEX;
+ hash_fallback_shape->size_pool_index = 0;
+ RUBY_ASSERT(OBJ_TOO_COMPLEX_SHAPE_ID == (GET_VM()->next_shape_id - 1));
+ RUBY_ASSERT(rb_shape_id(hash_fallback_shape) == OBJ_TOO_COMPLEX_SHAPE_ID);
}
void
@@ -763,6 +824,7 @@ Init_shape(void)
rb_define_method(rb_cShape, "id", rb_wrapped_shape_id, 0);
rb_define_method(rb_cShape, "type", rb_shape_type, 0);
rb_define_method(rb_cShape, "capacity", rb_shape_capacity, 0);
+ rb_define_method(rb_cShape, "too_complex?", rb_shape_too_complex, 0);
rb_define_const(rb_cShape, "SHAPE_ROOT", INT2NUM(SHAPE_ROOT));
rb_define_const(rb_cShape, "SHAPE_IVAR", INT2NUM(SHAPE_IVAR));
rb_define_const(rb_cShape, "SHAPE_T_OBJECT", INT2NUM(SHAPE_T_OBJECT));
@@ -770,6 +832,8 @@ Init_shape(void)
rb_define_const(rb_cShape, "SHAPE_ID_NUM_BITS", INT2NUM(SHAPE_ID_NUM_BITS));
rb_define_const(rb_cShape, "SHAPE_FLAG_SHIFT", INT2NUM(SHAPE_FLAG_SHIFT));
rb_define_const(rb_cShape, "SPECIAL_CONST_SHAPE_ID", INT2NUM(SPECIAL_CONST_SHAPE_ID));
+ rb_define_const(rb_cShape, "OBJ_TOO_COMPLEX_SHAPE_ID", INT2NUM(OBJ_TOO_COMPLEX_SHAPE_ID));
+ rb_define_const(rb_cShape, "SHAPE_MAX_VARIATIONS", INT2NUM(SHAPE_MAX_VARIATIONS));
rb_define_singleton_method(rb_cShape, "transition_tree", shape_transition_tree, 0);
rb_define_singleton_method(rb_cShape, "find_by_id", rb_shape_find_by_id, 1);
diff --git a/shape.h b/shape.h
index ddb870f5e7..9a42a770bb 100644
--- a/shape.h
+++ b/shape.h
@@ -27,12 +27,22 @@ typedef uint16_t shape_id_t;
# define SHAPE_BITMAP_SIZE 16384
+# define SHAPE_MAX_VARIATIONS 8
+
# define MAX_SHAPE_ID (SHAPE_MASK - 1)
# define INVALID_SHAPE_ID SHAPE_MASK
# define ROOT_SHAPE_ID 0x0
+
// We use SIZE_POOL_COUNT number of shape IDs for transitions out of different size pools
// The next available shape ID will be the SPECIAL_CONST_SHAPE_ID
+#if USE_RVARGC && (SIZEOF_UINT64_T == SIZEOF_VALUE)
+# define SIZE_POOL_COUNT 5
+#else
+# define SIZE_POOL_COUNT 1
+#endif
+
# define SPECIAL_CONST_SHAPE_ID (SIZE_POOL_COUNT * 2)
+# define OBJ_TOO_COMPLEX_SHAPE_ID (SPECIAL_CONST_SHAPE_ID + 1)
struct rb_shape {
struct rb_id_table * edges; // id_table from ID (ivar) to next shape
@@ -53,6 +63,7 @@ enum shape_type {
SHAPE_CAPACITY_CHANGE,
SHAPE_INITIAL_CAPACITY,
SHAPE_T_OBJECT,
+ SHAPE_OBJ_TOO_COMPLEX,
};
#if SHAPE_IN_BASIC_FLAGS
@@ -141,6 +152,7 @@ rb_shape_t * rb_shape_get_next_iv_shape(rb_shape_t * shape, ID id);
rb_shape_t* rb_shape_get_next(rb_shape_t* shape, VALUE obj, ID id);
bool rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t * value);
shape_id_t rb_shape_id(rb_shape_t * shape);
+bool rb_shape_obj_too_complex(VALUE obj);
MJIT_SYMBOL_EXPORT_END
rb_shape_t * rb_shape_rebuild_shape(rb_shape_t * initial_shape, rb_shape_t * dest_shape);
@@ -149,15 +161,41 @@ static inline uint32_t
ROBJECT_IV_CAPACITY(VALUE obj)
{
RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
+ // Asking for capacity doesn't make sense when the object is using
+ // a hash table for storing instance variables
+ RUBY_ASSERT(ROBJECT_SHAPE_ID(obj) != OBJ_TOO_COMPLEX_SHAPE_ID);
return rb_shape_get_shape_by_id(ROBJECT_SHAPE_ID(obj))->capacity;
}
+static inline struct rb_id_table *
+ROBJECT_IV_HASH(VALUE obj)
+{
+ RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
+ RUBY_ASSERT(ROBJECT_SHAPE_ID(obj) == OBJ_TOO_COMPLEX_SHAPE_ID);
+ return (struct rb_id_table *)ROBJECT(obj)->as.heap.ivptr;
+}
+
+static inline void
+ROBJECT_SET_IV_HASH(VALUE obj, const struct rb_id_table *tbl)
+{
+ RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
+ RUBY_ASSERT(ROBJECT_SHAPE_ID(obj) == OBJ_TOO_COMPLEX_SHAPE_ID);
+ ROBJECT(obj)->as.heap.ivptr = (VALUE *)tbl;
+}
+
+size_t rb_id_table_size(const struct rb_id_table *tbl);
+
static inline uint32_t
ROBJECT_IV_COUNT(VALUE obj)
{
- RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
- uint32_t ivc = rb_shape_get_shape_by_id(ROBJECT_SHAPE_ID(obj))->next_iv_index;
- return ivc;
+ if (ROBJECT_SHAPE_ID(obj) == OBJ_TOO_COMPLEX_SHAPE_ID) {
+ return (uint32_t)rb_id_table_size(ROBJECT_IV_HASH(obj));
+ }
+ else {
+ RBIMPL_ASSERT_TYPE(obj, RUBY_T_OBJECT);
+ RUBY_ASSERT(ROBJECT_SHAPE_ID(obj) != OBJ_TOO_COMPLEX_SHAPE_ID);
+ return rb_shape_get_shape_by_id(ROBJECT_SHAPE_ID(obj))->next_iv_index;
+ }
}
static inline uint32_t
@@ -182,6 +220,7 @@ bool rb_shape_set_shape_id(VALUE obj, shape_id_t shape_id);
VALUE rb_obj_debug_shape(VALUE self, VALUE obj);
VALUE rb_shape_flags_mask(void);
+void rb_shape_set_too_complex(VALUE obj);
RUBY_SYMBOL_EXPORT_BEGIN
typedef void each_shape_callback(rb_shape_t * shape, void *data);
diff --git a/test/ruby/test_shapes.rb b/test/ruby/test_shapes.rb
index b19238950b..358657d360 100644
--- a/test/ruby/test_shapes.rb
+++ b/test/ruby/test_shapes.rb
@@ -37,6 +37,35 @@ class TestShapes < Test::Unit::TestCase
end
end
+ class TooComplex
+ attr_reader :hopefully_unique_name, :b
+
+ def initialize
+ @hopefully_unique_name = "a"
+ @b = "b"
+ end
+
+ # Make enough lazily defined accessors to allow us to force
+ # polymorphism
+ class_eval (RubyVM::Shape::SHAPE_MAX_VARIATIONS + 1).times.map {
+ "def a#{_1}_m; @a#{_1} ||= #{_1}; end"
+ }.join(" ; ")
+
+ class_eval "attr_accessor " + (RubyVM::Shape::SHAPE_MAX_VARIATIONS + 1).times.map {
+ ":a#{_1}"
+ }.join(", ")
+
+ def iv_not_defined; @not_defined; end
+
+ def write_iv_method
+ self.a3 = 12345
+ end
+
+ def write_iv
+ @a3 = 12345
+ end
+ end
+
# RubyVM::Shape.of returns new instances of shape objects for
# each call. This helper method allows us to define equality for
# shapes
@@ -51,6 +80,156 @@ class TestShapes < Test::Unit::TestCase
refute_equal(shape1.id, shape2.id)
end
+ def test_too_complex
+ ensure_complex
+
+ tc = TooComplex.new
+ tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+ assert_predicate RubyVM::Shape.of(tc), :too_complex?
+ end
+
+ def test_too_complex_ractor
+ assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
+ begin;
+ $VERBOSE = nil
+ class TooComplex
+ attr_reader :very_unique
+ end
+
+ RubyVM::Shape::SHAPE_MAX_VARIATIONS.times do
+ TooComplex.new.instance_variable_set(:"@unique_#{_1}", Object.new)
+ end
+
+ tc = TooComplex.new
+ tc.instance_variable_set(:"@very_unique", 3)
+
+ assert_predicate RubyVM::Shape.of(tc), :too_complex?
+ assert_equal 3, tc.very_unique
+ assert_equal 3, Ractor.new(tc) { |x| Ractor.yield(x.very_unique) }.take
+ assert_equal tc.instance_variables.sort, Ractor.new(tc) { |x| Ractor.yield(x.instance_variables) }.take.sort
+ end;
+ end
+
+ def test_too_complex_ractor_shareable
+ assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
+ begin;
+ $VERBOSE = nil
+ class TooComplex
+ attr_reader :very_unique
+ end
+
+ RubyVM::Shape::SHAPE_MAX_VARIATIONS.times do
+ TooComplex.new.instance_variable_set(:"@unique_#{_1}", Object.new)
+ end
+
+ tc = TooComplex.new
+ tc.instance_variable_set(:"@very_unique", 3)
+
+ assert_predicate RubyVM::Shape.of(tc), :too_complex?
+ assert_equal 3, tc.very_unique
+ assert_equal 3, Ractor.make_shareable(tc).very_unique
+ end;
+ end
+
+ def test_read_iv_after_complex
+ ensure_complex
+
+ tc = TooComplex.new
+ tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+ assert_predicate RubyVM::Shape.of(tc), :too_complex?
+ assert_equal 3, tc.a3_m
+ end
+
+ def test_read_method_after_complex
+ ensure_complex
+
+ tc = TooComplex.new
+ tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+ assert_predicate RubyVM::Shape.of(tc), :too_complex?
+ assert_equal 3, tc.a3_m
+ assert_equal 3, tc.a3
+ end
+
+ def test_write_method_after_complex
+ ensure_complex
+
+ tc = TooComplex.new
+ tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+ assert_predicate RubyVM::Shape.of(tc), :too_complex?
+ tc.write_iv_method
+ tc.write_iv_method
+ assert_equal 12345, tc.a3_m
+ assert_equal 12345, tc.a3
+ end
+
+ def test_write_iv_after_complex
+ ensure_complex
+
+ tc = TooComplex.new
+ tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+ assert_predicate RubyVM::Shape.of(tc), :too_complex?
+ tc.write_iv
+ tc.write_iv
+ assert_equal 12345, tc.a3_m
+ assert_equal 12345, tc.a3
+ end
+
+ def test_iv_read_via_method_after_complex
+ ensure_complex
+
+ tc = TooComplex.new
+ tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+ assert_predicate RubyVM::Shape.of(tc), :too_complex?
+ assert_equal 3, tc.a3_m
+ assert_equal 3, tc.instance_variable_get(:@a3)
+ end
+
+ def test_delete_iv_after_complex
+ ensure_complex
+
+ tc = TooComplex.new
+ tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+ assert_predicate RubyVM::Shape.of(tc), :too_complex?
+
+ assert_equal 3, tc.a3_m # make sure IV is initialized
+ assert tc.instance_variable_defined?(:@a3)
+ tc.remove_instance_variable(:@a3)
+ assert_nil tc.a3
+ end
+
+ def test_delete_undefined_after_complex
+ ensure_complex
+
+ tc = TooComplex.new
+ tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+ assert_predicate RubyVM::Shape.of(tc), :too_complex?
+
+ refute tc.instance_variable_defined?(:@a3)
+ assert_raise(NameError) do
+ tc.remove_instance_variable(:@a3)
+ end
+ assert_nil tc.a3
+ end
+
+ def test_freeze_after_complex
+ ensure_complex
+
+ tc = TooComplex.new
+ tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+ assert_predicate RubyVM::Shape.of(tc), :too_complex?
+ tc.freeze
+ assert_raise(FrozenError) { tc.a3_m }
+ end
+
+ def test_read_undefined_iv_after_complex
+ ensure_complex
+
+ tc = TooComplex.new
+ tc.send("a#{RubyVM::Shape::SHAPE_MAX_VARIATIONS}_m")
+ assert_predicate RubyVM::Shape.of(tc), :too_complex?
+ assert_equal nil, tc.iv_not_defined
+ end
+
def test_shape_order
bar = ShapeOrder.new # 0 => 1
bar.set_c # 1 => 2
@@ -218,4 +397,11 @@ class TestShapes < Test::Unit::TestCase
RubyVM::Shape.find_by_id(-1)
end
end
+
+ def ensure_complex
+ RubyVM::Shape::SHAPE_MAX_VARIATIONS.times do
+ tc = TooComplex.new
+ tc.send("a#{_1}_m")
+ end
+ end
end if defined?(RubyVM::Shape)
diff --git a/transient_heap.c b/transient_heap.c
index 9dfd980e64..14e941a41a 100644
--- a/transient_heap.c
+++ b/transient_heap.c
@@ -599,6 +599,7 @@ transient_heap_ptr(VALUE obj, int error)
break;
case T_OBJECT:
if (ROBJ_TRANSIENT_P(obj)) {
+ RUBY_ASSERT(!rb_shape_obj_too_complex(obj));
ptr = ROBJECT_IVPTR(obj);
}
break;
diff --git a/variable.c b/variable.c
index 42a73b5953..0c283738aa 100644
--- a/variable.c
+++ b/variable.c
@@ -1172,6 +1172,18 @@ rb_ivar_lookup(VALUE obj, ID id, VALUE undef)
#if !SHAPE_IN_BASIC_FLAGS
shape_id = ROBJECT_SHAPE_ID(obj);
#endif
+ if (rb_shape_obj_too_complex(obj)) {
+ struct rb_id_table * iv_table = ROBJECT_IV_HASH(obj);
+ VALUE val;
+ if (rb_id_table_lookup(iv_table, id, &val)) {
+ return val;
+ }
+ else {
+ return undef;
+ }
+ }
+
+ RUBY_ASSERT(!rb_shape_obj_too_complex(obj));
ivar_list = ROBJECT_IVPTR(obj);
break;
}
@@ -1334,6 +1346,7 @@ rb_obj_transient_heap_evacuate(VALUE obj, int promote)
assert(!RB_FL_TEST_RAW(obj, ROBJECT_EMBED));
uint32_t len = ROBJECT_IV_CAPACITY(obj);
+ RUBY_ASSERT(!rb_shape_obj_too_complex(obj));
const VALUE *old_ptr = ROBJECT_IVPTR(obj);
VALUE *new_ptr;
@@ -1353,6 +1366,7 @@ rb_obj_transient_heap_evacuate(VALUE obj, int promote)
void
rb_ensure_iv_list_size(VALUE obj, uint32_t current_capacity, uint32_t new_capacity)
{
+ RUBY_ASSERT(!rb_shape_obj_too_complex(obj));
VALUE *ptr = ROBJECT_IVPTR(obj);
VALUE *newptr;
@@ -1402,21 +1416,36 @@ rb_grow_iv_list(VALUE obj)
return res;
}
+int
+rb_obj_evacuate_ivs_to_hash_table(ID key, VALUE val, st_data_t arg)
+{
+ rb_id_table_insert((struct rb_id_table *)arg, key, val);
+ return ST_CONTINUE;
+}
+
attr_index_t
rb_obj_ivar_set(VALUE obj, ID id, VALUE val)
{
attr_index_t index;
- shape_id_t next_shape_id = ROBJECT_SHAPE_ID(obj);
- rb_shape_t *shape = rb_shape_get_shape_by_id(next_shape_id);
+ rb_shape_t *shape = rb_shape_get_shape(obj);
uint32_t num_iv = shape->capacity;
+ if (rb_shape_obj_too_complex(obj)) {
+ struct rb_id_table * table = ROBJECT_IV_HASH(obj);
+ rb_id_table_insert(table, id, val);
+ RB_OBJ_WRITTEN(obj, Qundef, val);
+ return 0;
+ }
+
if (!rb_shape_get_iv_index(shape, id, &index)) {
index = shape->next_iv_index;
if (index >= MAX_IVARS) {
rb_raise(rb_eArgError, "too many instance variables");
}
+ RUBY_ASSERT(!rb_shape_obj_too_complex(obj));
+
if (UNLIKELY(shape->next_iv_index >= num_iv)) {
RUBY_ASSERT(shape->next_iv_index == num_iv);
@@ -1425,13 +1454,39 @@ rb_obj_ivar_set(VALUE obj, ID id, VALUE val)
}
rb_shape_t *next_shape = rb_shape_get_next(shape, obj, id);
- RUBY_ASSERT(next_shape->type == SHAPE_IVAR);
- RUBY_ASSERT(index == (next_shape->next_iv_index - 1));
- next_shape_id = rb_shape_id(next_shape);
- rb_shape_set_shape(obj, next_shape);
+ if (next_shape->type == SHAPE_OBJ_TOO_COMPLEX) {
+ struct rb_id_table * table = rb_id_table_create(shape->next_iv_index);
+
+ // Evacuate all previous values from shape into id_table
+ rb_ivar_foreach(obj, rb_obj_evacuate_ivs_to_hash_table, (st_data_t)table);
+
+ // Insert new value too
+ rb_id_table_insert(table, id, val);
+ RB_OBJ_WRITTEN(obj, Qundef, val);
+
+ rb_shape_set_too_complex(obj);
+ RUBY_ASSERT(rb_shape_obj_too_complex(obj));
+
+ if (ROBJ_TRANSIENT_P(obj)) {
+ ROBJ_TRANSIENT_UNSET(obj);
+ }
+ else if (!(RBASIC(obj)->flags & ROBJECT_EMBED)) {
+ xfree(ROBJECT(obj)->as.heap.ivptr);
+ }
+
+ ROBJECT(obj)->as.heap.ivptr = (VALUE *)table;
+
+ return 0;
+ }
+ else {
+ rb_shape_set_shape(obj, next_shape);
+ RUBY_ASSERT(next_shape->type == SHAPE_IVAR);
+ RUBY_ASSERT(index == (next_shape->next_iv_index - 1));
+ }
}
+ RUBY_ASSERT(!rb_shape_obj_too_complex(obj));
RB_OBJ_WRITE(obj, &ROBJECT_IVPTR(obj)[index], val);
return index;
@@ -1554,7 +1609,17 @@ rb_ivar_defined(VALUE obj, ID id)
attr_index_t index;
if (SPECIAL_CONST_P(obj)) return Qfalse;
- return RBOOL(rb_shape_get_iv_index(rb_shape_get_shape(obj), id, &index));
+ if (rb_shape_obj_too_complex(obj)) {
+ VALUE idx;
+ if (!rb_id_table_lookup(ROBJECT_IV_HASH(obj), id, &idx)) {
+ return Qfalse;
+ }
+
+ return Qtrue;
+ }
+ else {
+ return RBOOL(rb_shape_get_iv_index(rb_shape_get_shape(obj), id, &index));
+ }
}
typedef int rb_ivar_foreach_callback_func(ID key, VALUE val, st_data_t arg);
@@ -1564,6 +1629,7 @@ struct iv_itr_data {
VALUE obj;
struct gen_ivtbl * ivtbl;
st_data_t arg;
+ rb_ivar_foreach_callback_func *func;
};
static void
@@ -1577,6 +1643,7 @@ iterate_over_shapes_with_callback(rb_shape_t *shape, rb_ivar_foreach_callback_fu
VALUE * iv_list;
switch (BUILTIN_TYPE(itr_data->obj)) {
case T_OBJECT:
+ RUBY_ASSERT(!rb_shape_obj_too_complex(itr_data->obj));
iv_list = ROBJECT_IVPTR(itr_data->obj);
break;
case T_CLASS:
@@ -1598,9 +1665,19 @@ iterate_over_shapes_with_callback(rb_shape_t *shape, rb_ivar_foreach_callback_fu
case SHAPE_T_OBJECT:
iterate_over_shapes_with_callback(rb_shape_get_parent(shape), callback, itr_data);
return;
+ case SHAPE_OBJ_TOO_COMPLEX:
+ rb_bug("Unreachable\n");
}
}
+static enum rb_id_table_iterator_result
+each_hash_iv(ID id, VALUE val, void *data)
+{
+ struct iv_itr_data * itr_data = (struct iv_itr_data *)data;
+ rb_ivar_foreach_callback_func *callback = itr_data->func;
+ return callback(id, val, itr_data->arg);
+}
+
static void
obj_ivar_each(VALUE obj, rb_ivar_foreach_callback_func *func, st_data_t arg)
{
@@ -1608,7 +1685,13 @@ obj_ivar_each(VALUE obj, rb_ivar_foreach_callback_func *func, st_data_t arg)
struct iv_itr_data itr_data;
itr_data.obj = obj;
itr_data.arg = arg;
- iterate_over_shapes_with_callback(shape, func, &itr_data);
+ itr_data.func = func;
+ if (rb_shape_obj_too_complex(obj)) {
+ rb_id_table_foreach(ROBJECT_IV_HASH(obj), each_hash_iv, &itr_data);
+ }
+ else {
+ iterate_over_shapes_with_callback(shape, func, &itr_data);
+ }
}
static void
@@ -1742,6 +1825,10 @@ rb_ivar_count(VALUE obj)
switch (BUILTIN_TYPE(obj)) {
case T_OBJECT:
+ if (rb_shape_obj_too_complex(obj)) {
+ return ROBJECT_IV_COUNT(obj);
+ }
+
if (rb_shape_get_shape(obj)->next_iv_index > 0) {
st_index_t i, count, num = ROBJECT_IV_COUNT(obj);
const VALUE *const ivptr = ROBJECT_IVPTR(obj);
@@ -1893,7 +1980,14 @@ rb_obj_remove_instance_variable(VALUE obj, VALUE name)
rb_shape_transition_shape_remove_ivar(obj, id, shape, &val);
break;
case T_OBJECT: {
- rb_shape_transition_shape_remove_ivar(obj, id, shape, &val);
+ if (rb_shape_obj_too_complex(obj)) {
+ if (rb_id_table_lookup(ROBJECT_IV_HASH(obj), id, &val)) {
+ rb_id_table_delete(ROBJECT_IV_HASH(obj), id);
+ }
+ }
+ else {
+ rb_shape_transition_shape_remove_ivar(obj, id, shape, &val);
+ }
break;
}
default: {
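Taken together, these hunks route every ivar operation through the id table once an object is too complex: lookup, set, defined?, iteration, counting, and removal. A sketch of the Ruby-visible behavior (illustrative names):

```ruby
class Garply; end
RubyVM::Shape::SHAPE_MAX_VARIATIONS.times { |i| Garply.new.instance_variable_set(:"@u#{i}", 1) }
tc = Garply.new
tc.instance_variable_set(:@extra, 1)     # rb_obj_ivar_set: plain rb_id_table_insert

p tc.instance_variable_defined?(:@extra) # => true, via rb_id_table_lookup
p tc.instance_variables                  # iterated straight off the id table
tc.remove_instance_variable(:@extra)     # deletes the entry; no shape transition
p tc.instance_variable_defined?(:@extra) # => false
```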
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 84fe32c1b7..68b8d89abb 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -1213,6 +1213,8 @@ vm_getivar(VALUE obj, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_call
}
if (LIKELY(cached_id == shape_id)) {
+ RUBY_ASSERT(cached_id != OBJ_TOO_COMPLEX_SHAPE_ID);
+
if (index == ATTR_INDEX_NOT_SET) {
return Qnil;
}
@@ -1242,24 +1244,31 @@ vm_getivar(VALUE obj, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_call
rb_shape_t *shape = rb_shape_get_shape_by_id(shape_id);
- if (rb_shape_get_iv_index(shape, id, &index)) {
- // This fills in the cache with the shared cache object.
- // "ent" is the shared cache object
- fill_ivar_cache(iseq, ic, cc, is_attr, index, shape_id);
-
- // We fetched the ivar list above
- val = ivar_list[index];
- RUBY_ASSERT(!UNDEF_P(val));
+ if (shape_id == OBJ_TOO_COMPLEX_SHAPE_ID) {
+ if (!rb_id_table_lookup(ROBJECT_IV_HASH(obj), id, &val)) {
+ val = Qnil;
+ }
}
else {
- if (is_attr) {
- vm_cc_attr_index_initialize(cc, shape_id);
+ if (rb_shape_get_iv_index(shape, id, &index)) {
+ // This fills in the cache with the shared cache object.
+ // "ent" is the shared cache object
+ fill_ivar_cache(iseq, ic, cc, is_attr, index, shape_id);
+
+ // We fetched the ivar list above
+ val = ivar_list[index];
+ RUBY_ASSERT(!UNDEF_P(val));
}
else {
- vm_ic_attr_index_initialize(ic, shape_id);
- }
+ if (is_attr) {
+ vm_cc_attr_index_initialize(cc, shape_id);
+ }
+ else {
+ vm_ic_attr_index_initialize(ic, shape_id);
+ }
- val = Qnil;
+ val = Qnil;
+ }
}
}
@@ -1283,6 +1292,8 @@ general_path:
static void
populate_cache(attr_index_t index, shape_id_t next_shape_id, ID id, const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, bool is_attr)
{
+ RUBY_ASSERT(next_shape_id != OBJ_TOO_COMPLEX_SHAPE_ID);
+
// Cache population code
if (is_attr) {
vm_cc_attr_index_set(cc, index, next_shape_id);
@@ -1309,7 +1320,9 @@ vm_setivar_slowpath(VALUE obj, ID id, VALUE val, const rb_iseq_t *iseq, IVC ic,
shape_id_t next_shape_id = ROBJECT_SHAPE_ID(obj);
- populate_cache(index, next_shape_id, id, iseq, ic, cc, is_attr);
+ if (next_shape_id != OBJ_TOO_COMPLEX_SHAPE_ID) {
+ populate_cache(index, next_shape_id, id, iseq, ic, cc, is_attr);
+ }
RB_DEBUG_COUNTER_INC(ivar_set_ic_miss_iv_hit);
return val;
@@ -1413,6 +1426,7 @@ vm_setivar(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t i
VM_ASSERT(!rb_ractor_shareable_p(obj) || rb_obj_frozen_p(obj));
shape_id_t shape_id = ROBJECT_SHAPE_ID(obj);
+ RUBY_ASSERT(dest_shape_id != OBJ_TOO_COMPLEX_SHAPE_ID);
if (LIKELY(shape_id == dest_shape_id)) {
RUBY_ASSERT(dest_shape_id != INVALID_SHAPE_ID && shape_id != INVALID_SHAPE_ID);
@@ -1440,6 +1454,7 @@ vm_setivar(VALUE obj, ID id, VALUE val, shape_id_t dest_shape_id, attr_index_t i
VALUE *ptr = ROBJECT_IVPTR(obj);
+ RUBY_ASSERT(!rb_shape_obj_too_complex(obj));
RB_OBJ_WRITE(obj, &ptr[index], val);
RB_DEBUG_COUNTER_INC(ivar_set_ic_hit);
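The interpreter's inline caches never see the too-complex shape: reads fall back to a hash lookup (missing ivars read as nil) and writes skip cache population. A sketch using method bodies so getivar/setivar instruction sites are actually exercised (illustrative names):

```ruby
class Corge
  def extra = @extra             # getivar site
  def extra=(v); @extra = v; end # setivar site
  def missing = @missing         # ivar absent from the hash
end

RubyVM::Shape::SHAPE_MAX_VARIATIONS.times { |i| Corge.new.instance_variable_set(:"@u#{i}", 1) }
tc = Corge.new
tc.extra = 7  # slow path transitions tc to "too complex"; no cache is populated

p tc.extra    # => 7, fetched from the id table rather than an indexed slot
p tc.missing  # => nil, the table lookup miss falls back to Qnil
```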
diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs
index e33b0d5dba..996d7fa1c0 100644
--- a/yjit/bindgen/src/main.rs
+++ b/yjit/bindgen/src/main.rs
@@ -93,7 +93,9 @@ fn main() {
.allowlist_function("rb_shape_get_next")
.allowlist_function("rb_shape_id")
.allowlist_function("rb_shape_transition_shape_capa")
+ .allowlist_function("rb_shape_obj_too_complex")
.allowlist_var("SHAPE_ID_NUM_BITS")
+ .allowlist_var("OBJ_TOO_COMPLEX_SHAPE_ID")
// From ruby/internal/intern/object.h
.allowlist_function("rb_obj_is_kind_of")
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index ab87038c67..cec5c4671b 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -1963,6 +1963,11 @@ fn gen_get_ivar(
recv_opnd: YARVOpnd,
side_exit: CodePtr,
) -> CodegenStatus {
+ // If the object has a too complex shape, we exit
+ if comptime_receiver.shape_too_complex() {
+ return CantCompile;
+ }
+
let comptime_val_klass = comptime_receiver.class_of();
let starting_context = ctx.clone(); // make a copy for use with jit_chain_guard
@@ -2192,7 +2197,8 @@ fn gen_setinstancevariable(
// If the comptime receiver is frozen, writing an IV will raise an exception
// and we don't want to JIT code to deal with that situation.
- if comptime_receiver.is_frozen() {
+ // If the object has a too complex shape, we will also exit
+ if comptime_receiver.is_frozen() || comptime_receiver.shape_too_complex() {
return CantCompile;
}
@@ -2281,39 +2287,53 @@ fn gen_setinstancevariable(
megamorphic_side_exit,
);
- let write_val = ctx.stack_pop(1);
+ let write_val;
match ivar_index {
// If we don't have an instance variable index, then we need to
// transition out of the current shape.
None => {
- let mut shape = comptime_receiver.shape_of();
+ let shape = comptime_receiver.shape_of();
+
+ let current_capacity = unsafe { (*shape).capacity };
+ let new_capacity = current_capacity * 2;
// If the object doesn't have the capacity to store the IV,
// then we'll need to allocate it.
- let needs_extension = unsafe { (*shape).next_iv_index >= (*shape).capacity };
+ let needs_extension = unsafe { (*shape).next_iv_index >= current_capacity };
// We can write to the object, but we need to transition the shape
let ivar_index = unsafe { (*shape).next_iv_index } as usize;
- if needs_extension {
- let current_capacity = unsafe { (*shape).capacity };
- let newsize = current_capacity * 2;
-
+ let capa_shape = if needs_extension {
// We need to add an extended table to the object
// First, create an outgoing transition that increases the
// capacity
- shape = unsafe {
- rb_shape_transition_shape_capa(shape, newsize)
- };
+ Some(unsafe { rb_shape_transition_shape_capa(shape, new_capacity) })
+ } else {
+ None
+ };
+
+ let dest_shape = if capa_shape.is_none() {
+ unsafe { rb_shape_get_next(shape, comptime_receiver, ivar_name) }
+ } else {
+ unsafe { rb_shape_get_next(capa_shape.unwrap(), comptime_receiver, ivar_name) }
+ };
+
+ let new_shape_id = unsafe { rb_shape_id(dest_shape) };
+ if new_shape_id == OBJ_TOO_COMPLEX_SHAPE_ID {
+ return CantCompile;
+ }
+
+ if needs_extension {
// Generate the C call so that runtime code will increase
// the capacity and set the buffer.
asm.ccall(rb_ensure_iv_list_size as *const u8,
vec![
recv,
Opnd::UImm(current_capacity.into()),
- Opnd::UImm(newsize.into())
+ Opnd::UImm(new_capacity.into())
]
);
@@ -2321,10 +2341,7 @@ fn gen_setinstancevariable(
recv = asm.load(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF))
}
- let new_shape_id = unsafe {
- rb_shape_id(rb_shape_get_next(shape, comptime_receiver, ivar_name))
- };
-
+ write_val = ctx.stack_pop(1);
gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, needs_extension);
asm.comment("write shape");
@@ -2342,6 +2359,7 @@ fn gen_setinstancevariable(
// the iv index by searching up the shape tree. If we've
// made the transition already, then there's no reason to
// update the shape on the object. Just set the IV.
+ write_val = ctx.stack_pop(1);
gen_write_iv(asm, comptime_receiver, recv, ivar_index, write_val, false);
},
}
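YJIT adds no new machinery for the fallback: ivar reads and writes on too-complex receivers return CantCompile and run in the interpreter, and a compiled write whose destination shape would be too complex also bails out. Semantics are therefore identical with and without the JIT; a trivial hedged check (illustrative names):

```ruby
# Same output with `ruby script.rb` and `ruby --yjit script.rb`.
class Grault; end
RubyVM::Shape::SHAPE_MAX_VARIATIONS.times { |i| Grault.new.instance_variable_set(:"@u#{i}", 1) }
tc = Grault.new
tc.instance_variable_set(:@extra, 5)
p tc.instance_variable_get(:@extra) # => 5 either way
```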
diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs
index b6228fe64b..ba09d4119f 100644
--- a/yjit/src/cruby.rs
+++ b/yjit/src/cruby.rs
@@ -398,6 +398,10 @@ impl VALUE {
unsafe { rb_obj_frozen_p(self) != VALUE(0) }
}
+ pub fn shape_too_complex(self) -> bool {
+ unsafe { rb_shape_obj_too_complex(self) }
+ }
+
pub fn shape_id_of(self) -> u32 {
unsafe { rb_shape_get_shape_id(self) }
}
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
index 3e00aa3689..af77747861 100644
--- a/yjit/src/cruby_bindings.inc.rs
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -124,6 +124,7 @@ impl<T> ::std::cmp::PartialEq for __BindgenUnionField<T> {
}
impl<T> ::std::cmp::Eq for __BindgenUnionField<T> {}
pub const SHAPE_ID_NUM_BITS: u32 = 32;
+pub const OBJ_TOO_COMPLEX_SHAPE_ID: u32 = 11;
pub const INTEGER_REDEFINED_OP_FLAG: u32 = 1;
pub const FLOAT_REDEFINED_OP_FLAG: u32 = 2;
pub const STRING_REDEFINED_OP_FLAG: u32 = 4;
@@ -1112,6 +1113,7 @@ extern "C" {
pub fn rb_shape_get_next(shape: *mut rb_shape_t, obj: VALUE, id: ID) -> *mut rb_shape_t;
pub fn rb_shape_get_iv_index(shape: *mut rb_shape_t, id: ID, value: *mut attr_index_t) -> bool;
pub fn rb_shape_id(shape: *mut rb_shape_t) -> shape_id_t;
+ pub fn rb_shape_obj_too_complex(obj: VALUE) -> bool;
pub fn rb_ary_tmp_new_from_values(
arg1: VALUE,
arg2: ::std::os::raw::c_long,