diff options
author | Jemma Issroff <jemmaissroff@gmail.com> | 2022-12-08 17:16:52 -0500 |
---|---|---|
committer | Aaron Patterson <aaron.patterson@gmail.com> | 2022-12-15 10:06:04 -0800 |
commit | c1ab6ddc9a6fa228caa5d26b118b54855051279c (patch) | |
tree | a3361c22480e38d798dfa975bdabf47a832a9fb0 /shape.c | |
parent | a3d552aedd190b0f21a4f6479f0ef1d2ce90189b (diff) |
Transition complex objects to "too complex" shape
When an object becomes "too complex" (in other words it has too many
variations in the shape tree), we transition it to use a "too complex"
shape and use a hash for storing instance variables.
Without this patch, there were rare cases where shape tree growth could
"explode" and cause performance degradation on what would otherwise have
been cached fast paths.
This patch puts a limit on shape tree growth, and gracefully degrades in
the rare case where there could be a factorial growth in the shape tree.
For example:
```ruby
class NG; end
HUGE_NUMBER.times do
NG.new.instance_variable_set(:"@unique_ivar_#{_1}", 1)
end
```
We consider objects to be "too complex" when the object's class has more
than SHAPE_MAX_VARIATIONS (currently 8) leaf nodes in the shape tree and
the object introduces a new variation (a new leaf node) associated with
that class.
For example, new variations on instances of the following class would be
considered "too complex" because those instances create more than 8
leaves in the shape tree:
```ruby
class Foo; end
9.times { Foo.new.instance_variable_set(":@uniq_#{_1}", 1) }
```
However, the following class is *not* too complex because it only has
one leaf in the shape tree:
```ruby
class Foo
def initialize
@a = @b = @c = @d = @e = @f = @g = @h = @i = nil
end
end
9.times { Foo.new }
``
This case is rare, so we don't expect this change to impact performance
of most applications, but it needs to be handled.
Co-Authored-By: Aaron Patterson <tenderlove@ruby-lang.org>
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/6931
Diffstat (limited to 'shape.c')
-rw-r--r-- | shape.c | 156 |
1 files changed, 110 insertions, 46 deletions
@@ -99,13 +99,13 @@ rb_shape_get_shape_id(VALUE obj) #else switch (BUILTIN_TYPE(obj)) { case T_OBJECT: - return ROBJECT_SHAPE_ID(obj); - break; + return ROBJECT_SHAPE_ID(obj); + break; case T_CLASS: case T_MODULE: - return RCLASS_SHAPE_ID(obj); + return RCLASS_SHAPE_ID(obj); default: - return rb_generic_shape_id(obj); + return rb_generic_shape_id(obj); } #endif } @@ -130,50 +130,57 @@ rb_shape_get_shape(VALUE obj) } static rb_shape_t* -get_next_shape_internal(rb_shape_t * shape, ID id, enum shape_type shape_type, bool * variation_created) +get_next_shape_internal(rb_shape_t * shape, ID id, enum shape_type shape_type, bool * variation_created, bool new_shapes_allowed) { rb_shape_t *res = NULL; - RB_VM_LOCK_ENTER(); - { - bool had_edges = !!shape->edges; - - *variation_created = false; - - if (!shape->edges) { - shape->edges = rb_id_table_create(0); - } - // Lookup the shape in edges - if there's already an edge and a corresponding shape for it, - // we can return that. Otherwise, we'll need to get a new shape - if (!rb_id_table_lookup(shape->edges, id, (VALUE *)&res)) { - *variation_created = had_edges; + // There should never be outgoing edges from "too complex" + RUBY_ASSERT(rb_shape_id(shape) != OBJ_TOO_COMPLEX_SHAPE_ID); - rb_shape_t * new_shape = rb_shape_alloc(id, shape); + *variation_created = false; - new_shape->type = (uint8_t)shape_type; - new_shape->capacity = shape->capacity; + if (new_shapes_allowed) { + RB_VM_LOCK_ENTER(); + { + bool had_edges = !!shape->edges; - switch (shape_type) { - case SHAPE_IVAR: - new_shape->next_iv_index = shape->next_iv_index + 1; - break; - case SHAPE_CAPACITY_CHANGE: - case SHAPE_FROZEN: - case SHAPE_T_OBJECT: - new_shape->next_iv_index = shape->next_iv_index; - break; - case SHAPE_INITIAL_CAPACITY: - case SHAPE_ROOT: - rb_bug("Unreachable"); - break; + if (!shape->edges) { + shape->edges = rb_id_table_create(0); } - rb_id_table_insert(shape->edges, id, (VALUE)new_shape); + // Lookup the shape in edges - if there's already an edge and a corresponding shape for it, + // we can return that. Otherwise, we'll need to get a new shape + if (!rb_id_table_lookup(shape->edges, id, (VALUE *)&res)) { + *variation_created = had_edges; + + rb_shape_t * new_shape = rb_shape_alloc(id, shape); + + new_shape->type = (uint8_t)shape_type; + new_shape->capacity = shape->capacity; + + switch (shape_type) { + case SHAPE_IVAR: + new_shape->next_iv_index = shape->next_iv_index + 1; + break; + case SHAPE_CAPACITY_CHANGE: + case SHAPE_FROZEN: + case SHAPE_T_OBJECT: + new_shape->next_iv_index = shape->next_iv_index; + break; + case SHAPE_OBJ_TOO_COMPLEX: + case SHAPE_INITIAL_CAPACITY: + case SHAPE_ROOT: + rb_bug("Unreachable"); + break; + } + + rb_id_table_insert(shape->edges, id, (VALUE)new_shape); - res = new_shape; + res = new_shape; + } } + RB_VM_LOCK_LEAVE(); } - RB_VM_LOCK_LEAVE(); return res; } @@ -192,6 +199,7 @@ move_iv(VALUE obj, ID id, attr_index_t from, attr_index_t to) RCLASS_IVPTR(obj)[to] = RCLASS_IVPTR(obj)[from]; break; case T_OBJECT: + RUBY_ASSERT(!rb_shape_obj_too_complex(obj)); ROBJECT_IVPTR(obj)[to] = ROBJECT_IVPTR(obj)[from]; break; default: { @@ -242,7 +250,7 @@ remove_shape_recursive(VALUE obj, ID id, rb_shape_t * shape, VALUE * removed) // has the same attributes as this shape. if (new_parent) { bool dont_care; - rb_shape_t * new_child = get_next_shape_internal(new_parent, shape->edge_name, shape->type, &dont_care); + rb_shape_t * new_child = get_next_shape_internal(new_parent, shape->edge_name, shape->type, &dont_care, true); new_child->capacity = shape->capacity; if (new_child->type == SHAPE_IVAR) { move_iv(obj, id, shape->next_iv_index - 1, new_child->next_iv_index - 1); @@ -275,7 +283,7 @@ rb_shape_transition_shape_frozen(VALUE obj) RUBY_ASSERT(shape); RUBY_ASSERT(RB_OBJ_FROZEN(obj)); - if (rb_shape_frozen_shape_p(shape)) { + if (rb_shape_frozen_shape_p(shape) || rb_shape_obj_too_complex(obj)) { return; } @@ -287,7 +295,7 @@ rb_shape_transition_shape_frozen(VALUE obj) } bool dont_care; - next_shape = get_next_shape_internal(shape, (ID)id_frozen, SHAPE_FROZEN, &dont_care); + next_shape = get_next_shape_internal(shape, (ID)id_frozen, SHAPE_FROZEN, &dont_care, true); RUBY_ASSERT(next_shape); rb_shape_set_shape(obj, next_shape); @@ -302,7 +310,7 @@ rb_shape_get_next_iv_shape(rb_shape_t* shape, ID id) { RUBY_ASSERT(!is_instance_id(id) || RTEST(rb_sym2str(ID2SYM(id)))); bool dont_care; - return get_next_shape_internal(shape, id, SHAPE_IVAR, &dont_care); + return get_next_shape_internal(shape, id, SHAPE_IVAR, &dont_care, true); } rb_shape_t * @@ -310,8 +318,20 @@ rb_shape_get_next(rb_shape_t* shape, VALUE obj, ID id) { RUBY_ASSERT(!is_instance_id(id) || RTEST(rb_sym2str(ID2SYM(id)))); - bool variation_created; - rb_shape_t * new_shape = get_next_shape_internal(shape, id, SHAPE_IVAR, &variation_created); + bool allow_new_shape = true; + + if (BUILTIN_TYPE(obj) == T_OBJECT) { + VALUE klass = rb_obj_class(obj); + allow_new_shape = RCLASS_EXT(klass)->variation_count < SHAPE_MAX_VARIATIONS; + } + + bool variation_created = false; + rb_shape_t * new_shape = get_next_shape_internal(shape, id, SHAPE_IVAR, &variation_created, allow_new_shape); + + if (!new_shape) { + RUBY_ASSERT(BUILTIN_TYPE(obj) == T_OBJECT); + new_shape = rb_shape_get_shape_by_id(OBJ_TOO_COMPLEX_SHAPE_ID); + } // Check if we should update max_iv_count on the object's class if (BUILTIN_TYPE(obj) == T_OBJECT) { @@ -333,7 +353,7 @@ rb_shape_transition_shape_capa(rb_shape_t* shape, uint32_t new_capacity) { ID edge_name = rb_make_temporary_id(new_capacity); bool dont_care; - rb_shape_t * new_shape = get_next_shape_internal(shape, edge_name, SHAPE_CAPACITY_CHANGE, &dont_care); + rb_shape_t * new_shape = get_next_shape_internal(shape, edge_name, SHAPE_CAPACITY_CHANGE, &dont_care, true); new_shape->capacity = new_capacity; return new_shape; } @@ -341,6 +361,10 @@ rb_shape_transition_shape_capa(rb_shape_t* shape, uint32_t new_capacity) bool rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t *value) { + // It doesn't make sense to ask for the index of an IV that's stored + // on an object that is "too complex" as it uses a hash for storing IVs + RUBY_ASSERT(rb_shape_id(shape) != OBJ_TOO_COMPLEX_SHAPE_ID); + while (shape->parent_id != INVALID_SHAPE_ID) { if (shape->edge_name == id) { enum shape_type shape_type; @@ -356,6 +380,7 @@ rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t *value) case SHAPE_INITIAL_CAPACITY: case SHAPE_T_OBJECT: return false; + case SHAPE_OBJ_TOO_COMPLEX: case SHAPE_FROZEN: rb_bug("Ivar should not exist on transition\n"); } @@ -448,11 +473,28 @@ rb_shape_rebuild_shape(rb_shape_t * initial_shape, rb_shape_t * dest_shape) case SHAPE_INITIAL_CAPACITY: case SHAPE_T_OBJECT: break; + case SHAPE_OBJ_TOO_COMPLEX: + rb_bug("Unreachable\n"); + break; } return midway_shape; } +bool +rb_shape_obj_too_complex(VALUE obj) +{ + return rb_shape_get_shape_id(obj) == OBJ_TOO_COMPLEX_SHAPE_ID; +} + +void +rb_shape_set_too_complex(VALUE obj) +{ + RUBY_ASSERT(BUILTIN_TYPE(obj) == T_OBJECT); + RUBY_ASSERT(!rb_shape_obj_too_complex(obj)); + rb_shape_set_shape_id(obj, OBJ_TOO_COMPLEX_SHAPE_ID); +} + size_t rb_shape_edges_count(rb_shape_t *shape) { @@ -519,6 +561,19 @@ rb_shape_capacity(VALUE self) } static VALUE +rb_shape_too_complex(VALUE self) +{ + rb_shape_t * shape; + TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape); + if (rb_shape_id(shape) == OBJ_TOO_COMPLEX_SHAPE_ID) { + return Qtrue; + } + else { + return Qfalse; + } +} + +static VALUE rb_shape_parent_id(VALUE self) { rb_shape_t * shape; @@ -730,7 +785,7 @@ Init_default_shapes(void) rb_shape_t * shape = rb_shape_get_shape_by_id(i); bool dont_care; rb_shape_t * t_object_shape = - get_next_shape_internal(shape, id_t_object, SHAPE_T_OBJECT, &dont_care); + get_next_shape_internal(shape, id_t_object, SHAPE_T_OBJECT, &dont_care, true); t_object_shape->edges = rb_id_table_create(0); RUBY_ASSERT(rb_shape_id(t_object_shape) == (shape_id_t)(i + SIZE_POOL_COUNT)); } @@ -740,10 +795,16 @@ Init_default_shapes(void) #if RUBY_DEBUG rb_shape_t * special_const_shape = #endif - get_next_shape_internal(root, (ID)id_frozen, SHAPE_FROZEN, &dont_care); + get_next_shape_internal(root, (ID)id_frozen, SHAPE_FROZEN, &dont_care, true); RUBY_ASSERT(rb_shape_id(special_const_shape) == SPECIAL_CONST_SHAPE_ID); RUBY_ASSERT(SPECIAL_CONST_SHAPE_ID == (GET_VM()->next_shape_id - 1)); RUBY_ASSERT(rb_shape_frozen_shape_p(special_const_shape)); + + rb_shape_t * hash_fallback_shape = rb_shape_alloc_with_parent_id(0, ROOT_SHAPE_ID); + hash_fallback_shape->type = SHAPE_OBJ_TOO_COMPLEX; + hash_fallback_shape->size_pool_index = 0; + RUBY_ASSERT(OBJ_TOO_COMPLEX_SHAPE_ID == (GET_VM()->next_shape_id - 1)); + RUBY_ASSERT(rb_shape_id(hash_fallback_shape) == OBJ_TOO_COMPLEX_SHAPE_ID); } void @@ -763,6 +824,7 @@ Init_shape(void) rb_define_method(rb_cShape, "id", rb_wrapped_shape_id, 0); rb_define_method(rb_cShape, "type", rb_shape_type, 0); rb_define_method(rb_cShape, "capacity", rb_shape_capacity, 0); + rb_define_method(rb_cShape, "too_complex?", rb_shape_too_complex, 0); rb_define_const(rb_cShape, "SHAPE_ROOT", INT2NUM(SHAPE_ROOT)); rb_define_const(rb_cShape, "SHAPE_IVAR", INT2NUM(SHAPE_IVAR)); rb_define_const(rb_cShape, "SHAPE_T_OBJECT", INT2NUM(SHAPE_T_OBJECT)); @@ -770,6 +832,8 @@ Init_shape(void) rb_define_const(rb_cShape, "SHAPE_ID_NUM_BITS", INT2NUM(SHAPE_ID_NUM_BITS)); rb_define_const(rb_cShape, "SHAPE_FLAG_SHIFT", INT2NUM(SHAPE_FLAG_SHIFT)); rb_define_const(rb_cShape, "SPECIAL_CONST_SHAPE_ID", INT2NUM(SPECIAL_CONST_SHAPE_ID)); + rb_define_const(rb_cShape, "OBJ_TOO_COMPLEX_SHAPE_ID", INT2NUM(OBJ_TOO_COMPLEX_SHAPE_ID)); + rb_define_const(rb_cShape, "SHAPE_MAX_VARIATIONS", INT2NUM(SHAPE_MAX_VARIATIONS)); rb_define_singleton_method(rb_cShape, "transition_tree", shape_transition_tree, 0); rb_define_singleton_method(rb_cShape, "find_by_id", rb_shape_find_by_id, 1); |