From b13a7c8e36e9b00b5c6668846f31be4e25523111 Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Tue, 25 Jan 2022 19:16:57 -0800 Subject: Constant time class to class ancestor lookup Previously when checking ancestors, we would walk all the way up the ancestry chain checking each parent for a matching class or module. I believe this was especially unfriendly to CPU cache since for each step we need to check two cache lines (the class and class ext). This check is used quite often in: * case statements * rescue statements * Calling protected methods * Class#is_a? * Module#=== * Module#<=> I believe it's most common to check a class against a parent class, to this commit aims to improve that (unfortunately does not help checking for an included Module). This is done by storing on each class the number and an array of all parent classes, in order (BasicObject is at index 0). Using this we can check whether a class is a subclass of another in constant time since we know the location to expect it in the hierarchy. --- benchmark/module_eqq.yml | 27 ++++++++++++++++++++++ class.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ gc.c | 11 +++++++++ internal/class.h | 7 ++++++ object.c | 43 ++++++++++++++++++++++++++++++---- 5 files changed, 143 insertions(+), 5 deletions(-) create mode 100644 benchmark/module_eqq.yml diff --git a/benchmark/module_eqq.yml b/benchmark/module_eqq.yml new file mode 100644 index 0000000000..a561fb86dc --- /dev/null +++ b/benchmark/module_eqq.yml @@ -0,0 +1,27 @@ +prelude: | + class SimpleClass; end + class MediumClass + 10.times { include Module.new } + end + class LargeClass + 100.times { include Module.new } + end + class HugeClass + 300.times { include Module.new } + end + SimpleObj = SimpleClass.new + MediumObj = MediumClass.new + LargeObj = LargeClass.new + HugeObj = HugeClass.new +benchmark: + simple_class_eqq_simple_obj: | + SimpleClass === SimpleObj + medium_class_eqq_simple_obj: | + MediumClass === SimpleObj + simple_class_eqq_medium_obj: | + SimpleClass === MediumObj + simple_class_eqq_large_obj: | + SimpleClass === LargeObj + simple_class_eqq_huge_obj: | + SimpleClass === HugeObj +loop_count: 20000000 diff --git a/class.c b/class.c index e7e60dc101..ef3f8aac2e 100644 --- a/class.c +++ b/class.c @@ -259,6 +259,63 @@ rb_class_boot(VALUE super) return (VALUE)klass; } +void +rb_class_remove_superclasses(VALUE klass) +{ + if (!RB_TYPE_P(klass, T_CLASS)) + return; + + if (RCLASS_SUPERCLASSES(klass)) + xfree(RCLASS_SUPERCLASSES(klass)); + + RCLASS_SUPERCLASSES(klass) = NULL; + RCLASS_SUPERCLASS_DEPTH(klass) = 0; +} + +void +rb_class_update_superclasses(VALUE klass) +{ + VALUE super = RCLASS_SUPER(klass); + + if (!RB_TYPE_P(klass, T_CLASS)) return; + if (super == Qundef) return; + + // If the superclass array is already built + if (RCLASS_SUPERCLASSES(klass)) + return; + + // find the proper superclass + while (super != Qfalse && !RB_TYPE_P(super, T_CLASS)) { + super = RCLASS_SUPER(super); + } + + // For BasicObject and uninitialized classes, depth=0 and ary=NULL + if (super == Qfalse) + return; + + // Sometimes superclasses are set before the full ancestry tree is built + // This happens during metaclass construction + if (super != rb_cBasicObject && !RCLASS_SUPERCLASS_DEPTH(super)) { + rb_class_update_superclasses(super); + + // If it is still unset we need to try later + if (!RCLASS_SUPERCLASS_DEPTH(super)) + return; + } + + size_t parent_num = RCLASS_SUPERCLASS_DEPTH(super); + size_t num = parent_num + 1; + + VALUE *superclasses = xmalloc(sizeof(VALUE) * num); + superclasses[parent_num] = super; + if (parent_num > 0) { + memcpy(superclasses, RCLASS_SUPERCLASSES(super), sizeof(VALUE) * parent_num); + } + + RCLASS_SUPERCLASSES(klass) = superclasses; + RCLASS_SUPERCLASS_DEPTH(klass) = num; +} + void rb_check_inheritable(VALUE super) { @@ -667,6 +724,9 @@ make_metaclass(VALUE klass) while (RB_TYPE_P(super, T_ICLASS)) super = RCLASS_SUPER(super); RCLASS_SET_SUPER(metaclass, super ? ENSURE_EIGENCLASS(super) : rb_cClass); + // Full class ancestry may not have been filled until we reach here. + rb_class_update_superclasses(METACLASS_OF(metaclass)); + return metaclass; } diff --git a/gc.c b/gc.c index 35b9598c2a..1061b506bf 100644 --- a/gc.c +++ b/gc.c @@ -3187,6 +3187,7 @@ obj_free(rb_objspace_t *objspace, VALUE obj) rb_class_remove_subclass_head(obj); rb_class_remove_from_module_subclasses(obj); rb_class_remove_from_super_subclasses(obj); + rb_class_remove_superclasses(obj); #if SIZEOF_SERIAL_T != SIZEOF_VALUE && USE_RVARGC xfree(RCLASS(obj)->class_serial_ptr); #endif @@ -4619,6 +4620,7 @@ obj_memsize_of(VALUE obj, int use_all_types) if (RCLASS_CC_TBL(obj)) { size += cc_table_memsize(RCLASS_CC_TBL(obj)); } + size += RCLASS_SUPERCLASS_DEPTH(obj) * sizeof(VALUE); #if !USE_RVARGC size += sizeof(rb_classext_t); #endif @@ -10032,6 +10034,14 @@ update_class_ext(rb_objspace_t *objspace, rb_classext_t *ext) } } +static void +update_superclasses(rb_objspace_t *objspace, VALUE obj) +{ + for (size_t i = 0; i < RCLASS_SUPERCLASS_DEPTH(obj); i++) { + UPDATE_IF_MOVED(objspace, RCLASS_SUPERCLASSES(obj)[i]); + } +} + static void gc_update_object_references(rb_objspace_t *objspace, VALUE obj) { @@ -10049,6 +10059,7 @@ gc_update_object_references(rb_objspace_t *objspace, VALUE obj) update_m_tbl(objspace, RCLASS_M_TBL(obj)); update_cc_tbl(objspace, obj); update_cvc_tbl(objspace, obj); + update_superclasses(objspace, obj); gc_update_tbl_refs(objspace, RCLASS_IV_TBL(obj)); diff --git a/internal/class.h b/internal/class.h index d4c1c72414..c6151299c7 100644 --- a/internal/class.h +++ b/internal/class.h @@ -47,6 +47,8 @@ struct rb_classext_struct { struct rb_id_table *callable_m_tbl; struct rb_id_table *cc_tbl; /* ID -> [[ci, cc1], cc2, ...] */ struct rb_id_table *cvc_tbl; + size_t superclass_depth; + VALUE *superclasses; struct rb_subclass_entry *subclasses; struct rb_subclass_entry *subclass_entry; /** @@ -117,6 +119,8 @@ typedef struct rb_classext_struct rb_classext_t; #define RCLASS_MODULE_SUBCLASS_ENTRY(c) (RCLASS_EXT(c)->module_subclass_entry) #define RCLASS_ALLOCATOR(c) (RCLASS_EXT(c)->allocator) #define RCLASS_SUBCLASSES(c) (RCLASS_EXT(c)->subclasses) +#define RCLASS_SUPERCLASS_DEPTH(c) (RCLASS_EXT(c)->superclass_depth) +#define RCLASS_SUPERCLASSES(c) (RCLASS_EXT(c)->superclasses) #define RICLASS_IS_ORIGIN FL_USER5 #define RCLASS_CLONED FL_USER6 @@ -125,6 +129,8 @@ typedef struct rb_classext_struct rb_classext_t; /* class.c */ void rb_class_subclass_add(VALUE super, VALUE klass); void rb_class_remove_from_super_subclasses(VALUE); +void rb_class_update_superclasses(VALUE); +void rb_class_remove_superclasses(VALUE); void rb_class_remove_subclass_head(VALUE); int rb_singleton_class_internal_p(VALUE sklass); VALUE rb_class_boot(VALUE); @@ -197,6 +203,7 @@ RCLASS_SET_SUPER(VALUE klass, VALUE super) rb_class_subclass_add(super, klass); } RB_OBJ_WRITE(klass, &RCLASS(klass)->super, super); + rb_class_update_superclasses(klass); return super; } diff --git a/object.c b/object.c index ff94469292..84acf0aada 100644 --- a/object.c +++ b/object.c @@ -757,6 +757,26 @@ rb_obj_is_instance_of(VALUE obj, VALUE c) return RBOOL(rb_obj_class(obj) == c); } +// Returns whether c is a proper (c != cl) subclass of cl +// Both c and cl must be T_CLASS +static VALUE +class_search_class_ancestor(VALUE cl, VALUE c) +{ + RUBY_ASSERT(RB_TYPE_P(c, T_CLASS)); + RUBY_ASSERT(RB_TYPE_P(cl, T_CLASS)); + + size_t c_depth = RCLASS_SUPERCLASS_DEPTH(c); + size_t cl_depth = RCLASS_SUPERCLASS_DEPTH(cl); + VALUE *classes = RCLASS_SUPERCLASSES(cl); + + // If c's inheritance chain is longer, it cannot be an ancestor + // We are checking for a proper subclass so don't check if they are equal + if (cl_depth <= c_depth) + return Qfalse; + + // Otherwise check that c is in cl's inheritance chain + return RBOOL(classes[c_depth] == c); +} /* * call-seq: @@ -791,21 +811,34 @@ rb_obj_is_kind_of(VALUE obj, VALUE c) { VALUE cl = CLASS_OF(obj); - RUBY_ASSERT(cl); + RUBY_ASSERT(RB_TYPE_P(cl, T_CLASS)); + + // Fastest path: If the object's class is an exact match we know `c` is a + // class without checking type and can return immediately. + if (cl == c) return Qtrue; + + // Fast path: Both are T_CLASS + if (LIKELY(RB_TYPE_P(c, T_CLASS))) { + return class_search_class_ancestor(cl, c); + } // Note: YJIT needs this function to never allocate and never raise when // `c` is a class or a module. c = class_or_module_required(c); - return RBOOL(class_search_ancestor(cl, RCLASS_ORIGIN(c))); + c = RCLASS_ORIGIN(c); + + // Slow path: check each ancestor in the linked list and its method table + return RBOOL(class_search_ancestor(cl, c)); } + static VALUE class_search_ancestor(VALUE cl, VALUE c) { while (cl) { - if (cl == c || RCLASS_M_TBL(cl) == RCLASS_M_TBL(c)) - return cl; - cl = RCLASS_SUPER(cl); + if (cl == c || RCLASS_M_TBL(cl) == RCLASS_M_TBL(c)) + return cl; + cl = RCLASS_SUPER(cl); } return 0; } -- cgit v1.2.3