From 392238e3fd76beb923de1ba3f8d8d6bd28c7030e Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 3 Mar 2023 16:05:01 -0500 Subject: Implement embedded TypedData objects This commit adds a new flag RUBY_TYPED_EMBEDDABLE that allows the data of a TypedData object to be embedded after the object itself. This will improve cache locality and allow us to save the 8 byte data pointer. Co-Authored-By: Jean Boussier --- error.c | 2 +- gc.c | 61 +++++++++++++++++++++++++++------ include/ruby/internal/core/rtypeddata.h | 39 +++++++++++++++++++-- 3 files changed, 88 insertions(+), 14 deletions(-) diff --git a/error.c b/error.c index 878ec81d13..041ab834f3 100644 --- a/error.c +++ b/error.c @@ -1322,7 +1322,7 @@ rb_check_typeddata(VALUE obj, const rb_data_type_t *data_type) actual = rb_str_new_cstr(name); /* or rb_fstring_cstr? not sure... */ } else { - return DATA_PTR(obj); + return RTYPEDDATA_GET_DATA(obj); } const char *expected = data_type->wrap_struct_name; diff --git a/gc.c b/gc.c index efbb254168..d38f35837d 100644 --- a/gc.c +++ b/gc.c @@ -3142,19 +3142,42 @@ rb_data_object_zalloc(VALUE klass, size_t size, RUBY_DATA_FUNC dmark, RUBY_DATA_ return obj; } -VALUE -rb_data_typed_object_wrap(VALUE klass, void *datap, const rb_data_type_t *type) +static VALUE +typed_data_alloc(VALUE klass, VALUE typed_flag, void *datap, const rb_data_type_t *type, size_t size) { RBIMPL_NONNULL_ARG(type); if (klass) rb_data_object_check(klass); bool wb_protected = (type->flags & RUBY_FL_WB_PROTECTED) || !type->function.dmark; - return newobj_of(GET_RACTOR(), klass, T_DATA, (VALUE)type, (VALUE)1, (VALUE)datap, wb_protected, sizeof(struct RTypedData)); + return newobj_of(GET_RACTOR(), klass, T_DATA, (VALUE)type, 1 | typed_flag, (VALUE)datap, wb_protected, size); +} + +VALUE +rb_data_typed_object_wrap(VALUE klass, void *datap, const rb_data_type_t *type) +{ + if (UNLIKELY(type->flags & RUBY_TYPED_EMBEDDABLE)) { + rb_raise(rb_eTypeError, "Cannot wrap an embeddable TypedData"); + } + + return 
typed_data_alloc(klass, 0, datap, type, sizeof(struct RTypedData)); } VALUE rb_data_typed_object_zalloc(VALUE klass, size_t size, const rb_data_type_t *type) { - VALUE obj = rb_data_typed_object_wrap(klass, 0, type); + if (type->flags & RUBY_TYPED_EMBEDDABLE) { + if (!(type->flags & RUBY_TYPED_FREE_IMMEDIATELY)) { + rb_raise(rb_eTypeError, "Embeddable TypedData must be freed immediately"); + } + + size_t embed_size = offsetof(struct RTypedData, data) + size; + if (rb_gc_size_allocatable_p(embed_size)) { + VALUE obj = typed_data_alloc(klass, TYPED_DATA_EMBEDDED, 0, type, embed_size); + memset((char *)obj + offsetof(struct RTypedData, data), 0, size); + return obj; + } + } + + VALUE obj = typed_data_alloc(klass, 0, NULL, type, sizeof(struct RTypedData)); DATA_PTR(obj) = xcalloc(1, size); return obj; } @@ -3162,14 +3185,23 @@ rb_data_typed_object_zalloc(VALUE klass, size_t size, const rb_data_type_t *type size_t rb_objspace_data_type_memsize(VALUE obj) { + size_t size = 0; if (RTYPEDDATA_P(obj)) { const rb_data_type_t *type = RTYPEDDATA_TYPE(obj); const void *ptr = RTYPEDDATA_DATA(obj); + + if (RTYPEDDATA_TYPE(obj)->flags & RUBY_TYPED_EMBEDDABLE && !RTYPEDDATA_EMBEDDED_P(obj)) { +#ifdef HAVE_MALLOC_USABLE_SIZE + size += malloc_usable_size((void *)ptr); +#endif + } + if (ptr && type->function.dsize) { - return type->function.dsize(ptr); + size += type->function.dsize(ptr); } } - return 0; + + return size; } const char * @@ -3454,17 +3486,23 @@ rb_data_free(rb_objspace_t *objspace, VALUE obj) if (dfree) { if (dfree == RUBY_DEFAULT_FREE) { - xfree(data); - RB_DEBUG_COUNTER_INC(obj_data_xfree); + if (!RTYPEDDATA_EMBEDDED_P(obj)) { + xfree(data); + RB_DEBUG_COUNTER_INC(obj_data_xfree); + } } else if (free_immediately) { (*dfree)(data); + if (RTYPEDDATA_TYPE(obj)->flags & RUBY_TYPED_EMBEDDABLE && !RTYPEDDATA_EMBEDDED_P(obj)) { + xfree(data); + } + RB_DEBUG_COUNTER_INC(obj_data_imm_free); } else { - RB_DEBUG_COUNTER_INC(obj_data_zombie); make_zombie(objspace, obj, dfree, 
data); - return false; + RB_DEBUG_COUNTER_INC(obj_data_zombie); + return FALSE; } } else { @@ -7313,7 +7351,8 @@ gc_mark_children(rb_objspace_t *objspace, VALUE obj) case T_DATA: { - void *const ptr = DATA_PTR(obj); + void *const ptr = RTYPEDDATA_P(obj) ? RTYPEDDATA_GET_DATA(obj) : DATA_PTR(obj); + if (ptr) { if (RTYPEDDATA_P(obj) && gc_declarative_marking_p(any->as.typeddata.type)) { gc_mark_from_offset(objspace, obj); diff --git a/include/ruby/internal/core/rtypeddata.h b/include/ruby/internal/core/rtypeddata.h index c7904746fd..aa22696306 100644 --- a/include/ruby/internal/core/rtypeddata.h +++ b/include/ruby/internal/core/rtypeddata.h @@ -114,6 +114,8 @@ #define RUBY_TYPED_PROMOTED1 RUBY_TYPED_PROMOTED1 /** @endcond */ +#define TYPED_DATA_EMBEDDED 2 + /** * @private * @@ -137,6 +139,8 @@ rbimpl_typeddata_flags { */ RUBY_TYPED_FREE_IMMEDIATELY = 1, + RUBY_TYPED_EMBEDDABLE = 2, + /** * This flag has something to do with Ractor. Multiple Ractors run without * protecting each other. Sharing an object among Ractors is basically @@ -460,7 +464,7 @@ RBIMPL_SYMBOL_EXPORT_END() */ #define TypedData_Make_Struct0(result, klass, type, size, data_type, sval) \ VALUE result = rb_data_typed_object_zalloc(klass, size, data_type); \ - (sval) = RBIMPL_CAST((type *)RTYPEDDATA_DATA(result)); \ + (sval) = RTYPEDDATA_GET_DATA(result); \ RBIMPL_CAST(/*suppress unused variable warnings*/(void)(sval)) /** @@ -511,6 +515,36 @@ RBIMPL_SYMBOL_EXPORT_END() #define TypedData_Get_Struct(obj,type,data_type,sval) \ ((sval) = RBIMPL_CAST((type *)rb_check_typeddata((obj), (data_type)))) +static inline bool +RTYPEDDATA_EMBEDDED_P(VALUE obj) +{ +#if RUBY_DEBUG + if (RB_UNLIKELY(!RB_TYPE_P(obj, RUBY_T_DATA))) { + Check_Type(obj, RUBY_T_DATA); + RBIMPL_UNREACHABLE_RETURN(false); + } +#endif + + return RTYPEDDATA(obj)->typed_flag & TYPED_DATA_EMBEDDED; +} + +static inline void * +RTYPEDDATA_GET_DATA(VALUE obj) +{ +#if RUBY_DEBUG + if (RB_UNLIKELY(!RB_TYPE_P(obj, RUBY_T_DATA))) { + Check_Type(obj, 
RUBY_T_DATA); + RBIMPL_UNREACHABLE_RETURN(false); + } +#endif + + /* We reuse the data pointer in embedded TypedData. We can't use offsetof + * since RTypedData is a non-POD type in C++. */ + const size_t embedded_typed_data_size = sizeof(struct RTypedData) - sizeof(void *); + + return RTYPEDDATA_EMBEDDED_P(obj) ? (char *)obj + embedded_typed_data_size : RTYPEDDATA(obj)->data; +} + RBIMPL_ATTR_PURE() RBIMPL_ATTR_ARTIFICIAL() /** @@ -527,7 +561,8 @@ RBIMPL_ATTR_ARTIFICIAL() static inline bool rbimpl_rtypeddata_p(VALUE obj) { - return RTYPEDDATA(obj)->typed_flag == 1; + VALUE typed_flag = RTYPEDDATA(obj)->typed_flag; + return typed_flag != 0 && typed_flag <= 3; } RBIMPL_ATTR_PURE_UNLESS_DEBUG() -- cgit v1.2.3