summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Zhu <peter@peterzhu.ca>2023-03-03 16:05:01 -0500
committerPeter Zhu <peter@peterzhu.ca>2023-11-07 15:48:06 -0500
commit392238e3fd76beb923de1ba3f8d8d6bd28c7030e (patch)
tree65d6286a223f9eb4527c2d648c27c4053c68e444
parentbc07b0b9e1d55821acaa0effea67a9885a3bb56d (diff)
Implement embedded TypedData objects
This commit adds a new flag RUBY_TYPED_EMBEDDABLE that allows the data of a TypedData object to be embedded after the object itself. This will improve cache locality and allow us to save the 8 byte data pointer. Co-Authored-By: Jean Boussier <byroot@ruby-lang.org>
-rw-r--r--error.c2
-rw-r--r--gc.c61
-rw-r--r--include/ruby/internal/core/rtypeddata.h39
3 files changed, 88 insertions, 14 deletions
diff --git a/error.c b/error.c
index 878ec81d13..041ab834f3 100644
--- a/error.c
+++ b/error.c
@@ -1322,7 +1322,7 @@ rb_check_typeddata(VALUE obj, const rb_data_type_t *data_type)
actual = rb_str_new_cstr(name); /* or rb_fstring_cstr? not sure... */
}
else {
- return DATA_PTR(obj);
+ return RTYPEDDATA_GET_DATA(obj);
}
const char *expected = data_type->wrap_struct_name;
diff --git a/gc.c b/gc.c
index efbb254168..d38f35837d 100644
--- a/gc.c
+++ b/gc.c
@@ -3142,19 +3142,42 @@ rb_data_object_zalloc(VALUE klass, size_t size, RUBY_DATA_FUNC dmark, RUBY_DATA_
return obj;
}
-VALUE
-rb_data_typed_object_wrap(VALUE klass, void *datap, const rb_data_type_t *type)
+static VALUE
+typed_data_alloc(VALUE klass, VALUE typed_flag, void *datap, const rb_data_type_t *type, size_t size)
{
RBIMPL_NONNULL_ARG(type);
if (klass) rb_data_object_check(klass);
bool wb_protected = (type->flags & RUBY_FL_WB_PROTECTED) || !type->function.dmark;
- return newobj_of(GET_RACTOR(), klass, T_DATA, (VALUE)type, (VALUE)1, (VALUE)datap, wb_protected, sizeof(struct RTypedData));
+ return newobj_of(GET_RACTOR(), klass, T_DATA, (VALUE)type, 1 | typed_flag, (VALUE)datap, wb_protected, size);
+}
+
+VALUE
+rb_data_typed_object_wrap(VALUE klass, void *datap, const rb_data_type_t *type)
+{
+ if (UNLIKELY(type->flags & RUBY_TYPED_EMBEDDABLE)) {
+ rb_raise(rb_eTypeError, "Cannot wrap an embeddable TypedData");
+ }
+
+ return typed_data_alloc(klass, 0, datap, type, sizeof(struct RTypedData));
}
VALUE
rb_data_typed_object_zalloc(VALUE klass, size_t size, const rb_data_type_t *type)
{
- VALUE obj = rb_data_typed_object_wrap(klass, 0, type);
+ if (type->flags & RUBY_TYPED_EMBEDDABLE) {
+ if (!(type->flags & RUBY_TYPED_FREE_IMMEDIATELY)) {
+ rb_raise(rb_eTypeError, "Embeddable TypedData must be freed immediately");
+ }
+
+ size_t embed_size = offsetof(struct RTypedData, data) + size;
+ if (rb_gc_size_allocatable_p(embed_size)) {
+ VALUE obj = typed_data_alloc(klass, TYPED_DATA_EMBEDDED, 0, type, embed_size);
+ memset((char *)obj + offsetof(struct RTypedData, data), 0, size);
+ return obj;
+ }
+ }
+
+ VALUE obj = typed_data_alloc(klass, 0, NULL, type, sizeof(struct RTypedData));
DATA_PTR(obj) = xcalloc(1, size);
return obj;
}
@@ -3162,14 +3185,23 @@ rb_data_typed_object_zalloc(VALUE klass, size_t size, const rb_data_type_t *type
size_t
rb_objspace_data_type_memsize(VALUE obj)
{
+ size_t size = 0;
if (RTYPEDDATA_P(obj)) {
const rb_data_type_t *type = RTYPEDDATA_TYPE(obj);
const void *ptr = RTYPEDDATA_DATA(obj);
+
+ if (RTYPEDDATA_TYPE(obj)->flags & RUBY_TYPED_EMBEDDABLE && !RTYPEDDATA_EMBEDDED_P(obj)) {
+#ifdef HAVE_MALLOC_USABLE_SIZE
+ size += malloc_usable_size((void *)ptr);
+#endif
+ }
+
if (ptr && type->function.dsize) {
- return type->function.dsize(ptr);
+ size += type->function.dsize(ptr);
}
}
- return 0;
+
+ return size;
}
const char *
@@ -3454,17 +3486,23 @@ rb_data_free(rb_objspace_t *objspace, VALUE obj)
if (dfree) {
if (dfree == RUBY_DEFAULT_FREE) {
- xfree(data);
- RB_DEBUG_COUNTER_INC(obj_data_xfree);
+ if (!RTYPEDDATA_EMBEDDED_P(obj)) {
+ xfree(data);
+ RB_DEBUG_COUNTER_INC(obj_data_xfree);
+ }
}
else if (free_immediately) {
(*dfree)(data);
+ if (RTYPEDDATA_TYPE(obj)->flags & RUBY_TYPED_EMBEDDABLE && !RTYPEDDATA_EMBEDDED_P(obj)) {
+ xfree(data);
+ }
+
RB_DEBUG_COUNTER_INC(obj_data_imm_free);
}
else {
- RB_DEBUG_COUNTER_INC(obj_data_zombie);
make_zombie(objspace, obj, dfree, data);
- return false;
+ RB_DEBUG_COUNTER_INC(obj_data_zombie);
+ return FALSE;
}
}
else {
@@ -7313,7 +7351,8 @@ gc_mark_children(rb_objspace_t *objspace, VALUE obj)
case T_DATA:
{
- void *const ptr = DATA_PTR(obj);
+ void *const ptr = RTYPEDDATA_P(obj) ? RTYPEDDATA_GET_DATA(obj) : DATA_PTR(obj);
+
if (ptr) {
if (RTYPEDDATA_P(obj) && gc_declarative_marking_p(any->as.typeddata.type)) {
gc_mark_from_offset(objspace, obj);
diff --git a/include/ruby/internal/core/rtypeddata.h b/include/ruby/internal/core/rtypeddata.h
index c7904746fd..aa22696306 100644
--- a/include/ruby/internal/core/rtypeddata.h
+++ b/include/ruby/internal/core/rtypeddata.h
@@ -114,6 +114,8 @@
#define RUBY_TYPED_PROMOTED1 RUBY_TYPED_PROMOTED1
/** @endcond */
+#define TYPED_DATA_EMBEDDED 2
+
/**
* @private
*
@@ -137,6 +139,8 @@ rbimpl_typeddata_flags {
*/
RUBY_TYPED_FREE_IMMEDIATELY = 1,
+ RUBY_TYPED_EMBEDDABLE = 2,
+
/**
* This flag has something to do with Ractor. Multiple Ractors run without
* protecting each other. Sharing an object among Ractors is basically
@@ -460,7 +464,7 @@ RBIMPL_SYMBOL_EXPORT_END()
*/
#define TypedData_Make_Struct0(result, klass, type, size, data_type, sval) \
VALUE result = rb_data_typed_object_zalloc(klass, size, data_type); \
- (sval) = RBIMPL_CAST((type *)RTYPEDDATA_DATA(result)); \
+ (sval) = RTYPEDDATA_GET_DATA(result); \
RBIMPL_CAST(/*suppress unused variable warnings*/(void)(sval))
/**
@@ -511,6 +515,36 @@ RBIMPL_SYMBOL_EXPORT_END()
#define TypedData_Get_Struct(obj,type,data_type,sval) \
((sval) = RBIMPL_CAST((type *)rb_check_typeddata((obj), (data_type))))
+static inline bool
+RTYPEDDATA_EMBEDDED_P(VALUE obj)
+{
+#if RUBY_DEBUG
+ if (RB_UNLIKELY(!RB_TYPE_P(obj, RUBY_T_DATA))) {
+ Check_Type(obj, RUBY_T_DATA);
+ RBIMPL_UNREACHABLE_RETURN(false);
+ }
+#endif
+
+ return RTYPEDDATA(obj)->typed_flag & TYPED_DATA_EMBEDDED;
+}
+
+static inline void *
+RTYPEDDATA_GET_DATA(VALUE obj)
+{
+#if RUBY_DEBUG
+ if (RB_UNLIKELY(!RB_TYPE_P(obj, RUBY_T_DATA))) {
+ Check_Type(obj, RUBY_T_DATA);
+ RBIMPL_UNREACHABLE_RETURN(false);
+ }
+#endif
+
+ /* We reuse the data pointer in embedded TypedData. We can't use offsetof
+ * since RTypedData a non-POD type in C++. */
+ const size_t embedded_typed_data_size = sizeof(struct RTypedData) - sizeof(void *);
+
+ return RTYPEDDATA_EMBEDDED_P(obj) ? (char *)obj + embedded_typed_data_size : RTYPEDDATA(obj)->data;
+}
+
RBIMPL_ATTR_PURE()
RBIMPL_ATTR_ARTIFICIAL()
/**
@@ -527,7 +561,8 @@ RBIMPL_ATTR_ARTIFICIAL()
static inline bool
rbimpl_rtypeddata_p(VALUE obj)
{
- return RTYPEDDATA(obj)->typed_flag == 1;
+ VALUE typed_flag = RTYPEDDATA(obj)->typed_flag;
+ return typed_flag != 0 && typed_flag <= 3;
}
RBIMPL_ATTR_PURE_UNLESS_DEBUG()