author     Jean Boussier <jean.boussier@gmail.com>    2026-04-03 11:13:26 +0100
committer  Jean Boussier <jean.boussier@gmail.com>    2026-04-08 22:24:38 +0200
commit     82470d8b9b4a5958b5358e3526316e2b9bd9adce (patch)
tree       27b1f55546f7e34778d838407fd8946afcabc1fb
parent     d6528d631959c88db6062a39e201a0f3e89e691f (diff)
Allow fixed size hashes to be allocated in smaller slots
When we allocate an RHash using `rb_hash_new_capa()`, if `capa` is larger than `8` it is directly allocated as an `st_table` in an `80B` slot. However, if the requested size is less than or equal to `8`, we allocate it as an `ar_table` in a `160B` slot.

Since most hashes are allocated mutable, we have to be able to accommodate as many as 8 `ar_table` entries regardless. However, there are cases where we know the Hash won't ever be resized; that is notably the case for all the "literal" hashes allocated by the compiler. These are immediately frozen and hidden upon being constructed, hence we know for sure they won't ever be resized. This allows us to allocate the smaller ones in smaller slots:

```
size: 0, slot_size: 32
size: 1, slot_size: 48
size: 2, slot_size: 64
size: 3, slot_size: 80
size: 4, slot_size: 96
size: 5, slot_size: 112
size: 6, slot_size: 128
size: 7, slot_size: 144
size: 8, slot_size: 160
```

```ruby
require "objspace"

p ObjectSpace.memsize_of({}.freeze)                                               # => 40
p ObjectSpace.memsize_of({a: 1}.freeze)                                           # => 80
p ObjectSpace.memsize_of({a: 1, b: 2}.freeze)                                     # => 80
p ObjectSpace.memsize_of({a: 1, b: 2, c: 3}.freeze)                               # => 80
p ObjectSpace.memsize_of({a: 1, b: 2, c: 3, d: 4}.freeze)                         # => 160
p ObjectSpace.memsize_of({a: 1, b: 2, c: 3, d: 4, e: 5}.freeze)                   # => 160
p ObjectSpace.memsize_of({a: 1, b: 2, c: 3, d: 4, e: 5, f: 6}.freeze)             # => 160
p ObjectSpace.memsize_of({a: 1, b: 2, c: 3, d: 4, e: 5, f: 6, g: 7}.freeze)       # => 160
p ObjectSpace.memsize_of({a: 1, b: 2, c: 3, d: 4, e: 5, f: 6, g: 7, h: 8}.freeze) # => 160
```
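The slot sizes in the table fall out of the allocation formula used by `rb_hash_alloc_fixed_size()` in the hash.c hunk below: `sizeof(struct RHash) + offsetof(ar_table, pairs) + size * sizeof(ar_table_pair)`. Here is a minimal standalone sketch of that math; the concrete sizes (a 16B `struct RHash` header, an 8B `ar_hint` word before the pairs, 16B per key/value pair) and the rounding of requests up to the next 16-byte GC slot are assumptions for a 64-bit build inferred from the table above, not values taken from this patch:

```c
#include <stdio.h>
#include <stddef.h>

int main(void)
{
    /* Assumed stand-ins for sizeof(struct RHash),
     * offsetof(ar_table, pairs) and sizeof(ar_table_pair). */
    const size_t rhash_size = 16; /* struct RBasic: flags + klass */
    const size_t ar_hint    = 8;  /* the ar_hint word before the pairs */
    const size_t pair_size  = 16; /* one key VALUE + one value VALUE */

    for (size_t size = 0; size <= 8; size++) {
        size_t requested = rhash_size + ar_hint + size * pair_size;
        /* Assume the GC rounds each request up to the next
         * 16-byte slot size, as the table above implies. */
        size_t slot = (requested + 15) & ~(size_t)15;
        printf("size: %zu, slot_size: %zu\n", size, slot);
    }
    return 0;
}
```

Running this sketch reproduces the `size`/`slot_size` table above, from the 32B slot for an empty frozen hash up to the unchanged 160B slot for 8 entries.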
-rw-r--r--  compile.c        | 10
-rw-r--r--  hash.c           | 26
-rw-r--r--  internal/hash.h  |  1
-rw-r--r--  prism_compile.c  |  8
4 files changed, 32 insertions, 13 deletions
diff --git a/compile.c b/compile.c
index d39438c8ba..e6748d38b3 100644
--- a/compile.c
+++ b/compile.c
@@ -5372,10 +5372,10 @@ compile_hash(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, int meth
if (!RB_SPECIAL_CONST_P(elem[1])) RB_OBJ_SET_FROZEN_SHAREABLE(elem[1]);
rb_ary_cat(ary, elem, 2);
}
- VALUE hash = rb_hash_new_with_size(RARRAY_LEN(ary) / 2);
+ VALUE hash = rb_hash_alloc_fixed_size(Qfalse, RARRAY_LEN(ary) / 2);
rb_hash_bulk_insert(RARRAY_LEN(ary), RARRAY_CONST_PTR(ary), hash);
RB_GC_GUARD(ary);
- hash = RB_OBJ_SET_FROZEN_SHAREABLE(rb_obj_hide(hash));
+ hash = RB_OBJ_SET_FROZEN_SHAREABLE(hash);
/* Emit optimized code */
FLUSH_CHUNK();
@@ -12167,7 +12167,7 @@ iseq_build_from_ary_body(rb_iseq_t *iseq, LINK_ANCHOR *const anchor,
case TS_CDHASH:
{
int i;
- VALUE map = rb_hash_new_with_size(RARRAY_LEN(op)/2);
+ VALUE map = rb_hash_alloc_fixed_size(Qfalse, RARRAY_LEN(op)/2);
RHASH_TBL_RAW(map)->type = &cdhash_type;
op = rb_to_array_type(op);
@@ -12179,7 +12179,7 @@ iseq_build_from_ary_body(rb_iseq_t *iseq, LINK_ANCHOR *const anchor,
rb_hash_aset(map, key, (VALUE)label | 1);
}
RB_GC_GUARD(op);
- RB_OBJ_SET_SHAREABLE(rb_obj_hide(map)); // allow mutation while compiling
+ RB_OBJ_SET_SHAREABLE(map); // allow mutation while compiling
argv[j] = map;
RB_OBJ_WRITTEN(iseq, Qundef, map);
}
@@ -14334,7 +14334,7 @@ static VALUE
ibf_load_object_hash(const struct ibf_load *load, const struct ibf_object_header *header, ibf_offset_t offset)
{
long len = (long)ibf_load_small_value(load, &offset);
- VALUE obj = rb_hash_new_with_size(len);
+ VALUE obj = header->frozen ? rb_hash_alloc_fixed_size(rb_cHash, len) : rb_hash_new_with_size(len);
int i;
for (i = 0; i < len; i++) {
diff --git a/hash.c b/hash.c
index 79dbd5d8e9..700c429d2a 100644
--- a/hash.c
+++ b/hash.c
@@ -1142,12 +1142,15 @@ ar_values(VALUE hash, st_data_t *values, st_index_t size)
static ar_table*
ar_copy(VALUE hash1, VALUE hash2)
{
+ RUBY_ASSERT(rb_gc_obj_slot_size(hash1) >= sizeof(struct RHash) + sizeof(ar_table));
ar_table *old_tab = RHASH_AR_TABLE(hash2);
ar_table *new_tab = RHASH_AR_TABLE(hash1);
- *new_tab = *old_tab;
+ unsigned int bound = RHASH_AR_TABLE_BOUND(hash2);
+ new_tab->ar_hint.word = old_tab->ar_hint.word;
+ MEMCPY(&new_tab->pairs, &old_tab->pairs, ar_table_pair, bound);
RHASH_AR_TABLE(hash1)->ar_hint.word = RHASH_AR_TABLE(hash2)->ar_hint.word;
- RHASH_AR_TABLE_BOUND_SET(hash1, RHASH_AR_TABLE_BOUND(hash2));
+ RHASH_AR_TABLE_BOUND_SET(hash1, bound);
RHASH_AR_TABLE_SIZE_SET(hash1, RHASH_AR_TABLE_SIZE(hash2));
rb_gc_writebarrier_remember(hash1);
@@ -1490,6 +1493,23 @@ rb_hash_new_capa(long capa)
return rb_hash_new_with_size((st_index_t)capa);
}
+VALUE
+rb_hash_alloc_fixed_size(VALUE klass, st_index_t size)
+{
+ VALUE ret;
+ if (size > RHASH_AR_TABLE_MAX_SIZE) {
+ ret = hash_alloc_flags(klass, 0, Qnil, true);
+ hash_st_table_init(ret, &objhash, size);
+ }
+ else {
+ size_t slot_size = sizeof(struct RHash) + offsetof(ar_table, pairs) + size * sizeof(ar_table_pair);
+ ret = rb_wb_protected_newobj_of(GET_EC(), klass, T_HASH, 0, slot_size);
+ }
+
+ RHASH_SET_IFNONE(ret, Qnil);
+ return ret;
+}
+
static VALUE
hash_copy(VALUE ret, VALUE hash)
{
@@ -7475,7 +7495,7 @@ Init_Hash(void)
rb_define_singleton_method(rb_cHash, "ruby2_keywords_hash?", rb_hash_s_ruby2_keywords_hash_p, 1);
rb_define_singleton_method(rb_cHash, "ruby2_keywords_hash", rb_hash_s_ruby2_keywords_hash, 1);
- rb_cHash_empty_frozen = rb_hash_freeze(rb_hash_new());
+ rb_cHash_empty_frozen = rb_hash_freeze(rb_hash_alloc_fixed_size(rb_cHash, 0));
RB_OBJ_SET_SHAREABLE(rb_cHash_empty_frozen);
rb_vm_register_global_object(rb_cHash_empty_frozen);
diff --git a/internal/hash.h b/internal/hash.h
index 03cd830506..9688478d1e 100644
--- a/internal/hash.h
+++ b/internal/hash.h
@@ -88,6 +88,7 @@ int rb_hash_stlike_delete(VALUE hash, st_data_t *pkey, st_data_t *pval);
int rb_hash_stlike_foreach_with_replace(VALUE hash, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg);
int rb_hash_stlike_update(VALUE hash, st_data_t key, st_update_callback_func *func, st_data_t arg);
bool rb_hash_default_unredefined(VALUE hash);
+VALUE rb_hash_alloc_fixed_size(VALUE klass, st_index_t size);
VALUE rb_ident_hash_new_with_size(st_index_t size);
void rb_hash_free(VALUE hash);
RUBY_EXTERN VALUE rb_cHash_empty_frozen;
diff --git a/prism_compile.c b/prism_compile.c
index b693f2e05a..8f3f027f18 100644
--- a/prism_compile.c
+++ b/prism_compile.c
@@ -876,11 +876,10 @@ pm_static_literal_value(rb_iseq_t *iseq, const pm_node_t *node, pm_scope_node_t
rb_ary_cat(array, pair, 2);
}
- VALUE value = rb_hash_new_with_size(elements->size);
+ VALUE value = rb_hash_alloc_fixed_size(Qfalse, elements->size);
rb_hash_bulk_insert(RARRAY_LEN(array), RARRAY_CONST_PTR(array), value);
RB_GC_GUARD(array);
- value = rb_obj_hide(value);
RB_OBJ_SET_FROZEN_SHAREABLE(value);
return value;
}
@@ -1560,10 +1559,9 @@ pm_compile_hash_elements(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_l
}
index --;
- VALUE hash = rb_hash_new_with_size(RARRAY_LEN(ary) / 2);
+ VALUE hash = rb_hash_alloc_fixed_size(Qfalse, RARRAY_LEN(ary) / 2);
rb_hash_bulk_insert(RARRAY_LEN(ary), RARRAY_CONST_PTR(ary), hash);
RB_GC_GUARD(ary);
- hash = rb_obj_hide(hash);
RB_OBJ_SET_FROZEN_SHAREABLE(hash);
// Emit optimized code.
@@ -5772,7 +5770,7 @@ pm_compile_shareable_constant_literal(rb_iseq_t *iseq, const pm_node_t *node, pm
}
case PM_HASH_NODE: {
const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
- VALUE result = rb_hash_new_capa(cast->elements.size);
+ VALUE result = rb_hash_alloc_fixed_size(rb_cHash, cast->elements.size);
for (size_t index = 0; index < cast->elements.size; index++) {
const pm_node_t *element = cast->elements.nodes[index];