From 8f675cdd00e2c5b5a0f143f5e508dbbafdb20ccd Mon Sep 17 00:00:00 2001 From: ko1 Date: Tue, 30 Oct 2018 22:11:51 +0000 Subject: support theap for T_HASH. [Feature #14989] * hash.c, internal.h: support theap for small Hash. Introduce RHASH_ARRAY (li_table) besides st_table and small Hash (<=8 entries) are managed by an array data structure. This array data can be managed by theap. If st_table is needed, then converting array data to st_table data. For st_table using code, we prepare "stlike" APIs which accepts hash value and are very similar to st_ APIs. This work is based on the GSoC achievement by tacinight and refined by ko1. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@65454 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- .gdbinit | 8 +- array.c | 18 +- class.c | 9 +- compile.c | 4 +- debug_counter.h | 5 + ext/objspace/objspace.c | 2 +- gc.c | 93 +++- hash.c | 1299 +++++++++++++++++++++++++++++++++++++++++------ include/ruby/intern.h | 3 +- include/ruby/ruby.h | 8 +- include/ruby/st.h | 2 +- internal.h | 103 +++- marshal.c | 2 +- mjit_worker.c | 2 +- process.c | 8 +- st.c | 26 +- test/ruby/test_time.rb | 1 + thread.c | 10 +- transient_heap.c | 26 +- transient_heap.h | 4 +- vm_eval.c | 7 +- vm_insnhelper.c | 2 +- 22 files changed, 1395 insertions(+), 247 deletions(-) diff --git a/.gdbinit b/.gdbinit index 63cd706476..1806196a42 100644 --- a/.gdbinit +++ b/.gdbinit @@ -156,8 +156,12 @@ define rp else if ($flags & RUBY_T_MASK) == RUBY_T_HASH printf "%sT_HASH%s: ", $color_type, $color_end, - if ((struct RHash *)($arg0))->ntbl - printf "len=%ld ", ((struct RHash *)($arg0))->ntbl->num_entries + if (((struct RHash *)($arg0))->basic->flags & RHASH_ST_TABLE_FLAG) + printf "st len=%ld ", ((struct RHash *)($arg0))->as.st->num_entries + else + printf "li len=%ld bound=%ld ", \ + ((((struct RHash *)($arg0))->basic->flags & RHASH_ARRAY_LEN_MASK) >> RHASH_ARRAY_LEN_SHIFT), \ + ((((struct RHash *)($arg0))->basic->flags & RHASH_ARRAY_BOUND_MASK) >> RHASH_ARRAY_BOUND_SHIFT) end print (struct RHash *)($arg0) else diff --git a/array.c b/array.c index d735879b0b..2a3096c8f8 100644 --- a/array.c +++ b/array.c @@ -4420,11 +4420,11 @@ static inline void ary_recycle_hash(VALUE hash) { assert(RBASIC_CLASS(hash) == 0); - if (RHASH(hash)->ntbl) { - st_table *tbl = RHASH(hash)->ntbl; + if (RHASH_TABLE_P(hash)) { + st_table *tbl = RHASH_ST_TABLE(hash); st_free_table(tbl); + RHASH_CLEAR(hash); } - rb_gc_force_recycle(hash); } /* @@ -4467,7 +4467,7 @@ rb_ary_diff(VALUE ary1, VALUE ary2) hash = ary_make_hash(ary2); for (i=0; itype = &cdhash_type; + RHASH_TBL_RAW(literals)->type = &cdhash_type; CHECK(COMPILE(head, "case base", node->nd_head)); @@ -7990,7 +7990,7 @@ iseq_build_from_ary_body(rb_iseq_t *iseq, LINK_ANCHOR *const anchor, int i; VALUE map = rb_hash_new_with_size(RARRAY_LEN(op)/2); - rb_hash_tbl_raw(map)->type = &cdhash_type; + RHASH_TBL_RAW(map)->type = &cdhash_type; op = rb_to_array_type(op); for (i=0; ias.hash.ntbl) { #if USE_DEBUG_COUNTER - if (RHASH_SIZE(obj) >= 8) { - RB_DEBUG_COUNTER_INC(obj_hash_ge8); - } - if (RHASH_SIZE(obj) >= 4) { - RB_DEBUG_COUNTER_INC(obj_hash_ge4); - } - else { - RB_DEBUG_COUNTER_INC(obj_hash_under4); - } -#endif - st_free_table(RANY(obj)->as.hash.ntbl); - } + if (RHASH_SIZE(obj) >= 8) { + RB_DEBUG_COUNTER_INC(obj_hash_ge8); + } + else if (RHASH_SIZE(obj) >= 4) { + RB_DEBUG_COUNTER_INC(obj_hash_ge4); + } + else if (RHASH_SIZE(obj) >= 1) { + RB_DEBUG_COUNTER_INC(obj_hash_under4); + } else { RB_DEBUG_COUNTER_INC(obj_hash_empty); } + + if (RHASH_ARRAY_P(obj)) { + RB_DEBUG_COUNTER_INC(obj_hash_array); + } + else { + RB_DEBUG_COUNTER_INC(obj_hash_st); + } +#endif + if (/* RHASH_ARRAY_P(obj) */ !FL_TEST_RAW(obj, RHASH_ST_TABLE_FLAG)) { + li_table *tab = RHASH(obj)->as.li; + + if (tab) { + if (RHASH_TRANSIENT_P(obj)) { + RB_DEBUG_COUNTER_INC(obj_hash_transient); + } + else { + ruby_xfree(tab); + } + } + } + else { + GC_ASSERT(RHASH_TABLE_P(obj)); + st_free_table(RHASH(obj)->as.st); + } break; case T_REGEXP: if (RANY(obj)->as.regexp.ptr) { @@ -3337,8 +3357,12 @@ obj_memsize_of(VALUE obj, int use_all_types) size += rb_ary_memsize(obj); break; case T_HASH: - if (RHASH(obj)->ntbl) { - size += st_memsize(RHASH(obj)->ntbl); + if (RHASH_ARRAY_P(obj)) { + size += sizeof(li_table); + } + else { + VM_ASSERT(RHASH_ST_TABLE(obj) != NULL); + size += st_memsize(RHASH_ST_TABLE(obj)); } break; case T_REGEXP: @@ -3491,7 +3515,7 @@ count_objects(int argc, VALUE *argv, VALUE os) hash = rb_hash_new(); } else if (!RHASH_EMPTY_P(hash)) { - st_foreach(RHASH_TBL_RAW(hash), set_zero, hash); + rb_hash_stlike_foreach(hash, set_zero, hash); } rb_hash_aset(hash, ID2SYM(rb_intern("TOTAL")), SIZET2NUM(total)); rb_hash_aset(hash, ID2SYM(rb_intern("FREE")), SIZET2NUM(freed)); @@ -4225,7 +4249,23 @@ mark_keyvalue(st_data_t key, st_data_t value, st_data_t data) } static void -mark_hash(rb_objspace_t *objspace, st_table *tbl) +mark_hash(rb_objspace_t *objspace, VALUE hash) +{ + rb_hash_stlike_foreach(hash, mark_keyvalue, (st_data_t)objspace); + + if (RHASH_ARRAY_P(hash)) { + if (objspace->mark_func_data == NULL && RHASH_TRANSIENT_P(hash)) { + rb_transient_heap_mark(hash, RHASH_ARRAY(hash)); + } + } + else { + VM_ASSERT(!RHASH_TRANSIENT_P(hash)); + } + gc_mark(objspace, RHASH(hash)->ifnone); +} + +static void +mark_st(rb_objspace_t *objspace, st_table *tbl) { if (!tbl) return; st_foreach(tbl, mark_keyvalue, (st_data_t)objspace); @@ -4234,7 +4274,7 @@ mark_hash(rb_objspace_t *objspace, st_table *tbl) void rb_mark_hash(st_table *tbl) { - mark_hash(&rb_objspace, tbl); + mark_st(&rb_objspace, tbl); } static void @@ -4699,8 +4739,7 @@ gc_mark_children(rb_objspace_t *objspace, VALUE obj) break; case T_HASH: - mark_hash(objspace, any->as.hash.ntbl); - gc_mark(objspace, any->as.hash.ifnone); + mark_hash(objspace, obj); break; case T_STRING: @@ -9582,6 +9621,13 @@ rb_raw_obj_info(char *buff, const int buff_size, VALUE obj) { if (SPECIAL_CONST_P(obj)) { snprintf(buff, buff_size, "%s", obj_type_name(obj)); + + if (FIXNUM_P(obj)) { + snprintf(buff, buff_size, "%s %ld", buff, FIX2LONG(obj)); + } + else if (SYMBOL_P(obj)) { + snprintf(buff, buff_size, "%s %s", buff, rb_id2name(SYM2ID(obj))); + } } else { #define TF(c) ((c) != 0 ? "true" : "false") @@ -9658,6 +9704,13 @@ rb_raw_obj_info(char *buff, const int buff_size, VALUE obj) snprintf(buff, buff_size, "%s %s", buff, RSTRING_PTR(obj)); break; } + case T_HASH: { + snprintf(buff, buff_size, "%s [%c%c] %d", buff, + RHASH_ARRAY_P(obj) ? 'A' : 'S', + RHASH_TRANSIENT_P(obj) ? 'T' : ' ', + (int)RHASH_SIZE(obj)); + break; + } case T_CLASS: { VALUE class_path = rb_class_path_cached(obj); if (!NIL_P(class_path)) { diff --git a/hash.c b/hash.c index 7a8f3f0faf..3eb4310985 100644 --- a/hash.c +++ b/hash.c @@ -20,7 +20,9 @@ #include "id.h" #include "symbol.h" #include "gc.h" - +#include "debug_counter.h" +#include "transient_heap.h" +#include "ruby_assert.h" #ifdef __APPLE__ # ifdef HAVE_CRT_EXTERNS_H # include @@ -29,6 +31,10 @@ # endif #endif +#ifndef HASH_DEBUG +#define HASH_DEBUG 0 +#endif + #define HAS_EXTRA_STATES(hash, klass) ( \ ((klass = has_extra_methods(rb_obj_class(hash))) != 0) || \ FL_TEST((hash), FL_EXIVAR|FL_TAINT|HASH_PROC_DEFAULT) || \ @@ -299,6 +305,760 @@ static const struct st_hash_type identhash = { rb_ident_hash, }; +#define EQUAL(x,y) ((x) == (y) || (*objhash.compare)((x),(y)) == 0) +#define PTR_EQUAL(ptr, hash_val, key_) \ + ((ptr)->hash == (hash_val) && EQUAL((key_), (ptr)->key)) + +#define RESERVED_HASH_VAL ((st_hash_t) 0) +#define RESERVED_HASH_SUBSTITUTION_VAL (~(st_hash_t) 0) + +#define SET_KEY(entry, _key) (entry)->key = (_key) +#define SET_HASH(entry, _hash) (entry)->hash = (_hash) +#define SET_RECORD(entry, _value) (entry)->record = (_value) + +typedef st_data_t st_hash_t; + +static inline st_hash_t +do_hash(st_data_t key) +{ + st_hash_t hash = (st_hash_t)(*objhash.hash)(key); + return (RESERVED_HASH_VAL == hash) ? RESERVED_HASH_SUBSTITUTION_VAL : hash; +} + +static inline void +set_entry(li_table_entry *entry, st_data_t key, st_data_t val, st_hash_t hash) +{ + SET_HASH(entry, hash); + SET_KEY(entry, key); + SET_RECORD(entry, val); +} + +static inline void +clear_entry(li_table_entry* entry) +{ + SET_KEY(entry, Qundef); + SET_RECORD(entry, Qundef); + SET_HASH(entry, 0); +} + +static inline int +empty_entry(li_table_entry *entry) +{ + return entry->hash == 0; +} + +#define RHASH_ARRAY_SIZE_RAW(h) \ + ((long)((RBASIC(h)->flags & RHASH_ARRAY_SIZE_MASK) >> RHASH_ARRAY_SIZE_SHIFT)) + +#define RHASH_ARRAY_SIZE(h) (HASH_ASSERT(RHASH_ARRAY_P(h)), \ + RHASH_ARRAY_SIZE_RAW(h)) + +#define RHASH_ARRAY_BOUND_RAW(h) \ + ((long)((RBASIC(h)->flags >> RHASH_ARRAY_BOUND_SHIFT) & \ + (RHASH_ARRAY_BOUND_MASK >> RHASH_ARRAY_BOUND_SHIFT))) + +#define RHASH_ARRAY_BOUND(h) (HASH_ASSERT(RHASH_ARRAY_P(h)), \ + RHASH_ARRAY_BOUND_RAW(h)) + +#define RHASH_ST_TABLE_SET(h, s) rb_hash_st_table_set(h, s) +#define RHASH_TYPE(hash) (RHASH_ARRAY_P(hash) ? &objhash : RHASH_ST_TABLE(hash)->type) +#define RHASH_ARRAY_REF(hash, n) (&RHASH_ARRAY(hash)->entries[n]) + +#if HASH_DEBUG +#define hash_verify(hash) hash_verify_(hash, __FILE__, __LINE__) +#define HASH_ASSERT(expr) RUBY_ASSERT_MESG_WHEN(1, expr, #expr) + +void +rb_hash_dump(VALUE hash) +{ + rb_obj_info_dump(hash); + + if (RHASH_ARRAY_P(hash)) { + int i, n = 0, bound = RHASH_ARRAY_BOUND(hash); + + fprintf(stderr, " size:%d bound:%d\n", + (int)RHASH_ARRAY_SIZE(hash), (int)RHASH_ARRAY_BOUND(hash)); + + for (i=0; ihash; */ + k = cur_entry->key; + v = cur_entry->record; + fprintf(stderr, " %d key:%s val:%s\n", i, + rb_raw_obj_info(b1, 0x100, k), + rb_raw_obj_info(b2, 0x100, v)); + n++; + } + else { + fprintf(stderr, " %d empty\n", i); + } + } + } +} + +static VALUE +hash_verify_(VALUE hash, const char *file, int line) +{ + HASH_ASSERT(RB_TYPE_P(hash, T_HASH)); + + // hash_dump(hash); + + if (RHASH_ARRAY_P(hash)) { + int i, n = 0, bound = RHASH_ARRAY_BOUND(hash); + + for (i=0; ihash; + k = cur_entry->key; + v = cur_entry->record; + HASH_ASSERT(h != 0); + HASH_ASSERT(k != Qundef); + HASH_ASSERT(v != Qundef); + n++; + } + } + if (n != RHASH_ARRAY_SIZE(hash)) { + rb_bug("n:%d, RHASH_ARRAY_SIZE:%d", (int)n, (int)RHASH_ARRAY_SIZE(hash)); + } + } + else { + HASH_ASSERT(RHASH_ST_TABLE(hash) != NULL); + HASH_ASSERT(RHASH_ARRAY_SIZE_RAW(hash) == 0); + HASH_ASSERT(RHASH_ARRAY_BOUND_RAW(hash) == 0); + } + + if (RHASH_TRANSIENT_P(hash)) { + volatile st_data_t MAYBE_UNUSED(key) = RHASH_ARRAY_REF(hash, 0)->key; /* read */ + HASH_ASSERT(RHASH_ARRAY(hash) != NULL); + HASH_ASSERT(rb_transient_heap_managed_ptr_p(RHASH_ARRAY(hash))); + } + return hash; +} + +#else +#define hash_verify(h) ((void)0) +#define HASH_ASSERT(e) ((void)0) +#endif + +static inline int +RHASH_TABLE_EMPTY(VALUE hash) +{ + if (RHASH(hash)->as.li == NULL) { + HASH_ASSERT(RHASH_ARRAY_P(hash)); + return TRUE; + } + else { + return FALSE; + } +} + +MJIT_FUNC_EXPORTED int +rb_hash_array_p(VALUE hash) +{ + if (FL_TEST_RAW((hash), RHASH_ST_TABLE_FLAG)) { + HASH_ASSERT(RHASH(hash)->as.st != NULL); + return FALSE; + } + else { + return TRUE; + } +} + +struct li_table * +rb_hash_array(VALUE hash) +{ + HASH_ASSERT(RHASH_ARRAY_P(hash)); + return RHASH(hash)->as.li; +} + +MJIT_FUNC_EXPORTED st_table * +rb_hash_st_table(VALUE hash) +{ + HASH_ASSERT(!RHASH_ARRAY_P(hash)); + return RHASH(hash)->as.st; +} + +void +rb_hash_st_table_set(VALUE hash, st_table *st) +{ + HASH_ASSERT(st != NULL); + FL_SET_RAW((hash), RHASH_ST_TABLE_FLAG); + RHASH(hash)->as.st = st; +} + +static void +hash_array_set(VALUE hash, struct li_table *li) +{ + HASH_ASSERT(RHASH_ARRAY_P(hash)); + RHASH(hash)->as.li = li; + hash_verify(hash); +} + +#define RHASH_ARRAY_SET(h, a) hash_array_set(h, a) + +#define RHASH_SET_ST_FLAG(h) FL_SET_RAW(h, RHASH_ST_TABLE_FLAG) +#define RHASH_UNSET_ST_FLAG(h) FL_UNSET_RAW(h, RHASH_ST_TABLE_FLAG) +#define RHASH_SET_TRANSIENT_FLAG(h) FL_SET_RAW(h, RHASH_TRANSIENT_FLAG) +#define RHASH_UNSET_TRANSIENT_FLAG(h) FL_UNSET_RAW(h, RHASH_TRANSIENT_FLAG) + +#define RHASH_ARRAY_BOUND_SET(h, n) do { \ + long tmp_n = n; \ + HASH_ASSERT(RHASH_ARRAY_P(h)); \ + HASH_ASSERT(tmp_n <= RHASH_ARRAY_MAX_BOUND); \ + RBASIC(h)->flags &= ~RHASH_ARRAY_BOUND_MASK; \ + RBASIC(h)->flags |= (tmp_n) << RHASH_ARRAY_BOUND_SHIFT; \ +} while (0) + +#define RHASH_ARRAY_SIZE_SET(h, n) do { \ + long tmp_n = n; \ + HASH_ASSERT(RHASH_ARRAY_P(h)); \ + RBASIC(h)->flags &= ~RHASH_ARRAY_SIZE_MASK; \ + RBASIC(h)->flags |= (tmp_n) << RHASH_ARRAY_SIZE_SHIFT; \ +} while (0) + +#define HASH_ARRAY_SIZE_ADD(h, n) do { \ + HASH_ASSERT(RHASH_ARRAY_P(h)); \ + RHASH_ARRAY_SIZE_SET((h), RHASH_ARRAY_SIZE(h)+(n)); \ + hash_verify(h); \ +} while (0) + +#define RHASH_ARRAY_SIZE_INC(h) HASH_ARRAY_SIZE_ADD(h, 1) +#define RHASH_ARRAY_SIZE_DEC(h) HASH_ARRAY_SIZE_ADD(h, -1) + +#define RHASH_CLEAR_BITS(h) do { \ + RBASIC(h)->flags &= ~RHASH_ARRAY_SIZE_MASK; \ + RBASIC(h)->flags &= ~RHASH_ARRAY_BOUND_MASK; \ +} while (0) + + +static li_table* +linear_init_table(VALUE hash) +{ + li_table *tab; + + tab = (li_table*)rb_transient_heap_alloc(hash, sizeof(li_table)); + if (tab != NULL) { + RHASH_SET_TRANSIENT_FLAG(hash); + } + else { + RHASH_UNSET_TRANSIENT_FLAG(hash); + tab = (li_table*)ruby_xmalloc(sizeof(li_table)); + } + + RHASH_ARRAY_SIZE_SET(hash, 0); + RHASH_ARRAY_BOUND_SET(hash, 0); + RHASH_ARRAY_SET(hash, tab); + return tab; +} + +static st_index_t +find_entry(VALUE hash, st_hash_t hash_value, st_data_t key) +{ + uint8_t i, bound = RHASH_ARRAY_BOUND(hash); + + /* if table is NULL, then bound also should be 0 */ + + for (i = 0; i < bound; i++) { + if (PTR_EQUAL(RHASH_ARRAY_REF(hash, i), hash_value, key)) { + return i; + } + } + return RHASH_ARRAY_MAX_BOUND; +} + +static inline void +linear_free_and_clear_table(VALUE hash) +{ + li_table *tab = RHASH_ARRAY(hash); + + if (tab) { + if (RHASH_TRANSIENT_P(hash)) { + RHASH_UNSET_TRANSIENT_FLAG(hash); + } + else { + ruby_xfree(RHASH_ARRAY(hash)); + } + RHASH_CLEAR_BITS(hash); + RHASH_ARRAY_SET(hash, NULL); + } + HASH_ASSERT(RHASH_ARRAY_SIZE(hash) == 0); + HASH_ASSERT(RHASH_ARRAY_BOUND(hash) == 0); + HASH_ASSERT(RHASH_TRANSIENT_P(hash) == 0); +} + +void st_add_direct_with_hash(st_table *tab, st_data_t key, st_data_t value, st_hash_t hash); /* st.c */ + +static void +linear_try_convert_table(VALUE hash) +{ + st_table *new_tab; + li_table_entry *entry; + const int size = RHASH_ARRAY_SIZE(hash); + st_index_t i; + + if (!RHASH_ARRAY_P(hash) || size < RHASH_ARRAY_MAX_SIZE) { + return; + } + + new_tab = st_init_table_with_size(&objhash, size * 2); + + for (i = 0; i < RHASH_ARRAY_MAX_BOUND; i++) { + entry = RHASH_ARRAY_REF(hash, i); + HASH_ASSERT(entry->hash != 0); + + st_add_direct_with_hash(new_tab, entry->key, entry->record, entry->hash); + } + linear_free_and_clear_table(hash); + RHASH_ST_TABLE_SET(hash, new_tab); + return; +} + +static st_table * +linear_force_convert_table(VALUE hash, const char *file, int line) +{ + st_table *new_tab; + + if (RHASH_TABLE_P(hash)) { + return RHASH_ST_TABLE(hash); + } + + if (RHASH_ARRAY(hash)) { + li_table_entry *entry; + int i, bound = RHASH_ARRAY_BOUND(hash); + +#if RHASH_CONVERT_TABLE_DEBUG + rb_obj_info_dump(hash); + fprintf(stderr, "force_convert: %s:%d\n", file, line); + RB_DEBUG_COUNTER_INC(obj_hash_force_convert); +#endif + + new_tab = st_init_table_with_size(&objhash, RHASH_ARRAY_SIZE(hash)); + + for (i = 0; i < bound; i++) { + entry = RHASH_ARRAY_REF(hash, i); + if (empty_entry(entry)) continue; + + st_add_direct_with_hash(new_tab, entry->key, entry->record, entry->hash); + } + linear_free_and_clear_table(hash); + } + else { + new_tab = st_init_table(&objhash); + } + RHASH_ST_TABLE_SET(hash, new_tab); + + return new_tab; +} + +static li_table * +hash_ltbl(VALUE hash) +{ + if (RHASH_TABLE_EMPTY(hash)) { + linear_init_table(hash); + } + return RHASH_ARRAY(hash); +} + +static int +linear_compact_table(VALUE hash) +{ + const int bound = RHASH_ARRAY_BOUND(hash); + const int size = RHASH_ARRAY_SIZE(hash); + + if (size == bound) { + return size; + } + else { + int i, j=0; + li_table_entry *entries = RHASH_ARRAY_REF(hash, 0); + + for (i=0; i= RHASH_ARRAY_MAX_SIZE) { + return 1; + } + else { + if (UNLIKELY(bin >= RHASH_ARRAY_MAX_BOUND)) { + bin = linear_compact_table(hash); + hash_ltbl(hash); + } + HASH_ASSERT(bin < RHASH_ARRAY_MAX_BOUND); + + entry = &tab->entries[bin]; + set_entry(entry, key, val, hash_value); + RHASH_ARRAY_BOUND_SET(hash, bin+1); + RHASH_ARRAY_SIZE_INC(hash); + return 0; + } +} + +static int +linear_foreach(VALUE hash, int (*func)(ANYARGS), st_data_t arg) +{ + if (RHASH_ARRAY_SIZE(hash) > 0) { + int i, bound = RHASH_ARRAY_BOUND(hash); + + for (i = 0; i < bound; i++) { + enum st_retval retval; + li_table_entry *cur_entry = RHASH_ARRAY_REF(hash, i); + if (empty_entry(cur_entry)) continue; + retval = (*func)(cur_entry->key, cur_entry->record, arg, 0); + /* cur_entry is not valid after that */ + + switch (retval) { + case ST_CONTINUE: + break; + case ST_CHECK: + case ST_STOP: + return 0; + case ST_DELETE: + clear_entry(RHASH_ARRAY_REF(hash, i)); + RHASH_ARRAY_SIZE_DEC(hash); + break; + } + } + } + return 0; +} + +static int +linear_foreach_check(VALUE hash, int (*func)(ANYARGS), st_data_t arg, + st_data_t never) +{ + if (RHASH_ARRAY_SIZE(hash) > 0) { + uint8_t i, ret = 0, bound = RHASH_ARRAY_BOUND(hash); + enum st_retval retval; + li_table_entry *cur_entry; + st_data_t key; + st_hash_t hash_value; + + for (i = 0; i < bound; i++) { + cur_entry = RHASH_ARRAY_REF(hash, i); + if (empty_entry(cur_entry)) + continue; + key = cur_entry->key; + hash_value = cur_entry->hash; + + retval = (*func)(key, cur_entry->record, arg, 0); + hash_verify(hash); + + cur_entry = RHASH_ARRAY_REF(hash, i); + + switch (retval) { + case ST_CHECK: { + if (cur_entry->key == never && cur_entry->hash == 0) break; + ret = find_entry(hash, hash_value, key); + if (ret == RHASH_ARRAY_MAX_BOUND) { + retval = (*func)(0, 0, arg, 1); + return 2; + } + } + case ST_CONTINUE: + break; + case ST_STOP: + return 0; + case ST_DELETE: { + if (!empty_entry(cur_entry)) { + clear_entry(cur_entry); + RHASH_ARRAY_SIZE_DEC(hash); + } + break; + } + } + } + } + return 0; +} + +static int +linear_update(VALUE hash, st_data_t key, + st_update_callback_func *func, st_data_t arg) +{ + int retval, existing; + uint8_t bin; + st_data_t value = 0, old_key; + st_hash_t hash_value = do_hash(key); + + if (RHASH_ARRAY_SIZE(hash) > 0) { + bin = find_entry(hash, hash_value, key); + existing = (bin != RHASH_ARRAY_MAX_BOUND) ? TRUE : FALSE; + } + else { + hash_ltbl(hash); /* allocate ltbl if needed */ + existing = FALSE; + } + + if (existing) { + li_table_entry *entry = RHASH_ARRAY_REF(hash, bin); + key = entry->key; + value = entry->record; + } + old_key = key; + retval = (*func)(&key, &value, arg, existing); + + switch (retval) { + case ST_CONTINUE: + if (!existing) { + if (linear_add_direct_with_hash(hash, key, value, hash_value)) { + return -1; + } + } + else { + li_table_entry *entry = RHASH_ARRAY_REF(hash, bin); + if (old_key != key) { + entry->key = key; + } + entry->record = value; + } + break; + case ST_DELETE: + if (existing) { + clear_entry(RHASH_ARRAY_REF(hash, bin)); + RHASH_ARRAY_SIZE_DEC(hash); + } + break; + } + return existing; +} + +static int +linear_insert(VALUE hash, st_data_t key, st_data_t value) +{ + st_index_t bin = RHASH_ARRAY_BOUND(hash); + st_hash_t hash_value = do_hash(key); + + hash_ltbl(hash); /* prepare ltbl */ + + bin = find_entry(hash, hash_value, key); + if (bin == RHASH_ARRAY_MAX_BOUND) { + if (RHASH_ARRAY_SIZE(hash) >= RHASH_ARRAY_MAX_SIZE) { + return -1; + } + else if (bin >= RHASH_ARRAY_MAX_BOUND) { + bin = linear_compact_table(hash); + hash_ltbl(hash); + } + HASH_ASSERT(bin < RHASH_ARRAY_MAX_BOUND); + + set_entry(RHASH_ARRAY_REF(hash, bin), key, value, hash_value); + RHASH_ARRAY_BOUND_SET(hash, bin+1); + RHASH_ARRAY_SIZE_INC(hash); + return 0; + } + else { + RHASH_ARRAY_REF(hash, bin)->record = value; + return 1; + } +} + +static int +linear_lookup(VALUE hash, st_data_t key, st_data_t *value) +{ + st_hash_t hash_value = do_hash(key); + st_index_t bin = find_entry(hash, hash_value, key); + + if (bin == RHASH_ARRAY_MAX_BOUND) { + return 0; + } + else { + HASH_ASSERT(bin < RHASH_ARRAY_MAX_BOUND); + if (value != NULL) { + *value = RHASH_ARRAY_REF(hash, bin)->record; + } + return 1; + } +} + +static int +linear_delete(VALUE hash, st_data_t *key, st_data_t *value) +{ + st_index_t bin; + st_hash_t hash_value = do_hash(*key); + + + bin = find_entry(hash, hash_value, *key); + + if (bin == RHASH_ARRAY_MAX_BOUND) { + if (value != 0) *value = 0; + return 0; + } + else { + li_table_entry *entry = RHASH_ARRAY_REF(hash, bin); + if (value != 0) *value = entry->record; + clear_entry(entry); + RHASH_ARRAY_SIZE_DEC(hash); + return 1; + } +} + +static int +linear_shift(VALUE hash, st_data_t *key, st_data_t *value) +{ + if (RHASH_ARRAY_SIZE(hash) > 0) { + uint8_t i, bound = RHASH_ARRAY_BOUND(hash); + li_table_entry *entry, *entries = RHASH_ARRAY(hash)->entries; + + for (i = 0; i < bound; i++) { + entry = &entries[i]; + if (!empty_entry(entry)) { + if (value != 0) *value = entry->record; + *key = entry->key; + clear_entry(entry); + RHASH_ARRAY_SIZE_DEC(hash); + return 1; + } + } + } + if (value != 0) *value = 0; + return 0; +} + +static int +linear_keys(VALUE hash, st_data_t *keys, st_index_t size) +{ + uint8_t i, bound = RHASH_ARRAY_BOUND(hash); + st_data_t *keys_start = keys, *keys_end = keys_end; + + for (i = 0; i < bound; i++) { + if (keys == keys_end) { + break; + } + else { + li_table_entry *cur_entry = RHASH_ARRAY_REF(hash, i); + if (!empty_entry(cur_entry)) + *keys++ = cur_entry->key; + } + } + + return keys - keys_start; +} + +static int +linear_values(VALUE hash, st_data_t *values, st_index_t size) +{ + uint8_t i, bound = RHASH_ARRAY_BOUND(hash); + st_data_t *values_start = values, *values_end = values + size; + + for (i = 0; i < bound; i++) { + if (values == values_end) { + break; + } + else { + li_table_entry *cur_entry = RHASH_ARRAY_REF(hash, i); + if (!empty_entry(cur_entry)) + *values++ = cur_entry->record; + } + } + + return values - values_start; +} + +static li_table* +linear_copy(VALUE hash1, VALUE hash2) +{ + li_table *old_tab = RHASH_ARRAY(hash2); + + if (old_tab) { + li_table *new_tab; + new_tab = (li_table*) rb_transient_heap_alloc(hash1, sizeof(li_table)); + if (new_tab != NULL) { + RHASH_SET_TRANSIENT_FLAG(hash1); + } + else { + RHASH_UNSET_TRANSIENT_FLAG(hash1); + new_tab = (li_table*)ruby_xmalloc(sizeof(li_table)); + } + *new_tab = *old_tab; + RHASH_ARRAY_BOUND_SET(hash1, RHASH_ARRAY_BOUND(hash2)); + RHASH_ARRAY_SIZE_SET(hash1, RHASH_ARRAY_SIZE(hash2)); + RHASH_ARRAY_SET(hash1, new_tab); + return new_tab; + } + else { + RHASH_ARRAY_BOUND_SET(hash1, RHASH_ARRAY_BOUND(hash2)); + RHASH_ARRAY_SIZE_SET(hash1, RHASH_ARRAY_SIZE(hash2)); + RHASH_ARRAY_SET(hash1, old_tab); + return old_tab; + } +} + +static void +linear_clear(VALUE hash) +{ + if (RHASH_ARRAY(hash) != NULL) { + RHASH_ARRAY_SIZE_SET(hash, 0); + RHASH_ARRAY_BOUND_SET(hash, 0); + } + else { + HASH_ASSERT(RHASH_ARRAY_SIZE(hash) == 0); + HASH_ASSERT(RHASH_ARRAY_BOUND(hash) == 0); + } +} + +void +rb_hash_transient_heap_evacuate(VALUE hash, int promote) +{ + if (RHASH_TRANSIENT_P(hash)) { + li_table *new_tab; + li_table *old_tab = RHASH_ARRAY(hash); + + if (UNLIKELY(old_tab == NULL)) { + rb_gc_force_recycle(hash); + return; + } + HASH_ASSERT(old_tab != NULL); + if (promote) { + promote: + new_tab = ruby_xmalloc(sizeof(li_table)); + RHASH_UNSET_TRANSIENT_FLAG(hash); + } + else { + new_tab = rb_transient_heap_alloc(hash, sizeof(li_table)); + if (new_tab == NULL) goto promote; + } + *new_tab = *old_tab; + RHASH_ARRAY_SET(hash, new_tab); + } + hash_verify(hash); +} + + typedef int st_foreach_func(st_data_t, st_data_t, st_data_t); struct foreach_safe_arg { @@ -342,6 +1102,27 @@ struct hash_foreach_arg { VALUE arg; }; +static int +hash_linear_foreach_iter(st_data_t key, st_data_t value, st_data_t argp, int error) +{ + struct hash_foreach_arg *arg = (struct hash_foreach_arg *)argp; + int status; + + if (error) return ST_STOP; + status = (*arg->func)((VALUE)key, (VALUE)value, arg->arg); + /* TODO: rehash check? rb_raise(rb_eRuntimeError, "rehash occurred during iteration"); */ + + switch (status) { + case ST_DELETE: + return ST_DELETE; + case ST_CONTINUE: + break; + case ST_STOP: + return ST_STOP; + } + return ST_CHECK; +} + static int hash_foreach_iter(st_data_t key, st_data_t value, st_data_t argp, int error) { @@ -350,10 +1131,10 @@ hash_foreach_iter(st_data_t key, st_data_t value, st_data_t argp, int error) st_table *tbl; if (error) return ST_STOP; - tbl = RHASH(arg->hash)->ntbl; + tbl = RHASH_ST_TABLE(arg->hash); status = (*arg->func)((VALUE)key, (VALUE)value, arg->arg); - if (RHASH(arg->hash)->ntbl != tbl) { - rb_raise(rb_eRuntimeError, "rehash occurred during iteration"); + if (RHASH_ST_TABLE(arg->hash) != tbl) { + rb_raise(rb_eRuntimeError, "rehash occurred during iteration"); } switch (status) { case ST_DELETE: @@ -380,12 +1161,32 @@ hash_foreach_ensure(VALUE hash) return 0; } +int +rb_hash_stlike_foreach(VALUE hash, int (*func)(ANYARGS), st_data_t arg) +{ + if (RHASH_ARRAY_P(hash)) { + return linear_foreach(hash, func, arg); + } + else { + return st_foreach(RHASH_ST_TABLE(hash), func, arg); + } +} + static VALUE hash_foreach_call(VALUE arg) { VALUE hash = ((struct hash_foreach_arg *)arg)->hash; - if (st_foreach_check(RHASH(hash)->ntbl, hash_foreach_iter, (st_data_t)arg, (st_data_t)Qundef)) { - rb_raise(rb_eRuntimeError, "hash modified during iteration"); + int ret = 0; + if (RHASH_ARRAY_P(hash)) { + ret = linear_foreach_check(hash, hash_linear_foreach_iter, + (st_data_t)arg, (st_data_t)Qundef); + } + else if (RHASH_TABLE_P(hash)) { + ret = st_foreach_check(RHASH_ST_TABLE(hash), hash_foreach_iter, + (st_data_t)arg, (st_data_t)Qundef); + } + if (ret) { + rb_raise(rb_eRuntimeError, "ret: %d, hash modified during iteration", ret); } return Qnil; } @@ -395,13 +1196,14 @@ rb_hash_foreach(VALUE hash, int (*func)(ANYARGS), VALUE farg) { struct hash_foreach_arg arg; - if (!RHASH(hash)->ntbl) + if (RHASH_TABLE_EMPTY(hash)) return; RHASH_ITER_LEV(hash)++; arg.hash = hash; arg.func = (rb_foreach_func *)func; arg.arg = farg; rb_ensure(hash_foreach_call, (VALUE)&arg, hash_foreach_ensure, hash); + hash_verify(hash); } static VALUE @@ -439,7 +1241,7 @@ VALUE rb_hash_new_compare_by_id(void) { VALUE hash = rb_hash_new(); - RHASH(hash)->ntbl = rb_init_identtable(); + RHASH_ST_TABLE_SET(hash, rb_init_identtable()); return hash; } @@ -447,8 +1249,14 @@ MJIT_FUNC_EXPORTED VALUE rb_hash_new_with_size(st_index_t size) { VALUE ret = rb_hash_new(); - if (size) - RHASH(ret)->ntbl = st_init_table_with_size(&objhash, size); + if (size) { + if (size <= RHASH_ARRAY_MAX_SIZE) { + RHASH_ARRAY_SET(ret, linear_init_table(ret)); + } + else { + RHASH_ST_TABLE_SET(ret, st_init_table_with_size(&objhash, size)); + } + } return ret; } @@ -457,8 +1265,12 @@ hash_dup(VALUE hash, VALUE klass, VALUE flags) { VALUE ret = hash_alloc_flags(klass, flags, RHASH_IFNONE(hash)); - if (!RHASH_EMPTY_P(hash)) - RHASH(ret)->ntbl = st_copy(RHASH(hash)->ntbl); + if (!RHASH_EMPTY_P(hash)) { + if (RHASH_ARRAY_P(hash)) + linear_copy(ret, hash); + else if (RHASH_TABLE_P(hash)) + RHASH_ST_TABLE_SET(ret, st_copy(RHASH_ST_TABLE(hash))); + } return ret; } @@ -479,33 +1291,30 @@ rb_hash_modify_check(VALUE hash) rb_check_frozen(hash); } -static struct st_table * -hash_tbl(VALUE hash) +MJIT_FUNC_EXPORTED struct st_table * +#if RHASH_CONVERT_TABLE_DEBUG +rb_hash_tbl_raw(VALUE hash, const char *file, int line) { - if (!RHASH(hash)->ntbl) { - RHASH(hash)->ntbl = st_init_table(&objhash); - } - return RHASH(hash)->ntbl; + return linear_force_convert_table(hash, file, line); } - -struct st_table * -rb_hash_tbl(VALUE hash) +#else +rb_hash_tbl_raw(VALUE hash) { - OBJ_WB_UNPROTECT(hash); - return hash_tbl(hash); + return linear_force_convert_table(hash, NULL, 0); } +#endif -MJIT_FUNC_EXPORTED struct st_table * -rb_hash_tbl_raw(VALUE hash) +struct st_table * +rb_hash_tbl(VALUE hash, const char *file, int line) { - return hash_tbl(hash); + OBJ_WB_UNPROTECT(hash); + return RHASH_TBL_RAW(hash); } static void rb_hash_modify(VALUE hash) { rb_hash_modify_check(hash); - hash_tbl(hash); } NORETURN(static void no_new_key(void)); @@ -545,6 +1354,22 @@ struct update_arg { typedef int (*tbl_update_func)(st_data_t *, st_data_t *, st_data_t, int); +int +rb_hash_stlike_update(VALUE hash, st_data_t key, st_update_callback_func func, st_data_t arg) +{ + if (RHASH_ARRAY_P(hash)) { + int result = linear_update(hash, (st_data_t)key, func, arg); + if (result == -1) { + linear_try_convert_table(hash); + } + else { + return result; + } + } + + return st_update(RHASH_ST_TABLE(hash), (st_data_t)key, func, arg); +} + static int tbl_update(VALUE hash, VALUE key, tbl_update_func func, st_data_t optional_arg) { @@ -558,7 +1383,7 @@ tbl_update(VALUE hash, VALUE key, tbl_update_func func, st_data_t optional_arg) arg.new_value = 0; arg.old_value = Qundef; - result = st_update(RHASH(hash)->ntbl, (st_data_t)key, func, (st_data_t)&arg); + result = rb_hash_stlike_update(hash, key, func, (st_data_t)&arg); /* write barrier */ if (arg.new_key) RB_OBJ_WRITTEN(hash, arg.old_key, arg.new_key); @@ -673,11 +1498,14 @@ rb_hash_s_create(int argc, VALUE *argv, VALUE klass) VALUE hash, tmp; if (argc == 1) { - tmp = rb_hash_s_try_convert(Qnil, argv[0]); + tmp = rb_hash_s_try_convert(Qnil, argv[0]); //TODO tmp array flag if (!NIL_P(tmp)) { hash = hash_alloc(klass); - if (RHASH(tmp)->ntbl) { - RHASH(hash)->ntbl = st_copy(RHASH(tmp)->ntbl); + if (RHASH_ARRAY_P(tmp)) { + linear_copy(hash, tmp); + } + else { + RHASH_ST_TABLE_SET(hash, st_copy(RHASH_ST_TABLE(tmp))); } return hash; } @@ -725,7 +1553,7 @@ rb_hash_s_create(int argc, VALUE *argv, VALUE klass) hash = hash_alloc(klass); rb_hash_bulk_insert(argc, argv, hash); - + hash_verify(hash); return hash; } @@ -767,9 +1595,12 @@ struct rehash_arg { static int rb_hash_rehash_i(VALUE key, VALUE value, VALUE arg) { - st_table *tbl = (st_table *)arg; - - st_insert(tbl, (st_data_t)key, (st_data_t)value); + if (RHASH_ARRAY_P(arg)) { + linear_insert(arg, (st_data_t)key, (st_data_t)value); + } + else { + st_insert(RHASH_ST_TABLE(arg), (st_data_t)key, (st_data_t)value); + } return ST_CONTINUE; } @@ -803,17 +1634,25 @@ rb_hash_rehash(VALUE hash) rb_raise(rb_eRuntimeError, "rehash during iteration"); } rb_hash_modify_check(hash); - if (!RHASH(hash)->ntbl) - return hash; - tmp = hash_alloc(0); - tbl = st_init_table_with_size(RHASH(hash)->ntbl->type, RHASH(hash)->ntbl->num_entries); - RHASH(tmp)->ntbl = tbl; - - rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tbl); - st_free_table(RHASH(hash)->ntbl); - RHASH(hash)->ntbl = tbl; - RHASH(tmp)->ntbl = 0; - + if (RHASH_ARRAY_P(hash)) { + tmp = hash_alloc(0); + linear_init_table(tmp); + rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp); + linear_free_and_clear_table(hash); + linear_copy(hash, tmp); + linear_free_and_clear_table(tmp); + } + else if (RHASH_TABLE_P(hash)) { + st_table *old_tab = RHASH_ST_TABLE(hash); + tmp = hash_alloc(0); + tbl = st_init_table_with_size(old_tab->type, old_tab->num_entries); + RHASH_ST_TABLE_SET(tmp, tbl); + rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp); + st_free_table(old_tab); + RHASH_ST_TABLE_SET(hash, tbl); + RHASH_CLEAR(tmp); + } + hash_verify(hash); return hash; } @@ -850,10 +1689,27 @@ rb_hash_aref(VALUE hash, VALUE key) { st_data_t val; - if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) { - return rb_hash_default_value(hash, key); + if (RHASH_ARRAY_P(hash) && linear_lookup(hash, key, &val)) { + return (VALUE)val; + } + else if (RHASH_TABLE_P(hash) && st_lookup(RHASH_ST_TABLE(hash), key, &val)) { + return (VALUE)val; + } + hash_verify(hash); + return rb_hash_default_value(hash, key); +} + +MJIT_FUNC_EXPORTED int +rb_hash_stlike_lookup(VALUE hash, st_data_t key, st_data_t *pval) +{ + hash_verify(hash); + + if (RHASH_ARRAY_P(hash)) { + return linear_lookup(hash, key, pval); + } + else { + return st_lookup(RHASH_ST_TABLE(hash), key, pval); } - return (VALUE)val; } VALUE @@ -861,10 +1717,12 @@ rb_hash_lookup2(VALUE hash, VALUE key, VALUE def) { st_data_t val; - if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) { - return def; /* without Hash#default */ + if (rb_hash_stlike_lookup(hash, key, &val)) { + return (VALUE)val; + } + else { + return def; /* without Hash#default */ } - return (VALUE)val; } VALUE @@ -916,19 +1774,23 @@ rb_hash_fetch_m(int argc, VALUE *argv, VALUE hash) if (block_given && argc == 2) { rb_warn("block supersedes default value argument"); } - if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) { - if (block_given) return rb_yield(key); - if (argc == 1) { - VALUE desc = rb_protect(rb_inspect, key, 0); - if (NIL_P(desc)) { - desc = rb_any_to_s(key); - } - desc = rb_str_ellipsize(desc, 65); - rb_key_err_raise(rb_sprintf("key not found: %"PRIsVALUE, desc), hash, key); + if (RHASH_ARRAY_P(hash) && linear_lookup(hash, key, &val)) { + return (VALUE)val; + } + else if (RHASH_TABLE_P(hash) && st_lookup(RHASH_ST_TABLE(hash), key, &val)) { + return (VALUE)val; + } + if (block_given) return rb_yield(key); + if (argc == 1) { + VALUE desc = rb_protect(rb_inspect, key, 0); + if (NIL_P(desc)) { + desc = rb_any_to_s(key); } - return argv[1]; + desc = rb_str_ellipsize(desc, 65); + rb_key_err_raise(rb_sprintf("key not found: %"PRIsVALUE, desc), hash, key); } - return (VALUE)val; + hash_verify(hash); + return argv[1]; } VALUE @@ -1107,6 +1969,17 @@ rb_hash_index(VALUE hash, VALUE value) return rb_hash_key(hash, value); } +int +rb_hash_stlike_delete(VALUE hash, st_data_t *pkey, st_data_t *pval) +{ + if (RHASH_ARRAY_P(hash)) { + return linear_delete(hash, pkey, pval); + } + else { + return st_delete(RHASH_ST_TABLE(hash), pkey, pval); + } +} + /* * delete a specified entry a given key. * if there is the corresponding entry, return a value of the entry. @@ -1117,14 +1990,11 @@ rb_hash_delete_entry(VALUE hash, VALUE key) { st_data_t ktmp = (st_data_t)key, val; - if (!RHASH(hash)->ntbl) { - return Qundef; - } - else if (st_delete(RHASH(hash)->ntbl, &ktmp, &val)) { - return (VALUE)val; + if (rb_hash_stlike_delete(hash, &ktmp, &val)) { + return (VALUE)val; } else { - return Qundef; + return Qundef; } } @@ -1219,10 +2089,25 @@ rb_hash_shift(VALUE hash) struct shift_var var; rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) { + if (RHASH_ARRAY_P(hash)) { var.key = Qundef; if (RHASH_ITER_LEV(hash) == 0) { - if (st_shift(RHASH(hash)->ntbl, &var.key, &var.val)) { + if (linear_shift(hash, &var.key, &var.val)) { + return rb_assoc_new(var.key, var.val); + } + } + else { + rb_hash_foreach(hash, shift_i_safe, (VALUE)&var); + if (var.key != Qundef) { + rb_hash_delete_entry(hash, var.key); + return rb_assoc_new(var.key, var.val); + } + } + } + if (RHASH_TABLE_P(hash)) { + var.key = Qundef; + if (RHASH_ITER_LEV(hash) == 0) { + if (st_shift(RHASH_ST_TABLE(hash), &var.key, &var.val)) { return rb_assoc_new(var.key, var.val); } } @@ -1272,8 +2157,9 @@ rb_hash_delete_if(VALUE hash) { RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) - rb_hash_foreach(hash, delete_if_i, hash); + if (!RHASH_TABLE_EMPTY(hash)) { + rb_hash_foreach(hash, delete_if_i, hash); + } return hash; } @@ -1296,7 +2182,7 @@ rb_hash_reject_bang(VALUE hash) n = RHASH_SIZE(hash); if (!n) return Qnil; rb_hash_foreach(hash, delete_if_i, hash); - if (n == RHASH(hash)->ntbl->num_entries) return Qnil; + if (n == RHASH_SIZE(hash)) return Qnil; return hash; } @@ -1486,11 +2372,10 @@ rb_hash_select_bang(VALUE hash) RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (!RHASH(hash)->ntbl) - return Qnil; - n = RHASH(hash)->ntbl->num_entries; + n = RHASH_SIZE(hash); + if (!n) return Qnil; rb_hash_foreach(hash, keep_if_i, hash); - if (n == RHASH(hash)->ntbl->num_entries) return Qnil; + if (n == RHASH_SIZE(hash)) return Qnil; return hash; } @@ -1511,8 +2396,9 @@ rb_hash_keep_if(VALUE hash) { RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) - rb_hash_foreach(hash, keep_if_i, hash); + if (!RHASH_TABLE_EMPTY(hash)) { + rb_hash_foreach(hash, keep_if_i, hash); + } return hash; } @@ -1537,13 +2423,15 @@ VALUE rb_hash_clear(VALUE hash) { rb_hash_modify_check(hash); - if (!RHASH(hash)->ntbl) - return hash; - if (RHASH(hash)->ntbl->num_entries > 0) { - if (RHASH_ITER_LEV(hash) > 0) - rb_hash_foreach(hash, clear_i, 0); - else - st_clear(RHASH(hash)->ntbl); + + if (RHASH_ITER_LEV(hash) > 0) { + rb_hash_foreach(hash, clear_i, 0); + } + else if (RHASH_ARRAY_P(hash)) { + linear_clear(hash); + } + else { + st_clear(RHASH_ST_TABLE(hash)); } return hash; @@ -1618,14 +2506,15 @@ VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val) { int iter_lev = RHASH_ITER_LEV(hash); - st_table *tbl = RHASH(hash)->ntbl; rb_hash_modify(hash); - if (!tbl) { + + if (RHASH_TABLE_EMPTY(hash)) { if (iter_lev > 0) no_new_key(); - tbl = hash_tbl(hash); + linear_init_table(hash); } - if (tbl->type == &identhash || rb_obj_class(key) != rb_cString) { + + if (RHASH_TYPE(hash) == &identhash || rb_obj_class(key) != rb_cString) { RHASH_UPDATE_ITER(hash, iter_lev, key, hash_aset, val); } else { @@ -1646,8 +2535,6 @@ replace_i(VALUE key, VALUE val, VALUE hash) static VALUE rb_hash_initialize_copy(VALUE hash, VALUE hash2) { - st_table *ntbl; - rb_hash_modify_check(hash); hash2 = to_hash(hash2); @@ -1655,15 +2542,23 @@ rb_hash_initialize_copy(VALUE hash, VALUE hash2) if (hash == hash2) return hash; - ntbl = RHASH(hash)->ntbl; - if (RHASH(hash2)->ntbl) { - if (ntbl) st_free_table(ntbl); - RHASH(hash)->ntbl = st_copy(RHASH(hash2)->ntbl); - if (RHASH(hash)->ntbl->num_entries) + if (RHASH_ARRAY_P(hash2)) { + if (RHASH_ARRAY_P(hash)) linear_free_and_clear_table(hash); + linear_copy(hash, hash2); + if (RHASH_ARRAY_SIZE(hash)) rb_hash_rehash(hash); } - else if (ntbl) { - st_clear(ntbl); + else if (RHASH_TABLE_P(hash2)) { + if (RHASH_TABLE_P(hash)) st_free_table(RHASH_ST_TABLE(hash)); + RHASH_ST_TABLE_SET(hash, st_copy(RHASH_ST_TABLE(hash2))); + if (RHASH_ST_TABLE(hash)->num_entries) + rb_hash_rehash(hash); + } + else if (RHASH_ARRAY_P(hash)) { + linear_clear(hash); + } + else if (RHASH_TABLE_P(hash)) { + st_clear(RHASH_ST_TABLE(hash)); } COPY_DEFAULT(hash, hash2); @@ -1686,19 +2581,28 @@ rb_hash_initialize_copy(VALUE hash, VALUE hash2) static VALUE rb_hash_replace(VALUE hash, VALUE hash2) { - st_table *table2; - rb_hash_modify_check(hash); if (hash == hash2) return hash; hash2 = to_hash(hash2); COPY_DEFAULT(hash, hash2); - table2 = RHASH(hash2)->ntbl; - rb_hash_clear(hash); - if (table2) hash_tbl(hash)->type = table2->type; - rb_hash_foreach(hash2, replace_i, hash); + + if (RHASH_ARRAY_P(hash)) { + if (RHASH_ARRAY_P(hash2)) { + linear_copy(hash, hash2); + } + else { + goto st_to_st; + } + } + else { + if (RHASH_ARRAY_P(hash2)) linear_force_convert_table(hash2, __FILE__, __LINE__); + st_to_st: + RHASH_TBL_RAW(hash)->type = RHASH_ST_TABLE(hash2)->type; + rb_hash_foreach(hash2, replace_i, hash); + } return hash; } @@ -1725,6 +2629,11 @@ rb_hash_size(VALUE hash) return INT2FIX(RHASH_SIZE(hash)); } +size_t +rb_hash_size_num(VALUE hash) +{ + return (long)RHASH_SIZE(hash); +} /* * call-seq: @@ -1922,7 +2831,7 @@ rb_hash_transform_keys_bang(VALUE hash) { RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) { + if (!RHASH_TABLE_EMPTY(hash)) { long i; VALUE pairs = rb_hash_flatten(0, NULL, hash); rb_hash_clear(hash); @@ -1996,7 +2905,7 @@ rb_hash_transform_values_bang(VALUE hash) { RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) + if (!RHASH_TABLE_EMPTY(hash)) rb_hash_foreach(hash, transform_values_i, hash); return hash; } @@ -2187,12 +3096,20 @@ rb_hash_keys(VALUE hash) if (size == 0) return keys; if (ST_DATA_COMPATIBLE_P(VALUE)) { - st_table *table = RHASH(hash)->ntbl; + if (RHASH_ARRAY_P(hash)) { + rb_gc_writebarrier_remember(keys); + RARRAY_PTR_USE(keys, ptr, { + size = linear_keys(hash, ptr, size); + }); + } + else if (RHASH_TABLE_P(hash)) { + st_table *table = RHASH_ST_TABLE(hash); - rb_gc_writebarrier_remember(keys); - RARRAY_PTR_USE(keys, ptr, { - size = st_keys(table, ptr, size); - }); + rb_gc_writebarrier_remember(keys); + RARRAY_PTR_USE(keys, ptr, { + size = st_keys(table, ptr, size); + }); + } rb_ary_set_len(keys, size); } else { @@ -2231,12 +3148,20 @@ rb_hash_values(VALUE hash) if (size == 0) return values; if (ST_DATA_COMPATIBLE_P(VALUE)) { - st_table *table = RHASH(hash)->ntbl; + if (RHASH_ARRAY_P(hash)) { + rb_gc_writebarrier_remember(values); + RARRAY_PTR_USE(values, ptr, { + size = linear_values(hash, ptr, size); + }); + } + else if (RHASH_TABLE_P(hash)) { + st_table *table = RHASH_ST_TABLE(hash); - rb_gc_writebarrier_remember(values); - RARRAY_PTR_USE(values, ptr, { - size = st_values(table, ptr, size); - }); + rb_gc_writebarrier_remember(values); + RARRAY_PTR_USE(values, ptr, { + size = st_values(table, ptr, size); + }); + } rb_ary_set_len(values, size); } else { @@ -2268,9 +3193,10 @@ rb_hash_values(VALUE hash) MJIT_FUNC_EXPORTED VALUE rb_hash_has_key(VALUE hash, VALUE key) { - if (!RHASH(hash)->ntbl) - return Qfalse; - if (st_lookup(RHASH(hash)->ntbl, key, 0)) { + if (RHASH_ARRAY_P(hash) && linear_lookup(hash, key, 0)) { + return Qtrue; + } + else if (RHASH_TABLE_P(hash) && st_lookup(RHASH_ST_TABLE(hash), key, 0)) { return Qtrue; } return Qfalse; @@ -2314,7 +3240,7 @@ rb_hash_has_value(VALUE hash, VALUE val) struct equal_data { VALUE result; - st_table *tbl; + VALUE hash; int eql; }; @@ -2324,10 +3250,15 @@ eql_i(VALUE key, VALUE val1, VALUE arg) struct equal_data *data = (struct equal_data *)arg; st_data_t val2; - if (!st_lookup(data->tbl, key, &val2)) { + if (RHASH_ARRAY_P(data->hash) && !linear_lookup(data->hash, key, &val2)) { data->result = Qfalse; return ST_STOP; } + else if (RHASH_TABLE_P(data->hash) && !st_lookup(RHASH_ST_TABLE(data->hash), key, &val2)) { + data->result = Qfalse; + return ST_STOP; + } + if (!(data->eql ? rb_eql(val1, (VALUE)val2) : (int)rb_equal(val1, (VALUE)val2))) { data->result = Qfalse; return ST_STOP; @@ -2372,19 +3303,21 @@ hash_equal(VALUE hash1, VALUE hash2, int eql) } if (RHASH_SIZE(hash1) != RHASH_SIZE(hash2)) return Qfalse; - if (!RHASH(hash1)->ntbl || !RHASH(hash2)->ntbl) - return Qtrue; - if (RHASH(hash1)->ntbl->type != RHASH(hash2)->ntbl->type) - return Qfalse; + if (!RHASH_TABLE_EMPTY(hash1) && !RHASH_TABLE_EMPTY(hash2)) { + if (RHASH_TYPE(hash1) != RHASH_TYPE(hash2)) + return Qfalse; + + data.hash = hash2; + data.eql = eql; + return rb_exec_recursive_paired(recursive_eql, hash1, hash2, (VALUE)&data); + } + #if 0 if (!(rb_equal(RHASH_IFNONE(hash1), RHASH_IFNONE(hash2)) && FL_TEST(hash1, HASH_PROC_DEFAULT) == FL_TEST(hash2, HASH_PROC_DEFAULT))) return Qfalse; #endif - - data.tbl = RHASH(hash2)->ntbl; - data.eql = eql; - return rb_exec_recursive_paired(recursive_eql, hash1, hash2, (VALUE)&data); + return Qtrue; } /* @@ -2739,7 +3672,8 @@ static VALUE reset_hash_type(VALUE arg) { struct reset_hash_type_arg *p = (struct reset_hash_type_arg *)arg; - RHASH(p->hash)->ntbl->type = p->orighash; + HASH_ASSERT(RHASH_TABLE_P(p->hash)); + RHASH_ST_TABLE(p->hash)->type = p->orighash; return Qundef; } @@ -2777,7 +3711,10 @@ rb_hash_assoc(VALUE hash, VALUE key) VALUE args[2]; if (RHASH_EMPTY_P(hash)) return Qnil; - table = RHASH(hash)->ntbl; + + linear_force_convert_table(hash, __FILE__, __LINE__); + HASH_ASSERT(RHASH_TABLE_P(hash)); + table = RHASH_ST_TABLE(hash); orighash = table->type; if (orighash != &identhash) { @@ -2787,7 +3724,7 @@ rb_hash_assoc(VALUE hash, VALUE key) assochash.compare = assoc_cmp; assochash.hash = orighash->hash; - table->type = &assochash; + table->type = &assochash; args[0] = hash; args[1] = key; ensure_arg.hash = hash; @@ -2954,11 +3891,12 @@ rb_hash_compact(VALUE hash) static VALUE rb_hash_compact_bang(VALUE hash) { + st_index_t n; rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) { - st_index_t n = RHASH(hash)->ntbl->num_entries; + n = RHASH_SIZE(hash); + if (n) { rb_hash_foreach(hash, delete_if_nil, hash); - if (n != RHASH(hash)->ntbl->num_entries) + if (n != RHASH_SIZE(hash)) return hash; } return Qnil; @@ -2983,15 +3921,23 @@ rb_hash_compact_bang(VALUE hash) static VALUE rb_hash_compare_by_id(VALUE hash) { + VALUE tmp; st_table *identtable; + if (rb_hash_compare_by_id_p(hash)) return hash; + rb_hash_modify_check(hash); + linear_force_convert_table(hash, __FILE__, __LINE__); + HASH_ASSERT(RHASH_TABLE_P(hash)); + tmp = hash_alloc(0); identtable = rb_init_identtable_with_size(RHASH_SIZE(hash)); - rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)identtable); - if (RHASH(hash)->ntbl) - st_free_table(RHASH(hash)->ntbl); - RHASH(hash)->ntbl = identtable; + RHASH_ST_TABLE_SET(tmp, identtable); + rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp); + st_free_table(RHASH_ST_TABLE(hash)); + RHASH_ST_TABLE_SET(hash, identtable); + RHASH_CLEAR(tmp); + rb_gc_force_recycle(tmp); return hash; } @@ -3008,19 +3954,19 @@ rb_hash_compare_by_id(VALUE hash) MJIT_FUNC_EXPORTED VALUE rb_hash_compare_by_id_p(VALUE hash) { - if (!RHASH(hash)->ntbl) - return Qfalse; - if (RHASH(hash)->ntbl->type == &identhash) { + if (RHASH_TABLE_P(hash) && RHASH_ST_TABLE(hash)->type == &identhash) { return Qtrue; } - return Qfalse; + else { + return Qfalse; + } } VALUE rb_ident_hash_new(void) { VALUE hash = rb_hash_new(); - RHASH(hash)->ntbl = st_init_table(&identhash); + RHASH_ST_TABLE_SET(hash, st_init_table(&identhash)); return hash; } @@ -3280,13 +4226,70 @@ add_new_i(st_data_t *key, st_data_t *val, st_data_t arg, int existing) * returns non-zero if +key+ was contained. */ int -rb_hash_add_new_element(VALUE hash, VALUE key, VALUE val) +rb_hash_add_new_element(VALUE hash, VALUE key, VALUE val) //TODO { - st_table *tbl = rb_hash_tbl_raw(hash); + st_table *tbl; + int ret = 0; VALUE args[2]; args[0] = hash; args[1] = val; + + if (RHASH_ARRAY_P(hash)) { + hash_ltbl(hash); + + ret = linear_update(hash, (st_data_t)key, add_new_i, (st_data_t)args); + if (ret != -1) { + return ret; + } + linear_try_convert_table(hash); + } + tbl = RHASH_TBL_RAW(hash); return st_update(tbl, (st_data_t)key, add_new_i, (st_data_t)args); + +} + +static st_data_t +key_stringify(VALUE key) +{ + return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ? + rb_hash_key_str(key) : key; +} + +static void +linear_bulk_insert(VALUE hash, long argc, const VALUE *argv) +{ + long i; + for (i = 0; i < argc; ) { + st_data_t k = key_stringify(argv[i++]); + st_data_t v = argv[i++]; + linear_insert(hash, k, v); + RB_OBJ_WRITTEN(hash, Qundef, k); + RB_OBJ_WRITTEN(hash, Qundef, v); + } +} + +MJIT_FUNC_EXPORTED void +rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash) +{ + st_index_t size; + + HASH_ASSERT(argc % 2 == 0); + if (! argc) + return; + size = argc / 2; + if (RHASH_TABLE_EMPTY(hash)) { + if (size <= RHASH_ARRAY_MAX_SIZE) + hash_ltbl(hash); + else + RHASH_TBL_RAW(hash); + } + if (RHASH_ARRAY_P(hash) && + (RHASH_ARRAY_SIZE(hash) + size <= RHASH_ARRAY_MAX_SIZE)) { + linear_bulk_insert(hash, argc, argv); + return; + } + + rb_hash_bulk_insert_into_st_table(argc, argv, hash); } static int path_tainted = -1; diff --git a/include/ruby/intern.h b/include/ruby/intern.h index 11a97cb6a2..d60f370a6d 100644 --- a/include/ruby/intern.h +++ b/include/ruby/intern.h @@ -520,11 +520,12 @@ VALUE rb_hash_delete(VALUE,VALUE); VALUE rb_hash_set_ifnone(VALUE hash, VALUE ifnone); typedef VALUE rb_hash_update_func(VALUE newkey, VALUE oldkey, VALUE value); VALUE rb_hash_update_by(VALUE hash1, VALUE hash2, rb_hash_update_func *func); -struct st_table *rb_hash_tbl(VALUE); +struct st_table *rb_hash_tbl(VALUE, const char *file, int line); int rb_path_check(const char*); int rb_env_path_tainted(void); VALUE rb_env_clear(void); VALUE rb_hash_size(VALUE); +void rb_hash_free(VALUE); /* io.c */ #define rb_defout rb_stdout RUBY_EXTERN VALUE rb_fs; diff --git a/include/ruby/ruby.h b/include/ruby/ruby.h index 1512e78179..2cc0eb7531 100644 --- a/include/ruby/ruby.h +++ b/include/ruby/ruby.h @@ -1088,11 +1088,13 @@ struct RRegexp { #define RREGEXP_SRC_LEN(r) RSTRING_LEN(RREGEXP(r)->src) #define RREGEXP_SRC_END(r) RSTRING_END(RREGEXP(r)->src) -/* RHASH_TBL allocates st_table if not available. */ -#define RHASH_TBL(h) rb_hash_tbl(h) +/* RHash is defined at internal.h */ +size_t rb_hash_size_num(VALUE hash); + +#define RHASH_TBL(h) rb_hash_tbl(h, __FILE__, __LINE__) #define RHASH_ITER_LEV(h) rb_hash_iter_lev(h) #define RHASH_IFNONE(h) rb_hash_ifnone(h) -#define RHASH_SIZE(h) NUM2SIZET(rb_hash_size(h)) +#define RHASH_SIZE(h) rb_hash_size_num(h) #define RHASH_EMPTY_P(h) (RHASH_SIZE(h) == 0) #define RHASH_SET_IFNONE(h, ifnone) rb_hash_set_ifnone((VALUE)h, ifnone) diff --git a/include/ruby/st.h b/include/ruby/st.h index ede3ff4456..149e0ebaef 100644 --- a/include/ruby/st.h +++ b/include/ruby/st.h @@ -143,7 +143,7 @@ CONSTFUNC(st_index_t st_hash_end(st_index_t h)); CONSTFUNC(st_index_t st_hash_start(st_index_t h)); #define st_hash_start(h) ((st_index_t)(h)) -void rb_hash_bulk_insert(long, const VALUE *, VALUE); +void rb_hash_bulk_insert_into_st_table(long, const VALUE *, VALUE); RUBY_SYMBOL_EXPORT_END diff --git a/internal.h b/internal.h index 3baee61ee3..761c8412dc 100644 --- a/internal.h +++ b/internal.h @@ -672,23 +672,90 @@ struct RComplex { #define RCOMPLEX_SET_REAL(cmp, r) RB_OBJ_WRITE((cmp), &((struct RComplex *)(cmp))->real,(r)) #define RCOMPLEX_SET_IMAG(cmp, i) RB_OBJ_WRITE((cmp), &((struct RComplex *)(cmp))->imag,(i)) +enum ruby_rhash_flags { + RHASH_ST_TABLE_FLAG = FL_USER3, + RHASH_ARRAY_MAX_SIZE = 8, + RHASH_ARRAY_SIZE_MASK = (FL_USER4|FL_USER5|FL_USER6|FL_USER7), + RHASH_ARRAY_SIZE_SHIFT = (FL_USHIFT+4), + RHASH_ARRAY_BOUND_MASK = (FL_USER8|FL_USER9|FL_USER10|FL_USER11), + RHASH_ARRAY_BOUND_SHIFT = (FL_USHIFT+8), + + RHASH_ENUM_END +}; + +#define HASH_PROC_DEFAULT FL_USER2 + +#define RHASH_ARRAY_SIZE_RAW(h) \ + ((long)((RBASIC(h)->flags & RHASH_ARRAY_SIZE_MASK) >> RHASH_ARRAY_SIZE_SHIFT)) + +int rb_hash_array_p(VALUE hash); +struct li_table *rb_hash_array(VALUE hash); +st_table *rb_hash_st_table(VALUE hash); +void rb_hash_st_table_set(VALUE hash, st_table *st); + +#if 0 /* for debug */ +#define RHASH_ARRAY_P(hash) rb_hash_array_p(hash) +#define RHASH_ARRAY(h) rb_hash_array(h) +#define RHASH_ST_TABLE(h) rb_hash_st_table(h) +#else +#define RHASH_ARRAY_P(hash) (!FL_TEST_RAW((hash), RHASH_ST_TABLE_FLAG)) +#define RHASH_ARRAY(hash) (RHASH(hash)->as.li) +#define RHASH_ST_TABLE(hash) (RHASH(hash)->as.st) +#endif + +#define RHASH(obj) (R_CAST(RHash)(obj)) +#define RHASH_ST_SIZE(h) (RHASH_ST_TABLE(h)->num_entries) +#define RHASH_TABLE_P(h) (!RHASH_ARRAY_P(h)) +#define RHASH_CLEAR(h) (FL_UNSET_RAW(h, RHASH_ST_TABLE_FLAG), RHASH(h)->as.li = NULL) + +#define RHASH_ARRAY_SIZE_MASK (VALUE)RHASH_ARRAY_SIZE_MASK +#define RHASH_ARRAY_SIZE_SHIFT RHASH_ARRAY_SIZE_SHIFT +#define RHASH_ARRAY_BOUND_MASK (VALUE)RHASH_ARRAY_BOUND_MASK +#define RHASH_ARRAY_BOUND_SHIFT RHASH_ARRAY_BOUND_SHIFT +#define RHASH_TRANSIENT_FLAG FL_USER14 +#define RHASH_TRANSIENT_P(hash) FL_TEST_RAW((hash), RHASH_TRANSIENT_FLAG) + +#define RHASH_ARRAY_MAX_SIZE 8 +#define RHASH_ARRAY_MAX_BOUND RHASH_ARRAY_MAX_SIZE + +typedef struct li_table_entry { + VALUE hash; + VALUE key; + VALUE record; +} li_table_entry; + +typedef struct li_table { + li_table_entry entries[RHASH_ARRAY_MAX_SIZE]; +} li_table; + +/* + * RHASH_ARRAY_P(h): + * * as.li == NULL or + * as.li points li_table. + * * as.li is allocated by transient heap or xmalloc. + * + * !RHASH_ARRAY_P(h): + * * as.st points st_table. + */ struct RHash { struct RBasic basic; - struct st_table *ntbl; /* possibly 0 */ + union { + struct st_table *st; + struct li_table *li; /* possibly 0 */ + } as; int iter_lev; const VALUE ifnone; }; -#define RHASH(obj) (R_CAST(RHash)(obj)) - #ifdef RHASH_ITER_LEV -#undef RHASH_ITER_LEV -#undef RHASH_IFNONE -#undef RHASH_SIZE -#define RHASH_ITER_LEV(h) (RHASH(h)->iter_lev) -#define RHASH_IFNONE(h) (RHASH(h)->ifnone) -#define RHASH_SIZE(h) (RHASH(h)->ntbl ? RHASH(h)->ntbl->num_entries : (st_index_t)0) -#endif +# undef RHASH_ITER_LEV +# undef RHASH_IFNONE +# undef RHASH_SIZE + +# define RHASH_ITER_LEV(h) (RHASH(h)->iter_lev) +# define RHASH_IFNONE(h) (RHASH(h)->ifnone) +# define RHASH_SIZE(h) (RHASH_ARRAY_P(h) ? RHASH_ARRAY_SIZE_RAW(h) : RHASH_ST_SIZE(h)) +#endif /* #ifdef RHASH_ITER_LEV */ /* missing/setproctitle.c */ #ifndef HAVE_SETPROCTITLE @@ -1372,7 +1439,14 @@ void *rb_aligned_malloc(size_t, size_t); void rb_aligned_free(void *); /* hash.c */ +#if RHASH_CONVERT_TABLE_DEBUG +struct st_table *rb_hash_tbl_raw(VALUE hash, const char *file, int line); +#define RHASH_TBL_RAW(h) rb_hash_tbl_raw(h, __FILE__, __LINE__) +#else struct st_table *rb_hash_tbl_raw(VALUE hash); +#define RHASH_TBL_RAW(h) rb_hash_tbl_raw(h) +#endif + VALUE rb_hash_new_with_size(st_index_t size); RUBY_SYMBOL_EXPORT_BEGIN VALUE rb_hash_new_compare_by_id(void); @@ -1387,14 +1461,17 @@ st_table *rb_init_identtable_with_size(st_index_t size); VALUE rb_hash_compare_by_id_p(VALUE hash); VALUE rb_to_hash_type(VALUE obj); VALUE rb_hash_key_str(VALUE); - -#define RHASH_TBL_RAW(h) rb_hash_tbl_raw(h) VALUE rb_hash_keys(VALUE hash); VALUE rb_hash_values(VALUE hash); VALUE rb_hash_rehash(VALUE hash); int rb_hash_add_new_element(VALUE hash, VALUE key, VALUE val); -#define HASH_PROC_DEFAULT FL_USER2 VALUE rb_hash_set_pair(VALUE hash, VALUE pair); +void rb_hash_bulk_insert(long, const VALUE *, VALUE); + +int rb_hash_stlike_lookup(VALUE hash, st_data_t key, st_data_t *pval); +int rb_hash_stlike_delete(VALUE hash, st_data_t *pkey, st_data_t *pval); +int rb_hash_stlike_foreach(VALUE hash, int (*func)(ANYARGS), st_data_t arg); +int rb_hash_stlike_update(VALUE hash, st_data_t key, st_update_callback_func func, st_data_t arg); /* inits.c */ void rb_call_inits(void); diff --git a/marshal.c b/marshal.c index 0618400114..4bd12e725b 100644 --- a/marshal.c +++ b/marshal.c @@ -887,7 +887,7 @@ w_object(VALUE obj, struct dump_arg *arg, int limit) else { w_byte(TYPE_HASH_DEF, arg); } - w_long(RHASH_SIZE(obj), arg); + w_long(rb_hash_size_num(obj), arg); rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg); if (!NIL_P(RHASH_IFNONE(obj))) { w_object(RHASH_IFNONE(obj), arg, limit); diff --git a/mjit_worker.c b/mjit_worker.c index 18957cf373..18ac81d6fc 100644 --- a/mjit_worker.c +++ b/mjit_worker.c @@ -489,7 +489,7 @@ mjit_valid_class_serial_p(rb_serial_t class_serial) int found_p; CRITICAL_SECTION_START(3, "in valid_class_serial_p"); - found_p = st_lookup(RHASH_TBL_RAW(valid_class_serials), LONG2FIX(class_serial), NULL); + found_p = rb_hash_stlike_lookup(valid_class_serials, LONG2FIX(class_serial), NULL); CRITICAL_SECTION_FINISH(3, "in valid_class_serial_p"); return found_p; } diff --git a/process.c b/process.c index 13af9d182c..fafcf11ad2 100644 --- a/process.c +++ b/process.c @@ -2214,7 +2214,7 @@ rb_check_exec_options(VALUE opthash, VALUE execarg_obj) { if (RHASH_EMPTY_P(opthash)) return; - st_foreach(rb_hash_tbl_raw(opthash), check_exec_options_i, (st_data_t)execarg_obj); + rb_hash_stlike_foreach(opthash, check_exec_options_i, (st_data_t)execarg_obj); } VALUE @@ -2225,7 +2225,7 @@ rb_execarg_extract_options(VALUE execarg_obj, VALUE opthash) return Qnil; args[0] = execarg_obj; args[1] = Qnil; - st_foreach(rb_hash_tbl_raw(opthash), check_exec_options_i_extract, (st_data_t)args); + rb_hash_stlike_foreach(opthash, check_exec_options_i_extract, (st_data_t)args); return args[1]; } @@ -2269,7 +2269,7 @@ rb_check_exec_env(VALUE hash, VALUE *path) env[0] = hide_obj(rb_ary_new()); env[1] = Qfalse; - st_foreach(rb_hash_tbl_raw(hash), check_exec_env_i, (st_data_t)env); + rb_hash_stlike_foreach(hash, check_exec_env_i, (st_data_t)env); *path = env[1]; return env[0]; @@ -2733,7 +2733,7 @@ rb_execarg_parent_start1(VALUE execarg_obj) } envp_buf = rb_str_buf_new(0); hide_obj(envp_buf); - st_foreach(RHASH_TBL_RAW(envtbl), fill_envp_buf_i, (st_data_t)envp_buf); + rb_hash_stlike_foreach(envtbl, fill_envp_buf_i, (st_data_t)envp_buf); envp_str = rb_str_buf_new(sizeof(char*) * (RHASH_SIZE(envtbl) + 1)); hide_obj(envp_str); p = RSTRING_PTR(envp_buf); diff --git a/st.c b/st.c index 1a47525707..fd564b64d4 100644 --- a/st.c +++ b/st.c @@ -1196,7 +1196,7 @@ st_insert(st_table *tab, st_data_t key, st_data_t value) /* Insert (KEY, VALUE, HASH) into table TAB. The table should not have entry with KEY before the insertion. */ -static inline void +void st_add_direct_with_hash(st_table *tab, st_data_t key, st_data_t value, st_hash_t hash) { @@ -2281,24 +2281,16 @@ st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash) st_rehash(tab); } -/* Mimics ruby's { foo => bar } syntax. This function is placed here - because it touches table internals and write barriers at once. */ +/* Mimics ruby's { foo => bar } syntax. This function is subpart + of rb_hash_bulk_insert. */ void -rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash) +rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash) { - st_index_t n; - st_table *tab = RHASH(hash)->ntbl; - - st_assert(argc % 2 == 0); - if (! argc) - return; - if (! tab) { - VALUE tmp = rb_hash_new_with_size(argc / 2); - RBASIC_CLEAR_CLASS(tmp); - RHASH(hash)->ntbl = tab = RHASH(tmp)->ntbl; - RHASH(tmp)->ntbl = NULL; - } - n = tab->num_entries + argc / 2; + st_index_t n, size = argc / 2; + st_table *tab = RHASH_ST_TABLE(hash); + + tab = RHASH_TBL_RAW(hash); + n = tab->num_entries + size; st_expand_table(tab, n); if (UNLIKELY(tab->num_entries)) st_insert_generic(tab, argc, argv, hash); diff --git a/test/ruby/test_time.rb b/test/ruby/test_time.rb index 50ac569c4e..0aac07b05d 100644 --- a/test/ruby/test_time.rb +++ b/test/ruby/test_time.rb @@ -1138,6 +1138,7 @@ class TestTime < Test::Unit::TestCase case size when 20 then expect = 50 when 40 then expect = 86 + when 48 then expect = 94 else flunk "Unsupported RVALUE_SIZE=#{size}, update test_memsize" end diff --git a/thread.c b/thread.c index be6667fe1b..a5f0e556f1 100644 --- a/thread.c +++ b/thread.c @@ -3492,11 +3492,11 @@ rb_thread_variable_p(VALUE thread, VALUE key) locals = rb_ivar_get(thread, id_locals); - if (!RHASH(locals)->ntbl) + if (rb_hash_lookup(locals, ID2SYM(id)) != Qnil) { + return Qtrue; + } + else { return Qfalse; - - if (st_lookup(RHASH(locals)->ntbl, ID2SYM(id), 0)) { - return Qtrue; } return Qfalse; @@ -4349,7 +4349,7 @@ rb_clear_coverages(void) { VALUE coverages = rb_get_coverages(); if (RTEST(coverages)) { - st_foreach(rb_hash_tbl_raw(coverages), clear_coverage_i, 0); + rb_hash_foreach(coverages, clear_coverage_i, 0); } } diff --git a/transient_heap.c b/transient_heap.c index 5f9962f4a0..077b46097c 100644 --- a/transient_heap.c +++ b/transient_heap.c @@ -353,9 +353,10 @@ rb_transient_heap_alloc(VALUE obj, size_t req_size) struct transient_heap* theap = transient_heap_get(); size_t size = ROUND_UP(req_size + sizeof(struct transient_alloc_header), TRANSIENT_HEAP_ALLOC_ALIGN); - TH_ASSERT(RB_TYPE_P(obj, T_ARRAY) || + TH_ASSERT(RB_TYPE_P(obj, T_ARRAY) || RB_TYPE_P(obj, T_OBJECT) || - RB_TYPE_P(obj, T_STRUCT)); /* supported types */ + RB_TYPE_P(obj, T_STRUCT) || + RB_TYPE_P(obj, T_HASH)); /* supported types */ if (size > TRANSIENT_HEAP_ALLOC_MAX) { if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_alloc: [too big: %ld] %s\n", (long)size, rb_obj_info(obj)); @@ -458,7 +459,6 @@ alloc_header_to_block_verbose(struct transient_heap *theap, struct transient_all else { return NULL; } - return block; } static struct transient_alloc_header * @@ -508,7 +508,7 @@ void rb_transient_heap_mark(VALUE obj, const void *ptr) { struct transient_alloc_header *header = ptr_to_alloc_header(ptr); - + if (header->magic != TRANSIENT_HEAP_ALLOC_MAGIC) rb_bug("rb_transient_heap_mark: wrong header, %s (%p)", rb_obj_info(obj), ptr); if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_mark: %s (%p)\n", rb_obj_info(obj), ptr); #if TRANSIENT_HEAP_CHECK_MODE > 0 @@ -522,7 +522,7 @@ rb_transient_heap_mark(VALUE obj, const void *ptr) rb_bug("rb_transient_heap_mark: magic is broken"); } else if (header->obj != obj) { - transient_heap_dump(theap); + // transient_heap_dump(theap); rb_bug("rb_transient_heap_mark: unmatch (%s is stored, but %s is given)\n", rb_obj_info(header->obj), rb_obj_info(obj)); } @@ -566,6 +566,15 @@ transient_heap_ptr(VALUE obj, int error) ptr = rb_struct_const_heap_ptr(obj); } break; + case T_HASH: + if (RHASH_TRANSIENT_P(obj)) { + TH_ASSERT(RHASH_ARRAY_P(obj)); + ptr = (VALUE *)(RHASH(obj)->as.li); + } + else { + ptr = NULL; + } + break; default: if (error) { rb_bug("transient_heap_ptr: unknown obj %s\n", rb_obj_info(obj)); @@ -657,6 +666,8 @@ transient_heap_block_evacuate(struct transient_heap* theap, struct transient_hea while (marked_index >= 0) { struct transient_alloc_header *header = alloc_header(block, marked_index); VALUE obj = header->obj; + TH_ASSERT(header->magic == TRANSIENT_HEAP_ALLOC_MAGIC); + if (header->magic != TRANSIENT_HEAP_ALLOC_MAGIC) rb_bug("rb_transient_heap_mark: wrong header %s\n", rb_obj_info(obj)); if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, " * transient_heap_block_evacuate %p %s\n", header, rb_obj_info(obj)); @@ -673,8 +684,11 @@ transient_heap_block_evacuate(struct transient_heap* theap, struct transient_hea case T_STRUCT: rb_struct_transient_heap_evacuate(obj, !TRANSIENT_HEAP_DEBUG_DONT_PROMOTE); break; + case T_HASH: + rb_hash_transient_heap_evacuate(obj, !TRANSIENT_HEAP_DEBUG_DONT_PROMOTE); + break; default: - rb_bug("unsupporeted"); + rb_bug("unsupporeted: %s\n", rb_obj_info(obj)); } header->obj = Qundef; /* for debug */ } diff --git a/transient_heap.h b/transient_heap.h index e257a8327b..2ba0b638e5 100644 --- a/transient_heap.h +++ b/transient_heap.h @@ -32,9 +32,9 @@ void rb_transient_heap_dump(void); void rb_transient_heap_verify(void); int rb_transient_heap_managed_ptr_p(const void *ptr); -/* evacuate functions */ +/* evacuate functions for each type */ void rb_ary_transient_heap_evacuate(VALUE ary, int promote); void rb_obj_transient_heap_evacuate(VALUE obj, int promote); +void rb_hash_transient_heap_evacuate(VALUE hash, int promote); void rb_struct_transient_heap_evacuate(VALUE st, int promote); - #endif diff --git a/vm_eval.c b/vm_eval.c index a864b75712..c0d7062947 100644 --- a/vm_eval.c +++ b/vm_eval.c @@ -2035,10 +2035,9 @@ static void local_var_list_add(const struct local_var_list *vars, ID lid) { if (lid && rb_is_local_id(lid)) { - /* should skip temporary variable */ - st_table *tbl = RHASH_TBL_RAW(vars->tbl); - st_data_t idx = 0; /* tbl->num_entries */ - st_update(tbl, ID2SYM(lid), local_var_list_update, idx); + /* should skip temporary variable */ + st_data_t idx = 0; /* tbl->num_entries */ + rb_hash_stlike_update(vars->tbl, ID2SYM(lid), local_var_list_update, idx); } } diff --git a/vm_insnhelper.c b/vm_insnhelper.c index a7fec0b71f..3e877f1210 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -3327,7 +3327,7 @@ vm_case_dispatch(CDHASH hash, OFFSET else_offset, VALUE key) key = FIXABLE(kval) ? LONG2FIX((long)kval) : rb_dbl2big(kval); } } - if (st_lookup(RHASH_TBL_RAW(hash), key, &val)) { + if (rb_hash_stlike_lookup(hash, key, &val)) { return FIX2LONG((VALUE)val); } else { -- cgit v1.2.3