From 72825c35b0d8b9d566663de961fddbf4f010fff7 Mon Sep 17 00:00:00 2001 From: Koichi Sasada Date: Thu, 17 Jan 2019 16:53:10 +0000 Subject: Use 1 byte hint for ar_table [Feature #15602] On ar_table, Do not keep a full-length hash value (FLHV, 8 bytes) but keep a 1 byte hint from a FLHV (lowest byte of FLHV). An ar_table only contains at least 8 entries, so hints consumes 8 bytes at most. We can store hints in RHash::ar_hint. On 32bit CPU, we use 4 entries ar_table. The advantages: * We don't need to keep FLHV so ar_table only consumes 16 bytes (VALUEs of key and value) * 8 entries = 128 bytes. * We don't need to scan ar_table, but only need to check hints in many cases. Especially we don't need to access ar_table if there is no match entries (in many cases). It will increase memory cache locality. The disadvantages: * This technique can increase `#eql?` time because hints can conflicts (in theory, it conflicts once in 256 times). It can introduce incompatibility if there is a object x where x.eql? returns true even if hash values are different. I believe we don't need to care such irregular case. * We need to re-calculate FLHV if we need to switch from ar_table to st_table (e.g. exceeds 8 entries). It also can introduce incompatibility, on mutating key objects. I believe we don't need to care such irregular case too. Add new debug counters to measure the performance: * artable_hint_hit - hint is matched and eql?#=>true * artable_hint_miss - hint is not matched but eql?#=>false * artable_hint_notfound - lookup counts --- internal.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'internal.h') diff --git a/internal.h b/internal.h index 0c148bfe3b..1203f1798e 100644 --- a/internal.h +++ b/internal.h @@ -815,6 +815,7 @@ struct RComplex { #define RCOMPLEX_SET_IMAG(cmp, i) RB_OBJ_WRITE((cmp), &((struct RComplex *)(cmp))->imag,(i)) enum ruby_rhash_flags { + RHASH_PROC_DEFAULT = FL_USER2, /* FL 2 */ RHASH_ST_TABLE_FLAG = FL_USER3, /* FL 3 */ RHASH_AR_TABLE_MAX_SIZE = 8, RHASH_AR_TABLE_SIZE_MASK = (FL_USER4|FL_USER5|FL_USER6|FL_USER7), /* FL 4..7 */ @@ -834,8 +835,6 @@ enum ruby_rhash_flags { RHASH_ENUM_END }; -#define HASH_PROC_DEFAULT FL_USER2 - #define RHASH_AR_TABLE_SIZE_RAW(h) \ ((unsigned int)((RBASIC(h)->flags & RHASH_AR_TABLE_SIZE_MASK) >> RHASH_AR_TABLE_SIZE_SHIFT)) @@ -881,7 +880,10 @@ struct RHash { struct ar_table_struct *ar; /* possibly 0 */ } as; const VALUE ifnone; - const VALUE reserved; + union { + unsigned char ary[sizeof(VALUE)]; + VALUE word; + } ar_hint; }; #ifdef RHASH_IFNONE @@ -890,7 +892,7 @@ struct RHash { # define RHASH_IFNONE(h) (RHASH(h)->ifnone) # define RHASH_SIZE(h) (RHASH_AR_TABLE_P(h) ? RHASH_AR_TABLE_SIZE_RAW(h) : RHASH_ST_SIZE(h)) -#endif /* #ifdef RHASH_ITER_LEV */ +#endif /* ifdef RHASH_IFNONE */ struct RMoved { VALUE flags; -- cgit v1.2.3