From 75775157ea68efdd0b71345a37a6930e5bf1d2ef Mon Sep 17 00:00:00 2001 From: ko1 Date: Mon, 7 Nov 2016 00:45:00 +0000 Subject: Introduce table improvement by Vladimir Makarov . [Feature #12142] See header of st.c for improvment details. You can see all of code history here: This improvement is discussed at with many people, especially with Yura Sokolov. * st.c: improve st_table. * include/ruby/st.h: ditto. * internal.h, numeric.c, hash.c (rb_dbl_long_hash): extract a function. * ext/-test-/st/foreach/foreach.c: catch up this change. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56650 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- hash.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 77 insertions(+), 31 deletions(-) (limited to 'hash.c') diff --git a/hash.c b/hash.c index 8ed8c48ebb..4615c47b82 100644 --- a/hash.c +++ b/hash.c @@ -145,14 +145,36 @@ rb_hash(VALUE obj) long rb_objid_hash(st_index_t index); -static st_index_t -any_hash(VALUE a, st_index_t (*other_func)(VALUE)) +long +rb_dbl_long_hash(double d) +{ + /* normalize -0.0 to 0.0 */ + if (d == 0.0) d = 0.0; +#if SIZEOF_INT == SIZEOF_VOIDP + return rb_memhash(&d, sizeof(d)); +#else + { + union {double d; uint64_t i;} u; + + u.d = d; + return rb_objid_hash(u.i); + } +#endif +} + +#if SIZEOF_INT == SIZEOF_VOIDP +static const st_index_t str_seed = 0xfa835867; +#else +static const st_index_t str_seed = 0xc42b5e2e6480b23bULL; +#endif + +static inline st_index_t +any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE)) { VALUE hval; st_index_t hnum; if (SPECIAL_CONST_P(a)) { - if (a == Qundef) return 0; if (STATIC_SYM_P(a)) { hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT); goto out; @@ -164,7 +186,9 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE)) hnum = rb_objid_hash((st_index_t)a); } else if (BUILTIN_TYPE(a) == T_STRING) { - hnum = rb_str_hash(a); + hnum = (strong_p + ? rb_str_hash(a) + : st_hash(RSTRING_PTR(a), RSTRING_LEN(a), str_seed)); } else if (BUILTIN_TYPE(a) == T_SYMBOL) { hnum = RSYMBOL(a)->hashval; @@ -175,8 +199,7 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE)) } else if (BUILTIN_TYPE(a) == T_FLOAT) { flt: - hval = rb_dbl_hash(rb_float_value(a)); - hnum = FIX2LONG(hval); + hnum = rb_dbl_long_hash(rb_float_value(a)); } else { hnum = other_func(a); @@ -193,40 +216,62 @@ obj_any_hash(VALUE obj) return FIX2LONG(obj); } +static inline st_index_t +any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE)) { + return any_hash_general(a, FALSE, other_func); +} + static st_index_t -rb_any_hash(VALUE a) -{ - return any_hash(a, obj_any_hash); +rb_any_hash_weak(VALUE a) { + return any_hash_weak(a, obj_any_hash); +} + +static inline st_index_t +any_hash(VALUE a, st_index_t (*other_func)(VALUE)) { + return any_hash_general(a, TRUE, other_func); } static st_index_t -rb_num_hash_start(st_index_t n) +rb_any_hash(VALUE a) { + return any_hash(a, obj_any_hash); +} + +/* Here is a hash function for 64-bit key. It is about 5 times faster + (2 times faster when uint128 type is absent) on Haswell than + tailored Spooky or City hash function can be. */ + +/* Here we two primes with random bit generation. */ +static const uint64_t prime1 = 0x2e0bb864e9ea7df5ULL; +static const uint64_t prime2 = 0xcdb32970830fcaa1ULL; + + +static inline uint64_t +mult_and_mix (uint64_t m1, uint64_t m2) { - /* - * This hash function is lightly-tuned for Ruby. Further tuning - * should be possible. Notes: - * - * - (n >> 3) alone is great for heap objects and OK for fixnum, - * however symbols perform poorly. - * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well, - * n.b.: +3 to remove most ID scope, +1 worked well initially, too - * n.b.: +1 (instead of 3) worked well initially, too - * - (n << 16) was finally added to avoid losing bits for fixnums - * - avoid expensive modulo instructions, it is currently only - * shifts and bitmask operations. - */ - return (n >> (RUBY_SPECIAL_SHIFT + 3) ^ (n << 16)) ^ (n >> 3); +#if defined(__GNUC__) && UINT_MAX != ULONG_MAX + __uint128_t r = (__uint128_t) m1 * (__uint128_t) m2; + return (uint64_t) (r >> 64) ^ (uint64_t) r; +#else + uint64_t hm1 = m1 >> 32, hm2 = m2 >> 32; + uint64_t lm1 = m1, lm2 = m2; + uint64_t v64_128 = hm1 * hm2; + uint64_t v32_96 = hm1 * lm2 + lm1 * hm2; + uint64_t v1_32 = lm1 * lm2; + + return (v64_128 + (v32_96 >> 32)) ^ ((v32_96 << 32) + v1_32); +#endif +} + +static inline uint64_t +key64_hash (uint64_t key, uint32_t seed) +{ + return mult_and_mix(key + seed, prime1); } long rb_objid_hash(st_index_t index) { - st_index_t hnum = rb_num_hash_start(index); - - hnum = rb_hash_start(hnum); - hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash); - hnum = rb_hash_end(hnum); - return hnum; + return key64_hash(index, (uint32_t) prime2); } static st_index_t @@ -250,6 +295,7 @@ rb_hash_iter_lev(VALUE h) static const struct st_hash_type objhash = { rb_any_cmp, + rb_any_hash_weak, rb_any_hash, }; @@ -269,7 +315,7 @@ rb_ident_hash(st_data_t n) } #endif - return (st_index_t)rb_num_hash_start((st_index_t)n); + return (st_index_t) key64_hash((st_index_t)n, (uint32_t) prime2); } static const struct st_hash_type identhash = { -- cgit v1.2.3