summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormame <mame@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2009-02-14 19:55:34 +0000
committermame <mame@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2009-02-14 19:55:34 +0000
commite722ad99d5b0e6a9bb0249ff3d9c8cce28d3204e (patch)
tree704a4bf758367e8c8b7100dd0cb2c5e8dd26c6b9
parenta48f90b05b654cc98727a91c5fd9952c5a86221c (diff)
* string.c (rb_hash_uint32, rb_hash_uint, rb_hash_start, rb_hash_end),
include/ruby/intern.h: add Murmurhash API. [ruby-dev:37784] * complex.c (nucomp_hash), array.c (rb_ary_hash), time.c (time_hash), string.c (rb_str_hsah), object.c (rb_obj_hash), range.c (range_hash), struct.c (rb_struct_hash), hash.c (rb_any_hash), rational.c (nurat_hash): use Murmurhash. [ruby-dev:37784] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@22317 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--ChangeLog10
-rw-r--r--array.c6
-rw-r--r--complex.c17
-rw-r--r--gc.c1
-rw-r--r--hash.c2
-rw-r--r--include/ruby/intern.h4
-rw-r--r--object.c9
-rw-r--r--range.c10
-rw-r--r--rational.c17
-rw-r--r--string.c58
-rw-r--r--struct.c11
-rw-r--r--time.c2
12 files changed, 117 insertions, 30 deletions
diff --git a/ChangeLog b/ChangeLog
index d96429ecf4..da64174aad 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Sun Feb 15 04:48:08 2009 Yusuke Endoh <mame@tsg.ne.jp>
+
+ * string.c (rb_hash_uint32, rb_hash_uint, rb_hash_start, rb_hash_end),
+ include/ruby/intern.h: add Murmurhash API. [ruby-dev:37784]
+
+ * complex.c (nucomp_hash), array.c (rb_ary_hash), time.c (time_hash),
+ string.c (rb_str_hsah), object.c (rb_obj_hash), range.c
+ (range_hash), struct.c (rb_struct_hash), hash.c (rb_any_hash),
+ rational.c (nurat_hash): use Murmurhash. [ruby-dev:37784]
+
Sun Feb 15 03:50:21 2009 Yusuke Endoh <mame@tsg.ne.jp>
* hash.c (rb_hash): always return a fixnum value because a return
diff --git a/array.c b/array.c
index 70d18543e6..ca23dd4922 100644
--- a/array.c
+++ b/array.c
@@ -2771,12 +2771,12 @@ recursive_hash(VALUE ary, VALUE dummy, int recur)
if (recur) {
return LONG2FIX(0);
}
- h = RARRAY_LEN(ary);
+ h = rb_hash_start(RARRAY_LEN(ary));
for (i=0; i<RARRAY_LEN(ary); i++) {
- h = (h << 1) | (h<0 ? 1 : 0);
n = rb_hash(RARRAY_PTR(ary)[i]);
- h ^= NUM2LONG(n);
+ h = rb_hash_uint(h, NUM2LONG(n));
}
+ h = rb_hash_end(h);
return LONG2FIX(h);
}
diff --git a/complex.c b/complex.c
index 180e7d23c9..ceb7b75f78 100644
--- a/complex.c
+++ b/complex.c
@@ -22,7 +22,7 @@
VALUE rb_cComplex;
static ID id_abs, id_abs2, id_arg, id_cmp, id_conj, id_convert,
- id_denominator, id_divmod, id_equal_p, id_expt, id_floor, id_hash,
+ id_denominator, id_divmod, id_equal_p, id_expt, id_floor,
id_idiv, id_inspect, id_negate, id_numerator, id_polar, id_quo,
id_real_p, id_to_f, id_to_i, id_to_r, id_to_s;
@@ -153,15 +153,12 @@ f_sub(VALUE x, VALUE y)
return rb_funcall(x, '-', 1, y);
}
-binop(xor, '^')
-
fun1(abs)
fun1(abs2)
fun1(arg)
fun1(conj)
fun1(denominator)
fun1(floor)
-fun1(hash)
fun1(inspect)
fun1(negate)
fun1(numerator)
@@ -857,8 +854,17 @@ nucomp_numerator(VALUE self)
static VALUE
nucomp_hash(VALUE self)
{
+ long v, h[3];
+ VALUE n;
+
get_dat1(self);
- return f_xor(f_hash(dat->real), f_hash(dat->imag));
+ h[0] = rb_hash(rb_obj_class(self));
+ n = rb_hash(dat->real);
+ h[1] = NUM2LONG(n);
+ n = rb_hash(dat->imag);
+ h[2] = NUM2LONG(n);
+ v = rb_memhash(h, sizeof(h));
+ return LONG2FIX(v);
}
static VALUE
@@ -1384,7 +1390,6 @@ Init_Complex(void)
id_equal_p = rb_intern("==");
id_expt = rb_intern("**");
id_floor = rb_intern("floor");
- id_hash = rb_intern("hash");
id_idiv = rb_intern("div");
id_inspect = rb_intern("inspect");
id_negate = rb_intern("-@");
diff --git a/gc.c b/gc.c
index cc694f25b1..94cd66d89f 100644
--- a/gc.c
+++ b/gc.c
@@ -2913,7 +2913,6 @@ Init_GC(void)
OBJ_TAINT(nomem_error);
OBJ_FREEZE(nomem_error);
- rb_define_method(rb_mKernel, "hash", rb_obj_id, 0);
rb_define_method(rb_mKernel, "__id__", rb_obj_id, 0);
rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);
diff --git a/hash.c b/hash.c
index 5d6b20cea9..abce6f7eca 100644
--- a/hash.c
+++ b/hash.c
@@ -84,7 +84,7 @@ rb_any_hash(VALUE a)
case T_NIL:
case T_FALSE:
case T_TRUE:
- hnum = (int)a;
+ hnum = rb_hash_end(rb_hash_start((unsigned int)a));
break;
case T_STRING:
diff --git a/include/ruby/intern.h b/include/ruby/intern.h
index e268428579..d39a9352c4 100644
--- a/include/ruby/intern.h
+++ b/include/ruby/intern.h
@@ -610,6 +610,10 @@ VALUE rb_str_cat2(VALUE, const char*);
VALUE rb_str_append(VALUE, VALUE);
VALUE rb_str_concat(VALUE, VALUE);
int rb_memhash(const void *ptr, long len);
+unsigned int rb_hash_start(unsigned int);
+unsigned int rb_hash_uint32(unsigned int, unsigned int);
+unsigned int rb_hash_uint(unsigned int, unsigned int);
+unsigned int rb_hash_end(unsigned int);
int rb_str_hash(VALUE);
int rb_str_hash_cmp(VALUE,VALUE);
int rb_str_comparable(VALUE, VALUE);
diff --git a/object.c b/object.c
index ab940abe8f..39bab324dc 100644
--- a/object.c
+++ b/object.c
@@ -95,6 +95,14 @@ rb_obj_equal(VALUE obj1, VALUE obj2)
return Qfalse;
}
+VALUE
+rb_obj_hash(VALUE obj)
+{
+ VALUE oid = rb_obj_id(obj);
+ unsigned h = rb_hash_end(rb_hash_start(NUM2LONG(oid)));
+ return LONG2NUM(h);
+}
+
/*
* call-seq:
* !obj => true or false
@@ -2505,6 +2513,7 @@ Init_Object(void)
rb_define_method(rb_mKernel, "=~", rb_obj_match, 1);
rb_define_method(rb_mKernel, "!~", rb_obj_not_match, 1);
rb_define_method(rb_mKernel, "eql?", rb_obj_equal, 1);
+ rb_define_method(rb_mKernel, "hash", rb_obj_hash, 0);
rb_define_method(rb_mKernel, "class", rb_obj_class, 0);
rb_define_method(rb_mKernel, "clone", rb_obj_clone, 0);
diff --git a/range.c b/range.c
index 74f9e569f5..4c9fa869f7 100644
--- a/range.c
+++ b/range.c
@@ -213,14 +213,16 @@ range_eql(VALUE range, VALUE obj)
static VALUE
range_hash(VALUE range)
{
- long hash = EXCL(range);
+ unsigned hash = EXCL(range);
VALUE v;
+ hash = rb_hash_start(hash);
v = rb_hash(RANGE_BEG(range));
- hash ^= v << 1;
+ hash = rb_hash_uint(hash, NUM2LONG(v));
v = rb_hash(RANGE_END(range));
- hash ^= v << 9;
- hash ^= EXCL(range) << 24;
+ hash = rb_hash_uint(hash, NUM2LONG(v));
+ hash = rb_hash_uint(hash, EXCL(range) << 24);
+ hash = rb_hash_end(hash);
return LONG2FIX(hash);
}
diff --git a/rational.c b/rational.c
index c90949377a..9a2c23bfa0 100644
--- a/rational.c
+++ b/rational.c
@@ -27,7 +27,7 @@
VALUE rb_cRational;
static ID id_abs, id_cmp, id_convert, id_equal_p, id_expt, id_floor,
- id_hash, id_idiv, id_inspect, id_integer_p, id_negate, id_to_f,
+ id_idiv, id_inspect, id_integer_p, id_negate, id_to_f,
id_to_i, id_to_s, id_truncate;
#define f_boolcast(x) ((x) ? Qtrue : Qfalse)
@@ -135,11 +135,8 @@ f_sub(VALUE x, VALUE y)
return rb_funcall(x, '-', 1, y);
}
-binop(xor, '^')
-
fun1(abs)
fun1(floor)
-fun1(hash)
fun1(inspect)
fun1(integer_p)
fun1(negate)
@@ -1161,8 +1158,17 @@ nurat_to_r(VALUE self)
static VALUE
nurat_hash(VALUE self)
{
+ long v, h[3];
+ VALUE n;
+
get_dat1(self);
- return f_xor(f_hash(dat->num), f_hash(dat->den));
+ h[0] = rb_hash(rb_obj_class(self));
+ n = rb_hash(dat->num);
+ h[1] = NUM2LONG(n);
+ n = rb_hash(dat->den);
+ h[2] = NUM2LONG(n);
+ v = rb_memhash(h, sizeof(h));
+ return LONG2FIX(v);
}
static VALUE
@@ -1554,7 +1560,6 @@ Init_Rational(void)
id_equal_p = rb_intern("==");
id_expt = rb_intern("**");
id_floor = rb_intern("floor");
- id_hash = rb_intern("hash");
id_idiv = rb_intern("div");
id_inspect = rb_intern("inspect");
id_integer_p = rb_intern("integer?");
diff --git a/string.c b/string.c
index 28410a448c..4f30aea591 100644
--- a/string.c
+++ b/string.c
@@ -1912,6 +1912,7 @@ murmur(unsigned int h, unsigned int k, int r)
#endif
return h;
}
+#define murmur16(h) murmur_step(h, 16)
static inline unsigned int
murmur_finish(unsigned int h)
@@ -2053,8 +2054,53 @@ hash(const unsigned char * data, int len, unsigned int h)
return murmur_finish(h);
}
-int
-rb_memhash(const void *ptr, long len)
+unsigned int
+rb_hash_uint32(unsigned int h, unsigned int i)
+{
+ return murmur_step(h + i, 16);
+}
+
+unsigned int
+rb_hash_uint(unsigned int h, unsigned int i)
+{
+ unsigned int v = 0;
+ h += i;
+#ifdef WORDS_BIGENDIAN
+#if SIZEOF_INT*CHAR_BIT > 12*8
+ v = murmur16(v + (h >> 12*8));
+#endif
+#if SIZEOF_INT*CHAR_BIT > 8*8
+ v = murmur16(v + (h >> 8*8));
+#endif
+#if SIZEOF_INT*CHAR_BIT > 4*8
+ v = murmur16(v + (h >> 4*8));
+#endif
+#endif
+ v = murmur16(v + h);
+#ifndef WORDS_BIGENDIAN
+#if SIZEOF_INT*CHAR_BIT > 4*8
+ v = murmur16(v + (h >> 4*8));
+#endif
+#if SIZEOF_INT*CHAR_BIT > 8*8
+ v = murmur16(v + (h >> 8*8));
+#endif
+#if SIZEOF_INT*CHAR_BIT > 12*8
+ v = murmur16(v + (h >> 12*8));
+#endif
+#endif
+ return v;
+}
+
+unsigned int
+rb_hash_end(unsigned int h)
+{
+ h = murmur_step(h, 10);
+ h = murmur_step(h, 17);
+ return h;
+}
+
+unsigned int
+rb_hash_start(unsigned int h)
{
static int hashseed_init = 0;
static unsigned int hashseed;
@@ -2064,7 +2110,13 @@ rb_memhash(const void *ptr, long len)
hashseed_init = 1;
}
- return hash(ptr, len, hashseed);
+ return hashseed + h;
+}
+
+int
+rb_memhash(const void *ptr, long len)
+{
+ return hash(ptr, len, rb_hash_start(0));
}
int
diff --git a/struct.c b/struct.c
index 0cc836123f..838830aa69 100644
--- a/struct.c
+++ b/struct.c
@@ -804,16 +804,17 @@ rb_struct_equal(VALUE s, VALUE s2)
static VALUE
rb_struct_hash(VALUE s)
{
- long i, h;
+ long i;
+ unsigned h;
VALUE n;
- h = rb_hash(rb_obj_class(s));
+ h = rb_hash_start(rb_hash(rb_obj_class(s)));
for (i = 0; i < RSTRUCT_LEN(s); i++) {
- h = (h << 1) | (h<0 ? 1 : 0);
n = rb_hash(RSTRUCT_PTR(s)[i]);
- h ^= NUM2LONG(n);
+ h = rb_hash_uint(h, NUM2LONG(n));
}
- return LONG2FIX(h);
+ h = rb_hash_end(h);
+ return INT2FIX(h);
}
/*
diff --git a/time.c b/time.c
index fe70b2d12f..f6a754a332 100644
--- a/time.c
+++ b/time.c
@@ -1183,7 +1183,7 @@ time_hash(VALUE time)
long hash;
GetTimeval(time, tobj);
- hash = tobj->ts.tv_sec ^ tobj->ts.tv_nsec;
+ hash = rb_hash_end(rb_hash_uint(rb_hash_start(tobj->ts.tv_sec), tobj->ts.tv_nsec));
return LONG2FIX(hash);
}