summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
authormatz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2006-11-01 14:23:42 +0000
committermatz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2006-11-01 14:23:42 +0000
commit11e562f34afff4d657da12e85f959200207d6bd5 (patch)
tree5813cf220086b7661ba574803907c56fea3140f6 /string.c
parentdd937ba9dce30fb761da032ffda03f8b08c35e6e (diff)
* string.c (hash): use Bob Jenkins' hash algorithm.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@11255 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r--string.c202
1 files changed, 114 insertions, 88 deletions
diff --git a/string.c b/string.c
index 529c375b4f..79fa217871 100644
--- a/string.c
+++ b/string.c
@@ -879,102 +879,128 @@ rb_str_concat(VALUE str1, VALUE str2)
return rb_str_append(str1, str2);
}
-/*
- * hash_32 - 32 bit Fowler/Noll/Vo FNV-1a hash code
- *
- * @(#) $hash_32.Revision: 1.1 $
- * @(#) $hash_32.Id: hash_32a.c,v 1.1 2003/10/03 20:38:53 chongo Exp $
- * @(#) $hash_32.Source: /usr/local/src/cmd/fnv/RCS/hash_32a.c,v $
- *
- ***
- *
- * Fowler/Noll/Vo hash
- *
- * The basis of this hash algorithm was taken from an idea sent
- * as reviewer comments to the IEEE POSIX P1003.2 committee by:
- *
- * Phong Vo (http://www.research.att.com/info/kpv/)
- * Glenn Fowler (http://www.research.att.com/~gsf/)
- *
- * In a subsequent ballot round:
- *
- * Landon Curt Noll (http://www.isthe.com/chongo/)
- *
- * improved on their algorithm. Some people tried this hash
- * and found that it worked rather well. In an EMail message
- * to Landon, they named it the ``Fowler/Noll/Vo'' or FNV hash.
- *
- * FNV hashes are designed to be fast while maintaining a low
- * collision rate. The FNV speed allows one to quickly hash lots
- * of data while maintaining a reasonable collision rate. See:
- *
- * http://www.isthe.com/chongo/tech/comp/fnv/index.html
- *
- * for more details as well as other forms of the FNV hash.
- ***
- *
- * To use the recommended 32 bit FNV-1a hash, pass FNV1_32A_INIT as the
- * Fnv32_t hashval argument to fnv_32a_buf() or fnv_32a_str().
- *
- ***
- *
- * Please do not copyright this code. This code is in the public domain.
- *
- * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO
- * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
- * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
- * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- *
- * By:
- * chongo <Landon Curt Noll> /\oo/\
- * http://www.isthe.com/chongo/
- *
- * Share and Enjoy! :-)
- */
+typedef unsigned int ub4; /* unsigned 4-byte quantities */
+typedef unsigned char ub1; /* unsigned 1-byte quantities */
+
+#define hashsize(n) ((ub4)1<<(n))
+#define hashmask(n) (hashsize(n)-1)
/*
- * 32 bit FNV-1 and FNV-1a non-zero initial basis
- *
- * The FNV-1 initial basis is the FNV-0 hash of the following 32 octets:
- *
- * chongo <Landon Curt Noll> /\../\
- *
- * NOTE: The \'s above are not back-slashing escape characters.
- * They are literal ASCII backslash 0x5c characters.
- *
- * NOTE: The FNV-1a initial basis is the same value as FNV-1 by definition.
- */
-#define FNV1_32A_INIT 0x811c9dc5
+--------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly.
+For every delta with one or two bits set, and the deltas of all three
+ high bits or all three low bits, whether the original value of a,b,c
+ is almost all zero or is uniformly distributed,
+* If mix() is run forward or backward, at least 32 bits in a,b,c
+ have at least 1/4 probability of changing.
+* If mix() is run forward, every bit of c will change between 1/3 and
+ 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
+mix() was built out of 36 single-cycle latency instructions in a
+ structure that could supported 2x parallelism, like so:
+ a -= b;
+ a -= c; x = (c>>13);
+ b -= c; a ^= x;
+ b -= a; x = (a<<8);
+ c -= a; b ^= x;
+ c -= b; x = (b>>13);
+ ...
+ Unfortunately, superscalar Pentiums and Sparcs can't take advantage
+ of that parallelism. They've also turned some of those single-cycle
+ latency instructions into multi-cycle latency instructions. Still,
+ this is the fastest good hash I could find. There were about 2^^68
+ to choose from. I only looked at a billion or so.
+--------------------------------------------------------------------
+*/
+#define mix(a,b,c) \
+{ \
+ a -= b; a -= c; a ^= (c>>13); \
+ b -= c; b -= a; b ^= (a<<8); \
+ c -= a; c -= b; c ^= (b>>13); \
+ a -= b; a -= c; a ^= (c>>12); \
+ b -= c; b -= a; b ^= (a<<16); \
+ c -= a; c -= b; c ^= (b>>5); \
+ a -= b; a -= c; a ^= (c>>3); \
+ b -= c; b -= a; b ^= (a<<10); \
+ c -= a; c -= b; c ^= (b>>15); \
+}
/*
- * 32 bit magic FNV-1a prime
- */
-#define FNV_32_PRIME 0x01000193
+--------------------------------------------------------------------
+hash() -- hash a variable-length key into a 32-bit value
+ k : the key (the unaligned variable-length array of bytes)
+ len : the length of the key, counting by bytes
+ initval : can be any 4-byte value
+Returns a 32-bit value. Every bit of the key affects every bit of
+the return value. Every 1-bit and 2-bit delta achieves avalanche.
+About 6*len+35 instructions.
+
+The best hash table sizes are powers of 2. There is no need to do
+mod a prime (mod is sooo slow!). If you need less than 32 bits,
+use a bitmask. For example, if you need only 10 bits, do
+ h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (ub1 **)k, do it like this:
+ for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
+
+By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
+code any way you wish, private, educational, or commercial. It's free.
+
+See http://burtleburtle.net/bob/hash/evahash.html
+Use for hash table lookup, or anything where one collision in 2^^32 is
+acceptable. Do NOT use for cryptographic purposes.
+--------------------------------------------------------------------
+*/
+
+static ub4
+hash(const ub1 *k, ub4 length, ub4 initval)
+ /* k: the key */
+ /* length: the length of the key */
+ /* initval: the previous hash, or an arbitrary value */
+{
+ register ub4 a,b,c,len;
+
+ /* Set up the internal state */
+ len = length;
+ a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
+ c = initval; /* the previous hash value */
+
+ /*---------------------------------------- handle most of the key */
+ while (len >= 12) {
+ a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
+ b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
+ c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
+ mix(a,b,c);
+ k += 12; len -= 12;
+ }
+
+ /*------------------------------------- handle the last 11 bytes */
+ c += length;
+ switch(len) /* all the case statements fall through */
+ {
+ case 11: c+=((ub4)k[10]<<24);
+ case 10: c+=((ub4)k[9]<<16);
+ case 9 : c+=((ub4)k[8]<<8);
+ /* the first byte of c is reserved for the length */
+ case 8 : b+=((ub4)k[7]<<24);
+ case 7 : b+=((ub4)k[6]<<16);
+ case 6 : b+=((ub4)k[5]<<8);
+ case 5 : b+=k[4];
+ case 4 : a+=((ub4)k[3]<<24);
+ case 3 : a+=((ub4)k[2]<<16);
+ case 2 : a+=((ub4)k[1]<<8);
+ case 1 : a+=k[0];
+ /* case 0: nothing left to add */
+ }
+ mix(a,b,c);
+ /*-------------------------------------------- report the result */
+ return c;
+}
int
rb_memhash(const void *ptr, long len)
{
- register const unsigned char *p = ptr;
- register unsigned int hval = FNV1_32A_INIT;
-
- /*
- * FNV-1a hash each octet in the buffer
- */
- while (len--) {
- /* xor the bottom with the current octet */
- hval ^= (unsigned int)*p++;
-
- /* multiply by the 32 bit FNV magic prime mod 2^32 */
-#if defined(FNV_GCC_OPTIMIZATION)
- hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24);
-#else
- hval *= FNV_32_PRIME;
-#endif
- }
- return hval;
+ return hash(ptr, len, 0);
}
int