summaryrefslogtreecommitdiff
path: root/st.c
diff options
context:
space:
mode:
authornobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2009-09-26 14:29:13 +0000
committernobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2009-09-26 14:29:13 +0000
commitf2103ee4a99c8a6ad7498dc500e214406ee47472 (patch)
tree0cdc9fa3b6e9e10920cfe14c27cac389c2a0d5a3 /st.c
parente860f30bf172866c347263b8d0e29342c83f56a4 (diff)
* st.c: moved murmur hash from string.c. [ruby-dev:39376]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@25106 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'st.c')
-rw-r--r--st.c257
1 files changed, 257 insertions, 0 deletions
diff --git a/st.c b/st.c
index eb66e1c..1974113 100644
--- a/st.c
+++ b/st.c
@@ -966,6 +966,7 @@ st_reverse_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
*/
#define FNV_32_PRIME 0x01000193
+#ifdef ST_USE_FNV1
static st_index_t
strhash(st_data_t arg)
{
@@ -984,6 +985,262 @@ strhash(st_data_t arg)
}
return hval;
}
+#else
+
+#ifndef UNALIGNED_WORD_ACCESS
+# if defined __i386__ || defined _M_IX86
+# define UNALIGNED_WORD_ACCESS 1
+# endif
+#endif
+#ifndef UNALIGNED_WORD_ACCESS
+# define UNALIGNED_WORD_ACCESS 0
+#endif
+
+/* MurmurHash described in http://murmurhash.googlepages.com/ */
+#ifndef MURMUR
+#define MURMUR 2
+#endif
+
+typedef char check_murmur_voidp[SIZEOF_VOIDP == (int)sizeof(st_index_t) ? 1 : -1];
+#define SIZEOF_ST_INDEX_T SIZEOF_VOIDP
+
+#if MURMUR == 1
+#define MurmurMagic 0xc6a4a793
+#elif MURMUR == 2
+#if SIZEOF_ST_INDEX_T > 4
+#define MurmurMagic 0xc6a4a7935bd1e995
+#else
+#define MurmurMagic 0x5bd1e995
+#endif
+#endif
+
+static inline st_index_t
+murmur(st_index_t h, st_index_t k, int r)
+{
+ const st_index_t m = MurmurMagic;
+#if MURMUR == 1
+ h += k;
+ h *= m;
+ h ^= h >> r;
+#elif MURMUR == 2
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h *= m;
+ h ^= k;
+#endif
+ return h;
+}
+
+static inline st_index_t
+murmur_finish(st_index_t h)
+{
+#if MURMUR == 1
+ h = murmur(h, 0, 10);
+ h = murmur(h, 0, 17);
+#elif MURMUR == 2
+ h ^= h >> 13;
+ h *= MurmurMagic;
+ h ^= h >> 15;
+#endif
+ return h;
+}
+
+#define murmur_step(h, k) murmur(h, k, 16)
+
+#if MURMUR == 1
+#define murmur1(h) murmur_step(h, 16)
+#else
+#define murmur1(h) murmur_step(h, 24)
+#endif
+
+st_index_t
+st_hash(const void *ptr, size_t len, st_index_t h)
+{
+ const char *data = ptr;
+ st_index_t t = 0;
+
+ h += 0xdeadbeef;
+
+#define data_at(n) (st_index_t)((unsigned char)data[n])
+#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
+#if SIZEOF_ST_INDEX_T > 4
+#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
+#if SIZEOF_ST_INDEX_T > 8
+#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
+ UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
+#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
+#endif
+#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
+#else
+#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
+#endif
+ if (len >= sizeof(st_index_t)) {
+#if !UNALIGNED_WORD_ACCESS
+ int align = (int)((st_data_t)data % sizeof(st_index_t));
+ if (align) {
+ st_index_t d = 0;
+ int sl, sr, pack;
+
+ switch (align) {
+#ifdef WORDS_BIGENDIAN
+# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
+ t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
+#else
+# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
+ t |= data_at(n) << CHAR_BIT*(n)
+#endif
+ UNALIGNED_ADD_ALL;
+#undef UNALIGNED_ADD
+ }
+
+#ifdef WORDS_BIGENDIAN
+ t >>= (CHAR_BIT * align) - CHAR_BIT;
+#else
+ t <<= (CHAR_BIT * align);
+#endif
+
+ data += sizeof(st_index_t)-align;
+ len -= sizeof(st_index_t)-align;
+
+ sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
+ sr = CHAR_BIT * align;
+
+ while (len >= sizeof(st_index_t)) {
+ d = *(st_index_t *)data;
+#ifdef WORDS_BIGENDIAN
+ t = (t << sr) | (d >> sl);
+#else
+ t = (t >> sr) | (d << sl);
+#endif
+ h = murmur_step(h, t);
+ t = d;
+ data += sizeof(st_index_t);
+ len -= sizeof(st_index_t);
+ }
+
+ pack = len < (size_t)align ? (int)len : align;
+ d = 0;
+ switch (pack) {
+#ifdef WORDS_BIGENDIAN
+# define UNALIGNED_ADD(n) case (n) + 1: \
+ d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
+#else
+# define UNALIGNED_ADD(n) case (n) + 1: \
+ d |= data_at(n) << CHAR_BIT*(n)
+#endif
+ UNALIGNED_ADD_ALL;
+#undef UNALIGNED_ADD
+ }
+#ifdef WORDS_BIGENDIAN
+ t = (t << sr) | (d >> sl);
+#else
+ t = (t >> sr) | (d << sl);
+#endif
+
+#if MURMUR == 2
+ if (len < (size_t)align) goto skip_tail;
+#endif
+ h = murmur_step(h, t);
+ data += pack;
+ len -= pack;
+ }
+ else
+#endif
+ {
+ do {
+ h = murmur_step(h, *(st_index_t *)data);
+ data += sizeof(st_index_t);
+ len -= sizeof(st_index_t);
+ } while (len >= sizeof(st_index_t));
+ }
+ }
+
+ t = 0;
+ switch (len) {
+#ifdef WORDS_BIGENDIAN
+# define UNALIGNED_ADD(n) case (n) + 1: \
+ t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
+#else
+# define UNALIGNED_ADD(n) case (n) + 1: \
+ t |= data_at(n) << CHAR_BIT*(n)
+#endif
+ UNALIGNED_ADD_ALL;
+#undef UNALIGNED_ADD
+#if MURMUR == 1
+ h = murmur_step(h, t);
+#elif MURMUR == 2
+# if !UNALIGNED_WORD_ACCESS
+ skip_tail:
+# endif
+ h ^= t;
+ h *= MurmurMagic;
+#endif
+ }
+
+ return murmur_finish(h);
+}
+
+st_index_t
+st_hash_uint32(st_index_t h, uint32_t i)
+{
+ return murmur_step(h + i, 16);
+}
+
+st_index_t
+st_hash_uint(st_index_t h, st_index_t i)
+{
+ st_index_t v = 0;
+ h += i;
+#ifdef WORDS_BIGENDIAN
+#if SIZEOF_ST_INDEX_T*CHAR_BIT > 12*8
+ v = murmur1(v + (h >> 12*8));
+#endif
+#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
+ v = murmur1(v + (h >> 8*8));
+#endif
+#if SIZEOF_ST_INDEX_T*CHAR_BIT > 4*8
+ v = murmur1(v + (h >> 4*8));
+#endif
+#endif
+ v = murmur1(v + h);
+#ifndef WORDS_BIGENDIAN
+#if SIZEOF_ST_INDEX_T*CHAR_BIT > 4*8
+ v = murmur1(v + (h >> 4*8));
+#endif
+#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
+ v = murmur1(v + (h >> 8*8));
+#endif
+#if SIZEOF_ST_INDEX_T*CHAR_BIT > 12*8
+ v = murmur1(v + (h >> 12*8));
+#endif
+#endif
+ return v;
+}
+
+st_index_t
+st_hash_end(st_index_t h)
+{
+ h = murmur_step(h, 10);
+ h = murmur_step(h, 17);
+ return h;
+}
+
+#undef st_hash_start
+st_index_t
+st_hash_start(st_index_t h)
+{
+ return h;
+}
+
+static st_index_t
+strhash(st_data_t arg)
+{
+ register const char *string = (const char *)arg;
+ return st_hash(string, strlen(string), FNV1_32A_INIT);
+}
+#endif
int
st_strcasecmp(const char *s1, const char *s2)