summaryrefslogtreecommitdiff
path: root/st.c
diff options
context:
space:
mode:
authorko1 <ko1@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2016-11-07 00:45:00 (GMT)
committerko1 <ko1@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2016-11-07 00:45:00 (GMT)
commit75775157ea68efdd0b71345a37a6930e5bf1d2ef (patch)
tree311920522c27ab515ddfda5d1f45d1d3f68f142a /st.c
parent2bb96b87a752ba186bd285e43239119c6c2f2678 (diff)
Introduce table improvement by Vladimir Makarov <vmakarov@redhat.com>.
[Feature #12142] See header of st.c for improvment details. You can see all of code history here: <https://github.com/vnmakarov/ruby/tree/hash_tables_with_open_addressing> This improvement is discussed at <https://bugs.ruby-lang.org/issues/12142> with many people, especially with Yura Sokolov. * st.c: improve st_table. * include/ruby/st.h: ditto. * internal.h, numeric.c, hash.c (rb_dbl_long_hash): extract a function. * ext/-test-/st/foreach/foreach.c: catch up this change. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56650 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'st.c')
-rw-r--r--st.c2387
1 files changed, 1360 insertions, 1027 deletions
diff --git a/st.c b/st.c
index 07b57d9..7bf5e13 100644
--- a/st.c
+++ b/st.c
@@ -1,6 +1,99 @@
-/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
-
-/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
+/* This is a public domain general purpose hash table package
+ originally written by Peter Moore @ UCB.
+
+ The hash table data structures were redesigned and the package was
+ rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
+
+/* The original package implemented classic bucket-based hash tables
+ with entries doubly linked for an access by their insertion order.
+ To decrease pointer chasing and as a consequence to improve a data
+ locality the current implementation is based on storing entries in
+ an array and using hash tables with open addressing. The current
+ entries are more compact in comparison with the original ones and
+ this also improves the data locality.
+
+ The hash table has two arrays called *bins* and *entries*.
+
+ bins:
+ -------
+ | | entries array:
+ |-------| --------------------------------
+ | index | | | entry: | | |
+ |-------| | | | | |
+ | ... | | ... | hash | ... | ... |
+ |-------| | | key | | |
+ | empty | | | record | | |
+ |-------| --------------------------------
+ | ... | ^ ^
+ |-------| |_ entries start |_ entries bound
+ |deleted|
+ -------
+
+ o The entry array contains table entries in the same order as they
+ were inserted.
+
+ When the first entry is deleted, a variable containing index of
+ the current first entry (*entries start*) is changed. In all
+ other cases of the deletion, we just mark the entry as deleted by
+ using a reserved hash value.
+
+ Such organization of the entry storage makes operations of the
+ table shift and the entries traversal very fast.
+
+ o The bins provide access to the entries by their keys. The
+ key hash is mapped to a bin containing *index* of the
+ corresponding entry in the entry array.
+
+ The bin array size is always power of two, it makes mapping very
+ fast by using the corresponding lower bits of the hash.
+ Generally it is not a good idea to ignore some part of the hash.
+ But alternative approach is worse. For example, we could use a
+ modulo operation for mapping and a prime number for the size of
+ the bin array. Unfortunately, the modulo operation for big
+ 64-bit numbers are extremely slow (it takes more than 100 cycles
+ on modern Intel CPUs).
+
+ Still other bits of the hash value are used when the mapping
+ results in a collision. In this case we use a secondary hash
+ value which is a result of a function of the collision bin
+ index and the original hash value. The function choice
+ guarantees that we can traverse all bins and finally find the
+ corresponding bin as after several iterations the function
+ becomes a full cycle linear congruential generator because it
+ satisfies requirements of the Hull-Dobell theorem.
+
+ When an entry is removed from the table besides marking the
+ hash in the corresponding entry described above, we also mark
+ the bin by a special value in order to find entries which had
+ a collision with the removed entries.
+
+ There are two reserved values for the bins. One denotes an
+ empty bin, another one denotes a bin for a deleted entry.
+
+ o The length of the bin array is at least two times more than the
+ entry array length. This keeps the table load factor healthy.
+ The trigger of rebuilding the table is always a case when we can
+ not insert an entry anymore at the entries bound. We could
+ change the entries bound too in case of deletion but than we need
+ a special code to count bins with corresponding deleted entries
+ and reset the bin values when there are too many bins
+ corresponding deleted entries
+
+ Table rebuilding is done by creation of a new entry array and
+ bins of an appropriate size. We also try to reuse the arrays
+ in some cases by compacting the array and removing deleted
+ entries.
+
+ o To save memory very small tables have no allocated arrays
+ bins. We use a linear search for an access by a key.
+
+ o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
+ bins depending on the current hash table size.
+
+ This implementation speeds up the Ruby hash table benchmarks in
+ average by more 40% on Intel Haswell CPU.
+
+*/
#ifdef NOT_RUBY
#include "regint.h"
@@ -14,46 +107,33 @@
#include <stdlib.h>
#endif
#include <string.h>
-#include "ccan/list/list.h"
+#include <assert.h>
+
+#ifdef __GNUC__
+#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
+#define EXPECT(expr, val) __builtin_expect(expr, val)
+#define ATTRIBUTE_UNUSED __attribute__((unused))
+#else
+#define PREFETCH(addr, write_p)
+#define EXPECT(expr, val) (expr)
+#define ATTRIBUTE_UNUSED
+#endif
-typedef struct st_table_entry st_table_entry;
+#ifdef ST_DEBUG
+#define st_assert(cond) assert(cond)
+#else
+#define st_assert(cond) ((void)(0 && (cond)))
+#endif
+
+/* The type of hashes. */
+typedef st_index_t st_hash_t;
struct st_table_entry {
- st_index_t hash;
+ st_hash_t hash;
st_data_t key;
st_data_t record;
- st_table_entry *next;
- struct list_node olist;
};
-typedef struct st_packed_entry {
- st_index_t hash;
- st_data_t key, val;
-} st_packed_entry;
-
-#ifndef STATIC_ASSERT
-#define STATIC_ASSERT(name, expr) typedef int static_assert_##name##_check[(expr) ? 1 : -1]
-#endif
-
-#define ST_DEFAULT_MAX_DENSITY 5
-#define ST_DEFAULT_INIT_TABLE_SIZE 16
-#define ST_DEFAULT_PACKED_TABLE_SIZE 18
-#define PACKED_UNIT (int)(sizeof(st_packed_entry) / sizeof(st_table_entry*))
-#define MAX_PACKED_HASH (int)(ST_DEFAULT_PACKED_TABLE_SIZE * sizeof(st_table_entry*) / sizeof(st_packed_entry))
-
-STATIC_ASSERT(st_packed_entry, sizeof(st_packed_entry) == sizeof(st_table_entry*[PACKED_UNIT]));
-STATIC_ASSERT(st_packed_bins, sizeof(st_packed_entry[MAX_PACKED_HASH]) <= sizeof(st_table_entry*[ST_DEFAULT_PACKED_TABLE_SIZE]));
-
- /*
- * DEFAULT_MAX_DENSITY is the default for the largest we allow the
- * average number of items per bin before increasing the number of
- * bins
- *
- * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins
- * allocated initially
- *
- */
-
#define type_numhash st_hashtype_num
const struct st_hash_type st_hashtype_num = {
st_numcmp,
@@ -73,105 +153,365 @@ static const struct st_hash_type type_strcasehash = {
strcasehash,
};
-static void rehash(st_table *);
+/* Value used to catch uninitialized entries/bins during debugging.
+ There is a possibility for a false alarm, but its probability is
+ extremely small. */
+#define ST_INIT_VAL 0xafafafafafafafaf
+#define ST_INIT_VAL_BYTE 0xafa
#ifdef RUBY
#undef malloc
#undef realloc
#undef calloc
#undef free
-#define malloc xmalloc
-#define calloc xcalloc
-#define realloc xrealloc
-#define free(x) xfree(x)
-#endif
-
-#define EQUAL(table,x,ent) ((x)==(ent)->key || (*(table)->type->compare)((x),(ent)->key) == 0)
-
-#define do_hash(key,table) (st_index_t)(*(table)->type->hash)((key))
-#define hash_pos(h,n) ((h) & (n - 1))
-
-/* preparation for possible allocation improvements */
-#define st_alloc_entry() (st_table_entry *)malloc(sizeof(st_table_entry))
-#define st_free_entry(entry) free(entry)
-#define st_alloc_table() (st_table *)malloc(sizeof(st_table))
-#define st_dealloc_table(table) free(table)
-#define st_alloc_bins(size) (st_table_entry **)calloc(size, sizeof(st_table_entry *))
-#define st_free_bins(bins, size) free(bins)
-static inline st_table_entry**
-st_realloc_bins(st_table_entry **bins, st_index_t newsize, st_index_t oldsize)
-{
- bins = (st_table_entry **)realloc(bins, newsize * sizeof(st_table_entry *));
- MEMZERO(bins, st_table_entry*, newsize);
- return bins;
-}
-
-/* Shortcut */
-#define bins as.big.bins
-#define real_entries as.packed.real_entries
-
-/* preparation for possible packing improvements */
-#define PACKED_BINS(table) ((table)->as.packed.entries)
-#define PACKED_ENT(table, i) PACKED_BINS(table)[i]
-#define PKEY(table, i) PACKED_ENT((table), (i)).key
-#define PVAL(table, i) PACKED_ENT((table), (i)).val
-#define PHASH(table, i) PACKED_ENT((table), (i)).hash
-#define PKEY_SET(table, i, v) (PKEY((table), (i)) = (v))
-#define PVAL_SET(table, i, v) (PVAL((table), (i)) = (v))
-#define PHASH_SET(table, i, v) (PHASH((table), (i)) = (v))
-
-/* this function depends much on packed layout, so that it placed here */
-static inline void
-remove_packed_entry(st_table *table, st_index_t i)
-{
- table->real_entries--;
- table->num_entries--;
- if (i < table->real_entries) {
- MEMMOVE(&PACKED_ENT(table, i), &PACKED_ENT(table, i+1),
- st_packed_entry, table->real_entries - i);
- }
-}
+#define malloc ruby_xmalloc
+#define calloc ruby_xcalloc
+#define realloc ruby_xrealloc
+#define free ruby_xfree
+#endif
-static inline void
-remove_safe_packed_entry(st_table *table, st_index_t i, st_data_t never)
-{
- table->num_entries--;
- PKEY_SET(table, i, never);
- PVAL_SET(table, i, never);
- PHASH_SET(table, i, 0);
-}
+#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
+#define PTR_EQUAL(tab, ptr, hash_val, key) \
+ ((ptr)->hash == (hash_val) && EQUAL((tab), (key), (ptr)->key))
+
+/* Features of a table. */
+struct st_features {
+ /* Power of 2 used for number of allocated entries. */
+ unsigned char entry_power;
+ /* Power of 2 used for number of allocated bins. Depending on the
+ table size, the number of bins is 2-4 times more than the
+ number of entries. */
+ unsigned char bin_power;
+ /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
+ unsigned char size_ind;
+ /* Bins are packed in words of type st_index_t. The following is
+ a size of bins counted by words. */
+ st_index_t bins_words;
+};
-static st_index_t
-next_pow2(st_index_t x)
-{
- x |= x >> 1;
- x |= x >> 2;
- x |= x >> 4;
- x |= x >> 8;
- x |= x >> 16;
+/* Features of all possible size tables. */
#if SIZEOF_ST_INDEX_T == 8
- x |= x >> 32;
+#define MAX_POWER2 62
+struct st_features features[] = {
+ {0, 1, 0, 0x0},
+ {1, 2, 0, 0x1},
+ {2, 3, 0, 0x1},
+ {3, 4, 0, 0x2},
+ {4, 5, 0, 0x4},
+ {5, 6, 0, 0x8},
+ {6, 7, 0, 0x10},
+ {7, 8, 0, 0x20},
+ {8, 9, 1, 0x80},
+ {9, 10, 1, 0x100},
+ {10, 11, 1, 0x200},
+ {11, 12, 1, 0x400},
+ {12, 13, 1, 0x800},
+ {13, 14, 1, 0x1000},
+ {14, 15, 1, 0x2000},
+ {15, 16, 1, 0x4000},
+ {16, 17, 2, 0x10000},
+ {17, 18, 2, 0x20000},
+ {18, 19, 2, 0x40000},
+ {19, 20, 2, 0x80000},
+ {20, 21, 2, 0x100000},
+ {21, 22, 2, 0x200000},
+ {22, 23, 2, 0x400000},
+ {23, 24, 2, 0x800000},
+ {24, 25, 2, 0x1000000},
+ {25, 26, 2, 0x2000000},
+ {26, 27, 2, 0x4000000},
+ {27, 28, 2, 0x8000000},
+ {28, 29, 2, 0x10000000},
+ {29, 30, 2, 0x20000000},
+ {30, 31, 2, 0x40000000},
+ {31, 32, 2, 0x80000000},
+ {32, 33, 3, 0x200000000},
+ {33, 34, 3, 0x400000000},
+ {34, 35, 3, 0x800000000},
+ {35, 36, 3, 0x1000000000},
+ {36, 37, 3, 0x2000000000},
+ {37, 38, 3, 0x4000000000},
+ {38, 39, 3, 0x8000000000},
+ {39, 40, 3, 0x10000000000},
+ {40, 41, 3, 0x20000000000},
+ {41, 42, 3, 0x40000000000},
+ {42, 43, 3, 0x80000000000},
+ {43, 44, 3, 0x100000000000},
+ {44, 45, 3, 0x200000000000},
+ {45, 46, 3, 0x400000000000},
+ {46, 47, 3, 0x800000000000},
+ {47, 48, 3, 0x1000000000000},
+ {48, 49, 3, 0x2000000000000},
+ {49, 50, 3, 0x4000000000000},
+ {50, 51, 3, 0x8000000000000},
+ {51, 52, 3, 0x10000000000000},
+ {52, 53, 3, 0x20000000000000},
+ {53, 54, 3, 0x40000000000000},
+ {54, 55, 3, 0x80000000000000},
+ {55, 56, 3, 0x100000000000000},
+ {56, 57, 3, 0x200000000000000},
+ {57, 58, 3, 0x400000000000000},
+ {58, 59, 3, 0x800000000000000},
+ {59, 60, 3, 0x1000000000000000},
+ {60, 61, 3, 0x2000000000000000},
+ {61, 62, 3, 0x4000000000000000},
+ {62, 63, 3, 0x8000000000000000},
+};
+
+#else
+#define MAX_POWER2 30
+
+struct st_features features[] = {
+ {0, 1, 0, 0x1},
+ {1, 2, 0, 0x1},
+ {2, 3, 0, 0x2},
+ {3, 4, 0, 0x4},
+ {4, 5, 0, 0x8},
+ {5, 6, 0, 0x10},
+ {6, 7, 0, 0x20},
+ {7, 8, 0, 0x40},
+ {8, 9, 1, 0x100},
+ {9, 10, 1, 0x200},
+ {10, 11, 1, 0x400},
+ {11, 12, 1, 0x800},
+ {12, 13, 1, 0x1000},
+ {13, 14, 1, 0x2000},
+ {14, 15, 1, 0x4000},
+ {15, 16, 1, 0x8000},
+ {16, 17, 2, 0x20000},
+ {17, 18, 2, 0x40000},
+ {18, 19, 2, 0x80000},
+ {19, 20, 2, 0x100000},
+ {20, 21, 2, 0x200000},
+ {21, 22, 2, 0x400000},
+ {22, 23, 2, 0x800000},
+ {23, 24, 2, 0x1000000},
+ {24, 25, 2, 0x2000000},
+ {25, 26, 2, 0x4000000},
+ {26, 27, 2, 0x8000000},
+ {27, 28, 2, 0x10000000},
+ {28, 29, 2, 0x20000000},
+ {29, 30, 2, 0x40000000},
+ {30, 31, 2, 0x80000000},
+};
+
+#endif
+
+/* The reserved hash value and its substitution. */
+#define RESERVED_HASH_VAL (~(st_hash_t) 0)
+#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
+
+/* Return hash value of KEY for table TAB. */
+static inline st_hash_t
+do_hash(st_data_t key, st_table *tab) {
+ st_index_t h = (st_index_t)(tab->curr_hash)(key);
+#if SIZEOF_INT == SIZEOF_VOIDP
+ st_hash_t hash = h;
+#else
+ st_hash_t hash = h;
#endif
- return x + 1;
+
+ /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
+ another value. Such mapping should be extremely rare. */
+ return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
}
-static st_index_t
-new_size(st_index_t size)
-{
- st_index_t n;
+/* Power of 2 defining the minimal number of allocated entries. */
+#define MINIMAL_POWER2 2
+
+#if MINIMAL_POWER2 < 2
+#error "MINIMAL_POWER2 should be >= 2"
+#endif
- if (size && (size & ~(size - 1)) == size) /* already a power-of-two? */
- return size;
+/* If the power2 of the allocated `entries` is less than the following
+ value, don't allocate bins and use a linear search. */
+#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
- n = next_pow2(size);
- if (n > size)
- return n;
+/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
+static int
+get_power2(st_index_t size) {
+ unsigned int n;
+
+ for (n = 0; size != 0; n++)
+ size >>= 1;
+ if (n <= MAX_POWER2)
+ return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
#ifndef NOT_RUBY
+ /* Ran out of the table entries */
rb_raise(rb_eRuntimeError, "st_table too big");
#endif
- return -1; /* should raise exception */
+ /* should raise exception */
+ return -1;
}
+/* Return value of N-th bin in array BINS of table with bins size
+ index S. */
+static inline st_index_t
+get_bin(st_index_t *bins, int s, st_index_t n) {
+ return (s == 0 ? ((unsigned char *) bins)[n]
+ : s == 1 ? ((unsigned short *) bins)[n]
+ : s == 2 ? ((unsigned int *) bins)[n]
+ : ((st_index_t *) bins)[n]);
+}
+
+/* Set up N-th bin in array BINS of table with bins size index S to
+ value V. */
+static inline void
+set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v) {
+ if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
+ else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
+ else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
+ else ((st_index_t *) bins)[n] = v;
+}
+
+/* These macros define reserved values for empty table bin and table
+ bin which contains a deleted entry. We will never use such values
+ for an entry index in bins. */
+#define EMPTY_BIN 0
+#define DELETED_BIN 1
+/* Base of a real entry index in the bins. */
+#define ENTRY_BASE 2
+
+/* Mark I-th bin of table TAB as empty, in other words not
+ corresponding to any entry. */
+#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
+
+/* Values used for not found entry and bin with given
+ characteristics. */
+#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
+#define UNDEFINED_BIN_IND (~(st_index_t) 0)
+
+/* Mark I-th bin of table TAB as corresponding to a deleted table
+ entry. Update number of entries in the table and number of bins
+ corresponding to deleted entries. */
+#define MARK_BIN_DELETED(tab, i) \
+ do { \
+ st_assert(i != UNDEFINED_BIN_IND); \
+ st_assert(! IND_EMPTY_OR_DELETED_BIN_P(tab, i)); \
+ set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
+ } while (0)
+
+/* Macros to check that value B is used empty bins and bins
+ corresponding deleted entries. */
+#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
+#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
+#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
+
+/* Macros to check empty bins and bins corresponding to deleted
+ entries. Bins are given by their index I in table TAB. */
+#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
+#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
+#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
+
+/* Macros for marking and checking deleted entries given by their
+ pointer E_PTR. */
+#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
+#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
+
+/* Return bin size index of table TAB. */
+static inline unsigned int
+get_size_ind(const st_table *tab) {
+ return tab->size_ind;
+}
+
+/* Return the number of allocated bins of table TAB. */
+static inline st_index_t
+get_bins_num(const st_table *tab) {
+ return 1<<tab->bin_power;
+}
+
+/* Return mask for a bin index in table TAB. */
+static inline st_index_t
+bins_mask(const st_table *tab) {
+ return get_bins_num(tab) - 1;
+}
+
+/* Return the index of table TAB bin corresponding to
+ HASH_VALUE. */
+static inline st_index_t
+hash_bin(st_hash_t hash_value, st_table *tab) {
+ return hash_value & bins_mask(tab);
+}
+
+/* Return the number of allocated entries of table TAB. */
+static inline st_index_t
+get_allocated_entries(const st_table *tab) {
+ return 1<<tab->entry_power;
+}
+
+/* Return size of the allocated bins of table TAB. */
+static inline st_index_t
+bins_size(const st_table *tab) {
+ return features[tab->entry_power].bins_words * sizeof (st_index_t);
+}
+
+/* Mark all bins of table TAB as empty. */
+static void
+initialize_bins(st_table *tab) {
+ memset(tab->bins, 0, bins_size(tab));
+}
+
+/* Make table TAB empty. */
+static void
+make_tab_empty(st_table *tab)
+{
+ tab->curr_hash = tab->type->hash;
+ tab->num_entries = 0;
+ tab->entries_start = tab->entries_bound = 0;
+ if (tab->bins != NULL)
+ initialize_bins(tab);
+}
+
+#ifdef ST_DEBUG
+/* Check the table T consistency. It can be extremely slow. So use
+ it only for debugging. */
+static void
+st_check(st_table *tab) {
+ st_index_t d, e, i, n, p;
+
+ for (p = get_allocated_entries(tab), i = 0; p > 1; i++, p>>=1)
+ ;
+ p = i;
+ assert(p >= MINIMAL_POWER2);
+ assert(tab->entries_bound <= get_allocated_entries(tab)
+ && tab->entries_start <= tab->entries_bound);
+ n = 0;
+ return;
+ if (tab->entries_bound != 0)
+ for (i = tab->entries_start; i < tab->entries_bound; i++) {
+ assert(tab->entries[i].hash != (st_hash_t) ST_INIT_VAL
+ && tab->entries[i].key != ST_INIT_VAL
+ && tab->entries[i].record != ST_INIT_VAL);
+ if (! DELETED_ENTRY_P(&tab->entries[i]))
+ n++;
+ }
+ assert(n == tab->num_entries);
+ if (tab->bins == NULL)
+ assert(p <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS);
+ else {
+ assert(p > MAX_POWER2_FOR_TABLES_WITHOUT_BINS);
+ for (n = d = i = 0; i < get_bins_num(tab); i++) {
+ assert(get_bin(tab->bins, tab->size_ind, i) != ST_INIT_VAL);
+ if (IND_DELETED_BIN_P(tab, i)) {
+ d++;
+ continue;
+ }
+ else if (IND_EMPTY_BIN_P(tab, i))
+ continue;
+ n++;
+ e = get_bin(tab->bins, tab->size_ind, i) - ENTRY_BASE;
+ assert(tab->entries_start <= e && e < tab->entries_bound);
+ assert(! DELETED_ENTRY_P(&tab->entries[e]));
+ assert(tab->entries[e].hash != (st_hash_t) ST_INIT_VAL
+ && tab->entries[e].key != ST_INIT_VAL
+ && tab->entries[e].record != ST_INIT_VAL);
+ }
+ assert(n == tab->num_entries);
+ assert(n + d < get_bins_num(tab));
+ }
+}
+#endif
+
#ifdef HASH_LOG
#ifdef HAVE_UNISTD_H
#include <unistd.h>
@@ -179,8 +519,13 @@ new_size(st_index_t size)
static struct {
int all, total, num, str, strcase;
} collision;
+
+/* Flag switching off output of package statistics at the end of
+ program. */
static int init_st = 0;
+/* Output overall number of table searches and collisions into a
+ temporary file. */
static void
stat_col(void)
{
@@ -189,1098 +534,1086 @@ stat_col(void)
if (!collision.total) return;
f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
- ((double)collision.all / (collision.total)) * 100);
+ ((double)collision.all / (collision.total)) * 100);
fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
fclose(f);
}
#endif
-static struct list_head *
-st_head(const st_table *tbl)
-{
- uintptr_t addr = (uintptr_t)&tbl->as.big.private_list_head;
- return (struct list_head *)addr;
-}
-
-st_table*
-st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
-{
- st_table *tbl;
+/* Create and return table with TYPE which can hold at least SIZE
+ entries. The real number of entries which the table can hold is
+ the nearest power of two for SIZE. */
+st_table *
+st_init_table_with_size(const struct st_hash_type *type, st_index_t size) {
+ st_table *tab;
+ int n;
#ifdef HASH_LOG
-# if HASH_LOG+0 < 0
+#if HASH_LOG+0 < 0
{
- const char *e = getenv("ST_HASH_LOG");
- if (!e || !*e) init_st = 1;
+ const char *e = getenv("ST_HASH_LOG");
+ if (!e || !*e) init_st = 1;
}
-# endif
+#endif
if (init_st == 0) {
- init_st = 1;
- atexit(stat_col);
+ init_st = 1;
+ atexit(stat_col);
}
#endif
-
- tbl = st_alloc_table();
- tbl->type = type;
- tbl->num_entries = 0;
- tbl->entries_packed = size <= MAX_PACKED_HASH;
- if (tbl->entries_packed) {
- size = ST_DEFAULT_PACKED_TABLE_SIZE;
- tbl->real_entries = 0;
- }
- else {
- size = new_size(size); /* round up to power-of-two */
- list_head_init(st_head(tbl));
- }
- tbl->num_bins = size;
- tbl->bins = st_alloc_bins(size);
-
- return tbl;
+ n = get_power2(size);
+ tab = (st_table *) malloc(sizeof (st_table));
+ tab->type = type;
+ tab->entry_power = n;
+ tab->bin_power = features[n].bin_power;
+ tab->size_ind = features[n].size_ind;
+ tab->inside_rebuild_p = FALSE;
+ if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
+ tab->bins = NULL;
+ else
+ tab->bins = (st_index_t *) malloc(bins_size(tab));
+ tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
+ * sizeof(st_table_entry));
+#ifdef ST_DEBUG
+ memset(tab->entries, ST_INIT_VAL_BYTE,
+ get_allocated_entries(tab) * sizeof(st_table_entry));
+ if (tab->bins != NULL)
+ memset(tab->bins, ST_INIT_VAL_BYTE, bins_size(tab));
+#endif
+ make_tab_empty(tab);
+ tab->rebuilds_num = 0;
+#ifdef ST_DEBUG
+ st_check(tab);
+#endif
+ return tab;
}
-st_table*
-st_init_table(const struct st_hash_type *type)
-{
+/* Create and return table with TYPE which can hold a minimal number
+ of entries (see comments for get_power2). */
+st_table *
+st_init_table(const struct st_hash_type *type) {
return st_init_table_with_size(type, 0);
}
-st_table*
-st_init_numtable(void)
-{
+/* Create and return table which can hold a minimal number of
+ numbers. */
+st_table *
+st_init_numtable(void) {
return st_init_table(&type_numhash);
}
-st_table*
-st_init_numtable_with_size(st_index_t size)
-{
+/* Create and return table which can hold SIZE numbers. */
+st_table *
+st_init_numtable_with_size(st_index_t size) {
return st_init_table_with_size(&type_numhash, size);
}
-st_table*
-st_init_strtable(void)
-{
+/* Create and return table which can hold a minimal number of
+ strings. */
+st_table *
+st_init_strtable(void) {
return st_init_table(&type_strhash);
}
-st_table*
-st_init_strtable_with_size(st_index_t size)
-{
+/* Create and return table which can hold SIZE strings. */
+st_table *
+st_init_strtable_with_size(st_index_t size) {
return st_init_table_with_size(&type_strhash, size);
}
-st_table*
-st_init_strcasetable(void)
-{
+/* Create and return table which can hold a minimal number of strings
+ whose character case is ignored. */
+st_table *
+st_init_strcasetable(void) {
return st_init_table(&type_strcasehash);
}
-st_table*
-st_init_strcasetable_with_size(st_index_t size)
-{
+/* Create and return table which can hold SIZE strings whose character
+ case is ignored. */
+st_table *
+st_init_strcasetable_with_size(st_index_t size) {
return st_init_table_with_size(&type_strcasehash, size);
}
+/* Make table TAB empty. */
void
-st_clear(st_table *table)
-{
- register st_table_entry *ptr = 0, *next;
-
- if (table->entries_packed) {
- table->num_entries = 0;
- table->real_entries = 0;
- return;
- }
-
- list_for_each_safe(st_head(table), ptr, next, olist) {
- /* list_del is not needed */
- st_free_entry(ptr);
- }
- table->num_entries = 0;
- MEMZERO(table->bins, st_table_entry*, table->num_bins);
- list_head_init(st_head(table));
+st_clear(st_table *tab) {
+ make_tab_empty(tab);
+ tab->rebuilds_num++;
+#ifdef ST_DEBUG
+ st_check(tab);
+#endif
}
+/* Free table TAB space. */
void
-st_free_table(st_table *table)
-{
- st_clear(table);
- st_free_bins(table->bins, table->num_bins);
- st_dealloc_table(table);
+st_free_table(st_table *tab) {
+ if (tab->bins != NULL)
+ free(tab->bins);
+ free(tab->entries);
+ free(tab);
}
+/* Return byte size of memory allocted for table TAB. */
size_t
-st_memsize(const st_table *table)
-{
- if (table->entries_packed) {
- return table->num_bins * sizeof (void *) + sizeof(st_table);
- }
- else {
- return table->num_entries * sizeof(struct st_table_entry) + table->num_bins * sizeof (void *) + sizeof(st_table);
- }
+st_memsize(const st_table *tab) {
+ return(sizeof(st_table)
+ + (tab->bins == NULL ? 0 : bins_size(tab))
+ + get_allocated_entries(tab) * sizeof(st_table_entry));
}
-#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
-((ptr) != 0 && ((ptr)->hash != (hash_val) || !EQUAL((table), (key), (ptr))))
+static st_index_t
+find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
+
+static st_index_t
+find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
+
+static st_index_t
+find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
+
+static st_index_t
+find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
+ st_data_t key, st_index_t *bin_ind);
#ifdef HASH_LOG
static void
-count_collision(const struct st_hash_type *type)
-{
+count_collision(const struct st_hash_type *type) {
collision.all++;
if (type == &type_numhash) {
- collision.num++;
+ collision.num++;
}
else if (type == &type_strhash) {
- collision.strcase++;
+ collision.strcase++;
}
else if (type == &type_strcasehash) {
- collision.str++;
+ collision.str++;
}
}
-#define COLLISION (collision_check ? count_collision(table->type) : (void)0)
-#define FOUND_ENTRY (collision_check ? collision.total++ : (void)0)
+
+#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
+#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
#define collision_check 0
#else
#define COLLISION
-#define FOUND_ENTRY
+#define FOUND_BIN
#endif
-#define FIND_ENTRY(table, ptr, hash_val, bin_pos) \
- ((ptr) = find_entry((table), key, (hash_val), ((bin_pos) = hash_pos(hash_val, (table)->num_bins))))
+/* If the number of entries in the table is at least REBUILD_THRESHOLD
+ times less than the entry array length, decrease the table
+ size. */
+#define REBUILD_THRESHOLD 4
-static st_table_entry *
-find_entry(const st_table *table, st_data_t key, st_index_t hash_val,
- st_index_t bin_pos)
-{
- register st_table_entry *ptr = table->bins[bin_pos];
- FOUND_ENTRY;
- if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {
- COLLISION;
- while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {
- ptr = ptr->next;
- }
- ptr = ptr->next;
- }
- return ptr;
-}
-
-static inline st_index_t
-find_packed_index_from(const st_table *table, st_index_t hash_val,
- st_data_t key, st_index_t i)
-{
- while (i < table->real_entries &&
- (PHASH(table, i) != hash_val || !EQUAL(table, key, &PACKED_ENT(table, i)))) {
- i++;
- }
- return i;
-}
-
-static inline st_index_t
-find_packed_index(const st_table *table, st_index_t hash_val, st_data_t key)
-{
- return find_packed_index_from(table, hash_val, key, 0);
-}
-
-int
-st_lookup(st_table *table, register st_data_t key, st_data_t *value)
-{
- st_index_t hash_val;
- register st_table_entry *ptr;
-
- hash_val = do_hash(key, table);
-
- if (table->entries_packed) {
- st_index_t i = find_packed_index(table, hash_val, key);
- if (i < table->real_entries) {
- if (value != 0) *value = PVAL(table, i);
- return 1;
- }
- return 0;
- }
-
- ptr = find_entry(table, key, hash_val, hash_pos(hash_val, table->num_bins));
+#if REBUILD_THRESHOLD < 2
+#error "REBUILD_THRESHOLD should be >= 2"
+#endif
- if (ptr == 0) {
- return 0;
+/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
+ and can change size of the table entries and bins arrays.
+ Rebuilding is implemented by creation of a new table or by
+ compaction of the existing one. */
+static void
+rebuild_table(st_table *tab) {
+ st_index_t i, ni, bound;
+ unsigned int size_ind;
+ st_table *new_tab;
+ st_table_entry *entries, *new_entries;
+ st_table_entry *curr_entry_ptr;
+ st_index_t *bins;
+ st_index_t bin_ind;
+
+ st_assert(tab != NULL);
+ bound = tab->entries_bound;
+ entries = tab->entries;
+ tab->inside_rebuild_p = TRUE;
+ if ((2 * tab->num_entries <= get_allocated_entries(tab)
+ && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
+ || tab->num_entries < (1 << MINIMAL_POWER2)) {
+ /* Compaction: */
+ tab->num_entries = 0;
+ if (tab->bins != NULL)
+ initialize_bins(tab);
+ new_tab = tab;
+ new_entries = entries;
}
else {
- if (value != 0) *value = ptr->record;
- return 1;
- }
-}
-
-int
-st_get_key(st_table *table, register st_data_t key, st_data_t *result)
-{
- st_index_t hash_val;
- register st_table_entry *ptr;
-
- hash_val = do_hash(key, table);
-
- if (table->entries_packed) {
- st_index_t i = find_packed_index(table, hash_val, key);
- if (i < table->real_entries) {
- if (result != 0) *result = PKEY(table, i);
- return 1;
+ new_tab = st_init_table_with_size(tab->type,
+ 2 * tab->num_entries - 1);
+ st_assert(new_tab->curr_hash == new_tab->type->hash);
+ new_tab->curr_hash = tab->curr_hash;
+ new_entries = new_tab->entries;
+ }
+ ni = 0;
+ bins = new_tab->bins;
+ size_ind = get_size_ind(new_tab);
+ for (i = tab->entries_start; i < bound; i++) {
+ curr_entry_ptr = &entries[i];
+ PREFETCH(entries + i + 1, 0);
+ if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
+ continue;
+ if (&new_entries[ni] != curr_entry_ptr)
+ new_entries[ni] = *curr_entry_ptr;
+ if (EXPECT(bins != NULL, 1)) {
+ bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
+ curr_entry_ptr->key);
+ st_assert(bin_ind != UNDEFINED_BIN_IND
+ && (tab == new_tab || new_tab->rebuilds_num == 0)
+ && IND_EMPTY_BIN_P(new_tab, bin_ind));
+ set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
}
- return 0;
- }
-
- ptr = find_entry(table, key, hash_val, hash_pos(hash_val, table->num_bins));
-
- if (ptr == 0) {
- return 0;
- }
- else {
- if (result != 0) *result = ptr->key;
- return 1;
- }
+ new_tab->num_entries++;
+ ni++;
+ }
+ if (new_tab != tab) {
+ tab->entry_power = new_tab->entry_power;
+ tab->bin_power = new_tab->bin_power;
+ tab->size_ind = new_tab->size_ind;
+ st_assert (tab->num_entries == ni && new_tab->num_entries == ni);
+ if (tab->bins != NULL)
+ free(tab->bins);
+ tab->bins = new_tab->bins;
+ free(tab->entries);
+ tab->entries = new_tab->entries;
+ free(new_tab);
+ }
+ tab->entries_start = 0;
+ tab->entries_bound = tab->num_entries;
+ tab->rebuilds_num++;
+ tab->inside_rebuild_p = FALSE;
+#ifdef ST_DEBUG
+ st_check(tab);
+#endif
}
-static inline st_table_entry *
-new_entry(st_table * table, st_data_t key, st_data_t value,
- st_index_t hash_val, register st_index_t bin_pos)
-{
- register st_table_entry *entry = st_alloc_entry();
+/* Return the next secondary hash index for table TAB using previous
+ index IND and PERTERB. Finally modulo of the function becomes a
+ full *cycle linear congruential generator*, in other words it
+ guarantees traversing all table bins in extreme case.
- entry->next = table->bins[bin_pos];
- table->bins[bin_pos] = entry;
- entry->hash = hash_val;
- entry->key = key;
- entry->record = value;
+ According the Hull-Dobell theorem a generator
+ "Xnext = (a*Xprev + c) mod m" is a full cycle generator iff
+ o m and c are relatively prime
+ o a-1 is divisible by all prime factors of m
+ o a-1 is divisible by 4 if m is divisible by 4.
- return entry;
+ For our case a is 5, c is 1, and m is a power of two. */
+static inline st_index_t
+secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb) {
+ *perterb >>= 11;
+ ind = (ind << 2) + ind + *perterb + 1;
+ return hash_bin(ind, tab);
}
-static inline void
-add_direct(st_table *table, st_data_t key, st_data_t value,
- st_index_t hash_val, register st_index_t bin_pos)
-{
- register st_table_entry *entry;
- if (table->num_entries > ST_DEFAULT_MAX_DENSITY * table->num_bins) {
- rehash(table);
- bin_pos = hash_pos(hash_val, table->num_bins);
- }
+/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
+ search. Return the index of the found entry in array `entries`.
+ If it is not found, return UNDEFINED_ENTRY_IND. */
+static inline st_index_t
+find_entry(st_table *tab, st_hash_t hash_value, st_data_t key) {
+ st_index_t i, bound;
+ st_table_entry *entries;
- entry = new_entry(table, key, value, hash_val, bin_pos);
- list_add_tail(st_head(table), &entry->olist);
- table->num_entries++;
+ bound = tab->entries_bound;
+ entries = tab->entries;
+ for (i = tab->entries_start; i < bound; i++) {
+ if (PTR_EQUAL(tab, &entries[i], hash_value, key))
+ return i;
+ }
+ return UNDEFINED_ENTRY_IND;
}
-static void
-unpack_entries(register st_table *table)
-{
- st_index_t i;
- st_packed_entry packed_bins[MAX_PACKED_HASH];
- register st_table_entry *entry;
- st_table tmp_table = *table;
-
- MEMCPY(packed_bins, PACKED_BINS(table), st_packed_entry, MAX_PACKED_HASH);
- table->as.packed.entries = packed_bins;
- tmp_table.entries_packed = 0;
-#if ST_DEFAULT_INIT_TABLE_SIZE == ST_DEFAULT_PACKED_TABLE_SIZE
- MEMZERO(tmp_table.bins, st_table_entry*, tmp_table.num_bins);
+/* Use the quadratic probing. The method has a better data locality
+ but more collisions than the current approach. In average it
+ results in a bit slower search. */
+/*#define QUADRATIC_PROBE*/
+
+/* Return index of entry with HASH_VALUE and KEY in table TAB. If
+ there is no such entry, return UNDEFINED_ENTRY_IND. */
+static st_index_t
+find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key) {
+ st_index_t ind;
+#ifdef QUADRATIC_PROBE
+ st_index_t d;
#else
- tmp_table.bins = st_realloc_bins(tmp_table.bins, ST_DEFAULT_INIT_TABLE_SIZE, tmp_table.num_bins);
- tmp_table.num_bins = ST_DEFAULT_INIT_TABLE_SIZE;
+ st_index_t peterb;
#endif
+ st_index_t bin;
+ st_table_entry *entries = tab->entries;
- /*
- * order is important here, we need to keep the original table
- * walkable during GC (GC may be triggered by new_entry call)
- */
- i = 0;
- list_head_init(st_head(&tmp_table));
- do {
- st_data_t key = packed_bins[i].key;
- st_data_t val = packed_bins[i].val;
- st_index_t hash = packed_bins[i].hash;
- entry = new_entry(&tmp_table, key, val, hash,
- hash_pos(hash, ST_DEFAULT_INIT_TABLE_SIZE));
- list_add_tail(st_head(&tmp_table), &entry->olist);
- } while (++i < MAX_PACKED_HASH);
- *table = tmp_table;
- list_head_init(st_head(table));
- list_append_list(st_head(table), st_head(&tmp_table));
-}
-
-static void
-add_packed_direct(st_table *table, st_data_t key, st_data_t value, st_index_t hash_val)
-{
- if (table->real_entries < MAX_PACKED_HASH) {
- st_index_t i = table->real_entries++;
- PKEY_SET(table, i, key);
- PVAL_SET(table, i, value);
- PHASH_SET(table, i, hash_val);
- table->num_entries++;
- }
- else {
- unpack_entries(table);
- add_direct(table, key, value, hash_val, hash_pos(hash_val, table->num_bins));
+ st_assert(tab != NULL && tab->bins != NULL);
+ ind = hash_bin(hash_value, tab);
+#ifdef QUADRATIC_PROBE
+ d = 1;
+#else
+ peterb = hash_value;
+#endif
+ FOUND_BIN;
+ for (;;) {
+ bin = get_bin(tab->bins, get_size_ind(tab), ind);
+ if (! EMPTY_OR_DELETED_BIN_P(bin)
+ && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key))
+ break;
+ else if (EMPTY_BIN_P(bin))
+ return UNDEFINED_ENTRY_IND;
+#ifdef QUADRATIC_PROBE
+ ind = hash_bin(ind + d, tab);
+ d++;
+#else
+ ind = secondary_hash(ind, tab, &peterb);
+#endif
+ COLLISION;
}
+ return bin;
}
+/* Find and return index of table TAB bin corresponding to an entry
+ with HASH_VALUE and KEY. If there is no such bin, return
+ UNDEFINED_BIN_IND. */
+static st_index_t
+find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key) {
+ st_index_t ind;
+#ifdef QUADRATIC_PROBE
+ st_index_t d;
+#else
+ st_index_t peterb;
+#endif
+ st_index_t bin;
+ st_table_entry *entries = tab->entries;
-int
-st_insert(register st_table *table, register st_data_t key, st_data_t value)
-{
- st_index_t hash_val;
- register st_index_t bin_pos;
- register st_table_entry *ptr;
-
- hash_val = do_hash(key, table);
-
- if (table->entries_packed) {
- st_index_t i = find_packed_index(table, hash_val, key);
- if (i < table->real_entries) {
- PVAL_SET(table, i, value);
- return 1;
- }
- add_packed_direct(table, key, value, hash_val);
- return 0;
- }
-
- FIND_ENTRY(table, ptr, hash_val, bin_pos);
-
- if (ptr == 0) {
- add_direct(table, key, value, hash_val, bin_pos);
- return 0;
- }
- else {
- ptr->record = value;
- return 1;
+ st_assert(tab != NULL && tab->bins != NULL);
+ ind = hash_bin(hash_value, tab);
+#ifdef QUADRATIC_PROBE
+ d = 1;
+#else
+ peterb = hash_value;
+#endif
+ FOUND_BIN;
+ for (;;) {
+ bin = get_bin(tab->bins, get_size_ind(tab), ind);
+ if (! EMPTY_OR_DELETED_BIN_P(bin)
+ && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key))
+ break;
+ else if (EMPTY_BIN_P(bin))
+ return UNDEFINED_BIN_IND;
+#ifdef QUADRATIC_PROBE
+ ind = hash_bin(ind + d, tab);
+ d++;
+#else
+ ind = secondary_hash(ind, tab, &peterb);
+#endif
+ COLLISION;
}
+ return ind;
}
-int
-st_insert2(register st_table *table, register st_data_t key, st_data_t value,
- st_data_t (*func)(st_data_t))
-{
- st_index_t hash_val;
- register st_index_t bin_pos;
- register st_table_entry *ptr;
-
- hash_val = do_hash(key, table);
-
- if (table->entries_packed) {
- st_index_t i = find_packed_index(table, hash_val, key);
- if (i < table->real_entries) {
- PVAL_SET(table, i, value);
- return 1;
- }
- key = (*func)(key);
- add_packed_direct(table, key, value, hash_val);
- return 0;
- }
-
- FIND_ENTRY(table, ptr, hash_val, bin_pos);
+/* Find and return index of table TAB bin corresponding to an entry
+ with HASH_VALUE and KEY. The entry should be in the table
+ already. */
+static st_index_t
+find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key) {
+ st_index_t ind;
+#ifdef QUADRATIC_PROBE
+ st_index_t d;
+#else
+ st_index_t peterb;
+#endif
+ st_index_t bin;
+ st_table_entry *entries = tab->entries;
- if (ptr == 0) {
- key = (*func)(key);
- add_direct(table, key, value, hash_val, bin_pos);
- return 0;
- }
- else {
- ptr->record = value;
- return 1;
+ st_assert(tab != NULL && tab->bins != NULL);
+ ind = hash_bin(hash_value, tab);
+#ifdef QUADRATIC_PROBE
+ d = 1;
+#else
+ peterb = hash_value;
+#endif
+ FOUND_BIN;
+ for (;;) {
+ bin = get_bin(tab->bins, get_size_ind(tab), ind);
+ if (EMPTY_OR_DELETED_BIN_P(bin))
+ return ind;
+ st_assert (! PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key));
+#ifdef QUADRATIC_PROBE
+ ind = hash_bin(ind + d, tab);
+ d++;
+#else
+ ind = secondary_hash(ind, tab, &peterb);
+#endif
+ COLLISION;
}
}
-void
-st_add_direct(st_table *table, st_data_t key, st_data_t value)
+/* Recalculate hashes of entries in table TAB. */
+static void
+reset_entry_hashes (st_table *tab)
{
- st_index_t hash_val;
+ st_index_t i, bound;
+ st_table_entry *entries, *curr_entry_ptr;
- hash_val = do_hash(key, table);
- if (table->entries_packed) {
- add_packed_direct(table, key, value, hash_val);
- return;
+ bound = tab->entries_bound;
+ entries = tab->entries;
+ for (i = tab->entries_start; i < bound; i++) {
+ curr_entry_ptr = &entries[i];
+ if (! DELETED_ENTRY_P(curr_entry_ptr))
+ curr_entry_ptr->hash = do_hash(curr_entry_ptr->key, tab);
}
-
- add_direct(table, key, value, hash_val, hash_pos(hash_val, table->num_bins));
}
-static void
-rehash(register st_table *table)
-{
- register st_table_entry *ptr = 0, **new_bins;
- st_index_t new_num_bins, hash_val;
-
- new_num_bins = new_size(table->num_bins+1);
- new_bins = st_realloc_bins(table->bins, new_num_bins, table->num_bins);
- table->num_bins = new_num_bins;
- table->bins = new_bins;
-
- list_for_each(st_head(table), ptr, olist) {
- hash_val = hash_pos(ptr->hash, new_num_bins);
- ptr->next = new_bins[hash_val];
- new_bins[hash_val] = ptr;
+/* If we have the following number of collisions with different keys
+ but with the same hash during finding a bin for new entry
+ inclusions, possibly a denial attack is going on. Start to use a
+ stronger hash. */
+#define HIT_THRESHOULD_FOR_STRONG_HASH 10
+
+/* Return index of table TAB bin for HASH_VALUE and KEY through
+ BIN_IND and the pointed value as the function result. Reserve the
+ bin for inclusion of the corresponding entry into the table if it
+ is not there yet. We always find such bin as bins array length is
+ bigger entries array. Although we can reuse a deleted bin, the
+ result bin value is always empty if the table has no entry with
+ KEY. Return the entries array index of the found entry or
+ UNDEFINED_ENTRY_IND if it is not found. */
+static st_index_t
+find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
+ st_data_t key, st_index_t *bin_ind) {
+ st_index_t ind;
+ st_hash_t curr_hash_value = *hash_value;
+#ifdef QUADRATIC_PROBE
+ st_index_t d;
+#else
+ st_index_t peterb;
+#endif
+ st_index_t entry_index;
+ st_index_t first_deleted_bin_ind;
+ st_table_entry *entries;
+ int hit;
+
+ st_assert(tab != NULL && tab->bins != NULL
+ && tab->entries_bound <= get_allocated_entries(tab)
+ && tab->entries_start <= tab->entries_bound);
+ repeat:
+ ind = hash_bin(curr_hash_value, tab);
+#ifdef QUADRATIC_PROBE
+ d = 1;
+#else
+ peterb = curr_hash_value;
+#endif
+ FOUND_BIN;
+ first_deleted_bin_ind = UNDEFINED_BIN_IND;
+ entries = tab->entries;
+ hit = 0;
+ for (;;) {
+ entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
+ if (EMPTY_BIN_P(entry_index)) {
+ tab->num_entries++;
+ entry_index = UNDEFINED_ENTRY_IND;
+ if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
+ /* We can reuse bin of a deleted entry. */
+ ind = first_deleted_bin_ind;
+ MARK_BIN_EMPTY(tab, ind);
+ }
+ break;
+ } else if (! DELETED_BIN_P(entry_index)) {
+ if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
+ break;
+ if (curr_hash_value == entries[entry_index - ENTRY_BASE].hash) {
+ hit++;
+ if (hit > HIT_THRESHOULD_FOR_STRONG_HASH
+ && tab->curr_hash != tab->type->strong_hash
+ && tab->type->strong_hash != NULL
+ && ! tab->inside_rebuild_p) {
+ tab->curr_hash = tab->type->strong_hash;
+ *hash_value = curr_hash_value = do_hash(key, tab);
+ reset_entry_hashes(tab);
+ rebuild_table(tab);
+ goto repeat;
+ }
+ }
+ } else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
+ first_deleted_bin_ind = ind;
+#ifdef QUADRATIC_PROBE
+ ind = hash_bin(ind + d, tab);
+ d++;
+#else
+ ind = secondary_hash(ind, tab, &peterb);
+#endif
+ COLLISION;
}
+ *bin_ind = ind;
+ return entry_index;
}
-st_table*
-st_copy(st_table *old_table)
-{
- st_table *new_table;
- st_table_entry *ptr = 0, *entry;
- st_index_t num_bins = old_table->num_bins;
-
- new_table = st_alloc_table();
- if (new_table == 0) {
- return 0;
- }
-
- *new_table = *old_table;
- new_table->bins = st_alloc_bins(num_bins);
-
- if (new_table->bins == 0) {
- st_dealloc_table(new_table);
- return 0;
- }
+/* Find an entry with KEY in table TAB. Return non-zero if we found
+ it. Set up *RECORD to the found entry record. */
+int
+st_lookup(st_table *tab, st_data_t key, st_data_t *value) {
+ st_index_t bin;
+ st_hash_t hash = do_hash(key, tab);
- if (old_table->entries_packed) {
- MEMCPY(new_table->bins, old_table->bins, st_table_entry*, old_table->num_bins);
- return new_table;
+ if (tab->bins == NULL) {
+ bin = find_entry(tab, hash, key);
+ if (bin == UNDEFINED_ENTRY_IND)
+ return 0;
+ } else {
+ bin = find_table_entry_ind(tab, hash, key);
+ if (bin == UNDEFINED_ENTRY_IND)
+ return 0;
+ bin -= ENTRY_BASE;
}
+ if (value != 0)
+ *value = tab->entries[bin].record;
+ return 1;
+}
- list_head_init(st_head(new_table));
+/* Find an entry with KEY in table TAB. Return non-zero if we found
+ it. Set up *RESULT to the found table entry key. */
+int
+st_get_key(st_table *tab, st_data_t key, st_data_t *result) {
+ st_index_t bin;
+ st_hash_t hash = do_hash(key, tab);
- list_for_each(st_head(old_table), ptr, olist) {
- entry = new_entry(new_table, ptr->key, ptr->record, ptr->hash,
- hash_pos(ptr->hash, num_bins));
- list_add_tail(st_head(new_table), &entry->olist);
+ if (tab->bins == NULL) {
+ bin = find_entry(tab, hash, key);
+ if (bin == UNDEFINED_ENTRY_IND)
+ return 0;
+ } else {
+ bin = find_table_entry_ind(tab, hash, key);
+ if (bin == UNDEFINED_ENTRY_IND)
+ return 0;
+ bin -= ENTRY_BASE;
}
-
- return new_table;
+ if (result != 0)
+ *result = tab->entries[bin].key;
+ return 1;
}
+/* Check the table and rebuild it if it is necessary. */
static inline void
-remove_entry(st_table *table, st_table_entry *ptr)
-{
- list_del(&ptr->olist);
- table->num_entries--;
+rebuild_table_if_necessary (st_table *tab) {
+ st_index_t bound = tab->entries_bound;
+
+ if (bound == get_allocated_entries(tab))
+ rebuild_table(tab);
+ st_assert(tab->entries_bound < get_allocated_entries(tab));
}
+/* Insert (KEY, VALUE) into table TAB and return zero. If there is
+ already entry with KEY in the table, return nonzero and and update
+ the value of the found entry. */
int
-st_delete(register st_table *table, register st_data_t *key, st_data_t *value)
-{
- st_index_t hash_val;
- st_table_entry **prev;
- register st_table_entry *ptr;
-
- hash_val = do_hash(*key, table);
-
- if (table->entries_packed) {
- st_index_t i = find_packed_index(table, hash_val, *key);
- if (i < table->real_entries) {
- if (value != 0) *value = PVAL(table, i);
- *key = PKEY(table, i);
- remove_packed_entry(table, i);
- return 1;
- }
- if (value != 0) *value = 0;
+st_insert(st_table *tab, st_data_t key, st_data_t value) {
+ st_table_entry *entry;
+ st_index_t bin;
+ st_index_t ind;
+ st_hash_t hash_value;
+ st_index_t bin_ind;
+ int new_p;
+
+ rebuild_table_if_necessary(tab);
+ hash_value = do_hash(key, tab);
+ if (tab->bins == NULL) {
+ bin = find_entry(tab, hash_value, key);
+ new_p = bin == UNDEFINED_ENTRY_IND;
+ if (new_p)
+ tab->num_entries++;
+ bin_ind = UNDEFINED_BIN_IND;
+ } else {
+ bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
+ key, &bin_ind);
+ new_p = bin == UNDEFINED_ENTRY_IND;
+ bin -= ENTRY_BASE;
+ }
+ if (new_p) {
+ st_assert(tab->entries_bound < get_allocated_entries(tab));
+ ind = tab->entries_bound++;
+ entry = &tab->entries[ind];
+ entry->hash = hash_value;
+ entry->key = key;
+ entry->record = value;
+ if (bin_ind != UNDEFINED_BIN_IND)
+ set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
+#ifdef ST_DEBUG
+ st_check(tab);
+#endif
return 0;
}
-
- prev = &table->bins[hash_pos(hash_val, table->num_bins)];
- for (;(ptr = *prev) != 0; prev = &ptr->next) {
- if (EQUAL(table, *key, ptr)) {
- *prev = ptr->next;
- remove_entry(table, ptr);
- if (value != 0) *value = ptr->record;
- *key = ptr->key;
- st_free_entry(ptr);
- return 1;
- }
- }
-
- if (value != 0) *value = 0;
- return 0;
+ tab->entries[bin].record = value;
+#ifdef ST_DEBUG
+ st_check(tab);
+#endif
+ return 1;
}
-int
-st_delete_safe(register st_table *table, register st_data_t *key, st_data_t *value, st_data_t never)
-{
- st_index_t hash_val;
- register st_table_entry *ptr;
-
- hash_val = do_hash(*key, table);
-
- if (table->entries_packed) {
- st_index_t i = find_packed_index(table, hash_val, *key);
- if (i < table->real_entries) {
- if (value != 0) *value = PVAL(table, i);
- *key = PKEY(table, i);
- remove_safe_packed_entry(table, i, never);
- return 1;
- }
- if (value != 0) *value = 0;
- return 0;
- }
-
- ptr = table->bins[hash_pos(hash_val, table->num_bins)];
+/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
+ entry with KEY before the insertion. */
+static inline void
+st_add_direct_with_hash(st_table *tab,
+ st_data_t key, st_data_t value, st_hash_t hash) {
+ st_table_entry *entry;
+ st_index_t ind;
+ st_index_t bin_ind;
+
+ rebuild_table_if_necessary(tab);
+ ind = tab->entries_bound++;
+ entry = &tab->entries[ind];
+ entry->hash = hash;
+ entry->key = key;
+ entry->record = value;
+ tab->num_entries++;
+ if (tab->bins != NULL) {
+ bin_ind = find_table_bin_ind_direct(tab, hash, key);
+ st_assert (bin_ind != UNDEFINED_BIN_IND);
+ set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
+ }
+#ifdef ST_DEBUG
+ st_check(tab);
+#endif
+}
- for (; ptr != 0; ptr = ptr->next) {
- if ((ptr->key != never) && EQUAL(table, *key, ptr)) {
- remove_entry(table, ptr);
- *key = ptr->key;
- if (value != 0) *value = ptr->record;
- ptr->key = ptr->record = never;
- return 1;
- }
- }
+/* Insert (KEY, VALUE) into table TAB. The table should not have
+ entry with KEY before the insertion. */
+void
+st_add_direct(st_table *tab, st_data_t key, st_data_t value) {
+ st_hash_t hash_value;
- if (value != 0) *value = 0;
- return 0;
+ hash_value = do_hash(key, tab);
+ st_add_direct_with_hash(tab, key, value, hash_value);
}
+/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
+ there is already entry with KEY in the table, return nonzero and
+ and update the value of the found entry. */
int
-st_shift(register st_table *table, register st_data_t *key, st_data_t *value)
-{
- st_table_entry *old;
- st_table_entry **prev;
- register st_table_entry *ptr;
-
- if (table->num_entries == 0) {
- if (value != 0) *value = 0;
+st_insert2(st_table *tab, st_data_t key, st_data_t value,
+ st_data_t (*func)(st_data_t)) {
+ st_table_entry *entry;
+ st_index_t bin;
+ st_index_t ind, check;
+ st_hash_t hash_value;
+ st_index_t bin_ind;
+ int new_p;
+
+ rebuild_table_if_necessary (tab);
+ hash_value = do_hash(key, tab);
+ if (tab->bins == NULL) {
+ bin = find_entry(tab, hash_value, key);
+ new_p = bin == UNDEFINED_ENTRY_IND;
+ bin_ind = UNDEFINED_BIN_IND;
+ } else {
+ bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
+ key, &bin_ind);
+ new_p = bin == UNDEFINED_ENTRY_IND;
+ bin -= ENTRY_BASE;
+ }
+ if (new_p) {
+ st_assert(tab->entries_bound < get_allocated_entries(tab));
+ check = tab->rebuilds_num;
+ key = (*func)(key);
+ st_assert(check == tab->rebuilds_num
+ && do_hash(key, tab) == hash_value);
+ ind = tab->entries_bound++;
+ entry = &tab->entries[ind];
+ entry->hash = hash_value;
+ entry->key = key;
+ entry->record = value;
+ if (bin_ind != UNDEFINED_BIN_IND)
+ set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
+#ifdef ST_DEBUG
+ st_check(tab);
+#endif
return 0;
}
+ tab->entries[bin].record = value;
+#ifdef ST_DEBUG
+ st_check(tab);
+#endif
+ return 1;
+}
- if (table->entries_packed) {
- if (value != 0) *value = PVAL(table, 0);
- *key = PKEY(table, 0);
- remove_packed_entry(table, 0);
- return 1;
- }
+/* Create and return a copy of table OLD_TAB. */
+st_table *
+st_copy(st_table *old_tab) {
+ st_table *new_tab;
+
+ new_tab = (st_table *) malloc(sizeof(st_table));
+ *new_tab = *old_tab;
+ if (old_tab->bins == NULL)
+ new_tab->bins = NULL;
+ else
+ new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
+ new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
+ * sizeof(st_table_entry));
+ MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
+ get_allocated_entries(old_tab));
+ if (old_tab->bins != NULL)
+ MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
+#ifdef ST_DEBUG
+ st_check(new_tab);
+#endif
+ return new_tab;
+}
- old = list_pop(st_head(table), st_table_entry, olist);
- table->num_entries--;
- prev = &table->bins[hash_pos(old->hash, table->num_bins)];
- while ((ptr = *prev) != old) prev = &ptr->next;
- *prev = ptr->next;
- if (value != 0) *value = ptr->record;
- *key = ptr->key;
- st_free_entry(ptr);
- return 1;
+/* Update the entries start of table TAB after removing an entry
+ with index N in the array entries. */
+static inline void
+update_range_for_deleted(st_table *tab, st_index_t n) {
+ /* Do not update entries_bound here. Otherwise, we can fill all
+ bins by deleted entry value before rebuilding the table. */
+ if (tab->entries_start == n)
+ tab->entries_start = n + 1;
}
-void
-st_cleanup_safe(st_table *table, st_data_t never)
+/* Delete entry with KEY from table TAB, set up *VALUE (unless
+ VALUE is zero) from deleted table entry, and return non-zero. If
+ there is no entry with KEY in the table, clear *VALUE (unless VALUE
+ is zero), and return zero. */
+static int
+st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
{
- st_table_entry *ptr, **last, *tmp;
- st_index_t i;
-
- if (table->entries_packed) {
- st_index_t i = 0, j = 0;
- while (PKEY(table, i) != never) {
- if (i++ == table->real_entries) return;
- }
- for (j = i; ++i < table->real_entries;) {
- if (PKEY(table, i) == never) continue;
- PACKED_ENT(table, j) = PACKED_ENT(table, i);
- j++;
+ st_table_entry *entry;
+ st_index_t bin;
+ st_index_t bin_ind;
+ st_hash_t hash;
+
+ st_assert(tab != NULL);
+ hash = do_hash(*key, tab);
+ if (tab->bins == NULL) {
+ bin = find_entry(tab, hash, *key);
+ if (bin == UNDEFINED_ENTRY_IND) {
+ if (value != 0) *value = 0;
+ return 0;
}
- table->real_entries = j;
- /* table->num_entries really should be equal j at this moment, but let set it anyway */
- table->num_entries = j;
- return;
- }
-
- for (i = 0; i < table->num_bins; i++) {
- ptr = *(last = &table->bins[i]);
- while (ptr != 0) {
- if (ptr->key == never) {
- tmp = ptr;
- *last = ptr = ptr->next;
- st_free_entry(tmp);
- }
- else {
- ptr = *(last = &ptr->next);
- }
+ } else {
+ bin_ind = find_table_bin_ind(tab, hash, *key);
+ if (bin_ind == UNDEFINED_BIN_IND) {
+ if (value != 0) *value = 0;
+ return 0;
}
- }
+ bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
+ MARK_BIN_DELETED(tab, bin_ind);
+ }
+ entry = &tab->entries[bin];
+ *key = entry->key;
+ if (value != 0) *value = entry->record;
+ MARK_ENTRY_DELETED(entry);
+ tab->num_entries--;
+ update_range_for_deleted(tab, bin);
+#ifdef ST_DEBUG
+ st_check(tab);
+#endif
+ return 1;
}
int
-st_update(st_table *table, st_data_t key, st_update_callback_func *func, st_data_t arg)
-{
- st_index_t hash_val, bin_pos;
- register st_table_entry *ptr, **last, *tmp;
- st_data_t value = 0, old_key;
- int retval, existing = 0;
+st_delete(st_table *tab, st_data_t *key, st_data_t *value) {
+ return st_general_delete(tab, key, value);
+}
- hash_val = do_hash(key, table);
+/* The function and other functions with suffix '_safe' or '_check'
+ are originated from the previous implementation of the hash tables.
+ It was necessary for correct deleting entries during traversing
+ tables. The current implementation permits deletion during
+ traversing without a specific way to do this. */
+int
+st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
+ st_data_t never ATTRIBUTE_UNUSED) {
+ return st_general_delete(tab, key, value);
+}
- if (table->entries_packed) {
- st_index_t i = find_packed_index(table, hash_val, key);
- if (i < table->real_entries) {
- key = PKEY(table, i);
- value = PVAL(table, i);
- existing = 1;
- }
- {
- old_key = key;
- retval = (*func)(&key, &value, arg, existing);
- if (!table->entries_packed) {
- FIND_ENTRY(table, ptr, hash_val, bin_pos);
- goto unpacked;
- }
- switch (retval) {
- case ST_CONTINUE:
- if (!existing) {
- add_packed_direct(table, key, value, hash_val);
- break;
- }
- if (old_key != key) {
- PKEY(table, i) = key;
- }
- PVAL_SET(table, i, value);
- break;
- case ST_DELETE:
- if (!existing) break;
- remove_packed_entry(table, i);
+/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
+ return zero. Otherwise, remove the first entry in the table.
+ Return its key through KEY and its record through VALUE (unless
+ VALUE is zero). */
+int
+st_shift(st_table *tab, st_data_t *key, st_data_t *value) {
+ st_index_t i, bound;
+ st_index_t bin;
+ st_table_entry *entries, *curr_entry_ptr;
+ st_index_t bin_ind;
+
+ entries = tab->entries;
+ bound = tab->entries_bound;
+ for (i = tab->entries_start; i < bound; i++) {
+ curr_entry_ptr = &entries[i];
+ if (! DELETED_ENTRY_P(curr_entry_ptr)) {
+ if (value != 0) *value = curr_entry_ptr->record;
+ *key = curr_entry_ptr->key;
+ if (tab->bins == NULL) {
+ bin = find_entry(tab, curr_entry_ptr->hash, curr_entry_ptr->key);
+ st_assert(bin != UNDEFINED_ENTRY_IND
+ && &entries[bin] == curr_entry_ptr);
+ } else {
+ bin_ind = find_table_bin_ind(tab, curr_entry_ptr->hash,
+ curr_entry_ptr->key);
+ st_assert(bin_ind != UNDEFINED_BIN_IND
+ && &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
+ - ENTRY_BASE] == curr_entry_ptr);
+ MARK_BIN_DELETED(tab, bin_ind);
}
+ MARK_ENTRY_DELETED(curr_entry_ptr);
+ tab->num_entries--;
+ update_range_for_deleted(tab, i);
+#ifdef ST_DEBUG
+ st_check(tab);
+#endif
+ return 1;
}
- return existing;
}
+ st_assert(tab->num_entries == 0);
+ tab->entries_start = tab->entries_bound = 0;
+ if (value != 0) *value = 0;
+ return 0;
+}
- FIND_ENTRY(table, ptr, hash_val, bin_pos);
-
- if (ptr != 0) {
- key = ptr->key;
- value = ptr->record;
- existing = 1;
- }
- {
- old_key = key;
- retval = (*func)(&key, &value, arg, existing);
- unpacked:
- switch (retval) {
- case ST_CONTINUE:
- if (!existing) {
- add_direct(table, key, value, hash_val, hash_pos(hash_val, table->num_bins));
- break;
- }
- if (old_key != key) {
- ptr->key = key;
- }
- ptr->record = value;
- break;
- case ST_DELETE:
- if (!existing) break;
- last = &table->bins[bin_pos];
- for (; (tmp = *last) != 0; last = &tmp->next) {
- if (ptr == tmp) {
- *last = ptr->next;
- remove_entry(table, ptr);
- st_free_entry(ptr);
- break;
- }
- }
- break;
- }
- return existing;
- }
+/* See comments for function st_delete_safe. */
+void
+st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
+ st_data_t never ATTRIBUTE_UNUSED) {
}
+/* Find entry with KEY in table TAB, call FUNC with the key and the
+ value of the found entry, and non-zero as the 3rd argument. If the
+ entry is not found, call FUNC with KEY, and 2 zero arguments. If
+ the call returns ST_CONTINUE, the table will have an entry with key
+ and value returned by FUNC through the 1st and 2nd parameters. If
+ the call of FUNC returns ST_DELETE, the table will not have entry
+ with KEY. The function returns flag of that the entry with KEY was
+ in the table before the call. */
int
-st_foreach_check(st_table *table, int (*func)(ANYARGS), st_data_t arg, st_data_t never)
-{
- st_table_entry *ptr = 0, **last, *tmp, *next;
- struct list_head *head;
- enum st_retval retval;
- st_index_t i;
-
- if (table->entries_packed) {
- for (i = 0; i < table->real_entries; i++) {
- st_data_t key, val;
- st_index_t hash;
- key = PKEY(table, i);
- val = PVAL(table, i);
- hash = PHASH(table, i);
- if (key == never) continue;
- retval = (*func)(key, val, arg, 0);
- if (!table->entries_packed) {
- FIND_ENTRY(table, ptr, hash, i);
- if (retval == ST_CHECK) {
- if (!ptr) goto deleted;
- }
- if (table->num_entries == 0) return 0;
- head = st_head(table);
- next = list_entry(ptr->olist.next, st_table_entry, olist);
- goto unpacked;
- }
- switch (retval) {
- case ST_CHECK: /* check if hash is modified during iteration */
- if (PHASH(table, i) == 0 && PKEY(table, i) == never) {
- break;
- }
- i = find_packed_index_from(table, hash, key, i);
- if (i >= table->real_entries) {
- i = find_packed_index(table, hash, key);
- if (i >= table->real_entries) goto deleted;
- }
- /* fall through */
- case ST_CONTINUE:
- break;
- case ST_STOP:
- return 0;
- case ST_DELETE:
- remove_safe_packed_entry(table, i, never);
- break;
- }
+st_update(st_table *tab, st_data_t key,
+ st_update_callback_func *func, st_data_t arg) {
+ st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
+ st_index_t bin = 0; /* Ditto */
+ st_table_entry *entries;
+ st_index_t bin_ind;
+ st_data_t value = 0, old_key;
+ st_index_t check;
+ int retval, existing;
+ st_hash_t hash = do_hash(key, tab);
+
+ entries = tab->entries;
+ if (tab->bins == NULL) {
+ bin = find_entry(tab, hash, key);
+ existing = bin != UNDEFINED_ENTRY_IND;
+ entry = &entries[bin];
+ bin_ind = UNDEFINED_BIN_IND;
+ } else {
+ bin_ind = find_table_bin_ind(tab, hash, key);
+ existing = bin_ind != UNDEFINED_BIN_IND;
+ if (existing) {
+ bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
+ entry = &entries[bin];
}
- return 0;
}
-
- head = st_head(table);
- list_for_each_safe(head, ptr, next, olist) {
- if (ptr->key != never) {
- i = hash_pos(ptr->hash, table->num_bins);
- retval = (*func)(ptr->key, ptr->record, arg, 0);
- unpacked:
- switch (retval) {
- case ST_CHECK: /* check if hash is modified during iteration */
- for (tmp = table->bins[i]; tmp != ptr; tmp = tmp->next) {
- if (!tmp) {
- deleted:
- /* call func with error notice */
- retval = (*func)(0, 0, arg, 1);
- return 1;
- }
- }
- /* fall through */
- case ST_CONTINUE:
- break;
- case ST_STOP:
- return 0;
- case ST_DELETE:
- last = &table->bins[hash_pos(ptr->hash, table->num_bins)];
- for (; (tmp = *last) != 0; last = &tmp->next) {
- if (ptr == tmp) {
- remove_entry(table, ptr);
- ptr->key = ptr->record = never;
- ptr->hash = 0;
- break;
- }
- }
- if (table->num_entries == 0) return 0;
- }
- }
+ if (existing) {
+ key = entry->key;
+ value = entry->record;
+ }
+ old_key = key;
+ check = tab->rebuilds_num;
+ retval = (*func)(&key, &value, arg, existing);
+ st_assert(check == tab->rebuilds_num);
+ switch (retval) {
+ case ST_CONTINUE:
+ if (! existing) {
+ st_add_direct_with_hash(tab, key, value, hash);
+ break;
+ }
+ if (old_key != key) {
+ entry->key = key;
+ }
+ entry->record = value;
+ break;
+ case ST_DELETE:
+ if (existing) {
+ if (bin_ind != UNDEFINED_BIN_IND)
+ MARK_BIN_DELETED(tab, bin_ind);
+ MARK_ENTRY_DELETED(entry);
+ tab->num_entries--;
+ update_range_for_deleted(tab, bin);
+#ifdef ST_DEBUG
+ st_check(tab);
+#endif
+ }
+ break;
}
- return 0;
+#ifdef ST_DEBUG
+ st_check(tab);
+#endif
+ return existing;
}
-int
-st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
-{
- st_table_entry *ptr = 0, **last, *tmp, *next;
+/* Traverse all entries in table TAB calling FUNC with current entry
+ key and value and zero. If the call returns ST_STOP, stop
+ traversing. If the call returns ST_DELETE, delete the current
+ entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
+ traversing. The function returns zero unless an error is found.
+ CHECK_P is flag of st_foreach_check call. The behavior is a bit
+ different for ST_CHECK and when the current element is removed
+ during traversing. */
+static inline int
+st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg,
+ int check_p) {
+ st_index_t bin;
+ st_index_t bin_ind;
+ st_table_entry *entries, *curr_entry_ptr;
enum st_retval retval;
- struct list_head *head;
- st_index_t i;
-
- if (table->entries_packed) {
- for (i = 0; i < table->real_entries; i++) {
- st_data_t key, val;
- st_index_t hash;
- key = PKEY(table, i);
- val = PVAL(table, i);
- hash = PHASH(table, i);
- retval = (*func)(key, val, arg, 0);
- if (!table->entries_packed) {
- FIND_ENTRY(table, ptr, hash, i);
- if (!ptr) return 0;
- head = st_head(table);
- next = list_entry(ptr->olist.next, st_table_entry, olist);
- goto unpacked;
+ st_index_t i, rebuilds_num;
+ st_hash_t hash;
+ st_data_t key;
+ int error_p, packed_p = tab->bins == NULL;
+
+ st_assert(tab->entries_start <= tab->entries_bound);
+ entries = tab->entries;
+ /* The bound can change inside the loop even without rebuilding
+ the table, e.g. by an entry inesrtion. */
+ for (i = tab->entries_start; i < tab->entries_bound; i++) {
+ curr_entry_ptr = &entries[i];
+ if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
+ continue;
+ key = curr_entry_ptr->key;
+ rebuilds_num = tab->rebuilds_num;
+ hash = curr_entry_ptr->hash;
+ retval = (*func)(key, curr_entry_ptr->record, arg, 0);
+ if (rebuilds_num != tab->rebuilds_num) {
+ entries = tab->entries;
+ packed_p = tab->bins == NULL;
+ if (packed_p) {
+ i = find_entry(tab, hash, key);
+ error_p = i == UNDEFINED_ENTRY_IND;
+ } else {
+ i = find_table_entry_ind(tab, hash, key);
+ error_p = i == UNDEFINED_ENTRY_IND;
+ i -= ENTRY_BASE;
}
- switch (retval) {
- case ST_CONTINUE:
- break;
- case ST_CHECK:
- case ST_STOP:
- return 0;
- case ST_DELETE:
- remove_packed_entry(table, i);
- i--;
- break;
+ if (error_p && check_p) {
+ /* call func with error notice */
+ retval = (*func)(0, 0, arg, 1);
+#ifdef ST_DEBUG
+ st_check(tab);
+#endif
+ return 1;
}
+ curr_entry_ptr = &entries[i];
}
- return 0;
- }
-
- head = st_head(table);
- list_for_each_safe(head, ptr, next, olist) {
- i = hash_pos(ptr->hash, table->num_bins);
- retval = (*func)(ptr->key, ptr->record, arg, 0);
- unpacked:
switch (retval) {
- case ST_CONTINUE:
+ case ST_CONTINUE:
break;
- case ST_CHECK:
- case ST_STOP:
+ case ST_CHECK:
+ if (check_p)
+ break;
+ case ST_STOP:
+#ifdef ST_DEBUG
+ st_check(tab);
+#endif
return 0;
- case ST_DELETE:
- last = &table->bins[hash_pos(ptr->hash, table->num_bins)];
- for (; (tmp = *last) != 0; last = &tmp->next) {
- if (ptr == tmp) {
- *last = ptr->next;
- remove_entry(table, ptr);
- st_free_entry(ptr);
+ case ST_DELETE:
+ if (packed_p) {
+ bin = find_entry(tab, hash, curr_entry_ptr->key);
+ if (bin == UNDEFINED_ENTRY_IND)
break;
- }
+ } else {
+ bin_ind = find_table_bin_ind(tab, hash, curr_entry_ptr->key);
+ if (bin_ind == UNDEFINED_BIN_IND)
+ break;
+ bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
+ MARK_BIN_DELETED(tab, bin_ind);
}
- if (table->num_entries == 0) return 0;
+ st_assert(&entries[bin] == curr_entry_ptr);
+ MARK_ENTRY_DELETED(curr_entry_ptr);
+ tab->num_entries--;
+ update_range_for_deleted(tab, bin);
+#ifdef ST_DEBUG
+ st_check(tab);
+#endif
+ break;
}
}
+#ifdef ST_DEBUG
+ st_check(tab);
+#endif
return 0;
}
-static st_index_t
-get_keys(const st_table *table, st_data_t *keys, st_index_t size,
- int check, st_data_t never)
-{
- st_data_t key;
- st_data_t *keys_start = keys;
-
- if (table->entries_packed) {
- st_index_t i;
+int
+st_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg) {
+ return st_general_foreach(tab, func, arg, FALSE);
+}
- if (size > table->real_entries) size = table->real_entries;
- for (i = 0; i < size; i++) {
- key = PKEY(table, i);
- if (check && key == never) continue;
- *keys++ = key;
- }
- }
- else {
- st_table_entry *ptr = 0;
- st_data_t *keys_end = keys + size;
+/* See comments for function st_delete_safe. */
+int
+st_foreach_check(st_table *tab, int (*func)(ANYARGS), st_data_t arg,
+ st_data_t never ATTRIBUTE_UNUSED) {
+ return st_general_foreach(tab, func, arg, TRUE);
+}
- list_for_each(st_head(table), ptr, olist) {
- if (keys >= keys_end) break;
- key = ptr->key;
- if (check && key == never) continue;
+/* Set up array KEYS by at most SIZE keys of head table TAB entries.
+ Return the number of keys set up in array KEYS. */
+static inline st_index_t
+st_general_keys(st_table *tab, st_data_t *keys, st_index_t size) {
+ st_index_t i, bound;
+ st_data_t key, *keys_start, *keys_end;
+ st_table_entry *curr_entry_ptr, *entries = tab->entries;
+
+ bound = tab->entries_bound;
+ keys_start = keys;
+ keys_end = keys + size;
+ for (i = tab->entries_start; i < bound; i++) {
+ if (keys == keys_end)
+ break;
+ curr_entry_ptr = &entries[i];
+ key = curr_entry_ptr->key;
+ if (! DELETED_ENTRY_P(curr_entry_ptr))
*keys++ = key;
- }
}
return keys - keys_start;
}
st_index_t
-st_keys(st_table *table, st_data_t *keys, st_index_t size)
-{
- return get_keys(table, keys, size, 0, 0);
+st_keys(st_table *tab, st_data_t *keys, st_index_t size) {
+ return st_general_keys(tab, keys, size);
}
+/* See comments for function st_delete_safe. */
st_index_t
-st_keys_check(st_table *table, st_data_t *keys, st_index_t size, st_data_t never)
-{
- return get_keys(table, keys, size, 1, never);
+st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
+ st_data_t never ATTRIBUTE_UNUSED) {
+ return st_general_keys(tab, keys, size);
}
-static st_index_t
-get_values(const st_table *table, st_data_t *values, st_index_t size,
- int check, st_data_t never)
-{
- st_data_t key;
- st_data_t *values_start = values;
-
- if (table->entries_packed) {
- st_index_t i;
-
- if (size > table->real_entries) size = table->real_entries;
- for (i = 0; i < size; i++) {
- key = PKEY(table, i);
- if (check && key == never) continue;
- *values++ = PVAL(table, i);
- }
- }
- else {
- st_table_entry *ptr = 0;
- st_data_t *values_end = values + size;
-
- list_for_each(st_head(table), ptr, olist) {
- if (values >= values_end) break;
- key = ptr->key;
- if (check && key == never) continue;
- *values++ = ptr->record;
- }
+/* Set up array VALUES by at most SIZE values of head table TAB
+ entries. Return the number of values set up in array VALUES. */
+static inline st_index_t
+st_general_values(st_table *tab, st_data_t *values, st_index_t size) {
+ st_index_t i, bound;
+ st_data_t *values_start, *values_end;
+ st_table_entry *curr_entry_ptr, *entries = tab->entries;
+
+ values_start = values;
+ values_end = values + size;
+ bound = tab->entries_bound;
+ st_assert(bound != 0);
+ for (i = tab->entries_start; i < bound; i++) {
+ if (values == values_end)
+ break;
+ curr_entry_ptr = &entries[i];
+ if (! DELETED_ENTRY_P(curr_entry_ptr))
+ *values++ = curr_entry_ptr->record;
}
return values - values_start;
}
st_index_t
-st_values(st_table *table, st_data_t *values, st_index_t size)
-{
- return get_values(table, values, size, 0, 0);
+st_values(st_table *tab, st_data_t *values, st_index_t size) {
+ return st_general_values(tab, values, size);
}
+/* See comments for function st_delete_safe. */
st_index_t
-st_values_check(st_table *table, st_data_t *values, st_index_t size, st_data_t never)
-{
- return get_values(table, values, size, 1, never);
+st_values_check(st_table *tab, st_data_t *values, st_index_t size,
+ st_data_t never ATTRIBUTE_UNUSED) {
+ return st_general_values(tab, values, size);
}
-
-#if 0 /* unused right now */
-int
-st_reverse_foreach_check(st_table *table, int (*func)(ANYARGS), st_data_t arg, st_data_t never)
-{
- st_table_entry *ptr, **last, *tmp, *next;
- struct list_head *head;
- enum st_retval retval;
- st_index_t i;
-
- if (table->entries_packed) {
- for (i = table->real_entries; 0 < i;) {
- st_data_t key, val;
- st_index_t hash;
- --i;
- key = PKEY(table, i);
- val = PVAL(table, i);
- hash = PHASH(table, i);
- if (key == never) continue;
- retval = (*func)(key, val, arg, 0);
- if (!table->entries_packed) {
- FIND_ENTRY(table, ptr, hash, i);
- if (retval == ST_CHECK) {
- if (!ptr) goto deleted;
- }
- if (table->num_entries == 0) return 0;
- head = st_head(table);
- next = list_entry(ptr->olist.next, st_table_entry, olist);
- goto unpacked;
- }
- switch (retval) {
- case ST_CHECK: /* check if hash is modified during iteration */
- if (PHASH(table, i) == 0 && PKEY(table, i) == never) {
- break;
- }
- i = find_packed_index_from(table, hash, key, i);
- if (i >= table->real_entries) {
- i = find_packed_index(table, hash, key);
- if (i >= table->real_entries) goto deleted;
- }
- /* fall through */
- case ST_CONTINUE:
- break;
- case ST_STOP:
- return 0;
- case ST_DELETE:
- remove_safe_packed_entry(table, i, never);
- break;
- }
- }
- return 0;
- }
-
- head = st_head(table);
- list_for_each_rev_safe(head, ptr, next, olist) {
- if (ptr->key != never) {
- i = hash_pos(ptr->hash, table->num_bins);
- retval = (*func)(ptr->key, ptr->record, arg, 0);
- unpacked:
- switch (retval) {
- case ST_CHECK: /* check if hash is modified during iteration */
- for (tmp = table->bins[i]; tmp != ptr; tmp = tmp->next) {
- if (!tmp) {
- deleted:
- /* call func with error notice */
- retval = (*func)(0, 0, arg, 1);
- return 1;
- }
- }
- /* fall through */
- case ST_CONTINUE:
- break;
- case ST_STOP:
- return 0;
- case ST_DELETE:
- last = &table->bins[hash_pos(ptr->hash, table->num_bins)];
- for (; (tmp = *last) != 0; last = &tmp->next) {
- if (ptr == tmp) {
- remove_entry(table, ptr);
- ptr->key = ptr->record = never;
- ptr->hash = 0;
- break;
- }
- }
- if (table->num_entries == 0) return 0;
- }
- }
- }
- return 0;
-}
-
-int
-st_reverse_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
-{
- st_table_entry *ptr, **last, *tmp, *next;
- enum st_retval retval;
- struct list_head *head;
- st_index_t i;
-
- if (table->entries_packed) {
- for (i = table->real_entries; 0 < i;) {
- st_data_t key, val;
- st_index_t hash;
- --i;
- key = PKEY(table, i);
- val = PVAL(table, i);
- hash = PHASH(table, i);
- retval = (*func)(key, val, arg, 0);
- if (!table->entries_packed) {
- FIND_ENTRY(table, ptr, hash, i);
- if (!ptr) return 0;
- head = st_head(table);
- next = list_entry(ptr->olist.next, st_table_entry, olist);
- goto unpacked;
- }
- switch (retval) {
- case ST_CONTINUE:
- break;
- case ST_CHECK:
- case ST_STOP:
- return 0;
- case ST_DELETE:
- remove_packed_entry(table, i);
- break;
- }
- }
- return 0;
- }
-
- head = st_head(table);
- list_for_each_rev_safe(head, ptr, next, olist) {
- i = hash_pos(ptr->hash, table->num_bins);
- retval = (*func)(ptr->key, ptr->record, arg, 0);
- unpacked:
- switch (retval) {
- case ST_CONTINUE:
- break;
- case ST_CHECK:
- case ST_STOP:
- return 0;
- case ST_DELETE:
- last = &table->bins[hash_pos(ptr->hash, table->num_bins)];
- for (; (tmp = *last) != 0; last = &tmp->next) {
- if (ptr == tmp) {
- *last = ptr->next;
- remove_entry(table, ptr);
- st_free_entry(ptr);
- break;
- }
- }
- if (table->num_entries == 0) return 0;
- }
- }
- return 0;
-}
-#endif
-
/*
* hash_32 - 32 bit Fowler/Noll/Vo FNV-1a hash code
*