From 75775157ea68efdd0b71345a37a6930e5bf1d2ef Mon Sep 17 00:00:00 2001 From: ko1 Date: Mon, 7 Nov 2016 00:45:00 +0000 Subject: Introduce table improvement by Vladimir Makarov . [Feature #12142] See header of st.c for improvment details. You can see all of code history here: This improvement is discussed at with many people, especially with Yura Sokolov. * st.c: improve st_table. * include/ruby/st.h: ditto. * internal.h, numeric.c, hash.c (rb_dbl_long_hash): extract a function. * ext/-test-/st/foreach/foreach.c: catch up this change. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56650 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- st.c | 2387 ++++++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 1360 insertions(+), 1027 deletions(-) (limited to 'st.c') diff --git a/st.c b/st.c index 07b57d906e..7bf5e13284 100644 --- a/st.c +++ b/st.c @@ -1,6 +1,99 @@ -/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */ - -/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */ +/* This is a public domain general purpose hash table package + originally written by Peter Moore @ UCB. + + The hash table data structures were redesigned and the package was + rewritten by Vladimir Makarov . */ + +/* The original package implemented classic bucket-based hash tables + with entries doubly linked for an access by their insertion order. + To decrease pointer chasing and as a consequence to improve a data + locality the current implementation is based on storing entries in + an array and using hash tables with open addressing. The current + entries are more compact in comparison with the original ones and + this also improves the data locality. + + The hash table has two arrays called *bins* and *entries*. + + bins: + ------- + | | entries array: + |-------| -------------------------------- + | index | | | entry: | | | + |-------| | | | | | + | ... | | ... | hash | ... | ... | + |-------| | | key | | | + | empty | | | record | | | + |-------| -------------------------------- + | ... | ^ ^ + |-------| |_ entries start |_ entries bound + |deleted| + ------- + + o The entry array contains table entries in the same order as they + were inserted. + + When the first entry is deleted, a variable containing index of + the current first entry (*entries start*) is changed. In all + other cases of the deletion, we just mark the entry as deleted by + using a reserved hash value. + + Such organization of the entry storage makes operations of the + table shift and the entries traversal very fast. + + o The bins provide access to the entries by their keys. The + key hash is mapped to a bin containing *index* of the + corresponding entry in the entry array. + + The bin array size is always power of two, it makes mapping very + fast by using the corresponding lower bits of the hash. + Generally it is not a good idea to ignore some part of the hash. + But alternative approach is worse. For example, we could use a + modulo operation for mapping and a prime number for the size of + the bin array. Unfortunately, the modulo operation for big + 64-bit numbers are extremely slow (it takes more than 100 cycles + on modern Intel CPUs). + + Still other bits of the hash value are used when the mapping + results in a collision. In this case we use a secondary hash + value which is a result of a function of the collision bin + index and the original hash value. The function choice + guarantees that we can traverse all bins and finally find the + corresponding bin as after several iterations the function + becomes a full cycle linear congruential generator because it + satisfies requirements of the Hull-Dobell theorem. + + When an entry is removed from the table besides marking the + hash in the corresponding entry described above, we also mark + the bin by a special value in order to find entries which had + a collision with the removed entries. + + There are two reserved values for the bins. One denotes an + empty bin, another one denotes a bin for a deleted entry. + + o The length of the bin array is at least two times more than the + entry array length. This keeps the table load factor healthy. + The trigger of rebuilding the table is always a case when we can + not insert an entry anymore at the entries bound. We could + change the entries bound too in case of deletion but than we need + a special code to count bins with corresponding deleted entries + and reset the bin values when there are too many bins + corresponding deleted entries + + Table rebuilding is done by creation of a new entry array and + bins of an appropriate size. We also try to reuse the arrays + in some cases by compacting the array and removing deleted + entries. + + o To save memory very small tables have no allocated arrays + bins. We use a linear search for an access by a key. + + o To save more memory we use 8-, 16-, 32- and 64- bit indexes in + bins depending on the current hash table size. + + This implementation speeds up the Ruby hash table benchmarks in + average by more 40% on Intel Haswell CPU. + +*/ #ifdef NOT_RUBY #include "regint.h" @@ -14,46 +107,33 @@ #include #endif #include -#include "ccan/list/list.h" +#include + +#ifdef __GNUC__ +#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p) +#define EXPECT(expr, val) __builtin_expect(expr, val) +#define ATTRIBUTE_UNUSED __attribute__((unused)) +#else +#define PREFETCH(addr, write_p) +#define EXPECT(expr, val) (expr) +#define ATTRIBUTE_UNUSED +#endif -typedef struct st_table_entry st_table_entry; +#ifdef ST_DEBUG +#define st_assert(cond) assert(cond) +#else +#define st_assert(cond) ((void)(0 && (cond))) +#endif + +/* The type of hashes. */ +typedef st_index_t st_hash_t; struct st_table_entry { - st_index_t hash; + st_hash_t hash; st_data_t key; st_data_t record; - st_table_entry *next; - struct list_node olist; }; -typedef struct st_packed_entry { - st_index_t hash; - st_data_t key, val; -} st_packed_entry; - -#ifndef STATIC_ASSERT -#define STATIC_ASSERT(name, expr) typedef int static_assert_##name##_check[(expr) ? 1 : -1] -#endif - -#define ST_DEFAULT_MAX_DENSITY 5 -#define ST_DEFAULT_INIT_TABLE_SIZE 16 -#define ST_DEFAULT_PACKED_TABLE_SIZE 18 -#define PACKED_UNIT (int)(sizeof(st_packed_entry) / sizeof(st_table_entry*)) -#define MAX_PACKED_HASH (int)(ST_DEFAULT_PACKED_TABLE_SIZE * sizeof(st_table_entry*) / sizeof(st_packed_entry)) - -STATIC_ASSERT(st_packed_entry, sizeof(st_packed_entry) == sizeof(st_table_entry*[PACKED_UNIT])); -STATIC_ASSERT(st_packed_bins, sizeof(st_packed_entry[MAX_PACKED_HASH]) <= sizeof(st_table_entry*[ST_DEFAULT_PACKED_TABLE_SIZE])); - - /* - * DEFAULT_MAX_DENSITY is the default for the largest we allow the - * average number of items per bin before increasing the number of - * bins - * - * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins - * allocated initially - * - */ - #define type_numhash st_hashtype_num const struct st_hash_type st_hashtype_num = { st_numcmp, @@ -73,105 +153,365 @@ static const struct st_hash_type type_strcasehash = { strcasehash, }; -static void rehash(st_table *); +/* Value used to catch uninitialized entries/bins during debugging. + There is a possibility for a false alarm, but its probability is + extremely small. */ +#define ST_INIT_VAL 0xafafafafafafafaf +#define ST_INIT_VAL_BYTE 0xafa #ifdef RUBY #undef malloc #undef realloc #undef calloc #undef free -#define malloc xmalloc -#define calloc xcalloc -#define realloc xrealloc -#define free(x) xfree(x) -#endif - -#define EQUAL(table,x,ent) ((x)==(ent)->key || (*(table)->type->compare)((x),(ent)->key) == 0) - -#define do_hash(key,table) (st_index_t)(*(table)->type->hash)((key)) -#define hash_pos(h,n) ((h) & (n - 1)) - -/* preparation for possible allocation improvements */ -#define st_alloc_entry() (st_table_entry *)malloc(sizeof(st_table_entry)) -#define st_free_entry(entry) free(entry) -#define st_alloc_table() (st_table *)malloc(sizeof(st_table)) -#define st_dealloc_table(table) free(table) -#define st_alloc_bins(size) (st_table_entry **)calloc(size, sizeof(st_table_entry *)) -#define st_free_bins(bins, size) free(bins) -static inline st_table_entry** -st_realloc_bins(st_table_entry **bins, st_index_t newsize, st_index_t oldsize) -{ - bins = (st_table_entry **)realloc(bins, newsize * sizeof(st_table_entry *)); - MEMZERO(bins, st_table_entry*, newsize); - return bins; -} - -/* Shortcut */ -#define bins as.big.bins -#define real_entries as.packed.real_entries - -/* preparation for possible packing improvements */ -#define PACKED_BINS(table) ((table)->as.packed.entries) -#define PACKED_ENT(table, i) PACKED_BINS(table)[i] -#define PKEY(table, i) PACKED_ENT((table), (i)).key -#define PVAL(table, i) PACKED_ENT((table), (i)).val -#define PHASH(table, i) PACKED_ENT((table), (i)).hash -#define PKEY_SET(table, i, v) (PKEY((table), (i)) = (v)) -#define PVAL_SET(table, i, v) (PVAL((table), (i)) = (v)) -#define PHASH_SET(table, i, v) (PHASH((table), (i)) = (v)) - -/* this function depends much on packed layout, so that it placed here */ -static inline void -remove_packed_entry(st_table *table, st_index_t i) -{ - table->real_entries--; - table->num_entries--; - if (i < table->real_entries) { - MEMMOVE(&PACKED_ENT(table, i), &PACKED_ENT(table, i+1), - st_packed_entry, table->real_entries - i); - } -} +#define malloc ruby_xmalloc +#define calloc ruby_xcalloc +#define realloc ruby_xrealloc +#define free ruby_xfree +#endif -static inline void -remove_safe_packed_entry(st_table *table, st_index_t i, st_data_t never) -{ - table->num_entries--; - PKEY_SET(table, i, never); - PVAL_SET(table, i, never); - PHASH_SET(table, i, 0); -} +#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0) +#define PTR_EQUAL(tab, ptr, hash_val, key) \ + ((ptr)->hash == (hash_val) && EQUAL((tab), (key), (ptr)->key)) + +/* Features of a table. */ +struct st_features { + /* Power of 2 used for number of allocated entries. */ + unsigned char entry_power; + /* Power of 2 used for number of allocated bins. Depending on the + table size, the number of bins is 2-4 times more than the + number of entries. */ + unsigned char bin_power; + /* Enumeration of sizes of bins (8-bit, 16-bit etc). */ + unsigned char size_ind; + /* Bins are packed in words of type st_index_t. The following is + a size of bins counted by words. */ + st_index_t bins_words; +}; -static st_index_t -next_pow2(st_index_t x) -{ - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; +/* Features of all possible size tables. */ #if SIZEOF_ST_INDEX_T == 8 - x |= x >> 32; +#define MAX_POWER2 62 +struct st_features features[] = { + {0, 1, 0, 0x0}, + {1, 2, 0, 0x1}, + {2, 3, 0, 0x1}, + {3, 4, 0, 0x2}, + {4, 5, 0, 0x4}, + {5, 6, 0, 0x8}, + {6, 7, 0, 0x10}, + {7, 8, 0, 0x20}, + {8, 9, 1, 0x80}, + {9, 10, 1, 0x100}, + {10, 11, 1, 0x200}, + {11, 12, 1, 0x400}, + {12, 13, 1, 0x800}, + {13, 14, 1, 0x1000}, + {14, 15, 1, 0x2000}, + {15, 16, 1, 0x4000}, + {16, 17, 2, 0x10000}, + {17, 18, 2, 0x20000}, + {18, 19, 2, 0x40000}, + {19, 20, 2, 0x80000}, + {20, 21, 2, 0x100000}, + {21, 22, 2, 0x200000}, + {22, 23, 2, 0x400000}, + {23, 24, 2, 0x800000}, + {24, 25, 2, 0x1000000}, + {25, 26, 2, 0x2000000}, + {26, 27, 2, 0x4000000}, + {27, 28, 2, 0x8000000}, + {28, 29, 2, 0x10000000}, + {29, 30, 2, 0x20000000}, + {30, 31, 2, 0x40000000}, + {31, 32, 2, 0x80000000}, + {32, 33, 3, 0x200000000}, + {33, 34, 3, 0x400000000}, + {34, 35, 3, 0x800000000}, + {35, 36, 3, 0x1000000000}, + {36, 37, 3, 0x2000000000}, + {37, 38, 3, 0x4000000000}, + {38, 39, 3, 0x8000000000}, + {39, 40, 3, 0x10000000000}, + {40, 41, 3, 0x20000000000}, + {41, 42, 3, 0x40000000000}, + {42, 43, 3, 0x80000000000}, + {43, 44, 3, 0x100000000000}, + {44, 45, 3, 0x200000000000}, + {45, 46, 3, 0x400000000000}, + {46, 47, 3, 0x800000000000}, + {47, 48, 3, 0x1000000000000}, + {48, 49, 3, 0x2000000000000}, + {49, 50, 3, 0x4000000000000}, + {50, 51, 3, 0x8000000000000}, + {51, 52, 3, 0x10000000000000}, + {52, 53, 3, 0x20000000000000}, + {53, 54, 3, 0x40000000000000}, + {54, 55, 3, 0x80000000000000}, + {55, 56, 3, 0x100000000000000}, + {56, 57, 3, 0x200000000000000}, + {57, 58, 3, 0x400000000000000}, + {58, 59, 3, 0x800000000000000}, + {59, 60, 3, 0x1000000000000000}, + {60, 61, 3, 0x2000000000000000}, + {61, 62, 3, 0x4000000000000000}, + {62, 63, 3, 0x8000000000000000}, +}; + +#else +#define MAX_POWER2 30 + +struct st_features features[] = { + {0, 1, 0, 0x1}, + {1, 2, 0, 0x1}, + {2, 3, 0, 0x2}, + {3, 4, 0, 0x4}, + {4, 5, 0, 0x8}, + {5, 6, 0, 0x10}, + {6, 7, 0, 0x20}, + {7, 8, 0, 0x40}, + {8, 9, 1, 0x100}, + {9, 10, 1, 0x200}, + {10, 11, 1, 0x400}, + {11, 12, 1, 0x800}, + {12, 13, 1, 0x1000}, + {13, 14, 1, 0x2000}, + {14, 15, 1, 0x4000}, + {15, 16, 1, 0x8000}, + {16, 17, 2, 0x20000}, + {17, 18, 2, 0x40000}, + {18, 19, 2, 0x80000}, + {19, 20, 2, 0x100000}, + {20, 21, 2, 0x200000}, + {21, 22, 2, 0x400000}, + {22, 23, 2, 0x800000}, + {23, 24, 2, 0x1000000}, + {24, 25, 2, 0x2000000}, + {25, 26, 2, 0x4000000}, + {26, 27, 2, 0x8000000}, + {27, 28, 2, 0x10000000}, + {28, 29, 2, 0x20000000}, + {29, 30, 2, 0x40000000}, + {30, 31, 2, 0x80000000}, +}; + +#endif + +/* The reserved hash value and its substitution. */ +#define RESERVED_HASH_VAL (~(st_hash_t) 0) +#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0) + +/* Return hash value of KEY for table TAB. */ +static inline st_hash_t +do_hash(st_data_t key, st_table *tab) { + st_index_t h = (st_index_t)(tab->curr_hash)(key); +#if SIZEOF_INT == SIZEOF_VOIDP + st_hash_t hash = h; +#else + st_hash_t hash = h; #endif - return x + 1; + + /* RESERVED_HASH_VAL is used for a deleted entry. Map it into + another value. Such mapping should be extremely rare. */ + return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash; } -static st_index_t -new_size(st_index_t size) -{ - st_index_t n; +/* Power of 2 defining the minimal number of allocated entries. */ +#define MINIMAL_POWER2 2 + +#if MINIMAL_POWER2 < 2 +#error "MINIMAL_POWER2 should be >= 2" +#endif - if (size && (size & ~(size - 1)) == size) /* already a power-of-two? */ - return size; +/* If the power2 of the allocated `entries` is less than the following + value, don't allocate bins and use a linear search. */ +#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4 - n = next_pow2(size); - if (n > size) - return n; +/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */ +static int +get_power2(st_index_t size) { + unsigned int n; + + for (n = 0; size != 0; n++) + size >>= 1; + if (n <= MAX_POWER2) + return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n; #ifndef NOT_RUBY + /* Ran out of the table entries */ rb_raise(rb_eRuntimeError, "st_table too big"); #endif - return -1; /* should raise exception */ + /* should raise exception */ + return -1; } +/* Return value of N-th bin in array BINS of table with bins size + index S. */ +static inline st_index_t +get_bin(st_index_t *bins, int s, st_index_t n) { + return (s == 0 ? ((unsigned char *) bins)[n] + : s == 1 ? ((unsigned short *) bins)[n] + : s == 2 ? ((unsigned int *) bins)[n] + : ((st_index_t *) bins)[n]); +} + +/* Set up N-th bin in array BINS of table with bins size index S to + value V. */ +static inline void +set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v) { + if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v; + else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v; + else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v; + else ((st_index_t *) bins)[n] = v; +} + +/* These macros define reserved values for empty table bin and table + bin which contains a deleted entry. We will never use such values + for an entry index in bins. */ +#define EMPTY_BIN 0 +#define DELETED_BIN 1 +/* Base of a real entry index in the bins. */ +#define ENTRY_BASE 2 + +/* Mark I-th bin of table TAB as empty, in other words not + corresponding to any entry. */ +#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN)) + +/* Values used for not found entry and bin with given + characteristics. */ +#define UNDEFINED_ENTRY_IND (~(st_index_t) 0) +#define UNDEFINED_BIN_IND (~(st_index_t) 0) + +/* Mark I-th bin of table TAB as corresponding to a deleted table + entry. Update number of entries in the table and number of bins + corresponding to deleted entries. */ +#define MARK_BIN_DELETED(tab, i) \ + do { \ + st_assert(i != UNDEFINED_BIN_IND); \ + st_assert(! IND_EMPTY_OR_DELETED_BIN_P(tab, i)); \ + set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \ + } while (0) + +/* Macros to check that value B is used empty bins and bins + corresponding deleted entries. */ +#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN) +#define DELETED_BIN_P(b) ((b) == DELETED_BIN) +#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN) + +/* Macros to check empty bins and bins corresponding to deleted + entries. Bins are given by their index I in table TAB. */ +#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i))) +#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i))) +#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i))) + +/* Macros for marking and checking deleted entries given by their + pointer E_PTR. */ +#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL) +#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL) + +/* Return bin size index of table TAB. */ +static inline unsigned int +get_size_ind(const st_table *tab) { + return tab->size_ind; +} + +/* Return the number of allocated bins of table TAB. */ +static inline st_index_t +get_bins_num(const st_table *tab) { + return 1<bin_power; +} + +/* Return mask for a bin index in table TAB. */ +static inline st_index_t +bins_mask(const st_table *tab) { + return get_bins_num(tab) - 1; +} + +/* Return the index of table TAB bin corresponding to + HASH_VALUE. */ +static inline st_index_t +hash_bin(st_hash_t hash_value, st_table *tab) { + return hash_value & bins_mask(tab); +} + +/* Return the number of allocated entries of table TAB. */ +static inline st_index_t +get_allocated_entries(const st_table *tab) { + return 1<entry_power; +} + +/* Return size of the allocated bins of table TAB. */ +static inline st_index_t +bins_size(const st_table *tab) { + return features[tab->entry_power].bins_words * sizeof (st_index_t); +} + +/* Mark all bins of table TAB as empty. */ +static void +initialize_bins(st_table *tab) { + memset(tab->bins, 0, bins_size(tab)); +} + +/* Make table TAB empty. */ +static void +make_tab_empty(st_table *tab) +{ + tab->curr_hash = tab->type->hash; + tab->num_entries = 0; + tab->entries_start = tab->entries_bound = 0; + if (tab->bins != NULL) + initialize_bins(tab); +} + +#ifdef ST_DEBUG +/* Check the table T consistency. It can be extremely slow. So use + it only for debugging. */ +static void +st_check(st_table *tab) { + st_index_t d, e, i, n, p; + + for (p = get_allocated_entries(tab), i = 0; p > 1; i++, p>>=1) + ; + p = i; + assert(p >= MINIMAL_POWER2); + assert(tab->entries_bound <= get_allocated_entries(tab) + && tab->entries_start <= tab->entries_bound); + n = 0; + return; + if (tab->entries_bound != 0) + for (i = tab->entries_start; i < tab->entries_bound; i++) { + assert(tab->entries[i].hash != (st_hash_t) ST_INIT_VAL + && tab->entries[i].key != ST_INIT_VAL + && tab->entries[i].record != ST_INIT_VAL); + if (! DELETED_ENTRY_P(&tab->entries[i])) + n++; + } + assert(n == tab->num_entries); + if (tab->bins == NULL) + assert(p <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS); + else { + assert(p > MAX_POWER2_FOR_TABLES_WITHOUT_BINS); + for (n = d = i = 0; i < get_bins_num(tab); i++) { + assert(get_bin(tab->bins, tab->size_ind, i) != ST_INIT_VAL); + if (IND_DELETED_BIN_P(tab, i)) { + d++; + continue; + } + else if (IND_EMPTY_BIN_P(tab, i)) + continue; + n++; + e = get_bin(tab->bins, tab->size_ind, i) - ENTRY_BASE; + assert(tab->entries_start <= e && e < tab->entries_bound); + assert(! DELETED_ENTRY_P(&tab->entries[e])); + assert(tab->entries[e].hash != (st_hash_t) ST_INIT_VAL + && tab->entries[e].key != ST_INIT_VAL + && tab->entries[e].record != ST_INIT_VAL); + } + assert(n == tab->num_entries); + assert(n + d < get_bins_num(tab)); + } +} +#endif + #ifdef HASH_LOG #ifdef HAVE_UNISTD_H #include @@ -179,8 +519,13 @@ new_size(st_index_t size) static struct { int all, total, num, str, strcase; } collision; + +/* Flag switching off output of package statistics at the end of + program. */ static int init_st = 0; +/* Output overall number of table searches and collisions into a + temporary file. */ static void stat_col(void) { @@ -189,1098 +534,1086 @@ stat_col(void) if (!collision.total) return; f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w"); fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total, - ((double)collision.all / (collision.total)) * 100); + ((double)collision.all / (collision.total)) * 100); fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase); fclose(f); } #endif -static struct list_head * -st_head(const st_table *tbl) -{ - uintptr_t addr = (uintptr_t)&tbl->as.big.private_list_head; - return (struct list_head *)addr; -} - -st_table* -st_init_table_with_size(const struct st_hash_type *type, st_index_t size) -{ - st_table *tbl; +/* Create and return table with TYPE which can hold at least SIZE + entries. The real number of entries which the table can hold is + the nearest power of two for SIZE. */ +st_table * +st_init_table_with_size(const struct st_hash_type *type, st_index_t size) { + st_table *tab; + int n; #ifdef HASH_LOG -# if HASH_LOG+0 < 0 +#if HASH_LOG+0 < 0 { - const char *e = getenv("ST_HASH_LOG"); - if (!e || !*e) init_st = 1; + const char *e = getenv("ST_HASH_LOG"); + if (!e || !*e) init_st = 1; } -# endif +#endif if (init_st == 0) { - init_st = 1; - atexit(stat_col); + init_st = 1; + atexit(stat_col); } #endif - - tbl = st_alloc_table(); - tbl->type = type; - tbl->num_entries = 0; - tbl->entries_packed = size <= MAX_PACKED_HASH; - if (tbl->entries_packed) { - size = ST_DEFAULT_PACKED_TABLE_SIZE; - tbl->real_entries = 0; - } - else { - size = new_size(size); /* round up to power-of-two */ - list_head_init(st_head(tbl)); - } - tbl->num_bins = size; - tbl->bins = st_alloc_bins(size); - - return tbl; + n = get_power2(size); + tab = (st_table *) malloc(sizeof (st_table)); + tab->type = type; + tab->entry_power = n; + tab->bin_power = features[n].bin_power; + tab->size_ind = features[n].size_ind; + tab->inside_rebuild_p = FALSE; + if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS) + tab->bins = NULL; + else + tab->bins = (st_index_t *) malloc(bins_size(tab)); + tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab) + * sizeof(st_table_entry)); +#ifdef ST_DEBUG + memset(tab->entries, ST_INIT_VAL_BYTE, + get_allocated_entries(tab) * sizeof(st_table_entry)); + if (tab->bins != NULL) + memset(tab->bins, ST_INIT_VAL_BYTE, bins_size(tab)); +#endif + make_tab_empty(tab); + tab->rebuilds_num = 0; +#ifdef ST_DEBUG + st_check(tab); +#endif + return tab; } -st_table* -st_init_table(const struct st_hash_type *type) -{ +/* Create and return table with TYPE which can hold a minimal number + of entries (see comments for get_power2). */ +st_table * +st_init_table(const struct st_hash_type *type) { return st_init_table_with_size(type, 0); } -st_table* -st_init_numtable(void) -{ +/* Create and return table which can hold a minimal number of + numbers. */ +st_table * +st_init_numtable(void) { return st_init_table(&type_numhash); } -st_table* -st_init_numtable_with_size(st_index_t size) -{ +/* Create and return table which can hold SIZE numbers. */ +st_table * +st_init_numtable_with_size(st_index_t size) { return st_init_table_with_size(&type_numhash, size); } -st_table* -st_init_strtable(void) -{ +/* Create and return table which can hold a minimal number of + strings. */ +st_table * +st_init_strtable(void) { return st_init_table(&type_strhash); } -st_table* -st_init_strtable_with_size(st_index_t size) -{ +/* Create and return table which can hold SIZE strings. */ +st_table * +st_init_strtable_with_size(st_index_t size) { return st_init_table_with_size(&type_strhash, size); } -st_table* -st_init_strcasetable(void) -{ +/* Create and return table which can hold a minimal number of strings + whose character case is ignored. */ +st_table * +st_init_strcasetable(void) { return st_init_table(&type_strcasehash); } -st_table* -st_init_strcasetable_with_size(st_index_t size) -{ +/* Create and return table which can hold SIZE strings whose character + case is ignored. */ +st_table * +st_init_strcasetable_with_size(st_index_t size) { return st_init_table_with_size(&type_strcasehash, size); } +/* Make table TAB empty. */ void -st_clear(st_table *table) -{ - register st_table_entry *ptr = 0, *next; - - if (table->entries_packed) { - table->num_entries = 0; - table->real_entries = 0; - return; - } - - list_for_each_safe(st_head(table), ptr, next, olist) { - /* list_del is not needed */ - st_free_entry(ptr); - } - table->num_entries = 0; - MEMZERO(table->bins, st_table_entry*, table->num_bins); - list_head_init(st_head(table)); +st_clear(st_table *tab) { + make_tab_empty(tab); + tab->rebuilds_num++; +#ifdef ST_DEBUG + st_check(tab); +#endif } +/* Free table TAB space. */ void -st_free_table(st_table *table) -{ - st_clear(table); - st_free_bins(table->bins, table->num_bins); - st_dealloc_table(table); +st_free_table(st_table *tab) { + if (tab->bins != NULL) + free(tab->bins); + free(tab->entries); + free(tab); } +/* Return byte size of memory allocted for table TAB. */ size_t -st_memsize(const st_table *table) -{ - if (table->entries_packed) { - return table->num_bins * sizeof (void *) + sizeof(st_table); - } - else { - return table->num_entries * sizeof(struct st_table_entry) + table->num_bins * sizeof (void *) + sizeof(st_table); - } +st_memsize(const st_table *tab) { + return(sizeof(st_table) + + (tab->bins == NULL ? 0 : bins_size(tab)) + + get_allocated_entries(tab) * sizeof(st_table_entry)); } -#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \ -((ptr) != 0 && ((ptr)->hash != (hash_val) || !EQUAL((table), (key), (ptr)))) +static st_index_t +find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key); + +static st_index_t +find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key); + +static st_index_t +find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key); + +static st_index_t +find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value, + st_data_t key, st_index_t *bin_ind); #ifdef HASH_LOG static void -count_collision(const struct st_hash_type *type) -{ +count_collision(const struct st_hash_type *type) { collision.all++; if (type == &type_numhash) { - collision.num++; + collision.num++; } else if (type == &type_strhash) { - collision.strcase++; + collision.strcase++; } else if (type == &type_strcasehash) { - collision.str++; + collision.str++; } } -#define COLLISION (collision_check ? count_collision(table->type) : (void)0) -#define FOUND_ENTRY (collision_check ? collision.total++ : (void)0) + +#define COLLISION (collision_check ? count_collision(tab->type) : (void)0) +#define FOUND_BIN (collision_check ? collision.total++ : (void)0) #define collision_check 0 #else #define COLLISION -#define FOUND_ENTRY +#define FOUND_BIN #endif -#define FIND_ENTRY(table, ptr, hash_val, bin_pos) \ - ((ptr) = find_entry((table), key, (hash_val), ((bin_pos) = hash_pos(hash_val, (table)->num_bins)))) +/* If the number of entries in the table is at least REBUILD_THRESHOLD + times less than the entry array length, decrease the table + size. */ +#define REBUILD_THRESHOLD 4 -static st_table_entry * -find_entry(const st_table *table, st_data_t key, st_index_t hash_val, - st_index_t bin_pos) -{ - register st_table_entry *ptr = table->bins[bin_pos]; - FOUND_ENTRY; - if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) { - COLLISION; - while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) { - ptr = ptr->next; - } - ptr = ptr->next; - } - return ptr; -} - -static inline st_index_t -find_packed_index_from(const st_table *table, st_index_t hash_val, - st_data_t key, st_index_t i) -{ - while (i < table->real_entries && - (PHASH(table, i) != hash_val || !EQUAL(table, key, &PACKED_ENT(table, i)))) { - i++; - } - return i; -} - -static inline st_index_t -find_packed_index(const st_table *table, st_index_t hash_val, st_data_t key) -{ - return find_packed_index_from(table, hash_val, key, 0); -} - -int -st_lookup(st_table *table, register st_data_t key, st_data_t *value) -{ - st_index_t hash_val; - register st_table_entry *ptr; - - hash_val = do_hash(key, table); - - if (table->entries_packed) { - st_index_t i = find_packed_index(table, hash_val, key); - if (i < table->real_entries) { - if (value != 0) *value = PVAL(table, i); - return 1; - } - return 0; - } - - ptr = find_entry(table, key, hash_val, hash_pos(hash_val, table->num_bins)); +#if REBUILD_THRESHOLD < 2 +#error "REBUILD_THRESHOLD should be >= 2" +#endif - if (ptr == 0) { - return 0; +/* Rebuild table TAB. Rebuilding removes all deleted bins and entries + and can change size of the table entries and bins arrays. + Rebuilding is implemented by creation of a new table or by + compaction of the existing one. */ +static void +rebuild_table(st_table *tab) { + st_index_t i, ni, bound; + unsigned int size_ind; + st_table *new_tab; + st_table_entry *entries, *new_entries; + st_table_entry *curr_entry_ptr; + st_index_t *bins; + st_index_t bin_ind; + + st_assert(tab != NULL); + bound = tab->entries_bound; + entries = tab->entries; + tab->inside_rebuild_p = TRUE; + if ((2 * tab->num_entries <= get_allocated_entries(tab) + && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab)) + || tab->num_entries < (1 << MINIMAL_POWER2)) { + /* Compaction: */ + tab->num_entries = 0; + if (tab->bins != NULL) + initialize_bins(tab); + new_tab = tab; + new_entries = entries; } else { - if (value != 0) *value = ptr->record; - return 1; - } -} - -int -st_get_key(st_table *table, register st_data_t key, st_data_t *result) -{ - st_index_t hash_val; - register st_table_entry *ptr; - - hash_val = do_hash(key, table); - - if (table->entries_packed) { - st_index_t i = find_packed_index(table, hash_val, key); - if (i < table->real_entries) { - if (result != 0) *result = PKEY(table, i); - return 1; + new_tab = st_init_table_with_size(tab->type, + 2 * tab->num_entries - 1); + st_assert(new_tab->curr_hash == new_tab->type->hash); + new_tab->curr_hash = tab->curr_hash; + new_entries = new_tab->entries; + } + ni = 0; + bins = new_tab->bins; + size_ind = get_size_ind(new_tab); + for (i = tab->entries_start; i < bound; i++) { + curr_entry_ptr = &entries[i]; + PREFETCH(entries + i + 1, 0); + if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0)) + continue; + if (&new_entries[ni] != curr_entry_ptr) + new_entries[ni] = *curr_entry_ptr; + if (EXPECT(bins != NULL, 1)) { + bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash, + curr_entry_ptr->key); + st_assert(bin_ind != UNDEFINED_BIN_IND + && (tab == new_tab || new_tab->rebuilds_num == 0) + && IND_EMPTY_BIN_P(new_tab, bin_ind)); + set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE); } - return 0; - } - - ptr = find_entry(table, key, hash_val, hash_pos(hash_val, table->num_bins)); - - if (ptr == 0) { - return 0; - } - else { - if (result != 0) *result = ptr->key; - return 1; - } + new_tab->num_entries++; + ni++; + } + if (new_tab != tab) { + tab->entry_power = new_tab->entry_power; + tab->bin_power = new_tab->bin_power; + tab->size_ind = new_tab->size_ind; + st_assert (tab->num_entries == ni && new_tab->num_entries == ni); + if (tab->bins != NULL) + free(tab->bins); + tab->bins = new_tab->bins; + free(tab->entries); + tab->entries = new_tab->entries; + free(new_tab); + } + tab->entries_start = 0; + tab->entries_bound = tab->num_entries; + tab->rebuilds_num++; + tab->inside_rebuild_p = FALSE; +#ifdef ST_DEBUG + st_check(tab); +#endif } -static inline st_table_entry * -new_entry(st_table * table, st_data_t key, st_data_t value, - st_index_t hash_val, register st_index_t bin_pos) -{ - register st_table_entry *entry = st_alloc_entry(); +/* Return the next secondary hash index for table TAB using previous + index IND and PERTERB. Finally modulo of the function becomes a + full *cycle linear congruential generator*, in other words it + guarantees traversing all table bins in extreme case. - entry->next = table->bins[bin_pos]; - table->bins[bin_pos] = entry; - entry->hash = hash_val; - entry->key = key; - entry->record = value; + According the Hull-Dobell theorem a generator + "Xnext = (a*Xprev + c) mod m" is a full cycle generator iff + o m and c are relatively prime + o a-1 is divisible by all prime factors of m + o a-1 is divisible by 4 if m is divisible by 4. - return entry; + For our case a is 5, c is 1, and m is a power of two. */ +static inline st_index_t +secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb) { + *perterb >>= 11; + ind = (ind << 2) + ind + *perterb + 1; + return hash_bin(ind, tab); } -static inline void -add_direct(st_table *table, st_data_t key, st_data_t value, - st_index_t hash_val, register st_index_t bin_pos) -{ - register st_table_entry *entry; - if (table->num_entries > ST_DEFAULT_MAX_DENSITY * table->num_bins) { - rehash(table); - bin_pos = hash_pos(hash_val, table->num_bins); - } +/* Find an entry with HASH_VALUE and KEY in TABLE using a linear + search. Return the index of the found entry in array `entries`. + If it is not found, return UNDEFINED_ENTRY_IND. */ +static inline st_index_t +find_entry(st_table *tab, st_hash_t hash_value, st_data_t key) { + st_index_t i, bound; + st_table_entry *entries; - entry = new_entry(table, key, value, hash_val, bin_pos); - list_add_tail(st_head(table), &entry->olist); - table->num_entries++; + bound = tab->entries_bound; + entries = tab->entries; + for (i = tab->entries_start; i < bound; i++) { + if (PTR_EQUAL(tab, &entries[i], hash_value, key)) + return i; + } + return UNDEFINED_ENTRY_IND; } -static void -unpack_entries(register st_table *table) -{ - st_index_t i; - st_packed_entry packed_bins[MAX_PACKED_HASH]; - register st_table_entry *entry; - st_table tmp_table = *table; - - MEMCPY(packed_bins, PACKED_BINS(table), st_packed_entry, MAX_PACKED_HASH); - table->as.packed.entries = packed_bins; - tmp_table.entries_packed = 0; -#if ST_DEFAULT_INIT_TABLE_SIZE == ST_DEFAULT_PACKED_TABLE_SIZE - MEMZERO(tmp_table.bins, st_table_entry*, tmp_table.num_bins); +/* Use the quadratic probing. The method has a better data locality + but more collisions than the current approach. In average it + results in a bit slower search. */ +/*#define QUADRATIC_PROBE*/ + +/* Return index of entry with HASH_VALUE and KEY in table TAB. If + there is no such entry, return UNDEFINED_ENTRY_IND. */ +static st_index_t +find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key) { + st_index_t ind; +#ifdef QUADRATIC_PROBE + st_index_t d; #else - tmp_table.bins = st_realloc_bins(tmp_table.bins, ST_DEFAULT_INIT_TABLE_SIZE, tmp_table.num_bins); - tmp_table.num_bins = ST_DEFAULT_INIT_TABLE_SIZE; + st_index_t peterb; #endif + st_index_t bin; + st_table_entry *entries = tab->entries; - /* - * order is important here, we need to keep the original table - * walkable during GC (GC may be triggered by new_entry call) - */ - i = 0; - list_head_init(st_head(&tmp_table)); - do { - st_data_t key = packed_bins[i].key; - st_data_t val = packed_bins[i].val; - st_index_t hash = packed_bins[i].hash; - entry = new_entry(&tmp_table, key, val, hash, - hash_pos(hash, ST_DEFAULT_INIT_TABLE_SIZE)); - list_add_tail(st_head(&tmp_table), &entry->olist); - } while (++i < MAX_PACKED_HASH); - *table = tmp_table; - list_head_init(st_head(table)); - list_append_list(st_head(table), st_head(&tmp_table)); -} - -static void -add_packed_direct(st_table *table, st_data_t key, st_data_t value, st_index_t hash_val) -{ - if (table->real_entries < MAX_PACKED_HASH) { - st_index_t i = table->real_entries++; - PKEY_SET(table, i, key); - PVAL_SET(table, i, value); - PHASH_SET(table, i, hash_val); - table->num_entries++; - } - else { - unpack_entries(table); - add_direct(table, key, value, hash_val, hash_pos(hash_val, table->num_bins)); + st_assert(tab != NULL && tab->bins != NULL); + ind = hash_bin(hash_value, tab); +#ifdef QUADRATIC_PROBE + d = 1; +#else + peterb = hash_value; +#endif + FOUND_BIN; + for (;;) { + bin = get_bin(tab->bins, get_size_ind(tab), ind); + if (! EMPTY_OR_DELETED_BIN_P(bin) + && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key)) + break; + else if (EMPTY_BIN_P(bin)) + return UNDEFINED_ENTRY_IND; +#ifdef QUADRATIC_PROBE + ind = hash_bin(ind + d, tab); + d++; +#else + ind = secondary_hash(ind, tab, &peterb); +#endif + COLLISION; } + return bin; } +/* Find and return index of table TAB bin corresponding to an entry + with HASH_VALUE and KEY. If there is no such bin, return + UNDEFINED_BIN_IND. */ +static st_index_t +find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key) { + st_index_t ind; +#ifdef QUADRATIC_PROBE + st_index_t d; +#else + st_index_t peterb; +#endif + st_index_t bin; + st_table_entry *entries = tab->entries; -int -st_insert(register st_table *table, register st_data_t key, st_data_t value) -{ - st_index_t hash_val; - register st_index_t bin_pos; - register st_table_entry *ptr; - - hash_val = do_hash(key, table); - - if (table->entries_packed) { - st_index_t i = find_packed_index(table, hash_val, key); - if (i < table->real_entries) { - PVAL_SET(table, i, value); - return 1; - } - add_packed_direct(table, key, value, hash_val); - return 0; - } - - FIND_ENTRY(table, ptr, hash_val, bin_pos); - - if (ptr == 0) { - add_direct(table, key, value, hash_val, bin_pos); - return 0; - } - else { - ptr->record = value; - return 1; + st_assert(tab != NULL && tab->bins != NULL); + ind = hash_bin(hash_value, tab); +#ifdef QUADRATIC_PROBE + d = 1; +#else + peterb = hash_value; +#endif + FOUND_BIN; + for (;;) { + bin = get_bin(tab->bins, get_size_ind(tab), ind); + if (! EMPTY_OR_DELETED_BIN_P(bin) + && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key)) + break; + else if (EMPTY_BIN_P(bin)) + return UNDEFINED_BIN_IND; +#ifdef QUADRATIC_PROBE + ind = hash_bin(ind + d, tab); + d++; +#else + ind = secondary_hash(ind, tab, &peterb); +#endif + COLLISION; } + return ind; } -int -st_insert2(register st_table *table, register st_data_t key, st_data_t value, - st_data_t (*func)(st_data_t)) -{ - st_index_t hash_val; - register st_index_t bin_pos; - register st_table_entry *ptr; - - hash_val = do_hash(key, table); - - if (table->entries_packed) { - st_index_t i = find_packed_index(table, hash_val, key); - if (i < table->real_entries) { - PVAL_SET(table, i, value); - return 1; - } - key = (*func)(key); - add_packed_direct(table, key, value, hash_val); - return 0; - } - - FIND_ENTRY(table, ptr, hash_val, bin_pos); +/* Find and return index of table TAB bin corresponding to an entry + with HASH_VALUE and KEY. The entry should be in the table + already. */ +static st_index_t +find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key) { + st_index_t ind; +#ifdef QUADRATIC_PROBE + st_index_t d; +#else + st_index_t peterb; +#endif + st_index_t bin; + st_table_entry *entries = tab->entries; - if (ptr == 0) { - key = (*func)(key); - add_direct(table, key, value, hash_val, bin_pos); - return 0; - } - else { - ptr->record = value; - return 1; + st_assert(tab != NULL && tab->bins != NULL); + ind = hash_bin(hash_value, tab); +#ifdef QUADRATIC_PROBE + d = 1; +#else + peterb = hash_value; +#endif + FOUND_BIN; + for (;;) { + bin = get_bin(tab->bins, get_size_ind(tab), ind); + if (EMPTY_OR_DELETED_BIN_P(bin)) + return ind; + st_assert (! PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key)); +#ifdef QUADRATIC_PROBE + ind = hash_bin(ind + d, tab); + d++; +#else + ind = secondary_hash(ind, tab, &peterb); +#endif + COLLISION; } } -void -st_add_direct(st_table *table, st_data_t key, st_data_t value) +/* Recalculate hashes of entries in table TAB. */ +static void +reset_entry_hashes (st_table *tab) { - st_index_t hash_val; + st_index_t i, bound; + st_table_entry *entries, *curr_entry_ptr; - hash_val = do_hash(key, table); - if (table->entries_packed) { - add_packed_direct(table, key, value, hash_val); - return; + bound = tab->entries_bound; + entries = tab->entries; + for (i = tab->entries_start; i < bound; i++) { + curr_entry_ptr = &entries[i]; + if (! DELETED_ENTRY_P(curr_entry_ptr)) + curr_entry_ptr->hash = do_hash(curr_entry_ptr->key, tab); } - - add_direct(table, key, value, hash_val, hash_pos(hash_val, table->num_bins)); } -static void -rehash(register st_table *table) -{ - register st_table_entry *ptr = 0, **new_bins; - st_index_t new_num_bins, hash_val; - - new_num_bins = new_size(table->num_bins+1); - new_bins = st_realloc_bins(table->bins, new_num_bins, table->num_bins); - table->num_bins = new_num_bins; - table->bins = new_bins; - - list_for_each(st_head(table), ptr, olist) { - hash_val = hash_pos(ptr->hash, new_num_bins); - ptr->next = new_bins[hash_val]; - new_bins[hash_val] = ptr; +/* If we have the following number of collisions with different keys + but with the same hash during finding a bin for new entry + inclusions, possibly a denial attack is going on. Start to use a + stronger hash. */ +#define HIT_THRESHOULD_FOR_STRONG_HASH 10 + +/* Return index of table TAB bin for HASH_VALUE and KEY through + BIN_IND and the pointed value as the function result. Reserve the + bin for inclusion of the corresponding entry into the table if it + is not there yet. We always find such bin as bins array length is + bigger entries array. Although we can reuse a deleted bin, the + result bin value is always empty if the table has no entry with + KEY. Return the entries array index of the found entry or + UNDEFINED_ENTRY_IND if it is not found. */ +static st_index_t +find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value, + st_data_t key, st_index_t *bin_ind) { + st_index_t ind; + st_hash_t curr_hash_value = *hash_value; +#ifdef QUADRATIC_PROBE + st_index_t d; +#else + st_index_t peterb; +#endif + st_index_t entry_index; + st_index_t first_deleted_bin_ind; + st_table_entry *entries; + int hit; + + st_assert(tab != NULL && tab->bins != NULL + && tab->entries_bound <= get_allocated_entries(tab) + && tab->entries_start <= tab->entries_bound); + repeat: + ind = hash_bin(curr_hash_value, tab); +#ifdef QUADRATIC_PROBE + d = 1; +#else + peterb = curr_hash_value; +#endif + FOUND_BIN; + first_deleted_bin_ind = UNDEFINED_BIN_IND; + entries = tab->entries; + hit = 0; + for (;;) { + entry_index = get_bin(tab->bins, get_size_ind(tab), ind); + if (EMPTY_BIN_P(entry_index)) { + tab->num_entries++; + entry_index = UNDEFINED_ENTRY_IND; + if (first_deleted_bin_ind != UNDEFINED_BIN_IND) { + /* We can reuse bin of a deleted entry. */ + ind = first_deleted_bin_ind; + MARK_BIN_EMPTY(tab, ind); + } + break; + } else if (! DELETED_BIN_P(entry_index)) { + if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key)) + break; + if (curr_hash_value == entries[entry_index - ENTRY_BASE].hash) { + hit++; + if (hit > HIT_THRESHOULD_FOR_STRONG_HASH + && tab->curr_hash != tab->type->strong_hash + && tab->type->strong_hash != NULL + && ! tab->inside_rebuild_p) { + tab->curr_hash = tab->type->strong_hash; + *hash_value = curr_hash_value = do_hash(key, tab); + reset_entry_hashes(tab); + rebuild_table(tab); + goto repeat; + } + } + } else if (first_deleted_bin_ind == UNDEFINED_BIN_IND) + first_deleted_bin_ind = ind; +#ifdef QUADRATIC_PROBE + ind = hash_bin(ind + d, tab); + d++; +#else + ind = secondary_hash(ind, tab, &peterb); +#endif + COLLISION; } + *bin_ind = ind; + return entry_index; } -st_table* -st_copy(st_table *old_table) -{ - st_table *new_table; - st_table_entry *ptr = 0, *entry; - st_index_t num_bins = old_table->num_bins; - - new_table = st_alloc_table(); - if (new_table == 0) { - return 0; - } - - *new_table = *old_table; - new_table->bins = st_alloc_bins(num_bins); - - if (new_table->bins == 0) { - st_dealloc_table(new_table); - return 0; - } +/* Find an entry with KEY in table TAB. Return non-zero if we found + it. Set up *RECORD to the found entry record. */ +int +st_lookup(st_table *tab, st_data_t key, st_data_t *value) { + st_index_t bin; + st_hash_t hash = do_hash(key, tab); - if (old_table->entries_packed) { - MEMCPY(new_table->bins, old_table->bins, st_table_entry*, old_table->num_bins); - return new_table; + if (tab->bins == NULL) { + bin = find_entry(tab, hash, key); + if (bin == UNDEFINED_ENTRY_IND) + return 0; + } else { + bin = find_table_entry_ind(tab, hash, key); + if (bin == UNDEFINED_ENTRY_IND) + return 0; + bin -= ENTRY_BASE; } + if (value != 0) + *value = tab->entries[bin].record; + return 1; +} - list_head_init(st_head(new_table)); +/* Find an entry with KEY in table TAB. Return non-zero if we found + it. Set up *RESULT to the found table entry key. */ +int +st_get_key(st_table *tab, st_data_t key, st_data_t *result) { + st_index_t bin; + st_hash_t hash = do_hash(key, tab); - list_for_each(st_head(old_table), ptr, olist) { - entry = new_entry(new_table, ptr->key, ptr->record, ptr->hash, - hash_pos(ptr->hash, num_bins)); - list_add_tail(st_head(new_table), &entry->olist); + if (tab->bins == NULL) { + bin = find_entry(tab, hash, key); + if (bin == UNDEFINED_ENTRY_IND) + return 0; + } else { + bin = find_table_entry_ind(tab, hash, key); + if (bin == UNDEFINED_ENTRY_IND) + return 0; + bin -= ENTRY_BASE; } - - return new_table; + if (result != 0) + *result = tab->entries[bin].key; + return 1; } +/* Check the table and rebuild it if it is necessary. */ static inline void -remove_entry(st_table *table, st_table_entry *ptr) -{ - list_del(&ptr->olist); - table->num_entries--; +rebuild_table_if_necessary (st_table *tab) { + st_index_t bound = tab->entries_bound; + + if (bound == get_allocated_entries(tab)) + rebuild_table(tab); + st_assert(tab->entries_bound < get_allocated_entries(tab)); } +/* Insert (KEY, VALUE) into table TAB and return zero. If there is + already entry with KEY in the table, return nonzero and and update + the value of the found entry. */ int -st_delete(register st_table *table, register st_data_t *key, st_data_t *value) -{ - st_index_t hash_val; - st_table_entry **prev; - register st_table_entry *ptr; - - hash_val = do_hash(*key, table); - - if (table->entries_packed) { - st_index_t i = find_packed_index(table, hash_val, *key); - if (i < table->real_entries) { - if (value != 0) *value = PVAL(table, i); - *key = PKEY(table, i); - remove_packed_entry(table, i); - return 1; - } - if (value != 0) *value = 0; +st_insert(st_table *tab, st_data_t key, st_data_t value) { + st_table_entry *entry; + st_index_t bin; + st_index_t ind; + st_hash_t hash_value; + st_index_t bin_ind; + int new_p; + + rebuild_table_if_necessary(tab); + hash_value = do_hash(key, tab); + if (tab->bins == NULL) { + bin = find_entry(tab, hash_value, key); + new_p = bin == UNDEFINED_ENTRY_IND; + if (new_p) + tab->num_entries++; + bin_ind = UNDEFINED_BIN_IND; + } else { + bin = find_table_bin_ptr_and_reserve(tab, &hash_value, + key, &bin_ind); + new_p = bin == UNDEFINED_ENTRY_IND; + bin -= ENTRY_BASE; + } + if (new_p) { + st_assert(tab->entries_bound < get_allocated_entries(tab)); + ind = tab->entries_bound++; + entry = &tab->entries[ind]; + entry->hash = hash_value; + entry->key = key; + entry->record = value; + if (bin_ind != UNDEFINED_BIN_IND) + set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE); +#ifdef ST_DEBUG + st_check(tab); +#endif return 0; } - - prev = &table->bins[hash_pos(hash_val, table->num_bins)]; - for (;(ptr = *prev) != 0; prev = &ptr->next) { - if (EQUAL(table, *key, ptr)) { - *prev = ptr->next; - remove_entry(table, ptr); - if (value != 0) *value = ptr->record; - *key = ptr->key; - st_free_entry(ptr); - return 1; - } - } - - if (value != 0) *value = 0; - return 0; + tab->entries[bin].record = value; +#ifdef ST_DEBUG + st_check(tab); +#endif + return 1; } -int -st_delete_safe(register st_table *table, register st_data_t *key, st_data_t *value, st_data_t never) -{ - st_index_t hash_val; - register st_table_entry *ptr; - - hash_val = do_hash(*key, table); - - if (table->entries_packed) { - st_index_t i = find_packed_index(table, hash_val, *key); - if (i < table->real_entries) { - if (value != 0) *value = PVAL(table, i); - *key = PKEY(table, i); - remove_safe_packed_entry(table, i, never); - return 1; - } - if (value != 0) *value = 0; - return 0; - } - - ptr = table->bins[hash_pos(hash_val, table->num_bins)]; +/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have + entry with KEY before the insertion. */ +static inline void +st_add_direct_with_hash(st_table *tab, + st_data_t key, st_data_t value, st_hash_t hash) { + st_table_entry *entry; + st_index_t ind; + st_index_t bin_ind; + + rebuild_table_if_necessary(tab); + ind = tab->entries_bound++; + entry = &tab->entries[ind]; + entry->hash = hash; + entry->key = key; + entry->record = value; + tab->num_entries++; + if (tab->bins != NULL) { + bin_ind = find_table_bin_ind_direct(tab, hash, key); + st_assert (bin_ind != UNDEFINED_BIN_IND); + set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE); + } +#ifdef ST_DEBUG + st_check(tab); +#endif +} - for (; ptr != 0; ptr = ptr->next) { - if ((ptr->key != never) && EQUAL(table, *key, ptr)) { - remove_entry(table, ptr); - *key = ptr->key; - if (value != 0) *value = ptr->record; - ptr->key = ptr->record = never; - return 1; - } - } +/* Insert (KEY, VALUE) into table TAB. The table should not have + entry with KEY before the insertion. */ +void +st_add_direct(st_table *tab, st_data_t key, st_data_t value) { + st_hash_t hash_value; - if (value != 0) *value = 0; - return 0; + hash_value = do_hash(key, tab); + st_add_direct_with_hash(tab, key, value, hash_value); } +/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If + there is already entry with KEY in the table, return nonzero and + and update the value of the found entry. */ int -st_shift(register st_table *table, register st_data_t *key, st_data_t *value) -{ - st_table_entry *old; - st_table_entry **prev; - register st_table_entry *ptr; - - if (table->num_entries == 0) { - if (value != 0) *value = 0; +st_insert2(st_table *tab, st_data_t key, st_data_t value, + st_data_t (*func)(st_data_t)) { + st_table_entry *entry; + st_index_t bin; + st_index_t ind, check; + st_hash_t hash_value; + st_index_t bin_ind; + int new_p; + + rebuild_table_if_necessary (tab); + hash_value = do_hash(key, tab); + if (tab->bins == NULL) { + bin = find_entry(tab, hash_value, key); + new_p = bin == UNDEFINED_ENTRY_IND; + bin_ind = UNDEFINED_BIN_IND; + } else { + bin = find_table_bin_ptr_and_reserve(tab, &hash_value, + key, &bin_ind); + new_p = bin == UNDEFINED_ENTRY_IND; + bin -= ENTRY_BASE; + } + if (new_p) { + st_assert(tab->entries_bound < get_allocated_entries(tab)); + check = tab->rebuilds_num; + key = (*func)(key); + st_assert(check == tab->rebuilds_num + && do_hash(key, tab) == hash_value); + ind = tab->entries_bound++; + entry = &tab->entries[ind]; + entry->hash = hash_value; + entry->key = key; + entry->record = value; + if (bin_ind != UNDEFINED_BIN_IND) + set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE); +#ifdef ST_DEBUG + st_check(tab); +#endif return 0; } + tab->entries[bin].record = value; +#ifdef ST_DEBUG + st_check(tab); +#endif + return 1; +} - if (table->entries_packed) { - if (value != 0) *value = PVAL(table, 0); - *key = PKEY(table, 0); - remove_packed_entry(table, 0); - return 1; - } +/* Create and return a copy of table OLD_TAB. */ +st_table * +st_copy(st_table *old_tab) { + st_table *new_tab; + + new_tab = (st_table *) malloc(sizeof(st_table)); + *new_tab = *old_tab; + if (old_tab->bins == NULL) + new_tab->bins = NULL; + else + new_tab->bins = (st_index_t *) malloc(bins_size(old_tab)); + new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab) + * sizeof(st_table_entry)); + MEMCPY(new_tab->entries, old_tab->entries, st_table_entry, + get_allocated_entries(old_tab)); + if (old_tab->bins != NULL) + MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab)); +#ifdef ST_DEBUG + st_check(new_tab); +#endif + return new_tab; +} - old = list_pop(st_head(table), st_table_entry, olist); - table->num_entries--; - prev = &table->bins[hash_pos(old->hash, table->num_bins)]; - while ((ptr = *prev) != old) prev = &ptr->next; - *prev = ptr->next; - if (value != 0) *value = ptr->record; - *key = ptr->key; - st_free_entry(ptr); - return 1; +/* Update the entries start of table TAB after removing an entry + with index N in the array entries. */ +static inline void +update_range_for_deleted(st_table *tab, st_index_t n) { + /* Do not update entries_bound here. Otherwise, we can fill all + bins by deleted entry value before rebuilding the table. */ + if (tab->entries_start == n) + tab->entries_start = n + 1; } -void -st_cleanup_safe(st_table *table, st_data_t never) +/* Delete entry with KEY from table TAB, set up *VALUE (unless + VALUE is zero) from deleted table entry, and return non-zero. If + there is no entry with KEY in the table, clear *VALUE (unless VALUE + is zero), and return zero. */ +static int +st_general_delete(st_table *tab, st_data_t *key, st_data_t *value) { - st_table_entry *ptr, **last, *tmp; - st_index_t i; - - if (table->entries_packed) { - st_index_t i = 0, j = 0; - while (PKEY(table, i) != never) { - if (i++ == table->real_entries) return; - } - for (j = i; ++i < table->real_entries;) { - if (PKEY(table, i) == never) continue; - PACKED_ENT(table, j) = PACKED_ENT(table, i); - j++; + st_table_entry *entry; + st_index_t bin; + st_index_t bin_ind; + st_hash_t hash; + + st_assert(tab != NULL); + hash = do_hash(*key, tab); + if (tab->bins == NULL) { + bin = find_entry(tab, hash, *key); + if (bin == UNDEFINED_ENTRY_IND) { + if (value != 0) *value = 0; + return 0; } - table->real_entries = j; - /* table->num_entries really should be equal j at this moment, but let set it anyway */ - table->num_entries = j; - return; - } - - for (i = 0; i < table->num_bins; i++) { - ptr = *(last = &table->bins[i]); - while (ptr != 0) { - if (ptr->key == never) { - tmp = ptr; - *last = ptr = ptr->next; - st_free_entry(tmp); - } - else { - ptr = *(last = &ptr->next); - } + } else { + bin_ind = find_table_bin_ind(tab, hash, *key); + if (bin_ind == UNDEFINED_BIN_IND) { + if (value != 0) *value = 0; + return 0; } - } + bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; + MARK_BIN_DELETED(tab, bin_ind); + } + entry = &tab->entries[bin]; + *key = entry->key; + if (value != 0) *value = entry->record; + MARK_ENTRY_DELETED(entry); + tab->num_entries--; + update_range_for_deleted(tab, bin); +#ifdef ST_DEBUG + st_check(tab); +#endif + return 1; } int -st_update(st_table *table, st_data_t key, st_update_callback_func *func, st_data_t arg) -{ - st_index_t hash_val, bin_pos; - register st_table_entry *ptr, **last, *tmp; - st_data_t value = 0, old_key; - int retval, existing = 0; +st_delete(st_table *tab, st_data_t *key, st_data_t *value) { + return st_general_delete(tab, key, value); +} - hash_val = do_hash(key, table); +/* The function and other functions with suffix '_safe' or '_check' + are originated from the previous implementation of the hash tables. + It was necessary for correct deleting entries during traversing + tables. The current implementation permits deletion during + traversing without a specific way to do this. */ +int +st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value, + st_data_t never ATTRIBUTE_UNUSED) { + return st_general_delete(tab, key, value); +} - if (table->entries_packed) { - st_index_t i = find_packed_index(table, hash_val, key); - if (i < table->real_entries) { - key = PKEY(table, i); - value = PVAL(table, i); - existing = 1; - } - { - old_key = key; - retval = (*func)(&key, &value, arg, existing); - if (!table->entries_packed) { - FIND_ENTRY(table, ptr, hash_val, bin_pos); - goto unpacked; - } - switch (retval) { - case ST_CONTINUE: - if (!existing) { - add_packed_direct(table, key, value, hash_val); - break; - } - if (old_key != key) { - PKEY(table, i) = key; - } - PVAL_SET(table, i, value); - break; - case ST_DELETE: - if (!existing) break; - remove_packed_entry(table, i); +/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and + return zero. Otherwise, remove the first entry in the table. + Return its key through KEY and its record through VALUE (unless + VALUE is zero). */ +int +st_shift(st_table *tab, st_data_t *key, st_data_t *value) { + st_index_t i, bound; + st_index_t bin; + st_table_entry *entries, *curr_entry_ptr; + st_index_t bin_ind; + + entries = tab->entries; + bound = tab->entries_bound; + for (i = tab->entries_start; i < bound; i++) { + curr_entry_ptr = &entries[i]; + if (! DELETED_ENTRY_P(curr_entry_ptr)) { + if (value != 0) *value = curr_entry_ptr->record; + *key = curr_entry_ptr->key; + if (tab->bins == NULL) { + bin = find_entry(tab, curr_entry_ptr->hash, curr_entry_ptr->key); + st_assert(bin != UNDEFINED_ENTRY_IND + && &entries[bin] == curr_entry_ptr); + } else { + bin_ind = find_table_bin_ind(tab, curr_entry_ptr->hash, + curr_entry_ptr->key); + st_assert(bin_ind != UNDEFINED_BIN_IND + && &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind) + - ENTRY_BASE] == curr_entry_ptr); + MARK_BIN_DELETED(tab, bin_ind); } + MARK_ENTRY_DELETED(curr_entry_ptr); + tab->num_entries--; + update_range_for_deleted(tab, i); +#ifdef ST_DEBUG + st_check(tab); +#endif + return 1; } - return existing; } + st_assert(tab->num_entries == 0); + tab->entries_start = tab->entries_bound = 0; + if (value != 0) *value = 0; + return 0; +} - FIND_ENTRY(table, ptr, hash_val, bin_pos); - - if (ptr != 0) { - key = ptr->key; - value = ptr->record; - existing = 1; - } - { - old_key = key; - retval = (*func)(&key, &value, arg, existing); - unpacked: - switch (retval) { - case ST_CONTINUE: - if (!existing) { - add_direct(table, key, value, hash_val, hash_pos(hash_val, table->num_bins)); - break; - } - if (old_key != key) { - ptr->key = key; - } - ptr->record = value; - break; - case ST_DELETE: - if (!existing) break; - last = &table->bins[bin_pos]; - for (; (tmp = *last) != 0; last = &tmp->next) { - if (ptr == tmp) { - *last = ptr->next; - remove_entry(table, ptr); - st_free_entry(ptr); - break; - } - } - break; - } - return existing; - } +/* See comments for function st_delete_safe. */ +void +st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED, + st_data_t never ATTRIBUTE_UNUSED) { } +/* Find entry with KEY in table TAB, call FUNC with the key and the + value of the found entry, and non-zero as the 3rd argument. If the + entry is not found, call FUNC with KEY, and 2 zero arguments. If + the call returns ST_CONTINUE, the table will have an entry with key + and value returned by FUNC through the 1st and 2nd parameters. If + the call of FUNC returns ST_DELETE, the table will not have entry + with KEY. The function returns flag of that the entry with KEY was + in the table before the call. */ int -st_foreach_check(st_table *table, int (*func)(ANYARGS), st_data_t arg, st_data_t never) -{ - st_table_entry *ptr = 0, **last, *tmp, *next; - struct list_head *head; - enum st_retval retval; - st_index_t i; - - if (table->entries_packed) { - for (i = 0; i < table->real_entries; i++) { - st_data_t key, val; - st_index_t hash; - key = PKEY(table, i); - val = PVAL(table, i); - hash = PHASH(table, i); - if (key == never) continue; - retval = (*func)(key, val, arg, 0); - if (!table->entries_packed) { - FIND_ENTRY(table, ptr, hash, i); - if (retval == ST_CHECK) { - if (!ptr) goto deleted; - } - if (table->num_entries == 0) return 0; - head = st_head(table); - next = list_entry(ptr->olist.next, st_table_entry, olist); - goto unpacked; - } - switch (retval) { - case ST_CHECK: /* check if hash is modified during iteration */ - if (PHASH(table, i) == 0 && PKEY(table, i) == never) { - break; - } - i = find_packed_index_from(table, hash, key, i); - if (i >= table->real_entries) { - i = find_packed_index(table, hash, key); - if (i >= table->real_entries) goto deleted; - } - /* fall through */ - case ST_CONTINUE: - break; - case ST_STOP: - return 0; - case ST_DELETE: - remove_safe_packed_entry(table, i, never); - break; - } +st_update(st_table *tab, st_data_t key, + st_update_callback_func *func, st_data_t arg) { + st_table_entry *entry = NULL; /* to avoid uninitialized value warning */ + st_index_t bin = 0; /* Ditto */ + st_table_entry *entries; + st_index_t bin_ind; + st_data_t value = 0, old_key; + st_index_t check; + int retval, existing; + st_hash_t hash = do_hash(key, tab); + + entries = tab->entries; + if (tab->bins == NULL) { + bin = find_entry(tab, hash, key); + existing = bin != UNDEFINED_ENTRY_IND; + entry = &entries[bin]; + bin_ind = UNDEFINED_BIN_IND; + } else { + bin_ind = find_table_bin_ind(tab, hash, key); + existing = bin_ind != UNDEFINED_BIN_IND; + if (existing) { + bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; + entry = &entries[bin]; } - return 0; } - - head = st_head(table); - list_for_each_safe(head, ptr, next, olist) { - if (ptr->key != never) { - i = hash_pos(ptr->hash, table->num_bins); - retval = (*func)(ptr->key, ptr->record, arg, 0); - unpacked: - switch (retval) { - case ST_CHECK: /* check if hash is modified during iteration */ - for (tmp = table->bins[i]; tmp != ptr; tmp = tmp->next) { - if (!tmp) { - deleted: - /* call func with error notice */ - retval = (*func)(0, 0, arg, 1); - return 1; - } - } - /* fall through */ - case ST_CONTINUE: - break; - case ST_STOP: - return 0; - case ST_DELETE: - last = &table->bins[hash_pos(ptr->hash, table->num_bins)]; - for (; (tmp = *last) != 0; last = &tmp->next) { - if (ptr == tmp) { - remove_entry(table, ptr); - ptr->key = ptr->record = never; - ptr->hash = 0; - break; - } - } - if (table->num_entries == 0) return 0; - } - } + if (existing) { + key = entry->key; + value = entry->record; + } + old_key = key; + check = tab->rebuilds_num; + retval = (*func)(&key, &value, arg, existing); + st_assert(check == tab->rebuilds_num); + switch (retval) { + case ST_CONTINUE: + if (! existing) { + st_add_direct_with_hash(tab, key, value, hash); + break; + } + if (old_key != key) { + entry->key = key; + } + entry->record = value; + break; + case ST_DELETE: + if (existing) { + if (bin_ind != UNDEFINED_BIN_IND) + MARK_BIN_DELETED(tab, bin_ind); + MARK_ENTRY_DELETED(entry); + tab->num_entries--; + update_range_for_deleted(tab, bin); +#ifdef ST_DEBUG + st_check(tab); +#endif + } + break; } - return 0; +#ifdef ST_DEBUG + st_check(tab); +#endif + return existing; } -int -st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg) -{ - st_table_entry *ptr = 0, **last, *tmp, *next; +/* Traverse all entries in table TAB calling FUNC with current entry + key and value and zero. If the call returns ST_STOP, stop + traversing. If the call returns ST_DELETE, delete the current + entry from the table. In case of ST_CHECK or ST_CONTINUE, continue + traversing. The function returns zero unless an error is found. + CHECK_P is flag of st_foreach_check call. The behavior is a bit + different for ST_CHECK and when the current element is removed + during traversing. */ +static inline int +st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg, + int check_p) { + st_index_t bin; + st_index_t bin_ind; + st_table_entry *entries, *curr_entry_ptr; enum st_retval retval; - struct list_head *head; - st_index_t i; - - if (table->entries_packed) { - for (i = 0; i < table->real_entries; i++) { - st_data_t key, val; - st_index_t hash; - key = PKEY(table, i); - val = PVAL(table, i); - hash = PHASH(table, i); - retval = (*func)(key, val, arg, 0); - if (!table->entries_packed) { - FIND_ENTRY(table, ptr, hash, i); - if (!ptr) return 0; - head = st_head(table); - next = list_entry(ptr->olist.next, st_table_entry, olist); - goto unpacked; + st_index_t i, rebuilds_num; + st_hash_t hash; + st_data_t key; + int error_p, packed_p = tab->bins == NULL; + + st_assert(tab->entries_start <= tab->entries_bound); + entries = tab->entries; + /* The bound can change inside the loop even without rebuilding + the table, e.g. by an entry inesrtion. */ + for (i = tab->entries_start; i < tab->entries_bound; i++) { + curr_entry_ptr = &entries[i]; + if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0)) + continue; + key = curr_entry_ptr->key; + rebuilds_num = tab->rebuilds_num; + hash = curr_entry_ptr->hash; + retval = (*func)(key, curr_entry_ptr->record, arg, 0); + if (rebuilds_num != tab->rebuilds_num) { + entries = tab->entries; + packed_p = tab->bins == NULL; + if (packed_p) { + i = find_entry(tab, hash, key); + error_p = i == UNDEFINED_ENTRY_IND; + } else { + i = find_table_entry_ind(tab, hash, key); + error_p = i == UNDEFINED_ENTRY_IND; + i -= ENTRY_BASE; } - switch (retval) { - case ST_CONTINUE: - break; - case ST_CHECK: - case ST_STOP: - return 0; - case ST_DELETE: - remove_packed_entry(table, i); - i--; - break; + if (error_p && check_p) { + /* call func with error notice */ + retval = (*func)(0, 0, arg, 1); +#ifdef ST_DEBUG + st_check(tab); +#endif + return 1; } + curr_entry_ptr = &entries[i]; } - return 0; - } - - head = st_head(table); - list_for_each_safe(head, ptr, next, olist) { - i = hash_pos(ptr->hash, table->num_bins); - retval = (*func)(ptr->key, ptr->record, arg, 0); - unpacked: switch (retval) { - case ST_CONTINUE: + case ST_CONTINUE: break; - case ST_CHECK: - case ST_STOP: + case ST_CHECK: + if (check_p) + break; + case ST_STOP: +#ifdef ST_DEBUG + st_check(tab); +#endif return 0; - case ST_DELETE: - last = &table->bins[hash_pos(ptr->hash, table->num_bins)]; - for (; (tmp = *last) != 0; last = &tmp->next) { - if (ptr == tmp) { - *last = ptr->next; - remove_entry(table, ptr); - st_free_entry(ptr); + case ST_DELETE: + if (packed_p) { + bin = find_entry(tab, hash, curr_entry_ptr->key); + if (bin == UNDEFINED_ENTRY_IND) break; - } + } else { + bin_ind = find_table_bin_ind(tab, hash, curr_entry_ptr->key); + if (bin_ind == UNDEFINED_BIN_IND) + break; + bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; + MARK_BIN_DELETED(tab, bin_ind); } - if (table->num_entries == 0) return 0; + st_assert(&entries[bin] == curr_entry_ptr); + MARK_ENTRY_DELETED(curr_entry_ptr); + tab->num_entries--; + update_range_for_deleted(tab, bin); +#ifdef ST_DEBUG + st_check(tab); +#endif + break; } } +#ifdef ST_DEBUG + st_check(tab); +#endif return 0; } -static st_index_t -get_keys(const st_table *table, st_data_t *keys, st_index_t size, - int check, st_data_t never) -{ - st_data_t key; - st_data_t *keys_start = keys; - - if (table->entries_packed) { - st_index_t i; +int +st_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg) { + return st_general_foreach(tab, func, arg, FALSE); +} - if (size > table->real_entries) size = table->real_entries; - for (i = 0; i < size; i++) { - key = PKEY(table, i); - if (check && key == never) continue; - *keys++ = key; - } - } - else { - st_table_entry *ptr = 0; - st_data_t *keys_end = keys + size; +/* See comments for function st_delete_safe. */ +int +st_foreach_check(st_table *tab, int (*func)(ANYARGS), st_data_t arg, + st_data_t never ATTRIBUTE_UNUSED) { + return st_general_foreach(tab, func, arg, TRUE); +} - list_for_each(st_head(table), ptr, olist) { - if (keys >= keys_end) break; - key = ptr->key; - if (check && key == never) continue; +/* Set up array KEYS by at most SIZE keys of head table TAB entries. + Return the number of keys set up in array KEYS. */ +static inline st_index_t +st_general_keys(st_table *tab, st_data_t *keys, st_index_t size) { + st_index_t i, bound; + st_data_t key, *keys_start, *keys_end; + st_table_entry *curr_entry_ptr, *entries = tab->entries; + + bound = tab->entries_bound; + keys_start = keys; + keys_end = keys + size; + for (i = tab->entries_start; i < bound; i++) { + if (keys == keys_end) + break; + curr_entry_ptr = &entries[i]; + key = curr_entry_ptr->key; + if (! DELETED_ENTRY_P(curr_entry_ptr)) *keys++ = key; - } } return keys - keys_start; } st_index_t -st_keys(st_table *table, st_data_t *keys, st_index_t size) -{ - return get_keys(table, keys, size, 0, 0); +st_keys(st_table *tab, st_data_t *keys, st_index_t size) { + return st_general_keys(tab, keys, size); } +/* See comments for function st_delete_safe. */ st_index_t -st_keys_check(st_table *table, st_data_t *keys, st_index_t size, st_data_t never) -{ - return get_keys(table, keys, size, 1, never); +st_keys_check(st_table *tab, st_data_t *keys, st_index_t size, + st_data_t never ATTRIBUTE_UNUSED) { + return st_general_keys(tab, keys, size); } -static st_index_t -get_values(const st_table *table, st_data_t *values, st_index_t size, - int check, st_data_t never) -{ - st_data_t key; - st_data_t *values_start = values; - - if (table->entries_packed) { - st_index_t i; - - if (size > table->real_entries) size = table->real_entries; - for (i = 0; i < size; i++) { - key = PKEY(table, i); - if (check && key == never) continue; - *values++ = PVAL(table, i); - } - } - else { - st_table_entry *ptr = 0; - st_data_t *values_end = values + size; - - list_for_each(st_head(table), ptr, olist) { - if (values >= values_end) break; - key = ptr->key; - if (check && key == never) continue; - *values++ = ptr->record; - } +/* Set up array VALUES by at most SIZE values of head table TAB + entries. Return the number of values set up in array VALUES. */ +static inline st_index_t +st_general_values(st_table *tab, st_data_t *values, st_index_t size) { + st_index_t i, bound; + st_data_t *values_start, *values_end; + st_table_entry *curr_entry_ptr, *entries = tab->entries; + + values_start = values; + values_end = values + size; + bound = tab->entries_bound; + st_assert(bound != 0); + for (i = tab->entries_start; i < bound; i++) { + if (values == values_end) + break; + curr_entry_ptr = &entries[i]; + if (! DELETED_ENTRY_P(curr_entry_ptr)) + *values++ = curr_entry_ptr->record; } return values - values_start; } st_index_t -st_values(st_table *table, st_data_t *values, st_index_t size) -{ - return get_values(table, values, size, 0, 0); +st_values(st_table *tab, st_data_t *values, st_index_t size) { + return st_general_values(tab, values, size); } +/* See comments for function st_delete_safe. */ st_index_t -st_values_check(st_table *table, st_data_t *values, st_index_t size, st_data_t never) -{ - return get_values(table, values, size, 1, never); +st_values_check(st_table *tab, st_data_t *values, st_index_t size, + st_data_t never ATTRIBUTE_UNUSED) { + return st_general_values(tab, values, size); } - -#if 0 /* unused right now */ -int -st_reverse_foreach_check(st_table *table, int (*func)(ANYARGS), st_data_t arg, st_data_t never) -{ - st_table_entry *ptr, **last, *tmp, *next; - struct list_head *head; - enum st_retval retval; - st_index_t i; - - if (table->entries_packed) { - for (i = table->real_entries; 0 < i;) { - st_data_t key, val; - st_index_t hash; - --i; - key = PKEY(table, i); - val = PVAL(table, i); - hash = PHASH(table, i); - if (key == never) continue; - retval = (*func)(key, val, arg, 0); - if (!table->entries_packed) { - FIND_ENTRY(table, ptr, hash, i); - if (retval == ST_CHECK) { - if (!ptr) goto deleted; - } - if (table->num_entries == 0) return 0; - head = st_head(table); - next = list_entry(ptr->olist.next, st_table_entry, olist); - goto unpacked; - } - switch (retval) { - case ST_CHECK: /* check if hash is modified during iteration */ - if (PHASH(table, i) == 0 && PKEY(table, i) == never) { - break; - } - i = find_packed_index_from(table, hash, key, i); - if (i >= table->real_entries) { - i = find_packed_index(table, hash, key); - if (i >= table->real_entries) goto deleted; - } - /* fall through */ - case ST_CONTINUE: - break; - case ST_STOP: - return 0; - case ST_DELETE: - remove_safe_packed_entry(table, i, never); - break; - } - } - return 0; - } - - head = st_head(table); - list_for_each_rev_safe(head, ptr, next, olist) { - if (ptr->key != never) { - i = hash_pos(ptr->hash, table->num_bins); - retval = (*func)(ptr->key, ptr->record, arg, 0); - unpacked: - switch (retval) { - case ST_CHECK: /* check if hash is modified during iteration */ - for (tmp = table->bins[i]; tmp != ptr; tmp = tmp->next) { - if (!tmp) { - deleted: - /* call func with error notice */ - retval = (*func)(0, 0, arg, 1); - return 1; - } - } - /* fall through */ - case ST_CONTINUE: - break; - case ST_STOP: - return 0; - case ST_DELETE: - last = &table->bins[hash_pos(ptr->hash, table->num_bins)]; - for (; (tmp = *last) != 0; last = &tmp->next) { - if (ptr == tmp) { - remove_entry(table, ptr); - ptr->key = ptr->record = never; - ptr->hash = 0; - break; - } - } - if (table->num_entries == 0) return 0; - } - } - } - return 0; -} - -int -st_reverse_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg) -{ - st_table_entry *ptr, **last, *tmp, *next; - enum st_retval retval; - struct list_head *head; - st_index_t i; - - if (table->entries_packed) { - for (i = table->real_entries; 0 < i;) { - st_data_t key, val; - st_index_t hash; - --i; - key = PKEY(table, i); - val = PVAL(table, i); - hash = PHASH(table, i); - retval = (*func)(key, val, arg, 0); - if (!table->entries_packed) { - FIND_ENTRY(table, ptr, hash, i); - if (!ptr) return 0; - head = st_head(table); - next = list_entry(ptr->olist.next, st_table_entry, olist); - goto unpacked; - } - switch (retval) { - case ST_CONTINUE: - break; - case ST_CHECK: - case ST_STOP: - return 0; - case ST_DELETE: - remove_packed_entry(table, i); - break; - } - } - return 0; - } - - head = st_head(table); - list_for_each_rev_safe(head, ptr, next, olist) { - i = hash_pos(ptr->hash, table->num_bins); - retval = (*func)(ptr->key, ptr->record, arg, 0); - unpacked: - switch (retval) { - case ST_CONTINUE: - break; - case ST_CHECK: - case ST_STOP: - return 0; - case ST_DELETE: - last = &table->bins[hash_pos(ptr->hash, table->num_bins)]; - for (; (tmp = *last) != 0; last = &tmp->next) { - if (ptr == tmp) { - *last = ptr->next; - remove_entry(table, ptr); - st_free_entry(ptr); - break; - } - } - if (table->num_entries == 0) return 0; - } - } - return 0; -} -#endif - /* * hash_32 - 32 bit Fowler/Noll/Vo FNV-1a hash code * -- cgit v1.2.3