summaryrefslogtreecommitdiff
path: root/st.c
diff options
context:
space:
mode:
Diffstat (limited to 'st.c')
-rw-r--r--st.c3487
1 files changed, 3095 insertions, 392 deletions
diff --git a/st.c b/st.c
index c16c3109a8..6bf83c94cd 100644
--- a/st.c
+++ b/st.c
@@ -1,575 +1,3278 @@
-/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
+/* This is a public domain general purpose hash table package
+ originally written by Peter Moore @ UCB.
+
+ The hash table data structures were redesigned and the package was
+ rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
+
+/* The original package implemented classic bucket-based hash tables
+ with entries doubly linked for an access by their insertion order.
+ To decrease pointer chasing and as a consequence to improve a data
+ locality the current implementation is based on storing entries in
+ an array and using hash tables with open addressing. The current
+ entries are more compact in comparison with the original ones and
+ this also improves the data locality.
+
+ The hash table has two arrays called *bins* and *entries*.
+
+ bins:
+ -------
+ | | entries array:
+ |-------| --------------------------------
+ | index | | | entry: | | |
+ |-------| | | | | |
+ | ... | | ... | hash | ... | ... |
+ |-------| | | key | | |
+ | empty | | | record | | |
+ |-------| --------------------------------
+ | ... | ^ ^
+ |-------| |_ entries start |_ entries bound
+ |deleted|
+ -------
+
+ o The entry array contains table entries in the same order as they
+ were inserted.
+
+ When the first entry is deleted, a variable containing index of
+ the current first entry (*entries start*) is changed. In all
+ other cases of the deletion, we just mark the entry as deleted by
+ using a reserved hash value.
+
+ Such organization of the entry storage makes operations of the
+ table shift and the entries traversal very fast.
+
+ o The bins provide access to the entries by their keys. The
+ key hash is mapped to a bin containing *index* of the
+ corresponding entry in the entry array.
+
+ The bin array size is always power of two, it makes mapping very
+ fast by using the corresponding lower bits of the hash.
+ Generally it is not a good idea to ignore some part of the hash.
+ But alternative approach is worse. For example, we could use a
+ modulo operation for mapping and a prime number for the size of
+ the bin array. Unfortunately, the modulo operation for big
+ 64-bit numbers is extremely slow (it takes more than 100 cycles
+ on modern Intel CPUs).
+
+ Still other bits of the hash value are used when the mapping
+ results in a collision. In this case we use a secondary hash
+ value which is a result of a function of the collision bin
+ index and the original hash value. The function choice
+ guarantees that we can traverse all bins and finally find the
+ corresponding bin as after several iterations the function
+ becomes a full cycle linear congruential generator because it
+ satisfies requirements of the Hull-Dobell theorem.
+
+ When an entry is removed from the table besides marking the
+ hash in the corresponding entry described above, we also mark
+ the bin by a special value in order to find entries which had
+ a collision with the removed entries.
+
+ There are two reserved values for the bins. One denotes an
+ empty bin, another one denotes a bin for a deleted entry.
+
+ o The length of the bin array is at least twice the entry array
+ length. This keeps the table load factor healthy. The trigger
+ for rebuilding the table is always the case when we can no
+ longer insert an entry at the entries bound. We could change
+ the entries bound too in case of deletion, but then we would
+ need special code to count bins corresponding to deleted
+ entries and to reset the bin values when there are too many
+ such bins.
+
+ Table rebuilding is done by creation of a new entry array and
+ bins of an appropriate size. We also try to reuse the arrays
+ in some cases by compacting the array and removing deleted
+ entries.
+
+ o To save memory very small tables have no allocated arrays
+ bins. We use a linear search for an access by a key.
+
+ o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
+ bins depending on the current hash table size.
+
+ o The implementation takes into account that the table can be
+ rebuilt during hashing or comparison functions. It can happen if
+ the functions are implemented in Ruby and a thread switch occurs
+ during their execution.
+
+ This implementation speeds up the Ruby hash table benchmarks on
+ average by more than 40% on an Intel Haswell CPU.
-/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
+*/
+
+#ifdef NOT_RUBY
+#include "regint.h"
+#include "st.h"
+#include <assert.h>
+#elif defined RUBY_EXPORT
+#include "internal.h"
+#include "internal/bits.h"
+#include "internal/gc.h"
+#include "internal/hash.h"
+#include "internal/sanitizers.h"
+#include "internal/set_table.h"
+#include "internal/st.h"
+#include "ruby_assert.h"
+#endif
-#include "config.h"
-#include "defines.h"
#include <stdio.h>
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#include <string.h>
-#include "st.h"
-typedef struct st_table_entry st_table_entry;
+#ifdef __GNUC__
+#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
+#define EXPECT(expr, val) __builtin_expect(expr, val)
+#define ATTRIBUTE_UNUSED __attribute__((unused))
+#else
+#define PREFETCH(addr, write_p)
+#define EXPECT(expr, val) (expr)
+#define ATTRIBUTE_UNUSED
+#endif
+
+/* The type of hashes. */
+typedef st_index_t st_hash_t;
struct st_table_entry {
- unsigned int hash;
+ st_hash_t hash; /* hash of the key; RESERVED_HASH_VAL here marks a deleted entry */
 st_data_t key;
 st_data_t record;
- st_table_entry *next;
};
-#define ST_DEFAULT_MAX_DENSITY 5
-#define ST_DEFAULT_INIT_TABLE_SIZE 11
-
- /*
- * DEFAULT_MAX_DENSITY is the default for the largest we allow the
- * average number of items per bin before increasing the number of
- * bins
- *
- * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins
- * allocated initially
- *
- */
-static int numcmp(long, long);
-static int numhash(long);
-static struct st_hash_type type_numhash = {
- numcmp,
- numhash,
+#define type_numhash st_hashtype_num
+static const struct st_hash_type st_hashtype_num = {
+ st_numcmp,
+ st_numhash,
};
-/* extern int strcmp(const char *, const char *); */
-static int strhash(const char *);
-static struct st_hash_type type_strhash = {
- strcmp,
+static int st_strcmp(st_data_t, st_data_t);
+static st_index_t strhash(st_data_t);
+static const struct st_hash_type type_strhash = {
+ st_strcmp,
strhash,
};
-static void rehash(st_table *);
+static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
+static st_index_t strcasehash(st_data_t);
+static const struct st_hash_type type_strcasehash = {
+ st_locale_insensitive_strcasecmp_i,
+ strcasehash,
+};
+
+/* Value used to catch uninitialized entries/bins during debugging.
+   There is a possibility for a false alarm, but its probability is
+   extremely small.
+
+   ST_INIT_VAL_BYTE must fit in one byte and be the byte that
+   ST_INIT_VAL repeats, so that a byte-wise fill with
+   ST_INIT_VAL_BYTE yields words equal to ST_INIT_VAL.  */
+#define ST_INIT_VAL 0xafafafafafafafaf
+#define ST_INIT_VAL_BYTE 0xaf
#ifdef RUBY
-#define malloc xmalloc
-#define calloc xcalloc
+#undef malloc
+#undef realloc
+#undef calloc
+#undef free
+#define malloc ruby_xmalloc
+#define calloc ruby_xcalloc
+#define realloc ruby_xrealloc
+#define sized_realloc ruby_xrealloc_sized
+#define free ruby_xfree
+#define sized_free ruby_xfree_sized
+#define free_fixed_ptr(v) ruby_xfree_sized((v), sizeof(*(v)))
+#else
+#define sized_realloc(ptr, new_size, old_size) realloc(ptr, new_size)
+#define sized_free(v, s) free(v)
+#define free_fixed_ptr(v) free(v)
#endif
-#define alloc(type) (type*)malloc((unsigned)sizeof(type))
-#define Calloc(n,s) (char*)calloc((n),(s))
+#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
+#define PTR_EQUAL(tab, ptr, hash_val, key_) \
+ ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
+
+/* As PTR_EQUAL only its result is returned in RES. REBUILT_P is set
+ up to TRUE if the table is rebuilt during the comparison. */
+#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
+ do { \
+ unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
+ res = PTR_EQUAL(tab, ptr, hash_val, key); \
+ rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
+ } while (FALSE)
+
+/* Features of a table. */
+struct st_features {
+ /* Power of 2 used for number of allocated entries. */
+ unsigned char entry_power;
+ /* Power of 2 used for number of allocated bins. In the feature
+ tables in this file bin_power is always entry_power + 1, i.e.
+ there are twice as many bins as entries. */
+ unsigned char bin_power;
+ /* Enumeration of sizes of bins (8-bit, 16-bit etc). get_bin and
+ set_bin treat 0, 1 and 2 as unsigned char, unsigned short and
+ unsigned int bins, and any other value as st_index_t bins. */
+ unsigned char size_ind;
+ /* Bins are packed in words of type st_index_t. The following is
+ a size of bins counted by words. */
+ st_index_t bins_words;
+};
-#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0)
+/* Features of all possible size tables. */
+#if SIZEOF_ST_INDEX_T == 8
+#define MAX_POWER2 62
+static const struct st_features features[] = {
+ {0, 1, 0, 0x0},
+ {1, 2, 0, 0x1},
+ {2, 3, 0, 0x1},
+ {3, 4, 0, 0x2},
+ {4, 5, 0, 0x4},
+ {5, 6, 0, 0x8},
+ {6, 7, 0, 0x10},
+ {7, 8, 0, 0x20},
+ {8, 9, 1, 0x80},
+ {9, 10, 1, 0x100},
+ {10, 11, 1, 0x200},
+ {11, 12, 1, 0x400},
+ {12, 13, 1, 0x800},
+ {13, 14, 1, 0x1000},
+ {14, 15, 1, 0x2000},
+ {15, 16, 1, 0x4000},
+ {16, 17, 2, 0x10000},
+ {17, 18, 2, 0x20000},
+ {18, 19, 2, 0x40000},
+ {19, 20, 2, 0x80000},
+ {20, 21, 2, 0x100000},
+ {21, 22, 2, 0x200000},
+ {22, 23, 2, 0x400000},
+ {23, 24, 2, 0x800000},
+ {24, 25, 2, 0x1000000},
+ {25, 26, 2, 0x2000000},
+ {26, 27, 2, 0x4000000},
+ {27, 28, 2, 0x8000000},
+ {28, 29, 2, 0x10000000},
+ {29, 30, 2, 0x20000000},
+ {30, 31, 2, 0x40000000},
+ {31, 32, 2, 0x80000000},
+ {32, 33, 3, 0x200000000},
+ {33, 34, 3, 0x400000000},
+ {34, 35, 3, 0x800000000},
+ {35, 36, 3, 0x1000000000},
+ {36, 37, 3, 0x2000000000},
+ {37, 38, 3, 0x4000000000},
+ {38, 39, 3, 0x8000000000},
+ {39, 40, 3, 0x10000000000},
+ {40, 41, 3, 0x20000000000},
+ {41, 42, 3, 0x40000000000},
+ {42, 43, 3, 0x80000000000},
+ {43, 44, 3, 0x100000000000},
+ {44, 45, 3, 0x200000000000},
+ {45, 46, 3, 0x400000000000},
+ {46, 47, 3, 0x800000000000},
+ {47, 48, 3, 0x1000000000000},
+ {48, 49, 3, 0x2000000000000},
+ {49, 50, 3, 0x4000000000000},
+ {50, 51, 3, 0x8000000000000},
+ {51, 52, 3, 0x10000000000000},
+ {52, 53, 3, 0x20000000000000},
+ {53, 54, 3, 0x40000000000000},
+ {54, 55, 3, 0x80000000000000},
+ {55, 56, 3, 0x100000000000000},
+ {56, 57, 3, 0x200000000000000},
+ {57, 58, 3, 0x400000000000000},
+ {58, 59, 3, 0x800000000000000},
+ {59, 60, 3, 0x1000000000000000},
+ {60, 61, 3, 0x2000000000000000},
+ {61, 62, 3, 0x4000000000000000},
+ {62, 63, 3, 0x8000000000000000},
+};
-#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key))
-#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins)
+#else
+#define MAX_POWER2 30
+
+static const struct st_features features[] = {
+ {0, 1, 0, 0x1},
+ {1, 2, 0, 0x1},
+ {2, 3, 0, 0x2},
+ {3, 4, 0, 0x4},
+ {4, 5, 0, 0x8},
+ {5, 6, 0, 0x10},
+ {6, 7, 0, 0x20},
+ {7, 8, 0, 0x40},
+ {8, 9, 1, 0x100},
+ {9, 10, 1, 0x200},
+ {10, 11, 1, 0x400},
+ {11, 12, 1, 0x800},
+ {12, 13, 1, 0x1000},
+ {13, 14, 1, 0x2000},
+ {14, 15, 1, 0x4000},
+ {15, 16, 1, 0x8000},
+ {16, 17, 2, 0x20000},
+ {17, 18, 2, 0x40000},
+ {18, 19, 2, 0x80000},
+ {19, 20, 2, 0x100000},
+ {20, 21, 2, 0x200000},
+ {21, 22, 2, 0x400000},
+ {22, 23, 2, 0x800000},
+ {23, 24, 2, 0x1000000},
+ {24, 25, 2, 0x2000000},
+ {25, 26, 2, 0x4000000},
+ {26, 27, 2, 0x8000000},
+ {27, 28, 2, 0x10000000},
+ {28, 29, 2, 0x20000000},
+ {29, 30, 2, 0x40000000},
+ {30, 31, 2, 0x80000000},
+};
-/*
- * MINSIZE is the minimum size of a dictionary.
- */
+#endif
-#define MINSIZE 8
+/* The reserved hash value and its substitution. */
+#define RESERVED_HASH_VAL (~(st_hash_t) 0)
+#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
-/*
-Table of prime numbers 2^n+a, 2<=n<=30.
-*/
-static long primes[] = {
- 8 + 3,
- 16 + 3,
- 32 + 5,
- 64 + 3,
- 128 + 3,
- 256 + 27,
- 512 + 9,
- 1024 + 9,
- 2048 + 5,
- 4096 + 3,
- 8192 + 27,
- 16384 + 43,
- 32768 + 3,
- 65536 + 45,
- 131072 + 29,
- 262144 + 3,
- 524288 + 21,
- 1048576 + 7,
- 2097152 + 17,
- 4194304 + 15,
- 8388608 + 9,
- 16777216 + 43,
- 33554432 + 35,
- 67108864 + 15,
- 134217728 + 29,
- 268435456 + 3,
- 536870912 + 11,
- 1073741824 + 85,
- 0
-};
+/* Map HASH to a value that may be stored in an entry.
+   RESERVED_HASH_VAL marks deleted entries, so a hash equal to it is
+   replaced by RESERVED_HASH_SUBSTITUTION_VAL; every other value is
+   returned unchanged.  */
+static inline st_hash_t
+normalize_hash_value(st_hash_t hash)
+{
+    if (hash != RESERVED_HASH_VAL)
+        return hash;
+    /* Hitting the reserved value should be extremely rare.  */
+    return RESERVED_HASH_SUBSTITUTION_VAL;
+}
+
+/* Hash KEY with the hash function of TAB's type and normalize the
+   result so that it never equals the reserved deleted-entry value.  */
+static inline st_hash_t
+do_hash(st_data_t key, st_table *tab)
+{
+    return normalize_hash_value((st_hash_t)(tab->type->hash)(key));
+}
+/* Power of 2 defining the minimal number of allocated entries. */
+#define MINIMAL_POWER2 2
+
+#if MINIMAL_POWER2 < 2
+#error "MINIMAL_POWER2 should be >= 2"
+#endif
+
+/* If the power2 of the allocated `entries` is not greater than the
+ following value, don't allocate bins and use a linear search. */
+#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
+
+/* Return smallest n >= MINIMAL_POWER2 such that 2^n > SIZE. If n
+ would exceed MAX_POWER2, raise a RuntimeError when RUBY is defined
+ and return -1 otherwise. */
static int
-new_size(size)
- int size;
+get_power2(st_index_t size)
+{
+ unsigned int n = ST_INDEX_BITS - nlz_intptr(size); /* presumably the bit length of SIZE, if nlz_intptr counts leading zero bits -- confirm */
+ if (n <= MAX_POWER2)
+ return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
+#ifdef RUBY
+ /* Ran out of the table entries */
+ rb_raise(rb_eRuntimeError, "st_table too big");
+#endif
+ /* should raise exception */
+ return -1;
+}
+
+/* Read bin number N from BINS.  S selects the width of one bin:
+   0 - unsigned char, 1 - unsigned short, 2 - unsigned int, any
+   other value - st_index_t.  The result is widened to st_index_t.  */
+static inline st_index_t
+get_bin(st_index_t *bins, int s, st_index_t n)
{
- int i;
+    switch (s) {
+      case 0:
+        return ((unsigned char *) bins)[n];
+      case 1:
+        return ((unsigned short *) bins)[n];
+      case 2:
+        return ((unsigned int *) bins)[n];
+      default:
+        return ((st_index_t *) bins)[n];
+    }
+}
-#if 0
- for (i=3; i<31; i++) {
- if ((1<<i) > size) return 1<<i;
+/* Store V into bin number N of BINS.  S selects the width of one
+   bin: 0 - unsigned char, 1 - unsigned short, 2 - unsigned int, any
+   other value - st_index_t.  V is converted to that width.  */
+static inline void
+set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
+{
+    switch (s) {
+      case 0:
+        ((unsigned char *) bins)[n] = (unsigned char) v;
+        break;
+      case 1:
+        ((unsigned short *) bins)[n] = (unsigned short) v;
+        break;
+      case 2:
+        ((unsigned int *) bins)[n] = (unsigned int) v;
+        break;
+      default:
+        ((st_index_t *) bins)[n] = v;
+        break;
+    }
+}
+
+/* These macros define reserved values for empty table bin and table
+ bin which contains a deleted entry. We will never use such values
+ for an entry index in bins. */
+#define EMPTY_BIN 0
+#define DELETED_BIN 1
+/* Base of a real entry index in the bins. */
+#define ENTRY_BASE 2
+
+/* Mark I-th bin of table TAB as empty, in other words not
+ corresponding to any entry. */
+#define MARK_BIN_EMPTY(tab, i) (set_bin(st_bins_ptr(tab), get_size_ind(tab), i, EMPTY_BIN))
+
+/* Values used for not found entry and bin with given
+ characteristics. */
+#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
+#define UNDEFINED_BIN_IND (~(st_index_t) 0)
+
+/* Entry and bin values returned when we found a table rebuild during
+ the search. */
+#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
+#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
+
+/* Mark I-th bin of table TAB as corresponding to a deleted table
+ entry. The macro only stores DELETED_BIN into the bin; it does
+ not update any entry or bin counters. */
+#define MARK_BIN_DELETED(tab, i) \
+ do { \
+ set_bin(st_bins_ptr(tab), get_size_ind(tab), i, DELETED_BIN); \
+ } while (0)
+
+/* Macros to check that value B is used for empty bins and bins
+ corresponding to deleted entries. */
+#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
+#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
+#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
+
+/* Macros to check empty bins and bins corresponding to deleted
+ entries. Bins are given by their index I in table TAB. */
+#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin(st_bins_ptr(tab), get_size_ind(tab), i)))
+#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin(st_bins_ptr(tab), get_size_ind(tab), i)))
+#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin(st_bins_ptr(tab), get_size_ind(tab), i)))
+
+/* Macros for marking and checking deleted entries given by their
+ pointer E_PTR. */
+#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
+#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
+
+/* Number of entry slots allocated for table TAB, i.e. 2 raised to
+   TAB's entry_power.  */
+static inline st_index_t
+get_allocated_entries(const st_table *tab)
+{
+    const st_index_t one = 1;
+
+    return one << tab->entry_power;
+}
+
+/* Return the bin size index of table TAB (its size_ind field).  */
+static inline unsigned int
+get_size_ind(const st_table *tab)
+{
+    unsigned int ind = tab->size_ind;
+
+    return ind;
+}
+
+/* Number of bins allocated for table TAB, i.e. 2 raised to TAB's
+   bin_power.  */
+static inline st_index_t
+get_bins_num(const st_table *tab)
+{
+    const st_index_t one = 1;
+
+    return one << tab->bin_power;
+}
+
+/* Return the mask that reduces a value to a bin index of table TAB.
+   The number of bins is a power of two, so the mask is that number
+   minus one.  */
+static inline st_index_t
+bins_mask(const st_table *tab)
+{
+    st_index_t nbins = get_bins_num(tab);
+
+    return nbins - 1;
+}
+
+/* Return true if table TAB has a bins array, i.e. its entry_power is
+   above MAX_POWER2_FOR_TABLES_WITHOUT_BINS.  */
+static inline bool
+st_has_bins(const st_table *tab)
+{
+    return !(tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS);
+}
+
+/* Return the size in bytes of the entries array allocated for
+   table TAB.  */
+static inline size_t
+st_allocated_entries_size(const st_table *tab)
+{
+    return sizeof(st_table_entry) * get_allocated_entries(tab);
+}
+
+static inline st_index_t *
+st_bins_ptr(const st_table *tab)
+{ /* Return the start of TAB's bins, or NULL when TAB has no bins. */
+ if (st_has_bins(tab)) {
+ return (st_index_t *)(((char *)tab->entries) + st_allocated_entries_size(tab)); /* the bins are placed right after the entries array */
 }
- return -1;
-#else
- int newsize;
- for (i = 0, newsize = MINSIZE;
- i < sizeof(primes)/sizeof(primes[0]);
- i++, newsize <<= 1)
- {
- if (newsize > size) return primes[i];
+ return NULL;
+}
+
+/* Map HASH_VALUE to a bin index of table TAB by keeping only the
+   low bits selected by the bins mask.  */
+static inline st_index_t
+hash_bin(st_hash_t hash_value, st_table *tab)
+{
+    const st_index_t mask = bins_mask(tab);
+
+    return mask & hash_value;
+}
+
+/* Return size of the allocated bins of table TAB in bytes, or 0 when
+ TAB has no bins. */
+static inline st_index_t
+bins_size(const st_table *tab)
+{
+ if (st_has_bins(tab)) {
+ return features[tab->entry_power].bins_words * sizeof (st_index_t); /* bins_words counts st_index_t words */
 }
- /* Ran out of polynomials */
- return -1; /* should raise exception */
-#endif
+ return 0;
+}
+
+/* Mark all bins of table TAB as empty by zero-filling the bins area
+   (EMPTY_BIN is 0).  The callers visible in this file invoke it only
+   for tables that have bins.  */
+static void
+initialize_bins(st_table *tab)
+{
+    st_index_t *bins = st_bins_ptr(tab);
+    st_index_t nbytes = bins_size(tab);
+
+    memset(bins, 0, nbytes);
+}
+
+/* Make table TAB empty: reset the entry count and the start/bound
+   indexes of the entries array, and mark all bins empty when TAB has
+   bins.  No memory is allocated or released.  */
+static void
+make_tab_empty(st_table *tab)
+{
+    tab->entries_start = 0;
+    tab->entries_bound = 0;
+    tab->num_entries = 0;
+    if (st_bins_ptr(tab) == NULL)
+        return;
+    initialize_bins(tab);
}
#ifdef HASH_LOG
-static int collision = 0;
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+static struct {
+ int all, total, num, str, strcase;
+} collision;
+
+/* Flag switching off output of package statistics at the end of
+ program. */
static int init_st = 0;
+/* Output overall number of table searches and collisions into a
+ temporary file named /tmp/col<pid>. Nothing is written when no
+ search was counted or when the file cannot be opened. */
static void
-stat_col()
+stat_col(void)
{
- FILE *f = fopen("/tmp/col", "w");
- fprintf(f, "collision: %d\n", collision);
+ char fname[10+sizeof(long)*3]; /* room for the "/tmp/col" prefix plus a decimal long */
+ FILE *f;
+ if (!collision.total) return;
+ f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w"); /* the comma expression formats the name first, then passes it to fopen */
+ if (f == NULL)
+ return;
+ fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
+ ((double)collision.all / (collision.total)) * 100);
+ fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
 fclose(f);
}
#endif
-st_table*
-st_init_table_with_size(type, size)
- struct st_hash_type *type;
- int size;
+/* Initialize the caller-provided table TAB with hash type TYPE so
+   that it can hold at least SIZE entries, and return TAB.
+
+   When RUBY is not defined, NULL is returned if SIZE is too big or
+   if the entries/bins storage cannot be allocated.  TAB itself is
+   never freed here: it belongs to the caller (st_init_table_with_size
+   frees its own allocation when it gets NULL back), so freeing it in
+   this function as well would free it twice.  */
+st_table *
+st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size)
{
- st_table *tbl;
+    int n;
#ifdef HASH_LOG
+#if HASH_LOG+0 < 0
+    {
+        const char *e = getenv("ST_HASH_LOG");
+        if (!e || !*e) init_st = 1;
+    }
+#endif
 if (init_st == 0) {
- init_st = 1;
- atexit(stat_col);
+        init_st = 1;
+        atexit(stat_col);
+    }
+#endif
+
+    n = get_power2(size);
+#ifndef RUBY
+    if (n < 0)
+        return NULL;
+#endif
+
+    tab->type = type;
+    tab->entry_power = n;
+    tab->bin_power = features[n].bin_power;
+    tab->size_ind = features[n].size_ind;
+
+    /* Entries and bins share one allocation; the bins follow the entries.  */
+    size_t memsize = get_allocated_entries(tab) * sizeof(st_table_entry);
+    if (tab->entry_power > MAX_POWER2_FOR_TABLES_WITHOUT_BINS) {
+        memsize += bins_size(tab);
+    }
+    tab->entries = (st_table_entry *)malloc(memsize);
+#ifndef RUBY
+    if (tab->entries == NULL) {
+        /* Nothing was allocated, so there is nothing to release.  */
+        return NULL;
 }
#endif
+    make_tab_empty(tab);
+    tab->rebuilds_num = 0;
+    return tab;
+}
- size = new_size(size); /* round up to prime number */
+/* Initialize the caller-provided table TAB as a table of numbers
+   (hash type type_numhash) which can hold at least SIZE entries.
+   Returns what st_init_existing_table_with_size returns.  */
+st_table *
+st_init_existing_numtable_with_size(st_table *tab, st_index_t size)
+{
+    const struct st_hash_type *num_type = &type_numhash;
+
+    return st_init_existing_table_with_size(tab, num_type, size);
+}
- tbl = alloc(st_table);
- tbl->type = type;
- tbl->num_entries = 0;
- tbl->num_bins = size;
- tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*));
+/* Create and return table with TYPE which can hold at least SIZE
+ entries. The real number of entries which the table can hold is
+ the smallest power of two greater than SIZE, but not less than
+ 2^MINIMAL_POWER2 (see get_power2). When RUBY is not defined, NULL
+ is returned if the table cannot be allocated or initialized. */
+st_table *
+st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
+{
+ st_table *tab = malloc(sizeof(st_table));
+#ifndef RUBY
+ if (tab == NULL)
+ return NULL;
+#endif
- return tbl;
+#ifdef RUBY
+ st_init_existing_table_with_size(tab, type, size);
+#else
+ if (st_init_existing_table_with_size(tab, type, size) == NULL) {
+ free_fixed_ptr(tab);
+ return NULL;
+ }
+#endif
+
+ return tab;
+}
+
+/* Return the number of entries currently stored in table TBL.  */
+size_t
+st_table_size(const struct st_table *tbl)
+{
+    size_t count = tbl->num_entries;
+
+    return count;
+}
-st_table*
-st_init_table(type)
- struct st_hash_type *type;
+/* Create and return table with TYPE which can hold a minimal number
+ of entries (see comments for get_power2). Equivalent to
+ st_init_table_with_size(TYPE, 0). */
+st_table *
+st_init_table(const struct st_hash_type *type)
{
 return st_init_table_with_size(type, 0);
}
-st_table*
+/* Create and return table which can hold a minimal number of
+ numbers. The table uses the type_numhash hash type. */
+st_table *
st_init_numtable(void)
{
 return st_init_table(&type_numhash);
}
-st_table*
-st_init_numtable_with_size(size)
- int size;
+/* Create and return table which can hold SIZE numbers. The table
+ uses the type_numhash hash type. */
+st_table *
+st_init_numtable_with_size(st_index_t size)
{
 return st_init_table_with_size(&type_numhash, size);
}
-st_table*
+/* Create and return table which can hold a minimal number of
+ strings. The table uses the type_strhash hash type (st_strcmp
+ for comparison, strhash for hashing). */
+st_table *
st_init_strtable(void)
{
 return st_init_table(&type_strhash);
}
-st_table*
-st_init_strtable_with_size(size)
- int size;
+/* Create and return table which can hold SIZE strings. The table
+ uses the type_strhash hash type (st_strcmp for comparison, strhash
+ for hashing). */
+st_table *
+st_init_strtable_with_size(st_index_t size)
{
 return st_init_table_with_size(&type_strhash, size);
}
+/* Initialize the caller-provided table TAB as a table of strings
+   (hash type type_strhash) which can hold at least SIZE entries.
+   Returns what st_init_existing_table_with_size returns.  */
+st_table *
+st_init_existing_strtable_with_size(st_table *tab, st_index_t size)
+{
+    const struct st_hash_type *str_type = &type_strhash;
+
+    return st_init_existing_table_with_size(tab, str_type, size);
+}
+
+
+/* Create and return table which can hold a minimal number of strings
+ whose character case is ignored. The table uses the
+ type_strcasehash hash type (st_locale_insensitive_strcasecmp_i for
+ comparison, strcasehash for hashing). */
+st_table *
+st_init_strcasetable(void)
+{
+ return st_init_table(&type_strcasehash);
+}
+
+/* Create and return table which can hold SIZE strings whose character
+ case is ignored. The table uses the type_strcasehash hash type
+ (st_locale_insensitive_strcasecmp_i for comparison, strcasehash
+ for hashing). */
+st_table *
+st_init_strcasetable_with_size(st_index_t size)
+{
+ return st_init_table_with_size(&type_strcasehash, size);
+}
+
+/* Make table TAB empty. The allocated arrays are kept and
+ rebuilds_num is incremented. */
void
-st_free_table(table)
- st_table *table;
+st_clear(st_table *tab)
{
- register st_table_entry *ptr, *next;
- int i;
+ make_tab_empty(tab);
+ tab->rebuilds_num++; /* presumably lets code that compares rebuilds_num (see DO_PTR_EQUAL_CHECK) notice the change -- confirm */
+}
- for(i = 0; i < table->num_bins; i++) {
- ptr = table->bins[i];
- while (ptr != 0) {
- next = ptr->next;
- free(ptr);
- ptr = next;
- }
- }
- free(table->bins);
- free(table);
+/* Return the size in bytes of the entries array allocated for table
+   TAB (the bins are not included).  */
+static inline size_t
+st_entries_memsize(const st_table *tab)
+{
+    return sizeof(st_table_entry) * get_allocated_entries(tab);
+}
+
+/* Release the storage that holds the entries of table TAB.  The size
+   handed to sized_free is the size of the entries array plus the
+   size of the bins.  */
+static inline void
+st_free_entries(const st_table *tab)
+{
+    sized_free(tab->entries, bins_size(tab) + st_entries_memsize(tab));
+}
+
+/* Free the entries/bins storage of table TAB but not the st_table
+   structure itself (presumably for tables whose structure is
+   embedded in another object -- confirm against callers).  */
+void
+st_free_embedded_table(st_table *tab)
+{
+    st_free_entries(tab);
}
-#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
-((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key)))
+/* Free table TAB space: first the entries/bins storage, then the
+   st_table structure itself.  */
+void
+st_free_table(st_table *tab)
+{
+    st_free_entries(tab);
+    free_fixed_ptr(tab);
+}
+
+/* Return byte size of memory allocated for table TAB: the st_table
+   structure plus its bins plus its entries array.  */
+size_t
+st_memsize(const st_table *tab)
+{
+    size_t total = sizeof(st_table);
+
+    RUBY_ASSERT(tab != NULL);
+    total += bins_size(tab);
+    total += st_entries_memsize(tab);
+    return total;
+}
+
+static st_index_t
+find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
+
+static st_index_t
+find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
+
+static st_index_t
+find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
+
+static st_index_t
+find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
+ st_data_t key, st_index_t *bin_ind);
#ifdef HASH_LOG
-#define COLLISION collision++
+/* Count one collision for the statistics printed by stat_col: bump
+   the overall counter and the counter that belongs to hash type
+   TYPE.  Types other than the three built-in ones only contribute to
+   the overall counter.  */
+static void
+count_collision(const struct st_hash_type *type)
+{
+    collision.all++;
+    if (type == &type_numhash) {
+        collision.num++;
+    }
+    else if (type == &type_strhash) {
+        /* Case-sensitive string tables are reported as "str".  */
+        collision.str++;
+    }
+    else if (type == &type_strcasehash) {
+        /* Case-insensitive string tables are reported as "strcase".  */
+        collision.strcase++;
+    }
+}
+
+#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
+#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
+#define collision_check 0
#else
#define COLLISION
+#define FOUND_BIN
#endif
-#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\
- bin_pos = hash_val%(table)->num_bins;\
- ptr = (table)->bins[bin_pos];\
- if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\
- COLLISION;\
- while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\
- ptr = ptr->next;\
- }\
- ptr = ptr->next;\
- }\
-} while (0)
+/* If the number of entries in the table is at least REBUILD_THRESHOLD
+ times less than the entry array length, decrease the table
+ size. */
+#define REBUILD_THRESHOLD 4
-int
-st_lookup(table, key, value)
- st_table *table;
- register st_data_t key;
- st_data_t *value;
+#if REBUILD_THRESHOLD < 2
+#error "REBUILD_THRESHOLD should be >= 2"
+#endif
+
+static void rebuild_table_with(st_table *const new_tab, st_table *const tab);
+static void rebuild_move_table(st_table *const new_tab, st_table *const tab);
+static void rebuild_cleanup(st_table *const tab);
+
+/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
+ and can change size of the table entries and bins arrays.
+ Rebuilding is implemented by creation of a new table or by
+ compaction of the existing one.
+
+ The existing storage is compacted in place when the number of
+ entries is at most a half of, but more than 1/REBUILD_THRESHOLD of,
+ the allocated entries, or when it is less than
+ (1 << MINIMAL_POWER2). Otherwise a new table is created with a
+ size request of 2 * num_entries - 1 and its storage is moved into
+ TAB. */
+static void
+rebuild_table(st_table *tab)
+{
+ if ((2 * tab->num_entries <= get_allocated_entries(tab)
+ && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
+ || tab->num_entries < (1 << MINIMAL_POWER2)) {
+ /* Compaction: */
+ /* The entry counter is reset here and counted again by
+ rebuild_table_with while it copies the live entries. */
+ tab->num_entries = 0;
+ if (st_has_bins(tab))
+ initialize_bins(tab);
+ rebuild_table_with(tab, tab);
+ }
+ else {
+ st_table *new_tab;
+ /* This allocation could trigger GC and compaction. If tab is the
+ * gen_fields_tbl, then tab could have changed in size due to objects being
+ * freed and/or moved. Do not store attributes of tab before this line. */
+ new_tab = st_init_table_with_size(tab->type,
+ 2 * tab->num_entries - 1);
+ rebuild_table_with(new_tab, tab);
+ /* Hand the new storage over to TAB and free the temporary table. */
+ rebuild_move_table(new_tab, tab);
+ }
+ /* Reset the entries range and bump the rebuild counter. */
+ rebuild_cleanup(tab);
+}
+
+/* Copy all non-deleted entries of TAB, in their current order, to the
+ start of the entries array of NEW_TAB, and register each copied
+ entry in the bins of NEW_TAB when it has bins. NEW_TAB->num_entries
+ is incremented for each copied entry. NEW_TAB may be TAB itself
+ (compaction in place); in that case an entry that is already at its
+ final position is not copied. */
+static void
+rebuild_table_with(st_table *const new_tab, st_table *const tab)
+{
+ st_index_t i, ni;
+ unsigned int size_ind;
+ st_table_entry *new_entries;
+ st_table_entry *curr_entry_ptr;
+ st_index_t *bins;
+ st_index_t bin_ind;
+
+ new_entries = new_tab->entries;
+
+ /* NI is the next free position in the destination array. */
+ ni = 0;
+ bins = st_bins_ptr(new_tab);
+ size_ind = get_size_ind(new_tab);
+ st_index_t bound = tab->entries_bound;
+ st_table_entry *entries = tab->entries;
+
+ for (i = tab->entries_start; i < bound; i++) {
+ curr_entry_ptr = &entries[i];
+ PREFETCH(entries + i + 1, 0);
+ if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
+ continue;
+ /* Skip the copy when source and destination are the same slot. */
+ if (&new_entries[ni] != curr_entry_ptr)
+ new_entries[ni] = *curr_entry_ptr;
+ if (EXPECT(bins != NULL, 1)) {
+ bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
+ curr_entry_ptr->key);
+ set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
+ }
+ new_tab->num_entries++;
+ ni++;
+ }
+
+ assert(new_tab->num_entries == tab->num_entries);
+}
+
+/* Move the storage of NEW_TAB into TAB: free the old entries storage
+ of TAB, copy the size fields (entry_power, bin_power, size_ind) and
+ the entries pointer of NEW_TAB into TAB, and free the NEW_TAB
+ structure itself. The entries storage of NEW_TAB is not freed
+ here, as TAB->entries now points to it. */
+static void
+rebuild_move_table(st_table *const new_tab, st_table *const tab)
{
- unsigned int hash_val, bin_pos;
- register st_table_entry *ptr;
+ st_free_entries(tab);
+ tab->entry_power = new_tab->entry_power;
+ tab->bin_power = new_tab->bin_power;
+ tab->size_ind = new_tab->size_ind;
+ tab->entries = new_tab->entries;
+ free_fixed_ptr(new_tab);
+}
+
+/* Finish a rebuild of TAB: bump the rebuild counter and set the
+   entries range to [0, TAB->num_entries). */
+static void
+rebuild_cleanup(st_table *const tab)
+{
+    tab->rebuilds_num += 1;
+    tab->entries_bound = tab->num_entries;
+    tab->entries_start = 0;
+}
+
+/* Return the next secondary hash index for table TAB using previous
+ index IND and PERTURB. Once PERTURB has been shifted down to zero,
+ the function taken modulo the bins array size becomes a *full cycle
+ linear congruential generator*; in other words, it guarantees
+ traversing all table bins in the extreme case.
- hash_val = do_hash(key, table);
- FIND_ENTRY(table, ptr, hash_val, bin_pos);
+ According to the Hull-Dobell theorem, a generator
+ "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
+ o m and c are relatively prime
+ o a-1 is divisible by all prime factors of m
+ o a-1 is divisible by 4 if m is divisible by 4.
- if (ptr == 0) {
- return 0;
+ For our case a is 5, c is 1, and m is a power of two. */
+static inline st_index_t
+secondary_hash(st_index_t ind, st_table *tab, st_index_t *perturb)
+{
+    st_index_t next_ind;
+
+    /* Shift the perturbation down by 11 bits on every step. */
+    *perturb >>= 11;
+    /* 5 * IND + 1, plus what is left of the perturbation. */
+    next_ind = (ind << 2) + ind + *perturb + 1;
+    return hash_bin(next_ind, tab);
+}
+
+/* Find an entry with HASH_VALUE and KEY in table TAB using a linear
+ search over the entries array, from TAB->entries_start up to (not
+ including) TAB->entries_bound. Return the index of the found entry
+ in array `entries`. If it is not found, return UNDEFINED_ENTRY_IND.
+ If the table was rebuilt during the search, return
+ REBUILT_TABLE_ENTRY_IND. */
+static inline st_index_t
+find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
+{
+ int eq_p, rebuilt_p;
+ st_index_t i, bound;
+ st_table_entry *entries;
+
+ bound = tab->entries_bound;
+ entries = tab->entries;
+ for (i = tab->entries_start; i < bound; i++) {
+ /* Sets EQ_P and REBUILT_P for the entry at index I. */
+ DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
+ if (EXPECT(rebuilt_p, 0))
+ return REBUILT_TABLE_ENTRY_IND;
+ if (eq_p)
+ return i;
+ }
+ return UNDEFINED_ENTRY_IND;
+}
+
+/* Use the quadratic probing. The method has a better data locality
+ but more collisions than the current approach. On average it
+ results in a bit slower search. */
+/*#define QUADRATIC_PROBE*/
+
+/* Return index of entry with HASH_VALUE and KEY in table TAB. The
+ result is the content of the found bin, i.e. the entries array
+ index biased by ENTRY_BASE; the caller has to subtract ENTRY_BASE
+ before indexing the entries array. If there is no such entry,
+ return UNDEFINED_ENTRY_IND. If the table was rebuilt during the
+ search, return REBUILT_TABLE_ENTRY_IND. */
+static st_index_t
+find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
+{
+ int eq_p, rebuilt_p;
+ st_index_t ind;
+#ifdef QUADRATIC_PROBE
+ st_index_t d;
+#else
+ st_index_t perturb;
+#endif
+ st_index_t bin;
+ st_table_entry *entries = tab->entries;
+
+ ind = hash_bin(hash_value, tab);
+#ifdef QUADRATIC_PROBE
+ d = 1;
+#else
+ perturb = hash_value;
+#endif
+ FOUND_BIN;
+ for (;;) {
+ bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), ind);
+ if (! EMPTY_OR_DELETED_BIN_P(bin)) {
+ /* An occupied bin: compare its entry with the searched key. */
+ DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
+ if (EXPECT(rebuilt_p, 0))
+ return REBUILT_TABLE_ENTRY_IND;
+ if (eq_p)
+ break;
+ }
+ else if (EMPTY_BIN_P(bin))
+ /* An empty bin ends the probe sequence: the key is absent. */
+ return UNDEFINED_ENTRY_IND;
+ /* A deleted bin or a non-matching entry: probe the next bin. */
+#ifdef QUADRATIC_PROBE
+ ind = hash_bin(ind + d, tab);
+ d++;
+#else
+ ind = secondary_hash(ind, tab, &perturb);
+#endif
+ COLLISION;
+ }
+ return bin;
+}
+
+/* Find and return index of table TAB bin corresponding to an entry
+ with HASH_VALUE and KEY. Unlike find_table_entry_ind, the result
+ is the position in the bins array, not the bin content. If there
+ is no such bin, return UNDEFINED_BIN_IND. If the table was rebuilt
+ during the search, return REBUILT_TABLE_BIN_IND. */
+static st_index_t
+find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
+{
+ int eq_p, rebuilt_p;
+ st_index_t ind;
+#ifdef QUADRATIC_PROBE
+ st_index_t d;
+#else
+ st_index_t perturb;
+#endif
+ st_index_t bin;
+ st_table_entry *entries = tab->entries;
+
+ ind = hash_bin(hash_value, tab);
+#ifdef QUADRATIC_PROBE
+ d = 1;
+#else
+ perturb = hash_value;
+#endif
+ FOUND_BIN;
+ for (;;) {
+ bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), ind);
+ if (! EMPTY_OR_DELETED_BIN_P(bin)) {
+ /* An occupied bin: compare its entry with the searched key. */
+ DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
+ if (EXPECT(rebuilt_p, 0))
+ return REBUILT_TABLE_BIN_IND;
+ if (eq_p)
+ break;
+ }
+ else if (EMPTY_BIN_P(bin))
+ /* An empty bin ends the probe sequence: the key is absent. */
+ return UNDEFINED_BIN_IND;
+ /* A deleted bin or a non-matching entry: probe the next bin. */
+#ifdef QUADRATIC_PROBE
+ ind = hash_bin(ind + d, tab);
+ d++;
+#else
+ ind = secondary_hash(ind, tab, &perturb);
+#endif
+ COLLISION;
+ }
+ return ind;
+}
+
+/* Find and return index of the first empty or deleted bin on the
+ probe sequence of HASH_VALUE in table TAB, i.e. a bin that can hold
+ a reference to an entry with HASH_VALUE and KEY. No key comparison
+ is done, so KEY is not examined here; the caller guarantees that
+ the table bins do not refer to an entry with KEY yet. */
+static st_index_t
+find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
+{
+ st_index_t ind;
+#ifdef QUADRATIC_PROBE
+ st_index_t d;
+#else
+ st_index_t perturb;
+#endif
+ st_index_t bin;
+
+ ind = hash_bin(hash_value, tab);
+#ifdef QUADRATIC_PROBE
+ d = 1;
+#else
+ perturb = hash_value;
+#endif
+ FOUND_BIN;
+ for (;;) {
+ bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), ind);
+ if (EMPTY_OR_DELETED_BIN_P(bin))
+ return ind;
+ /* The bin is occupied: probe the next one. */
+#ifdef QUADRATIC_PROBE
+ ind = hash_bin(ind + d, tab);
+ d++;
+#else
+ ind = secondary_hash(ind, tab, &perturb);
+#endif
+ COLLISION;
+ }
+}
+
+/* Return index of table TAB bin for HASH_VALUE and KEY through
+ BIN_IND and the pointed value as the function result. Reserve the
+ bin for inclusion of the corresponding entry into the table if it
+ is not there yet. We always find such bin as bins array length is
+ bigger than entries array length. Although we can reuse a deleted
+ bin, the result bin value is always empty if the table has no entry
+ with KEY. Return the content of the found bin (the entries array
+ index biased by ENTRY_BASE) or UNDEFINED_ENTRY_IND if the entry is
+ not found; in the latter case TAB->num_entries is incremented
+ here. If the table was rebuilt during the search, return
+ REBUILT_TABLE_ENTRY_IND; *BIN_IND is not set in that case.
+ *HASH_VALUE is only read by this function. */
+static st_index_t
+find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
+ st_data_t key, st_index_t *bin_ind)
+{
+ int eq_p, rebuilt_p;
+ st_index_t ind;
+ st_hash_t curr_hash_value = *hash_value;
+#ifdef QUADRATIC_PROBE
+ st_index_t d;
+#else
+ st_index_t perturb;
+#endif
+ st_index_t entry_index;
+ st_index_t first_deleted_bin_ind;
+ st_table_entry *entries;
+
+ ind = hash_bin(curr_hash_value, tab);
+#ifdef QUADRATIC_PROBE
+ d = 1;
+#else
+ perturb = curr_hash_value;
+#endif
+ FOUND_BIN;
+ first_deleted_bin_ind = UNDEFINED_BIN_IND;
+ entries = tab->entries;
+ for (;;) {
+ entry_index = get_bin(st_bins_ptr(tab), get_size_ind(tab), ind);
+ if (EMPTY_BIN_P(entry_index)) {
+ /* End of the probe sequence: the key is absent, so account
+ for the entry the caller is going to add. */
+ tab->num_entries++;
+ entry_index = UNDEFINED_ENTRY_IND;
+ if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
+ /* We can reuse bin of a deleted entry. */
+ ind = first_deleted_bin_ind;
+ MARK_BIN_EMPTY(tab, ind);
+ }
+ break;
+ }
+ else if (! DELETED_BIN_P(entry_index)) {
+ /* An occupied bin: compare its entry with the searched key. */
+ DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
+ if (EXPECT(rebuilt_p, 0))
+ return REBUILT_TABLE_ENTRY_IND;
+ if (eq_p)
+ break;
+ }
+ else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
+ /* Remember the first deleted bin on the probe sequence. */
+ first_deleted_bin_ind = ind;
+#ifdef QUADRATIC_PROBE
+ ind = hash_bin(ind + d, tab);
+ d++;
+#else
+ ind = secondary_hash(ind, tab, &perturb);
+#endif
+ COLLISION;
+ }
+ *bin_ind = ind;
+ return entry_index;
+}
+
+/* Find an entry with KEY in table TAB. Return non-zero if we found
+ it, zero otherwise. Set up *VALUE (unless VALUE is zero) to the
+ found entry record. The search is restarted whenever it reports
+ that the table was rebuilt. */
+int
+st_lookup(st_table *tab, st_data_t key, st_data_t *value)
+{
+ st_index_t bin;
+ st_hash_t hash = do_hash(key, tab);
+
+ retry:
+ if (!st_has_bins(tab)) {
+ /* No bins: linear search; BIN is an entries array index. */
+ bin = find_entry(tab, hash, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ if (bin == UNDEFINED_ENTRY_IND)
+ return 0;
}
else {
- if (value != 0) *value = ptr->record;
- return 1;
- }
-}
-
-#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
-do {\
- st_table_entry *entry;\
- if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\
- rehash(table);\
- bin_pos = hash_val % table->num_bins;\
- }\
- \
- entry = alloc(st_table_entry);\
- \
- entry->hash = hash_val;\
- entry->key = key;\
- entry->record = value;\
- entry->next = table->bins[bin_pos];\
- table->bins[bin_pos] = entry;\
- table->num_entries++;\
-} while (0)
+ bin = find_table_entry_ind(tab, hash, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ if (bin == UNDEFINED_ENTRY_IND)
+ return 0;
+ /* Convert the bin content to an entries array index. */
+ bin -= ENTRY_BASE;
+ }
+ if (value != 0)
+ *value = tab->entries[bin].record;
+ return 1;
+}
+/* Find an entry with KEY in table TAB. Return non-zero if we found
+ it, zero otherwise. Set up *RESULT (unless RESULT is zero) to the
+ key stored in the found table entry. The search is restarted
+ whenever it reports that the table was rebuilt. */
int
-st_insert(table, key, value)
- register st_table *table;
- register st_data_t key;
- st_data_t value;
+st_get_key(st_table *tab, st_data_t key, st_data_t *result)
{
- unsigned int hash_val, bin_pos;
- register st_table_entry *ptr;
+ st_index_t bin;
+ st_hash_t hash = do_hash(key, tab);
+
+ retry:
+ if (!st_has_bins(tab)) {
+ /* No bins: linear search; BIN is an entries array index. */
+ bin = find_entry(tab, hash, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ if (bin == UNDEFINED_ENTRY_IND)
+ return 0;
+ }
+ else {
+ bin = find_table_entry_ind(tab, hash, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ if (bin == UNDEFINED_ENTRY_IND)
+ return 0;
+ /* Convert the bin content to an entries array index. */
+ bin -= ENTRY_BASE;
+ }
+ if (result != 0)
+ *result = tab->entries[bin].key;
+ return 1;
+}
- hash_val = do_hash(key, table);
- FIND_ENTRY(table, ptr, hash_val, bin_pos);
+/* Check the table and rebuild it if it is necessary, i.e. if the
+ entries bound has reached the number of allocated entries so that
+ there is no free slot at the end of the entries array. */
+static inline void
+rebuild_table_if_necessary (st_table *tab)
+{
+ st_index_t bound = tab->entries_bound;
- if (ptr == 0) {
- ADD_DIRECT(table, key, value, hash_val, bin_pos);
- return 0;
+ if (bound == get_allocated_entries(tab))
+ rebuild_table(tab);
+}
+
+/* Insert (KEY, VALUE) into table TAB and return zero. If there is
+ already entry with KEY in the table, return nonzero and update
+ the value of the found entry. The table is rebuilt first when its
+ entries array has no free slot at the end, and the search is
+ restarted whenever it reports that the table was rebuilt. */
+int
+st_insert(st_table *tab, st_data_t key, st_data_t value)
+{
+ st_table_entry *entry;
+ st_index_t bin;
+ st_index_t ind;
+ st_hash_t hash_value;
+ st_index_t bin_ind;
+ int new_p;
+
+ hash_value = do_hash(key, tab);
+ retry:
+ rebuild_table_if_necessary(tab);
+ if (!st_has_bins(tab)) {
+ bin = find_entry(tab, hash_value, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ new_p = bin == UNDEFINED_ENTRY_IND;
+ if (new_p)
+ tab->num_entries++;
+ /* There is no bin to fill in when the table has no bins. */
+ bin_ind = UNDEFINED_BIN_IND;
}
else {
- ptr->record = value;
- return 1;
+ /* This also increments tab->num_entries when the key is new. */
+ bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
+ key, &bin_ind);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ new_p = bin == UNDEFINED_ENTRY_IND;
+ /* Convert the bin content to an entries array index (the
+ result is used only when the entry was found). */
+ bin -= ENTRY_BASE;
}
+ if (new_p) {
+ /* Append the new entry at the end of the entries array. */
+ ind = tab->entries_bound++;
+ entry = &tab->entries[ind];
+ entry->hash = hash_value;
+ entry->key = key;
+ entry->record = value;
+ if (bin_ind != UNDEFINED_BIN_IND)
+ set_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
+ return 0;
+ }
+ tab->entries[bin].record = value;
+ return 1;
}
-void
-st_add_direct(table, key, value)
- st_table *table;
- st_data_t key;
- st_data_t value;
+/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
+ entry with KEY before the insertion: no search for an existing
+ entry is done. HASH must not be RESERVED_HASH_VAL. The table is
+ rebuilt first when its entries array has no free slot at the
+ end. */
+static inline void
+st_add_direct_with_hash(st_table *tab,
+ st_data_t key, st_data_t value, st_hash_t hash)
{
- unsigned int hash_val, bin_pos;
+ st_table_entry *entry;
+ st_index_t ind;
+ st_index_t bin_ind;
+
+ assert(hash != RESERVED_HASH_VAL);
+
+ rebuild_table_if_necessary(tab);
+ /* Append the new entry at the end of the entries array. */
+ ind = tab->entries_bound++;
+ entry = &tab->entries[ind];
+ entry->hash = hash;
+ entry->key = key;
+ entry->record = value;
+ tab->num_entries++;
+ if (st_has_bins(tab)) {
+ /* Take the first empty or deleted bin on the probe sequence. */
+ bin_ind = find_table_bin_ind_direct(tab, hash, key);
+ set_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
+ }
+}
- hash_val = do_hash(key, table);
- bin_pos = hash_val % table->num_bins;
- ADD_DIRECT(table, key, value, hash_val, bin_pos);
+/* Insert (KEY, VALUE) into table TAB using the caller-supplied HASH,
+ which is passed through normalize_hash_value first. The table
+ should not have entry with KEY before the insertion. */
+void
+rb_st_add_direct_with_hash(st_table *tab,
+ st_data_t key, st_data_t value, st_hash_t hash)
+{
+ st_add_direct_with_hash(tab, key, value, normalize_hash_value(hash));
}
-static void
-rehash(table)
- register st_table *table;
+/* Insert (KEY, VALUE) into table TAB. The table should not have
+ entry with KEY before the insertion. The hash of KEY is computed
+ with do_hash and the insertion itself is done by
+ st_add_direct_with_hash. */
+void
+st_add_direct(st_table *tab, st_data_t key, st_data_t value)
{
- register st_table_entry *ptr, *next, **new_bins;
- int i, old_num_bins = table->num_bins, new_num_bins;
- unsigned int hash_val;
+ st_hash_t hash_value;
+
+ hash_value = do_hash(key, tab);
+ st_add_direct_with_hash(tab, key, value, hash_value);
+}
- new_num_bins = new_size(old_num_bins+1);
- new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*));
+/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
+ there is already entry with KEY in the table, return nonzero and
+ update the value of the found entry. FUNC is called only when the
+ entry is new; the search and the stored hash use the original KEY,
+ while the key stored in the new entry is the result of FUNC. */
+int
+st_insert2(st_table *tab, st_data_t key, st_data_t value,
+ st_data_t (*func)(st_data_t))
+{
+ st_table_entry *entry;
+ st_index_t bin;
+ st_index_t ind;
+ st_hash_t hash_value;
+ st_index_t bin_ind;
+ int new_p;
+
+ hash_value = do_hash(key, tab);
+ retry:
+ rebuild_table_if_necessary (tab);
+ if (!st_has_bins(tab)) {
+ bin = find_entry(tab, hash_value, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ new_p = bin == UNDEFINED_ENTRY_IND;
+ if (new_p)
+ tab->num_entries++;
+ /* There is no bin to fill in when the table has no bins. */
+ bin_ind = UNDEFINED_BIN_IND;
+ }
+ else {
+ /* This also increments tab->num_entries when the key is new. */
+ bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
+ key, &bin_ind);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ new_p = bin == UNDEFINED_ENTRY_IND;
+ /* Convert the bin content to an entries array index (the
+ result is used only when the entry was found). */
+ bin -= ENTRY_BASE;
+ }
+ if (new_p) {
+ /* NOTE(review): FUNC runs after the table was searched and
+ num_entries was updated; presumably it must not modify TAB
+ -- confirm with callers. */
+ key = (*func)(key);
+ ind = tab->entries_bound++;
+ entry = &tab->entries[ind];
+ entry->hash = hash_value;
+ entry->key = key;
+ entry->record = value;
+ if (bin_ind != UNDEFINED_BIN_IND)
+ set_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
+ return 0;
+ }
+ tab->entries[bin].record = value;
+ return 1;
+}
- for(i = 0; i < old_num_bins; i++) {
- ptr = table->bins[i];
- while (ptr != 0) {
- next = ptr->next;
- hash_val = ptr->hash % new_num_bins;
- ptr->next = new_bins[hash_val];
- new_bins[hash_val] = ptr;
- ptr = next;
- }
+/* Create a copy of OLD_TAB in NEW_TAB: copy the table structure,
+ allocate a new storage of the same size (entries array plus bins)
+ and copy the whole storage of OLD_TAB into it. Return NEW_TAB.
+ When RUBY is not defined and the allocation fails, return NULL; in
+ that case the structure of NEW_TAB has already been overwritten and
+ NEW_TAB->entries is NULL. Whatever NEW_TAB->entries pointed to
+ before the call is not freed here. */
+st_table *
+st_replace(st_table *new_tab, st_table *old_tab)
+{
+ *new_tab = *old_tab;
+ size_t memsize = get_allocated_entries(old_tab) * sizeof(st_table_entry);
+ memsize += bins_size(old_tab);
+ new_tab->entries = (st_table_entry *) malloc(memsize);
+#ifndef RUBY
+ if (new_tab->entries == NULL) {
+ return NULL;
}
- free(table->bins);
- table->num_bins = new_num_bins;
- table->bins = new_bins;
+#endif
+ /* The size is given in bytes, hence the element type char. */
+ MEMCPY(new_tab->entries, old_tab->entries, char, memsize);
+
+ return new_tab;
}
-st_table*
-st_copy(old_table)
- st_table *old_table;
+/* Create and return a copy of table OLD_TAB: allocate a new table
+ structure and fill it in with st_replace. Return NULL when
+ st_replace fails (the new table is freed first) or, when RUBY is
+ not defined, when the structure allocation fails. */
+st_table *
+st_copy(st_table *old_tab)
{
- st_table *new_table;
- st_table_entry *ptr, *entry;
- int i, num_bins = old_table->num_bins;
+ st_table *new_tab;
- new_table = alloc(st_table);
- if (new_table == 0) {
- return 0;
+ new_tab = (st_table *) malloc(sizeof(st_table));
+#ifndef RUBY
+ if (new_tab == NULL)
+ return NULL;
+#endif
+
+ if (st_replace(new_tab, old_tab) == NULL) {
+ st_free_table(new_tab);
+ return NULL;
}
- *new_table = *old_table;
- new_table->bins = (st_table_entry**)
- Calloc((unsigned)num_bins, sizeof(st_table_entry*));
+ return new_tab;
+}
- if (new_table->bins == 0) {
- free(new_table);
- return 0;
+/* Update the entries start of table TAB after removing an entry
+ with index N in the array entries. Nothing changes unless N was
+ the entries start; otherwise the start is moved forward past N and
+ any deleted entries directly following it, but not beyond the
+ entries bound. */
+static inline void
+update_range_for_deleted(st_table *tab, st_index_t n)
+{
+ /* Do not update entries_bound here. Otherwise, we can fill all
+ bins by deleted entry value before rebuilding the table. */
+ if (tab->entries_start == n) {
+ st_index_t start = n + 1;
+ st_index_t bound = tab->entries_bound;
+ st_table_entry *entries = tab->entries;
+ while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
+ tab->entries_start = start;
}
+}
- for(i = 0; i < num_bins; i++) {
- new_table->bins[i] = 0;
- ptr = old_table->bins[i];
- while (ptr != 0) {
- entry = alloc(st_table_entry);
- if (entry == 0) {
- free(new_table->bins);
- free(new_table);
- return 0;
- }
- *entry = *ptr;
- entry->next = new_table->bins[i];
- new_table->bins[i] = entry;
- ptr = ptr->next;
- }
+/* Delete entry with KEY from table TAB, set up *VALUE (unless
+ VALUE is zero) from deleted table entry, and return non-zero. If
+ there is no entry with KEY in the table, clear *VALUE (unless VALUE
+ is zero), and return zero. KEY is a pointer: *KEY is the key to
+ search for and, on success, is overwritten with the key stored in
+ the deleted entry. */
+static int
+st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
+{
+ st_table_entry *entry;
+ st_index_t bin;
+ st_index_t bin_ind;
+ st_hash_t hash;
+
+ hash = do_hash(*key, tab);
+ retry:
+ if (!st_has_bins(tab)) {
+ /* No bins: linear search; BIN is an entries array index. */
+ bin = find_entry(tab, hash, *key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ if (bin == UNDEFINED_ENTRY_IND) {
+ if (value != 0) *value = 0;
+ return 0;
+ }
+ }
+ else {
+ bin_ind = find_table_bin_ind(tab, hash, *key);
+ if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
+ goto retry;
+ if (bin_ind == UNDEFINED_BIN_IND) {
+ if (value != 0) *value = 0;
+ return 0;
+ }
+ /* Read the entry index out of the bin before marking the bin
+ as deleted. */
+ bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind) - ENTRY_BASE;
+ MARK_BIN_DELETED(tab, bin_ind);
}
- return new_table;
+ entry = &tab->entries[bin];
+ *key = entry->key;
+ if (value != 0) *value = entry->record;
+ MARK_ENTRY_DELETED(entry);
+ tab->num_entries--;
+ update_range_for_deleted(tab, bin);
+ return 1;
}
+/* Delete entry with *KEY from table TAB. This is a plain call of
+ st_general_delete; see its comment for the meaning of the
+ arguments and of the result. */
int
-st_delete(table, key, value)
- register st_table *table;
- register st_data_t *key;
- st_data_t *value;
+st_delete(st_table *tab, st_data_t *key, st_data_t *value)
{
- unsigned int hash_val;
- st_table_entry *tmp;
- register st_table_entry *ptr;
+ return st_general_delete(tab, key, value);
+}
- hash_val = do_hash_bin(*key, table);
- ptr = table->bins[hash_val];
+/* The function and other functions with suffix '_safe' or '_check'
+ originate from the previous implementation of the hash tables.
+ They were necessary for correct deletion of entries during table
+ traversal. The current implementation permits deletion during
+ traversal without any special way of doing it, so this function
+ simply calls st_general_delete and ignores NEVER. */
+int
+st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
+ st_data_t never ATTRIBUTE_UNUSED)
+{
+ return st_general_delete(tab, key, value);
+}
- if (ptr == 0) {
- if (value != 0) *value = 0;
- return 0;
+/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
+ return zero. Otherwise, remove the first entry in the table.
+ Return its key through KEY and its record through VALUE (unless
+ VALUE is zero), and return non-zero. */
+int
+st_shift(st_table *tab, st_data_t *key, st_data_t *value)
+{
+ st_index_t i, bound;
+ st_index_t bin;
+ st_table_entry *entries, *curr_entry_ptr;
+ st_index_t bin_ind;
+
+ entries = tab->entries;
+ bound = tab->entries_bound;
+ for (i = tab->entries_start; i < bound; i++) {
+ curr_entry_ptr = &entries[i];
+ if (! DELETED_ENTRY_P(curr_entry_ptr)) {
+ /* The first non-deleted entry: save its hash and key for
+ the search below and hand the key and record out. */
+ st_hash_t entry_hash = curr_entry_ptr->hash;
+ st_data_t entry_key = curr_entry_ptr->key;
+
+ if (value != 0) *value = curr_entry_ptr->record;
+ *key = entry_key;
+ retry:
+ /* Find the entry again by its hash and key; reload the
+ entries pointer and retry if the search reports a table
+ rebuild. */
+ if (!st_has_bins(tab)) {
+ bin = find_entry(tab, entry_hash, entry_key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
+ entries = tab->entries;
+ goto retry;
+ }
+ curr_entry_ptr = &entries[bin];
+ }
+ else {
+ bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
+ if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
+ entries = tab->entries;
+ goto retry;
+ }
+ curr_entry_ptr = &entries[get_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind)
+ - ENTRY_BASE];
+ MARK_BIN_DELETED(tab, bin_ind);
+ }
+ MARK_ENTRY_DELETED(curr_entry_ptr);
+ tab->num_entries--;
+ /* NOTE(review): I is the index found before the search
+ above; presumably it is still the entry's index after a
+ rebuild -- confirm. */
+ update_range_for_deleted(tab, i);
+ return 1;
+ }
}
+ if (value != 0) *value = 0;
+ return 0;
+}
- if (EQUAL(table, *key, ptr->key)) {
- table->bins[hash_val] = ptr->next;
- table->num_entries--;
- if (value != 0) *value = ptr->record;
- *key = ptr->key;
- free(ptr);
- return 1;
- }
+/* See comments for function st_delete_safe. The function does
+ nothing; both arguments are ignored. */
+void
+st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
+ st_data_t never ATTRIBUTE_UNUSED)
+{
+}
- for(; ptr->next != 0; ptr = ptr->next) {
- if (EQUAL(table, ptr->next->key, *key)) {
- tmp = ptr->next;
- ptr->next = ptr->next->next;
- table->num_entries--;
- if (value != 0) *value = tmp->record;
- *key = tmp->key;
- free(tmp);
- return 1;
- }
+/* Find entry with KEY in table TAB, call FUNC with pointers to copies
+ of the key and the value of the found entry, and non-zero as the
+ 3rd argument. If the entry is not found, call FUNC with a pointer
+ to KEY, a pointer to zero, and a zero argument. If the call
+ returns ST_CONTINUE, the table will have an entry with key and
+ value returned by FUNC through the 1st and 2nd parameters. If the
+ call of FUNC returns ST_DELETE, the table will not have entry with
+ KEY. Any other result of FUNC leaves the table unchanged. The
+ function returns flag of that the entry with KEY was in the table
+ before the call. ARG is passed to FUNC as is. FUNC must not
+ cause a rebuild of TAB (this is checked by an assertion). */
+int
+st_update(st_table *tab, st_data_t key,
+ st_update_callback_func *func, st_data_t arg)
+{
+ st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
+ st_index_t bin = 0; /* Ditto */
+ st_table_entry *entries;
+ st_index_t bin_ind;
+ st_data_t value = 0, old_key;
+ int retval, existing;
+ st_hash_t hash = do_hash(key, tab);
+
+ retry:
+ entries = tab->entries;
+ if (!st_has_bins(tab)) {
+ bin = find_entry(tab, hash, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ existing = bin != UNDEFINED_ENTRY_IND;
+ /* ENTRY is dereferenced below only when EXISTING is non-zero. */
+ entry = &entries[bin];
+ bin_ind = UNDEFINED_BIN_IND;
+ }
+ else {
+ bin_ind = find_table_bin_ind(tab, hash, key);
+ if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
+ goto retry;
+ existing = bin_ind != UNDEFINED_BIN_IND;
+ if (existing) {
+ bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind) - ENTRY_BASE;
+ entry = &entries[bin];
+ }
}
+ if (existing) {
+ key = entry->key;
+ value = entry->record;
+ }
+ old_key = key;
+
+ unsigned int rebuilds_num = tab->rebuilds_num;
+
+ retval = (*func)(&key, &value, arg, existing);
+
+ // We need to make sure that the callback didn't cause a table rebuild
+ // Ideally we would make sure no operations happened
+ assert(rebuilds_num == tab->rebuilds_num);
+ (void)rebuilds_num;
+
+ switch (retval) {
+ case ST_CONTINUE:
+ if (! existing) {
+ /* A new entry is added with the hash computed from the
+ original KEY. */
+ st_add_direct_with_hash(tab, key, value, hash);
+ break;
+ }
+ if (old_key != key) {
+ entry->key = key;
+ }
+ entry->record = value;
+ break;
+ case ST_DELETE:
+ if (existing) {
+ if (bin_ind != UNDEFINED_BIN_IND)
+ MARK_BIN_DELETED(tab, bin_ind);
+ MARK_ENTRY_DELETED(entry);
+ tab->num_entries--;
+ update_range_for_deleted(tab, bin);
+ }
+ break;
+ }
+ return existing;
+}
+/* Traverse all entries in table TAB calling FUNC with current entry
+ key and value and zero. If the call returns ST_STOP, stop
+ traversing. If the call returns ST_DELETE, delete the current
+ entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
+ traversing. If the call returns ST_REPLACE and REPLACE is not
+ zero, REPLACE is called with pointers to the key and the value, the
+ entry is updated from them, and the result of REPLACE is processed
+ instead. The function returns zero unless an error is found.
+ CHECK_P is flag of st_foreach_check call. The behavior is a bit
+ different for ST_CHECK and when the current element is removed
+ during traversing: without CHECK_P, ST_CHECK stops the traversal
+ like ST_STOP; with CHECK_P, if the table was rebuilt during the
+ callback and the current entry cannot be found any more, FUNC is
+ called with a non-zero 4th argument and the function returns 1. */
+static inline int
+st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
+ int check_p)
+{
+ st_index_t bin;
+ st_index_t bin_ind;
+ st_table_entry *entries, *curr_entry_ptr;
+ enum st_retval retval;
+ st_index_t i, rebuilds_num;
+ st_hash_t hash;
+ st_data_t key;
+ int error_p, packed_p = !st_has_bins(tab);
+
+ entries = tab->entries;
+ /* The bound can change inside the loop even without rebuilding
+ the table, e.g. by an entry insertion. */
+ for (i = tab->entries_start; i < tab->entries_bound; i++) {
+ curr_entry_ptr = &entries[i];
+ if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
+ continue;
+ /* Save what is needed to find the entry again after the
+ callback. */
+ key = curr_entry_ptr->key;
+ rebuilds_num = tab->rebuilds_num;
+ hash = curr_entry_ptr->hash;
+ retval = (*func)(key, curr_entry_ptr->record, arg, 0);
+
+ if (retval == ST_REPLACE && replace) {
+ st_data_t value;
+ value = curr_entry_ptr->record;
+ retval = (*replace)(&key, &value, arg, TRUE);
+ curr_entry_ptr->key = key;
+ curr_entry_ptr->record = value;
+ }
+
+ if (rebuilds_num != tab->rebuilds_num) {
+ /* The table was rebuilt during the callback: reload the
+ entries pointer and find the current entry again. */
+ retry:
+ entries = tab->entries;
+ packed_p = !st_has_bins(tab);
+ if (packed_p) {
+ i = find_entry(tab, hash, key);
+ if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ error_p = i == UNDEFINED_ENTRY_IND;
+ }
+ else {
+ i = find_table_entry_ind(tab, hash, key);
+ if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ error_p = i == UNDEFINED_ENTRY_IND;
+ i -= ENTRY_BASE;
+ }
+ if (error_p && check_p) {
+ /* call func with error notice */
+ retval = (*func)(0, 0, arg, 1);
+ return 1;
+ }
+ /* NOTE(review): when the entry was not found and CHECK_P is
+ zero, I does not index a valid entry here -- confirm that
+ this case cannot happen or is handled by the callers. */
+ curr_entry_ptr = &entries[i];
+ }
+ switch (retval) {
+ case ST_REPLACE:
+ break;
+ case ST_CONTINUE:
+ break;
+ case ST_CHECK:
+ if (check_p)
+ break;
+ /* fall through: without CHECK_P, ST_CHECK stops the traversal */
+ case ST_STOP:
+ return 0;
+ case ST_DELETE: {
+ st_data_t key = curr_entry_ptr->key;
+
+ again:
+ /* Find the entry by its hash and key; nothing is deleted
+ if it is not found. */
+ if (packed_p) {
+ bin = find_entry(tab, hash, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto again;
+ if (bin == UNDEFINED_ENTRY_IND)
+ break;
+ }
+ else {
+ bin_ind = find_table_bin_ind(tab, hash, key);
+ if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
+ goto again;
+ if (bin_ind == UNDEFINED_BIN_IND)
+ break;
+ bin = get_bin(st_bins_ptr(tab), get_size_ind(tab), bin_ind) - ENTRY_BASE;
+ MARK_BIN_DELETED(tab, bin_ind);
+ }
+ curr_entry_ptr = &entries[bin];
+ MARK_ENTRY_DELETED(curr_entry_ptr);
+ tab->num_entries--;
+ update_range_for_deleted(tab, bin);
+ break;
+ }
+ }
+ }
return 0;
}
+/* Traverse table TAB with st_general_foreach, with the check flag
+ set and with REPLACE as the callback used when FUNC returns
+ ST_REPLACE. ARG is passed to both callbacks. */
int
-st_delete_safe(table, key, value, never)
- register st_table *table;
- register st_data_t *key;
- st_data_t *value;
- st_data_t never;
+st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
+{
+ return st_general_foreach(tab, func, replace, arg, TRUE);
+}
+
+/* A three-argument foreach callback together with the argument to
+ pass to it; see apply_functor. */
+struct functor {
+ st_foreach_callback_func *func; /* the callback to call */
+ st_data_t arg; /* passed to FUNC as its 3rd argument */
+};
+
+/* Adapter used by st_foreach: D carries a pointer to a struct
+   functor whose callback is called with K, V and the stored argument.
+   The 4th argument is ignored. */
+static int
+apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
+{
+    const struct functor *const wrapped = (void *)d;
+
+    return wrapped->func(k, v, wrapped->arg);
+}
+
+/* Traverse table TAB calling FUNC with the key and the value of each
+ entry and with ARG. FUNC and ARG are wrapped into a struct functor
+ and the traversal is done by st_general_foreach through
+ apply_functor, without a replace callback and with the check flag
+ clear. */
+int
+st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
{
- unsigned int hash_val;
- register st_table_entry *ptr;
+ const struct functor f = { func, arg };
+ return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
+}
- hash_val = do_hash_bin(*key, table);
- ptr = table->bins[hash_val];
+/* See comments for function st_delete_safe. Traverse table TAB with
+ st_general_foreach, with the check flag set and without a replace
+ callback. NEVER is ignored. */
+int
+st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
+ st_data_t never ATTRIBUTE_UNUSED)
+{
+ return st_general_foreach(tab, func, 0, arg, TRUE);
+}
- if (ptr == 0) {
- if (value != 0) *value = 0;
- return 0;
+/* Set up array KEYS by at most SIZE keys of head table TAB entries,
+ in the order of the entries array and skipping deleted entries.
+ Return the number of keys set up in array KEYS. */
+static inline st_index_t
+st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
+{
+ st_index_t i, bound;
+ st_data_t key, *keys_start, *keys_end;
+ st_table_entry *curr_entry_ptr, *entries = tab->entries;
+
+ bound = tab->entries_bound;
+ keys_start = keys;
+ keys_end = keys + size;
+ for (i = tab->entries_start; i < bound; i++) {
+ /* Stop as soon as the output array is full. */
+ if (keys == keys_end)
+ break;
+ curr_entry_ptr = &entries[i];
+ key = curr_entry_ptr->key;
+ if (! DELETED_ENTRY_P(curr_entry_ptr))
+ *keys++ = key;
}
- for(; ptr != 0; ptr = ptr->next) {
- if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) {
- table->num_entries--;
- *key = ptr->key;
- if (value != 0) *value = ptr->record;
- ptr->key = ptr->record = never;
- return 1;
- }
+ return keys - keys_start;
+}
+
+/* Set up array KEYS by at most SIZE keys of table TAB and return the
+ number of keys stored; a plain call of st_general_keys. */
+st_index_t
+st_keys(st_table *tab, st_data_t *keys, st_index_t size)
+{
+ return st_general_keys(tab, keys, size);
+}
+
+/* See comments for function st_delete_safe. The function does the
+ same as st_keys; NEVER is ignored. */
+st_index_t
+st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
+ st_data_t never ATTRIBUTE_UNUSED)
+{
+ return st_general_keys(tab, keys, size);
+}
+
+/* Set up array VALUES by at most SIZE values of head table TAB
+ entries, in the order of the entries array and skipping deleted
+ entries. Return the number of values set up in array VALUES. */
+static inline st_index_t
+st_general_values(st_table *tab, st_data_t *values, st_index_t size)
+{
+ st_index_t i, bound;
+ st_data_t *values_start, *values_end;
+ st_table_entry *curr_entry_ptr, *entries = tab->entries;
+
+ values_start = values;
+ values_end = values + size;
+ bound = tab->entries_bound;
+ for (i = tab->entries_start; i < bound; i++) {
+ /* Stop as soon as the output array is full. */
+ if (values == values_end)
+ break;
+ curr_entry_ptr = &entries[i];
+ if (! DELETED_ENTRY_P(curr_entry_ptr))
+ *values++ = curr_entry_ptr->record;
}
+ return values - values_start;
+}
+
+/* Set up array VALUES by at most SIZE values of table TAB and return
+ the number of values stored; a plain call of st_general_values. */
+st_index_t
+st_values(st_table *tab, st_data_t *values, st_index_t size)
+{
+ return st_general_values(tab, values, size);
+}
+
+/* See comments for function st_delete_safe. The function does the
+ same as st_values; NEVER is ignored. */
+st_index_t
+st_values_check(st_table *tab, st_data_t *values, st_index_t size,
+ st_data_t never ATTRIBUTE_UNUSED)
+{
+ return st_general_values(tab, values, size);
+}
+
+/* 32 bit FNV-1a initial value */
+#define FNV1_32A_INIT 0x811c9dc5
+
+/* 32 bit magic FNV-1a prime */
+#define FNV_32_PRIME 0x01000193
+
+/* Decide whether words may be read through unaligned pointers, unless
+   the build already made the choice.  __POWERPC__ is listed to
+   accommodate the Darwin case. */
+#ifndef UNALIGNED_WORD_ACCESS
+# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
+     defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
+     defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
+     defined(__mc68020__)
+#  define UNALIGNED_WORD_ACCESS 1
+# else
+#  define UNALIGNED_WORD_ACCESS 0
+# endif
+#endif
+
+/* This hash function is a considerably simplified MurmurHash3.
+ * The simplification is legitimate because most of the mixing still
+ * happens in the finalizer, and the finalizer is almost the same as in
+ * MurmurHash3. */
+
+/* Build an st_index_t constant from a high word X and a low word Y. */
+#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
+/* Rotate X left by N bits within the width of st_index_t. */
+#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
+
+/* Multipliers of the mixing step.  They expand to plain parenthesized
+   expressions (no trailing semicolon), so they can be used anywhere an
+   expression is allowed, not only as the last token of a statement. */
+#if ST_INDEX_BITS <= 32
+#define C1 ((st_index_t)0xcc9e2d51)
+#define C2 ((st_index_t)0x1b873593)
+#else
+#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5)
+#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f)
+#endif
+/* Prototypes of the hashing routines, wrapped in NO_SANITIZE for
+   "unsigned-integer-overflow".  NOTE(review): presumably this excludes
+   their intentional wrap-around arithmetic from that sanitizer check;
+   confirm against the NO_SANITIZE definition. */
+NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
+NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
+NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
+
+/* Mix word K into running hash H: multiply K by C1, xor its rotation
+   into H, multiply H by C2 and rotate the result. */
+static inline st_index_t
+murmur_step(st_index_t h, st_index_t k)
+{
+#if ST_INDEX_BITS <= 32
+    enum {rot_k = 17, rot_h = 11};
+#else
+    enum {rot_k = 33, rot_h = 24};
+#endif
+    k *= C1;
+    h ^= ROTL(k, rot_k);
+    h *= C2;
+    return ROTL(h, rot_h);
+}
+
+/* Final avalanche of hash H: alternate xor-shifts and multiplications
+   and return the mixed value. */
+static inline st_index_t
+murmur_finish(st_index_t h)
+{
+#if ST_INDEX_BITS <= 32
+    enum {shift1 = 16, shift2 = 13, shift3 = 16};
+    const st_index_t mul1 = 0x85ebca6b;
+    const st_index_t mul2 = 0xc2b2ae35;
+#else
+/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
+    enum {shift1 = 30, shift2 = 27, shift3 = 31};
+    const st_index_t mul1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
+    const st_index_t mul2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
+#endif
+#if ST_INDEX_BITS > 64
+    h ^= h >> 64;
+    h *= mul2;
+    h ^= h >> 65;
+#endif
+    h ^= h >> shift1;
+    h *= mul1;
+    h ^= h >> shift2;
+    h *= mul2;
+    return h ^ (h >> shift3);
+}
+
+/* Hash LEN bytes starting at PTR, continuing from hash state H.
+ The data is consumed in st_index_t sized words through murmur_step;
+ the remaining tail bytes are packed into one word and mixed in
+ separately; the original length is xor-ed in and the result is
+ passed through murmur_finish. */
+st_index_t
+st_hash(const void *ptr, size_t len, st_index_t h)
+{
+ const char *data = ptr;
+ st_index_t t = 0;
+ size_t l = len;
+
+#define data_at(n) (st_index_t)((unsigned char)data[(n)])
+#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
+/* Exactly one definition of UNALIGNED_ADD_ALL is selected by the
+ width of st_index_t. */
+#if SIZEOF_ST_INDEX_T > 4
+#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
+#if SIZEOF_ST_INDEX_T > 8
+#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
+ UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
+#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
+#else
+#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
+#endif
+#else
+#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
+#endif
+#undef SKIP_TAIL
+ if (len >= sizeof(st_index_t)) {
+#if !UNALIGNED_WORD_ACCESS
+ /* word reads must be aligned here: handle a misaligned start by
+ reading aligned words and stitching them together with shifts */
+ int align = (int)((st_data_t)data % sizeof(st_index_t));
+ if (align) {
+ st_index_t d = 0;
+ int sl, sr, pack;
+
+ /* collect the leading bytes up to the next aligned address;
+ the cases fall through on purpose */
+ switch (align) {
+#ifdef WORDS_BIGENDIAN
+# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
+ t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
+#else
+# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
+ t |= data_at(n) << CHAR_BIT*(n)
+#endif
+ UNALIGNED_ADD_ALL;
+#undef UNALIGNED_ADD
+ }
+
+#ifdef WORDS_BIGENDIAN
+ t >>= (CHAR_BIT * align) - CHAR_BIT;
+#else
+ t <<= (CHAR_BIT * align);
+#endif
+
+ data += sizeof(st_index_t)-align;
+ len -= sizeof(st_index_t)-align;
+
+ sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
+ sr = CHAR_BIT * align;
+
+ while (len >= sizeof(st_index_t)) {
+ d = *(st_index_t *)data;
+#ifdef WORDS_BIGENDIAN
+ t = (t << sr) | (d >> sl);
+#else
+ t = (t >> sr) | (d << sl);
+#endif
+ h = murmur_step(h, t);
+ t = d;
+ data += sizeof(st_index_t);
+ len -= sizeof(st_index_t);
+ }
+
+ pack = len < (size_t)align ? (int)len : align;
+ d = 0;
+ switch (pack) {
+#ifdef WORDS_BIGENDIAN
+# define UNALIGNED_ADD(n) case (n) + 1: \
+ d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
+#else
+# define UNALIGNED_ADD(n) case (n) + 1: \
+ d |= data_at(n) << CHAR_BIT*(n)
+#endif
+ UNALIGNED_ADD_ALL;
+#undef UNALIGNED_ADD
+ }
+#ifdef WORDS_BIGENDIAN
+ t = (t << sr) | (d >> sl);
+#else
+ t = (t >> sr) | (d << sl);
+#endif
+
+ /* fewer bytes left than were carried over: T already holds the
+ whole tail, so jump straight to the tail mixing */
+ if (len < (size_t)align) goto skip_tail;
+# define SKIP_TAIL 1
+ h = murmur_step(h, t);
+ data += pack;
+ len -= pack;
+ }
+ else
+#endif
+#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
+#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
+#else
+#define aligned_data data
+#endif
+ {
+ do {
+ h = murmur_step(h, *(st_index_t *)aligned_data);
+ data += sizeof(st_index_t);
+ len -= sizeof(st_index_t);
+ } while (len >= sizeof(st_index_t));
+ }
+ }
+
+ /* pack the remaining LEN (< word size) bytes into T; the cases fall
+ through on purpose. When LEN is 0 no case matches and the tail
+ mixing below is skipped. */
+ t = 0;
+ switch (len) {
+#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
+ /* in this case byteorder doesn't really matter */
+#if SIZEOF_ST_INDEX_T > 4
+ case 7: t |= data_at(6) << 48;
+ case 6: t |= data_at(5) << 40;
+ case 5: t |= data_at(4) << 32;
+ case 4:
+ t |= (st_index_t)*(uint32_t*)aligned_data;
+ goto skip_tail;
+# define SKIP_TAIL 1
+#endif
+ case 3: t |= data_at(2) << 16;
+ case 2: t |= data_at(1) << 8;
+ case 1: t |= data_at(0);
+#else
+#ifdef WORDS_BIGENDIAN
+# define UNALIGNED_ADD(n) case (n) + 1: \
+ t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
+#else
+# define UNALIGNED_ADD(n) case (n) + 1: \
+ t |= data_at(n) << CHAR_BIT*(n)
+#endif
+ UNALIGNED_ADD_ALL;
+#undef UNALIGNED_ADD
+#endif
+#ifdef SKIP_TAIL
+ skip_tail:
+#endif
+ h ^= t; h -= ROTL(t, 7);
+ h *= C2;
+ }
+ /* L is the original length saved on entry */
+ h ^= l;
+#undef aligned_data
+
+ return murmur_finish(h);
+}
+
+/* Mix the 32-bit integer I into running hash H with one murmur step. */
+st_index_t
+st_hash_uint32(st_index_t h, uint32_t i)
+{
+    st_index_t word = i;
+
+    return murmur_step(h, word);
+}
+
+NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
+/* Mix integer I into running hash H.  The sum I + H is fed to
+   murmur_step; when st_index_t is wider than 64 bits the part above
+   bit 64 is mixed in first. */
+st_index_t
+st_hash_uint(st_index_t h, st_index_t i)
+{
+    st_index_t word = i + h;
+
+/* no matter if it is BigEndian or LittleEndian,
+ * we hash just integers */
+#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
+    h = murmur_step(h, word >> 8*8);
+#endif
+    return murmur_step(h, word);
+}
+
+/* Finish running hash H by passing it through murmur_finish. */
+st_index_t
+st_hash_end(st_index_t h)
+{
+    return murmur_finish(h);
+}
+
+#undef st_hash_start
+/* Return the initial hash state for seed H; the seed is returned
+   unchanged. */
+st_index_t
+rb_st_hash_start(st_index_t h)
+{
+    const st_index_t state = h;
+
+    return state;
+}
+
+/* Hash the NUL-terminated string that ARG points to, seeding st_hash
+   with FNV1_32A_INIT. */
+static st_index_t
+strhash(st_data_t arg)
+{
+    const char *str = (const char *)arg;
+    size_t len = strlen(str);
+
+    return st_hash(str, len, FNV1_32A_INIT);
+}
+
+/* Compare strings S1 and S2 ignoring the case of ASCII letters only,
+   independently of the current locale.  Return 0 if they are equal,
+   1 if S1 sorts after S2 and -1 otherwise. */
+int
+st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
+{
+    for (;; s1++, s2++) {
+        char a = *s1;
+        char b = *s2;
+
+        /* one of the strings ended: the longer one is the greater */
+        if (a == '\0' || b == '\0')
+            return (a != '\0') - (b != '\0');
+
+        if ('A' <= a && a <= 'Z') a += 'a' - 'A';
+        if ('A' <= b && b <= 'Z') b += 'a' - 'A';
+        if (a != b)
+            return a > b ? 1 : -1;
+    }
+}
+
+/* Compare at most N characters of strings S1 and S2 ignoring the case
+ of ASCII letters only. Return 0 if the compared parts are equal,
+ 1 if S1 sorts after S2 and -1 otherwise. */
+int
+st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
+{
+ char c1, c2;
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ c1 = *s1++;
+ c2 = *s2++;
+ /* one of the strings ended: the longer one is the greater */
+ if (c1 == '\0' || c2 == '\0') {
+ if (c1 != '\0') return 1;
+ if (c2 != '\0') return -1;
+ return 0;
+ }
+ /* fold ASCII upper case letters to lower case */
+ if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
+ if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
+ if (c1 != c2) {
+ if (c1 > c2)
+ return 1;
+ else
+ return -1;
+ }
+ }
return 0;
}
+/* Compare keys LHS and RHS as NUL-terminated strings with strcmp. */
static int
-delete_never(key, value, never)
- st_data_t key, value, never;
+st_strcmp(st_data_t lhs, st_data_t rhs)
{
- if (value == never) return ST_DELETE;
- return ST_CONTINUE;
+ const char *s1 = (char *)lhs;
+ const char *s2 = (char *)rhs;
+ return strcmp(s1, s2);
}
-void
-st_cleanup_safe(table, never)
- st_table *table;
- st_data_t never;
+/* Compare keys LHS and RHS as strings with
+ st_locale_insensitive_strcasecmp. */
+static int
+st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
{
- int num_entries = table->num_entries;
+ const char *s1 = (char *)lhs;
+ const char *s2 = (char *)rhs;
+ return st_locale_insensitive_strcasecmp(s1, s2);
+}
- st_foreach(table, delete_never, never);
- table->num_entries = num_entries;
+NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
+/* Hash the NUL-terminated string that ARG points to, treating ASCII
+ upper case letters as their lower case counterparts. */
+static st_index_t
+strcasehash(st_data_t arg)
+{
+ register const char *string = (const char *)arg;
+ register st_index_t hval = FNV1_32A_INIT;
+
+ /*
+ * FNV-1a hash each octet in the buffer
+ */
+ while (*string) {
+ unsigned int c = (unsigned char)*string++;
+ /* single unsigned comparison that is true only for 'A'..'Z' */
+ if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
+ hval ^= c;
+
+ /* multiply by the 32 bit FNV magic prime mod 2^32 */
+ hval *= FNV_32_PRIME;
+ }
+ return hval;
}
+/* Compare numeric keys X and Y. Return 0 if they are equal and
+ non-zero otherwise. */
int
-st_foreach(table, func, arg)
- st_table *table;
- int (*func)();
- st_data_t arg;
+st_numcmp(st_data_t x, st_data_t y)
{
- st_table_entry *ptr, *last, *tmp;
- enum st_retval retval;
- int i;
-
- for(i = 0; i < table->num_bins; i++) {
- last = 0;
- for(ptr = table->bins[i]; ptr != 0;) {
- retval = (*func)(ptr->key, ptr->record, arg);
- switch (retval) {
- case ST_CHECK: /* check if hash is modified during iteration */
- tmp = 0;
- if (i < table->num_bins) {
- for (tmp = table->bins[i]; tmp; tmp=tmp->next) {
- if (tmp == ptr) break;
- }
- }
- if (!tmp) {
- /* call func with error notice */
- return 1;
- }
- /* fall through */
- case ST_CONTINUE:
- last = ptr;
- ptr = ptr->next;
- break;
- case ST_STOP:
- return 0;
- case ST_DELETE:
- tmp = ptr;
- if (last == 0) {
- table->bins[i] = ptr->next;
- }
- else {
- last->next = ptr->next;
- }
- ptr = ptr->next;
- free(tmp);
- table->num_entries--;
- }
- }
+ return x != y;
+}
+
+/* Hash numeric key N by xor-ing two shifted combinations of its
+   bits. */
+st_index_t
+st_numhash(st_data_t n)
+{
+    enum {s1 = 11, s2 = 3};
+    const st_data_t spread = (n >> s1) | (n << s2);
+    const st_data_t low = n >> s2;
+
+    return (st_index_t)(spread ^ low);
+}
+
+#ifdef RUBY
+/* Grow TAB so that it can hold SIZ entries in total.  Nothing is done
+   when TAB already has room for SIZ entries.  Otherwise the entries
+   array is copied into freshly allocated storage; existing entries
+   stay in place (none is deleted) but the bins are left cleared,
+   waiting for a later reconstruction.  See rehash below. */
+static void
+st_expand_table(st_table *tab, st_index_t siz)
+{
+    st_table *fresh;
+    st_index_t old_capacity;
+
+    if (siz <= get_allocated_entries(tab)) {
+        /* enough room already */
+        return;
+    }
+
+    fresh = st_init_table_with_size(tab->type, siz);
+    /* read the old capacity only after the allocation above */
+    old_capacity = get_allocated_entries(tab);
+    MEMCPY(fresh->entries, tab->entries, st_table_entry, old_capacity);
+    st_free_entries(tab);
+
+    /* take over the storage and geometry of the fresh table */
+    tab->entries = fresh->entries;
+    tab->entry_power = fresh->entry_power;
+    tab->bin_power = fresh->bin_power;
+    tab->size_ind = fresh->size_ind;
+    tab->rebuilds_num++;
+    free_fixed_ptr(fresh);
+}
+
+/* Rehash using linear search: compare every live entry with each
+ later live entry and, when two keys are equal, keep the later
+ entry's contents in the earlier slot and delete the later slot.
+ Return TRUE if we found that the table was rebuilt. */
+static int
+st_rehash_linear(st_table *tab)
+{
+ int eq_p, rebuilt_p;
+ st_index_t i, j;
+ st_table_entry *p, *q;
+
+ for (i = tab->entries_start; i < tab->entries_bound; i++) {
+ p = &tab->entries[i];
+ if (DELETED_ENTRY_P(p))
+ continue;
+ for (j = i + 1; j < tab->entries_bound; j++) {
+ q = &tab->entries[j];
+ if (DELETED_ENTRY_P(q))
+ continue;
+ DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
+ if (EXPECT(rebuilt_p, 0))
+ return TRUE;
+ if (eq_p) {
+ /* duplicated key: the later entry wins, in the earlier position */
+ *p = *q;
+ MARK_ENTRY_DELETED(q);
+ tab->num_entries--;
+ update_range_for_deleted(tab, j);
+ }
+ }
}
- return 0;
+ return FALSE;
}
+/* Rehash using index: clear the bins and re-insert every live entry
+ into them. When an entry with an equal key is already indexed, its
+ record is overwritten with the later entry's record and the later
+ entry is deleted. Return TRUE if we found that the table was
+ rebuilt. */
static int
-strhash(string)
- register const char *string;
+st_rehash_indexed(st_table *tab)
+{
+ int eq_p, rebuilt_p;
+ st_index_t i;
+
+ unsigned int const size_ind = get_size_ind(tab);
+ initialize_bins(tab);
+ for (i = tab->entries_start; i < tab->entries_bound; i++) {
+ st_table_entry *p = &tab->entries[i];
+ st_index_t ind;
+#ifdef QUADRATIC_PROBE
+ st_index_t d = 1;
+#else
+ st_index_t perturb = p->hash;
+#endif
+
+ if (DELETED_ENTRY_P(p))
+ continue;
+
+ ind = hash_bin(p->hash, tab);
+ for (;;) {
+ st_index_t bin = get_bin(st_bins_ptr(tab), size_ind, ind);
+ if (EMPTY_OR_DELETED_BIN_P(bin)) {
+ /* ok, new room */
+ set_bin(st_bins_ptr(tab), size_ind, ind, i + ENTRY_BASE);
+ break;
+ }
+ else {
+ st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
+ DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
+ if (EXPECT(rebuilt_p, 0))
+ return TRUE;
+ if (eq_p) {
+ /* duplicated key; delete it */
+ q->record = p->record;
+ MARK_ENTRY_DELETED(p);
+ tab->num_entries--;
+ /* NOTE(review): the entry marked deleted is P (index i), while
+ the value passed here is BIN, the bin content of Q; confirm
+ that this argument is intended. */
+ update_range_for_deleted(tab, bin);
+ break;
+ }
+ else {
+ /* hash collision; skip it */
+#ifdef QUADRATIC_PROBE
+ ind = hash_bin(ind + d, tab);
+ d++;
+#else
+ ind = secondary_hash(ind, tab, &perturb);
+#endif
+ }
+ }
+ }
+ }
+ return FALSE;
+}
+
+/* Reconstruct TAB's bins according to TAB's entries. This function
+ permits conflicting keys inside of entries. No errors are reported
+ then. All but one of them are discarded silently. Tables whose
+ entry_power does not exceed MAX_POWER2_FOR_TABLES_WITHOUT_BINS are
+ handled by linear search, the others through the bins; the pass is
+ repeated for as long as it reports that the table was rebuilt. */
+static void
+st_rehash(st_table *tab)
{
- register int c;
+ int rebuilt_p;
+
+ do {
+ if (tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
+ rebuilt_p = st_rehash_linear(tab);
+ else
+ rebuilt_p = st_rehash_indexed(tab);
+ } while (rebuilt_p);
+}
-#ifdef HASH_ELFHASH
- register unsigned int h = 0, g;
+/* Return the key to store for KEY: the result of rb_hash_key_str when
+   KEY is a non-frozen object whose class is exactly rb_cString, and
+   KEY itself otherwise. */
+static st_data_t
+st_stringify(VALUE key)
+{
+    if (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key))
+        return rb_hash_key_str(key);
+    return key;
+}
+
+/* Append the pair KEY/VAL to the entries array of TAB without touching
+   the bins and without looking for an existing equal key, then report
+   both stored objects for HASH through RB_OBJ_WRITTEN. */
+static void
+st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
+{
+    st_table_entry fresh;
+    st_data_t stored_key = st_stringify(key);
+
+    fresh.hash = do_hash(stored_key, tab);
+    fresh.key = stored_key;
+    fresh.record = val;
+
+    tab->entries[tab->entries_bound] = fresh;
+    tab->entries_bound++;
+    tab->num_entries++;
+    RB_OBJ_WRITTEN(hash, Qundef, stored_key);
+    RB_OBJ_WRITTEN(hash, Qundef, val);
+}
- while ((c = *string++) != '\0') {
- h = ( h << 4 ) + c;
- if ( g = h & 0xF0000000 )
- h ^= g >> 24;
- h &= ~g;
+/* Insert the ARGC/2 key-value pairs stored flat in ARGV (key, value,
+ key, value, ...) into TAB one by one with st_insert, reporting each
+ stored object for HASH through RB_OBJ_WRITTEN. */
+static void
+st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
+{
+ long i;
+
+ /* i is advanced twice per iteration by the two argv[i++] reads */
+ for (i = 0; i < argc; /* */) {
+ st_data_t k = st_stringify(argv[i++]);
+ st_data_t v = argv[i++];
+ st_insert(tab, k, v);
+ RB_OBJ_WRITTEN(hash, Qundef, k);
+ RB_OBJ_WRITTEN(hash, Qundef, v);
}
- return h;
-#elif defined(HASH_PERL)
- register int val = 0;
+}
+
+/* Append the key-value pairs stored flat in ARGV (key, value, key,
+   value, ...) to the entries of TAB, then rebuild the index with
+   st_rehash, which also discards duplicated keys. */
+static void
+st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
+{
+    long pos;
+
+    /* push elems */
+    for (pos = 0; pos < argc; pos += 2)
+        st_insert_single(tab, hash, argv[pos], argv[pos + 1]);
+
+    /* reindex */
+    st_rehash(tab);
+}
+
+/* Mimics ruby's { foo => bar } syntax. This function is subpart
+ of rb_hash_bulk_insert. ARGV holds ARGC values laid out as key,
+ value, key, value, ...; they are inserted into the st_table of
+ HASH after the table has been expanded to hold them.
+ NOTE(review): argv[0] and argv[1] are read whenever argc <= 2, so
+ this presumably relies on the caller passing at least one pair;
+ verify against the caller. */
+void
+rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
+{
+ st_index_t n, size = argc / 2;
+ st_table *tab = RHASH_ST_TABLE(hash);
+
+ /* NOTE(review): TAB is assigned twice in a row; whether the first
+ value is ever needed depends on what these two macros do - confirm. */
+ tab = RHASH_TBL_RAW(hash);
+ n = tab->entries_bound + size;
+ st_expand_table(tab, n);
+ /* a non-empty table may already contain some of the keys, so go
+ through the append-and-rehash path */
+ if (UNLIKELY(tab->num_entries))
+ st_insert_generic(tab, argc, argv, hash);
+ else if (argc <= 2)
+ st_insert_single(tab, hash, argv[0], argv[1]);
+ else if (tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
+ st_insert_linear(tab, argc, argv, hash);
+ else
+ st_insert_generic(tab, argc, argv, hash);
+}
+
+/* Shrink the storage of TAB when it is sparsely populated, i.e. when
+   REBUILD_THRESHOLD times the number of entries does not exceed the
+   number of allocated entries.  The entries are moved into a new
+   table sized for twice the current number of entries. */
+void
+rb_st_compact_table(st_table *tab)
+{
+    st_index_t num = tab->num_entries;
+    st_table *new_tab;
+
+    if (REBUILD_THRESHOLD * num > get_allocated_entries(tab))
+        return;
+
+    /* Compaction: */
+    new_tab = st_init_table_with_size(tab->type, 2 * num);
+    rebuild_table_with(new_tab, tab);
+    rebuild_move_table(new_tab, tab);
+    rebuild_cleanup(tab);
+}
+
+/*
+ * set_table related code
+ */
+
+/* One element of a set_table entries array. Unlike st_table_entry it
+ carries no record, only the key and its hash. */
+struct set_table_entry {
+ st_hash_t hash; /* hash value of the key */
+ st_data_t key; /* the stored key */
+};
+
+/* Compute the hash of KEY with the hash function of table TAB and
+   return it after normalize_hash_value. */
+static inline st_hash_t
+set_do_hash(st_data_t key, set_table *tab)
+{
+    return normalize_hash_value((st_hash_t)(tab->type->hash)(key));
+}
+
+/* Return the size_ind field of table TAB (bin size index). */
+static inline unsigned int
+set_get_size_ind(const set_table *tab)
+{
+    const unsigned int size_ind = tab->size_ind;
+
+    return size_ind;
+}
+
+/* Return how many bins table TAB has allocated: 2 to the power of
+   bin_power. */
+static inline st_index_t
+set_get_bins_num(const set_table *tab)
+{
+    const st_index_t one = 1;
+
+    return one << tab->bin_power;
+}
+
+/* Return the bit mask that reduces a value to a bin index of table
+   TAB (number of bins minus one). */
+static inline st_index_t
+set_bins_mask(const set_table *tab)
+{
+    st_index_t bins = set_get_bins_num(tab);
+
+    return bins - 1;
+}
+
+/* Map HASH_VALUE to the index of a bin of table TAB by masking it
+   with the bins mask. */
+static inline st_index_t
+set_hash_bin(st_hash_t hash_value, set_table *tab)
+{
+    const st_index_t mask = set_bins_mask(tab);
+
+    return hash_value & mask;
+}
+
+/* Return how many entries table TAB has allocated: 2 to the power of
+   entry_power. */
+static inline st_index_t
+set_get_allocated_entries(const set_table *tab)
+{
+    const st_index_t one = 1;
+
+    return one << tab->entry_power;
+}
+
+/* Return the byte size of the allocated entries array of table TAB. */
+static inline size_t
+set_allocated_entries_size(const set_table *tab)
+{
+    st_index_t count = set_get_allocated_entries(tab);
+
+    return count * sizeof(set_table_entry);
+}
+
+/* Return true if table TAB is big enough to use bins, i.e. its
+   entry_power exceeds MAX_POWER2_FOR_TABLES_WITHOUT_BINS. */
+static inline bool
+set_has_bins(const set_table *tab)
+{
+    return !(tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS);
+}
+
+/* Return the byte size of the bins of table TAB, or 0 when the table
+   has no bins. */
+static inline st_index_t
+set_bins_size(const set_table *tab)
+{
+    if (!set_has_bins(tab))
+        return 0;
+
+    return features[tab->entry_power].bins_words * sizeof (st_index_t);
+}
+
+/* Return the address of the bins of table TAB, which are located right
+   after the allocated entries, or NULL when the table has no bins. */
+static inline st_index_t *
+set_bins_ptr(const set_table *tab)
+{
+    char *base;
+
+    if (!set_has_bins(tab))
+        return NULL;
+
+    base = (char *)tab->entries;
+    return (st_index_t *)(base + set_allocated_entries_size(tab));
+}
+
+/* Zero all bins of table TAB, which marks them as empty. */
+static void
+set_initialize_bins(set_table *tab)
+{
+    st_index_t *bins = set_bins_ptr(tab);
+
+    memset(bins, 0, set_bins_size(tab));
+}
+
+/* Reset table TAB to the empty state: no entries, an empty entries
+   range and, when the table has bins, all bins marked empty. */
+static void
+set_make_tab_empty(set_table *tab)
+{
+    tab->num_entries = 0;
+    tab->entries_bound = 0;
+    tab->entries_start = 0;
+
+    if (set_bins_ptr(tab) == NULL)
+        return;
+    set_initialize_bins(tab);
+}
+
+/* Return the byte size of the single allocation that holds the
+   entries of table TAB followed by its bins (if it has any). */
+static inline size_t
+set_entries_memsize(set_table *tab)
+{
+    const size_t entries_bytes =
+        set_get_allocated_entries(tab) * sizeof(set_table_entry);
+
+    return set_has_bins(tab) ? entries_bytes + set_bins_size(tab)
+                             : entries_bytes;
+}
+
+/* Initialize the already allocated table TAB with TYPE so that it can
+   hold at least SIZE entries: pick the power of two for SIZE, take the
+   matching bin parameters from `features`, allocate the entries
+   storage and make the table empty.  Return TAB. */
+static set_table *
+set_init_existing_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
+{
+    int power;
+
+#ifdef HASH_LOG
+#if HASH_LOG+0 < 0
+    {
+        const char *e = getenv("ST_HASH_LOG");
+        if (!e || !*e) init_st = 1;
+    }
+#endif
+    if (init_st == 0) {
+        init_st = 1;
+        atexit(stat_col);
+    }
+#endif
+
+    power = get_power2(size);
+
+    tab->type = type;
+    tab->entry_power = power;
+    tab->bin_power = features[power].bin_power;
+    tab->size_ind = features[power].size_ind;
+
+    /* the size depends on the powers stored just above */
+    tab->entries = (set_table_entry *)malloc(set_entries_memsize(tab));
+    set_make_tab_empty(tab);
+    tab->rebuilds_num = 0;
+    return tab;
+}
+
+/* Create and return a table with TYPE which can hold at least SIZE
+   entries.  The real number of entries which the table can hold is
+   the nearest power of two for SIZE.  When TAB is NULL the table
+   structure itself is allocated here, otherwise TAB is initialized in
+   place. */
+set_table *
+set_init_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
+{
+    set_table *target = tab;
+
+    if (target == NULL)
+        target = malloc(sizeof(set_table));
+
+    return set_init_existing_table_with_size(target, type, size);
+}
+
+/* Create and return a new table of type_numhash with the default
+   (size 0) capacity. */
+set_table *
+set_init_numtable(void)
+{
+    const st_index_t default_size = 0;
+
+    return set_init_table_with_size(NULL, &type_numhash, default_size);
+}
+
+/* Create and return a new table of type_numhash which can hold at
+   least SIZE entries. */
+set_table *
+set_init_numtable_with_size(st_index_t size)
+{
+    set_table *fresh = set_init_table_with_size(NULL, &type_numhash, size);
+
+    return fresh;
+}
- while ((c = *string++) != '\0') {
- val += c;
- val += (val << 10);
- val ^= (val >> 6);
+/* Initialize the caller-provided table TAB as a type_numhash table
+   which can hold at least SIZE entries, and return TAB. */
+set_table *
+set_init_embedded_numtable_with_size(set_table *tab, st_index_t size)
+{
+    set_table *ready = set_init_existing_table_with_size(tab, &type_numhash, size);
+
+    return ready;
+}
+
+/* Return the number of entries currently stored in table TBL. */
+size_t
+set_table_size(const struct set_table *tbl)
+{
+    const size_t count = tbl->num_entries;
+
+    return count;
+}
+
+/* Remove all entries of table TAB and count the change as a
+   rebuild. */
+void
+set_table_clear(set_table *tab)
+{
+    set_make_tab_empty(tab);
+    tab->rebuilds_num += 1;
+}
+
+/* Free the entries storage of table TAB, but not the table structure
+   itself. */
+void
+set_free_embedded_table(set_table *tab)
+{
+    const size_t bytes = set_entries_memsize(tab);
+
+    sized_free(tab->entries, bytes);
+}
+
+/* Free the entries storage of table TAB and then the table structure
+   itself.  This should only be used if you passed NULL to
+   set_init_table_with_size/set_copy when creating the table. */
+void
+set_free_table(set_table *tab)
+{
+    /* storage first, then the structure that points to it */
+    set_free_embedded_table(tab);
+    free_fixed_ptr(tab);
+}
+
+/* Return the byte size of memory allocated for table TAB: the table
+   structure, the allocated entries and, for tables that use them, the
+   bins. */
+size_t
+set_memsize(const set_table *tab)
+{
+    size_t total = sizeof(set_table);
+
+    if (tab->entry_power > MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
+        total += set_bins_size(tab);
+    total += set_get_allocated_entries(tab) * sizeof(set_table_entry);
+    return total;
+}
+
+/* Forward declarations of the search and rebuild helpers defined
+   below. */
+static st_index_t set_find_table_entry_ind(set_table *tab,
+                                           st_hash_t hash_value,
+                                           st_data_t key);
+
+static st_index_t set_find_table_bin_ind(set_table *tab,
+                                         st_hash_t hash_value,
+                                         st_data_t key);
+
+static st_index_t set_find_table_bin_ind_direct(set_table *table,
+                                                st_hash_t hash_value,
+                                                st_data_t key);
+
+static st_index_t set_find_table_bin_ptr_and_reserve(set_table *tab,
+                                                     st_hash_t *hash_value,
+                                                     st_data_t key,
+                                                     st_index_t *bin_ind);
+
+static void set_rebuild_table_with(set_table *const new_tab, set_table *const tab);
+static void set_rebuild_move_table(set_table *const new_tab, set_table *const tab);
+static void set_rebuild_cleanup(set_table *const tab);
+
+/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
+ and can change size of the table entries and bins arrays.
+ Rebuilding is implemented by creation of a new table or by
+ compaction of the existing one. */
+static void
+set_rebuild_table(set_table *tab)
+{
+ /* Compact in place when the live entries fill between
+ 1/REBUILD_THRESHOLD and 1/2 of the allocated entries, or when the
+ table is smaller than the minimal size; otherwise move to a newly
+ sized table. */
+ if ((2 * tab->num_entries <= set_get_allocated_entries(tab)
+ && REBUILD_THRESHOLD * tab->num_entries > set_get_allocated_entries(tab))
+ || tab->num_entries < (1 << MINIMAL_POWER2)) {
+ /* Compaction: */
+ /* num_entries is recounted by set_rebuild_table_with */
+ tab->num_entries = 0;
+ if (set_has_bins(tab))
+ set_initialize_bins(tab);
+ set_rebuild_table_with(tab, tab);
+ }
+ else {
+ set_table *new_tab;
+ /* This allocation could trigger GC and compaction. If tab is the
+ * gen_fields_tbl, then tab could have changed in size due to objects being
+ * freed and/or moved. Do not store attributes of tab before this line. */
+ new_tab = set_init_table_with_size(NULL, tab->type,
+ 2 * tab->num_entries - 1);
+ set_rebuild_table_with(new_tab, tab);
+ set_rebuild_move_table(new_tab, tab);
+ }
+ set_rebuild_cleanup(tab);
+}
+
+/* Copy the live entries of TAB, in order, to the start of the entries
+ array of NEW_TAB and register each of them in NEW_TAB's bins (when
+ it has bins). NEW_TAB may be TAB itself, in which case the entries
+ are compacted in place. NEW_TAB->num_entries is incremented once
+ per copied entry. */
+static void
+set_rebuild_table_with(set_table *const new_tab, set_table *const tab)
+{
+ st_index_t i, ni;
+ unsigned int size_ind;
+ set_table_entry *new_entries;
+ set_table_entry *curr_entry_ptr;
+ st_index_t *bins;
+ st_index_t bin_ind;
+
+ new_entries = new_tab->entries;
+
+ ni = 0;
+ bins = set_bins_ptr(new_tab);
+ size_ind = set_get_size_ind(new_tab);
+ st_index_t bound = tab->entries_bound;
+ set_table_entry *entries = tab->entries;
+
+ for (i = tab->entries_start; i < bound; i++) {
+ curr_entry_ptr = &entries[i];
+ PREFETCH(entries + i + 1, 0);
+ if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
+ continue;
+ /* skip the copy when source and destination are the same slot */
+ if (&new_entries[ni] != curr_entry_ptr)
+ new_entries[ni] = *curr_entry_ptr;
+ if (EXPECT(bins != NULL, 1)) {
+ bin_ind = set_find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
+ curr_entry_ptr->key);
+ set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
+ }
+ new_tab->num_entries++;
+ ni++;
}
- val += (val << 3);
- val ^= (val >> 11);
- return val + (val << 15);
+ assert(new_tab->num_entries == tab->num_entries);
+}
+
+/* Make TAB take over the entries storage and the size parameters of
+   NEW_TAB.  The old storage of TAB and the NEW_TAB structure are
+   freed. */
+static void
+set_rebuild_move_table(set_table *const new_tab, set_table *const tab)
+{
+    /* the old size must be computed before the powers are replaced */
+    const size_t old_bytes = set_entries_memsize(tab);
+
+    sized_free(tab->entries, old_bytes);
+
+    tab->entries = new_tab->entries;
+    tab->size_ind = new_tab->size_ind;
+    tab->bin_power = new_tab->bin_power;
+    tab->entry_power = new_tab->entry_power;
+
+    free_fixed_ptr(new_tab);
+}
+
+/* Finish a rebuild of TAB: the live entries now occupy the range from
+   0 to num_entries, and the rebuild counter is advanced. */
+static void
+set_rebuild_cleanup(set_table *const tab)
+{
+    tab->entries_bound = tab->num_entries;
+    tab->entries_start = 0;
+    tab->rebuilds_num += 1;
+}
+
+/* Return the next secondary hash index for table TAB using previous
+   index IND and PERTURB.  PERTURB is shifted right by 11 bits on each
+   call; once it reaches zero the function becomes a full cycle
+   *linear congruential generator*, in other words it guarantees
+   traversing all table bins in the extreme case.
+
+   According to the Hull-Dobell theorem a generator
+   "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
+     o m and c are relatively prime
+     o a-1 is divisible by all prime factors of m
+     o a-1 is divisible by 4 if m is divisible by 4.
+
+   For our case a is 5, c is 1, and m is a power of two. */
+static inline st_index_t
+set_secondary_hash(st_index_t ind, set_table *tab, st_index_t *perturb)
+{
+    st_index_t shifted = *perturb >> 11;
+    st_index_t next;
+
+    *perturb = shifted;
+    next = ind * 5 + shifted + 1;
+    return set_hash_bin(next, tab);
+}
+
+/* Look for an entry with HASH_VALUE and KEY in table TAB by scanning
+   the entries array from entries_start to entries_bound.  Return the
+   index of the found entry in array `entries`, UNDEFINED_ENTRY_IND if
+   there is no such entry, or REBUILT_TABLE_ENTRY_IND if the table was
+   rebuilt during the search. */
+static inline st_index_t
+set_find_entry(set_table *tab, st_hash_t hash_value, st_data_t key)
+{
+    int eq_p, rebuilt_p;
+    set_table_entry *entries = tab->entries;
+    st_index_t bound = tab->entries_bound;
+    st_index_t pos = tab->entries_start;
+
+    while (pos < bound) {
+        DO_PTR_EQUAL_CHECK(tab, &entries[pos], hash_value, key, eq_p, rebuilt_p);
+        if (EXPECT(rebuilt_p, 0))
+            return REBUILT_TABLE_ENTRY_IND;
+        if (eq_p)
+            return pos;
+        pos++;
+    }
+    return UNDEFINED_ENTRY_IND;
+}
+
+/* Use quadratic probing. This method has better data locality
+ but more collisions than the current approach. On average it
+ results in a slightly slower search. */
+/*#define QUADRATIC_PROBE*/
+
+/* Find the entry with HASH_VALUE and KEY in table TAB by probing its
+ bins. On success return the content of the matching bin, i.e. the
+ index of the entry in the entries array plus ENTRY_BASE. If
+ there is no such entry, return UNDEFINED_ENTRY_IND. If the table
+ was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
+static st_index_t
+set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
+{
+ int eq_p, rebuilt_p;
+ st_index_t ind;
+#ifdef QUADRATIC_PROBE
+ st_index_t d;
#else
- register int val = 0;
+ st_index_t perturb;
+#endif
+ st_index_t bin;
+ set_table_entry *entries = tab->entries;
- while ((c = *string++) != '\0') {
- val = val*997 + c;
+ ind = set_hash_bin(hash_value, tab);
+#ifdef QUADRATIC_PROBE
+ d = 1;
+#else
+ perturb = hash_value;
+#endif
+ for (;;) {
+ bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
+ if (! EMPTY_OR_DELETED_BIN_P(bin)) {
+ DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
+ if (EXPECT(rebuilt_p, 0))
+ return REBUILT_TABLE_ENTRY_IND;
+ if (eq_p)
+ break;
+ }
+ /* an empty bin ends the probe sequence; a deleted one does not */
+ else if (EMPTY_BIN_P(bin))
+ return UNDEFINED_ENTRY_IND;
+#ifdef QUADRATIC_PROBE
+ ind = set_hash_bin(ind + d, tab);
+ d++;
+#else
+ ind = set_secondary_hash(ind, tab, &perturb);
+#endif
}
+ return bin;
+}
+
+/* Find and return index of table TAB bin corresponding to an entry
+ with HASH_VALUE and KEY. If there is no such bin, return
+ UNDEFINED_BIN_IND. If the table was rebuilt during the search,
+ return REBUILT_TABLE_BIN_IND. Unlike set_find_table_entry_ind,
+ the result is the position of the bin, not its content. */
+static st_index_t
+set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
+{
+ int eq_p, rebuilt_p;
+ st_index_t ind;
+#ifdef QUADRATIC_PROBE
+ st_index_t d;
+#else
+ st_index_t perturb;
+#endif
+ st_index_t bin;
+ set_table_entry *entries = tab->entries;
- return val + (val>>5);
+ ind = set_hash_bin(hash_value, tab);
+#ifdef QUADRATIC_PROBE
+ d = 1;
+#else
+ perturb = hash_value;
+#endif
+ for (;;) {
+ bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
+ if (! EMPTY_OR_DELETED_BIN_P(bin)) {
+ DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
+ if (EXPECT(rebuilt_p, 0))
+ return REBUILT_TABLE_BIN_IND;
+ if (eq_p)
+ break;
+ }
+ /* an empty bin ends the probe sequence; a deleted one does not */
+ else if (EMPTY_BIN_P(bin))
+ return UNDEFINED_BIN_IND;
+#ifdef QUADRATIC_PROBE
+ ind = set_hash_bin(ind + d, tab);
+ d++;
+#else
+ ind = set_secondary_hash(ind, tab, &perturb);
#endif
+ }
+ return ind;
}
-static int
-numcmp(x, y)
- long x, y;
+/* Find and return index of table TAB bin corresponding to an entry
+ with HASH_VALUE and KEY. The entry should be in the table
+ already. As written, the loop never compares KEY: it follows the
+ probe sequence for HASH_VALUE and returns the index of the first
+ bin that is empty or deleted. The loop has no other exit. */
+static st_index_t
+set_find_table_bin_ind_direct(set_table *tab, st_hash_t hash_value, st_data_t key)
{
- return x != y;
+ st_index_t ind;
+#ifdef QUADRATIC_PROBE
+ st_index_t d;
+#else
+ st_index_t perturb;
+#endif
+ st_index_t bin;
+
+ ind = set_hash_bin(hash_value, tab);
+#ifdef QUADRATIC_PROBE
+ d = 1;
+#else
+ perturb = hash_value;
+#endif
+ for (;;) {
+ bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
+ if (EMPTY_OR_DELETED_BIN_P(bin)) /* first free slot on the probe path */
+ return ind;
+#ifdef QUADRATIC_PROBE
+ ind = set_hash_bin(ind + d, tab);
+ d++;
+#else
+ ind = set_secondary_hash(ind, tab, &perturb);
+#endif
+ }
+}
+
+/* Mark I-th bin of table TAB as empty, in other words not
+ corresponding to any entry. This stores EMPTY_BIN into the bin
+ through set_bin and changes nothing else in TAB. */
+#define MARK_SET_BIN_EMPTY(tab, i) (set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, EMPTY_BIN))
+
+/* Return index of table TAB bin for HASH_VALUE and KEY through
+ BIN_IND and the pointed value as the function result. Reserve the
+ bin for inclusion of the corresponding entry into the table if it
+ is not there yet. We always find such bin as bins array length is
+ bigger than the entries array length. Although we can reuse a
+ deleted bin, the result bin value is always empty if the table has
+ no entry with KEY. Return the value of the found bin (the entries
+ array index plus ENTRY_BASE; set_insert subtracts ENTRY_BASE) or
+ UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
+ during the search, return REBUILT_TABLE_ENTRY_IND.
+
+ When KEY is not found, TAB->num_entries is incremented here.
+ HASH_VALUE is only read through the pointer. BIN_IND is not
+ written when REBUILT_TABLE_ENTRY_IND is returned. */
+static st_index_t
+set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
+ st_data_t key, st_index_t *bin_ind)
+{
+ int eq_p, rebuilt_p;
+ st_index_t ind;
+ st_hash_t curr_hash_value = *hash_value;
+#ifdef QUADRATIC_PROBE
+ st_index_t d;
+#else
+ st_index_t perturb;
+#endif
+ st_index_t entry_index;
+ st_index_t firset_deleted_bin_ind; /* sic: index of the first deleted bin seen while probing */
+ set_table_entry *entries;
+
+ ind = set_hash_bin(curr_hash_value, tab);
+#ifdef QUADRATIC_PROBE
+ d = 1;
+#else
+ perturb = curr_hash_value;
+#endif
+ firset_deleted_bin_ind = UNDEFINED_BIN_IND;
+ entries = tab->entries;
+ for (;;) {
+ entry_index = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
+ if (EMPTY_BIN_P(entry_index)) {
+ tab->num_entries++; /* KEY is absent: the caller is expected to add it */
+ entry_index = UNDEFINED_ENTRY_IND;
+ if (firset_deleted_bin_ind != UNDEFINED_BIN_IND) {
+ /* We can reuse bin of a deleted entry. */
+ ind = firset_deleted_bin_ind;
+ MARK_SET_BIN_EMPTY(tab, ind);
+ }
+ break;
+ }
+ else if (! DELETED_BIN_P(entry_index)) {
+ DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
+ if (EXPECT(rebuilt_p, 0))
+ return REBUILT_TABLE_ENTRY_IND;
+ if (eq_p)
+ break;
+ }
+ else if (firset_deleted_bin_ind == UNDEFINED_BIN_IND) /* remember only the earliest deleted bin */
+ firset_deleted_bin_ind = ind;
+#ifdef QUADRATIC_PROBE
+ ind = set_hash_bin(ind + d, tab);
+ d++;
+#else
+ ind = set_secondary_hash(ind, tab, &perturb);
+#endif
+ }
+ *bin_ind = ind;
+ return entry_index;
+}
+
+/* Find an entry with KEY in table TAB. Return non-zero if we found
+ it. The search is restarted from the retry label whenever the
+ helper reports that the table was rebuilt during the search. The
+ hash of KEY is computed once, before the first attempt. */
+int
+set_table_lookup(set_table *tab, st_data_t key)
+{
+ st_index_t bin;
+ st_hash_t hash = set_do_hash(key, tab);
+
+ retry:
+ if (!set_has_bins(tab)) {
+ bin = set_find_entry(tab, hash, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ if (bin == UNDEFINED_ENTRY_IND)
+ return 0;
+ }
+ else {
+ bin = set_find_table_entry_ind(tab, hash, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ if (bin == UNDEFINED_ENTRY_IND)
+ return 0;
+ bin -= ENTRY_BASE; /* result is not read afterwards; only presence matters here */
+ }
+ return 1;
+}
+
+/* Check the table and rebuild it if it is necessary. Rebuilding
+ is triggered when TAB->entries_bound equals the value returned by
+ set_get_allocated_entries, i.e. when there is no slot left at the
+ end of the entries array for a new entry. */
+static inline void
+set_rebuild_table_if_necessary (set_table *tab)
+{
+ st_index_t bound = tab->entries_bound;
+
+ if (bound == set_get_allocated_entries(tab))
+ set_rebuild_table(tab);
+}
+
+/* Insert KEY into table TAB and return zero. If there is
+ already entry with KEY in the table, return nonzero and leave
+ the table entries unchanged. A new entry is appended at
+ TAB->entries_bound, and its index plus ENTRY_BASE is stored in the
+ reserved bin when the table has bins. */
+int
+set_insert(set_table *tab, st_data_t key)
+{
+ set_table_entry *entry;
+ st_index_t bin;
+ st_index_t ind;
+ st_hash_t hash_value;
+ st_index_t bin_ind;
+ int new_p;
+
+ hash_value = set_do_hash(key, tab);
+ retry:
+ set_rebuild_table_if_necessary(tab);
+ if (!set_has_bins(tab)) {
+ bin = set_find_entry(tab, hash_value, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ new_p = bin == UNDEFINED_ENTRY_IND;
+ if (new_p)
+ tab->num_entries++; /* in the branch with bins the reserve helper does this increment */
+ bin_ind = UNDEFINED_BIN_IND;
+ }
+ else {
+ bin = set_find_table_bin_ptr_and_reserve(tab, &hash_value,
+ key, &bin_ind);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ new_p = bin == UNDEFINED_ENTRY_IND;
+ bin -= ENTRY_BASE; /* bin is not read again in this function */
+ }
+ if (new_p) {
+ ind = tab->entries_bound++;
+ entry = &tab->entries[ind];
+ entry->hash = hash_value;
+ entry->key = key;
+ if (bin_ind != UNDEFINED_BIN_IND)
+ set_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
+ return 0;
+ }
+ return 1;
+}
+
+/* Create a copy of old_tab into new_tab. All fields are copied by
+ structure assignment, then new_tab gets its own storage: one
+ allocation of set_allocated_entries_size + set_bins_size bytes is
+ filled with that many bytes starting at old_tab->entries
+ (presumably entries and bins share one contiguous block -- confirm
+ against set_bins_ptr). Always returns new_tab. */
+static set_table *
+set_replace(set_table *new_tab, set_table *old_tab)
+{
+ *new_tab = *old_tab;
+ size_t memsize = set_allocated_entries_size(old_tab) + set_bins_size(old_tab);
+ new_tab->entries = (set_table_entry *)malloc(memsize); /* NOTE(review): result is not checked here; confirm whether malloc is redefined to a non-returning allocator in this file */
+ MEMCPY(new_tab->entries, old_tab->entries, char, memsize);
+ return new_tab;
+}
+
+/* Create and return a copy of table OLD_TAB. If NEW_TAB is NULL a
+ set_table structure is allocated for the copy; otherwise the copy
+ is written into NEW_TAB, overwriting its previous contents. */
+set_table *
+set_copy(set_table *new_tab, set_table *old_tab)
+{
+ if (new_tab == NULL) new_tab = (set_table *) malloc(sizeof(set_table));
+
+ if (set_replace(new_tab, old_tab) == NULL) { /* set_replace as defined in this file always returns new_tab */
+ set_free_table(new_tab);
+ return NULL;
+ }
+
+ return new_tab;
+}
+
+/* Update the entries start of table TAB after removing an entry
+ with index N in the array entries. Nothing happens unless N is
+ the current entries start; in that case the start is moved forward
+ past every following deleted entry, but not beyond entries_bound. */
+static inline void
+set_update_range_for_deleted(set_table *tab, st_index_t n)
+{
+ /* Do not update entries_bound here. Otherwise, we can fill all
+ bins by deleted entry value before rebuilding the table. */
+ if (tab->entries_start == n) {
+ st_index_t start = n + 1;
+ st_index_t bound = tab->entries_bound;
+ set_table_entry *entries = tab->entries;
+ while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
+ tab->entries_start = start;
+ }
+}
+
+/* Mark I-th bin of table TAB as corresponding to a deleted table
+ entry. The macro only stores DELETED_BIN into the bin; it does
+ not change the number of entries or any other counter, so callers
+ adjust TAB->num_entries themselves. */
+#define MARK_SET_BIN_DELETED(tab, i) \
+ do { \
+ set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, DELETED_BIN); \
+ } while (0)
+
+/* Delete entry with KEY from table TAB, and return non-zero. If
+ there is no entry with KEY in the table, return zero. KEY is
+ passed by pointer: on success *KEY is overwritten with the key
+ stored in the deleted entry; on failure it is left untouched. */
+int
+set_table_delete(set_table *tab, st_data_t *key)
+{
+ set_table_entry *entry;
+ st_index_t bin;
+ st_index_t bin_ind;
+ st_hash_t hash;
+
+ hash = set_do_hash(*key, tab);
+ retry:
+ if (!set_has_bins(tab)) {
+ bin = set_find_entry(tab, hash, *key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ if (bin == UNDEFINED_ENTRY_IND) {
+ return 0;
+ }
+ }
+ else {
+ bin_ind = set_find_table_bin_ind(tab, hash, *key);
+ if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
+ goto retry;
+ if (bin_ind == UNDEFINED_BIN_IND) {
+ return 0;
+ }
+ bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE; /* entries array index of the found entry */
+ MARK_SET_BIN_DELETED(tab, bin_ind);
+ }
+ entry = &tab->entries[bin];
+ *key = entry->key;
+ MARK_ENTRY_DELETED(entry);
+ tab->num_entries--;
+ set_update_range_for_deleted(tab, bin);
+ return 1;
+}
+
+/* Traverse all entries in table TAB calling FUNC with current entry
+ key, ARG and zero. If the call returns ST_STOP, stop
+ traversing. If the call returns ST_DELETE, delete the current
+ entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
+ traversing. The function returns zero unless an error is found.
+ CHECK_P is flag of set_foreach_check call. The behavior is a bit
+ different for ST_CHECK and when the current element is removed
+ during traversing.
+
+ If FUNC returns ST_REPLACE and REPLACE is not NULL, REPLACE is
+ called with a pointer to the key, ARG and TRUE; the key it leaves
+ behind is stored back into the entry and its result replaces
+ FUNC's. ST_CHECK continues the traversal only when CHECK_P is
+ set; otherwise it is handled like ST_STOP. If the table was
+ rebuilt during the callbacks and the current key can no longer be
+ found, then with CHECK_P set FUNC is called once more as
+ FUNC(0, ARG, 1) and the function returns 1. */
+static inline int
+set_general_foreach(set_table *tab, set_foreach_check_callback_func *func,
+ set_update_callback_func *replace, st_data_t arg,
+ int check_p)
+{
+ st_index_t bin;
+ st_index_t bin_ind;
+ set_table_entry *entries, *curr_entry_ptr;
+ enum st_retval retval;
+ st_index_t i, rebuilds_num;
+ st_hash_t hash;
+ st_data_t key;
+ int error_p, packed_p = !set_has_bins(tab);
+
+ entries = tab->entries;
+ /* The bound can change inside the loop even without rebuilding
+ the table, e.g. by an entry insertion. */
+ for (i = tab->entries_start; i < tab->entries_bound; i++) {
+ curr_entry_ptr = &entries[i];
+ if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
+ continue;
+ key = curr_entry_ptr->key;
+ rebuilds_num = tab->rebuilds_num; /* snapshot taken before the callbacks to detect a rebuild */
+ hash = curr_entry_ptr->hash;
+ retval = (*func)(key, arg, 0);
+
+ if (retval == ST_REPLACE && replace) {
+ retval = (*replace)(&key, arg, TRUE);
+ curr_entry_ptr->key = key;
+ }
+
+ if (rebuilds_num != tab->rebuilds_num) {
+ retry:
+ entries = tab->entries; /* reload cached state and locate the current entry again */
+ packed_p = !set_has_bins(tab);
+ if (packed_p) {
+ i = set_find_entry(tab, hash, key);
+ if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ error_p = i == UNDEFINED_ENTRY_IND;
+ }
+ else {
+ i = set_find_table_entry_ind(tab, hash, key);
+ if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
+ error_p = i == UNDEFINED_ENTRY_IND;
+ i -= ENTRY_BASE;
+ }
+ if (error_p && check_p) {
+ /* call func with error notice */
+ retval = (*func)(0, arg, 1);
+ return 1;
+ }
+ curr_entry_ptr = &entries[i]; /* NOTE(review): reached with error_p set when check_p is zero; i is then not a found entry index -- confirm this cannot happen */
+ }
+ switch (retval) {
+ case ST_REPLACE:
+ break;
+ case ST_CONTINUE:
+ break;
+ case ST_CHECK:
+ if (check_p)
+ break; /* otherwise control falls through to ST_STOP */
+ case ST_STOP:
+ return 0;
+ case ST_DELETE: {
+ st_data_t key = curr_entry_ptr->key; /* shadows the outer key */
+
+ again:
+ if (packed_p) {
+ bin = set_find_entry(tab, hash, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto again;
+ if (bin == UNDEFINED_ENTRY_IND)
+ break;
+ }
+ else {
+ bin_ind = set_find_table_bin_ind(tab, hash, key);
+ if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
+ goto again;
+ if (bin_ind == UNDEFINED_BIN_IND)
+ break;
+ bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
+ MARK_SET_BIN_DELETED(tab, bin_ind);
+ }
+ curr_entry_ptr = &entries[bin];
+ MARK_ENTRY_DELETED(curr_entry_ptr);
+ tab->num_entries--;
+ set_update_range_for_deleted(tab, bin);
+ break;
+ }
+ }
+ }
+ return 0;
}
+/* Traverse table TAB through set_general_foreach with the check
+ flag set, passing REPLACE so that an ST_REPLACE result of FUNC
+ can be followed by a call to REPLACE. Returns what
+ set_general_foreach returns. */
+int
+set_foreach_with_replace(set_table *tab, set_foreach_check_callback_func *func, set_update_callback_func *replace, st_data_t arg)
+{
+ return set_general_foreach(tab, func, replace, arg, TRUE);
+}
+
+/* Pair of a two-argument foreach callback and its argument. It is
+ passed (cast to st_data_t) as ARG of set_general_foreach and
+ unpacked by set_apply_functor. */
+struct set_functor {
+ set_foreach_callback_func *func; /* callback invoked as func(key, arg) */
+ st_data_t arg; /* second argument given to func */
+};
+
+/* Three-argument callback used by set_table_foreach: D carries a
+ pointer to a struct set_functor; call its FUNC with key K and the
+ stored argument, and return that result. The third parameter is
+ ignored. */
static int
-numhash(n)
- long n;
+set_apply_functor(st_data_t k, st_data_t d, int _)
+{
+ const struct set_functor *f = (void *)d;
+ return f->func(k, f->arg);
+}
+
+/* Traverse table TAB calling FUNC with each key and ARG. FUNC and
+ ARG are wrapped in a struct set_functor on the stack and the
+ traversal is done by set_general_foreach with no replace callback
+ and the check flag cleared. */
+int
+set_table_foreach(set_table *tab, set_foreach_callback_func *func, st_data_t arg)
+{
+ const struct set_functor f = { func, arg };
+ return set_general_foreach(tab, set_apply_functor, NULL, (st_data_t)&f, FALSE);
+}
+
+/* See comments for function set_delete_safe. Traverse table TAB
+ through set_general_foreach with the check flag set and no replace
+ callback. The last parameter is not used. */
+int
+set_foreach_check(set_table *tab, set_foreach_check_callback_func *func, st_data_t arg,
+ st_data_t never ATTRIBUTE_UNUSED)
+{
+ return set_general_foreach(tab, func, NULL, arg, TRUE);
+}
+
+/* Fill array KEYS with at most SIZE keys of table TAB, taking the
+ non-deleted entries in the order of the entries array, beginning
+ at the entries start. Return the number of keys stored in array
+ KEYS. */
+inline st_index_t
+set_keys(set_table *tab, st_data_t *keys, st_index_t size)
{
- return n;
+ st_index_t i, bound;
+ st_data_t key, *keys_start, *keys_end;
+ set_table_entry *curr_entry_ptr, *entries = tab->entries;
+
+ bound = tab->entries_bound;
+ keys_start = keys;
+ keys_end = keys + size;
+ for (i = tab->entries_start; i < bound; i++) {
+ if (keys == keys_end) /* output array is full */
+ break;
+ curr_entry_ptr = &entries[i];
+ key = curr_entry_ptr->key;
+ if (! DELETED_ENTRY_P(curr_entry_ptr))
+ *keys++ = key;
+ }
+
+ return keys - keys_start;
}
+
+/* Shrink the storage of table TAB when it is sparsely used: if
+ REBUILD_THRESHOLD times the number of entries does not exceed the
+ number of allocated entries, a new table sized for twice the
+ number of entries is created and TAB is rebuilt from it.
+ Otherwise TAB is left as is. */
+void
+set_compact_table(set_table *tab)
+{
+ st_index_t num = tab->num_entries;
+ if (REBUILD_THRESHOLD * num <= set_get_allocated_entries(tab)) {
+ /* Compaction: build the contents in NEW_TAB, then hand them over
+ to TAB (presumably what the three helpers below do in turn --
+ they are defined outside this section; confirm there). */
+ set_table *new_tab = set_init_table_with_size(NULL, tab->type, 2 * num);
+ set_rebuild_table_with(new_tab, tab);
+ set_rebuild_move_table(new_tab, tab);
+ set_rebuild_cleanup(tab);
+ }
+}
+
+#endif