summaryrefslogtreecommitdiff
path: root/st.c
diff options
context:
space:
mode:
Diffstat (limited to 'st.c')
-rw-r--r--st.c258
1 files changed, 185 insertions, 73 deletions
diff --git a/st.c b/st.c
index 56ae30ce47..0c52e7a2ef 100644
--- a/st.c
+++ b/st.c
@@ -90,6 +90,11 @@
o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
bins depending on the current hash table size.
+ o The implementation takes into account that the table can be
+ rebuilt during hashing or comparison functions. It can happen if
+ the functions are implemented in Ruby and a thread switch occurs
+ during their execution.
+
This implementation speeds up the Ruby hash table benchmarks in
average by more 40% on Intel Haswell CPU.
@@ -174,6 +179,15 @@ static const struct st_hash_type type_strcasehash = {
#define PTR_EQUAL(tab, ptr, hash_val, key_) \
((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
+/* Like PTR_EQUAL, but the comparison result is stored in RES.
+ REBUILT_P is set to TRUE if the table was rebuilt during the comparison. */
+#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
+ do { \
+ unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
+ res = PTR_EQUAL(tab, ptr, hash_val, key); \
+ rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
+ } while (FALSE)
+
/* Features of a table. */
struct st_features {
/* Power of 2 used for number of allocated entries. */
@@ -380,6 +394,11 @@ set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
#define UNDEFINED_BIN_IND (~(st_index_t) 0)
+/* Entry and bin values returned when we detect that the table was
+ rebuilt during the search. */
+#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
+#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
+
/* Mark I-th bin of table TAB as corresponding to a deleted table
entry. Update number of entries in the table and number of bins
corresponding to deleted entries. */
@@ -823,17 +842,22 @@ secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb)
/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
search. Return the index of the found entry in array `entries`.
- If it is not found, return UNDEFINED_ENTRY_IND. */
+ If it is not found, return UNDEFINED_ENTRY_IND. If the table was
+ rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
static inline st_index_t
find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
{
+ int eq_p, rebuilt_p;
st_index_t i, bound;
st_table_entry *entries;
bound = tab->entries_bound;
entries = tab->entries;
for (i = tab->entries_start; i < bound; i++) {
- if (PTR_EQUAL(tab, &entries[i], hash_value, key))
+ DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
+ if (EXPECT(rebuilt_p, 0))
+ return REBUILT_TABLE_ENTRY_IND;
+ if (eq_p)
return i;
}
return UNDEFINED_ENTRY_IND;
@@ -845,10 +869,12 @@ find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
/*#define QUADRATIC_PROBE*/
/* Return index of entry with HASH_VALUE and KEY in table TAB. If
- there is no such entry, return UNDEFINED_ENTRY_IND. */
+ there is no such entry, return UNDEFINED_ENTRY_IND. If the table
+ was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
static st_index_t
find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
{
+ int eq_p, rebuilt_p;
st_index_t ind;
#ifdef QUADRATIC_PROBE
st_index_t d;
@@ -869,10 +895,13 @@ find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
FOUND_BIN;
for (;;) {
bin = get_bin(tab->bins, get_size_ind(tab), ind);
- if (! EMPTY_OR_DELETED_BIN_P(bin)
- && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key))
- break;
- else if (EMPTY_BIN_P(bin))
+ if (! EMPTY_OR_DELETED_BIN_P(bin)) {
+ DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
+ if (EXPECT(rebuilt_p, 0))
+ return REBUILT_TABLE_ENTRY_IND;
+ if (eq_p)
+ break;
+ } else if (EMPTY_BIN_P(bin))
return UNDEFINED_ENTRY_IND;
#ifdef QUADRATIC_PROBE
ind = hash_bin(ind + d, tab);
@@ -887,10 +916,12 @@ find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
/* Find and return index of table TAB bin corresponding to an entry
with HASH_VALUE and KEY. If there is no such bin, return
- UNDEFINED_BIN_IND. */
+ UNDEFINED_BIN_IND. If the table was rebuilt during the search,
+ return REBUILT_TABLE_BIN_IND. */
static st_index_t
find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
{
+ int eq_p, rebuilt_p;
st_index_t ind;
#ifdef QUADRATIC_PROBE
st_index_t d;
@@ -911,10 +942,13 @@ find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
FOUND_BIN;
for (;;) {
bin = get_bin(tab->bins, get_size_ind(tab), ind);
- if (! EMPTY_OR_DELETED_BIN_P(bin)
- && PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key))
- break;
- else if (EMPTY_BIN_P(bin))
+ if (! EMPTY_OR_DELETED_BIN_P(bin)) {
+ DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
+ if (EXPECT(rebuilt_p, 0))
+ return REBUILT_TABLE_BIN_IND;
+ if (eq_p)
+ break;
+ } else if (EMPTY_BIN_P(bin))
return UNDEFINED_BIN_IND;
#ifdef QUADRATIC_PROBE
ind = hash_bin(ind + d, tab);
@@ -955,7 +989,7 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
bin = get_bin(tab->bins, get_size_ind(tab), ind);
if (EMPTY_OR_DELETED_BIN_P(bin))
return ind;
- st_assert (! PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key));
+ st_assert (entries[bin - ENTRY_BASE].hash != hash_value);
#ifdef QUADRATIC_PROBE
ind = hash_bin(ind + d, tab);
d++;
@@ -973,11 +1007,13 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
bigger entries array. Although we can reuse a deleted bin, the
result bin value is always empty if the table has no entry with
KEY. Return the entries array index of the found entry or
- UNDEFINED_ENTRY_IND if it is not found. */
+ UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
+ during the search, return REBUILT_TABLE_ENTRY_IND. */
static st_index_t
find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
st_data_t key, st_index_t *bin_ind)
{
+ int eq_p, rebuilt_p;
st_index_t ind;
st_hash_t curr_hash_value = *hash_value;
#ifdef QUADRATIC_PROBE
@@ -1015,7 +1051,10 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
break;
}
else if (! DELETED_BIN_P(entry_index)) {
- if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
+ DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
+ if (EXPECT(rebuilt_p, 0))
+ return REBUILT_TABLE_ENTRY_IND;
+ if (eq_p)
break;
}
else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
@@ -1040,13 +1079,18 @@ st_lookup(st_table *tab, st_data_t key, st_data_t *value)
st_index_t bin;
st_hash_t hash = do_hash(key, tab);
+ retry:
if (tab->bins == NULL) {
bin = find_entry(tab, hash, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
if (bin == UNDEFINED_ENTRY_IND)
return 0;
}
else {
bin = find_table_entry_ind(tab, hash, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
if (bin == UNDEFINED_ENTRY_IND)
return 0;
bin -= ENTRY_BASE;
@@ -1064,13 +1108,18 @@ st_get_key(st_table *tab, st_data_t key, st_data_t *result)
st_index_t bin;
st_hash_t hash = do_hash(key, tab);
+ retry:
if (tab->bins == NULL) {
bin = find_entry(tab, hash, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
if (bin == UNDEFINED_ENTRY_IND)
return 0;
}
else {
bin = find_table_entry_ind(tab, hash, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
if (bin == UNDEFINED_ENTRY_IND)
return 0;
bin -= ENTRY_BASE;
@@ -1104,10 +1153,13 @@ st_insert(st_table *tab, st_data_t key, st_data_t value)
st_index_t bin_ind;
int new_p;
- rebuild_table_if_necessary(tab);
hash_value = do_hash(key, tab);
+ retry:
+ rebuild_table_if_necessary(tab);
if (tab->bins == NULL) {
bin = find_entry(tab, hash_value, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
new_p = bin == UNDEFINED_ENTRY_IND;
if (new_p)
tab->num_entries++;
@@ -1116,6 +1168,8 @@ st_insert(st_table *tab, st_data_t key, st_data_t value)
else {
bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
key, &bin_ind);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
new_p = bin == UNDEFINED_ENTRY_IND;
bin -= ENTRY_BASE;
}
@@ -1192,10 +1246,13 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value,
st_index_t bin_ind;
int new_p;
- rebuild_table_if_necessary (tab);
hash_value = do_hash(key, tab);
+ retry:
+ rebuild_table_if_necessary (tab);
if (tab->bins == NULL) {
bin = find_entry(tab, hash_value, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
new_p = bin == UNDEFINED_ENTRY_IND;
if (new_p)
tab->num_entries++;
@@ -1204,6 +1261,8 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value,
else {
bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
key, &bin_ind);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
new_p = bin == UNDEFINED_ENTRY_IND;
bin -= ENTRY_BASE;
}
@@ -1212,7 +1271,6 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value,
check = tab->rebuilds_num;
key = (*func)(key);
st_assert(check == tab->rebuilds_num);
- st_assert(do_hash(key, tab) == hash_value);
ind = tab->entries_bound++;
entry = &tab->entries[ind];
entry->hash = hash_value;
@@ -1220,6 +1278,7 @@ st_insert2(st_table *tab, st_data_t key, st_data_t value,
entry->record = value;
if (bin_ind != UNDEFINED_BIN_IND)
set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
+ st_assert(do_hash(key, tab) == hash_value);
#ifdef ST_DEBUG
st_check(tab);
#endif
@@ -1281,8 +1340,11 @@ st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
st_assert(tab != NULL);
hash = do_hash(*key, tab);
+ retry:
if (tab->bins == NULL) {
bin = find_entry(tab, hash, *key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
if (bin == UNDEFINED_ENTRY_IND) {
if (value != 0) *value = 0;
return 0;
@@ -1290,6 +1352,8 @@ st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
}
else {
bin_ind = find_table_bin_ind(tab, hash, *key);
+ if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
+ goto retry;
if (bin_ind == UNDEFINED_BIN_IND) {
if (value != 0) *value = 0;
return 0;
@@ -1344,21 +1408,33 @@ st_shift(st_table *tab, st_data_t *key, st_data_t *value)
for (i = tab->entries_start; i < bound; i++) {
curr_entry_ptr = &entries[i];
if (! DELETED_ENTRY_P(curr_entry_ptr)) {
+ st_hash_t entry_hash = curr_entry_ptr->hash;
+ st_data_t entry_key = curr_entry_ptr->key;
+
if (value != 0) *value = curr_entry_ptr->record;
- *key = curr_entry_ptr->key;
+ *key = entry_key;
+ retry:
if (tab->bins == NULL) {
- bin = find_entry(tab, curr_entry_ptr->hash, curr_entry_ptr->key);
+ bin = find_entry(tab, entry_hash, entry_key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
+ entries = tab->entries;
+ goto retry;
+ }
st_assert(bin != UNDEFINED_ENTRY_IND);
- st_assert(&entries[bin] == curr_entry_ptr);
+ curr_entry_ptr = &entries[bin];
}
else {
- bin_ind = find_table_bin_ind(tab, curr_entry_ptr->hash,
- curr_entry_ptr->key);
+ bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
+ if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
+ entries = tab->entries;
+ goto retry;
+ }
st_assert(bin_ind != UNDEFINED_BIN_IND);
- st_assert(&entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
- - ENTRY_BASE] == curr_entry_ptr);
+ curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
+ - ENTRY_BASE];
MARK_BIN_DELETED(tab, bin_ind);
}
+ st_assert(entry_hash == curr_entry_ptr->hash && entry_key == curr_entry_ptr->key);
MARK_ENTRY_DELETED(curr_entry_ptr);
tab->num_entries--;
update_range_for_deleted(tab, i);
@@ -1402,15 +1478,20 @@ st_update(st_table *tab, st_data_t key,
int retval, existing;
st_hash_t hash = do_hash(key, tab);
+ retry:
entries = tab->entries;
if (tab->bins == NULL) {
bin = find_entry(tab, hash, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
existing = bin != UNDEFINED_ENTRY_IND;
entry = &entries[bin];
bin_ind = UNDEFINED_BIN_IND;
}
else {
bin_ind = find_table_bin_ind(tab, hash, key);
+ if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
+ goto retry;
existing = bin_ind != UNDEFINED_BIN_IND;
if (existing) {
bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
@@ -1489,14 +1570,19 @@ st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg,
hash = curr_entry_ptr->hash;
retval = (*func)(key, curr_entry_ptr->record, arg, 0);
if (rebuilds_num != tab->rebuilds_num) {
+ retry:
entries = tab->entries;
packed_p = tab->bins == NULL;
if (packed_p) {
i = find_entry(tab, hash, key);
+ if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
error_p = i == UNDEFINED_ENTRY_IND;
}
else {
i = find_table_entry_ind(tab, hash, key);
+ if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
+ goto retry;
error_p = i == UNDEFINED_ENTRY_IND;
i -= ENTRY_BASE;
}
@@ -1512,36 +1598,44 @@ st_general_foreach(st_table *tab, int (*func)(ANYARGS), st_data_t arg,
}
switch (retval) {
case ST_CONTINUE:
- break;
+ break;
case ST_CHECK:
- if (check_p)
- break;
+ if (check_p)
+ break;
case ST_STOP:
#ifdef ST_DEBUG
- st_check(tab);
-#endif
- return 0;
- case ST_DELETE:
- if (packed_p) {
- bin = find_entry(tab, hash, curr_entry_ptr->key);
- if (bin == UNDEFINED_ENTRY_IND)
- break;
- }
- else {
- bin_ind = find_table_bin_ind(tab, hash, curr_entry_ptr->key);
- if (bin_ind == UNDEFINED_BIN_IND)
- break;
- bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
- MARK_BIN_DELETED(tab, bin_ind);
- }
- st_assert(&entries[bin] == curr_entry_ptr);
- MARK_ENTRY_DELETED(curr_entry_ptr);
- tab->num_entries--;
- update_range_for_deleted(tab, bin);
+ st_check(tab);
+#endif
+ return 0;
+ case ST_DELETE: {
+ st_data_t key = curr_entry_ptr->key;
+
+ again:
+ if (packed_p) {
+ bin = find_entry(tab, hash, key);
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+ goto again;
+ if (bin == UNDEFINED_ENTRY_IND)
+ break;
+ }
+ else {
+ bin_ind = find_table_bin_ind(tab, hash, key);
+ if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
+ goto again;
+ if (bin_ind == UNDEFINED_BIN_IND)
+ break;
+ bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
+ MARK_BIN_DELETED(tab, bin_ind);
+ }
+ curr_entry_ptr = &entries[bin];
+ MARK_ENTRY_DELETED(curr_entry_ptr);
+ tab->num_entries--;
+ update_range_for_deleted(tab, bin);
#ifdef ST_DEBUG
- st_check(tab);
+ st_check(tab);
#endif
- break;
+ break;
+ }
}
}
#ifdef ST_DEBUG
@@ -2021,10 +2115,12 @@ st_expand_table(st_table *tab, st_index_t siz)
free(tmp);
}
-/* Rehash using linear search. */
-static void
+/* Rehash using linear search. Return TRUE if we found that the table
+ was rebuilt. */
+static int
st_rehash_linear(st_table *tab)
{
+ int eq_p, rebuilt_p;
st_index_t i, j;
st_table_entry *p, *q;
if (tab->bins) {
@@ -2039,7 +2135,10 @@ st_rehash_linear(st_table *tab)
q = &tab->entries[j];
if (DELETED_ENTRY_P(q))
continue;
- if (PTR_EQUAL(tab, p, q->hash, q->key)) {
+ DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
+ if (EXPECT(rebuilt_p, 0))
+ return TRUE;
+ if (eq_p) {
st_assert(p < q);
*p = *q;
MARK_ENTRY_DELETED(q);
@@ -2048,12 +2147,15 @@ st_rehash_linear(st_table *tab)
}
}
}
+ return FALSE;
}
-/* Rehash using index */
-static void
+/* Rehash using index. Return TRUE if we found that the table was
+ rebuilt. */
+static int
st_rehash_indexed(st_table *tab)
{
+ int eq_p, rebuilt_p;
st_index_t i;
st_index_t const n = bins_size(tab);
unsigned int const size_ind = get_size_ind(tab);
@@ -2082,26 +2184,32 @@ st_rehash_indexed(st_table *tab)
set_bin(bins, size_ind, ind, i + ENTRY_BASE);
break;
}
- else if (PTR_EQUAL(tab, q, p->hash, p->key)) {
- /* duplicated key; delete it */
- st_assert(q < p);
- q->record = p->record;
- MARK_ENTRY_DELETED(p);
- tab->num_entries--;
- update_range_for_deleted(tab, bin);
- break;
- }
else {
- /* hash collision; skip it */
+ DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
+ if (EXPECT(rebuilt_p, 0))
+ return TRUE;
+ if (eq_p) {
+ /* duplicated key; delete it */
+ st_assert(q < p);
+ q->record = p->record;
+ MARK_ENTRY_DELETED(p);
+ tab->num_entries--;
+ update_range_for_deleted(tab, bin);
+ break;
+ }
+ else {
+ /* hash collision; skip it */
#ifdef QUADRATIC_PROBE
- ind = hash_bin(ind + d, tab);
- d++;
+ ind = hash_bin(ind + d, tab);
+ d++;
#else
- ind = secondary_hash(ind, tab, &peterb);
+ ind = secondary_hash(ind, tab, &peterb);
#endif
- }
+ }
+ }
}
}
+ return FALSE;
}
/* Reconstruct TAB's bins according to TAB's entries. This function
@@ -2110,10 +2218,14 @@ st_rehash_indexed(st_table *tab)
static void
st_rehash(st_table *tab)
{
- if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
- st_rehash_linear(tab);
- else
- st_rehash_indexed(tab);
+ int rebuilt_p;
+
+ do {
+ if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
+ rebuilt_p = st_rehash_linear(tab);
+ else
+ rebuilt_p = st_rehash_indexed(tab);
+ } while (rebuilt_p);
}
#ifdef RUBY