diff options
author | shyouhei <shyouhei@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2017-04-27 04:21:04 +0000 |
---|---|---|
committer | shyouhei <shyouhei@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2017-04-27 04:21:04 +0000 |
commit | 29ca20de2d998d21c0d41224799182020311ea76 (patch) | |
tree | 1107864822da8ed3c2babd7ca9d8015c8a641654 /st.c | |
parent | 42c2df9937854a4c0988e69fbb176006fb511d6b (diff) |
refactor newhash (revision 58463 another try) [fix GH-1600]
* st.c (rb_hash_bulk_insert): new API to bulk insert entries
into a hash. Given arguments are first inserted into the
table at once, then reindexed. This is faster than inserting
things using rb_hash_aset() one by one.
This arrangement (rb_ prefixed function placed in st.c) is
unavoidable because it both touches table internals and write
barriers at once.
* internal.h: declare the new function.
* hash.c (rb_hash_s_create): use the new function.
* vm.c (core_hash_merge): ditto.
* insns.def (newhash): ditto.
* test/ruby/test_hash.rb: more coverage on hash creation.
* test/ruby/test_literal.rb: ditto.
-----------------------------------------------------------
benchmark results:
minimum results in each 7 measurements.
Execution time (sec)
name before after
loop_whileloop2 0.136 0.137
vm2_bighash* 1.249 0.623
Speedup ratio: compare with the result of `before' (greater is better)
name after
loop_whileloop2 0.996
vm2_bighash* 2.004
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58492 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'st.c')
-rw-r--r-- | st.c | 165 |
1 files changed, 165 insertions, 0 deletions
@@ -1955,3 +1955,168 @@ st_numhash(st_data_t n) enum {s1 = 11, s2 = 3}; return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2)); } + +/* Expand TAB to be suitable for holding SIZ entries in total. + Pre-existing entries remain not deleted inside of TAB, but its bins + are cleared to expect future reconstruction. See rehash below. */ +static void +st_expand_table(st_table *tab, st_index_t siz) +{ + st_table *tmp; + st_index_t n; + + if (siz <= get_allocated_entries(tab)) + return; /* enough room already */ + + tmp = st_init_table_with_size(tab->type, siz); + n = get_allocated_entries(tab); + MEMCPY(tmp->entries, tab->entries, st_table_entry, n); + free(tab->entries); + if (tab->bins != NULL) + free(tab->bins); + if (tmp->bins != NULL) + free(tmp->bins); + tab->entry_power = tmp->entry_power; + tab->bin_power = tmp->bin_power; + tab->size_ind = tmp->size_ind; + tab->entries = tmp->entries; + tab->bins = NULL; + tab->rebuilds_num++; + free(tmp); +} + +/* Rehash using linear search. */ +static void +st_rehash_linear(st_table *tab) +{ + st_index_t i, j; + st_table_entry *p, *q; + if (tab->bins) { + free(tab->bins); + tab->bins = NULL; + } + for (i = tab->entries_start; i < tab->entries_bound; i++) { + p = &tab->entries[i]; + if (DELETED_ENTRY_P(p)) + continue; + for (j = i + 1; j < tab->entries_bound; j++) { + q = &tab->entries[j]; + if (DELETED_ENTRY_P(q)) + continue; + if (PTR_EQUAL(tab, p, q->hash, q->key)) { + st_assert(p < q); + *p = *q; + MARK_ENTRY_DELETED(q); + tab->num_entries--; + update_range_for_deleted(tab, j); + } + } + } +} + +/* Rehash using index */ +static void +st_rehash_indexed(st_table *tab) +{ + st_index_t i; + st_index_t const n = bins_size(tab); + unsigned int const size_ind = get_size_ind(tab); + st_index_t *bins = realloc(tab->bins, n); + st_assert(bins != NULL); + tab->bins = bins; + initialize_bins(tab); + for (i = tab->entries_start; i < tab->entries_bound; i++) { + st_table_entry *p = &tab->entries[i]; + st_index_t ind; +#ifdef QUADRATIC_PROBE + 
st_index_t d = 1; +#else + st_index_t peterb = p->hash; +#endif + + if (DELETED_ENTRY_P(p)) + continue; + + ind = hash_bin(p->hash, tab); + for(;;) { + st_index_t bin = get_bin(bins, size_ind, ind); + st_table_entry *q = &tab->entries[bin - ENTRY_BASE]; + if (EMPTY_OR_DELETED_BIN_P(bin)) { + /* ok, new room */ + set_bin(bins, size_ind, ind, i + ENTRY_BASE); + break; + } + else if (PTR_EQUAL(tab, q, p->hash, p->key)) { + /* duplicated key; delete it */ + st_assert(q < p); + q->record = p->record; + MARK_ENTRY_DELETED(p); + tab->num_entries--; + update_range_for_deleted(tab, bin); + break; + } + else { + /* hash collision; skip it */ +#ifdef QUADRATIC_PROBE + ind = hash_bin(ind + d, tab); + d++; +#else + ind = secondary_hash(ind, tab, &peterb); +#endif + } + } + } +} + +/* Reconstruct TAB's bins according to TAB's entries. This function + permits conflicting keys inside of entries. No errors are reported + then. All but one of them are discarded silently. */ +static void +st_rehash(st_table *tab) +{ + if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS) + st_rehash_linear(tab); + else + st_rehash_indexed(tab); +} + +#ifdef RUBY +/* Mimics ruby's { foo => bar } syntax. This function is placed here + because it touches table internals and write barriers at once. */ +void +rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash) +{ + int i; + st_table *tab; + + st_assert(argc % 2); + if (! argc) + return; + if (! RHASH(hash)->ntbl) + rb_hash_tbl_raw(hash); + tab = RHASH(hash)->ntbl; + + /* make room */ + st_expand_table(tab, tab->num_entries + argc); + + /* push elems */ + for (i = 0; i < argc; /* */) { + VALUE key = argv[i++]; + VALUE val = argv[i++]; + st_data_t k = (rb_obj_class(key) == rb_cString) ? 
+ rb_str_new_frozen(key) : key; + st_table_entry e; + e.hash = do_hash(k, tab); + e.key = k; + e.record = val; + + tab->entries[tab->entries_bound++] = e; + tab->num_entries++; + RB_OBJ_WRITTEN(hash, Qundef, k); + RB_OBJ_WRITTEN(hash, Qundef, val); + } + + /* reindex */ + st_rehash(tab); +} +#endif |