*/
#include "pub_core_basics.h"
+#include "pub_core_debuglog.h"
#include "pub_core_hashtable.h"
#include "pub_core_libcassert.h"
#include "pub_core_mallocfree.h"
#define CHAIN_NO(key,tbl) (((UWord)(key)) % tbl->n_chains)
struct _VgHashTable {
- UInt n_chains; // should be prime
- VgHashNode* iterNode; // current iterator node
- UInt iterChain; // next chain to be traversed by the iterator
- VgHashNode* chains[0]; // must be last field in the struct!
+ UInt n_chains; // should be prime
+ UInt n_elements;
+ VgHashNode* iterNode; // current iterator node
+ UInt iterChain; // next chain to be traversed by the iterator
+ VgHashNode** chains; // expanding array of hash chains
+ Bool iterOK; // table safe to iterate over?
+ HChar* name; // name of table (for debugging only)
+};
+
+#define N_HASH_PRIMES 20
+
+static SizeT primes[N_HASH_PRIMES] = {
+ 769UL, 1543UL, 3079UL, 6151UL,
+ 12289UL, 24593UL, 49157UL, 98317UL,
+ 196613UL, 393241UL, 786433UL, 1572869UL,
+ 3145739UL, 6291469UL, 12582917UL, 25165843UL,
+ 50331653UL, 100663319UL, 201326611UL, 402653189UL
};
/*--------------------------------------------------------------------*/
/*--- Functions ---*/
/*--------------------------------------------------------------------*/
-VgHashTable VG_(HT_construct)(UInt n_chains)
+VgHashTable VG_(HT_construct) ( HChar* name )
{
/* Initialises to zero, ie. all entries NULL */
- SizeT sz = sizeof(struct _VgHashTable) + n_chains*sizeof(VgHashNode*);
- VgHashTable table = VG_(calloc)(1, sz);
- table->n_chains = n_chains;
+ SizeT n_chains = primes[0];
+ SizeT sz = n_chains * sizeof(VgHashNode*);
+ VgHashTable table = VG_(calloc)(1, sizeof(struct _VgHashTable));
+ table->chains = VG_(calloc)(1, sz);
+ table->n_chains = n_chains;
+ table->n_elements = 0;
+ table->iterOK = True;
+ table->name = name;
+ vg_assert(name);
return table;
}
Int VG_(HT_count_nodes) ( VgHashTable table )
{
- VgHashNode* node;
- UInt chain;
- Int n = 0;
-
- for (chain = 0; chain < table->n_chains; chain++)
- for (node = table->chains[chain]; node != NULL; node = node->next)
- n++;
- return n;
+ return table->n_elements;
+}
+
+static void resize ( VgHashTable table )
+{
+ Int i;
+ SizeT sz;
+ SizeT old_chains = table->n_chains;
+ SizeT new_chains = old_chains + 1;
+ VgHashNode** chains;
+ VgHashNode * node;
+
+ /* If we've run out of primes, do nothing. */
+ if (old_chains == primes[N_HASH_PRIMES-1])
+ return;
+
+ vg_assert(old_chains >= primes[0]
+ && old_chains < primes[N_HASH_PRIMES-1]);
+
+ for (i = 0; i < N_HASH_PRIMES; i++) {
+ if (primes[i] > new_chains) {
+ new_chains = primes[i];
+ break;
+ }
+ }
+
+ vg_assert(new_chains > old_chains);
+ vg_assert(new_chains > primes[0]
+ && new_chains <= primes[N_HASH_PRIMES-1]);
+
+ VG_(debugLog)(
+ 1, "hashtable",
+ "resizing table `%s' from %lu to %lu (total elems %lu)\n",
+ table->name, (UWord)old_chains, (UWord)new_chains,
+ (UWord)table->n_elements );
+
+ table->n_chains = new_chains;
+ sz = new_chains * sizeof(VgHashNode*);
+ chains = VG_(calloc)(1, sz);
+
+ for (i = 0; i < old_chains; i++) {
+ node = table->chains[i];
+ while (node != NULL) {
+ VgHashNode* next = node->next;
+ UWord chain = CHAIN_NO(node->key, table);
+ node->next = chains[chain];
+ chains[chain] = node;
+ node = next;
+ }
+ }
+
+ VG_(free)(table->chains);
+ table->chains = chains;
}
/* Puts a new, heap allocated VgHashNode, into the VgHashTable. Prepends
void VG_(HT_add_node) ( VgHashTable table, void* vnode )
{
VgHashNode* node = (VgHashNode*)vnode;
- UInt chain = CHAIN_NO(node->key, table);
+ UWord chain = CHAIN_NO(node->key, table);
node->next = table->chains[chain];
table->chains[chain] = node;
-}
-
-/* Looks up a VgHashNode in the table. Also returns the address of
- the previous node's 'next' pointer which allows it to be removed from the
- list later without having to look it up again. */
-void* VG_(HT_get_node) ( VgHashTable table, UWord key,
- /*OUT*/VgHashNode*** next_ptr )
-{
- VgHashNode *prev, *curr;
- Int chain;
-
- chain = CHAIN_NO(key, table);
-
- prev = NULL;
- curr = table->chains[chain];
- while (True) {
- if (curr == NULL)
- break;
- if (key == curr->key)
- break;
- prev = curr;
- curr = curr->next;
+ table->n_elements++;
+ if ( (1 * (ULong)table->n_elements) > (1 * (ULong)table->n_chains) ) {
+ resize(table);
}
- if (NULL == prev)
- *next_ptr = & (table->chains[chain]);
- else
- *next_ptr = & (prev->next);
-
- return curr;
+ /* Table has been modified; hence HT_Next should assert. */
+ table->iterOK = False;
}
/* Looks up a VgHashNode in the table. Returns NULL if not found. */
/* Removes a VgHashNode from the table. Returns NULL if not found. */
void* VG_(HT_remove) ( VgHashTable table, UWord key )
{
- Int chain = CHAIN_NO(key, table);
+ UWord chain = CHAIN_NO(key, table);
VgHashNode* curr = table->chains[chain];
VgHashNode** prev_next_ptr = &(table->chains[chain]);
+ /* Table has been modified; hence HT_Next should assert. */
+ table->iterOK = False;
+
while (curr) {
if (key == curr->key) {
*prev_next_ptr = curr->next;
+ table->n_elements--;
return curr;
}
prev_next_ptr = &(curr->next);
return NULL;
}
-/* Allocates a suitably-sized array, copies all the malloc'd block
- shadows into it, then returns both the array and the size of it. This is
- used by the memory-leak detector.
+/* Allocates a suitably-sized array, copies all the hashtable elements
+ into it, then returns both the array and the size of it. This is
+ used by the memory-leak detector. The array must be freed with
+ VG_(free).
*/
-VgHashNode** VG_(HT_to_array) ( VgHashTable table, /*OUT*/ UInt* n_shadows )
+VgHashNode** VG_(HT_to_array) ( VgHashTable table, /*OUT*/ UInt* n_elems )
{
UInt i, j;
VgHashNode** arr;
VgHashNode* node;
- *n_shadows = 0;
+ *n_elems = 0;
for (i = 0; i < table->n_chains; i++) {
for (node = table->chains[i]; node != NULL; node = node->next) {
- (*n_shadows)++;
+ (*n_elems)++;
}
}
- if (*n_shadows == 0)
+ if (*n_elems == 0)
return NULL;
- arr = VG_(malloc)( *n_shadows * sizeof(VgHashNode*) );
+ arr = VG_(malloc)( *n_elems * sizeof(VgHashNode*) );
j = 0;
for (i = 0; i < table->n_chains; i++) {
arr[j++] = node;
}
}
- vg_assert(j == *n_shadows);
+ vg_assert(j == *n_elems);
return arr;
}
-/* Return the first VgHashNode satisfying the predicate p. */
-void* VG_(HT_first_match) ( VgHashTable table,
- Bool (*p) ( VgHashNode*, void* ),
- void* d )
-{
- UInt i;
- VgHashNode* node;
-
- for (i = 0; i < table->n_chains; i++)
- for (node = table->chains[i]; node != NULL; node = node->next)
- if ( p(node, d) )
- return node;
-
- return NULL;
-}
-
-void VG_(HT_apply_to_all_nodes)( VgHashTable table,
- void (*f)(VgHashNode*, void*),
- void* d )
-{
- UInt i;
- VgHashNode* node;
-
- for (i = 0; i < table->n_chains; i++) {
- for (node = table->chains[i]; node != NULL; node = node->next) {
- f(node, d);
- }
- }
-}
-
void VG_(HT_ResetIter)(VgHashTable table)
{
vg_assert(table);
table->iterNode = NULL;
table->iterChain = 0;
+ table->iterOK = True;
}
void* VG_(HT_Next)(VgHashTable table)
{
Int i;
vg_assert(table);
-
+ /* See long comment on HT_Next prototype in pub_tool_hashtable.h.
+ In short if this fails, it means the caller tried to modify the
+ table whilst iterating over it, which is a bug. */
+ vg_assert(table->iterOK);
+
if (table->iterNode && table->iterNode->next) {
table->iterNode = table->iterNode->next;
return table->iterNode;
{
UInt i;
VgHashNode *node, *node_next;
-
+
for (i = 0; i < table->n_chains; i++) {
for (node = table->chains[i]; node != NULL; node = node_next) {
node_next = node->next;
VG_(free)(node);
}
}
+ VG_(free)(table->chains);
VG_(free)(table);
}
// Problems with this data structure:
// - Separate chaining gives bad cache behaviour. Hash tables with linear
// probing give better cache behaviour.
-// - It's not very abstract, eg. deleting nodes exposes more internals than
-// I'd like.
typedef
struct _VgHashNode {
typedef struct _VgHashTable * VgHashTable;
-/* Make a new table. Allocates the memory with VG_(calloc)(), so can be freed
- with VG_(free)(). n_chains should be prime. */
-extern VgHashTable VG_(HT_construct) ( UInt n_chains );
+/* Make a new table. Allocates the memory with VG_(calloc)(), so can
+ be freed with VG_(free)(). The table starts small but will
+ periodically be expanded. This is transparent to the users of this
+ module. */
+extern VgHashTable VG_(HT_construct) ( HChar* name );
/* Count the number of nodes in a table. */
extern Int VG_(HT_count_nodes) ( VgHashTable table );
/* Add a node to the table. */
extern void VG_(HT_add_node) ( VgHashTable t, void* node );
-/* Looks up a node in the hash table. Also returns the address of the
- previous node's `next' pointer which allows it to be removed from the
- list later without having to look it up again. */
-extern void* VG_(HT_get_node) ( VgHashTable t, UWord key,
- /*OUT*/VgHashNode*** next_ptr );
-
/* Looks up a VgHashNode in the table. Returns NULL if not found. */
extern void* VG_(HT_lookup) ( VgHashTable table, UWord key );
/* Removes a VgHashNode from the table. Returns NULL if not found. */
extern void* VG_(HT_remove) ( VgHashTable table, UWord key );
-/* Allocates an array of pointers to all the shadow chunks of malloc'd
- blocks. Must be freed with VG_(free)(). */
-extern VgHashNode** VG_(HT_to_array) ( VgHashTable t, /*OUT*/ UInt* n_shadows );
-
-/* Returns first node that matches predicate `p', or NULL if none do.
- Extra arguments can be implicitly passed to `p' using `d' which is an
- opaque pointer passed to `p' each time it is called. */
-extern void* VG_(HT_first_match) ( VgHashTable t,
- Bool (*p)(VgHashNode*, void*),
- void* d );
-
-/* Applies a function f() once to each node. Again, `d' can be used
- to pass extra information to the function. */
-extern void VG_(HT_apply_to_all_nodes)( VgHashTable t,
- void (*f)(VgHashNode*, void*),
- void* d );
+/* Allocates a suitably-sized array, copies all the hashtable elements
+ into it, then returns both the array and the size of it. This is
+ used by the memory-leak detector. The array must be freed with
+ VG_(free). */
+extern VgHashNode** VG_(HT_to_array) ( VgHashTable t, /*OUT*/ UInt* n_elems );
/* Reset the table's iterator to point to the first element. */
extern void VG_(HT_ResetIter) ( VgHashTable table );
-/* Return the element pointed to by the iterator and move on to the next
- one. Returns NULL if the last one has been passed, or if HT_ResetIter()
- has not been called previously. */
+/* Return the element pointed to by the iterator and move on to the
+ next one. Returns NULL if the last one has been passed, or if
+ HT_ResetIter() has not been called previously. Asserts if the
+ table has been modified (HT_add_node, HT_remove) since
+ HT_ResetIter. This guarantees that callers cannot screw up by
+ modifying the table whilst iterating over it (and is necessary to
+ make the implementation safe; specifically we must guarantee that
+ the table will not get resized whilst iteration is happening.
+ Since resizing only happens as a result of calling HT_add_node,
+ disallowing HT_add_node during iteration should give the required
+ assurance. */
extern void* VG_(HT_Next) ( VgHashTable table );
/* Destroy a table. */