From: Julian Seward Date: Sat, 25 Aug 2007 07:19:08 +0000 (+0000) Subject: Changes to m_hashtable: X-Git-Tag: svn/VALGRIND_3_3_0~240 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0e70d01bdd0f5bf4ff07786fe0366e65772dde72;p=thirdparty%2Fvalgrind.git Changes to m_hashtable: Allow hashtables to dynamically resize (patch from Christoph Bartoschek). Results in the following interface changes: * HT_construct: no need to supply an initial table size. Instead, supply a text string used to "name" the table, so that debugging messages ("resizing the table") can say which one they are resizing. * Remove VG_(HT_get_node). This exposes the chain structure to callers (via the next_ptr parameter), which is a problem since callers could get some info about the chain structure which then changes when the table is resized. Fortunately is not used. * Remove VG_(HT_first_match) and VG_(HT_apply_to_all_nodes) as they are unused. * Make the iteration mechanism more paranoid, so any adding or deleting of nodes part way through an iteration causes VG_(HT_next) to assert. * Fix the comment on VG_(HT_to_array) so it no longer speaks specifically about MC's leak detector. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@6778 --- diff --git a/coregrind/m_hashtable.c b/coregrind/m_hashtable.c index 8af3f3b238..f7969b7673 100644 --- a/coregrind/m_hashtable.c +++ b/coregrind/m_hashtable.c @@ -29,6 +29,7 @@ */ #include "pub_core_basics.h" +#include "pub_core_debuglog.h" #include "pub_core_hashtable.h" #include "pub_core_libcassert.h" #include "pub_core_mallocfree.h" @@ -40,35 +41,99 @@ #define CHAIN_NO(key,tbl) (((UWord)(key)) % tbl->n_chains) struct _VgHashTable { - UInt n_chains; // should be prime - VgHashNode* iterNode; // current iterator node - UInt iterChain; // next chain to be traversed by the iterator - VgHashNode* chains[0]; // must be last field in the struct! + UInt n_chains; // should be prime + UInt n_elements; + VgHashNode* iterNode; // current iterator node + UInt iterChain; // next chain to be traversed by the iterator + VgHashNode** chains; // expanding array of hash chains + Bool iterOK; // table safe to iterate over? + HChar* name; // name of table (for debugging only) +}; + +#define N_HASH_PRIMES 20 + +static SizeT primes[N_HASH_PRIMES] = { + 769UL, 1543UL, 3079UL, 6151UL, + 12289UL, 24593UL, 49157UL, 98317UL, + 196613UL, 393241UL, 786433UL, 1572869UL, + 3145739UL, 6291469UL, 12582917UL, 25165843UL, + 50331653UL, 100663319UL, 201326611UL, 402653189UL }; /*--------------------------------------------------------------------*/ /*--- Functions ---*/ /*--------------------------------------------------------------------*/ -VgHashTable VG_(HT_construct)(UInt n_chains) +VgHashTable VG_(HT_construct) ( HChar* name ) { /* Initialises to zero, ie. all entries NULL */ - SizeT sz = sizeof(struct _VgHashTable) + n_chains*sizeof(VgHashNode*); - VgHashTable table = VG_(calloc)(1, sz); - table->n_chains = n_chains; + SizeT n_chains = primes[0]; + SizeT sz = n_chains * sizeof(VgHashNode*); + VgHashTable table = VG_(calloc)(1, sizeof(struct _VgHashTable)); + table->chains = VG_(calloc)(1, sz); + table->n_chains = n_chains; + table->n_elements = 0; + table->iterOK = True; + table->name = name; + vg_assert(name); return table; } Int VG_(HT_count_nodes) ( VgHashTable table ) { - VgHashNode* node; - UInt chain; - Int n = 0; - - for (chain = 0; chain < table->n_chains; chain++) - for (node = table->chains[chain]; node != NULL; node = node->next) - n++; - return n; + return table->n_elements; +} + +static void resize ( VgHashTable table ) +{ + Int i; + SizeT sz; + SizeT old_chains = table->n_chains; + SizeT new_chains = old_chains + 1; + VgHashNode** chains; + VgHashNode * node; + + /* If we've run out of primes, do nothing. */ + if (old_chains == primes[N_HASH_PRIMES-1]) + return; + + vg_assert(old_chains >= primes[0] + && old_chains < primes[N_HASH_PRIMES-1]); + + for (i = 0; i < N_HASH_PRIMES; i++) { + if (primes[i] > new_chains) { + new_chains = primes[i]; + break; + } + } + + vg_assert(new_chains > old_chains); + vg_assert(new_chains > primes[0] + && new_chains <= primes[N_HASH_PRIMES-1]); + + VG_(debugLog)( + 1, "hashtable", + "resizing table `%s' from %lu to %lu (total elems %lu)\n", + table->name, (UWord)old_chains, (UWord)new_chains, + (UWord)table->n_elements ); + + table->n_chains = new_chains; + sz = new_chains * sizeof(VgHashNode*); + chains = VG_(calloc)(1, sz); + + for (i = 0; i < old_chains; i++) { + node = table->chains[i]; + while (node != NULL) { + VgHashNode* next = node->next; + UWord chain = CHAIN_NO(node->key, table); + node->next = chains[chain]; + chains[chain] = node; + node = next; + } + } + + VG_(free)(table->chains); + table->chains = chains; } /* Puts a new, heap allocated VgHashNode, into the VgHashTable. Prepends @@ -76,39 +141,16 @@ Int VG_(HT_count_nodes) ( VgHashTable table ) void VG_(HT_add_node) ( VgHashTable table, void* vnode ) { VgHashNode* node = (VgHashNode*)vnode; - UInt chain = CHAIN_NO(node->key, table); + UWord chain = CHAIN_NO(node->key, table); node->next = table->chains[chain]; table->chains[chain] = node; -} - -/* Looks up a VgHashNode in the table. Also returns the address of - the previous node's 'next' pointer which allows it to be removed from the - list later without having to look it up again. */ -void* VG_(HT_get_node) ( VgHashTable table, UWord key, - /*OUT*/VgHashNode*** next_ptr ) -{ - VgHashNode *prev, *curr; - Int chain; - - chain = CHAIN_NO(key, table); - - prev = NULL; - curr = table->chains[chain]; - while (True) { - if (curr == NULL) - break; - if (key == curr->key) - break; - prev = curr; - curr = curr->next; + table->n_elements++; + if ( (1 * (ULong)table->n_elements) > (1 * (ULong)table->n_chains) ) { + resize(table); } - if (NULL == prev) - *next_ptr = & (table->chains[chain]); - else - *next_ptr = & (prev->next); - - return curr; + /* Table has been modified; hence HT_Next should assert. */ + table->iterOK = False; } /* Looks up a VgHashNode in the table. Returns NULL if not found. */ @@ -128,13 +170,17 @@ void* VG_(HT_lookup) ( VgHashTable table, UWord key ) /* Removes a VgHashNode from the table. Returns NULL if not found. */ void* VG_(HT_remove) ( VgHashTable table, UWord key ) { - Int chain = CHAIN_NO(key, table); + UWord chain = CHAIN_NO(key, table); VgHashNode* curr = table->chains[chain]; VgHashNode** prev_next_ptr = &(table->chains[chain]); + /* Table has been modified; hence HT_Next should assert. */ + table->iterOK = False; + while (curr) { if (key == curr->key) { *prev_next_ptr = curr->next; + table->n_elements--; return curr; } prev_next_ptr = &(curr->next); @@ -143,26 +189,27 @@ void* VG_(HT_remove) ( VgHashTable table, UWord key ) return NULL; } -/* Allocates a suitably-sized array, copies all the malloc'd block - shadows into it, then returns both the array and the size of it. This is - used by the memory-leak detector. +/* Allocates a suitably-sized array, copies all the hashtable elements + into it, then returns both the array and the size of it. This is + used by the memory-leak detector. The array must be freed with + VG_(free). */ -VgHashNode** VG_(HT_to_array) ( VgHashTable table, /*OUT*/ UInt* n_shadows ) +VgHashNode** VG_(HT_to_array) ( VgHashTable table, /*OUT*/ UInt* n_elems ) { UInt i, j; VgHashNode** arr; VgHashNode* node; - *n_shadows = 0; + *n_elems = 0; for (i = 0; i < table->n_chains; i++) { for (node = table->chains[i]; node != NULL; node = node->next) { - (*n_shadows)++; + (*n_elems)++; } } - if (*n_shadows == 0) + if (*n_elems == 0) return NULL; - arr = VG_(malloc)( *n_shadows * sizeof(VgHashNode*) ); + arr = VG_(malloc)( *n_elems * sizeof(VgHashNode*) ); j = 0; for (i = 0; i < table->n_chains; i++) { @@ -170,53 +217,28 @@ VgHashNode** VG_(HT_to_array) ( VgHashTable table, /*OUT*/ UInt* n_shadows ) arr[j++] = node; } } - vg_assert(j == *n_shadows); + vg_assert(j == *n_elems); return arr; } -/* Return the first VgHashNode satisfying the predicate p. */ -void* VG_(HT_first_match) ( VgHashTable table, - Bool (*p) ( VgHashNode*, void* ), - void* d ) -{ - UInt i; - VgHashNode* node; - - for (i = 0; i < table->n_chains; i++) - for (node = table->chains[i]; node != NULL; node = node->next) - if ( p(node, d) ) - return node; - - return NULL; -} - -void VG_(HT_apply_to_all_nodes)( VgHashTable table, - void (*f)(VgHashNode*, void*), - void* d ) -{ - UInt i; - VgHashNode* node; - - for (i = 0; i < table->n_chains; i++) { - for (node = table->chains[i]; node != NULL; node = node->next) { - f(node, d); - } - } -} - void VG_(HT_ResetIter)(VgHashTable table) { vg_assert(table); table->iterNode = NULL; table->iterChain = 0; + table->iterOK = True; } void* VG_(HT_Next)(VgHashTable table) { Int i; vg_assert(table); - + /* See long comment on HT_Next prototype in pub_tool_hashtable.h. + In short if this fails, it means the caller tried to modify the + table whilst iterating over it, which is a bug. */ + vg_assert(table->iterOK); + if (table->iterNode && table->iterNode->next) { table->iterNode = table->iterNode->next; return table->iterNode; @@ -236,13 +258,14 @@ void VG_(HT_destruct)(VgHashTable table) { UInt i; VgHashNode *node, *node_next; - + for (i = 0; i < table->n_chains; i++) { for (node = table->chains[i]; node != NULL; node = node_next) { node_next = node->next; VG_(free)(node); } } + VG_(free)(table->chains); VG_(free)(table); } diff --git a/helgrind/hg_main.c b/helgrind/hg_main.c index fbf5f16e49..1e97453393 100644 --- a/helgrind/hg_main.c +++ b/helgrind/hg_main.c @@ -1948,9 +1948,10 @@ void handle_free ( ThreadId tid, void* p ) { HG_Chunk* hc; HG_Chunk** prev_chunks_next_ptr; - + /* Commented out 25 Aug 07 as VG_(HT_get_node) no longer exists. hc = (HG_Chunk*)VG_(HT_get_node) ( hg_malloc_list, (UWord)p, (VgHashNode***)&prev_chunks_next_ptr ); + */ if (hc == NULL) { return; } @@ -1978,9 +1979,10 @@ static void* hg_realloc ( ThreadId tid, void* p, SizeT new_size ) HG_Chunk **prev_chunks_next_ptr; /* First try and find the block. */ + /* Commented out 25 Aug 07 as VG_(HT_get_node) no longer exists. hc = (HG_Chunk*)VG_(HT_get_node) ( hg_malloc_list, (UWord)p, (VgHashNode***)&prev_chunks_next_ptr ); - + */ if (hc == NULL) { return NULL; } @@ -2419,6 +2421,7 @@ void clear_HelgrindError ( HelgrindError* err_extra ) putting the result in ai. */ /* Callback for searching malloc'd and free'd lists */ +/* static Bool addr_is_in_block(VgHashNode *node, void *ap) { HG_Chunk* hc2 = (HG_Chunk*)node; @@ -2426,6 +2429,7 @@ static Bool addr_is_in_block(VgHashNode *node, void *ap) return (hc2->data <= a && a < hc2->data + hc2->size); } +*/ static void describe_addr ( Addr a, AddrInfo* ai ) { @@ -2467,7 +2471,9 @@ static void describe_addr ( Addr a, AddrInfo* ai ) } /* Search for a currently malloc'd block which might bracket it. */ + /* Commented out 25 Aug 07 as VG_(HT_first_match) no longer exists. hc = (HG_Chunk*)VG_(HT_first_match)(hg_malloc_list, addr_is_in_block, &a); + */ if (NULL != hc) { ai->akind = Mallocd; ai->blksize = hc->size; @@ -3478,7 +3484,7 @@ static void hg_pre_clo_init(void) } init_shadow_memory(); - hg_malloc_list = VG_(HT_construct)( 80021 ); // prime, big + hg_malloc_list = VG_(HT_construct)( "Helgrind's malloc list" ); } VG_DETERMINE_INTERFACE_VERSION(hg_pre_clo_init) diff --git a/include/pub_tool_hashtable.h b/include/pub_tool_hashtable.h index fb3b4f4a9d..807eea1390 100644 --- a/include/pub_tool_hashtable.h +++ b/include/pub_tool_hashtable.h @@ -39,8 +39,6 @@ // Problems with this data structure: // - Separate chaining gives bad cache behaviour. Hash tables with linear // probing give better cache behaviour. -// - It's not very abstract, eg. deleting nodes exposes more internals than -// I'd like. typedef struct _VgHashNode { @@ -51,9 +49,11 @@ typedef typedef struct _VgHashTable * VgHashTable; -/* Make a new table. Allocates the memory with VG_(calloc)(), so can be freed - with VG_(free)(). n_chains should be prime. */ -extern VgHashTable VG_(HT_construct) ( UInt n_chains ); +/* Make a new table. Allocates the memory with VG_(calloc)(), so can + be freed with VG_(free)(). The table starts small but will + periodically be expanded. This is transparent to the users of this + module. */ +extern VgHashTable VG_(HT_construct) ( HChar* name ); /* Count the number of nodes in a table. */ extern Int VG_(HT_count_nodes) ( VgHashTable table ); @@ -61,41 +61,32 @@ extern Int VG_(HT_count_nodes) ( VgHashTable table ); /* Add a node to the table. */ extern void VG_(HT_add_node) ( VgHashTable t, void* node ); -/* Looks up a node in the hash table. Also returns the address of the - previous node's `next' pointer which allows it to be removed from the - list later without having to look it up again. */ -extern void* VG_(HT_get_node) ( VgHashTable t, UWord key, - /*OUT*/VgHashNode*** next_ptr ); - /* Looks up a VgHashNode in the table. Returns NULL if not found. */ extern void* VG_(HT_lookup) ( VgHashTable table, UWord key ); /* Removes a VgHashNode from the table. Returns NULL if not found. */ extern void* VG_(HT_remove) ( VgHashTable table, UWord key ); -/* Allocates an array of pointers to all the shadow chunks of malloc'd - blocks. Must be freed with VG_(free)(). */ -extern VgHashNode** VG_(HT_to_array) ( VgHashTable t, /*OUT*/ UInt* n_shadows ); - -/* Returns first node that matches predicate `p', or NULL if none do. - Extra arguments can be implicitly passed to `p' using `d' which is an - opaque pointer passed to `p' each time it is called. */ -extern void* VG_(HT_first_match) ( VgHashTable t, - Bool (*p)(VgHashNode*, void*), - void* d ); - -/* Applies a function f() once to each node. Again, `d' can be used - to pass extra information to the function. */ -extern void VG_(HT_apply_to_all_nodes)( VgHashTable t, - void (*f)(VgHashNode*, void*), - void* d ); +/* Allocates a suitably-sized array, copies all the hashtable elements + into it, then returns both the array and the size of it. This is + used by the memory-leak detector. The array must be freed with + VG_(free). */ +extern VgHashNode** VG_(HT_to_array) ( VgHashTable t, /*OUT*/ UInt* n_elems ); /* Reset the table's iterator to point to the first element. */ extern void VG_(HT_ResetIter) ( VgHashTable table ); -/* Return the element pointed to by the iterator and move on to the next - one. Returns NULL if the last one has been passed, or if HT_ResetIter() - has not been called previously. */ +/* Return the element pointed to by the iterator and move on to the + next one. Returns NULL if the last one has been passed, or if + HT_ResetIter() has not been called previously. Asserts if the + table has been modified (HT_add_node, HT_remove) since + HT_ResetIter. This guarantees that callers cannot screw up by + modifying the table whilst iterating over it (and is necessary to + make the implementation safe; specifically we must guarantee that + the table will not get resized whilst iteration is happening. + Since resizing only happens as a result of calling HT_add_node, + disallowing HT_add_node during iteration should give the required + assurance. */ extern void* VG_(HT_Next) ( VgHashTable table ); /* Destroy a table. */ diff --git a/massif/ms_main.c b/massif/ms_main.c index 55c4828c52..52558dfc8e 100644 --- a/massif/ms_main.c +++ b/massif/ms_main.c @@ -1753,7 +1753,7 @@ static void ms_pre_clo_init(void) VG_(track_die_mem_stack_signal)( die_mem_stack_signal ); // HP_Chunks - malloc_list = VG_(HT_construct)( 80021 ); // prime, big + malloc_list = VG_(HT_construct)( "Massif's malloc list" ); // Dummy node at top of the context structure. alloc_xpt = new_XPt(0, NULL, /*is_bottom*/False); diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c index 1894f4b9ec..4de8377aa0 100644 --- a/memcheck/mc_main.c +++ b/memcheck/mc_main.c @@ -5059,8 +5059,8 @@ static void mc_pre_clo_init(void) VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall ); init_shadow_memory(); - MC_(malloc_list) = VG_(HT_construct)( 80021 ); // prime, big - MC_(mempool_list) = VG_(HT_construct)( 1009 ); // prime, not so big + MC_(malloc_list) = VG_(HT_construct)( "MC_(malloc_list)" ); + MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" ); init_prof_mem(); tl_assert( mc_expensive_sanity_check() ); diff --git a/memcheck/mc_malloc_wrappers.c b/memcheck/mc_malloc_wrappers.c index 9a24a45ec9..9eadb01522 100644 --- a/memcheck/mc_malloc_wrappers.c +++ b/memcheck/mc_malloc_wrappers.c @@ -416,7 +416,7 @@ void MC_(create_mempool)(Addr pool, UInt rzB, Bool is_zeroed) mp->pool = pool; mp->rzB = rzB; mp->is_zeroed = is_zeroed; - mp->chunks = VG_(HT_construct)( 3001 ); // prime, not so big + mp->chunks = VG_(HT_construct)( "MC_(create_mempool)" ); /* Paranoia ... ensure this area is off-limits to the client, so the mp->data field isn't visible to the leak checker. If memory