From f6ab65fc2a9bd47c1e0823d0ac099dc83c13cba3 Mon Sep 17 00:00:00 2001 From: Philippe Waroquiers Date: Sat, 25 Apr 2015 14:53:35 +0000 Subject: [PATCH] Replace adler32 by sdbm_hash in m_deduppoolalloc.c adler32 is not very good as a hash function. sdbm_hash gives more different keys that adler32, and in a large majority of the cases, shorter chains. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@15142 --- coregrind/m_deduppoolalloc.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/coregrind/m_deduppoolalloc.c b/coregrind/m_deduppoolalloc.c index 2456e1b65e..bf6d1e3c88 100644 --- a/coregrind/m_deduppoolalloc.c +++ b/coregrind/m_deduppoolalloc.c @@ -231,6 +231,19 @@ void VG_(freezeDedupPA) (DedupPoolAlloc *ddpa, ddpa->ht_node_pa = NULL; } + +// hash function used by gawk and SDBM. +static UInt sdbm_hash (const UChar* buf, UInt len ) +{ + UInt h; + UInt i; + + h = 0; + for (i = 0; i < len; i++) + h = *buf++ + (h<<6) + (h<<16) - h; + return h; +} + const void* VG_(allocEltDedupPA) (DedupPoolAlloc *ddpa, SizeT eltSzB, const void *elt) { @@ -243,14 +256,7 @@ const void* VG_(allocEltDedupPA) (DedupPoolAlloc *ddpa, SizeT eltSzB, ddpa->nr_alloc_calls++; - // Currently using adler32 as hash function. - // Many references tells adler32 is bad as a hash function. - // And effectively, some tests on dwarf debug string shows - // a lot of collisions (at least for short elements). - // (A lot can be 10% of the elements colliding, even on - // small nr of elements such as 10_000). - ht_elt.key = VG_(adler32) (0, NULL, 0); - ht_elt.key = VG_(adler32) (ht_elt.key, elt, eltSzB); + ht_elt.key = sdbm_hash (elt, eltSzB); ht_elt.eltSzB = eltSzB; ht_elt.elt = elt; -- 2.47.3