From: Bruno Haible Date: Tue, 20 Nov 2001 14:46:17 +0000 (+0000) Subject: Hash table efficiency improvements. Never believe your textbooks. X-Git-Tag: v0.11~284 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=406e22ffdb30ca5ccbb439441b7596ba2ed617b7;p=thirdparty%2Fgettext.git Hash table efficiency improvements. Never believe your textbooks. --- diff --git a/lib/ChangeLog b/lib/ChangeLog index 0b7b8f5ae..b9b761df8 100644 --- a/lib/ChangeLog +++ b/lib/ChangeLog @@ -1,3 +1,13 @@ +2001-11-17 Bruno Haible + + * hash.c (lookup_2): Remove function, identical to 'lookup'. + (init_hash): Remove test for NULL that can never succeed. + (insert_entry_2): Resize at 75%, not 90%. Call 'lookup' instead of + 'lookup_2'. + (compute_hashval): Shift by 9 bits, not by 4 bits. This drastically + improves the quality of the hash function, especially for short + strings. + 2001-11-11 Bruno Haible * javaexec.sh.in: Use HAVE_JAVA instead of HAVE_JAVA_JVM. diff --git a/lib/hash.c b/lib/hash.c index c76e587e7..382d0be17 100644 --- a/lib/hash.c +++ b/lib/hash.c @@ -85,9 +85,6 @@ static void insert_entry_2 PARAMS ((hash_table *htab, static size_t lookup PARAMS ((hash_table *htab, const void *key, size_t keylen, unsigned long int hval)); -static size_t lookup_2 PARAMS ((hash_table *htab, - const void *key, size_t keylen, - unsigned long int hval)); static unsigned long compute_hashval PARAMS ((const void *key, size_t keylen)); static int is_prime PARAMS ((unsigned long int candidate)); @@ -105,8 +102,6 @@ init_hash (htab, init_size) htab->filled = 0; htab->first = NULL; htab->table = (void *) xcalloc (init_size + 1, sizeof (hash_entry)); - if (htab->table == NULL) - return -1; obstack_init (&htab->mem_pool); @@ -177,9 +172,9 @@ insert_entry_2 (htab, key, keylen, hval, idx, data) } ++htab->filled; - if (100 * htab->filled > 90 * htab->size) + if (100 * htab->filled > 75 * htab->size) { - /* Table is filled more than 90%. Resize the table. */ + /* Table is filled more than 75%. Resize the table. */ unsigned long int old_size = htab->size; htab->size = next_prime (htab->size * 2); @@ -191,8 +186,8 @@ insert_entry_2 (htab, key, keylen, hval, idx, data) if (table[idx].used) insert_entry_2 (htab, table[idx].key, table[idx].keylen, table[idx].used, - lookup_2 (htab, table[idx].key, table[idx].keylen, - table[idx].used), + lookup (htab, table[idx].key, table[idx].keylen, + table[idx].used), table[idx].data); free (table); @@ -246,55 +241,12 @@ iterate_table (htab, ptr, key, keylen, data) } -static size_t -lookup (htab, key, keylen, hval) - hash_table *htab; - const void *key; - size_t keylen; - unsigned long hval; -{ - unsigned long hash; - size_t idx; - hash_entry *table = (hash_entry *) htab->table; - - /* First hash function: simply take the modul but prevent zero. */ - hash = 1 + hval % htab->size; - - idx = hash; - - if (table[idx].used) - { - if (table[idx].used == hval && table[idx].keylen == keylen - && memcmp (key, table[idx].key, keylen) == 0) - return idx; - - /* Second hash function as suggested in [Knuth]. */ - hash = 1 + hval % (htab->size - 2); - - do - { - if (idx <= hash) - idx = htab->size + idx - hash; - else - idx -= hash; - - /* If entry is found use it. */ - if (table[idx].used == hval && table[idx].keylen == keylen - && memcmp (key, table[idx].key, keylen) == 0) - return idx; - } - while (table[idx].used); - } - return idx; -} - - /* References: [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986 [Knuth] The Art of Computer Programming, part3 (6.4) */ static size_t -lookup_2 (htab, key, keylen, hval) +lookup (htab, key, keylen, hval) hash_table *htab; const void *key; size_t keylen; @@ -342,7 +294,7 @@ compute_hashval (key, keylen) size_t keylen; { size_t cnt; - unsigned long int hval, g; + unsigned long int hval; /* Compute the hash value for the given string. The algorithm is taken from [Aho,Sethi,Ullman]. */ @@ -350,14 +302,8 @@ compute_hashval (key, keylen) hval = keylen; while (cnt < keylen) { - hval <<= 4; - hval += ((char *) key)[cnt++]; - g = hval & ((unsigned long) 0xf << (LONGBITS - 4)); - if (g != 0) - { - hval ^= g >> (LONGBITS - 8); - hval ^= g; - } + hval = (hval << 9) | (hval >> (LONGBITS - 9)); + hval += (unsigned long int) *(((const char *) key) + cnt++); } return hval != 0 ? hval : ~((unsigned long) 0); }