/* hashlib.c -- functions to manage and access hash tables for bash. */
-/* Copyright (C) 1987, 1989, 1991 Free Software Foundation, Inc.
+/* Copyright (C) 1987-1991,1995,1998,2001-2009,2020,2022 Free Software Foundation, Inc.
-This file is part of GNU Bash, the Bourne Again SHell.
+ This file is part of GNU Bash, the Bourne Again SHell.
-Bash is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 1, or (at your option) any later
-version.
+ Bash is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
-Bash is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+ Bash is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
-You should have received a copy of the GNU General Public License along
-with Bash; see the file COPYING. If not, write to the Free Software
-Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+ You should have received a copy of the GNU General Public License
+ along with Bash. If not, see <http://www.gnu.org/licenses/>.
+*/
-#include "config.h"
+#include <config.h>
#include "bashansi.h"
#include "shell.h"
#include "hashlib.h"
-/* Zero the buckets in TABLE. */
-static void
-initialize_hash_table (table)
- HASH_TABLE *table;
-{
- register int i;
- for (i = 0; i < table->nbuckets; i++)
- table->bucket_array[i] = (BUCKET_CONTENTS *)NULL;
-}
+/* tunable constants for rehashing */
+#define HASH_REHASH_MULTIPLIER 4
+#define HASH_REHASH_FACTOR 2
+
+#define HASH_SHOULDGROW(table) \
+ ((table)->nentries >= (table)->nbuckets * HASH_REHASH_FACTOR)
+
+/* an initial approximation */
+#define HASH_SHOULDSHRINK(table) \
+ (((table)->nbuckets > DEFAULT_HASH_BUCKETS) && \
+ ((table)->nentries < (table)->nbuckets / HASH_REHASH_MULTIPLIER))
+
+/* Rely on properties of unsigned division (unsigned/int -> unsigned) and
+ don't discard the upper 32 bits of the value, if present. */
+#define HASH_BUCKET(s, t, h) (((h) = hash_string (s)) & ((t)->nbuckets - 1))
+
+static BUCKET_CONTENTS *copy_bucket_array (BUCKET_CONTENTS *, sh_string_func_t *);
+
+static void hash_rehash (HASH_TABLE *, int);
+static void hash_grow (HASH_TABLE *);
+static void hash_shrink (HASH_TABLE *);
/* Make a new hash table with BUCKETS number of buckets. Initialize
each slot in the table to NULL. */
HASH_TABLE *
-make_hash_table (buckets)
- int buckets;
+hash_create (int buckets)
{
HASH_TABLE *new_table;
+ register int i;
new_table = (HASH_TABLE *)xmalloc (sizeof (HASH_TABLE));
if (buckets == 0)
(BUCKET_CONTENTS **)xmalloc (buckets * sizeof (BUCKET_CONTENTS *));
new_table->nbuckets = buckets;
new_table->nentries = 0;
- initialize_hash_table (new_table);
+
+ for (i = 0; i < buckets; i++)
+ new_table->bucket_array[i] = (BUCKET_CONTENTS *)NULL;
+
return (new_table);
}
-/* Return the location of the bucket which should contain the data
- for STRING. TABLE is a pointer to a HASH_TABLE. */
+int
+hash_size (HASH_TABLE *table)
+{
+ return (HASH_ENTRIES(table));
+}
-/* A possibly better distribution may be obtained by initializing i to
- ~0UL and using i = (i * 33) + *string++ as the step */
+/* Copy a hash table bucket array. Call (*cpdata) to copy the data from
+ each element. */
+static BUCKET_CONTENTS *
+copy_bucket_array (BUCKET_CONTENTS *ba, sh_string_func_t *cpdata)
+{
+ BUCKET_CONTENTS *new_bucket, *n, *e;
-#define ALL_ONES (~((unsigned long) 0))
-#define BITS(h, n) ((unsigned long)(h) & ~(ALL_ONES << (n)))
+ if (ba == 0)
+ return ((BUCKET_CONTENTS *)0);
-int
-hash_string (string, table)
- char *string;
- HASH_TABLE *table;
+ for (n = (BUCKET_CONTENTS *)0, e = ba; e; e = e->next)
+ {
+ if (n == 0)
+ {
+ new_bucket = (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
+ n = new_bucket;
+ }
+ else
+ {
+ n->next = (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
+ n = n->next;
+ }
+
+ n->key = savestring (e->key);
+ n->data = e->data ? (cpdata ? (*cpdata) (e->data) : savestring (e->data))
+ : NULL;
+ n->khash = e->khash;
+ n->times_found = e->times_found;
+ n->next = (BUCKET_CONTENTS *)NULL;
+ }
+
+ return new_bucket;
+}
+
+static void
+hash_rehash (HASH_TABLE *table, int nsize)
+{
+ int osize, i, j;
+ BUCKET_CONTENTS **old_bucket_array, *item, *next;
+
+ if (table == NULL || nsize == table->nbuckets)
+ return;
+
+ osize = table->nbuckets;
+ old_bucket_array = table->bucket_array;
+
+ table->nbuckets = nsize;
+ table->bucket_array = (BUCKET_CONTENTS **)xmalloc (table->nbuckets * sizeof (BUCKET_CONTENTS *));
+ for (i = 0; i < table->nbuckets; i++)
+ table->bucket_array[i] = (BUCKET_CONTENTS *)NULL;
+
+ for (j = 0; j < osize; j++)
+ {
+ for (item = old_bucket_array[j]; item; item = next)
+ {
+ next = item->next;
+ i = item->khash & (table->nbuckets - 1);
+ item->next = table->bucket_array[i];
+ table->bucket_array[i] = item;
+ }
+ }
+
+ free (old_bucket_array);
+}
+
+static void
+hash_grow (HASH_TABLE *table)
+{
+ int nsize;
+
+ nsize = table->nbuckets * HASH_REHASH_MULTIPLIER;
+ if (nsize > 0) /* overflow */
+ hash_rehash (table, nsize);
+}
+
+static void
+hash_shrink (HASH_TABLE *table)
+{
+ int nsize;
+
+ nsize = table->nbuckets / HASH_REHASH_MULTIPLIER;
+ hash_rehash (table, nsize);
+}
+
+/* Copy an entire hash table. (*cpdata) copies the data in each element. */
+HASH_TABLE *
+hash_copy (HASH_TABLE *table, sh_string_func_t *cpdata)
+{
+ HASH_TABLE *new_table;
+ int i;
+
+ if (table == 0)
+ return ((HASH_TABLE *)NULL);
+
+ new_table = hash_create (table->nbuckets);
+
+ for (i = 0; i < table->nbuckets; i++)
+ new_table->bucket_array[i] = copy_bucket_array (table->bucket_array[i], cpdata);
+
+ new_table->nentries = table->nentries;
+ return new_table;
+}
+
+/* This is the best 32-bit string hash function I found. It's one of the
+ Fowler-Noll-Vo family (FNV-1).
+
+ The magic is in the interesting relationship between the special prime
+ 16777619 (2^24 + 403) and 2^32 and 2^8. */
+
+#define FNV_OFFSET 2166136261
+#define FNV_PRIME 16777619
+
+/* If you want to use 64 bits, use
+FNV_OFFSET 14695981039346656037
+FNV_PRIME 1099511628211
+*/
+
+/* The `khash' check below requires that strings that compare equally with
+ strcmp hash to the same value. */
+unsigned int
+hash_string (const char *s)
{
- register unsigned int i = 0;
+ register unsigned int i;
- while (*string)
- i = (i << 2) + *string++;
+ for (i = FNV_OFFSET; *s; s++)
+ {
+ /* FNV-1a has the XOR first, traditional FNV-1 has the multiply first */
+
+ /* was i *= FNV_PRIME */
+ i += (i<<1) + (i<<4) + (i<<7) + (i<<8) + (i<<24);
+ i ^= *s;
+ }
+
+ return i;
+}
+
+/* Return the location of the bucket which should contain the data
+ for STRING. TABLE is a pointer to a HASH_TABLE. */
+
+int
+hash_bucket (const char *string, HASH_TABLE *table)
+{
+ unsigned int h;
- return (BITS (i, 31) % table->nbuckets);
+ return (HASH_BUCKET (string, table, h));
}
-/* Return a pointer to the hashed item, or NULL if the item
- can't be found. */
+/* Return a pointer to the hashed item. If the HASH_CREATE flag is passed,
+ create a new hash table entry for STRING, otherwise return NULL. */
BUCKET_CONTENTS *
-find_hash_item (string, table)
- char *string;
- HASH_TABLE *table;
+hash_search (const char *string, HASH_TABLE *table, int flags)
{
BUCKET_CONTENTS *list;
- int which_bucket;
+ int bucket;
+ unsigned int hv;
- if (table == 0)
+ if (table == 0 || ((flags & HASH_CREATE) == 0 && HASH_ENTRIES (table) == 0))
return (BUCKET_CONTENTS *)NULL;
- which_bucket = hash_string (string, table);
+ bucket = HASH_BUCKET (string, table, hv);
- for (list = table->bucket_array[which_bucket]; list; list = list->next)
+ for (list = table->bucket_array ? table->bucket_array[bucket] : 0; list; list = list->next)
{
- if (STREQ (list->key, string))
+ /* This is the comparison function */
+ if (hv == list->khash && STREQ (list->key, string))
{
list->times_found++;
return (list);
}
}
+
+ if (flags & HASH_CREATE)
+ {
+ if (HASH_SHOULDGROW (table))
+ {
+ hash_grow (table);
+ bucket = HASH_BUCKET (string, table, hv);
+ }
+
+ list = (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
+ list->next = table->bucket_array[bucket];
+ table->bucket_array[bucket] = list;
+
+ list->data = NULL;
+ list->key = (char *)string; /* XXX fix later */
+ list->khash = hv;
+ list->times_found = 0;
+
+ table->nentries++;
+ return (list);
+ }
+
return (BUCKET_CONTENTS *)NULL;
}
The item removed is returned, so you can free its contents. If
the item isn't in this table NULL is returned. */
BUCKET_CONTENTS *
-remove_hash_item (string, table)
- char *string;
- HASH_TABLE *table;
+hash_remove (const char *string, HASH_TABLE *table, int flags)
{
- int the_bucket;
+ int bucket;
BUCKET_CONTENTS *prev, *temp;
+ unsigned int hv;
- if (table == 0)
+ if (table == 0 || HASH_ENTRIES (table) == 0)
return (BUCKET_CONTENTS *)NULL;
- the_bucket = hash_string (string, table);
+ bucket = HASH_BUCKET (string, table, hv);
prev = (BUCKET_CONTENTS *)NULL;
- for (temp = table->bucket_array[the_bucket]; temp; temp = temp->next)
+ for (temp = table->bucket_array[bucket]; temp; temp = temp->next)
{
- if (STREQ (temp->key, string))
+ if (hv == temp->khash && STREQ (temp->key, string))
{
if (prev)
prev->next = temp->next;
else
- table->bucket_array[the_bucket] = temp->next;
+ table->bucket_array[bucket] = temp->next;
table->nentries--;
return (temp);
}
/* Create an entry for STRING, in TABLE. If the entry already
- exists, then return it. */
+ exists, then return it (unless the HASH_NOSRCH flag is set). */
BUCKET_CONTENTS *
-add_hash_item (string, table)
- char *string;
- HASH_TABLE *table;
+hash_insert (char *string, HASH_TABLE *table, int flags)
{
BUCKET_CONTENTS *item;
int bucket;
+ unsigned int hv;
if (table == 0)
- table = make_hash_table (0);
+ table = hash_create (0);
+
+ item = (flags & HASH_NOSRCH) ? (BUCKET_CONTENTS *)NULL
+ : hash_search (string, table, 0);
- if ((item = find_hash_item (string, table)) == 0)
+ if (item == 0)
{
- bucket = hash_string (string, table);
- item = table->bucket_array[bucket];
+ if (HASH_SHOULDGROW (table))
+ hash_grow (table);
- while (item && item->next)
- item = item->next;
+ bucket = HASH_BUCKET (string, table, hv);
- if (item)
- {
- item->next = (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
- item = item->next;
- }
- else
- {
- table->bucket_array[bucket] =
- (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
- item = table->bucket_array[bucket];
- }
+ item = (BUCKET_CONTENTS *)xmalloc (sizeof (BUCKET_CONTENTS));
+ item->next = table->bucket_array[bucket];
+ table->bucket_array[bucket] = item;
- item->data = (char *)NULL;
- item->next = (BUCKET_CONTENTS *)NULL;
+ item->data = NULL;
item->key = string;
- table->nentries++;
+ item->khash = hv;
item->times_found = 0;
+
+ table->nentries++;
}
return (item);
is a function to call to dispose of a hash item's data. Otherwise,
free() is called. */
void
-flush_hash_table (table, free_data)
- HASH_TABLE *table;
- VFunction *free_data;
+hash_flush (HASH_TABLE *table, sh_free_func_t *free_data)
{
int i;
register BUCKET_CONTENTS *bucket, *item;
- if (table == 0)
+ if (table == 0 || HASH_ENTRIES (table) == 0)
return;
for (i = 0; i < table->nbuckets; i++)
}
table->bucket_array[i] = (BUCKET_CONTENTS *)NULL;
}
+
+ table->nentries = 0;
}
/* Free the hash table pointed to by TABLE. */
void
-dispose_hash_table (table)
- HASH_TABLE *table;
+hash_dispose (HASH_TABLE *table)
{
free (table->bucket_array);
free (table);
}
-/* No longer necessary; everything uses the macro */
-#if 0
-/* Return the bucket_contents list of bucket BUCKET in TABLE. If
- TABLE doesn't have BUCKET buckets, return NULL. */
-#undef get_hash_bucket
-BUCKET_CONTENTS *
-get_hash_bucket (bucket, table)
- int bucket;
- HASH_TABLE *table;
+/* Call (*FUNC) for each element in TABLE. If FUNC returns < 0, abort the
+ walk. */
+void
+hash_walk (HASH_TABLE *table, hash_wfunc *func)
{
- if (table && bucket < table->nbuckets)
- return (table->bucket_array[bucket]);
- else
- return (BUCKET_CONTENTS *)NULL;
+ register int i;
+ BUCKET_CONTENTS *item;
+
+ if (table == 0 || HASH_ENTRIES (table) == 0)
+ return;
+
+ for (i = 0; i < table->nbuckets; i++)
+ {
+ for (item = hash_items (i, table); item; item = item->next)
+ if ((*func) (item) < 0)
+ return;
+ }
}
-#endif
-#ifdef DEBUG
+#if defined (DEBUG) || defined (TEST_HASHING)
void
-print_table_stats (table, name)
- HASH_TABLE *table;
- char *name;
+hash_pstats (HASH_TABLE *table, char *name)
{
register int slot, bcount;
register BUCKET_CONTENTS *bc;
see how even the distribution is. */
for (slot = 0; slot < table->nbuckets; slot++)
{
- bc = get_hash_bucket (slot, table);
+ bc = hash_items (slot, table);
fprintf (stderr, "\tslot %3d: ", slot);
for (bcount = 0; bc; bc = bc->next)
- bcount++;
+ bcount++;
fprintf (stderr, "%d\n", bcount);
}
#ifdef TEST_HASHING
+/* link with xmalloc.o and lib/malloc/libmalloc.a */
#undef NULL
#include <stdio.h>
-HASH_TABLE *table;
-#define NBUCKETS 107
+#ifndef NULL
+#define NULL 0
+#endif
+
+HASH_TABLE *table, *ntable;
+
+int interrupt_immediately = 0;
+int running_trap = 0;
-char *
-xmalloc (bytes)
- int bytes;
+int
+signal_is_trapped (int s)
{
- char *result = (char *)malloc (bytes);
- if (!result)
- {
- fprintf (stderr, "hash: out of virtual memory\n");
- abort ();
- }
- return (result);
+ return (0);
}
-main ()
+void
+programming_error (const char *format, ...)
+{
+ abort();
+}
+
+void
+fatal_error (const char *format, ...)
+{
+ abort();
+}
+
+void
+internal_warning (const char *format, ...)
+{
+}
+
+int
+main (int c, char **v)
{
char string[256];
int count = 0;
BUCKET_CONTENTS *tt;
- table = make_hash_table (NBUCKETS);
+#if defined (TEST_NBUCKETS)
+ table = hash_create (TEST_NBUCKETS);
+#else
+ table = hash_create (0);
+#endif
for (;;)
{
char *temp_string;
if (fgets (string, sizeof (string), stdin) == 0)
- break;
+ break;
if (!*string)
- break;
+ break;
temp_string = savestring (string);
- tt = add_hash_item (temp_string, table);
+ tt = hash_insert (temp_string, table, 0);
if (tt->times_found)
{
fprintf (stderr, "You have already added item `%s'\n", string);
}
}
- print_table_stats (table, "hash test");
+ hash_pstats (table, "hash test");
+
+ ntable = hash_copy (table, (sh_string_func_t *)NULL);
+ hash_flush (table, (sh_free_func_t *)NULL);
+ hash_pstats (ntable, "hash copy test");
+
exit (0);
}