[thirdparty/squid.git] / test-suite / hash.c

/*
 * Copyright (C) 1996-2020 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

/* DEBUG: section 00    Hash Tables */

#include "squid.h"

#if HAVE_UNISTD_H
#include <unistd.h>
#endif
#if HAVE_CTYPE_H
#include <ctype.h>
#endif
#if HAVE_STRINGS_H
#include <strings.h>
#endif
#if HAVE_ASSERT_H
#include <assert.h>
#endif

#include "hash.h"

#undef free
extern void my_free(char *, int, void *);

#define free(a) my_free(__FILE__, __LINE__, a)

extern void print_stats();
/*
 *  hash_url() - Returns a well-distributed hash function for URLs.
 *  The best way is to sum up the last half of the string.
 *  Adapted from code written by Mic Bowman.  -Darren
 *  Generates a standard deviation = 15.73
 */
unsigned int
hash_url(const void *data, unsigned int size)
{
    const char *s = data;
    unsigned int i, j, n;
    j = strlen(s);
    for (i = j / 2, n = 0; i < j; i++)
        n ^= 271 * (unsigned) s[i];
    i = n ^ (j * 271);
    return i % size;
}

unsigned int
hash_string(const void *data, unsigned int size)
{
    const char *s = data;
    unsigned int n = 0;
    unsigned int j = 0;
    unsigned int i = 0;
    while (*s) {
        j++;
        n ^= 271 * (unsigned) *s++;
    }
    i = n ^ (j * 271);
    return i % size;
}

/* the following 4 functions were adapted from
 *    usr/src/lib/libc/db/hash_func.c, 4.4 BSD lite */

/*
 * HASH FUNCTIONS
 *
 * Assume that we've already split the bucket to which this key hashes,
 * calculate that bucket, and check that in fact we did already split it.
 *
 * This came from ejb's hsearch.
 */

#define PRIME1      37
#define PRIME2      1048583

/* Hash function from Chris Torek. */
unsigned int
hash4(const void *data, unsigned int size)
{
    const char *key = data;
    size_t loop;
    unsigned int h;
    size_t len;

#define HASH4a   h = (h << 5) - h + *key++;
#define HASH4b   h = (h << 5) + h + *key++;
#define HASH4 HASH4b

    h = 0;
    len = strlen(key);
    loop = (len + 8 - 1) >> 3;
    switch (len & (8 - 1)) {
    case 0:
        do {
            HASH4;
        /* FALLTHROUGH */
        case 7:
            HASH4;
        /* FALLTHROUGH */
        case 6:
            HASH4;
        /* FALLTHROUGH */
        case 5:
            HASH4;
        /* FALLTHROUGH */
        case 4:
            HASH4;
        /* FALLTHROUGH */
        case 3:
            HASH4;
        /* FALLTHROUGH */
        case 2:
            HASH4;
        /* FALLTHROUGH */
        case 1:
            HASH4;
        } while (--loop);
    }
    return h % size;
}

/*
 *  hash_create - creates a new hash table, uses the cmp_func
 *  to compare keys.  Returns the identification for the hash table;
 *  otherwise returns a negative number on error.
 */
hash_table *
hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func)
{
    hash_table *hid = calloc(1, sizeof(hash_table));
    if (!hash_sz)
        hid->size = (unsigned int) DEFAULT_HASH_SIZE;
    else
        hid->size = (unsigned int) hash_sz;
    /* allocate and null the buckets */
    hid->buckets = calloc(hid->size, sizeof(hash_link *));
    hid->cmp = cmp_func;
    hid->hash = hash_func;
    hid->current_ptr = NULL;
    hid->current_slot = 0;
    return hid;
}

/*
 *  hash_insert - inserts the given item 'item' under the given key 'k'
 *  into the hash table 'hid'.  Returns non-zero on error; otherwise,
 *  returns 0 and inserts the item.
 *
 *  It does not copy any data into the hash table, only pointers.
 */
void
hash_insert(hash_table * hid, const char *k, void *item)
{
    int i;
    hash_link *new;
    assert(k != NULL);
    /* Add to the given hash table 'hid' */
    new = calloc(1, sizeof(hash_link));
    if (!new) {
        fprintf(stderr, "calloc failed!\n");
        print_stats();
        exit(1);
    }
    new->item = item;
    new->key = (char *) k;
    i = hid->hash(k, hid->size);
    new->next = hid->buckets[i];
    hid->buckets[i] = new;
}

/*
 *  hash_join - joins a hash_link under its key lnk->key
 *  into the hash table 'hid'.
 *
 *  It does not copy any data into the hash table, only links pointers.
 */
void
hash_join(hash_table * hid, hash_link * lnk)
{
    int i;
    i = hid->hash(lnk->key, hid->size);
    lnk->next = hid->buckets[i];
    hid->buckets[i] = lnk;
}

/*
 *  hash_lookup - locates the item under the key 'k' in the hash table
 *  'hid'.  Returns a pointer to the hash bucket on success; otherwise
 *  returns NULL.
 */
hash_link *
hash_lookup(hash_table * hid, const void *k)
{
    hash_link *walker;
    int b;
    assert(k != NULL);
    b = hid->hash(k, hid->size);
    for (walker = hid->buckets[b]; walker != NULL; walker = walker->next) {
        if ((hid->cmp) (k, walker->key) == 0)
            return (walker);
        assert(walker != walker->next);
    }
    return NULL;
}

/*
 *  hash_first - returns the first item in the hash table 'hid'.
 *  Otherwise, returns NULL on error.
 */
hash_link *
hash_first(hash_table * hid)
{
    int i;

    for (i = 0; i < hid->size; i++) {
        hid->current_slot = i;
        if (hid->buckets[i] != NULL)
            return (hid->current_ptr = hid->buckets[i]);
    }
    return NULL;
}

/*
 *  hash_next - returns the next item in the hash table 'hid'.
 *  Otherwise, returns NULL on error or end of list.
 *
 *  MUST call hash_first() before hash_next().
 */
hash_link *
hash_next(hash_table * hid)
{
    int i;

    if (hid->current_ptr != NULL) {
        hid->current_ptr = hid->current_ptr->next;
        if (hid->current_ptr != NULL)
            return (hid->current_ptr);  /* next item */
    }
    /* find next bucket */
    for (i = hid->current_slot + 1; i < hid->size; i++) {
        hid->current_slot = i;
        if (hid->buckets[i] != NULL)
            return (hid->current_ptr = hid->buckets[i]);
    }
    return NULL;        /* end of list */
}

int
hash_delete(hash_table * hid, const char *key)
{
    return hash_delete_link(hid, hash_lookup(hid, key));
}

/*
 *  hash_delete_link - deletes the given hash_link node from the
 *  hash table 'hid'. If FreeLink then free the given hash_link.
 *
 *  On success, it returns 0 and deletes the link; otherwise,
 *  returns non-zero on error.
 */
int
hash_unlink(hash_table * hid, hash_link * hl, int FreeLink)
{
    hash_link *walker, *prev;
    int i;
    if (hl == NULL)
        return -1;
    i = hid->hash(hl->key, hid->size);
    for (prev = NULL, walker = hid->buckets[i];
            walker != NULL; prev = walker, walker = walker->next) {
        if (walker == hl) {
            if (prev == NULL) { /* it's the head */
                hid->buckets[i] = walker->next;
            } else {
                prev->next = walker->next;  /* skip it */
            }
            /* fix walker state if needed */
            if (walker == hid->current_ptr)
                hid->current_ptr = walker->next;
            if (FreeLink) {
                if (walker) {
                    free(walker);
                }
            }
            return 0;
        }
    }
    return 1;
}

/* take link off and free link node */
int
hash_delete_link(hash_table * hid, hash_link * hl)
{
    return (hash_unlink(hid, hl, 1));
}

/* take link off only */
int
hash_remove_link(hash_table * hid, hash_link * hl)
{
    return (hash_unlink(hid, hl, 0));
}

/*
 *  hash_get_bucket - returns the head item of the bucket
 *  in the hash table 'hid'. Otherwise, returns NULL on error.
 */
hash_link *
hash_get_bucket(hash_table * hid, unsigned int bucket)
{
    if (bucket >= hid->size)
        return NULL;
    return (hid->buckets[bucket]);
}

void
hashFreeMemory(hash_table * hid)
{
    if (hid->buckets);
    free(hid->buckets);
    if (hid)
        free(hid);
}

#if USE_HASH_DRIVER
/*
 *  hash-driver - Run with a big file as stdin to insert each line into the
 *  hash table, then prints the whole hash table, then deletes a random item,
 *  and prints the table again...
 */
int
main(void)
{
    hash_table *hid;
    int i;
    LOCAL_ARRAY(char, buf, BUFSIZ);
    LOCAL_ARRAY(char, todelete, BUFSIZ);
    hash_link *walker = NULL;

    todelete[0] = '\0';
    printf("init\n");

    printf("creating hash table\n");
    if ((hid = hash_create(strcmp, 229, hash_string)) < 0) {
        printf("hash_create error.\n");
        exit(1);
    }
    printf("done creating hash table: %d\n", hid);

    std::mt19937 mt;
    xuniform_int_distribution<> dist(0,16);

    while (fgets(buf, BUFSIZ, stdin)) {
        buf[strlen(buf) - 1] = '\0';
        printf("Inserting '%s' for item %p to hash table: %d\n",
               buf, buf, hid);
        hash_insert(hid, xstrdup(buf), (void *) 0x12345678);
        if (dist(mt) == 0)
            strcpy(todelete, buf);
    }

    printf("walking hash table...\n");
    for (i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {
        printf("item %5d: key: '%s' item: %p\n", i++, walker->key,
               walker->item);
    }
    printf("done walking hash table...\n");

    if (todelete[0]) {
        printf("deleting %s from %d\n", todelete, hid);
        if (hash_delete(hid, todelete))
            printf("hash_delete error\n");
    }
    printf("walking hash table...\n");
    for (i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {
        printf("item %5d: key: '%s' item: %p\n", i++, walker->key,
               walker->item);
    }
    printf("done walking hash table...\n");

    printf("driver finished.\n");
    exit(0);
}
#endif
Commit	Line	Data
	1	/*
	2	* Copyright (C) 1996-2020 The Squid Software Foundation and contributors
	3	*
	4	* Squid software is distributed under GPLv2+ license and includes
	5	* contributions from numerous individuals and organizations.
	6	* Please see the COPYING and CONTRIBUTORS files for details.
	7	*/
	8
	9	/* DEBUG: section 00 Hash Tables */
	10
	11	#include "squid.h"
	12
	13	#if HAVE_UNISTD_H
	14	#include <unistd.h>
	15	#endif
	16	#if HAVE_CTYPE_H
	17	#include <ctype.h>
	18	#endif
	19	#if HAVE_STRINGS_H
	20	#include <strings.h>
	21	#endif
	22	#if HAVE_ASSERT_H
	23	#include <assert.h>
	24	#endif
	25
	26	#include "hash.h"
	27
	28	#undef free
	29	extern void my_free(char , int, void );
	30
	31	#define free(a) my_free(__FILE__, __LINE__, a)
	32
	33	extern void print_stats();
	34	/*
	35	* hash_url() - Returns a well-distributed hash function for URLs.
	36	* The best way is to sum up the last half of the string.
	37	* Adapted from code written by Mic Bowman. -Darren
	38	* Generates a standard deviation = 15.73
	39	*/
	40	unsigned int
	41	hash_url(const void *data, unsigned int size)
	42	{
	43	const char *s = data;
	44	unsigned int i, j, n;
	45	j = strlen(s);
	46	for (i = j / 2, n = 0; i < j; i++)
	47	n ^= 271 * (unsigned) s[i];
	48	i = n ^ (j * 271);
	49	return i % size;
	50	}
	51
	52	unsigned int
	53	hash_string(const void *data, unsigned int size)
	54	{
	55	const char *s = data;
	56	unsigned int n = 0;
	57	unsigned int j = 0;
	58	unsigned int i = 0;
	59	while (*s) {
	60	j++;
	61	n ^= 271 * (unsigned) *s++;
	62	}
	63	i = n ^ (j * 271);
	64	return i % size;
	65	}
	66
	67	/* the following 4 functions were adapted from
	68	* usr/src/lib/libc/db/hash_func.c, 4.4 BSD lite */
	69
	70	/*
	71	* HASH FUNCTIONS
	72	*
	73	* Assume that we've already split the bucket to which this key hashes,
	74	* calculate that bucket, and check that in fact we did already split it.
	75	*
	76	* This came from ejb's hsearch.
	77	*/
	78
	79	#define PRIME1 37
	80	#define PRIME2 1048583
	81
	82	/* Hash function from Chris Torek. */
	83	unsigned int
	84	hash4(const void *data, unsigned int size)
	85	{
	86	const char *key = data;
	87	size_t loop;
	88	unsigned int h;
	89	size_t len;
	90
	91	#define HASH4a h = (h << 5) - h + *key++;
	92	#define HASH4b h = (h << 5) + h + *key++;
	93	#define HASH4 HASH4b
	94
	95	h = 0;
	96	len = strlen(key);
	97	loop = (len + 8 - 1) >> 3;
	98	switch (len & (8 - 1)) {
	99	case 0:
	100	do {
	101	HASH4;
	102	/* FALLTHROUGH */
	103	case 7:
	104	HASH4;
	105	/* FALLTHROUGH */
	106	case 6:
	107	HASH4;
	108	/* FALLTHROUGH */
	109	case 5:
	110	HASH4;
	111	/* FALLTHROUGH */
	112	case 4:
	113	HASH4;
	114	/* FALLTHROUGH */
	115	case 3:
	116	HASH4;
	117	/* FALLTHROUGH */
	118	case 2:
	119	HASH4;
	120	/* FALLTHROUGH */
	121	case 1:
	122	HASH4;
	123	} while (--loop);
	124	}
	125	return h % size;
	126	}
	127
	128	/*
	129	* hash_create - creates a new hash table, uses the cmp_func
	130	* to compare keys. Returns the identification for the hash table;
	131	* otherwise returns a negative number on error.
	132	*/
	133	hash_table *
	134	hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func)
	135	{
	136	hash_table *hid = calloc(1, sizeof(hash_table));
	137	if (!hash_sz)
	138	hid->size = (unsigned int) DEFAULT_HASH_SIZE;
	139	else
	140	hid->size = (unsigned int) hash_sz;
	141	/* allocate and null the buckets */
	142	hid->buckets = calloc(hid->size, sizeof(hash_link *));
	143	hid->cmp = cmp_func;
	144	hid->hash = hash_func;
	145	hid->current_ptr = NULL;
	146	hid->current_slot = 0;
	147	return hid;
	148	}
	149
	150	/*
	151	* hash_insert - inserts the given item 'item' under the given key 'k'
	152	* into the hash table 'hid'. Returns non-zero on error; otherwise,
	153	* returns 0 and inserts the item.
	154	*
	155	* It does not copy any data into the hash table, only pointers.
	156	*/
	157	void
	158	hash_insert(hash_table * hid, const char k, void item)
	159	{
	160	int i;
	161	hash_link *new;
	162	assert(k != NULL);
	163	/* Add to the given hash table 'hid' */
	164	new = calloc(1, sizeof(hash_link));
	165	if (!new) {
	166	fprintf(stderr, "calloc failed!\n");
	167	print_stats();
	168	exit(1);
	169	}
	170	new->item = item;
	171	new->key = (char *) k;
	172	i = hid->hash(k, hid->size);
	173	new->next = hid->buckets[i];
	174	hid->buckets[i] = new;
	175	}
	176
	177	/*
	178	* hash_join - joins a hash_link under its key lnk->key
	179	* into the hash table 'hid'.
	180	*
	181	* It does not copy any data into the hash table, only links pointers.
	182	*/
	183	void
	184	hash_join(hash_table * hid, hash_link * lnk)
	185	{
	186	int i;
	187	i = hid->hash(lnk->key, hid->size);
	188	lnk->next = hid->buckets[i];
	189	hid->buckets[i] = lnk;
	190	}
	191
	192	/*
	193	* hash_lookup - locates the item under the key 'k' in the hash table
	194	* 'hid'. Returns a pointer to the hash bucket on success; otherwise
	195	* returns NULL.
	196	*/
	197	hash_link *
	198	hash_lookup(hash_table * hid, const void *k)
	199	{
	200	hash_link *walker;
	201	int b;
	202	assert(k != NULL);
	203	b = hid->hash(k, hid->size);
	204	for (walker = hid->buckets[b]; walker != NULL; walker = walker->next) {
	205	if ((hid->cmp) (k, walker->key) == 0)
	206	return (walker);
	207	assert(walker != walker->next);
	208	}
	209	return NULL;
	210	}
	211
	212	/*
	213	* hash_first - returns the first item in the hash table 'hid'.
	214	* Otherwise, returns NULL on error.
	215	*/
	216	hash_link *
	217	hash_first(hash_table * hid)
	218	{
	219	int i;
	220
	221	for (i = 0; i < hid->size; i++) {
	222	hid->current_slot = i;
	223	if (hid->buckets[i] != NULL)
	224	return (hid->current_ptr = hid->buckets[i]);
	225	}
	226	return NULL;
	227	}
	228
	229	/*
	230	* hash_next - returns the next item in the hash table 'hid'.
	231	* Otherwise, returns NULL on error or end of list.
	232	*
	233	* MUST call hash_first() before hash_next().
	234	*/
	235	hash_link *
	236	hash_next(hash_table * hid)
	237	{
	238	int i;
	239
	240	if (hid->current_ptr != NULL) {
	241	hid->current_ptr = hid->current_ptr->next;
	242	if (hid->current_ptr != NULL)
	243	return (hid->current_ptr); /* next item */
	244	}
	245	/* find next bucket */
	246	for (i = hid->current_slot + 1; i < hid->size; i++) {
	247	hid->current_slot = i;
	248	if (hid->buckets[i] != NULL)
	249	return (hid->current_ptr = hid->buckets[i]);
	250	}
	251	return NULL; /* end of list */
	252	}
	253
	254	int
	255	hash_delete(hash_table * hid, const char *key)
	256	{
	257	return hash_delete_link(hid, hash_lookup(hid, key));
	258	}
	259
	260	/*
	261	* hash_delete_link - deletes the given hash_link node from the
	262	* hash table 'hid'. If FreeLink then free the given hash_link.
	263	*
	264	* On success, it returns 0 and deletes the link; otherwise,
	265	* returns non-zero on error.
	266	*/
	267	int
	268	hash_unlink(hash_table * hid, hash_link * hl, int FreeLink)
	269	{
	270	hash_link walker, prev;
	271	int i;
	272	if (hl == NULL)
	273	return -1;
	274	i = hid->hash(hl->key, hid->size);
	275	for (prev = NULL, walker = hid->buckets[i];
	276	walker != NULL; prev = walker, walker = walker->next) {
	277	if (walker == hl) {
	278	if (prev == NULL) { /* it's the head */
	279	hid->buckets[i] = walker->next;
	280	} else {
	281	prev->next = walker->next; /* skip it */
	282	}
	283	/* fix walker state if needed */
	284	if (walker == hid->current_ptr)
	285	hid->current_ptr = walker->next;
	286	if (FreeLink) {
	287	if (walker) {
	288	free(walker);
	289	}
	290	}
	291	return 0;
	292	}
	293	}
	294	return 1;
	295	}
	296
	297	/* take link off and free link node */
	298	int
	299	hash_delete_link(hash_table * hid, hash_link * hl)
	300	{
	301	return (hash_unlink(hid, hl, 1));
	302	}
	303
	304	/* take link off only */
	305	int
	306	hash_remove_link(hash_table * hid, hash_link * hl)
	307	{
	308	return (hash_unlink(hid, hl, 0));
	309	}
	310
	311	/*
	312	* hash_get_bucket - returns the head item of the bucket
	313	* in the hash table 'hid'. Otherwise, returns NULL on error.
	314	*/
	315	hash_link *
	316	hash_get_bucket(hash_table * hid, unsigned int bucket)
	317	{
	318	if (bucket >= hid->size)
	319	return NULL;
	320	return (hid->buckets[bucket]);
	321	}
	322
	323	void
	324	hashFreeMemory(hash_table * hid)
	325	{
	326	if (hid->buckets);
	327	free(hid->buckets);
	328	if (hid)
	329	free(hid);
	330	}
	331
	332	#if USE_HASH_DRIVER
	333	/*
	334	* hash-driver - Run with a big file as stdin to insert each line into the
	335	* hash table, then prints the whole hash table, then deletes a random item,
	336	* and prints the table again...
	337	*/
	338	int
	339	main(void)
	340	{
	341	hash_table *hid;
	342	int i;
	343	LOCAL_ARRAY(char, buf, BUFSIZ);
	344	LOCAL_ARRAY(char, todelete, BUFSIZ);
	345	hash_link *walker = NULL;
	346
	347	todelete[0] = '\0';
	348	printf("init\n");
	349
	350	printf("creating hash table\n");
	351	if ((hid = hash_create(strcmp, 229, hash_string)) < 0) {
	352	printf("hash_create error.\n");
	353	exit(1);
	354	}
	355	printf("done creating hash table: %d\n", hid);
	356
	357	std::mt19937 mt;
	358	xuniform_int_distribution<> dist(0,16);
	359
	360	while (fgets(buf, BUFSIZ, stdin)) {
	361	buf[strlen(buf) - 1] = '\0';
	362	printf("Inserting '%s' for item %p to hash table: %d\n",
	363	buf, buf, hid);
	364	hash_insert(hid, xstrdup(buf), (void *) 0x12345678);
	365	if (dist(mt) == 0)
	366	strcpy(todelete, buf);
	367	}
	368
	369	printf("walking hash table...\n");
	370	for (i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {
	371	printf("item %5d: key: '%s' item: %p\n", i++, walker->key,
	372	walker->item);
	373	}
	374	printf("done walking hash table...\n");
	375
	376	if (todelete[0]) {
	377	printf("deleting %s from %d\n", todelete, hid);
	378	if (hash_delete(hid, todelete))
	379	printf("hash_delete error\n");
	380	}
	381	printf("walking hash table...\n");
	382	for (i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {
	383	printf("item %5d: key: '%s' item: %p\n", i++, walker->key,
	384	walker->item);
	385	}
	386	printf("done walking hash table...\n");
	387
	388	printf("driver finished.\n");
	389	exit(0);
	390	}
	391	#endif
	392