test-suite/hash.c

   1
   2 /*
   3  * $Id: hash.c,v 1.4 1998/07/20 17:20:25 wessels Exp $
   4  *
   5  * DEBUG: section 0     Hash Tables
   6  * AUTHOR: Harvest Derived
   7  *
   8  * SQUID Internet Object Cache  http://squid.nlanr.net/Squid/
   9  * --------------------------------------------------------
  10  *
  11  *  Squid is the result of efforts by numerous individuals from the
  12  *  Internet community.  Development is led by Duane Wessels of the
  13  *  National Laboratory for Applied Network Research and funded by
  14  *  the National Science Foundation.
  15  *
  16  *  This program is free software; you can redistribute it and/or modify
  17  *  it under the terms of the GNU General Public License as published by
  18  *  the Free Software Foundation; either version 2 of the License, or
  19  *  (at your option) any later version.
  20  *
  21  *  This program is distributed in the hope that it will be useful,
  22  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  23  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  24  *  GNU General Public License for more details.
  25  *
  26  *  You should have received a copy of the GNU General Public License
  27  *  along with this program; if not, write to the Free Software
  28  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
  29  *
  30  */
  31
  32 /*
  33  * Copyright (c) 1994, 1995.  All rights reserved.
  34  *
  35  *   The Harvest software was developed by the Internet Research Task
  36  *   Force Research Group on Resource Discovery (IRTF-RD):
  37  *
  38  *         Mic Bowman of Transarc Corporation.
  39  *         Peter Danzig of the University of Southern California.
  40  *         Darren R. Hardy of the University of Colorado at Boulder.
  41  *         Udi Manber of the University of Arizona.
  42  *         Michael F. Schwartz of the University of Colorado at Boulder.
  43  *         Duane Wessels of the University of Colorado at Boulder.
  44  *
  45  *   This copyright notice applies to software in the Harvest
  46  *   ``src/'' directory only.  Users should consult the individual
  47  *   copyright notices in the ``components/'' subdirectories for
  48  *   copyright information about other software bundled with the
  49  *   Harvest source code distribution.
  50  *
  51  * TERMS OF USE
  52  *
  53  *   The Harvest software may be used and re-distributed without
  54  *   charge, provided that the software origin and research team are
  55  *   cited in any use of the system.  Most commonly this is
  56  *   accomplished by including a link to the Harvest Home Page
  57  *   (http://harvest.cs.colorado.edu/) from the query page of any
  58  *   Broker you deploy, as well as in the query result pages.  These
  59  *   links are generated automatically by the standard Broker
  60  *   software distribution.
  61  *
  62  *   The Harvest software is provided ``as is'', without express or
  63  *   implied warranty, and with no support nor obligation to assist
  64  *   in its use, correction, modification or enhancement.  We assume
  65  *   no liability with respect to the infringement of copyrights,
  66  *   trade secrets, or any patents, and are not responsible for
  67  *   consequential damages.  Proper use of the Harvest software is
  68  *   entirely the responsibility of the user.
  69  *
  70  * DERIVATIVE WORKS
  71  *
  72  *   Users may make derivative works from the Harvest software, subject
  73  *   to the following constraints:
  74  *
  75  *     - You must include the above copyright notice and these
  76  *       accompanying paragraphs in all forms of derivative works,
  77  *       and any documentation and other materials related to such
  78  *       distribution and use acknowledge that the software was
  79  *       developed at the above institutions.
  80  *
  81  *     - You must notify IRTF-RD regarding your distribution of
  82  *       the derivative work.
  83  *
  84  *     - You must clearly notify users that your are distributing
  85  *       a modified version and not the original Harvest software.
  86  *
  87  *     - Any derivative product is also subject to these copyright
  88  *       and use restrictions.
  89  *
  90  *   Note that the Harvest software is NOT in the public domain.  We
  91  *   retain copyright, as specified above.
  92  *
  93  * HISTORY OF FREE SOFTWARE STATUS
  94  *
  95  *   Originally we required sites to license the software in cases
  96  *   where they were going to build commercial products/services
  97  *   around Harvest.  In June 1995 we changed this policy.  We now
  98  *   allow people to use the core Harvest software (the code found in
  99  *   the Harvest ``src/'' directory) for free.  We made this change
 100  *   in the interest of encouraging the widest possible deployment of
 101  *   the technology.  The Harvest software is really a reference
 102  *   implementation of a set of protocols and formats, some of which
 103  *   we intend to standardize.  We encourage commercial
 104  *   re-implementations of code complying to this set of standards.
 105  */
 106
 107 #include <unistd.h>
 108 #include <stdlib.h>
 109 #include <stdio.h>
 110 #include <sys/types.h>
 111 #include <ctype.h>
 112 #include <sys/time.h>
 113 #include <strings.h>
 114 #include <assert.h>
 115 #include "hash.h"
 116 #undef free
 117 extern void my_free(char *, int , void *);
 118
 119 #define free(a) my_free(__FILE__, __LINE__, a)
 120
 121 extern void print_stats();
 122 /*
 123  *  hash_url() - Returns a well-distributed hash function for URLs.
 124  *  The best way is to sum up the last half of the string.
 125  *  Adapted from code written by Mic Bowman.  -Darren
 126  *  Generates a standard deviation = 15.73
 127  */
 128 unsigned int
 129 hash_url(const void *data, unsigned int size)
 130 {
 131     const char *s = data;
 132     unsigned int i, j, n;
 133     j = strlen(s);
 134     for (i = j / 2, n = 0; i < j; i++)
 135         n ^= 271 * (unsigned) s[i];
 136     i = n ^ (j * 271);
 137     return i % size;
 138 }
 139
 140 unsigned int
 141 hash_string(const void *data, unsigned int size)
 142 {
 143     const char *s = data;
 144     unsigned int n = 0;
 145     unsigned int j = 0;
 146     unsigned int i = 0;
 147     while (*s) {
 148         j++;
 149         n ^= 271 * (unsigned) *s++;
 150     }
 151     i = n ^ (j * 271);
 152     return i % size;
 153 }
 154
 155 /* the following 4 functions were adapted from
 156  *    usr/src/lib/libc/db/hash_func.c, 4.4 BSD lite */
 157
 158 /*
 159  * HASH FUNCTIONS
 160  *
 161  * Assume that we've already split the bucket to which this key hashes,
 162  * calculate that bucket, and check that in fact we did already split it.
 163  *
 164  * This came from ejb's hsearch.
 165  */
 166
 167 #define PRIME1          37
 168 #define PRIME2          1048583
 169
 170 /* Hash function from Chris Torek. */
 171 unsigned int
 172 hash4(const void *data, unsigned int size)
 173 {
 174     const char *key = data;
 175     size_t loop;
 176     unsigned int h;
 177     size_t len;
 178
 179 #define HASH4a   h = (h << 5) - h + *key++;
 180 #define HASH4b   h = (h << 5) + h + *key++;
 181 #define HASH4 HASH4b
 182
 183     h = 0;
 184     len = strlen(key);
 185     loop = (len + 8 - 1) >> 3;
 186     switch (len & (8 - 1)) {
 187     case 0:
 188         do {
 189             HASH4;
 190             /* FALLTHROUGH */
 191     case 7:
 192             HASH4;
 193             /* FALLTHROUGH */
 194     case 6:
 195             HASH4;
 196             /* FALLTHROUGH */
 197     case 5:
 198             HASH4;
 199             /* FALLTHROUGH */
 200     case 4:
 201             HASH4;
 202             /* FALLTHROUGH */
 203     case 3:
 204             HASH4;
 205             /* FALLTHROUGH */
 206     case 2:
 207             HASH4;
 208             /* FALLTHROUGH */
 209     case 1:
 210             HASH4;
 211         } while (--loop);
 212     }
 213     return h % size;
 214 }
 215
 216 /*
 217  *  hash_create - creates a new hash table, uses the cmp_func
 218  *  to compare keys.  Returns the identification for the hash table;
 219  *  otherwise returns a negative number on error.
 220  */
 221 hash_table *
 222 hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func)
 223 {
 224     hash_table *hid = calloc(1, sizeof(hash_table));
 225     if (!hash_sz)
 226         hid->size = (unsigned int) DEFAULT_HASH_SIZE;
 227     else
 228         hid->size = (unsigned int) hash_sz;
 229     /* allocate and null the buckets */
 230     hid->buckets = calloc(hid->size, sizeof(hash_link *));
 231     hid->cmp = cmp_func;
 232     hid->hash = hash_func;
 233     hid->current_ptr = NULL;
 234     hid->current_slot = 0;
 235     return hid;
 236 }
 237
 238 /*
 239  *  hash_insert - inserts the given item 'item' under the given key 'k'
 240  *  into the hash table 'hid'.  Returns non-zero on error; otherwise,
 241  *  returns 0 and inserts the item.
 242  *
 243  *  It does not copy any data into the hash table, only pointers.
 244  */
 245 void
 246 hash_insert(hash_table * hid, const char *k, void *item)
 247 {
 248     int i;
 249     hash_link *new;
 250     assert(k != NULL);
 251     /* Add to the given hash table 'hid' */
 252     new = calloc(1, sizeof(hash_link));
 253     if (!new) {
 254         fprintf(stderr,"calloc failed!\n");
 255         print_stats();
 256         exit(1);
 257     }
 258     new->item = item;
 259     new->key = (char *) k;
 260     i = hid->hash(k, hid->size);
 261     new->next = hid->buckets[i];
 262     hid->buckets[i] = new;
 263 }
 264
 265 /*
 266  *  hash_join - joins a hash_link under its key lnk->key
 267  *  into the hash table 'hid'.
 268  *
 269  *  It does not copy any data into the hash table, only links pointers.
 270  */
 271 void
 272 hash_join(hash_table * hid, hash_link * lnk)
 273 {
 274     int i;
 275     i = hid->hash(lnk->key, hid->size);
 276     lnk->next = hid->buckets[i];
 277     hid->buckets[i] = lnk;
 278 }
 279
 280 /*
 281  *  hash_lookup - locates the item under the key 'k' in the hash table
 282  *  'hid'.  Returns a pointer to the hash bucket on success; otherwise
 283  *  returns NULL.
 284  */
 285 hash_link *
 286 hash_lookup(hash_table * hid, const void *k)
 287 {
 288     hash_link *walker;
 289     int b;
 290     assert(k != NULL);
 291     b = hid->hash(k, hid->size);
 292     for (walker = hid->buckets[b]; walker != NULL; walker = walker->next) {
 293         if ((hid->cmp) (k, walker->key) == 0)
 294             return (walker);
 295         assert(walker != walker->next);
 296     }
 297     return NULL;
 298 }
 299
 300 /*
 301  *  hash_first - returns the first item in the hash table 'hid'.
 302  *  Otherwise, returns NULL on error.
 303  */
 304 hash_link *
 305 hash_first(hash_table * hid)
 306 {
 307     int i;
 308
 309     for (i = 0; i < hid->size; i++) {
 310         hid->current_slot = i;
 311         if (hid->buckets[i] != NULL)
 312             return (hid->current_ptr = hid->buckets[i]);
 313     }
 314     return NULL;
 315 }
 316
 317 /*
 318  *  hash_next - returns the next item in the hash table 'hid'.
 319  *  Otherwise, returns NULL on error or end of list.
 320  *
 321  *  MUST call hash_first() before hash_next().
 322  */
 323 hash_link *
 324 hash_next(hash_table * hid)
 325 {
 326     int i;
 327
 328     if (hid->current_ptr != NULL) {
 329         hid->current_ptr = hid->current_ptr->next;
 330         if (hid->current_ptr != NULL)
 331             return (hid->current_ptr);  /* next item */
 332     }
 333     /* find next bucket */
 334     for (i = hid->current_slot + 1; i < hid->size; i++) {
 335         hid->current_slot = i;
 336         if (hid->buckets[i] != NULL)
 337             return (hid->current_ptr = hid->buckets[i]);
 338     }
 339     return NULL;                /* end of list */
 340 }
 341
 342 int
 343 hash_delete(hash_table * hid, const char *key)
 344 {
 345     return hash_delete_link(hid, hash_lookup(hid, key));
 346 }
 347
 348 /*
 349  *  hash_delete_link - deletes the given hash_link node from the
 350  *  hash table 'hid'. If FreeLink then free the given hash_link.
 351  *
 352  *  On success, it returns 0 and deletes the link; otherwise,
 353  *  returns non-zero on error.
 354  */
 355 int
 356 hash_unlink(hash_table * hid, hash_link * hl, int FreeLink)
 357 {
 358     hash_link *walker, *prev;
 359     int i;
 360     if (hl == NULL)
 361         return -1;
 362     i = hid->hash(hl->key, hid->size);
 363     for (prev = NULL, walker = hid->buckets[i];
 364         walker != NULL; prev = walker, walker = walker->next) {
 365         if (walker == hl) {
 366             if (prev == NULL) { /* it's the head */
 367                 hid->buckets[i] = walker->next;
 368             } else {
 369                 prev->next = walker->next;      /* skip it */
 370             }
 371             /* fix walker state if needed */
 372             if (walker == hid->current_ptr)
 373                 hid->current_ptr = walker->next;
 374             if (FreeLink) {
 375                 if (walker) {
 376                 free(walker);
 377                 }
 378             }
 379             return 0;
 380         }
 381     }
 382     return 1;
 383 }
 384
 385 /* take link off and free link node */
 386 int
 387 hash_delete_link(hash_table * hid, hash_link * hl)
 388 {
 389     return (hash_unlink(hid, hl, 1));
 390 }
 391
 392 /* take link off only */
 393 int
 394 hash_remove_link(hash_table * hid, hash_link * hl)
 395 {
 396     return (hash_unlink(hid, hl, 0));
 397 }
 398
 399 /*
 400  *  hash_get_bucket - returns the head item of the bucket
 401  *  in the hash table 'hid'. Otherwise, returns NULL on error.
 402  */
 403 hash_link *
 404 hash_get_bucket(hash_table * hid, unsigned int bucket)
 405 {
 406     if (bucket >= hid->size)
 407         return NULL;
 408     return (hid->buckets[bucket]);
 409 }
 410
 411
 412 void
 413 hashFreeMemory(hash_table * hid)
 414 {
 415         if (hid->buckets);
 416     free(hid->buckets);
 417         if (hid)
 418     free(hid);
 419 }
 420
 421
 422 #ifdef USE_HASH_DRIVER
 423 /*
 424  *  hash-driver - Run with a big file as stdin to insert each line into the
 425  *  hash table, then prints the whole hash table, then deletes a random item,
 426  *  and prints the table again...
 427  */
 428 int
 429 main(void)
 430 {
 431     hash_table *hid;
 432     int i;
 433     LOCAL_ARRAY(char, buf, BUFSIZ);
 434     LOCAL_ARRAY(char, todelete, BUFSIZ);
 435     hash_link *walker = NULL;
 436
 437     todelete[0] = '\0';
 438     printf("init\n");
 439
 440     printf("creating hash table\n");
 441     if ((hid = hash_create(strcmp, 229, hash_string)) < 0) {
 442         printf("hash_create error.\n");
 443         exit(1);
 444     }
 445     printf("done creating hash table: %d\n", hid);
 446
 447     while (fgets(buf, BUFSIZ, stdin)) {
 448         buf[strlen(buf) - 1] = '\0';
 449         printf("Inserting '%s' for item %p to hash table: %d\n",
 450             buf, buf, hid);
 451         hash_insert(hid, xstrdup(buf), (void *) 0x12345678);
 452         if (random() % 17 == 0)
 453             strcpy(todelete, buf);
 454     }
 455
 456     printf("walking hash table...\n");
 457     for (i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {
 458         printf("item %5d: key: '%s' item: %p\n", i++, walker->key,
 459             walker->item);
 460     }
 461     printf("done walking hash table...\n");
 462
 463     if (todelete[0]) {
 464         printf("deleting %s from %d\n", todelete, hid);
 465         if (hash_delete(hid, todelete))
 466             printf("hash_delete error\n");
 467     }
 468     printf("walking hash table...\n");
 469     for (i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {
 470         printf("item %5d: key: '%s' item: %p\n", i++, walker->key,
 471             walker->item);
 472     }
 473     printf("done walking hash table...\n");
 474
 475
 476     printf("driver finished.\n");
 477     exit(0);
 478 }
 479 #endif
 480