]> git.ipfire.org Git - thirdparty/squid.git/blob - test-suite/hash.c
change FSF address
[thirdparty/squid.git] / test-suite / hash.c
1
2 /*
3 * $Id: hash.c,v 1.4 1998/07/20 17:20:25 wessels Exp $
4 *
5 * DEBUG: section 0 Hash Tables
6 * AUTHOR: Harvest Derived
7 *
8 * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
9 * --------------------------------------------------------
10 *
11 * Squid is the result of efforts by numerous individuals from the
12 * Internet community. Development is led by Duane Wessels of the
13 * National Laboratory for Applied Network Research and funded by
14 * the National Science Foundation.
15 *
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2 of the License, or
19 * (at your option) any later version.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
25 *
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
29 *
30 */
31
32 /*
33 * Copyright (c) 1994, 1995. All rights reserved.
34 *
35 * The Harvest software was developed by the Internet Research Task
36 * Force Research Group on Resource Discovery (IRTF-RD):
37 *
38 * Mic Bowman of Transarc Corporation.
39 * Peter Danzig of the University of Southern California.
40 * Darren R. Hardy of the University of Colorado at Boulder.
41 * Udi Manber of the University of Arizona.
42 * Michael F. Schwartz of the University of Colorado at Boulder.
43 * Duane Wessels of the University of Colorado at Boulder.
44 *
45 * This copyright notice applies to software in the Harvest
46 * ``src/'' directory only. Users should consult the individual
47 * copyright notices in the ``components/'' subdirectories for
48 * copyright information about other software bundled with the
49 * Harvest source code distribution.
50 *
51 * TERMS OF USE
52 *
53 * The Harvest software may be used and re-distributed without
54 * charge, provided that the software origin and research team are
55 * cited in any use of the system. Most commonly this is
56 * accomplished by including a link to the Harvest Home Page
57 * (http://harvest.cs.colorado.edu/) from the query page of any
58 * Broker you deploy, as well as in the query result pages. These
59 * links are generated automatically by the standard Broker
60 * software distribution.
61 *
62 * The Harvest software is provided ``as is'', without express or
63 * implied warranty, and with no support nor obligation to assist
64 * in its use, correction, modification or enhancement. We assume
65 * no liability with respect to the infringement of copyrights,
66 * trade secrets, or any patents, and are not responsible for
67 * consequential damages. Proper use of the Harvest software is
68 * entirely the responsibility of the user.
69 *
70 * DERIVATIVE WORKS
71 *
72 * Users may make derivative works from the Harvest software, subject
73 * to the following constraints:
74 *
75 * - You must include the above copyright notice and these
76 * accompanying paragraphs in all forms of derivative works,
77 * and any documentation and other materials related to such
78 * distribution and use acknowledge that the software was
79 * developed at the above institutions.
80 *
81 * - You must notify IRTF-RD regarding your distribution of
82 * the derivative work.
83 *
84 * - You must clearly notify users that your are distributing
85 * a modified version and not the original Harvest software.
86 *
87 * - Any derivative product is also subject to these copyright
88 * and use restrictions.
89 *
90 * Note that the Harvest software is NOT in the public domain. We
91 * retain copyright, as specified above.
92 *
93 * HISTORY OF FREE SOFTWARE STATUS
94 *
95 * Originally we required sites to license the software in cases
96 * where they were going to build commercial products/services
97 * around Harvest. In June 1995 we changed this policy. We now
98 * allow people to use the core Harvest software (the code found in
99 * the Harvest ``src/'' directory) for free. We made this change
100 * in the interest of encouraging the widest possible deployment of
101 * the technology. The Harvest software is really a reference
102 * implementation of a set of protocols and formats, some of which
103 * we intend to standardize. We encourage commercial
104 * re-implementations of code complying to this set of standards.
105 */
106
107 #include <unistd.h>
108 #include <stdlib.h>
109 #include <stdio.h>
110 #include <sys/types.h>
111 #include <ctype.h>
112 #include <sys/time.h>
113 #include <strings.h>
114 #include <assert.h>
115 #include "hash.h"
116 #undef free
117 extern void my_free(char *, int , void *);
118
119 #define free(a) my_free(__FILE__, __LINE__, a)
120
121 extern void print_stats();
122 /*
123 * hash_url() - Returns a well-distributed hash function for URLs.
124 * The best way is to sum up the last half of the string.
125 * Adapted from code written by Mic Bowman. -Darren
126 * Generates a standard deviation = 15.73
127 */
128 unsigned int
129 hash_url(const void *data, unsigned int size)
130 {
131 const char *s = data;
132 unsigned int i, j, n;
133 j = strlen(s);
134 for (i = j / 2, n = 0; i < j; i++)
135 n ^= 271 * (unsigned) s[i];
136 i = n ^ (j * 271);
137 return i % size;
138 }
139
140 unsigned int
141 hash_string(const void *data, unsigned int size)
142 {
143 const char *s = data;
144 unsigned int n = 0;
145 unsigned int j = 0;
146 unsigned int i = 0;
147 while (*s) {
148 j++;
149 n ^= 271 * (unsigned) *s++;
150 }
151 i = n ^ (j * 271);
152 return i % size;
153 }
154
155 /* the following 4 functions were adapted from
156 * usr/src/lib/libc/db/hash_func.c, 4.4 BSD lite */
157
158 /*
159 * HASH FUNCTIONS
160 *
161 * Assume that we've already split the bucket to which this key hashes,
162 * calculate that bucket, and check that in fact we did already split it.
163 *
164 * This came from ejb's hsearch.
165 */
166
167 #define PRIME1 37
168 #define PRIME2 1048583
169
170 /* Hash function from Chris Torek. */
171 unsigned int
172 hash4(const void *data, unsigned int size)
173 {
174 const char *key = data;
175 size_t loop;
176 unsigned int h;
177 size_t len;
178
179 #define HASH4a h = (h << 5) - h + *key++;
180 #define HASH4b h = (h << 5) + h + *key++;
181 #define HASH4 HASH4b
182
183 h = 0;
184 len = strlen(key);
185 loop = (len + 8 - 1) >> 3;
186 switch (len & (8 - 1)) {
187 case 0:
188 do {
189 HASH4;
190 /* FALLTHROUGH */
191 case 7:
192 HASH4;
193 /* FALLTHROUGH */
194 case 6:
195 HASH4;
196 /* FALLTHROUGH */
197 case 5:
198 HASH4;
199 /* FALLTHROUGH */
200 case 4:
201 HASH4;
202 /* FALLTHROUGH */
203 case 3:
204 HASH4;
205 /* FALLTHROUGH */
206 case 2:
207 HASH4;
208 /* FALLTHROUGH */
209 case 1:
210 HASH4;
211 } while (--loop);
212 }
213 return h % size;
214 }
215
216 /*
217 * hash_create - creates a new hash table, uses the cmp_func
218 * to compare keys. Returns the identification for the hash table;
219 * otherwise returns a negative number on error.
220 */
221 hash_table *
222 hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func)
223 {
224 hash_table *hid = calloc(1, sizeof(hash_table));
225 if (!hash_sz)
226 hid->size = (unsigned int) DEFAULT_HASH_SIZE;
227 else
228 hid->size = (unsigned int) hash_sz;
229 /* allocate and null the buckets */
230 hid->buckets = calloc(hid->size, sizeof(hash_link *));
231 hid->cmp = cmp_func;
232 hid->hash = hash_func;
233 hid->current_ptr = NULL;
234 hid->current_slot = 0;
235 return hid;
236 }
237
238 /*
239 * hash_insert - inserts the given item 'item' under the given key 'k'
240 * into the hash table 'hid'. Returns non-zero on error; otherwise,
241 * returns 0 and inserts the item.
242 *
243 * It does not copy any data into the hash table, only pointers.
244 */
245 void
246 hash_insert(hash_table * hid, const char *k, void *item)
247 {
248 int i;
249 hash_link *new;
250 assert(k != NULL);
251 /* Add to the given hash table 'hid' */
252 new = calloc(1, sizeof(hash_link));
253 if (!new) {
254 fprintf(stderr,"calloc failed!\n");
255 print_stats();
256 exit(1);
257 }
258 new->item = item;
259 new->key = (char *) k;
260 i = hid->hash(k, hid->size);
261 new->next = hid->buckets[i];
262 hid->buckets[i] = new;
263 }
264
265 /*
266 * hash_join - joins a hash_link under its key lnk->key
267 * into the hash table 'hid'.
268 *
269 * It does not copy any data into the hash table, only links pointers.
270 */
271 void
272 hash_join(hash_table * hid, hash_link * lnk)
273 {
274 int i;
275 i = hid->hash(lnk->key, hid->size);
276 lnk->next = hid->buckets[i];
277 hid->buckets[i] = lnk;
278 }
279
280 /*
281 * hash_lookup - locates the item under the key 'k' in the hash table
282 * 'hid'. Returns a pointer to the hash bucket on success; otherwise
283 * returns NULL.
284 */
285 hash_link *
286 hash_lookup(hash_table * hid, const void *k)
287 {
288 hash_link *walker;
289 int b;
290 assert(k != NULL);
291 b = hid->hash(k, hid->size);
292 for (walker = hid->buckets[b]; walker != NULL; walker = walker->next) {
293 if ((hid->cmp) (k, walker->key) == 0)
294 return (walker);
295 assert(walker != walker->next);
296 }
297 return NULL;
298 }
299
300 /*
301 * hash_first - returns the first item in the hash table 'hid'.
302 * Otherwise, returns NULL on error.
303 */
304 hash_link *
305 hash_first(hash_table * hid)
306 {
307 int i;
308
309 for (i = 0; i < hid->size; i++) {
310 hid->current_slot = i;
311 if (hid->buckets[i] != NULL)
312 return (hid->current_ptr = hid->buckets[i]);
313 }
314 return NULL;
315 }
316
317 /*
318 * hash_next - returns the next item in the hash table 'hid'.
319 * Otherwise, returns NULL on error or end of list.
320 *
321 * MUST call hash_first() before hash_next().
322 */
323 hash_link *
324 hash_next(hash_table * hid)
325 {
326 int i;
327
328 if (hid->current_ptr != NULL) {
329 hid->current_ptr = hid->current_ptr->next;
330 if (hid->current_ptr != NULL)
331 return (hid->current_ptr); /* next item */
332 }
333 /* find next bucket */
334 for (i = hid->current_slot + 1; i < hid->size; i++) {
335 hid->current_slot = i;
336 if (hid->buckets[i] != NULL)
337 return (hid->current_ptr = hid->buckets[i]);
338 }
339 return NULL; /* end of list */
340 }
341
342 int
343 hash_delete(hash_table * hid, const char *key)
344 {
345 return hash_delete_link(hid, hash_lookup(hid, key));
346 }
347
348 /*
349 * hash_delete_link - deletes the given hash_link node from the
350 * hash table 'hid'. If FreeLink then free the given hash_link.
351 *
352 * On success, it returns 0 and deletes the link; otherwise,
353 * returns non-zero on error.
354 */
355 int
356 hash_unlink(hash_table * hid, hash_link * hl, int FreeLink)
357 {
358 hash_link *walker, *prev;
359 int i;
360 if (hl == NULL)
361 return -1;
362 i = hid->hash(hl->key, hid->size);
363 for (prev = NULL, walker = hid->buckets[i];
364 walker != NULL; prev = walker, walker = walker->next) {
365 if (walker == hl) {
366 if (prev == NULL) { /* it's the head */
367 hid->buckets[i] = walker->next;
368 } else {
369 prev->next = walker->next; /* skip it */
370 }
371 /* fix walker state if needed */
372 if (walker == hid->current_ptr)
373 hid->current_ptr = walker->next;
374 if (FreeLink) {
375 if (walker) {
376 free(walker);
377 }
378 }
379 return 0;
380 }
381 }
382 return 1;
383 }
384
385 /* take link off and free link node */
386 int
387 hash_delete_link(hash_table * hid, hash_link * hl)
388 {
389 return (hash_unlink(hid, hl, 1));
390 }
391
392 /* take link off only */
393 int
394 hash_remove_link(hash_table * hid, hash_link * hl)
395 {
396 return (hash_unlink(hid, hl, 0));
397 }
398
399 /*
400 * hash_get_bucket - returns the head item of the bucket
401 * in the hash table 'hid'. Otherwise, returns NULL on error.
402 */
403 hash_link *
404 hash_get_bucket(hash_table * hid, unsigned int bucket)
405 {
406 if (bucket >= hid->size)
407 return NULL;
408 return (hid->buckets[bucket]);
409 }
410
411
412 void
413 hashFreeMemory(hash_table * hid)
414 {
415 if (hid->buckets);
416 free(hid->buckets);
417 if (hid)
418 free(hid);
419 }
420
421
422 #ifdef USE_HASH_DRIVER
423 /*
424 * hash-driver - Run with a big file as stdin to insert each line into the
425 * hash table, then prints the whole hash table, then deletes a random item,
426 * and prints the table again...
427 */
428 int
429 main(void)
430 {
431 hash_table *hid;
432 int i;
433 LOCAL_ARRAY(char, buf, BUFSIZ);
434 LOCAL_ARRAY(char, todelete, BUFSIZ);
435 hash_link *walker = NULL;
436
437 todelete[0] = '\0';
438 printf("init\n");
439
440 printf("creating hash table\n");
441 if ((hid = hash_create(strcmp, 229, hash_string)) < 0) {
442 printf("hash_create error.\n");
443 exit(1);
444 }
445 printf("done creating hash table: %d\n", hid);
446
447 while (fgets(buf, BUFSIZ, stdin)) {
448 buf[strlen(buf) - 1] = '\0';
449 printf("Inserting '%s' for item %p to hash table: %d\n",
450 buf, buf, hid);
451 hash_insert(hid, xstrdup(buf), (void *) 0x12345678);
452 if (random() % 17 == 0)
453 strcpy(todelete, buf);
454 }
455
456 printf("walking hash table...\n");
457 for (i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {
458 printf("item %5d: key: '%s' item: %p\n", i++, walker->key,
459 walker->item);
460 }
461 printf("done walking hash table...\n");
462
463 if (todelete[0]) {
464 printf("deleting %s from %d\n", todelete, hid);
465 if (hash_delete(hid, todelete))
466 printf("hash_delete error\n");
467 }
468 printf("walking hash table...\n");
469 for (i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {
470 printf("item %5d: key: '%s' item: %p\n", i++, walker->key,
471 walker->item);
472 }
473 printf("done walking hash table...\n");
474
475
476 printf("driver finished.\n");
477 exit(0);
478 }
479 #endif
480