]> git.ipfire.org Git - thirdparty/squid.git/blob - lib/hash.cc
SourceFormat Enforcement
[thirdparty/squid.git] / lib / hash.cc
1
2 /*
3 * $Id$
4 *
5 * DEBUG: section 00 Hash Tables
6 * AUTHOR: Harvest Derived
7 *
8 * SQUID Web Proxy Cache http://www.squid-cache.org/
9 * ----------------------------------------------------------
10 *
11 * Squid is the result of efforts by numerous individuals from
12 * the Internet community; see the CONTRIBUTORS file for full
13 * details. Many organizations have provided support for Squid's
14 * development; see the SPONSORS file for full details. Squid is
15 * Copyrighted (C) 2001 by the Regents of the University of
16 * California; see the COPYRIGHT file for full details. Squid
17 * incorporates software developed and/or copyrighted by other
18 * sources; see the CREDITS file for full details.
19 *
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
33 *
34 */
35
36 #include "squid.h"
37 #include "hash.h"
38 #include "profiler/Profiler.h"
39
40 #if HAVE_STDIO_H
41 #include <stdio.h>
42 #endif
43 #if HAVE_STDLIB_H
44 #include <stdlib.h>
45 #endif
46 #if HAVE_STRING_H
47 #include <string.h>
48 #endif
49 #if HAVE_UNISTD_H
50 #include <unistd.h>
51 #endif
52 #if HAVE_GNUMALLLOC_H
53 #include <gnumalloc.h>
54 #elif HAVE_MALLOC_H
55 #include <malloc.h>
56 #endif
57 #if HAVE_ASSERT_H
58 #include <assert.h>
59 #endif
60 #if HAVE_MATH_H
61 #include <math.h>
62 #endif
63
64 static void hash_next_bucket(hash_table * hid);
65
66 unsigned int
67 hash_string(const void *data, unsigned int size)
68 {
69 const unsigned char *s = static_cast<const unsigned char *>(data);
70 unsigned int n = 0;
71 unsigned int j = 0;
72 unsigned int i = 0;
73 while (*s) {
74 ++j;
75 n ^= 271 * *s;
76 ++s;
77 }
78 i = n ^ (j * 271);
79 return i % size;
80 }
81
82 /* the following function(s) were adapted from
83 * usr/src/lib/libc/db/hash_func.c, 4.4 BSD lite */
84
85 /* Hash function from Chris Torek. */
86 unsigned int
87 hash4(const void *data, unsigned int size)
88 {
89 const char *key = static_cast<const char *>(data);
90 size_t loop;
91 unsigned int h;
92 size_t len;
93
94 #define HASH4a h = (h << 5) - h + *key++;
95 #define HASH4b h = (h << 5) + h + *key++;
96 #define HASH4 HASH4b
97
98 h = 0;
99 len = strlen(key);
100 loop = len >> 3;
101 switch (len & (8 - 1)) {
102 case 0:
103 break;
104 case 7:
105 HASH4;
106 /* FALLTHROUGH */
107 case 6:
108 HASH4;
109 /* FALLTHROUGH */
110 case 5:
111 HASH4;
112 /* FALLTHROUGH */
113 case 4:
114 HASH4;
115 /* FALLTHROUGH */
116 case 3:
117 HASH4;
118 /* FALLTHROUGH */
119 case 2:
120 HASH4;
121 /* FALLTHROUGH */
122 case 1:
123 HASH4;
124 }
125 while (loop) {
126 --loop;
127 HASH4;
128 HASH4;
129 HASH4;
130 HASH4;
131 HASH4;
132 HASH4;
133 HASH4;
134 HASH4;
135 }
136 return h % size;
137 }
138
139 /**
140 * hash_create - creates a new hash table, uses the cmp_func
141 * to compare keys. Returns the identification for the hash table;
142 * otherwise returns a negative number on error.
143 */
144 hash_table *
145 hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func)
146 {
147 hash_table *hid = (hash_table *)xcalloc(1, sizeof(hash_table));
148 if (!hash_sz)
149 hid->size = (unsigned int) DEFAULT_HASH_SIZE;
150 else
151 hid->size = (unsigned int) hash_sz;
152 /* allocate and null the buckets */
153 hid->buckets = (hash_link **)xcalloc(hid->size, sizeof(hash_link *));
154 hid->cmp = cmp_func;
155 hid->hash = hash_func;
156 hid->next = NULL;
157 hid->current_slot = 0;
158 return hid;
159 }
160
161 /**
162 * hash_join - joins a hash_link under its key lnk->key
163 * into the hash table 'hid'.
164 *
165 * It does not copy any data into the hash table, only links pointers.
166 */
167 void
168 hash_join(hash_table * hid, hash_link * lnk)
169 {
170 int i;
171 i = hid->hash(lnk->key, hid->size);
172 lnk->next = hid->buckets[i];
173 hid->buckets[i] = lnk;
174 ++hid->count;
175 }
176
177 /**
178 * hash_lookup - locates the item under the key 'k' in the hash table
179 * 'hid'. Returns a pointer to the hash bucket on success; otherwise
180 * returns NULL.
181 */
182 hash_link *
183 hash_lookup(hash_table * hid, const void *k)
184 {
185 int b;
186 PROF_start(hash_lookup);
187 assert(k != NULL);
188 b = hid->hash(k, hid->size);
189 for (hash_link *walker = hid->buckets[b]; walker != NULL; walker = walker->next) {
190 if ((hid->cmp) (k, walker->key) == 0) {
191 PROF_stop(hash_lookup);
192 return (walker);
193 }
194 assert(walker != walker->next);
195 }
196 PROF_stop(hash_lookup);
197 return NULL;
198 }
199
200 static void
201 hash_next_bucket(hash_table * hid)
202 {
203 while (hid->next == NULL && ++hid->current_slot < hid->size)
204 hid->next = hid->buckets[hid->current_slot];
205 }
206
207 /**
208 * hash_first - initializes the hash table for the hash_next()
209 * function.
210 */
211 void
212 hash_first(hash_table * hid)
213 {
214 assert(NULL == hid->next);
215 hid->current_slot = 0;
216 hid->next = hid->buckets[hid->current_slot];
217 if (NULL == hid->next)
218 hash_next_bucket(hid);
219 }
220
221 /**
222 * hash_next - returns the next item in the hash table 'hid'.
223 * Otherwise, returns NULL on error or end of list.
224 *
225 * MUST call hash_first() before hash_next().
226 */
227 hash_link *
228 hash_next(hash_table * hid)
229 {
230 hash_link *p = hid->next;
231 if (NULL == p)
232 return NULL;
233 hid->next = p->next;
234 if (NULL == hid->next)
235 hash_next_bucket(hid);
236 return p;
237 }
238
239 /**
240 * hash_last - resets hash traversal state to NULL
241 *
242 */
243 void
244 hash_last(hash_table * hid)
245 {
246 assert(hid != NULL);
247 hid->next = NULL;
248 hid->current_slot = 0;
249 }
250
251 /**
252 * hash_remove_link - deletes the given hash_link node from the
253 * hash table 'hid'. Does not free the item, only removes it
254 * from the list.
255 *
256 * An assertion is triggered if the hash_link is not found in the
257 * list.
258 */
259 void
260 hash_remove_link(hash_table * hid, hash_link * hl)
261 {
262 assert(hl != NULL);
263 int i = hid->hash(hl->key, hid->size);
264 for (hash_link **P = &hid->buckets[i]; *P; P = &(*P)->next) {
265 if (*P != hl)
266 continue;
267 *P = hl->next;
268 if (hid->next == hl) {
269 hid->next = hl->next;
270 if (NULL == hid->next)
271 hash_next_bucket(hid);
272 }
273 --hid->count;
274 return;
275 }
276 assert(0);
277 }
278
279 /**
280 * hash_get_bucket - returns the head item of the bucket
281 * in the hash table 'hid'. Otherwise, returns NULL on error.
282 */
283 hash_link *
284 hash_get_bucket(hash_table * hid, unsigned int bucket)
285 {
286 if (bucket >= hid->size)
287 return NULL;
288 return (hid->buckets[bucket]);
289 }
290
291 void
292 hashFreeItems(hash_table * hid, HASHFREE * free_func)
293 {
294 hash_link *l;
295 int i = 0;
296 hash_link **list = (hash_link **)xcalloc(hid->count, sizeof(hash_link *));
297 hash_first(hid);
298 while ((l = hash_next(hid)) && i < hid->count) {
299 *(list + i) = l;
300 ++i;
301 }
302 for (int j = 0; j < i; ++j)
303 free_func(*(list + j));
304 xfree(list);
305 }
306
307 void
308 hashFreeMemory(hash_table * hid)
309 {
310 if (hid == NULL)
311 return;
312 if (hid->buckets)
313 xfree(hid->buckets);
314 xfree(hid);
315 }
316
317 static int hash_primes[] = {
318 103,
319 229,
320 467,
321 977,
322 1979,
323 4019,
324 6037,
325 7951,
326 12149,
327 16231,
328 33493,
329 65357
330 };
331
332 int
333 hashPrime(int n)
334 {
335 int I = sizeof(hash_primes) / sizeof(int);
336 int best_prime = hash_primes[0];
337 double min = fabs(log((double) n) - log((double) hash_primes[0]));
338 double d;
339 for (int i = 0; i < I; ++i) {
340 d = fabs(log((double) n) - log((double) hash_primes[i]));
341 if (d > min)
342 continue;
343 min = d;
344 best_prime = hash_primes[i];
345 }
346 return best_prime;
347 }
348
349 /**
350 * return the key of a hash_link as a const string
351 */
352 const char *
353 hashKeyStr(hash_link * hl)
354 {
355 return (const char *) hl->key;
356 }
357
358 #if USE_HASH_DRIVER
359 /**
360 * hash-driver - Run with a big file as stdin to insert each line into the
361 * hash table, then prints the whole hash table, then deletes a random item,
362 * and prints the table again...
363 */
364 int
365 main(void)
366 {
367 hash_table *hid;
368 LOCAL_ARRAY(char, buf, BUFSIZ);
369 LOCAL_ARRAY(char, todelete, BUFSIZ);
370 hash_link *walker = NULL;
371
372 todelete[0] = '\0';
373 printf("init\n");
374
375 printf("creating hash table\n");
376 if ((hid = hash_create((HASHCMP *) strcmp, 229, hash4)) < 0) {
377 printf("hash_create error.\n");
378 exit(1);
379 }
380 printf("done creating hash table: %d\n", hid);
381
382 while (fgets(buf, BUFSIZ, stdin)) {
383 buf[strlen(buf) - 1] = '\0';
384 printf("Inserting '%s' for item %p to hash table: %d\n",
385 buf, buf, hid);
386 hash_insert(hid, xstrdup(buf), (void *) 0x12345678);
387 if (random() % 17 == 0)
388 strcpy(todelete, buf);
389 }
390
391 printf("walking hash table...\n");
392 for (int i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {
393 printf("item %5d: key: '%s' item: %p\n", i++, walker->key,
394 walker->item);
395 }
396 printf("done walking hash table...\n");
397
398 if (todelete[0]) {
399 printf("deleting %s from %d\n", todelete, hid);
400 if (hash_delete(hid, todelete))
401 printf("hash_delete error\n");
402 }
403 printf("walking hash table...\n");
404 for (int i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {
405 printf("item %5d: key: '%s' item: %p\n", i++, walker->key,
406 walker->item);
407 }
408 printf("done walking hash table...\n");
409
410 printf("driver finished.\n");
411 exit(0);
412 }
413 #endif