]>
Commit | Line | Data |
---|---|---|
f52a7d75 | 1 | |
2 | /* | |
262a0e14 | 3 | * $Id$ |
f52a7d75 | 4 | * |
b510f3a1 | 5 | * DEBUG: section 00 Hash Tables |
f52a7d75 | 6 | * AUTHOR: Harvest Derived |
7 | * | |
2b6662ba | 8 | * SQUID Web Proxy Cache http://www.squid-cache.org/ |
f52a7d75 | 9 | * ---------------------------------------------------------- |
10 | * | |
2b6662ba | 11 | * Squid is the result of efforts by numerous individuals from |
12 | * the Internet community; see the CONTRIBUTORS file for full | |
13 | * details. Many organizations have provided support for Squid's | |
14 | * development; see the SPONSORS file for full details. Squid is | |
15 | * Copyrighted (C) 2001 by the Regents of the University of | |
16 | * California; see the COPYRIGHT file for full details. Squid | |
17 | * incorporates software developed and/or copyrighted by other | |
18 | * sources; see the CREDITS file for full details. | |
f52a7d75 | 19 | * |
20 | * This program is free software; you can redistribute it and/or modify | |
21 | * it under the terms of the GNU General Public License as published by | |
22 | * the Free Software Foundation; either version 2 of the License, or | |
23 | * (at your option) any later version. | |
26ac0430 | 24 | * |
f52a7d75 | 25 | * This program is distributed in the hope that it will be useful, |
26 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
27 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
28 | * GNU General Public License for more details. | |
26ac0430 | 29 | * |
f52a7d75 | 30 | * You should have received a copy of the GNU General Public License |
31 | * along with this program; if not, write to the Free Software | |
32 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. | |
33 | * | |
34 | */ | |
35 | ||
f7f3304a | 36 | #include "squid.h" |
25f98340 AJ |
37 | #include "hash.h" |
38 | #include "profiler/Profiler.h" | |
f52a7d75 | 39 | |
40 | #if HAVE_STDIO_H | |
41 | #include <stdio.h> | |
42 | #endif | |
43 | #if HAVE_STDLIB_H | |
44 | #include <stdlib.h> | |
45 | #endif | |
46 | #if HAVE_STRING_H | |
47 | #include <string.h> | |
48 | #endif | |
49 | #if HAVE_UNISTD_H | |
50 | #include <unistd.h> | |
51 | #endif | |
52 | #if HAVE_GNUMALLLOC_H | |
53 | #include <gnumalloc.h> | |
482aa790 | 54 | #elif HAVE_MALLOC_H |
f52a7d75 | 55 | #include <malloc.h> |
56 | #endif | |
57 | #if HAVE_ASSERT_H | |
58 | #include <assert.h> | |
59 | #endif | |
e880cefa | 60 | #if HAVE_MATH_H |
61 | #include <math.h> | |
62 | #endif | |
f52a7d75 | 63 | |
f52a7d75 | 64 | static void hash_next_bucket(hash_table * hid); |
65 | ||
66 | unsigned int | |
67 | hash_string(const void *data, unsigned int size) | |
68 | { | |
209663bb | 69 | const unsigned char *s = static_cast<const unsigned char *>(data); |
f52a7d75 | 70 | unsigned int n = 0; |
71 | unsigned int j = 0; | |
72 | unsigned int i = 0; | |
73 | while (*s) { | |
742a021b | 74 | ++j; |
aec55359 FC |
75 | n ^= 271 * *s; |
76 | ++s; | |
f52a7d75 | 77 | } |
78 | i = n ^ (j * 271); | |
79 | return i % size; | |
80 | } | |
81 | ||
82 | /* the following function(s) were adapted from | |
83 | * usr/src/lib/libc/db/hash_func.c, 4.4 BSD lite */ | |
84 | ||
85 | /* Hash function from Chris Torek. */ | |
86 | unsigned int | |
87 | hash4(const void *data, unsigned int size) | |
88 | { | |
209663bb | 89 | const char *key = static_cast<const char *>(data); |
f52a7d75 | 90 | size_t loop; |
91 | unsigned int h; | |
92 | size_t len; | |
93 | ||
94 | #define HASH4a h = (h << 5) - h + *key++; | |
95 | #define HASH4b h = (h << 5) + h + *key++; | |
96 | #define HASH4 HASH4b | |
97 | ||
98 | h = 0; | |
99 | len = strlen(key); | |
100 | loop = len >> 3; | |
101 | switch (len & (8 - 1)) { | |
102 | case 0: | |
26ac0430 | 103 | break; |
f52a7d75 | 104 | case 7: |
26ac0430 AJ |
105 | HASH4; |
106 | /* FALLTHROUGH */ | |
f52a7d75 | 107 | case 6: |
26ac0430 AJ |
108 | HASH4; |
109 | /* FALLTHROUGH */ | |
f52a7d75 | 110 | case 5: |
26ac0430 AJ |
111 | HASH4; |
112 | /* FALLTHROUGH */ | |
f52a7d75 | 113 | case 4: |
26ac0430 AJ |
114 | HASH4; |
115 | /* FALLTHROUGH */ | |
f52a7d75 | 116 | case 3: |
26ac0430 AJ |
117 | HASH4; |
118 | /* FALLTHROUGH */ | |
f52a7d75 | 119 | case 2: |
26ac0430 AJ |
120 | HASH4; |
121 | /* FALLTHROUGH */ | |
f52a7d75 | 122 | case 1: |
26ac0430 | 123 | HASH4; |
f52a7d75 | 124 | } |
a2f5277a | 125 | while (--loop) { |
26ac0430 AJ |
126 | HASH4; |
127 | HASH4; | |
128 | HASH4; | |
129 | HASH4; | |
130 | HASH4; | |
131 | HASH4; | |
132 | HASH4; | |
133 | HASH4; | |
f52a7d75 | 134 | } |
135 | return h % size; | |
136 | } | |
137 | ||
209663bb | 138 | /** |
f52a7d75 | 139 | * hash_create - creates a new hash table, uses the cmp_func |
140 | * to compare keys. Returns the identification for the hash table; | |
141 | * otherwise returns a negative number on error. | |
142 | */ | |
143 | hash_table * | |
144 | hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func) | |
145 | { | |
209663bb | 146 | hash_table *hid = (hash_table *)xcalloc(1, sizeof(hash_table)); |
f52a7d75 | 147 | if (!hash_sz) |
26ac0430 | 148 | hid->size = (unsigned int) DEFAULT_HASH_SIZE; |
f52a7d75 | 149 | else |
26ac0430 | 150 | hid->size = (unsigned int) hash_sz; |
f52a7d75 | 151 | /* allocate and null the buckets */ |
209663bb | 152 | hid->buckets = (hash_link **)xcalloc(hid->size, sizeof(hash_link *)); |
f52a7d75 | 153 | hid->cmp = cmp_func; |
154 | hid->hash = hash_func; | |
155 | hid->next = NULL; | |
156 | hid->current_slot = 0; | |
157 | return hid; | |
158 | } | |
159 | ||
209663bb | 160 | /** |
f52a7d75 | 161 | * hash_join - joins a hash_link under its key lnk->key |
26ac0430 | 162 | * into the hash table 'hid'. |
f52a7d75 | 163 | * |
164 | * It does not copy any data into the hash table, only links pointers. | |
165 | */ | |
166 | void | |
167 | hash_join(hash_table * hid, hash_link * lnk) | |
168 | { | |
169 | int i; | |
170 | i = hid->hash(lnk->key, hid->size); | |
171 | lnk->next = hid->buckets[i]; | |
172 | hid->buckets[i] = lnk; | |
742a021b | 173 | ++hid->count; |
f52a7d75 | 174 | } |
175 | ||
209663bb | 176 | /** |
f52a7d75 | 177 | * hash_lookup - locates the item under the key 'k' in the hash table |
178 | * 'hid'. Returns a pointer to the hash bucket on success; otherwise | |
179 | * returns NULL. | |
180 | */ | |
4a8b20e8 | 181 | hash_link * |
f52a7d75 | 182 | hash_lookup(hash_table * hid, const void *k) |
183 | { | |
f52a7d75 | 184 | int b; |
88bfe092 | 185 | PROF_start(hash_lookup); |
f52a7d75 | 186 | assert(k != NULL); |
187 | b = hid->hash(k, hid->size); | |
209663bb | 188 | for (hash_link *walker = hid->buckets[b]; walker != NULL; walker = walker->next) { |
26ac0430 AJ |
189 | if ((hid->cmp) (k, walker->key) == 0) { |
190 | PROF_stop(hash_lookup); | |
191 | return (walker); | |
192 | } | |
193 | assert(walker != walker->next); | |
f52a7d75 | 194 | } |
88bfe092 | 195 | PROF_stop(hash_lookup); |
f52a7d75 | 196 | return NULL; |
197 | } | |
198 | ||
199 | static void | |
200 | hash_next_bucket(hash_table * hid) | |
201 | { | |
202 | while (hid->next == NULL && ++hid->current_slot < hid->size) | |
26ac0430 | 203 | hid->next = hid->buckets[hid->current_slot]; |
f52a7d75 | 204 | } |
205 | ||
209663bb | 206 | /** |
f52a7d75 | 207 | * hash_first - initializes the hash table for the hash_next() |
208 | * function. | |
209 | */ | |
210 | void | |
211 | hash_first(hash_table * hid) | |
212 | { | |
213 | assert(NULL == hid->next); | |
214 | hid->current_slot = 0; | |
215 | hid->next = hid->buckets[hid->current_slot]; | |
216 | if (NULL == hid->next) | |
26ac0430 | 217 | hash_next_bucket(hid); |
f52a7d75 | 218 | } |
219 | ||
209663bb | 220 | /** |
f52a7d75 | 221 | * hash_next - returns the next item in the hash table 'hid'. |
26ac0430 | 222 | * Otherwise, returns NULL on error or end of list. |
f52a7d75 | 223 | * |
224 | * MUST call hash_first() before hash_next(). | |
225 | */ | |
4a8b20e8 | 226 | hash_link * |
f52a7d75 | 227 | hash_next(hash_table * hid) |
228 | { | |
209663bb AJ |
229 | hash_link *p = hid->next; |
230 | if (NULL == p) | |
26ac0430 | 231 | return NULL; |
209663bb | 232 | hid->next = p->next; |
f52a7d75 | 233 | if (NULL == hid->next) |
26ac0430 | 234 | hash_next_bucket(hid); |
209663bb | 235 | return p; |
f52a7d75 | 236 | } |
237 | ||
209663bb | 238 | /** |
2c4f7ab2 | 239 | * hash_last - resets hash traversal state to NULL |
240 | * | |
241 | */ | |
242 | void | |
ab96e65c | 243 | hash_last(hash_table * hid) |
2c4f7ab2 | 244 | { |
137a13ea | 245 | assert(hid != NULL); |
2c4f7ab2 | 246 | hid->next = NULL; |
247 | hid->current_slot = 0; | |
248 | } | |
249 | ||
209663bb | 250 | /** |
26ac0430 | 251 | * hash_remove_link - deletes the given hash_link node from the |
f52a7d75 | 252 | * hash table 'hid'. Does not free the item, only removes it |
253 | * from the list. | |
254 | * | |
4fba1a24 | 255 | * An assertion is triggered if the hash_link is not found in the |
256 | * list. | |
f52a7d75 | 257 | */ |
258 | void | |
259 | hash_remove_link(hash_table * hid, hash_link * hl) | |
260 | { | |
f52a7d75 | 261 | assert(hl != NULL); |
209663bb AJ |
262 | int i = hid->hash(hl->key, hid->size); |
263 | for (hash_link **P = &hid->buckets[i]; *P; P = &(*P)->next) { | |
26ac0430 AJ |
264 | if (*P != hl) |
265 | continue; | |
266 | *P = hl->next; | |
267 | if (hid->next == hl) { | |
268 | hid->next = hl->next; | |
269 | if (NULL == hid->next) | |
270 | hash_next_bucket(hid); | |
271 | } | |
742a021b | 272 | --hid->count; |
26ac0430 | 273 | return; |
f52a7d75 | 274 | } |
e530de6c | 275 | assert(0); |
f52a7d75 | 276 | } |
277 | ||
209663bb | 278 | /** |
26ac0430 | 279 | * hash_get_bucket - returns the head item of the bucket |
f52a7d75 | 280 | * in the hash table 'hid'. Otherwise, returns NULL on error. |
281 | */ | |
282 | hash_link * | |
283 | hash_get_bucket(hash_table * hid, unsigned int bucket) | |
284 | { | |
285 | if (bucket >= hid->size) | |
26ac0430 | 286 | return NULL; |
f52a7d75 | 287 | return (hid->buckets[bucket]); |
288 | } | |
289 | ||
290 | void | |
291 | hashFreeItems(hash_table * hid, HASHFREE * free_func) | |
292 | { | |
293 | hash_link *l; | |
f52a7d75 | 294 | int i = 0; |
209663bb | 295 | hash_link **list = (hash_link **)xcalloc(hid->count, sizeof(hash_link *)); |
f52a7d75 | 296 | hash_first(hid); |
297 | while ((l = hash_next(hid)) && i < hid->count) { | |
26ac0430 | 298 | *(list + i) = l; |
742a021b | 299 | ++i; |
f52a7d75 | 300 | } |
742a021b | 301 | for (int j = 0; j < i; ++j) |
26ac0430 | 302 | free_func(*(list + j)); |
f52a7d75 | 303 | xfree(list); |
304 | } | |
305 | ||
306 | void | |
307 | hashFreeMemory(hash_table * hid) | |
308 | { | |
04f7fd38 | 309 | if (hid == NULL) |
928b6c10 | 310 | return; |
efacbef0 | 311 | if (hid->buckets) |
26ac0430 | 312 | xfree(hid->buckets); |
f52a7d75 | 313 | xfree(hid); |
314 | } | |
315 | ||
26ac0430 | 316 | static int hash_primes[] = { |
f52a7d75 | 317 | 103, |
318 | 229, | |
319 | 467, | |
320 | 977, | |
321 | 1979, | |
322 | 4019, | |
323 | 6037, | |
324 | 7951, | |
325 | 12149, | |
326 | 16231, | |
327 | 33493, | |
328 | 65357 | |
329 | }; | |
330 | ||
331 | int | |
332 | hashPrime(int n) | |
333 | { | |
334 | int I = sizeof(hash_primes) / sizeof(int); | |
f52a7d75 | 335 | int best_prime = hash_primes[0]; |
d87ebd78 | 336 | double min = fabs(log((double) n) - log((double) hash_primes[0])); |
f52a7d75 | 337 | double d; |
742a021b | 338 | for (int i = 0; i < I; ++i) { |
26ac0430 AJ |
339 | d = fabs(log((double) n) - log((double) hash_primes[i])); |
340 | if (d > min) | |
341 | continue; | |
342 | min = d; | |
343 | best_prime = hash_primes[i]; | |
f52a7d75 | 344 | } |
345 | return best_prime; | |
346 | } | |
347 | ||
209663bb | 348 | /** |
186477c1 | 349 | * return the key of a hash_link as a const string |
350 | */ | |
351 | const char * | |
352 | hashKeyStr(hash_link * hl) | |
353 | { | |
354 | return (const char *) hl->key; | |
355 | } | |
356 | ||
f52a7d75 | 357 | |
32d002cb | 358 | #if USE_HASH_DRIVER |
209663bb | 359 | /** |
f52a7d75 | 360 | * hash-driver - Run with a big file as stdin to insert each line into the |
361 | * hash table, then prints the whole hash table, then deletes a random item, | |
362 | * and prints the table again... | |
363 | */ | |
364 | int | |
365 | main(void) | |
366 | { | |
367 | hash_table *hid; | |
f52a7d75 | 368 | LOCAL_ARRAY(char, buf, BUFSIZ); |
369 | LOCAL_ARRAY(char, todelete, BUFSIZ); | |
370 | hash_link *walker = NULL; | |
371 | ||
372 | todelete[0] = '\0'; | |
373 | printf("init\n"); | |
374 | ||
375 | printf("creating hash table\n"); | |
376 | if ((hid = hash_create((HASHCMP *) strcmp, 229, hash4)) < 0) { | |
26ac0430 AJ |
377 | printf("hash_create error.\n"); |
378 | exit(1); | |
f52a7d75 | 379 | } |
380 | printf("done creating hash table: %d\n", hid); | |
381 | ||
382 | while (fgets(buf, BUFSIZ, stdin)) { | |
26ac0430 AJ |
383 | buf[strlen(buf) - 1] = '\0'; |
384 | printf("Inserting '%s' for item %p to hash table: %d\n", | |
385 | buf, buf, hid); | |
386 | hash_insert(hid, xstrdup(buf), (void *) 0x12345678); | |
387 | if (random() % 17 == 0) | |
388 | strcpy(todelete, buf); | |
f52a7d75 | 389 | } |
390 | ||
391 | printf("walking hash table...\n"); | |
209663bb | 392 | for (int i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) { |
26ac0430 AJ |
393 | printf("item %5d: key: '%s' item: %p\n", i++, walker->key, |
394 | walker->item); | |
f52a7d75 | 395 | } |
396 | printf("done walking hash table...\n"); | |
397 | ||
398 | if (todelete[0]) { | |
26ac0430 AJ |
399 | printf("deleting %s from %d\n", todelete, hid); |
400 | if (hash_delete(hid, todelete)) | |
401 | printf("hash_delete error\n"); | |
f52a7d75 | 402 | } |
403 | printf("walking hash table...\n"); | |
209663bb | 404 | for (int i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) { |
26ac0430 AJ |
405 | printf("item %5d: key: '%s' item: %p\n", i++, walker->key, |
406 | walker->item); | |
f52a7d75 | 407 | } |
408 | printf("done walking hash table...\n"); | |
409 | ||
410 | ||
411 | printf("driver finished.\n"); | |
412 | exit(0); | |
413 | } | |
414 | #endif |