]>
Commit | Line | Data |
---|---|---|
14c63c25 | 1 | |
2 | /* | |
262a0e14 | 3 | * $Id$ |
14c63c25 | 4 | * |
d090e020 | 5 | * DEBUG: section 00 Hash Tables |
14c63c25 | 6 | * AUTHOR: Harvest Derived |
7 | * | |
2b6662ba | 8 | * SQUID Web Proxy Cache http://www.squid-cache.org/ |
9 | * ---------------------------------------------------------- | |
14c63c25 | 10 | * |
2b6662ba | 11 | * Squid is the result of efforts by numerous individuals from |
12 | * the Internet community; see the CONTRIBUTORS file for full | |
13 | * details. Many organizations have provided support for Squid's | |
14 | * development; see the SPONSORS file for full details. Squid is | |
15 | * Copyrighted (C) 2001 by the Regents of the University of | |
16 | * California; see the COPYRIGHT file for full details. Squid | |
17 | * incorporates software developed and/or copyrighted by other | |
18 | * sources; see the CREDITS file for full details. | |
14c63c25 | 19 | * |
20 | * This program is free software; you can redistribute it and/or modify | |
21 | * it under the terms of the GNU General Public License as published by | |
22 | * the Free Software Foundation; either version 2 of the License, or | |
23 | * (at your option) any later version. | |
26ac0430 | 24 | * |
14c63c25 | 25 | * This program is distributed in the hope that it will be useful, |
26 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
27 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
28 | * GNU General Public License for more details. | |
26ac0430 | 29 | * |
14c63c25 | 30 | * You should have received a copy of the GNU General Public License |
31 | * along with this program; if not, write to the Free Software | |
cbdec147 | 32 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. |
26ac0430 | 33 | * |
14c63c25 | 34 | */ |
f7f3304a | 35 | #include "squid.h" |
14c63c25 | 36 | |
ef364f64 | 37 | #if HAVE_UNISTD_H |
14c63c25 | 38 | #include <unistd.h> |
ef364f64 AJ |
39 | #endif |
40 | #if HAVE_STDIO_H | |
14c63c25 | 41 | #include <stdio.h> |
ef364f64 AJ |
42 | #endif |
43 | #if HAVE_CTYPE_H | |
14c63c25 | 44 | #include <ctype.h> |
ef364f64 AJ |
45 | #endif |
46 | #if HAVE_STRINGS_H | |
14c63c25 | 47 | #include <strings.h> |
ef364f64 AJ |
48 | #endif |
49 | #if HAVE_ASSERT_H | |
e7740002 | 50 | #include <assert.h> |
ef364f64 AJ |
51 | #endif |
52 | ||
14c63c25 | 53 | #include "hash.h" |
ef364f64 | 54 | |
e7740002 | 55 | #undef free |
468ae12b | 56 | extern void my_free(char *, int, void *); |
e7740002 | 57 | |
58 | #define free(a) my_free(__FILE__, __LINE__, a) | |
14c63c25 | 59 | |
da537f52 | 60 | extern void print_stats(); |
14c63c25 | 61 | /* |
62 | * hash_url() - Returns a well-distributed hash function for URLs. | |
63 | * The best way is to sum up the last half of the string. | |
64 | * Adapted from code written by Mic Bowman. -Darren | |
65 | * Generates a standard deviation = 15.73 | |
66 | */ | |
67 | unsigned int | |
68 | hash_url(const void *data, unsigned int size) | |
69 | { | |
70 | const char *s = data; | |
71 | unsigned int i, j, n; | |
72 | j = strlen(s); | |
73 | for (i = j / 2, n = 0; i < j; i++) | |
26ac0430 | 74 | n ^= 271 * (unsigned) s[i]; |
14c63c25 | 75 | i = n ^ (j * 271); |
76 | return i % size; | |
77 | } | |
78 | ||
79 | unsigned int | |
80 | hash_string(const void *data, unsigned int size) | |
81 | { | |
82 | const char *s = data; | |
83 | unsigned int n = 0; | |
84 | unsigned int j = 0; | |
85 | unsigned int i = 0; | |
86 | while (*s) { | |
26ac0430 AJ |
87 | j++; |
88 | n ^= 271 * (unsigned) *s++; | |
14c63c25 | 89 | } |
90 | i = n ^ (j * 271); | |
91 | return i % size; | |
92 | } | |
93 | ||
94 | /* the following 4 functions were adapted from | |
95 | * usr/src/lib/libc/db/hash_func.c, 4.4 BSD lite */ | |
96 | ||
97 | /* | |
98 | * HASH FUNCTIONS | |
99 | * | |
100 | * Assume that we've already split the bucket to which this key hashes, | |
101 | * calculate that bucket, and check that in fact we did already split it. | |
102 | * | |
103 | * This came from ejb's hsearch. | |
104 | */ | |
105 | ||
106 | #define PRIME1 37 | |
107 | #define PRIME2 1048583 | |
108 | ||
109 | /* Hash function from Chris Torek. */ | |
110 | unsigned int | |
111 | hash4(const void *data, unsigned int size) | |
112 | { | |
113 | const char *key = data; | |
114 | size_t loop; | |
115 | unsigned int h; | |
116 | size_t len; | |
117 | ||
118 | #define HASH4a h = (h << 5) - h + *key++; | |
119 | #define HASH4b h = (h << 5) + h + *key++; | |
120 | #define HASH4 HASH4b | |
121 | ||
122 | h = 0; | |
123 | len = strlen(key); | |
124 | loop = (len + 8 - 1) >> 3; | |
125 | switch (len & (8 - 1)) { | |
126 | case 0: | |
26ac0430 AJ |
127 | do { |
128 | HASH4; | |
129 | /* FALLTHROUGH */ | |
130 | case 7: | |
131 | HASH4; | |
132 | /* FALLTHROUGH */ | |
133 | case 6: | |
134 | HASH4; | |
135 | /* FALLTHROUGH */ | |
136 | case 5: | |
137 | HASH4; | |
138 | /* FALLTHROUGH */ | |
139 | case 4: | |
140 | HASH4; | |
141 | /* FALLTHROUGH */ | |
142 | case 3: | |
143 | HASH4; | |
144 | /* FALLTHROUGH */ | |
145 | case 2: | |
146 | HASH4; | |
147 | /* FALLTHROUGH */ | |
148 | case 1: | |
149 | HASH4; | |
150 | } while (--loop); | |
14c63c25 | 151 | } |
152 | return h % size; | |
153 | } | |
154 | ||
155 | /* | |
156 | * hash_create - creates a new hash table, uses the cmp_func | |
157 | * to compare keys. Returns the identification for the hash table; | |
158 | * otherwise returns a negative number on error. | |
159 | */ | |
160 | hash_table * | |
161 | hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func) | |
162 | { | |
163 | hash_table *hid = calloc(1, sizeof(hash_table)); | |
164 | if (!hash_sz) | |
26ac0430 | 165 | hid->size = (unsigned int) DEFAULT_HASH_SIZE; |
14c63c25 | 166 | else |
26ac0430 | 167 | hid->size = (unsigned int) hash_sz; |
14c63c25 | 168 | /* allocate and null the buckets */ |
169 | hid->buckets = calloc(hid->size, sizeof(hash_link *)); | |
170 | hid->cmp = cmp_func; | |
171 | hid->hash = hash_func; | |
172 | hid->current_ptr = NULL; | |
173 | hid->current_slot = 0; | |
174 | return hid; | |
175 | } | |
176 | ||
177 | /* | |
178 | * hash_insert - inserts the given item 'item' under the given key 'k' | |
179 | * into the hash table 'hid'. Returns non-zero on error; otherwise, | |
180 | * returns 0 and inserts the item. | |
181 | * | |
182 | * It does not copy any data into the hash table, only pointers. | |
183 | */ | |
184 | void | |
185 | hash_insert(hash_table * hid, const char *k, void *item) | |
186 | { | |
187 | int i; | |
188 | hash_link *new; | |
189 | assert(k != NULL); | |
190 | /* Add to the given hash table 'hid' */ | |
191 | new = calloc(1, sizeof(hash_link)); | |
da537f52 | 192 | if (!new) { |
26ac0430 AJ |
193 | fprintf(stderr, "calloc failed!\n"); |
194 | print_stats(); | |
195 | exit(1); | |
da537f52 | 196 | } |
14c63c25 | 197 | new->item = item; |
198 | new->key = (char *) k; | |
199 | i = hid->hash(k, hid->size); | |
200 | new->next = hid->buckets[i]; | |
201 | hid->buckets[i] = new; | |
202 | } | |
203 | ||
204 | /* | |
205 | * hash_join - joins a hash_link under its key lnk->key | |
26ac0430 | 206 | * into the hash table 'hid'. |
14c63c25 | 207 | * |
208 | * It does not copy any data into the hash table, only links pointers. | |
209 | */ | |
210 | void | |
211 | hash_join(hash_table * hid, hash_link * lnk) | |
212 | { | |
213 | int i; | |
214 | i = hid->hash(lnk->key, hid->size); | |
215 | lnk->next = hid->buckets[i]; | |
216 | hid->buckets[i] = lnk; | |
217 | } | |
218 | ||
219 | /* | |
220 | * hash_lookup - locates the item under the key 'k' in the hash table | |
221 | * 'hid'. Returns a pointer to the hash bucket on success; otherwise | |
222 | * returns NULL. | |
223 | */ | |
224 | hash_link * | |
225 | hash_lookup(hash_table * hid, const void *k) | |
226 | { | |
227 | hash_link *walker; | |
228 | int b; | |
229 | assert(k != NULL); | |
230 | b = hid->hash(k, hid->size); | |
231 | for (walker = hid->buckets[b]; walker != NULL; walker = walker->next) { | |
26ac0430 AJ |
232 | if ((hid->cmp) (k, walker->key) == 0) |
233 | return (walker); | |
234 | assert(walker != walker->next); | |
14c63c25 | 235 | } |
236 | return NULL; | |
237 | } | |
238 | ||
239 | /* | |
240 | * hash_first - returns the first item in the hash table 'hid'. | |
241 | * Otherwise, returns NULL on error. | |
242 | */ | |
243 | hash_link * | |
244 | hash_first(hash_table * hid) | |
245 | { | |
246 | int i; | |
247 | ||
248 | for (i = 0; i < hid->size; i++) { | |
26ac0430 AJ |
249 | hid->current_slot = i; |
250 | if (hid->buckets[i] != NULL) | |
251 | return (hid->current_ptr = hid->buckets[i]); | |
14c63c25 | 252 | } |
253 | return NULL; | |
254 | } | |
255 | ||
256 | /* | |
257 | * hash_next - returns the next item in the hash table 'hid'. | |
26ac0430 | 258 | * Otherwise, returns NULL on error or end of list. |
14c63c25 | 259 | * |
260 | * MUST call hash_first() before hash_next(). | |
261 | */ | |
262 | hash_link * | |
263 | hash_next(hash_table * hid) | |
264 | { | |
265 | int i; | |
266 | ||
267 | if (hid->current_ptr != NULL) { | |
26ac0430 AJ |
268 | hid->current_ptr = hid->current_ptr->next; |
269 | if (hid->current_ptr != NULL) | |
270 | return (hid->current_ptr); /* next item */ | |
14c63c25 | 271 | } |
272 | /* find next bucket */ | |
273 | for (i = hid->current_slot + 1; i < hid->size; i++) { | |
26ac0430 AJ |
274 | hid->current_slot = i; |
275 | if (hid->buckets[i] != NULL) | |
276 | return (hid->current_ptr = hid->buckets[i]); | |
14c63c25 | 277 | } |
278 | return NULL; /* end of list */ | |
279 | } | |
280 | ||
281 | int | |
282 | hash_delete(hash_table * hid, const char *key) | |
283 | { | |
284 | return hash_delete_link(hid, hash_lookup(hid, key)); | |
285 | } | |
286 | ||
287 | /* | |
26ac0430 | 288 | * hash_delete_link - deletes the given hash_link node from the |
14c63c25 | 289 | * hash table 'hid'. If FreeLink then free the given hash_link. |
290 | * | |
26ac0430 | 291 | * On success, it returns 0 and deletes the link; otherwise, |
14c63c25 | 292 | * returns non-zero on error. |
293 | */ | |
da537f52 | 294 | int |
14c63c25 | 295 | hash_unlink(hash_table * hid, hash_link * hl, int FreeLink) |
296 | { | |
297 | hash_link *walker, *prev; | |
298 | int i; | |
299 | if (hl == NULL) | |
26ac0430 | 300 | return -1; |
14c63c25 | 301 | i = hid->hash(hl->key, hid->size); |
302 | for (prev = NULL, walker = hid->buckets[i]; | |
26ac0430 AJ |
303 | walker != NULL; prev = walker, walker = walker->next) { |
304 | if (walker == hl) { | |
305 | if (prev == NULL) { /* it's the head */ | |
306 | hid->buckets[i] = walker->next; | |
307 | } else { | |
308 | prev->next = walker->next; /* skip it */ | |
309 | } | |
310 | /* fix walker state if needed */ | |
311 | if (walker == hid->current_ptr) | |
312 | hid->current_ptr = walker->next; | |
313 | if (FreeLink) { | |
314 | if (walker) { | |
315 | free(walker); | |
316 | } | |
317 | } | |
318 | return 0; | |
319 | } | |
14c63c25 | 320 | } |
321 | return 1; | |
322 | } | |
323 | ||
324 | /* take link off and free link node */ | |
325 | int | |
326 | hash_delete_link(hash_table * hid, hash_link * hl) | |
327 | { | |
328 | return (hash_unlink(hid, hl, 1)); | |
329 | } | |
330 | ||
331 | /* take link off only */ | |
332 | int | |
333 | hash_remove_link(hash_table * hid, hash_link * hl) | |
334 | { | |
335 | return (hash_unlink(hid, hl, 0)); | |
336 | } | |
337 | ||
338 | /* | |
26ac0430 | 339 | * hash_get_bucket - returns the head item of the bucket |
14c63c25 | 340 | * in the hash table 'hid'. Otherwise, returns NULL on error. |
341 | */ | |
342 | hash_link * | |
343 | hash_get_bucket(hash_table * hid, unsigned int bucket) | |
344 | { | |
345 | if (bucket >= hid->size) | |
26ac0430 | 346 | return NULL; |
14c63c25 | 347 | return (hid->buckets[bucket]); |
348 | } | |
349 | ||
14c63c25 | 350 | void |
351 | hashFreeMemory(hash_table * hid) | |
352 | { | |
468ae12b | 353 | if (hid->buckets); |
14c63c25 | 354 | free(hid->buckets); |
468ae12b | 355 | if (hid) |
26ac0430 | 356 | free(hid); |
14c63c25 | 357 | } |
358 | ||
32d002cb | 359 | #if USE_HASH_DRIVER |
14c63c25 | 360 | /* |
361 | * hash-driver - Run with a big file as stdin to insert each line into the | |
362 | * hash table, then prints the whole hash table, then deletes a random item, | |
363 | * and prints the table again... | |
364 | */ | |
365 | int | |
366 | main(void) | |
367 | { | |
368 | hash_table *hid; | |
369 | int i; | |
370 | LOCAL_ARRAY(char, buf, BUFSIZ); | |
371 | LOCAL_ARRAY(char, todelete, BUFSIZ); | |
372 | hash_link *walker = NULL; | |
373 | ||
374 | todelete[0] = '\0'; | |
375 | printf("init\n"); | |
376 | ||
377 | printf("creating hash table\n"); | |
378 | if ((hid = hash_create(strcmp, 229, hash_string)) < 0) { | |
26ac0430 AJ |
379 | printf("hash_create error.\n"); |
380 | exit(1); | |
14c63c25 | 381 | } |
382 | printf("done creating hash table: %d\n", hid); | |
383 | ||
384 | while (fgets(buf, BUFSIZ, stdin)) { | |
26ac0430 AJ |
385 | buf[strlen(buf) - 1] = '\0'; |
386 | printf("Inserting '%s' for item %p to hash table: %d\n", | |
387 | buf, buf, hid); | |
388 | hash_insert(hid, xstrdup(buf), (void *) 0x12345678); | |
389 | if (random() % 17 == 0) | |
390 | strcpy(todelete, buf); | |
14c63c25 | 391 | } |
392 | ||
393 | printf("walking hash table...\n"); | |
394 | for (i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) { | |
26ac0430 AJ |
395 | printf("item %5d: key: '%s' item: %p\n", i++, walker->key, |
396 | walker->item); | |
14c63c25 | 397 | } |
398 | printf("done walking hash table...\n"); | |
399 | ||
400 | if (todelete[0]) { | |
26ac0430 AJ |
401 | printf("deleting %s from %d\n", todelete, hid); |
402 | if (hash_delete(hid, todelete)) | |
403 | printf("hash_delete error\n"); | |
14c63c25 | 404 | } |
405 | printf("walking hash table...\n"); | |
406 | for (i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) { | |
26ac0430 AJ |
407 | printf("item %5d: key: '%s' item: %p\n", i++, walker->key, |
408 | walker->item); | |
14c63c25 | 409 | } |
410 | printf("done walking hash table...\n"); | |
411 | ||
14c63c25 | 412 | printf("driver finished.\n"); |
413 | exit(0); | |
414 | } | |
415 | #endif |