]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (C) 1996-2020 The Squid Software Foundation and contributors | |
3 | * | |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
9 | /* DEBUG: section 00 Hash Tables */ | |
10 | ||
11 | #include "squid.h" | |
12 | ||
13 | #if HAVE_UNISTD_H | |
14 | #include <unistd.h> | |
15 | #endif | |
16 | #if HAVE_CTYPE_H | |
17 | #include <ctype.h> | |
18 | #endif | |
19 | #if HAVE_STRINGS_H | |
20 | #include <strings.h> | |
21 | #endif | |
22 | #if HAVE_ASSERT_H | |
23 | #include <assert.h> | |
24 | #endif | |
25 | ||
26 | #include "hash.h" | |
27 | ||
28 | #undef free | |
29 | extern void my_free(char *, int, void *); | |
30 | ||
31 | #define free(a) my_free(__FILE__, __LINE__, a) | |
32 | ||
33 | extern void print_stats(); | |
34 | /* | |
35 | * hash_url() - Returns a well-distributed hash function for URLs. | |
36 | * The best way is to sum up the last half of the string. | |
37 | * Adapted from code written by Mic Bowman. -Darren | |
38 | * Generates a standard deviation = 15.73 | |
39 | */ | |
40 | unsigned int | |
41 | hash_url(const void *data, unsigned int size) | |
42 | { | |
43 | const char *s = data; | |
44 | unsigned int i, j, n; | |
45 | j = strlen(s); | |
46 | for (i = j / 2, n = 0; i < j; i++) | |
47 | n ^= 271 * (unsigned) s[i]; | |
48 | i = n ^ (j * 271); | |
49 | return i % size; | |
50 | } | |
51 | ||
52 | unsigned int | |
53 | hash_string(const void *data, unsigned int size) | |
54 | { | |
55 | const char *s = data; | |
56 | unsigned int n = 0; | |
57 | unsigned int j = 0; | |
58 | unsigned int i = 0; | |
59 | while (*s) { | |
60 | j++; | |
61 | n ^= 271 * (unsigned) *s++; | |
62 | } | |
63 | i = n ^ (j * 271); | |
64 | return i % size; | |
65 | } | |
66 | ||
67 | /* the following 4 functions were adapted from | |
68 | * usr/src/lib/libc/db/hash_func.c, 4.4 BSD lite */ | |
69 | ||
70 | /* | |
71 | * HASH FUNCTIONS | |
72 | * | |
73 | * Assume that we've already split the bucket to which this key hashes, | |
74 | * calculate that bucket, and check that in fact we did already split it. | |
75 | * | |
76 | * This came from ejb's hsearch. | |
77 | */ | |
78 | ||
79 | #define PRIME1 37 | |
80 | #define PRIME2 1048583 | |
81 | ||
82 | /* Hash function from Chris Torek. */ | |
83 | unsigned int | |
84 | hash4(const void *data, unsigned int size) | |
85 | { | |
86 | const char *key = data; | |
87 | size_t loop; | |
88 | unsigned int h; | |
89 | size_t len; | |
90 | ||
91 | #define HASH4a h = (h << 5) - h + *key++; | |
92 | #define HASH4b h = (h << 5) + h + *key++; | |
93 | #define HASH4 HASH4b | |
94 | ||
95 | h = 0; | |
96 | len = strlen(key); | |
97 | loop = (len + 8 - 1) >> 3; | |
98 | switch (len & (8 - 1)) { | |
99 | case 0: | |
100 | do { | |
101 | HASH4; | |
102 | /* FALLTHROUGH */ | |
103 | case 7: | |
104 | HASH4; | |
105 | /* FALLTHROUGH */ | |
106 | case 6: | |
107 | HASH4; | |
108 | /* FALLTHROUGH */ | |
109 | case 5: | |
110 | HASH4; | |
111 | /* FALLTHROUGH */ | |
112 | case 4: | |
113 | HASH4; | |
114 | /* FALLTHROUGH */ | |
115 | case 3: | |
116 | HASH4; | |
117 | /* FALLTHROUGH */ | |
118 | case 2: | |
119 | HASH4; | |
120 | /* FALLTHROUGH */ | |
121 | case 1: | |
122 | HASH4; | |
123 | } while (--loop); | |
124 | } | |
125 | return h % size; | |
126 | } | |
127 | ||
128 | /* | |
129 | * hash_create - creates a new hash table, uses the cmp_func | |
130 | * to compare keys. Returns the identification for the hash table; | |
131 | * otherwise returns a negative number on error. | |
132 | */ | |
133 | hash_table * | |
134 | hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func) | |
135 | { | |
136 | hash_table *hid = calloc(1, sizeof(hash_table)); | |
137 | if (!hash_sz) | |
138 | hid->size = (unsigned int) DEFAULT_HASH_SIZE; | |
139 | else | |
140 | hid->size = (unsigned int) hash_sz; | |
141 | /* allocate and null the buckets */ | |
142 | hid->buckets = calloc(hid->size, sizeof(hash_link *)); | |
143 | hid->cmp = cmp_func; | |
144 | hid->hash = hash_func; | |
145 | hid->current_ptr = NULL; | |
146 | hid->current_slot = 0; | |
147 | return hid; | |
148 | } | |
149 | ||
150 | /* | |
151 | * hash_insert - inserts the given item 'item' under the given key 'k' | |
152 | * into the hash table 'hid'. Returns non-zero on error; otherwise, | |
153 | * returns 0 and inserts the item. | |
154 | * | |
155 | * It does not copy any data into the hash table, only pointers. | |
156 | */ | |
157 | void | |
158 | hash_insert(hash_table * hid, const char *k, void *item) | |
159 | { | |
160 | int i; | |
161 | hash_link *new; | |
162 | assert(k != NULL); | |
163 | /* Add to the given hash table 'hid' */ | |
164 | new = calloc(1, sizeof(hash_link)); | |
165 | if (!new) { | |
166 | fprintf(stderr, "calloc failed!\n"); | |
167 | print_stats(); | |
168 | exit(1); | |
169 | } | |
170 | new->item = item; | |
171 | new->key = (char *) k; | |
172 | i = hid->hash(k, hid->size); | |
173 | new->next = hid->buckets[i]; | |
174 | hid->buckets[i] = new; | |
175 | } | |
176 | ||
177 | /* | |
178 | * hash_join - joins a hash_link under its key lnk->key | |
179 | * into the hash table 'hid'. | |
180 | * | |
181 | * It does not copy any data into the hash table, only links pointers. | |
182 | */ | |
183 | void | |
184 | hash_join(hash_table * hid, hash_link * lnk) | |
185 | { | |
186 | int i; | |
187 | i = hid->hash(lnk->key, hid->size); | |
188 | lnk->next = hid->buckets[i]; | |
189 | hid->buckets[i] = lnk; | |
190 | } | |
191 | ||
192 | /* | |
193 | * hash_lookup - locates the item under the key 'k' in the hash table | |
194 | * 'hid'. Returns a pointer to the hash bucket on success; otherwise | |
195 | * returns NULL. | |
196 | */ | |
197 | hash_link * | |
198 | hash_lookup(hash_table * hid, const void *k) | |
199 | { | |
200 | hash_link *walker; | |
201 | int b; | |
202 | assert(k != NULL); | |
203 | b = hid->hash(k, hid->size); | |
204 | for (walker = hid->buckets[b]; walker != NULL; walker = walker->next) { | |
205 | if ((hid->cmp) (k, walker->key) == 0) | |
206 | return (walker); | |
207 | assert(walker != walker->next); | |
208 | } | |
209 | return NULL; | |
210 | } | |
211 | ||
212 | /* | |
213 | * hash_first - returns the first item in the hash table 'hid'. | |
214 | * Otherwise, returns NULL on error. | |
215 | */ | |
216 | hash_link * | |
217 | hash_first(hash_table * hid) | |
218 | { | |
219 | int i; | |
220 | ||
221 | for (i = 0; i < hid->size; i++) { | |
222 | hid->current_slot = i; | |
223 | if (hid->buckets[i] != NULL) | |
224 | return (hid->current_ptr = hid->buckets[i]); | |
225 | } | |
226 | return NULL; | |
227 | } | |
228 | ||
229 | /* | |
230 | * hash_next - returns the next item in the hash table 'hid'. | |
231 | * Otherwise, returns NULL on error or end of list. | |
232 | * | |
233 | * MUST call hash_first() before hash_next(). | |
234 | */ | |
235 | hash_link * | |
236 | hash_next(hash_table * hid) | |
237 | { | |
238 | int i; | |
239 | ||
240 | if (hid->current_ptr != NULL) { | |
241 | hid->current_ptr = hid->current_ptr->next; | |
242 | if (hid->current_ptr != NULL) | |
243 | return (hid->current_ptr); /* next item */ | |
244 | } | |
245 | /* find next bucket */ | |
246 | for (i = hid->current_slot + 1; i < hid->size; i++) { | |
247 | hid->current_slot = i; | |
248 | if (hid->buckets[i] != NULL) | |
249 | return (hid->current_ptr = hid->buckets[i]); | |
250 | } | |
251 | return NULL; /* end of list */ | |
252 | } | |
253 | ||
254 | int | |
255 | hash_delete(hash_table * hid, const char *key) | |
256 | { | |
257 | return hash_delete_link(hid, hash_lookup(hid, key)); | |
258 | } | |
259 | ||
260 | /* | |
261 | * hash_delete_link - deletes the given hash_link node from the | |
262 | * hash table 'hid'. If FreeLink then free the given hash_link. | |
263 | * | |
264 | * On success, it returns 0 and deletes the link; otherwise, | |
265 | * returns non-zero on error. | |
266 | */ | |
267 | int | |
268 | hash_unlink(hash_table * hid, hash_link * hl, int FreeLink) | |
269 | { | |
270 | hash_link *walker, *prev; | |
271 | int i; | |
272 | if (hl == NULL) | |
273 | return -1; | |
274 | i = hid->hash(hl->key, hid->size); | |
275 | for (prev = NULL, walker = hid->buckets[i]; | |
276 | walker != NULL; prev = walker, walker = walker->next) { | |
277 | if (walker == hl) { | |
278 | if (prev == NULL) { /* it's the head */ | |
279 | hid->buckets[i] = walker->next; | |
280 | } else { | |
281 | prev->next = walker->next; /* skip it */ | |
282 | } | |
283 | /* fix walker state if needed */ | |
284 | if (walker == hid->current_ptr) | |
285 | hid->current_ptr = walker->next; | |
286 | if (FreeLink) { | |
287 | if (walker) { | |
288 | free(walker); | |
289 | } | |
290 | } | |
291 | return 0; | |
292 | } | |
293 | } | |
294 | return 1; | |
295 | } | |
296 | ||
297 | /* take link off and free link node */ | |
298 | int | |
299 | hash_delete_link(hash_table * hid, hash_link * hl) | |
300 | { | |
301 | return (hash_unlink(hid, hl, 1)); | |
302 | } | |
303 | ||
304 | /* take link off only */ | |
305 | int | |
306 | hash_remove_link(hash_table * hid, hash_link * hl) | |
307 | { | |
308 | return (hash_unlink(hid, hl, 0)); | |
309 | } | |
310 | ||
311 | /* | |
312 | * hash_get_bucket - returns the head item of the bucket | |
313 | * in the hash table 'hid'. Otherwise, returns NULL on error. | |
314 | */ | |
315 | hash_link * | |
316 | hash_get_bucket(hash_table * hid, unsigned int bucket) | |
317 | { | |
318 | if (bucket >= hid->size) | |
319 | return NULL; | |
320 | return (hid->buckets[bucket]); | |
321 | } | |
322 | ||
323 | void | |
324 | hashFreeMemory(hash_table * hid) | |
325 | { | |
326 | if (hid->buckets); | |
327 | free(hid->buckets); | |
328 | if (hid) | |
329 | free(hid); | |
330 | } | |
331 | ||
332 | #if USE_HASH_DRIVER | |
333 | /* | |
334 | * hash-driver - Run with a big file as stdin to insert each line into the | |
335 | * hash table, then prints the whole hash table, then deletes a random item, | |
336 | * and prints the table again... | |
337 | */ | |
338 | int | |
339 | main(void) | |
340 | { | |
341 | hash_table *hid; | |
342 | int i; | |
343 | LOCAL_ARRAY(char, buf, BUFSIZ); | |
344 | LOCAL_ARRAY(char, todelete, BUFSIZ); | |
345 | hash_link *walker = NULL; | |
346 | ||
347 | todelete[0] = '\0'; | |
348 | printf("init\n"); | |
349 | ||
350 | printf("creating hash table\n"); | |
351 | if ((hid = hash_create(strcmp, 229, hash_string)) < 0) { | |
352 | printf("hash_create error.\n"); | |
353 | exit(1); | |
354 | } | |
355 | printf("done creating hash table: %d\n", hid); | |
356 | ||
357 | std::mt19937 mt; | |
358 | xuniform_int_distribution<> dist(0,16); | |
359 | ||
360 | while (fgets(buf, BUFSIZ, stdin)) { | |
361 | buf[strlen(buf) - 1] = '\0'; | |
362 | printf("Inserting '%s' for item %p to hash table: %d\n", | |
363 | buf, buf, hid); | |
364 | hash_insert(hid, xstrdup(buf), (void *) 0x12345678); | |
365 | if (dist(mt) == 0) | |
366 | strcpy(todelete, buf); | |
367 | } | |
368 | ||
369 | printf("walking hash table...\n"); | |
370 | for (i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) { | |
371 | printf("item %5d: key: '%s' item: %p\n", i++, walker->key, | |
372 | walker->item); | |
373 | } | |
374 | printf("done walking hash table...\n"); | |
375 | ||
376 | if (todelete[0]) { | |
377 | printf("deleting %s from %d\n", todelete, hid); | |
378 | if (hash_delete(hid, todelete)) | |
379 | printf("hash_delete error\n"); | |
380 | } | |
381 | printf("walking hash table...\n"); | |
382 | for (i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) { | |
383 | printf("item %5d: key: '%s' item: %p\n", i++, walker->key, | |
384 | walker->item); | |
385 | } | |
386 | printf("done walking hash table...\n"); | |
387 | ||
388 | printf("driver finished.\n"); | |
389 | exit(0); | |
390 | } | |
391 | #endif | |
392 |