]>
Commit | Line | Data |
---|---|---|
f52a7d75 | 1 | /* |
5b74111a | 2 | * Copyright (C) 1996-2018 The Squid Software Foundation and contributors |
f52a7d75 | 3 | * |
0545caaa AJ |
4 | * Squid software is distributed under GPLv2+ license and includes |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
f52a7d75 | 7 | */ |
8 | ||
0545caaa AJ |
9 | /* DEBUG: section 00 Hash Tables */ |
10 | ||
f7f3304a | 11 | #include "squid.h" |
25f98340 AJ |
12 | #include "hash.h" |
13 | #include "profiler/Profiler.h" | |
f52a7d75 | 14 | |
074d6a40 AJ |
15 | #include <cassert> |
16 | #include <cmath> | |
17 | #include <cstdlib> | |
18 | #include <cstring> | |
f52a7d75 | 19 | #if HAVE_UNISTD_H |
20 | #include <unistd.h> | |
21 | #endif | |
22 | #if HAVE_GNUMALLLOC_H | |
23 | #include <gnumalloc.h> | |
482aa790 | 24 | #elif HAVE_MALLOC_H |
f52a7d75 | 25 | #include <malloc.h> |
26 | #endif | |
f52a7d75 | 27 | |
f52a7d75 | 28 | static void hash_next_bucket(hash_table * hid); |
29 | ||
30 | unsigned int | |
31 | hash_string(const void *data, unsigned int size) | |
32 | { | |
209663bb | 33 | const unsigned char *s = static_cast<const unsigned char *>(data); |
f52a7d75 | 34 | unsigned int n = 0; |
35 | unsigned int j = 0; | |
36 | unsigned int i = 0; | |
37 | while (*s) { | |
742a021b | 38 | ++j; |
aec55359 FC |
39 | n ^= 271 * *s; |
40 | ++s; | |
f52a7d75 | 41 | } |
42 | i = n ^ (j * 271); | |
43 | return i % size; | |
44 | } | |
45 | ||
46 | /* the following function(s) were adapted from | |
47 | * usr/src/lib/libc/db/hash_func.c, 4.4 BSD lite */ | |
48 | ||
49 | /* Hash function from Chris Torek. */ | |
50 | unsigned int | |
51 | hash4(const void *data, unsigned int size) | |
52 | { | |
209663bb | 53 | const char *key = static_cast<const char *>(data); |
f52a7d75 | 54 | size_t loop; |
55 | unsigned int h; | |
56 | size_t len; | |
57 | ||
58 | #define HASH4a h = (h << 5) - h + *key++; | |
59 | #define HASH4b h = (h << 5) + h + *key++; | |
60 | #define HASH4 HASH4b | |
61 | ||
62 | h = 0; | |
63 | len = strlen(key); | |
64 | loop = len >> 3; | |
65 | switch (len & (8 - 1)) { | |
66 | case 0: | |
26ac0430 | 67 | break; |
f52a7d75 | 68 | case 7: |
26ac0430 | 69 | HASH4; |
f53969cc | 70 | /* FALLTHROUGH */ |
f52a7d75 | 71 | case 6: |
26ac0430 | 72 | HASH4; |
f53969cc | 73 | /* FALLTHROUGH */ |
f52a7d75 | 74 | case 5: |
26ac0430 | 75 | HASH4; |
f53969cc | 76 | /* FALLTHROUGH */ |
f52a7d75 | 77 | case 4: |
26ac0430 | 78 | HASH4; |
f53969cc | 79 | /* FALLTHROUGH */ |
f52a7d75 | 80 | case 3: |
26ac0430 | 81 | HASH4; |
f53969cc | 82 | /* FALLTHROUGH */ |
f52a7d75 | 83 | case 2: |
26ac0430 | 84 | HASH4; |
f53969cc | 85 | /* FALLTHROUGH */ |
f52a7d75 | 86 | case 1: |
26ac0430 | 87 | HASH4; |
f52a7d75 | 88 | } |
a08d350e CT |
89 | while (loop) { |
90 | --loop; | |
26ac0430 AJ |
91 | HASH4; |
92 | HASH4; | |
93 | HASH4; | |
94 | HASH4; | |
95 | HASH4; | |
96 | HASH4; | |
97 | HASH4; | |
98 | HASH4; | |
f52a7d75 | 99 | } |
100 | return h % size; | |
101 | } | |
102 | ||
209663bb | 103 | /** |
f52a7d75 | 104 | * hash_create - creates a new hash table, uses the cmp_func |
105 | * to compare keys. Returns the identification for the hash table; | |
106 | * otherwise returns a negative number on error. | |
107 | */ | |
108 | hash_table * | |
109 | hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func) | |
110 | { | |
209663bb | 111 | hash_table *hid = (hash_table *)xcalloc(1, sizeof(hash_table)); |
f52a7d75 | 112 | if (!hash_sz) |
26ac0430 | 113 | hid->size = (unsigned int) DEFAULT_HASH_SIZE; |
f52a7d75 | 114 | else |
26ac0430 | 115 | hid->size = (unsigned int) hash_sz; |
f52a7d75 | 116 | /* allocate and null the buckets */ |
209663bb | 117 | hid->buckets = (hash_link **)xcalloc(hid->size, sizeof(hash_link *)); |
f52a7d75 | 118 | hid->cmp = cmp_func; |
119 | hid->hash = hash_func; | |
120 | hid->next = NULL; | |
121 | hid->current_slot = 0; | |
122 | return hid; | |
123 | } | |
124 | ||
209663bb | 125 | /** |
f52a7d75 | 126 | * hash_join - joins a hash_link under its key lnk->key |
26ac0430 | 127 | * into the hash table 'hid'. |
f52a7d75 | 128 | * |
129 | * It does not copy any data into the hash table, only links pointers. | |
130 | */ | |
131 | void | |
132 | hash_join(hash_table * hid, hash_link * lnk) | |
133 | { | |
134 | int i; | |
135 | i = hid->hash(lnk->key, hid->size); | |
136 | lnk->next = hid->buckets[i]; | |
137 | hid->buckets[i] = lnk; | |
742a021b | 138 | ++hid->count; |
f52a7d75 | 139 | } |
140 | ||
209663bb | 141 | /** |
f52a7d75 | 142 | * hash_lookup - locates the item under the key 'k' in the hash table |
143 | * 'hid'. Returns a pointer to the hash bucket on success; otherwise | |
144 | * returns NULL. | |
145 | */ | |
4a8b20e8 | 146 | hash_link * |
f52a7d75 | 147 | hash_lookup(hash_table * hid, const void *k) |
148 | { | |
f52a7d75 | 149 | int b; |
88bfe092 | 150 | PROF_start(hash_lookup); |
f52a7d75 | 151 | assert(k != NULL); |
152 | b = hid->hash(k, hid->size); | |
209663bb | 153 | for (hash_link *walker = hid->buckets[b]; walker != NULL; walker = walker->next) { |
26ac0430 AJ |
154 | if ((hid->cmp) (k, walker->key) == 0) { |
155 | PROF_stop(hash_lookup); | |
156 | return (walker); | |
157 | } | |
158 | assert(walker != walker->next); | |
f52a7d75 | 159 | } |
88bfe092 | 160 | PROF_stop(hash_lookup); |
f52a7d75 | 161 | return NULL; |
162 | } | |
163 | ||
164 | static void | |
165 | hash_next_bucket(hash_table * hid) | |
166 | { | |
167 | while (hid->next == NULL && ++hid->current_slot < hid->size) | |
26ac0430 | 168 | hid->next = hid->buckets[hid->current_slot]; |
f52a7d75 | 169 | } |
170 | ||
209663bb | 171 | /** |
f52a7d75 | 172 | * hash_first - initializes the hash table for the hash_next() |
173 | * function. | |
174 | */ | |
175 | void | |
176 | hash_first(hash_table * hid) | |
177 | { | |
178 | assert(NULL == hid->next); | |
179 | hid->current_slot = 0; | |
180 | hid->next = hid->buckets[hid->current_slot]; | |
181 | if (NULL == hid->next) | |
26ac0430 | 182 | hash_next_bucket(hid); |
f52a7d75 | 183 | } |
184 | ||
209663bb | 185 | /** |
f52a7d75 | 186 | * hash_next - returns the next item in the hash table 'hid'. |
26ac0430 | 187 | * Otherwise, returns NULL on error or end of list. |
f52a7d75 | 188 | * |
189 | * MUST call hash_first() before hash_next(). | |
190 | */ | |
4a8b20e8 | 191 | hash_link * |
f52a7d75 | 192 | hash_next(hash_table * hid) |
193 | { | |
209663bb AJ |
194 | hash_link *p = hid->next; |
195 | if (NULL == p) | |
26ac0430 | 196 | return NULL; |
209663bb | 197 | hid->next = p->next; |
f52a7d75 | 198 | if (NULL == hid->next) |
26ac0430 | 199 | hash_next_bucket(hid); |
209663bb | 200 | return p; |
f52a7d75 | 201 | } |
202 | ||
209663bb | 203 | /** |
2c4f7ab2 | 204 | * hash_last - resets hash traversal state to NULL |
205 | * | |
206 | */ | |
207 | void | |
ab96e65c | 208 | hash_last(hash_table * hid) |
2c4f7ab2 | 209 | { |
137a13ea | 210 | assert(hid != NULL); |
2c4f7ab2 | 211 | hid->next = NULL; |
212 | hid->current_slot = 0; | |
213 | } | |
214 | ||
209663bb | 215 | /** |
26ac0430 | 216 | * hash_remove_link - deletes the given hash_link node from the |
f52a7d75 | 217 | * hash table 'hid'. Does not free the item, only removes it |
218 | * from the list. | |
219 | * | |
4fba1a24 | 220 | * An assertion is triggered if the hash_link is not found in the |
221 | * list. | |
f52a7d75 | 222 | */ |
223 | void | |
224 | hash_remove_link(hash_table * hid, hash_link * hl) | |
225 | { | |
f52a7d75 | 226 | assert(hl != NULL); |
209663bb AJ |
227 | int i = hid->hash(hl->key, hid->size); |
228 | for (hash_link **P = &hid->buckets[i]; *P; P = &(*P)->next) { | |
26ac0430 AJ |
229 | if (*P != hl) |
230 | continue; | |
231 | *P = hl->next; | |
232 | if (hid->next == hl) { | |
233 | hid->next = hl->next; | |
234 | if (NULL == hid->next) | |
235 | hash_next_bucket(hid); | |
236 | } | |
742a021b | 237 | --hid->count; |
26ac0430 | 238 | return; |
f52a7d75 | 239 | } |
e530de6c | 240 | assert(0); |
f52a7d75 | 241 | } |
242 | ||
209663bb | 243 | /** |
26ac0430 | 244 | * hash_get_bucket - returns the head item of the bucket |
f52a7d75 | 245 | * in the hash table 'hid'. Otherwise, returns NULL on error. |
246 | */ | |
247 | hash_link * | |
248 | hash_get_bucket(hash_table * hid, unsigned int bucket) | |
249 | { | |
250 | if (bucket >= hid->size) | |
26ac0430 | 251 | return NULL; |
f52a7d75 | 252 | return (hid->buckets[bucket]); |
253 | } | |
254 | ||
255 | void | |
256 | hashFreeItems(hash_table * hid, HASHFREE * free_func) | |
257 | { | |
258 | hash_link *l; | |
f52a7d75 | 259 | int i = 0; |
209663bb | 260 | hash_link **list = (hash_link **)xcalloc(hid->count, sizeof(hash_link *)); |
f52a7d75 | 261 | hash_first(hid); |
262 | while ((l = hash_next(hid)) && i < hid->count) { | |
26ac0430 | 263 | *(list + i) = l; |
742a021b | 264 | ++i; |
f52a7d75 | 265 | } |
742a021b | 266 | for (int j = 0; j < i; ++j) |
26ac0430 | 267 | free_func(*(list + j)); |
f52a7d75 | 268 | xfree(list); |
269 | } | |
270 | ||
271 | void | |
272 | hashFreeMemory(hash_table * hid) | |
273 | { | |
04f7fd38 | 274 | if (hid == NULL) |
928b6c10 | 275 | return; |
efacbef0 | 276 | if (hid->buckets) |
26ac0430 | 277 | xfree(hid->buckets); |
f52a7d75 | 278 | xfree(hid); |
279 | } | |
280 | ||
26ac0430 | 281 | static int hash_primes[] = { |
f52a7d75 | 282 | 103, |
283 | 229, | |
284 | 467, | |
285 | 977, | |
286 | 1979, | |
287 | 4019, | |
288 | 6037, | |
289 | 7951, | |
290 | 12149, | |
291 | 16231, | |
292 | 33493, | |
293 | 65357 | |
294 | }; | |
295 | ||
296 | int | |
297 | hashPrime(int n) | |
298 | { | |
299 | int I = sizeof(hash_primes) / sizeof(int); | |
f52a7d75 | 300 | int best_prime = hash_primes[0]; |
d87ebd78 | 301 | double min = fabs(log((double) n) - log((double) hash_primes[0])); |
f52a7d75 | 302 | double d; |
742a021b | 303 | for (int i = 0; i < I; ++i) { |
26ac0430 AJ |
304 | d = fabs(log((double) n) - log((double) hash_primes[i])); |
305 | if (d > min) | |
306 | continue; | |
307 | min = d; | |
308 | best_prime = hash_primes[i]; | |
f52a7d75 | 309 | } |
310 | return best_prime; | |
311 | } | |
312 | ||
209663bb | 313 | /** |
186477c1 | 314 | * return the key of a hash_link as a const string |
315 | */ | |
316 | const char * | |
317 | hashKeyStr(hash_link * hl) | |
318 | { | |
319 | return (const char *) hl->key; | |
320 | } | |
321 | ||
32d002cb | 322 | #if USE_HASH_DRIVER |
209663bb | 323 | /** |
f52a7d75 | 324 | * hash-driver - Run with a big file as stdin to insert each line into the |
325 | * hash table, then prints the whole hash table, then deletes a random item, | |
326 | * and prints the table again... | |
327 | */ | |
328 | int | |
329 | main(void) | |
330 | { | |
331 | hash_table *hid; | |
f52a7d75 | 332 | LOCAL_ARRAY(char, buf, BUFSIZ); |
333 | LOCAL_ARRAY(char, todelete, BUFSIZ); | |
334 | hash_link *walker = NULL; | |
335 | ||
336 | todelete[0] = '\0'; | |
337 | printf("init\n"); | |
338 | ||
339 | printf("creating hash table\n"); | |
340 | if ((hid = hash_create((HASHCMP *) strcmp, 229, hash4)) < 0) { | |
26ac0430 | 341 | printf("hash_create error.\n"); |
24885773 | 342 | exit(EXIT_FAILURE); |
f52a7d75 | 343 | } |
344 | printf("done creating hash table: %d\n", hid); | |
345 | ||
4a0a4ad8 | 346 | std::mt19937 mt; |
8ed8fa40 | 347 | xuniform_int_distribution<> dist(0,16); |
4a0a4ad8 | 348 | |
f52a7d75 | 349 | while (fgets(buf, BUFSIZ, stdin)) { |
26ac0430 AJ |
350 | buf[strlen(buf) - 1] = '\0'; |
351 | printf("Inserting '%s' for item %p to hash table: %d\n", | |
352 | buf, buf, hid); | |
353 | hash_insert(hid, xstrdup(buf), (void *) 0x12345678); | |
4a0a4ad8 | 354 | if (dist(mt) == 0) |
26ac0430 | 355 | strcpy(todelete, buf); |
f52a7d75 | 356 | } |
357 | ||
358 | printf("walking hash table...\n"); | |
209663bb | 359 | for (int i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) { |
26ac0430 AJ |
360 | printf("item %5d: key: '%s' item: %p\n", i++, walker->key, |
361 | walker->item); | |
f52a7d75 | 362 | } |
363 | printf("done walking hash table...\n"); | |
364 | ||
365 | if (todelete[0]) { | |
26ac0430 AJ |
366 | printf("deleting %s from %d\n", todelete, hid); |
367 | if (hash_delete(hid, todelete)) | |
368 | printf("hash_delete error\n"); | |
f52a7d75 | 369 | } |
370 | printf("walking hash table...\n"); | |
209663bb | 371 | for (int i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) { |
26ac0430 AJ |
372 | printf("item %5d: key: '%s' item: %p\n", i++, walker->key, |
373 | walker->item); | |
f52a7d75 | 374 | } |
375 | printf("done walking hash table...\n"); | |
376 | ||
f52a7d75 | 377 | printf("driver finished.\n"); |
24885773 | 378 | return EXIT_SUCCESS; |
f52a7d75 | 379 | } |
380 | #endif | |
f53969cc | 381 |