]>
Commit | Line | Data |
---|---|---|
0545caaa AJ |
1 | /* |
2 | * Copyright (C) 1996-2014 The Squid Software Foundation and contributors | |
3 | * | |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
ad19eafb | 8 | |
9 | /* | |
ad19eafb | 10 | * AUTHOR: John Dilley, Hewlett Packard |
ad19eafb | 11 | */ |
12 | ||
13 | /**************************************************************************** | |
14 | * Heap implementation | |
9bc73deb | 15 | * Copyright (C) 1999 by Hewlett Packard |
ad19eafb | 16 | ****************************************************************************/ |
17 | ||
f7f3304a | 18 | #include "squid.h" |
2d72d4fd | 19 | #include "heap.h" |
ad19eafb | 20 | |
21 | #if HAVE_STDLIB_H | |
22 | #include <stdlib.h> | |
23 | #endif | |
24 | #if HAVE_ASSERT_H | |
25 | #include <assert.h> | |
26 | #endif | |
27 | #if HAVE_STRING_H | |
28 | #include <string.h> | |
29 | #endif | |
ad19eafb | 30 | |
7e3ce7b9 | 31 | #include "util.h" |
ad19eafb | 32 | |
33 | /* | |
34 | * Hacks for non-synchronized heap implementation. | |
35 | */ | |
36 | #define mutex_lock(m) (void)0 | |
37 | #define mutex_unlock(m) (void)0 | |
38 | #define mutex_trylock(m) (void)0 | |
39 | #define mutex_init(m) ((m)=123456) | |
40 | ||
26ac0430 | 41 | /* |
ad19eafb | 42 | * Private function prototypes. |
43 | */ | |
44 | static void _heap_ify_up(heap * hp, heap_node * elm); | |
45 | static void _heap_ify_down(heap * hp, heap_node * elm); | |
46 | static int _heap_should_grow(heap * hp); | |
47 | static void _heap_grow(heap * hp); | |
48 | static void _heap_swap_element(heap * hp, heap_node * elm1, heap_node * elm2); | |
49 | static int _heap_node_exist(heap * hp, int id); | |
50 | ||
f53969cc | 51 | #ifdef HEAP_DEBUG |
ad19eafb | 52 | void _heap_print_tree(heap * hp, heap_node * node); |
53 | #endif /* HEAP_DEBUG */ | |
54 | ||
55 | #define Left(x) (2 * (x) + 1) | |
56 | #define Right(x) (2 * (x) + 2) | |
57 | #define Parent(x) ((int)((x)-1)/2) | |
58 | ||
59 | #define Threshold 10000 | |
60 | #define NormalRate 2 | |
61 | #define SlowRate 1.5 | |
62 | #define MinSize 32 | |
63 | ||
64 | /**************************************************************************** | |
65 | * Public functions | |
66 | ****************************************************************************/ | |
67 | ||
68 | /* | |
69 | * Return a newly created heap. INITSIZE is the initial size of the heap. | |
70 | */ | |
71 | heap * | |
72 | new_heap(int initSize, heap_key_func gen_key) | |
73 | { | |
7e3ce7b9 | 74 | heap *hp = xmalloc(sizeof(*hp)); |
ad19eafb | 75 | assert(hp != NULL); |
76 | ||
9bc73deb | 77 | if (initSize <= 0) |
26ac0430 | 78 | initSize = MinSize; |
7e3ce7b9 | 79 | hp->nodes = xcalloc(initSize, sizeof(heap_node *)); |
ad19eafb | 80 | assert(hp->nodes != NULL); |
81 | ||
ad19eafb | 82 | hp->size = initSize; |
83 | hp->last = 0; | |
84 | hp->gen_key = gen_key; | |
85 | hp->age = 0; | |
86 | ||
87 | return hp; | |
88 | } | |
89 | ||
ad19eafb | 90 | /* |
91 | * Free memory used by a heap. Does not free the metadata pointed to by the | |
92 | * heap nodes, only the heap's internal memory. | |
93 | */ | |
94 | void | |
95 | delete_heap(heap * hp) | |
96 | { | |
97 | int i; | |
137a13ea | 98 | assert(hp != NULL); |
ad19eafb | 99 | for (i = 0; i < hp->last; i++) { |
26ac0430 | 100 | xfree(hp->nodes[i]); |
ad19eafb | 101 | } |
7e3ce7b9 | 102 | xfree(hp->nodes); |
103 | xfree(hp); | |
ad19eafb | 104 | } |
105 | ||
106 | /* | |
7e3ce7b9 | 107 | * Insert DAT based on KY into HP maintaining the heap property. |
108 | * Return the newly inserted heap node. The fields of ELM other | |
109 | * than ID are never changed until ELM is deleted from HP, i.e. | |
110 | * caller can assume that the heap node always exist at the same | |
111 | * place in memory unless heap_delete or heap_extractmin is called | |
112 | * on that node. This function exposes the heap's internal data | |
113 | * structure to the caller. This is required in order to do O(lgN) | |
114 | * deletion. | |
ad19eafb | 115 | */ |
116 | heap_node * | |
117 | heap_insert(heap * hp, void *dat) | |
118 | { | |
7e3ce7b9 | 119 | heap_node *elm = xmalloc(sizeof(*elm)); |
ad19eafb | 120 | |
121 | elm->key = heap_gen_key(hp, dat); | |
122 | elm->data = dat; | |
123 | ||
124 | if (_heap_should_grow(hp)) | |
26ac0430 | 125 | _heap_grow(hp); |
ad19eafb | 126 | |
127 | hp->nodes[hp->last] = elm; | |
128 | elm->id = hp->last; | |
129 | hp->last += 1; | |
130 | ||
131 | _heap_ify_up(hp, elm); | |
132 | ||
133 | return elm; | |
134 | } | |
135 | ||
ad19eafb | 136 | /* |
137 | * Delete ELM while maintaining the heap property. ELM may be modified. | |
138 | * Assumes that ELM is not NULL and frees it. Returns the data pointed to | |
139 | * in, which the caller must free if necessary. | |
140 | */ | |
141 | heap_t | |
142 | heap_delete(heap * hp, heap_node * elm) | |
143 | { | |
144 | heap_node *lastNode; | |
145 | heap_t data = elm->data; | |
146 | ||
147 | assert(_heap_node_exist(hp, hp->last - 1)); | |
148 | ||
149 | lastNode = hp->nodes[hp->last - 1]; | |
150 | _heap_swap_element(hp, lastNode, elm); | |
151 | heap_extractlast(hp); | |
152 | ||
7e3ce7b9 | 153 | if (elm == lastNode) { |
26ac0430 AJ |
154 | /* |
155 | * lastNode just got freed, so don't access it in the next | |
156 | * block. | |
157 | */ | |
158 | (void) 0; | |
7e3ce7b9 | 159 | } else if (hp->last > 0) { |
26ac0430 | 160 | if (lastNode->key < hp->nodes[Parent(lastNode->id)]->key) |
f53969cc | 161 | _heap_ify_up(hp, lastNode); /* COOL! */ |
26ac0430 | 162 | _heap_ify_down(hp, lastNode); |
ad19eafb | 163 | } |
164 | return data; | |
165 | } | |
166 | ||
26ac0430 | 167 | /* |
ad19eafb | 168 | * Delete the last element (leaf) out of the heap. Does not require a |
169 | * heapify operation. | |
170 | */ | |
171 | ||
f53969cc | 172 | #ifndef heap_gen_key |
ad19eafb | 173 | /* |
174 | * Function to generate keys. See macro definition in heap.h. | |
175 | */ | |
176 | heap_key | |
177 | heap_gen_key(heap * hp, heap_t dat) | |
178 | { | |
179 | return hp->gen_key(dat, hp->age); | |
180 | } | |
181 | #endif /* heap_gen_key */ | |
182 | ||
26ac0430 | 183 | /* |
ad19eafb | 184 | * Returns the data of the node with the largest KEY value and removes that |
185 | * node from the heap. Returns NULL if the heap was empty. | |
186 | */ | |
187 | heap_t | |
188 | heap_extractmin(heap * hp) | |
189 | { | |
190 | heap_t data; | |
191 | ||
192 | if (hp->last <= 0) | |
26ac0430 | 193 | return NULL; |
ad19eafb | 194 | |
195 | mutex_lock(hp->lock); | |
196 | ||
197 | data = hp->nodes[0]->data; | |
f53969cc | 198 | heap_delete(hp, hp->nodes[0]); /* Delete the root */ |
ad19eafb | 199 | |
200 | mutex_unlock(hp->lock); | |
201 | ||
202 | return data; | |
203 | } | |
204 | ||
26ac0430 | 205 | /* |
ad19eafb | 206 | * Remove the last node in HP. Frees the heap internal structure and |
207 | * returns the data pointes to by the last node. | |
208 | */ | |
209 | heap_t | |
210 | heap_extractlast(heap * hp) | |
211 | { | |
212 | heap_t data; | |
213 | assert(_heap_node_exist(hp, hp->last - 1)); | |
214 | hp->last -= 1; | |
215 | data = hp->nodes[hp->last]->data; | |
7e3ce7b9 | 216 | xfree(hp->nodes[hp->last]); |
ad19eafb | 217 | return data; |
218 | } | |
219 | ||
26ac0430 | 220 | /* |
ad19eafb | 221 | * The semantics of this routine is the same as the followings: |
222 | * heap_delete(hp, elm); | |
223 | * heap_insert(hp, dat); | |
224 | * Returns the old data object from elm (the one being replaced). The | |
225 | * caller must free this as necessary. | |
226 | */ | |
227 | heap_t | |
228 | heap_update(heap * hp, heap_node * elm, void *dat) | |
229 | { | |
230 | heap_t old = elm->data; | |
231 | heap_key ky = heap_gen_key(hp, dat); | |
232 | ||
233 | elm->key = ky; | |
234 | elm->data = dat; | |
235 | ||
236 | if (elm->key < hp->nodes[Parent(elm->id)]->key) | |
26ac0430 | 237 | _heap_ify_up(hp, elm); |
ad19eafb | 238 | _heap_ify_down(hp, elm); |
239 | ||
240 | return old; | |
241 | } | |
242 | ||
26ac0430 | 243 | /* |
ad19eafb | 244 | * A pointer to the root node's DATA. |
245 | */ | |
246 | void * | |
247 | heap_peepmin(heap * hp) | |
248 | { | |
249 | assert(_heap_node_exist(hp, 0)); | |
250 | return hp->nodes[0]->data; | |
251 | } | |
252 | ||
26ac0430 | 253 | /* |
ad19eafb | 254 | * The KEY of the root node. |
255 | */ | |
256 | heap_key | |
257 | heap_peepminkey(heap * hp) | |
258 | { | |
259 | assert(_heap_node_exist(hp, 0)); | |
260 | return hp->nodes[0]->key; | |
261 | } | |
262 | ||
26ac0430 | 263 | /* |
ad19eafb | 264 | * Same as heap_peep except that this return the KEY of the node. |
265 | * Only meant for iteration. | |
266 | */ | |
267 | heap_key | |
268 | heap_peepkey(heap * hp, int n) | |
269 | { | |
270 | assert(_heap_node_exist(hp, n)); | |
271 | return hp->nodes[n]->key; | |
272 | } | |
273 | ||
26ac0430 | 274 | /* |
ad19eafb | 275 | * A pointer to Nth node's DATA. The caller can iterate through HP by |
276 | * calling this routine. eg. Caller can execute the following code: | |
277 | * for(i = 0; i < heap_nodes(hp); i++) | |
278 | * data = heap_peep(hp, i); | |
279 | */ | |
280 | void * | |
281 | heap_peep(heap * hp, int n) | |
282 | { | |
283 | void *data; | |
284 | assert(_heap_node_exist(hp, n)); | |
285 | data = hp->nodes[n]->data; | |
286 | return data; | |
287 | } | |
288 | ||
f53969cc | 289 | #ifndef heap_nodes |
26ac0430 | 290 | /* |
ad19eafb | 291 | * Current number of nodes in HP. |
292 | */ | |
293 | int | |
294 | heap_nodes(heap * hp) | |
295 | { | |
296 | return hp->last; | |
297 | } | |
298 | #endif /* heap_nodes */ | |
299 | ||
f53969cc | 300 | #ifndef heap_empty |
26ac0430 | 301 | /* |
ad19eafb | 302 | * Determine if the heap is empty. Returns 1 if HP has no elements and 0 |
303 | * otherwise. | |
304 | */ | |
305 | int | |
306 | heap_empty(heap * hp) | |
307 | { | |
308 | return (hp->last <= 0) ? 1 : 0; | |
309 | } | |
310 | #endif /* heap_empty */ | |
311 | ||
312 | /****************** Private Functions *******************/ | |
313 | ||
26ac0430 | 314 | /* |
ad19eafb | 315 | * Maintain the heap order property (parent is smaller than children) which |
316 | * may only be violated at ELM downwards. Assumes caller has locked the heap. | |
317 | */ | |
318 | static void | |
319 | _heap_ify_down(heap * hp, heap_node * elm) | |
320 | { | |
321 | heap_node *kid; | |
322 | int left = 0, right = 0; | |
94060df3 AJ |
323 | int isTrue = 1; |
324 | while (isTrue) { | |
26ac0430 AJ |
325 | left = Left(elm->id); |
326 | right = Right(elm->id); | |
327 | if (!_heap_node_exist(hp, left)) { | |
328 | /* At the bottom of the heap (no child). */ | |
329 | ||
330 | assert(!_heap_node_exist(hp, right)); | |
331 | break; | |
332 | } else if (!_heap_node_exist(hp, right)) | |
333 | /* Only left child exists. */ | |
334 | ||
335 | kid = hp->nodes[left]; | |
336 | else { | |
337 | if (hp->nodes[right]->key < hp->nodes[left]->key) | |
338 | kid = hp->nodes[right]; | |
339 | else | |
340 | kid = hp->nodes[left]; | |
341 | } | |
342 | if (elm->key <= kid->key) | |
343 | break; | |
344 | _heap_swap_element(hp, kid, elm); | |
ad19eafb | 345 | } |
346 | } | |
347 | ||
26ac0430 | 348 | /* |
ad19eafb | 349 | * Maintain the heap property above ELM. Caller has locked the heap. |
350 | */ | |
351 | static void | |
352 | _heap_ify_up(heap * hp, heap_node * elm) | |
353 | { | |
354 | heap_node *parentNode; | |
355 | while (elm->id > 0) { | |
26ac0430 AJ |
356 | parentNode = hp->nodes[Parent(elm->id)]; |
357 | if (parentNode->key <= elm->key) | |
358 | break; | |
f53969cc | 359 | _heap_swap_element(hp, parentNode, elm); /* Demote the parent. */ |
ad19eafb | 360 | } |
361 | } | |
362 | ||
26ac0430 | 363 | /* |
ad19eafb | 364 | * Swap the position of ELM1 and ELM2 in heap structure. Their IDs are also |
26ac0430 | 365 | * swapped. |
ad19eafb | 366 | */ |
367 | static void | |
368 | _heap_swap_element(heap * hp, heap_node * elm1, heap_node * elm2) | |
369 | { | |
370 | int elm1Id = elm1->id; | |
371 | elm1->id = elm2->id; | |
372 | elm2->id = elm1Id; | |
373 | hp->nodes[elm1->id] = elm1; | |
374 | hp->nodes[elm2->id] = elm2; | |
375 | } | |
376 | ||
f53969cc | 377 | #ifdef NOTDEF |
26ac0430 | 378 | /* |
ad19eafb | 379 | * Copy KEY and DATA fields of SRC to DEST. ID field is NOT copied. |
380 | */ | |
381 | static void | |
382 | _heap_copy_element(heap_node * src, heap_node * dest) | |
383 | { | |
384 | dest->key = src->key; | |
385 | dest->data = src->data; | |
386 | } | |
387 | ||
388 | #endif /* NOTDEF */ | |
389 | ||
26ac0430 | 390 | /* |
ad19eafb | 391 | * True if HP needs to be grown in size. |
392 | */ | |
393 | static int | |
394 | _heap_should_grow(heap * hp) | |
395 | { | |
396 | if (hp->size <= hp->last) | |
26ac0430 | 397 | return 1; |
ad19eafb | 398 | return 0; |
399 | } | |
400 | ||
26ac0430 | 401 | /* |
ad19eafb | 402 | * Grow HP. |
403 | */ | |
404 | static void | |
405 | _heap_grow(heap * hp) | |
406 | { | |
407 | int newSize; | |
408 | ||
409 | if (hp->size > Threshold) | |
26ac0430 | 410 | newSize = hp->size * SlowRate; |
ad19eafb | 411 | else |
26ac0430 | 412 | newSize = hp->size * NormalRate; |
ad19eafb | 413 | |
7e3ce7b9 | 414 | hp->nodes = xrealloc(hp->nodes, newSize * sizeof(heap_node *)); |
44e08383 | 415 | #if COMMENTED_OUT |
416 | for (i = 0; i < hp->size; i++) | |
26ac0430 | 417 | newNodes[i] = hp->nodes[i]; |
7e3ce7b9 | 418 | xfree(hp->nodes); |
44e08383 | 419 | hp->nodes = newNodes; |
420 | #endif | |
ad19eafb | 421 | hp->size = newSize; |
422 | } | |
423 | ||
26ac0430 | 424 | /* |
ad19eafb | 425 | * True if a node with ID exists in HP. |
426 | */ | |
427 | static int | |
428 | _heap_node_exist(heap * hp, int id) | |
429 | { | |
430 | if ((id >= hp->last) || (id < 0) || (hp->nodes[id] == NULL)) | |
26ac0430 | 431 | return 0; |
ad19eafb | 432 | return 1; |
433 | } | |
434 | ||
435 | /**************************************************************************** | |
436 | * Printing and debug functions | |
437 | ****************************************************************************/ | |
438 | ||
26ac0430 AJ |
439 | /* |
440 | * Print the heap in element order, id..last. | |
ad19eafb | 441 | */ |
2d72d4fd | 442 | static void |
ad19eafb | 443 | heap_print_inorder(heap * hp, int id) |
444 | { | |
445 | while (id < hp->last) { | |
26ac0430 AJ |
446 | printf("%d\tKey = %.04f\n", id, hp->nodes[id]->key); |
447 | id++; | |
ad19eafb | 448 | } |
449 | } | |
450 | ||
451 | /* | |
452 | * Returns 1 if HP maintians the heap property and 0 otherwise. | |
453 | */ | |
454 | int | |
455 | verify_heap_property(heap * hp) | |
456 | { | |
457 | int i = 0; | |
458 | int correct = 1; | |
459 | for (i = 0; i < hp->last / 2; i++) { | |
26ac0430 AJ |
460 | correct = 1; |
461 | if (_heap_node_exist(hp, Left(i))) | |
462 | if (hp->nodes[i]->key > hp->nodes[Left(i)]->key) | |
463 | correct = 0; | |
464 | if (_heap_node_exist(hp, Right(i))) | |
465 | if (hp->nodes[i]->key > hp->nodes[Right(i)]->key) | |
466 | correct = 0; | |
467 | if (!correct) { | |
468 | printf("verifyHeap: violated at %d", i); | |
469 | heap_print_inorder(hp, 0); | |
470 | break; | |
471 | } | |
ad19eafb | 472 | } |
473 | return correct; | |
474 | } | |
475 | ||
f53969cc | 476 | #ifdef MEASURE_HEAP_SKEW |
ad19eafb | 477 | |
478 | /**************************************************************************** | |
479 | * Heap skew computation | |
480 | ****************************************************************************/ | |
481 | ||
482 | int | |
483 | compare_heap_keys(const void *a, const void *b) | |
484 | { | |
485 | heap_node **an = (heap_node **) a; | |
486 | heap_node **bn = (heap_node **) b; | |
487 | float cmp = (*an)->key - (*bn)->key; | |
488 | if (cmp < 0) | |
26ac0430 | 489 | return -1; |
ad19eafb | 490 | else |
26ac0430 | 491 | return 1; |
ad19eafb | 492 | } |
493 | ||
494 | /* | |
495 | * Compute the heap skew for HEAP, a measure of how out-of-order the | |
496 | * elements in the heap are. The skew of a heap node is the difference | |
497 | * between its current position in the heap and where it would be if the | |
498 | * heap were in sorted order. To compute this we have to sort the heap. At | |
499 | * the end if the flag REPLACE is non-zero the heap will be returned in | |
500 | * sorted order (with skew == 0). Note: using REPLACE does not help the | |
501 | * performance of the heap, so only do this if you really want to have a | |
502 | * sorted heap. It is faster not to replace. | |
503 | */ | |
504 | float | |
505 | calc_heap_skew(heap * heap, int replace) | |
506 | { | |
507 | heap_node **nodes; | |
508 | long id, diff, skew = 0; | |
f53969cc | 509 | #ifdef HEAP_DEBUG_SKEW |
ad19eafb | 510 | long skewsq = 0; |
511 | #endif /* HEAP_DEBUG_SKEW */ | |
512 | float norm = 0; | |
513 | unsigned long max; | |
514 | ||
26ac0430 | 515 | /* |
ad19eafb | 516 | * Lock the heap to copy it. If replacing it need to keep the heap locked |
517 | * until we are all done. | |
518 | */ | |
519 | mutex_lock(hp->lock); | |
520 | ||
521 | max = heap_nodes(heap); | |
522 | ||
26ac0430 | 523 | /* |
ad19eafb | 524 | * Copy the heap nodes to a new storage area for offline sorting. |
525 | */ | |
7e3ce7b9 | 526 | nodes = xmalloc(max * sizeof(heap_node *)); |
ad19eafb | 527 | memcpy(nodes, heap->nodes, max * sizeof(heap_node *)); |
528 | ||
529 | if (replace == 0) { | |
26ac0430 AJ |
530 | /* |
531 | * Unlock the heap to allow updates from other threads before the sort. | |
532 | * This allows other heap operations to proceed concurrently with the | |
533 | * heap skew computation on the heap at the time of the call ... | |
534 | */ | |
535 | mutex_unlock(hp->lock); | |
ad19eafb | 536 | } |
537 | qsort(nodes, max, sizeof(heap_node *), compare_heap_keys); | |
538 | ||
539 | for (id = 0; id < max; id++) { | |
26ac0430 AJ |
540 | diff = id - nodes[id]->id; |
541 | skew += abs(diff); | |
ad19eafb | 542 | |
f53969cc | 543 | #ifdef HEAP_DEBUG_SKEW |
26ac0430 | 544 | skewsq += diff * diff; |
f53969cc | 545 | #ifdef HEAP_DEBUG_ALL |
26ac0430 | 546 | printf("%d\tKey = %f, diff = %d\n", id, nodes[id]->key, diff); |
ad19eafb | 547 | #endif /* HEAP_DEBUG */ |
548 | #endif /* HEAP_DEBUG_SKEW */ | |
549 | } | |
550 | ||
551 | if (replace != 0) { | |
26ac0430 AJ |
552 | /* |
553 | * Replace the original heap with the newly sorted heap and let it | |
554 | * continue. Then compute the skew using the copy of the previous heap | |
555 | * which we maintain as private data. | |
556 | */ | |
557 | memcpy(heap->nodes, nodes, max * sizeof(heap_node *)); | |
558 | ||
559 | for (id = 0; id < max; id++) { | |
560 | /* | |
561 | * Fix up all the ID values in the copied nodes. | |
562 | */ | |
563 | heap->nodes[id]->id = id; | |
564 | } | |
565 | ||
566 | mutex_unlock(hp->lock); | |
ad19eafb | 567 | } |
26ac0430 | 568 | /* |
ad19eafb | 569 | * The skew value is normalized to a range of [0..1]; the distribution |
570 | * appears to be a skewed Gaussian distribution. For random insertions | |
571 | * into a heap the normalized skew will be slightly less than 0.5. The | |
572 | * maximum value of skew/N^2 (for any value of N) is about 0.39 and is | |
573 | * fairly stable. | |
574 | */ | |
575 | norm = skew * 2.56 / (max * max); | |
576 | ||
26ac0430 | 577 | /* |
ad19eafb | 578 | * Free the nodes array; note this is just an array of pointers, not data! |
579 | */ | |
7e3ce7b9 | 580 | xfree(nodes); |
ad19eafb | 581 | return norm; |
582 | } | |
583 | ||
584 | #endif /* MEASURE_HEAP_SKEW */ | |
f53969cc | 585 |