]>
Commit | Line | Data |
---|---|---|
ad19eafb | 1 | |
2 | /* | |
2b6662ba | 3 | * $Id: heap.c,v 1.7 2001/01/12 00:37:12 wessels Exp $ |
ad19eafb | 4 | * |
5 | * AUTHOR: John Dilley, Hewlett Packard | |
6 | * | |
2b6662ba | 7 | * SQUID Web Proxy Cache http://www.squid-cache.org/ |
ad19eafb | 8 | * ---------------------------------------------------------- |
9 | * | |
2b6662ba | 10 | * Squid is the result of efforts by numerous individuals from |
11 | * the Internet community; see the CONTRIBUTORS file for full | |
12 | * details. Many organizations have provided support for Squid's | |
13 | * development; see the SPONSORS file for full details. Squid is | |
14 | * Copyrighted (C) 2001 by the Regents of the University of | |
15 | * California; see the COPYRIGHT file for full details. Squid | |
16 | * incorporates software developed and/or copyrighted by other | |
17 | * sources; see the CREDITS file for full details. | |
ad19eafb | 18 | * |
19 | * This program is free software; you can redistribute it and/or modify | |
20 | * it under the terms of the GNU General Public License as published by | |
21 | * the Free Software Foundation; either version 2 of the License, or | |
22 | * (at your option) any later version. | |
23 | * | |
24 | * This program is distributed in the hope that it will be useful, | |
25 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
26 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
27 | * GNU General Public License for more details. | |
28 | * | |
29 | * You should have received a copy of the GNU General Public License | |
30 | * along with this program; if not, write to the Free Software | |
31 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. | |
32 | * | |
33 | */ | |
34 | ||
35 | /**************************************************************************** | |
36 | * Heap implementation | |
9bc73deb | 37 | * Copyright (C) 1999 by Hewlett Packard |
ad19eafb | 38 | ****************************************************************************/ |
39 | ||
40 | #include "config.h" | |
41 | ||
42 | #if HAVE_STDLIB_H | |
43 | #include <stdlib.h> | |
44 | #endif | |
45 | #if HAVE_ASSERT_H | |
46 | #include <assert.h> | |
47 | #endif | |
48 | #if HAVE_STRING_H | |
49 | #include <string.h> | |
50 | #endif | |
51 | #if HAVE_STDIO_H | |
52 | #include <stdio.h> | |
53 | #endif | |
54 | ||
55 | #include "heap.h" | |
7e3ce7b9 | 56 | #include "util.h" |
ad19eafb | 57 | |
58 | /* | |
59 | * Hacks for non-synchronized heap implementation. | |
60 | */ | |
61 | #define mutex_lock(m) (void)0 | |
62 | #define mutex_unlock(m) (void)0 | |
63 | #define mutex_trylock(m) (void)0 | |
64 | #define mutex_init(m) ((m)=123456) | |
65 | ||
66 | /* | |
67 | * Private function prototypes. | |
68 | */ | |
69 | static void _heap_ify_up(heap * hp, heap_node * elm); | |
70 | static void _heap_ify_down(heap * hp, heap_node * elm); | |
71 | static int _heap_should_grow(heap * hp); | |
72 | static void _heap_grow(heap * hp); | |
73 | static void _heap_swap_element(heap * hp, heap_node * elm1, heap_node * elm2); | |
74 | static int _heap_node_exist(heap * hp, int id); | |
75 | ||
76 | #ifdef HEAP_DEBUG | |
77 | void _heap_print_tree(heap * hp, heap_node * node); | |
78 | #endif /* HEAP_DEBUG */ | |
79 | ||
80 | #define Left(x) (2 * (x) + 1) | |
81 | #define Right(x) (2 * (x) + 2) | |
82 | #define Parent(x) ((int)((x)-1)/2) | |
83 | ||
84 | #define Threshold 10000 | |
85 | #define NormalRate 2 | |
86 | #define SlowRate 1.5 | |
87 | #define MinSize 32 | |
88 | ||
89 | /**************************************************************************** | |
90 | * Public functions | |
91 | ****************************************************************************/ | |
92 | ||
93 | /* | |
94 | * Return a newly created heap. INITSIZE is the initial size of the heap. | |
95 | */ | |
96 | heap * | |
97 | new_heap(int initSize, heap_key_func gen_key) | |
98 | { | |
7e3ce7b9 | 99 | heap *hp = xmalloc(sizeof(*hp)); |
ad19eafb | 100 | assert(hp != NULL); |
101 | ||
9bc73deb | 102 | if (initSize <= 0) |
103 | initSize = MinSize; | |
7e3ce7b9 | 104 | hp->nodes = xcalloc(initSize, sizeof(heap_node *)); |
ad19eafb | 105 | assert(hp->nodes != NULL); |
106 | ||
ad19eafb | 107 | hp->size = initSize; |
108 | hp->last = 0; | |
109 | hp->gen_key = gen_key; | |
110 | hp->age = 0; | |
111 | ||
112 | return hp; | |
113 | } | |
114 | ||
115 | ||
116 | /* | |
117 | * Free memory used by a heap. Does not free the metadata pointed to by the | |
118 | * heap nodes, only the heap's internal memory. | |
119 | */ | |
120 | void | |
121 | delete_heap(heap * hp) | |
122 | { | |
123 | int i; | |
124 | assert(hp); | |
125 | for (i = 0; i < hp->last; i++) { | |
7e3ce7b9 | 126 | xfree(hp->nodes[i]); |
ad19eafb | 127 | } |
7e3ce7b9 | 128 | xfree(hp->nodes); |
129 | xfree(hp); | |
ad19eafb | 130 | } |
131 | ||
132 | /* | |
7e3ce7b9 | 133 | * Insert DAT based on KY into HP maintaining the heap property. |
134 | * Return the newly inserted heap node. The fields of ELM other | |
135 | * than ID are never changed until ELM is deleted from HP, i.e. | |
136 | * caller can assume that the heap node always exist at the same | |
137 | * place in memory unless heap_delete or heap_extractmin is called | |
138 | * on that node. This function exposes the heap's internal data | |
139 | * structure to the caller. This is required in order to do O(lgN) | |
140 | * deletion. | |
ad19eafb | 141 | */ |
142 | heap_node * | |
143 | heap_insert(heap * hp, void *dat) | |
144 | { | |
7e3ce7b9 | 145 | heap_node *elm = xmalloc(sizeof(*elm)); |
ad19eafb | 146 | |
147 | elm->key = heap_gen_key(hp, dat); | |
148 | elm->data = dat; | |
149 | ||
150 | if (_heap_should_grow(hp)) | |
151 | _heap_grow(hp); | |
152 | ||
153 | hp->nodes[hp->last] = elm; | |
154 | elm->id = hp->last; | |
155 | hp->last += 1; | |
156 | ||
157 | _heap_ify_up(hp, elm); | |
158 | ||
159 | return elm; | |
160 | } | |
161 | ||
162 | ||
163 | /* | |
164 | * Delete ELM while maintaining the heap property. ELM may be modified. | |
165 | * Assumes that ELM is not NULL and frees it. Returns the data pointed to | |
166 | * in, which the caller must free if necessary. | |
167 | */ | |
168 | heap_t | |
169 | heap_delete(heap * hp, heap_node * elm) | |
170 | { | |
171 | heap_node *lastNode; | |
172 | heap_t data = elm->data; | |
173 | ||
174 | assert(_heap_node_exist(hp, hp->last - 1)); | |
175 | ||
176 | lastNode = hp->nodes[hp->last - 1]; | |
177 | _heap_swap_element(hp, lastNode, elm); | |
178 | heap_extractlast(hp); | |
179 | ||
7e3ce7b9 | 180 | if (elm == lastNode) { |
181 | /* | |
182 | * lastNode just got freed, so don't access it in the next | |
183 | * block. | |
184 | */ | |
185 | (void) 0; | |
186 | } else if (hp->last > 0) { | |
ad19eafb | 187 | if (lastNode->key < hp->nodes[Parent(lastNode->id)]->key) |
188 | _heap_ify_up(hp, lastNode); /* COOL! */ | |
189 | _heap_ify_down(hp, lastNode); | |
190 | } | |
191 | return data; | |
192 | } | |
193 | ||
194 | /* | |
195 | * Delete the last element (leaf) out of the heap. Does not require a | |
196 | * heapify operation. | |
197 | */ | |
198 | ||
199 | #ifndef heap_gen_key | |
200 | /* | |
201 | * Function to generate keys. See macro definition in heap.h. | |
202 | */ | |
203 | heap_key | |
204 | heap_gen_key(heap * hp, heap_t dat) | |
205 | { | |
206 | return hp->gen_key(dat, hp->age); | |
207 | } | |
208 | #endif /* heap_gen_key */ | |
209 | ||
210 | ||
211 | /* | |
212 | * Returns the data of the node with the largest KEY value and removes that | |
213 | * node from the heap. Returns NULL if the heap was empty. | |
214 | */ | |
215 | heap_t | |
216 | heap_extractmin(heap * hp) | |
217 | { | |
218 | heap_t data; | |
219 | ||
220 | if (hp->last <= 0) | |
221 | return NULL; | |
222 | ||
223 | mutex_lock(hp->lock); | |
224 | ||
225 | data = hp->nodes[0]->data; | |
226 | heap_delete(hp, hp->nodes[0]); /* Delete the root */ | |
227 | ||
228 | mutex_unlock(hp->lock); | |
229 | ||
230 | return data; | |
231 | } | |
232 | ||
233 | ||
234 | /* | |
235 | * Remove the last node in HP. Frees the heap internal structure and | |
236 | * returns the data pointes to by the last node. | |
237 | */ | |
238 | heap_t | |
239 | heap_extractlast(heap * hp) | |
240 | { | |
241 | heap_t data; | |
242 | assert(_heap_node_exist(hp, hp->last - 1)); | |
243 | hp->last -= 1; | |
244 | data = hp->nodes[hp->last]->data; | |
7e3ce7b9 | 245 | xfree(hp->nodes[hp->last]); |
ad19eafb | 246 | return data; |
247 | } | |
248 | ||
249 | ||
250 | /* | |
251 | * The semantics of this routine is the same as the followings: | |
252 | * heap_delete(hp, elm); | |
253 | * heap_insert(hp, dat); | |
254 | * Returns the old data object from elm (the one being replaced). The | |
255 | * caller must free this as necessary. | |
256 | */ | |
257 | heap_t | |
258 | heap_update(heap * hp, heap_node * elm, void *dat) | |
259 | { | |
260 | heap_t old = elm->data; | |
261 | heap_key ky = heap_gen_key(hp, dat); | |
262 | ||
263 | elm->key = ky; | |
264 | elm->data = dat; | |
265 | ||
266 | if (elm->key < hp->nodes[Parent(elm->id)]->key) | |
267 | _heap_ify_up(hp, elm); | |
268 | _heap_ify_down(hp, elm); | |
269 | ||
270 | return old; | |
271 | } | |
272 | ||
273 | ||
274 | /* | |
275 | * A pointer to the root node's DATA. | |
276 | */ | |
277 | void * | |
278 | heap_peepmin(heap * hp) | |
279 | { | |
280 | assert(_heap_node_exist(hp, 0)); | |
281 | return hp->nodes[0]->data; | |
282 | } | |
283 | ||
284 | ||
285 | /* | |
286 | * The KEY of the root node. | |
287 | */ | |
288 | heap_key | |
289 | heap_peepminkey(heap * hp) | |
290 | { | |
291 | assert(_heap_node_exist(hp, 0)); | |
292 | return hp->nodes[0]->key; | |
293 | } | |
294 | ||
295 | ||
296 | /* | |
297 | * Same as heap_peep except that this return the KEY of the node. | |
298 | * Only meant for iteration. | |
299 | */ | |
300 | heap_key | |
301 | heap_peepkey(heap * hp, int n) | |
302 | { | |
303 | assert(_heap_node_exist(hp, n)); | |
304 | return hp->nodes[n]->key; | |
305 | } | |
306 | ||
307 | ||
308 | /* | |
309 | * A pointer to Nth node's DATA. The caller can iterate through HP by | |
310 | * calling this routine. eg. Caller can execute the following code: | |
311 | * for(i = 0; i < heap_nodes(hp); i++) | |
312 | * data = heap_peep(hp, i); | |
313 | */ | |
314 | void * | |
315 | heap_peep(heap * hp, int n) | |
316 | { | |
317 | void *data; | |
318 | assert(_heap_node_exist(hp, n)); | |
319 | data = hp->nodes[n]->data; | |
320 | return data; | |
321 | } | |
322 | ||
323 | ||
324 | #ifndef heap_nodes | |
325 | /* | |
326 | * Current number of nodes in HP. | |
327 | */ | |
328 | int | |
329 | heap_nodes(heap * hp) | |
330 | { | |
331 | return hp->last; | |
332 | } | |
333 | #endif /* heap_nodes */ | |
334 | ||
335 | ||
336 | #ifndef heap_empty | |
337 | /* | |
338 | * Determine if the heap is empty. Returns 1 if HP has no elements and 0 | |
339 | * otherwise. | |
340 | */ | |
341 | int | |
342 | heap_empty(heap * hp) | |
343 | { | |
344 | return (hp->last <= 0) ? 1 : 0; | |
345 | } | |
346 | #endif /* heap_empty */ | |
347 | ||
348 | /****************** Private Functions *******************/ | |
349 | ||
350 | /* | |
351 | * Maintain the heap order property (parent is smaller than children) which | |
352 | * may only be violated at ELM downwards. Assumes caller has locked the heap. | |
353 | */ | |
354 | static void | |
355 | _heap_ify_down(heap * hp, heap_node * elm) | |
356 | { | |
357 | heap_node *kid; | |
358 | int left = 0, right = 0; | |
359 | int true = 1; | |
360 | while (true) { | |
361 | left = Left(elm->id); | |
362 | right = Right(elm->id); | |
44e08383 | 363 | if (!_heap_node_exist(hp, left)) { |
364 | /* At the bottom of the heap (no child). */ | |
ad19eafb | 365 | |
366 | assert(!_heap_node_exist(hp, right)); | |
367 | break; | |
44e08383 | 368 | } else if (!_heap_node_exist(hp, right)) |
369 | /* Only left child exists. */ | |
ad19eafb | 370 | |
371 | kid = hp->nodes[left]; | |
372 | else { | |
373 | if (hp->nodes[right]->key < hp->nodes[left]->key) | |
374 | kid = hp->nodes[right]; | |
375 | else | |
376 | kid = hp->nodes[left]; | |
377 | } | |
378 | if (elm->key <= kid->key) | |
379 | break; | |
380 | _heap_swap_element(hp, kid, elm); | |
381 | } | |
382 | } | |
383 | ||
384 | ||
385 | /* | |
386 | * Maintain the heap property above ELM. Caller has locked the heap. | |
387 | */ | |
388 | static void | |
389 | _heap_ify_up(heap * hp, heap_node * elm) | |
390 | { | |
391 | heap_node *parentNode; | |
392 | while (elm->id > 0) { | |
393 | parentNode = hp->nodes[Parent(elm->id)]; | |
394 | if (parentNode->key <= elm->key) | |
395 | break; | |
396 | _heap_swap_element(hp, parentNode, elm); /* Demote the parent. */ | |
397 | } | |
398 | } | |
399 | ||
400 | ||
401 | /* | |
402 | * Swap the position of ELM1 and ELM2 in heap structure. Their IDs are also | |
403 | * swapped. | |
404 | */ | |
405 | static void | |
406 | _heap_swap_element(heap * hp, heap_node * elm1, heap_node * elm2) | |
407 | { | |
408 | int elm1Id = elm1->id; | |
409 | elm1->id = elm2->id; | |
410 | elm2->id = elm1Id; | |
411 | hp->nodes[elm1->id] = elm1; | |
412 | hp->nodes[elm2->id] = elm2; | |
413 | } | |
414 | ||
415 | ||
416 | ||
417 | #ifdef NOTDEF | |
418 | /* | |
419 | * Copy KEY and DATA fields of SRC to DEST. ID field is NOT copied. | |
420 | */ | |
421 | static void | |
422 | _heap_copy_element(heap_node * src, heap_node * dest) | |
423 | { | |
424 | dest->key = src->key; | |
425 | dest->data = src->data; | |
426 | } | |
427 | ||
428 | #endif /* NOTDEF */ | |
429 | ||
430 | ||
431 | /* | |
432 | * True if HP needs to be grown in size. | |
433 | */ | |
434 | static int | |
435 | _heap_should_grow(heap * hp) | |
436 | { | |
437 | if (hp->size <= hp->last) | |
438 | return 1; | |
439 | return 0; | |
440 | } | |
441 | ||
442 | ||
443 | /* | |
444 | * Grow HP. | |
445 | */ | |
446 | static void | |
447 | _heap_grow(heap * hp) | |
448 | { | |
449 | int newSize; | |
450 | ||
451 | if (hp->size > Threshold) | |
452 | newSize = hp->size * SlowRate; | |
453 | else | |
454 | newSize = hp->size * NormalRate; | |
455 | ||
7e3ce7b9 | 456 | hp->nodes = xrealloc(hp->nodes, newSize * sizeof(heap_node *)); |
44e08383 | 457 | #if COMMENTED_OUT |
458 | for (i = 0; i < hp->size; i++) | |
459 | newNodes[i] = hp->nodes[i]; | |
7e3ce7b9 | 460 | xfree(hp->nodes); |
44e08383 | 461 | hp->nodes = newNodes; |
462 | #endif | |
ad19eafb | 463 | hp->size = newSize; |
464 | } | |
465 | ||
466 | ||
467 | /* | |
468 | * True if a node with ID exists in HP. | |
469 | */ | |
470 | static int | |
471 | _heap_node_exist(heap * hp, int id) | |
472 | { | |
473 | if ((id >= hp->last) || (id < 0) || (hp->nodes[id] == NULL)) | |
474 | return 0; | |
475 | return 1; | |
476 | } | |
477 | ||
478 | /**************************************************************************** | |
479 | * Printing and debug functions | |
480 | ****************************************************************************/ | |
481 | ||
482 | /* | |
483 | * Print the heap in element order, id..last. | |
484 | */ | |
485 | void | |
486 | heap_print_inorder(heap * hp, int id) | |
487 | { | |
488 | while (id < hp->last) { | |
489 | printf("%d\tKey = %.04f\n", id, hp->nodes[id]->key); | |
490 | id++; | |
491 | } | |
492 | } | |
493 | ||
494 | /* | |
495 | * Returns 1 if HP maintians the heap property and 0 otherwise. | |
496 | */ | |
497 | int | |
498 | verify_heap_property(heap * hp) | |
499 | { | |
500 | int i = 0; | |
501 | int correct = 1; | |
502 | for (i = 0; i < hp->last / 2; i++) { | |
503 | correct = 1; | |
504 | if (_heap_node_exist(hp, Left(i))) | |
505 | if (hp->nodes[i]->key > hp->nodes[Left(i)]->key) | |
506 | correct = 0; | |
507 | if (_heap_node_exist(hp, Right(i))) | |
508 | if (hp->nodes[i]->key > hp->nodes[Right(i)]->key) | |
509 | correct = 0; | |
510 | if (!correct) { | |
511 | printf("verifyHeap: violated at %d", i); | |
512 | heap_print_inorder(hp, 0); | |
513 | break; | |
514 | } | |
515 | } | |
516 | return correct; | |
517 | } | |
518 | ||
519 | #ifdef MEASURE_HEAP_SKEW | |
520 | ||
521 | /**************************************************************************** | |
522 | * Heap skew computation | |
523 | ****************************************************************************/ | |
524 | ||
525 | int | |
526 | compare_heap_keys(const void *a, const void *b) | |
527 | { | |
528 | heap_node **an = (heap_node **) a; | |
529 | heap_node **bn = (heap_node **) b; | |
530 | float cmp = (*an)->key - (*bn)->key; | |
531 | if (cmp < 0) | |
532 | return -1; | |
533 | else | |
534 | return 1; | |
535 | } | |
536 | ||
537 | /* | |
538 | * Compute the heap skew for HEAP, a measure of how out-of-order the | |
539 | * elements in the heap are. The skew of a heap node is the difference | |
540 | * between its current position in the heap and where it would be if the | |
541 | * heap were in sorted order. To compute this we have to sort the heap. At | |
542 | * the end if the flag REPLACE is non-zero the heap will be returned in | |
543 | * sorted order (with skew == 0). Note: using REPLACE does not help the | |
544 | * performance of the heap, so only do this if you really want to have a | |
545 | * sorted heap. It is faster not to replace. | |
546 | */ | |
547 | float | |
548 | calc_heap_skew(heap * heap, int replace) | |
549 | { | |
550 | heap_node **nodes; | |
551 | long id, diff, skew = 0; | |
552 | #ifdef HEAP_DEBUG_SKEW | |
553 | long skewsq = 0; | |
554 | #endif /* HEAP_DEBUG_SKEW */ | |
555 | float norm = 0; | |
556 | unsigned long max; | |
557 | ||
558 | /* | |
559 | * Lock the heap to copy it. If replacing it need to keep the heap locked | |
560 | * until we are all done. | |
561 | */ | |
562 | mutex_lock(hp->lock); | |
563 | ||
564 | max = heap_nodes(heap); | |
565 | ||
566 | /* | |
567 | * Copy the heap nodes to a new storage area for offline sorting. | |
568 | */ | |
7e3ce7b9 | 569 | nodes = xmalloc(max * sizeof(heap_node *)); |
ad19eafb | 570 | memcpy(nodes, heap->nodes, max * sizeof(heap_node *)); |
571 | ||
572 | if (replace == 0) { | |
573 | /* | |
574 | * Unlock the heap to allow updates from other threads before the sort. | |
575 | * This allows other heap operations to proceed concurrently with the | |
576 | * heap skew computation on the heap at the time of the call ... | |
577 | */ | |
578 | mutex_unlock(hp->lock); | |
579 | } | |
580 | qsort(nodes, max, sizeof(heap_node *), compare_heap_keys); | |
581 | ||
582 | for (id = 0; id < max; id++) { | |
583 | diff = id - nodes[id]->id; | |
584 | skew += abs(diff); | |
585 | ||
586 | #ifdef HEAP_DEBUG_SKEW | |
587 | skewsq += diff * diff; | |
588 | #ifdef HEAP_DEBUG_ALL | |
589 | printf("%d\tKey = %f, diff = %d\n", id, nodes[id]->key, diff); | |
590 | #endif /* HEAP_DEBUG */ | |
591 | #endif /* HEAP_DEBUG_SKEW */ | |
592 | } | |
593 | ||
594 | if (replace != 0) { | |
595 | /* | |
596 | * Replace the original heap with the newly sorted heap and let it | |
597 | * continue. Then compute the skew using the copy of the previous heap | |
598 | * which we maintain as private data. | |
599 | */ | |
600 | memcpy(heap->nodes, nodes, max * sizeof(heap_node *)); | |
601 | ||
602 | for (id = 0; id < max; id++) { | |
603 | /* | |
604 | * Fix up all the ID values in the copied nodes. | |
605 | */ | |
606 | heap->nodes[id]->id = id; | |
607 | } | |
608 | ||
609 | mutex_unlock(hp->lock); | |
610 | } | |
611 | /* | |
612 | * The skew value is normalized to a range of [0..1]; the distribution | |
613 | * appears to be a skewed Gaussian distribution. For random insertions | |
614 | * into a heap the normalized skew will be slightly less than 0.5. The | |
615 | * maximum value of skew/N^2 (for any value of N) is about 0.39 and is | |
616 | * fairly stable. | |
617 | */ | |
618 | norm = skew * 2.56 / (max * max); | |
619 | ||
620 | /* | |
621 | * Free the nodes array; note this is just an array of pointers, not data! | |
622 | */ | |
7e3ce7b9 | 623 | xfree(nodes); |
ad19eafb | 624 | return norm; |
625 | } | |
626 | ||
627 | #endif /* MEASURE_HEAP_SKEW */ |