]> git.ipfire.org Git - thirdparty/squid.git/blob - lib/heap.c
Updated copyright
[thirdparty/squid.git] / lib / heap.c
1
2 /*
3 * $Id: heap.c,v 1.7 2001/01/12 00:37:12 wessels Exp $
4 *
5 * AUTHOR: John Dilley, Hewlett Packard
6 *
7 * SQUID Web Proxy Cache http://www.squid-cache.org/
8 * ----------------------------------------------------------
9 *
10 * Squid is the result of efforts by numerous individuals from
11 * the Internet community; see the CONTRIBUTORS file for full
12 * details. Many organizations have provided support for Squid's
13 * development; see the SPONSORS file for full details. Squid is
14 * Copyrighted (C) 2001 by the Regents of the University of
15 * California; see the COPYRIGHT file for full details. Squid
16 * incorporates software developed and/or copyrighted by other
17 * sources; see the CREDITS file for full details.
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation; either version 2 of the License, or
22 * (at your option) any later version.
23 *
24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program; if not, write to the Free Software
31 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
32 *
33 */
34
35 /****************************************************************************
36 * Heap implementation
37 * Copyright (C) 1999 by Hewlett Packard
38 ****************************************************************************/
39
40 #include "config.h"
41
42 #if HAVE_STDLIB_H
43 #include <stdlib.h>
44 #endif
45 #if HAVE_ASSERT_H
46 #include <assert.h>
47 #endif
48 #if HAVE_STRING_H
49 #include <string.h>
50 #endif
51 #if HAVE_STDIO_H
52 #include <stdio.h>
53 #endif
54
55 #include "heap.h"
56 #include "util.h"
57
58 /*
59 * Hacks for non-synchronized heap implementation.
60 */
61 #define mutex_lock(m) (void)0
62 #define mutex_unlock(m) (void)0
63 #define mutex_trylock(m) (void)0
64 #define mutex_init(m) ((m)=123456)
65
66 /*
67 * Private function prototypes.
68 */
69 static void _heap_ify_up(heap * hp, heap_node * elm);
70 static void _heap_ify_down(heap * hp, heap_node * elm);
71 static int _heap_should_grow(heap * hp);
72 static void _heap_grow(heap * hp);
73 static void _heap_swap_element(heap * hp, heap_node * elm1, heap_node * elm2);
74 static int _heap_node_exist(heap * hp, int id);
75
76 #ifdef HEAP_DEBUG
77 void _heap_print_tree(heap * hp, heap_node * node);
78 #endif /* HEAP_DEBUG */
79
80 #define Left(x) (2 * (x) + 1)
81 #define Right(x) (2 * (x) + 2)
82 #define Parent(x) ((int)((x)-1)/2)
83
84 #define Threshold 10000
85 #define NormalRate 2
86 #define SlowRate 1.5
87 #define MinSize 32
88
89 /****************************************************************************
90 * Public functions
91 ****************************************************************************/
92
93 /*
94 * Return a newly created heap. INITSIZE is the initial size of the heap.
95 */
96 heap *
97 new_heap(int initSize, heap_key_func gen_key)
98 {
99 heap *hp = xmalloc(sizeof(*hp));
100 assert(hp != NULL);
101
102 if (initSize <= 0)
103 initSize = MinSize;
104 hp->nodes = xcalloc(initSize, sizeof(heap_node *));
105 assert(hp->nodes != NULL);
106
107 hp->size = initSize;
108 hp->last = 0;
109 hp->gen_key = gen_key;
110 hp->age = 0;
111
112 return hp;
113 }
114
115
116 /*
117 * Free memory used by a heap. Does not free the metadata pointed to by the
118 * heap nodes, only the heap's internal memory.
119 */
120 void
121 delete_heap(heap * hp)
122 {
123 int i;
124 assert(hp);
125 for (i = 0; i < hp->last; i++) {
126 xfree(hp->nodes[i]);
127 }
128 xfree(hp->nodes);
129 xfree(hp);
130 }
131
132 /*
133 * Insert DAT based on KY into HP maintaining the heap property.
134 * Return the newly inserted heap node. The fields of ELM other
135 * than ID are never changed until ELM is deleted from HP, i.e.
136 * caller can assume that the heap node always exist at the same
137 * place in memory unless heap_delete or heap_extractmin is called
138 * on that node. This function exposes the heap's internal data
139 * structure to the caller. This is required in order to do O(lgN)
140 * deletion.
141 */
142 heap_node *
143 heap_insert(heap * hp, void *dat)
144 {
145 heap_node *elm = xmalloc(sizeof(*elm));
146
147 elm->key = heap_gen_key(hp, dat);
148 elm->data = dat;
149
150 if (_heap_should_grow(hp))
151 _heap_grow(hp);
152
153 hp->nodes[hp->last] = elm;
154 elm->id = hp->last;
155 hp->last += 1;
156
157 _heap_ify_up(hp, elm);
158
159 return elm;
160 }
161
162
163 /*
164 * Delete ELM while maintaining the heap property. ELM may be modified.
165 * Assumes that ELM is not NULL and frees it. Returns the data pointed to
166 * in, which the caller must free if necessary.
167 */
168 heap_t
169 heap_delete(heap * hp, heap_node * elm)
170 {
171 heap_node *lastNode;
172 heap_t data = elm->data;
173
174 assert(_heap_node_exist(hp, hp->last - 1));
175
176 lastNode = hp->nodes[hp->last - 1];
177 _heap_swap_element(hp, lastNode, elm);
178 heap_extractlast(hp);
179
180 if (elm == lastNode) {
181 /*
182 * lastNode just got freed, so don't access it in the next
183 * block.
184 */
185 (void) 0;
186 } else if (hp->last > 0) {
187 if (lastNode->key < hp->nodes[Parent(lastNode->id)]->key)
188 _heap_ify_up(hp, lastNode); /* COOL! */
189 _heap_ify_down(hp, lastNode);
190 }
191 return data;
192 }
193
194 /*
195 * Delete the last element (leaf) out of the heap. Does not require a
196 * heapify operation.
197 */
198
199 #ifndef heap_gen_key
200 /*
201 * Function to generate keys. See macro definition in heap.h.
202 */
203 heap_key
204 heap_gen_key(heap * hp, heap_t dat)
205 {
206 return hp->gen_key(dat, hp->age);
207 }
208 #endif /* heap_gen_key */
209
210
211 /*
212 * Returns the data of the node with the largest KEY value and removes that
213 * node from the heap. Returns NULL if the heap was empty.
214 */
215 heap_t
216 heap_extractmin(heap * hp)
217 {
218 heap_t data;
219
220 if (hp->last <= 0)
221 return NULL;
222
223 mutex_lock(hp->lock);
224
225 data = hp->nodes[0]->data;
226 heap_delete(hp, hp->nodes[0]); /* Delete the root */
227
228 mutex_unlock(hp->lock);
229
230 return data;
231 }
232
233
234 /*
235 * Remove the last node in HP. Frees the heap internal structure and
236 * returns the data pointes to by the last node.
237 */
238 heap_t
239 heap_extractlast(heap * hp)
240 {
241 heap_t data;
242 assert(_heap_node_exist(hp, hp->last - 1));
243 hp->last -= 1;
244 data = hp->nodes[hp->last]->data;
245 xfree(hp->nodes[hp->last]);
246 return data;
247 }
248
249
250 /*
251 * The semantics of this routine is the same as the followings:
252 * heap_delete(hp, elm);
253 * heap_insert(hp, dat);
254 * Returns the old data object from elm (the one being replaced). The
255 * caller must free this as necessary.
256 */
257 heap_t
258 heap_update(heap * hp, heap_node * elm, void *dat)
259 {
260 heap_t old = elm->data;
261 heap_key ky = heap_gen_key(hp, dat);
262
263 elm->key = ky;
264 elm->data = dat;
265
266 if (elm->key < hp->nodes[Parent(elm->id)]->key)
267 _heap_ify_up(hp, elm);
268 _heap_ify_down(hp, elm);
269
270 return old;
271 }
272
273
274 /*
275 * A pointer to the root node's DATA.
276 */
277 void *
278 heap_peepmin(heap * hp)
279 {
280 assert(_heap_node_exist(hp, 0));
281 return hp->nodes[0]->data;
282 }
283
284
285 /*
286 * The KEY of the root node.
287 */
288 heap_key
289 heap_peepminkey(heap * hp)
290 {
291 assert(_heap_node_exist(hp, 0));
292 return hp->nodes[0]->key;
293 }
294
295
296 /*
297 * Same as heap_peep except that this return the KEY of the node.
298 * Only meant for iteration.
299 */
300 heap_key
301 heap_peepkey(heap * hp, int n)
302 {
303 assert(_heap_node_exist(hp, n));
304 return hp->nodes[n]->key;
305 }
306
307
308 /*
309 * A pointer to Nth node's DATA. The caller can iterate through HP by
310 * calling this routine. eg. Caller can execute the following code:
311 * for(i = 0; i < heap_nodes(hp); i++)
312 * data = heap_peep(hp, i);
313 */
314 void *
315 heap_peep(heap * hp, int n)
316 {
317 void *data;
318 assert(_heap_node_exist(hp, n));
319 data = hp->nodes[n]->data;
320 return data;
321 }
322
323
324 #ifndef heap_nodes
325 /*
326 * Current number of nodes in HP.
327 */
328 int
329 heap_nodes(heap * hp)
330 {
331 return hp->last;
332 }
333 #endif /* heap_nodes */
334
335
336 #ifndef heap_empty
337 /*
338 * Determine if the heap is empty. Returns 1 if HP has no elements and 0
339 * otherwise.
340 */
341 int
342 heap_empty(heap * hp)
343 {
344 return (hp->last <= 0) ? 1 : 0;
345 }
346 #endif /* heap_empty */
347
348 /****************** Private Functions *******************/
349
350 /*
351 * Maintain the heap order property (parent is smaller than children) which
352 * may only be violated at ELM downwards. Assumes caller has locked the heap.
353 */
354 static void
355 _heap_ify_down(heap * hp, heap_node * elm)
356 {
357 heap_node *kid;
358 int left = 0, right = 0;
359 int true = 1;
360 while (true) {
361 left = Left(elm->id);
362 right = Right(elm->id);
363 if (!_heap_node_exist(hp, left)) {
364 /* At the bottom of the heap (no child). */
365
366 assert(!_heap_node_exist(hp, right));
367 break;
368 } else if (!_heap_node_exist(hp, right))
369 /* Only left child exists. */
370
371 kid = hp->nodes[left];
372 else {
373 if (hp->nodes[right]->key < hp->nodes[left]->key)
374 kid = hp->nodes[right];
375 else
376 kid = hp->nodes[left];
377 }
378 if (elm->key <= kid->key)
379 break;
380 _heap_swap_element(hp, kid, elm);
381 }
382 }
383
384
385 /*
386 * Maintain the heap property above ELM. Caller has locked the heap.
387 */
388 static void
389 _heap_ify_up(heap * hp, heap_node * elm)
390 {
391 heap_node *parentNode;
392 while (elm->id > 0) {
393 parentNode = hp->nodes[Parent(elm->id)];
394 if (parentNode->key <= elm->key)
395 break;
396 _heap_swap_element(hp, parentNode, elm); /* Demote the parent. */
397 }
398 }
399
400
401 /*
402 * Swap the position of ELM1 and ELM2 in heap structure. Their IDs are also
403 * swapped.
404 */
405 static void
406 _heap_swap_element(heap * hp, heap_node * elm1, heap_node * elm2)
407 {
408 int elm1Id = elm1->id;
409 elm1->id = elm2->id;
410 elm2->id = elm1Id;
411 hp->nodes[elm1->id] = elm1;
412 hp->nodes[elm2->id] = elm2;
413 }
414
415
416
417 #ifdef NOTDEF
418 /*
419 * Copy KEY and DATA fields of SRC to DEST. ID field is NOT copied.
420 */
421 static void
422 _heap_copy_element(heap_node * src, heap_node * dest)
423 {
424 dest->key = src->key;
425 dest->data = src->data;
426 }
427
428 #endif /* NOTDEF */
429
430
431 /*
432 * True if HP needs to be grown in size.
433 */
434 static int
435 _heap_should_grow(heap * hp)
436 {
437 if (hp->size <= hp->last)
438 return 1;
439 return 0;
440 }
441
442
443 /*
444 * Grow HP.
445 */
446 static void
447 _heap_grow(heap * hp)
448 {
449 int newSize;
450
451 if (hp->size > Threshold)
452 newSize = hp->size * SlowRate;
453 else
454 newSize = hp->size * NormalRate;
455
456 hp->nodes = xrealloc(hp->nodes, newSize * sizeof(heap_node *));
457 #if COMMENTED_OUT
458 for (i = 0; i < hp->size; i++)
459 newNodes[i] = hp->nodes[i];
460 xfree(hp->nodes);
461 hp->nodes = newNodes;
462 #endif
463 hp->size = newSize;
464 }
465
466
467 /*
468 * True if a node with ID exists in HP.
469 */
470 static int
471 _heap_node_exist(heap * hp, int id)
472 {
473 if ((id >= hp->last) || (id < 0) || (hp->nodes[id] == NULL))
474 return 0;
475 return 1;
476 }
477
478 /****************************************************************************
479 * Printing and debug functions
480 ****************************************************************************/
481
482 /*
483 * Print the heap in element order, id..last.
484 */
485 void
486 heap_print_inorder(heap * hp, int id)
487 {
488 while (id < hp->last) {
489 printf("%d\tKey = %.04f\n", id, hp->nodes[id]->key);
490 id++;
491 }
492 }
493
494 /*
495 * Returns 1 if HP maintians the heap property and 0 otherwise.
496 */
497 int
498 verify_heap_property(heap * hp)
499 {
500 int i = 0;
501 int correct = 1;
502 for (i = 0; i < hp->last / 2; i++) {
503 correct = 1;
504 if (_heap_node_exist(hp, Left(i)))
505 if (hp->nodes[i]->key > hp->nodes[Left(i)]->key)
506 correct = 0;
507 if (_heap_node_exist(hp, Right(i)))
508 if (hp->nodes[i]->key > hp->nodes[Right(i)]->key)
509 correct = 0;
510 if (!correct) {
511 printf("verifyHeap: violated at %d", i);
512 heap_print_inorder(hp, 0);
513 break;
514 }
515 }
516 return correct;
517 }
518
519 #ifdef MEASURE_HEAP_SKEW
520
521 /****************************************************************************
522 * Heap skew computation
523 ****************************************************************************/
524
525 int
526 compare_heap_keys(const void *a, const void *b)
527 {
528 heap_node **an = (heap_node **) a;
529 heap_node **bn = (heap_node **) b;
530 float cmp = (*an)->key - (*bn)->key;
531 if (cmp < 0)
532 return -1;
533 else
534 return 1;
535 }
536
537 /*
538 * Compute the heap skew for HEAP, a measure of how out-of-order the
539 * elements in the heap are. The skew of a heap node is the difference
540 * between its current position in the heap and where it would be if the
541 * heap were in sorted order. To compute this we have to sort the heap. At
542 * the end if the flag REPLACE is non-zero the heap will be returned in
543 * sorted order (with skew == 0). Note: using REPLACE does not help the
544 * performance of the heap, so only do this if you really want to have a
545 * sorted heap. It is faster not to replace.
546 */
547 float
548 calc_heap_skew(heap * heap, int replace)
549 {
550 heap_node **nodes;
551 long id, diff, skew = 0;
552 #ifdef HEAP_DEBUG_SKEW
553 long skewsq = 0;
554 #endif /* HEAP_DEBUG_SKEW */
555 float norm = 0;
556 unsigned long max;
557
558 /*
559 * Lock the heap to copy it. If replacing it need to keep the heap locked
560 * until we are all done.
561 */
562 mutex_lock(hp->lock);
563
564 max = heap_nodes(heap);
565
566 /*
567 * Copy the heap nodes to a new storage area for offline sorting.
568 */
569 nodes = xmalloc(max * sizeof(heap_node *));
570 memcpy(nodes, heap->nodes, max * sizeof(heap_node *));
571
572 if (replace == 0) {
573 /*
574 * Unlock the heap to allow updates from other threads before the sort.
575 * This allows other heap operations to proceed concurrently with the
576 * heap skew computation on the heap at the time of the call ...
577 */
578 mutex_unlock(hp->lock);
579 }
580 qsort(nodes, max, sizeof(heap_node *), compare_heap_keys);
581
582 for (id = 0; id < max; id++) {
583 diff = id - nodes[id]->id;
584 skew += abs(diff);
585
586 #ifdef HEAP_DEBUG_SKEW
587 skewsq += diff * diff;
588 #ifdef HEAP_DEBUG_ALL
589 printf("%d\tKey = %f, diff = %d\n", id, nodes[id]->key, diff);
590 #endif /* HEAP_DEBUG */
591 #endif /* HEAP_DEBUG_SKEW */
592 }
593
594 if (replace != 0) {
595 /*
596 * Replace the original heap with the newly sorted heap and let it
597 * continue. Then compute the skew using the copy of the previous heap
598 * which we maintain as private data.
599 */
600 memcpy(heap->nodes, nodes, max * sizeof(heap_node *));
601
602 for (id = 0; id < max; id++) {
603 /*
604 * Fix up all the ID values in the copied nodes.
605 */
606 heap->nodes[id]->id = id;
607 }
608
609 mutex_unlock(hp->lock);
610 }
611 /*
612 * The skew value is normalized to a range of [0..1]; the distribution
613 * appears to be a skewed Gaussian distribution. For random insertions
614 * into a heap the normalized skew will be slightly less than 0.5. The
615 * maximum value of skew/N^2 (for any value of N) is about 0.39 and is
616 * fairly stable.
617 */
618 norm = skew * 2.56 / (max * max);
619
620 /*
621 * Free the nodes array; note this is just an array of pointers, not data!
622 */
623 xfree(nodes);
624 return norm;
625 }
626
627 #endif /* MEASURE_HEAP_SKEW */