--- /dev/null
+/*
+ * $Id: heap.h,v 1.1 1999/06/24 20:17:03 wessels Exp $
+ *
+ * AUTHOR: John Dilley, Hewlett Packard
+ *
+ * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
+ * --------------------------------------------------------
+ *
+ * Squid is the result of efforts by numerous individuals from the
+ * Internet community. Development is led by Duane Wessels of the
+ * National Laboratory for Applied Network Research and funded by
+ * the National Science Foundation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+/****************************************************************************
+ * Heap data structure. Used to store objects for cache replacement. The
+ * heap is implemented as a contiguous array in memory. Heap sort and heap
+ * update are done in-place. The heap is ordered with the smallest value at
+ * the top of the heap (as in the smallest object key value). Child nodes
+ * are larger than their parent.
+ ****************************************************************************/
+
+#ifndef _heap_h_INCLUDED
+#define _heap_h_INCLUDED
+
+/*
+ * Basic heap types.
+ *
+ * heap_key_func is the function type used for generating heap keys. The
+ * first argument is the opaque data object (typically a dws_md_p passed in
+ * as a void *; it would be nice to get type safety without the heap having
+ * to know all about metadata objects). The second argument is the current
+ * aging factor of the heap. The function returns the key for that object.
+ */
+typedef unsigned long heap_mutex_t; /* lock word stored in each heap */
+typedef void *heap_t; /* opaque user data carried by a node */
+typedef double heap_key; /* ordering key; smaller keys sit nearer the root */
+typedef heap_key heap_key_func(heap_t, heap_key);
+
+
+/*
+ * Heap node. Has a key value generated by a key_func, an id (its current
+ * index in the heap's node array) so it can be quickly found in its heap,
+ * and a pointer to the data object that key_func generates the key from.
+ * The id changes whenever the node is swapped with another node.
+ */
+typedef struct _heap_node {
+ heap_key key; /* key computed from data by the heap's gen_key */
+ unsigned long id; /* current index of this node in heap->nodes */
+ heap_t data; /* caller-owned object; never freed by the heap */
+} heap_node;
+
+
+/*
+ * Heap object. Holds an array of pointers to heap_node objects along with
+ * the allocated array length (size), the number of nodes currently stored
+ * (last, which is also the index of the next free slot), and a key
+ * generation function. Also stores the aging factor for this heap.
+ */
+typedef struct _heap {
+ heap_mutex_t lock; /* passed to the mutex_* macros in heap.c */
+ unsigned long size; /* number of slots allocated in nodes */
+ unsigned long last; /* number of slots in use; next insert goes here */
+ heap_key_func *gen_key; /* key generator for heap */
+ heap_key age; /* aging factor for heap */
+ heap_node **nodes; /* array of node pointers, root at index 0 */
+} heap;
+
+/****************************************************************************
+ * Public functions
+ ****************************************************************************/
+
+/*
+ * Create and initialize a new heap. init_size is the initial capacity in
+ * node slots and gen_key is the key generator used for every object that
+ * is inserted or updated.
+ */
+extern heap *new_heap(int init_size, heap_key_func gen_key);
+
+/*
+ * Delete a heap and clean up its memory (the heap, its node array and the
+ * nodes themselves). Does not delete what the heap nodes are pointing to!
+ */
+extern void delete_heap(heap *);
+
+/*
+ * Insert a new node into a heap, returning a pointer to it. The heap_node
+ * object returned is used to update or delete a heap object. Nothing else
+ * should be done with this data structure (especially modifying it!). The
+ * heap does not assume ownership of the data passed to it.
+ */
+extern heap_node *heap_insert(heap *, heap_t dat);
+
+/*
+ * Delete a node out of a heap and free the node. Returns the heap data
+ * from the deleted node. The caller is responsible for freeing this data.
+ */
+extern heap_t heap_delete(heap *, heap_node * elm);
+
+/*
+ * The effect of this routine on the heap contents is the same as:
+ * heap_delete(hp, elm);
+ * heap_insert(hp, dat);
+ * except that the node elm itself is reused: its data is replaced by dat,
+ * its key is regenerated and it is moved to restore the heap order.
+ * Returns the old data object from elm (the one being replaced). The
+ * caller must free this as necessary.
+ */
+extern heap_t heap_update(heap *, heap_node * elm, heap_t dat);
+
+/*
+ * Generate a heap key for a given data object by calling the heap's
+ * gen_key function with the object and the heap's current aging factor.
+ * With MACRO_DEBUG defined this is a real function; otherwise it is a
+ * macro (note that the macro evaluates hp twice).
+ */
+#ifdef MACRO_DEBUG
+extern heap_key heap_gen_key(heap * hp, heap_t dat);
+#else
+#define heap_gen_key(hp,md) ((hp)->gen_key((md),(hp)->age))
+#endif /* MACRO_DEBUG */
+
+
+/*
+ * Extract the minimum (root) element and maintain the heap property.
+ * Returns the data pointed to by the root node, which the caller must
+ * free as necessary. Returns NULL if the heap is empty.
+ */
+extern heap_t heap_extractmin(heap *);
+
+/*
+ * Extract the last leaf node (does not change the heap property).
+ * Returns the data that had been in the heap, which the caller must free
+ * if necessary. Note that the last node is guaranteed to be no smaller
+ * than its parent, but it is not necessarily the largest of the other
+ * (leaf or parent) nodes in the tree. This operation is fast but
+ * imprecise. The heap must not be empty.
+ */
+extern heap_t heap_extractlast(heap * hp);
+
+/*
+ * Get the root key, the nth key, the root (smallest) element, or the nth
+ * element, where n is an index into the heap's node array. None of these
+ * operations modify the heap. The requested node must exist.
+ */
+extern heap_key heap_peepminkey(heap *);
+extern heap_key heap_peepkey(heap *, int n);
+
+extern heap_t heap_peepmin(heap *);
+extern heap_t heap_peep(heap *, int n);
+
+/*
+ * Is the heap empty? How many nodes (data objects) are in it?
+ * With MACRO_DEBUG defined these are real functions returning int;
+ * otherwise they are macros that read the heap's `last` counter directly
+ * (so heap_nodes then has type unsigned long).
+ */
+#ifdef MACRO_DEBUG
+extern int heap_empty(heap *);
+extern int heap_nodes(heap *);
+#else /* MACRO_DEBUG */
+#define heap_nodes(heap) ((heap)->last)
+#define heap_empty(heap) (((heap)->last <= 0) ? 1 : 0)
+#endif /* MACRO_DEBUG */
+
+/*
+ * Print the heap or a node in the heap, and check the heap invariant.
+ * verify_heap_property returns 1 if every parent key is <= its children's
+ * keys and 0 otherwise.
+ * NOTE(review): heap_print and heap_printnode are declared here but no
+ * definition is visible in heap.c (which defines heap_print_inorder
+ * instead) -- confirm before calling them.
+ */
+extern void heap_print(heap *);
+extern void heap_printnode(char *msg, heap_node * elm);
+
+extern int verify_heap_property(heap *);
+
+#endif /* _heap_h_INCLUDED */
--- /dev/null
+
+/*
+ * $Id: heap.c,v 1.1 1999/06/24 20:17:02 wessels Exp $
+ *
+ * AUTHOR: John Dilley, Hewlett Packard
+ *
+ * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
+ * ----------------------------------------------------------
+ *
+ * Squid is the result of efforts by numerous individuals from the
+ * Internet community. Development is led by Duane Wessels of the
+ * National Laboratory for Applied Network Research and funded by the
+ * National Science Foundation. Squid is Copyrighted (C) 1998 by
+ * Duane Wessels and the University of California San Diego. Please
+ * see the COPYRIGHT file for full details. Squid incorporates
+ * software developed and/or copyrighted by other sources. Please see
+ * the CREDITS file for full details.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+/****************************************************************************
+ * Heap implementation
+ ****************************************************************************/
+
+#include "config.h"
+
+#if HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#if HAVE_ASSERT_H
+#include <assert.h>
+#endif
+#if HAVE_STRING_H
+#include <string.h>
+#endif
+#if HAVE_STDIO_H
+#include <stdio.h>
+#endif
+
+#include "heap.h"
+
+/*
+ * Hacks for non-synchronized heap implementation: locking, unlocking and
+ * try-locking expand to no-ops that discard their argument, and
+ * mutex_init merely stores a marker value in the lock word. The heap is
+ * therefore NOT thread safe as built here.
+ */
+#define mutex_lock(m) (void)0
+#define mutex_unlock(m) (void)0
+#define mutex_trylock(m) (void)0
+#define mutex_init(m) ((m)=123456)
+
+/*
+ * Private function prototypes: sift a node up or down to restore heap
+ * order, decide on and perform growth of the node array, swap two nodes
+ * (including their ids), and test whether an array index holds a node.
+ * NOTE(review): _heap_print_tree is only declared (under HEAP_DEBUG); no
+ * definition is visible in this file.
+ */
+static void _heap_ify_up(heap * hp, heap_node * elm);
+static void _heap_ify_down(heap * hp, heap_node * elm);
+static int _heap_should_grow(heap * hp);
+static void _heap_grow(heap * hp);
+static void _heap_swap_element(heap * hp, heap_node * elm1, heap_node * elm2);
+static int _heap_node_exist(heap * hp, int id);
+
+#ifdef HEAP_DEBUG
+void _heap_print_tree(heap * hp, heap_node * node);
+#endif /* HEAP_DEBUG */
+
+#define Left(x) (2 * (x) + 1) /* array index of the left child of x */
+#define Right(x) (2 * (x) + 2) /* array index of the right child of x */
+#define Parent(x) ((int)((x)-1)/2) /* index of x's parent; the cast binds before the division. NOTE(review): for an unsigned x == 0 this relies on (x)-1 converting to int -1 */
+
+#define Threshold 10000 /* above this size _heap_grow uses SlowRate */
+#define NormalRate 2 /* growth factor at or below Threshold */
+#define SlowRate 1.5 /* growth factor above Threshold */
+#define MinSize 32 /* size used by new_heap when initSize <= 0 */
+
+/****************************************************************************
+ * Public functions
+ ****************************************************************************/
+
+/*
+ * Return a newly created, empty heap.
+ *
+ * initSize is the initial capacity in node slots; a value <= 0 selects
+ * MinSize. gen_key is the key generator called for every object that is
+ * inserted or updated. The aging factor starts at 0. Allocation failure is
+ * treated as fatal (assert). The caller releases the heap with
+ * delete_heap().
+ */
+heap *
+new_heap(int initSize, heap_key_func gen_key)
+{
+    heap *hp;
+
+    /*
+     * Clamp BEFORE allocating, so that hp->size always matches the real
+     * length of the node array.
+     */
+    if (initSize <= 0)
+        initSize = MinSize;
+
+    hp = malloc(sizeof(*hp));
+    assert(hp != NULL);
+
+    hp->nodes = calloc((size_t) initSize, sizeof(heap_node *));
+    assert(hp->nodes != NULL);
+
+    mutex_init(hp->lock);
+    hp->size = (unsigned long) initSize;
+    hp->last = 0;
+    hp->gen_key = gen_key;
+    hp->age = 0;
+
+    return hp;
+}
+
+
+/*
+ * Free memory used by a heap: every node still stored in it, the node
+ * array and the heap object itself. Does not free the metadata pointed to
+ * by the heap nodes, only the heap's internal memory. HP must not be NULL
+ * and must not be used after this call.
+ */
+void
+delete_heap(heap * hp)
+{
+    unsigned long i;    /* same type as hp->last: no signed/unsigned mix */
+
+    assert(hp);
+    for (i = 0; i < hp->last; i++)
+        free(hp->nodes[i]);
+    free(hp->nodes);
+    free(hp);
+}
+
+/*
+ * Insert DAT into HP, keyed by the value the heap's key generator returns
+ * for it, while maintaining the heap property. Returns the newly inserted
+ * heap node. The fields of the node other than ID are never changed until
+ * it is updated or deleted from HP, i.e. the caller can assume that the
+ * heap node always exists at the same place in memory unless heap_delete
+ * or heap_extractmin is called on that node. This function exposes the
+ * heap's internal data structure to the caller. This is required in order
+ * to do O(lgN) deletion. Allocation failure is treated as fatal (assert).
+ */
+heap_node *
+heap_insert(heap * hp, void *dat)
+{
+    heap_node *elm = malloc(sizeof(*elm));
+
+    assert(elm != NULL);
+
+    elm->key = heap_gen_key(hp, dat);
+    elm->data = dat;
+
+    if (_heap_should_grow(hp))
+        _heap_grow(hp);
+
+    /* Append as the last leaf, then sift up into position. */
+    hp->nodes[hp->last] = elm;
+    elm->id = hp->last;
+    hp->last += 1;
+
+    _heap_ify_up(hp, elm);
+
+    return elm;
+}
+
+
+/*
+ * Delete ELM while maintaining the heap property. Assumes that ELM is not
+ * NULL and belongs to HP; ELM is freed and must not be used afterwards.
+ * Returns the data ELM pointed to, which the caller must free if
+ * necessary.
+ *
+ * The last leaf is swapped into ELM's position, ELM (now last) is removed,
+ * and the moved leaf is sifted up or down as required.
+ */
+heap_t
+heap_delete(heap * hp, heap_node * elm)
+{
+    heap_node *lastNode;
+    heap_t data = elm->data;
+    int elmWasLast;
+
+    assert(_heap_node_exist(hp, hp->last - 1));
+
+    lastNode = hp->nodes[hp->last - 1];
+    /*
+     * Remember whether ELM is itself the last leaf: in that case lastNode
+     * is the node freed below and must not be touched again.
+     */
+    elmWasLast = (elm == lastNode);
+
+    _heap_swap_element(hp, lastNode, elm);
+    heap_extractlast(hp);
+
+    if (!elmWasLast) {
+        if (lastNode->key < hp->nodes[Parent(lastNode->id)]->key)
+            _heap_ify_up(hp, lastNode);
+        _heap_ify_down(hp, lastNode);
+    }
+    return data;
+}
+
+/*
+ * Note: deleting the last element (leaf) out of the heap is done by
+ * heap_extractlast() further below; it does not require a heapify
+ * operation.
+ */
+
+#ifndef heap_gen_key
+/*
+ * Out-of-line key generator, compiled only when heap.h did not supply the
+ * macro form. Calls HP's gen_key with DAT and HP's aging factor and
+ * returns the resulting key.
+ */
+heap_key
+heap_gen_key(heap * hp, heap_t dat)
+{
+    heap_key_func *generate = hp->gen_key;
+
+    return generate(dat, hp->age);
+}
+#endif /* heap_gen_key */
+
+
+/*
+ * Removes the root node (the node with the smallest KEY value) from the
+ * heap and returns its data. Returns NULL if the heap was empty.
+ */
+heap_t
+heap_extractmin(heap * hp)
+{
+    heap_t rootData = NULL;
+
+    if (hp->last > 0) {
+        mutex_lock(hp->lock);
+
+        rootData = hp->nodes[0]->data;
+        heap_delete(hp, hp->nodes[0]);  /* removes and frees the root */
+
+        mutex_unlock(hp->lock);
+    }
+    return rootData;
+}
+
+
+/*
+ * Remove the last node in HP. Frees the heap's internal node structure
+ * and returns the data pointed to by that node. The heap must not be
+ * empty (checked by assert). The vacated slot is reset to NULL so that no
+ * dangling pointer remains in the node array.
+ */
+heap_t
+heap_extractlast(heap * hp)
+{
+    heap_node *leaf;
+    heap_t data;
+
+    assert(_heap_node_exist(hp, hp->last - 1));
+    hp->last -= 1;
+    leaf = hp->nodes[hp->last];
+    data = leaf->data;
+    hp->nodes[hp->last] = NULL;
+    free(leaf);
+    return data;
+}
+
+
+/*
+ * Replace the data held by ELM with DAT, regenerate ELM's key and move
+ * ELM to its proper place. The resulting heap contents are the same as
+ * after:
+ * heap_delete(hp, elm);
+ * heap_insert(hp, dat);
+ * but the node ELM itself stays allocated and valid. Returns the old data
+ * object from ELM (the one being replaced). The caller must free this as
+ * necessary.
+ */
+heap_t
+heap_update(heap * hp, heap_node * elm, void *dat)
+{
+    heap_t previous = elm->data;
+    heap_node *above;
+
+    elm->key = heap_gen_key(hp, dat);
+    elm->data = dat;
+
+    /* Sift up if the new key undercuts the parent; then settle downwards. */
+    above = hp->nodes[Parent(elm->id)];
+    if (elm->key < above->key)
+        _heap_ify_up(hp, elm);
+    _heap_ify_down(hp, elm);
+
+    return previous;
+}
+
+
+/*
+ * Returns the DATA pointer of the root node without modifying the heap.
+ * The heap must not be empty (checked by assert).
+ */
+void *
+heap_peepmin(heap * hp)
+{
+    heap_node *root;
+
+    assert(_heap_node_exist(hp, 0));
+    root = hp->nodes[0];
+    return root->data;
+}
+
+
+/*
+ * Returns the KEY of the root node without modifying the heap. The heap
+ * must not be empty (checked by assert).
+ */
+heap_key
+heap_peepminkey(heap * hp)
+{
+    heap_node *root;
+
+    assert(_heap_node_exist(hp, 0));
+    root = hp->nodes[0];
+    return root->key;
+}
+
+
+/*
+ * Same as heap_peep except that this returns the KEY of the Nth node
+ * rather than its data. Only meant for iteration. Node N must exist
+ * (checked by assert).
+ */
+heap_key
+heap_peepkey(heap * hp, int n)
+{
+    heap_node *nth;
+
+    assert(_heap_node_exist(hp, n));
+    nth = hp->nodes[n];
+    return nth->key;
+}
+
+
+/*
+ * Returns the DATA pointer of the Nth node. Node N must exist (checked by
+ * assert). The caller can iterate through HP with this routine, e.g.:
+ * for(i = 0; i < heap_nodes(hp); i++)
+ * data = heap_peep(hp, i);
+ */
+void *
+heap_peep(heap * hp, int n)
+{
+    assert(_heap_node_exist(hp, n));
+    return hp->nodes[n]->data;
+}
+
+
+#ifndef heap_nodes
+/*
+ * Number of nodes currently stored in HP (function form, used when heap.h
+ * does not provide the macro).
+ */
+int
+heap_nodes(heap * hp)
+{
+    int count = hp->last;
+
+    return count;
+}
+#endif /* heap_nodes */
+
+
+#ifndef heap_empty
+/*
+ * Emptiness test (function form, used when heap.h does not provide the
+ * macro). Returns 1 if HP holds no elements and 0 otherwise.
+ */
+int
+heap_empty(heap * hp)
+{
+    if (hp->last > 0)
+        return 0;
+    return 1;
+}
+#endif /* heap_empty */
+
+/****************** Private Functions *******************/
+
+/*
+ * Maintain the heap order property (parent is no larger than its
+ * children), which may only be violated at ELM downwards: repeatedly swap
+ * ELM with its smaller child until ELM is no larger than that child or
+ * has no children. Assumes caller has locked the heap.
+ */
+static void
+_heap_ify_down(heap * hp, heap_node * elm)
+{
+    for (;;) {
+        heap_node *kid;
+        int left = Left(elm->id);
+        int right = Right(elm->id);
+
+        if (!_heap_node_exist(hp, left)) {
+            /* At the bottom of the heap (no child). */
+            assert(!_heap_node_exist(hp, right));
+            break;
+        }
+        if (!_heap_node_exist(hp, right)) {
+            /* Only the left child exists. */
+            kid = hp->nodes[left];
+        } else if (hp->nodes[right]->key < hp->nodes[left]->key) {
+            kid = hp->nodes[right];
+        } else {
+            kid = hp->nodes[left];
+        }
+
+        if (elm->key <= kid->key)
+            break;
+        _heap_swap_element(hp, kid, elm);
+    }
+}
+
+
+/*
+ * Restore the heap property above ELM: while ELM is not the root and its
+ * parent's key is larger than ELM's, swap the two. Caller has locked the
+ * heap.
+ */
+static void
+_heap_ify_up(heap * hp, heap_node * elm)
+{
+    for (;;) {
+        heap_node *above;
+
+        if (elm->id == 0)
+            return;             /* reached the root */
+        above = hp->nodes[Parent(elm->id)];
+        if (above->key <= elm->key)
+            return;             /* order already holds */
+        _heap_swap_element(hp, above, elm);     /* Demote the parent. */
+    }
+}
+
+
+/*
+ * Swap the positions of ELM1 and ELM2 in HP's node array. Their IDs are
+ * swapped as well, so each node's id keeps matching its array index.
+ * Swapping a node with itself is a harmless no-op.
+ */
+static void
+_heap_swap_element(heap * hp, heap_node * elm1, heap_node * elm2)
+{
+    /* Same type as heap_node.id, so large indices are not truncated. */
+    unsigned long elm1Id = elm1->id;
+
+    elm1->id = elm2->id;
+    elm2->id = elm1Id;
+    hp->nodes[elm1->id] = elm1;
+    hp->nodes[elm2->id] = elm2;
+}
+
+
+
+#ifdef NOTDEF
+/*
+ * Copy the payload (DATA and KEY) of SRC into DEST. DEST keeps its own ID.
+ * Compiled only when NOTDEF is defined.
+ */
+static void
+_heap_copy_element(heap_node * src, heap_node * dest)
+{
+    dest->data = src->data;
+    dest->key = src->key;
+}
+
+#endif /* NOTDEF */
+
+
+/*
+ * Returns 1 when every allocated slot of HP is in use (so the node array
+ * must grow before another insert) and 0 otherwise.
+ */
+static int
+_heap_should_grow(heap * hp)
+{
+    return (hp->last >= hp->size) ? 1 : 0;
+}
+
+
+/*
+ * Grow HP's node array: by NormalRate while the heap is at most Threshold
+ * slots, by SlowRate beyond that. The new size is always strictly larger
+ * than the old one. Existing node pointers are preserved and the new
+ * slots are set to NULL. Allocation failure is treated as fatal (assert);
+ * the old array is left untouched until the new one is known to be valid.
+ */
+static void
+_heap_grow(heap * hp)
+{
+    unsigned long newSize;
+    unsigned long i;
+    heap_node **grown;
+
+    if (hp->size > Threshold)
+        newSize = (unsigned long) (hp->size * SlowRate);
+    else
+        newSize = hp->size * NormalRate;
+
+    /* Guarantee progress even for degenerate sizes such as 0. */
+    if (newSize <= hp->size)
+        newSize = hp->size + MinSize;
+
+    grown = realloc(hp->nodes, newSize * sizeof(heap_node *));
+    assert(grown != NULL);
+
+    for (i = hp->size; i < newSize; i++)
+        grown[i] = NULL;
+
+    hp->nodes = grown;
+    hp->size = newSize;
+}
+
+
+/*
+ * Returns 1 if ID is a valid index into HP (0 <= ID < number of nodes)
+ * whose slot holds a node, and 0 otherwise.
+ */
+static int
+_heap_node_exist(heap * hp, int id)
+{
+    if (id < 0)
+        return 0;
+    if (id >= hp->last)
+        return 0;
+    return (hp->nodes[id] != NULL) ? 1 : 0;
+}
+
+/****************************************************************************
+ * Printing and debug functions
+ ****************************************************************************/
+
+/*
+ * Print the index and key of every node from position ID up to the end of
+ * the heap, in array (element) order, one node per line on stdout.
+ */
+void
+heap_print_inorder(heap * hp, int id)
+{
+    for (; id < hp->last; id++)
+        printf("%d\tKey = %.04f\n", id, hp->nodes[id]->key);
+}
+
+/*
+ * Returns 1 if HP maintains the heap property (no parent key is larger
+ * than the key of either of its children) and 0 otherwise. On the first
+ * violation found, reports its position and dumps the whole heap to
+ * stdout.
+ */
+int
+verify_heap_property(heap * hp)
+{
+    int idx;
+
+    for (idx = 0; idx < hp->last / 2; idx++) {
+        int leftBad = _heap_node_exist(hp, Left(idx))
+            && hp->nodes[idx]->key > hp->nodes[Left(idx)]->key;
+        int rightBad = _heap_node_exist(hp, Right(idx))
+            && hp->nodes[idx]->key > hp->nodes[Right(idx)]->key;
+
+        if (leftBad || rightBad) {
+            printf("verifyHeap: violated at %d", idx);
+            heap_print_inorder(hp, 0);
+            return 0;
+        }
+    }
+    return 1;
+}
+
+#ifdef MEASURE_HEAP_SKEW
+
+/****************************************************************************
+ * Heap skew computation
+ ****************************************************************************/
+
+/*
+ * qsort() comparator for an array of heap_node pointers: orders nodes by
+ * ascending key. A and B each point to a heap_node pointer. Returns -1, 0
+ * or 1. Keys are compared directly as heap_key (double) values, so no
+ * precision is lost and equal keys compare equal, as qsort requires of a
+ * consistent comparator.
+ */
+int
+compare_heap_keys(const void *a, const void *b)
+{
+    const heap_node *const *an = a;
+    const heap_node *const *bn = b;
+    heap_key ka = (*an)->key;
+    heap_key kb = (*bn)->key;
+
+    if (ka < kb)
+        return -1;
+    if (ka > kb)
+        return 1;
+    return 0;
+}
+
+/*
+ * Compute the heap skew for HEAP, a measure of how out-of-order the
+ * elements in the heap are. The skew of a heap node is the difference
+ * between its current position in the heap and where it would be if the
+ * heap were in sorted order. To compute this we have to sort (a copy of)
+ * the heap. At the end, if the flag REPLACE is non-zero, the heap is left
+ * in sorted order (with skew == 0). Note: using REPLACE does not help the
+ * performance of the heap, so only do this if you really want to have a
+ * sorted heap. It is faster not to replace.
+ *
+ * Returns the normalized skew; an empty heap has a skew of 0. Allocation
+ * failure is treated as fatal (assert).
+ */
+float
+calc_heap_skew(heap * heap, int replace)
+{
+    heap_node **nodes;
+    long diff, skew = 0;
+    unsigned long id;
+#ifdef HEAP_DEBUG_SKEW
+    long skewsq = 0;
+#endif /* HEAP_DEBUG_SKEW */
+    float norm = 0;
+    unsigned long max;
+
+    /*
+     * Lock the heap to copy it. If replacing it we need to keep the heap
+     * locked until we are all done.
+     */
+    mutex_lock(heap->lock);
+
+    max = heap_nodes(heap);
+
+    /* Nothing to sort, and the normalization below would divide 0 by 0. */
+    if (max == 0) {
+        mutex_unlock(heap->lock);
+        return 0;
+    }
+
+    /*
+     * Copy the heap nodes to a new storage area for offline sorting.
+     */
+    nodes = malloc(max * sizeof(heap_node *));
+    assert(nodes != NULL);
+    memcpy(nodes, heap->nodes, max * sizeof(heap_node *));
+
+    if (replace == 0) {
+        /*
+         * Unlock the heap to allow updates from other threads before the
+         * sort. This allows other heap operations to proceed concurrently
+         * with the heap skew computation on the heap at the time of the
+         * call ...
+         */
+        mutex_unlock(heap->lock);
+    }
+    qsort(nodes, max, sizeof(heap_node *), compare_heap_keys);
+
+    for (id = 0; id < max; id++) {
+        /* Signed distance between sorted position and heap position. */
+        diff = (long) id - (long) nodes[id]->id;
+        skew += labs(diff);
+
+#ifdef HEAP_DEBUG_SKEW
+        skewsq += diff * diff;
+#ifdef HEAP_DEBUG_ALL
+        printf("%lu\tKey = %f, diff = %ld\n", id, nodes[id]->key, diff);
+#endif /* HEAP_DEBUG */
+#endif /* HEAP_DEBUG_SKEW */
+    }
+
+    if (replace != 0) {
+        /*
+         * Replace the original heap with the newly sorted heap and let it
+         * continue. The skew was computed above from the ids the nodes had
+         * before this replacement.
+         */
+        memcpy(heap->nodes, nodes, max * sizeof(heap_node *));
+
+        for (id = 0; id < max; id++) {
+            /*
+             * Fix up all the ID values in the copied nodes.
+             */
+            heap->nodes[id]->id = id;
+        }
+
+        mutex_unlock(heap->lock);
+    }
+    /*
+     * The skew value is normalized to a range of [0..1]; the distribution
+     * appears to be a skewed Gaussian distribution. For random insertions
+     * into a heap the normalized skew will be slightly less than 0.5. The
+     * maximum value of skew/N^2 (for any value of N) is about 0.39 and is
+     * fairly stable. The square is computed in floating point so it cannot
+     * overflow.
+     */
+    norm = (float) (skew * 2.56 / ((double) max * (double) max));
+
+    /*
+     * Free the nodes array; note this is just an array of pointers, not
+     * data!
+     */
+    free(nodes);
+    return norm;
+}
+
+#endif /* MEASURE_HEAP_SKEW */