From: wessels <> Date: Fri, 25 Jun 1999 02:17:02 +0000 (+0000) Subject: adding X-Git-Tag: SQUID_3_0_PRE1~2145 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ad19eafb998c90b24e3348f0cafce714740d81be;p=thirdparty%2Fsquid.git adding --- diff --git a/include/heap.h b/include/heap.h new file mode 100644 index 0000000000..2d9243740f --- /dev/null +++ b/include/heap.h @@ -0,0 +1,172 @@ +/* + * $Id: heap.h,v 1.1 1999/06/24 20:17:03 wessels Exp $ + * + * AUTHOR: John Dilley, Hewlett Packard + * + * SQUID Internet Object Cache http://squid.nlanr.net/Squid/ + * -------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from the + * Internet community. Development is led by Duane Wessels of the + * National Laboratory for Applied Network Research and funded by + * the National Science Foundation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * + */ + +/**************************************************************************** + * Heap data structure. Used to store objects for cache replacement. The + * heap is implemented as a contiguous array in memory. Heap sort and heap + * update are done in-place. The heap is ordered with the smallest value at + * the top of the heap (as in the smallest object key value). Child nodes + * are larger than their parent. + ****************************************************************************/ + +#ifndef _heap_h_INCLUDED +#define _heap_h_INCLUDED + +/* + * Function for generating heap keys. The first argument will typically be + * a dws_md_p passed in as a void *. Should find a way to get type safety + * without having heap know all about metadata objects... The second arg is + * the current aging factor for the heap. + */ +typedef unsigned long heap_mutex_t; +typedef void *heap_t; +typedef double heap_key; +typedef heap_key heap_key_func(heap_t, heap_key); + + +/* + * Heap node. Has a key value generated by a key_func, id (array index) so + * it can be quickly found in its heap, and a pointer to a data object that + * key_func can generate a key from. + */ +typedef struct _heap_node { + heap_key key; + unsigned long id; + heap_t data; +} heap_node; + + +/* + * Heap object. Holds an array of heap_node objects along with a heap size + * (array length), the index of the last heap element, and a key generation + * function. Also stores aging factor for this heap. + */ +typedef struct _heap { + heap_mutex_t lock; + unsigned long size; + unsigned long last; + heap_key_func *gen_key; /* key generator for heap */ + heap_key age; /* aging factor for heap */ + heap_node **nodes; +} heap; + +/**************************************************************************** + * Public functions + ****************************************************************************/ + +/* + * Create and initialize a new heap. + */ +extern heap *new_heap(int init_size, heap_key_func gen_key); + +/* + * Delete a heap and clean up its memory. Does not delete what the heap + * nodes are pointing to! + */ +extern void delete_heap(heap *); + +/* + * Insert a new node into a heap, returning a pointer to it. The heap_node + * object returned is used to update or delete a heap object. Nothing else + * should be done with this data structure (especially modifying it!) The + * heap does not assume ownership of the data passed to it. + */ +extern heap_node *heap_insert(heap *, heap_t dat); + +/* + * Delete a node out of a heap. Returns the heap data from the deleted + * node. The caller is responsible for freeing this data. + */ +extern heap_t heap_delete(heap *, heap_node * elm); + +/* + * The semantics of this routine is the same as the followings: + * heap_delete(hp, elm); + * heap_insert(hp, dat); + * Returns the old data object from elm (the one being replaced). The + * caller must free this as necessary. + */ +extern heap_t heap_update(heap *, heap_node * elm, heap_t dat); + +/* + * Generate a heap key for a given data object. Alternative macro form: + */ +#ifdef MACRO_DEBUG +extern heap_key heap_gen_key(heap * hp, heap_t dat); +#else +#define heap_gen_key(hp,md) ((hp)->gen_key((md),(hp)->age)) +#endif /* MACRO_DEBUG */ + + +/* + * Extract the minimum (root) element and maintain the heap property. + * Returns the data pointed to by the root node, which the caller must + * free as necessary. + */ +extern heap_t heap_extractmin(heap *); + +/* + * Extract the last leaf node (does not change the heap property). + * Returns the data that had been in the heap which the caller must free if + * necessary. Note that the last node is guaranteed to be less than its + * parent, but may not be less than any of the other (leaf or parent) notes + * in the tree. This operation is fast but imprecise. + */ +extern heap_t heap_extractlast(heap * hp); + +/* + * Get the root key, the nth key, the root (smallest) element, or the nth + * element. None of these operations modify the heap. + */ +extern heap_key heap_peepminkey(heap *); +extern heap_key heap_peepkey(heap *, int n); + +extern heap_t heap_peepmin(heap *); +extern heap_t heap_peep(heap *, int n); + +/* + * Is the heap empty? How many nodes (data objects) are in it? + */ +#ifdef MACRO_DEBUG +extern int heap_empty(heap *); +extern int heap_nodes(heap *); +#else /* MACRO_DEBUG */ +#define heap_nodes(heap) ((heap)->last) +#define heap_empty(heap) (((heap)->last <= 0) ? 1 : 0) +#endif /* MACRO_DEBUG */ + +/* + * Print the heap or a node in the heap. + */ +extern void heap_print(heap *); +extern void heap_printnode(char *msg, heap_node * elm); + +extern int verify_heap_property(heap *); + +#endif /* _heap_h_INCLUDED */ diff --git a/lib/heap.c b/lib/heap.c new file mode 100644 index 0000000000..b800254ec8 --- /dev/null +++ b/lib/heap.c @@ -0,0 +1,614 @@ + +/* + * $Id: heap.c,v 1.1 1999/06/24 20:17:02 wessels Exp $ + * + * AUTHOR: John Dilley, Hewlett Packard + * + * SQUID Internet Object Cache http://squid.nlanr.net/Squid/ + * ---------------------------------------------------------- + * + * Squid is the result of efforts by numerous individuals from the + * Internet community. Development is led by Duane Wessels of the + * National Laboratory for Applied Network Research and funded by the + * National Science Foundation. Squid is Copyrighted (C) 1998 by + * Duane Wessels and the University of California San Diego. Please + * see the COPYRIGHT file for full details. Squid incorporates + * software developed and/or copyrighted by other sources. Please see + * the CREDITS file for full details. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * + */ + +/**************************************************************************** + * Heap implementation + ****************************************************************************/ + +#include "config.h" + +#if HAVE_STDLIB_H +#include +#endif +#if HAVE_ASSERT_H +#include +#endif +#if HAVE_STRING_H +#include +#endif +#if HAVE_STDIO_H +#include +#endif + +#include "heap.h" + +/* + * Hacks for non-synchronized heap implementation. + */ +#define mutex_lock(m) (void)0 +#define mutex_unlock(m) (void)0 +#define mutex_trylock(m) (void)0 +#define mutex_init(m) ((m)=123456) + +/* + * Private function prototypes. + */ +static void _heap_ify_up(heap * hp, heap_node * elm); +static void _heap_ify_down(heap * hp, heap_node * elm); +static int _heap_should_grow(heap * hp); +static void _heap_grow(heap * hp); +static void _heap_swap_element(heap * hp, heap_node * elm1, heap_node * elm2); +static int _heap_node_exist(heap * hp, int id); + +#ifdef HEAP_DEBUG +void _heap_print_tree(heap * hp, heap_node * node); +#endif /* HEAP_DEBUG */ + +#define Left(x) (2 * (x) + 1) +#define Right(x) (2 * (x) + 2) +#define Parent(x) ((int)((x)-1)/2) + +#define Threshold 10000 +#define NormalRate 2 +#define SlowRate 1.5 +#define MinSize 32 + +/**************************************************************************** + * Public functions + ****************************************************************************/ + +/* + * Return a newly created heap. INITSIZE is the initial size of the heap. + */ +heap * +new_heap(int initSize, heap_key_func gen_key) +{ + heap *hp = malloc(sizeof(*hp)); + assert(hp != NULL); + + hp->nodes = calloc(initSize, sizeof(heap_node *)); + assert(hp->nodes != NULL); + + if (initSize <= 0) + initSize = MinSize; + hp->size = initSize; + hp->last = 0; + hp->gen_key = gen_key; + hp->age = 0; + + return hp; +} + + +/* + * Free memory used by a heap. Does not free the metadata pointed to by the + * heap nodes, only the heap's internal memory. + */ +void +delete_heap(heap * hp) +{ + int i; + assert(hp); + for (i = 0; i < hp->last; i++) { + free(hp->nodes[i]); + } + free(hp->nodes); + free(hp); +} + +/* + * Insert DAT based on KY into HP maintaining the heap property. Return the + * newly inserted heap node. The fields of ELM other than ID are never + * changed until ELM is deleted from HP, i.e. caller can assume that the + * heap node always exist at the same place in memory unless heap_delete or + * heap_extractmin is called on that node. This function exposes the heap's + * internal data structure to the caller. This is required in order to do + * O(lgN) deletion. + */ +heap_node * +heap_insert(heap * hp, void *dat) +{ + heap_node *elm = (heap_node *) malloc(sizeof(heap_node)); + + elm->key = heap_gen_key(hp, dat); + elm->data = dat; + + if (_heap_should_grow(hp)) + _heap_grow(hp); + + hp->nodes[hp->last] = elm; + elm->id = hp->last; + hp->last += 1; + + _heap_ify_up(hp, elm); + + return elm; +} + + +/* + * Delete ELM while maintaining the heap property. ELM may be modified. + * Assumes that ELM is not NULL and frees it. Returns the data pointed to + * in, which the caller must free if necessary. + */ +heap_t +heap_delete(heap * hp, heap_node * elm) +{ + heap_node *lastNode; + heap_t data = elm->data; + + assert(_heap_node_exist(hp, hp->last - 1)); + + lastNode = hp->nodes[hp->last - 1]; + _heap_swap_element(hp, lastNode, elm); + heap_extractlast(hp); + + if (hp->last > 0) { + if (lastNode->key < hp->nodes[Parent(lastNode->id)]->key) + _heap_ify_up(hp, lastNode); /* COOL! */ + _heap_ify_down(hp, lastNode); + } + return data; +} + +/* + * Delete the last element (leaf) out of the heap. Does not require a + * heapify operation. + */ + +#ifndef heap_gen_key +/* + * Function to generate keys. See macro definition in heap.h. + */ +heap_key +heap_gen_key(heap * hp, heap_t dat) +{ + return hp->gen_key(dat, hp->age); +} +#endif /* heap_gen_key */ + + +/* + * Returns the data of the node with the largest KEY value and removes that + * node from the heap. Returns NULL if the heap was empty. + */ +heap_t +heap_extractmin(heap * hp) +{ + heap_t data; + + if (hp->last <= 0) + return NULL; + + mutex_lock(hp->lock); + + data = hp->nodes[0]->data; + heap_delete(hp, hp->nodes[0]); /* Delete the root */ + + mutex_unlock(hp->lock); + + return data; +} + + +/* + * Remove the last node in HP. Frees the heap internal structure and + * returns the data pointes to by the last node. + */ +heap_t +heap_extractlast(heap * hp) +{ + heap_t data; + assert(_heap_node_exist(hp, hp->last - 1)); + hp->last -= 1; + data = hp->nodes[hp->last]->data; + free(hp->nodes[hp->last]); + return data; +} + + +/* + * The semantics of this routine is the same as the followings: + * heap_delete(hp, elm); + * heap_insert(hp, dat); + * Returns the old data object from elm (the one being replaced). The + * caller must free this as necessary. + */ +heap_t +heap_update(heap * hp, heap_node * elm, void *dat) +{ + heap_t old = elm->data; + heap_key ky = heap_gen_key(hp, dat); + + elm->key = ky; + elm->data = dat; + + if (elm->key < hp->nodes[Parent(elm->id)]->key) + _heap_ify_up(hp, elm); + _heap_ify_down(hp, elm); + + return old; +} + + +/* + * A pointer to the root node's DATA. + */ +void * +heap_peepmin(heap * hp) +{ + assert(_heap_node_exist(hp, 0)); + return hp->nodes[0]->data; +} + + +/* + * The KEY of the root node. + */ +heap_key +heap_peepminkey(heap * hp) +{ + assert(_heap_node_exist(hp, 0)); + return hp->nodes[0]->key; +} + + +/* + * Same as heap_peep except that this return the KEY of the node. + * Only meant for iteration. + */ +heap_key +heap_peepkey(heap * hp, int n) +{ + assert(_heap_node_exist(hp, n)); + return hp->nodes[n]->key; +} + + +/* + * A pointer to Nth node's DATA. The caller can iterate through HP by + * calling this routine. eg. Caller can execute the following code: + * for(i = 0; i < heap_nodes(hp); i++) + * data = heap_peep(hp, i); + */ +void * +heap_peep(heap * hp, int n) +{ + void *data; + assert(_heap_node_exist(hp, n)); + data = hp->nodes[n]->data; + return data; +} + + +#ifndef heap_nodes +/* + * Current number of nodes in HP. + */ +int +heap_nodes(heap * hp) +{ + return hp->last; +} +#endif /* heap_nodes */ + + +#ifndef heap_empty +/* + * Determine if the heap is empty. Returns 1 if HP has no elements and 0 + * otherwise. + */ +int +heap_empty(heap * hp) +{ + return (hp->last <= 0) ? 1 : 0; +} +#endif /* heap_empty */ + +/****************** Private Functions *******************/ + +/* + * Maintain the heap order property (parent is smaller than children) which + * may only be violated at ELM downwards. Assumes caller has locked the heap. + */ +static void +_heap_ify_down(heap * hp, heap_node * elm) +{ + heap_node *kid; + int left = 0, right = 0; + int true = 1; + while (true) { + left = Left(elm->id); + right = Right(elm->id); + if (!_heap_node_exist(hp, left)) { // At the bottom of the heap (no child). + + assert(!_heap_node_exist(hp, right)); + break; + } else if (!_heap_node_exist(hp, right)) // Only left child exists. + + kid = hp->nodes[left]; + else { + if (hp->nodes[right]->key < hp->nodes[left]->key) + kid = hp->nodes[right]; + else + kid = hp->nodes[left]; + } + if (elm->key <= kid->key) + break; + _heap_swap_element(hp, kid, elm); + } +} + + +/* + * Maintain the heap property above ELM. Caller has locked the heap. + */ +static void +_heap_ify_up(heap * hp, heap_node * elm) +{ + heap_node *parentNode; + while (elm->id > 0) { + parentNode = hp->nodes[Parent(elm->id)]; + if (parentNode->key <= elm->key) + break; + _heap_swap_element(hp, parentNode, elm); /* Demote the parent. */ + } +} + + +/* + * Swap the position of ELM1 and ELM2 in heap structure. Their IDs are also + * swapped. + */ +static void +_heap_swap_element(heap * hp, heap_node * elm1, heap_node * elm2) +{ + int elm1Id = elm1->id; + elm1->id = elm2->id; + elm2->id = elm1Id; + hp->nodes[elm1->id] = elm1; + hp->nodes[elm2->id] = elm2; +} + + + +#ifdef NOTDEF +/* + * Copy KEY and DATA fields of SRC to DEST. ID field is NOT copied. + */ +static void +_heap_copy_element(heap_node * src, heap_node * dest) +{ + dest->key = src->key; + dest->data = src->data; +} + +#endif /* NOTDEF */ + + +/* + * True if HP needs to be grown in size. + */ +static int +_heap_should_grow(heap * hp) +{ + if (hp->size <= hp->last) + return 1; + return 0; +} + + +/* + * Grow HP. + */ +static void +_heap_grow(heap * hp) +{ + int newSize; + + if (hp->size > Threshold) + newSize = hp->size * SlowRate; + else + newSize = hp->size * NormalRate; + + hp->nodes = (heap_node * *)realloc(hp->nodes, newSize * sizeof(heap_node *)); + //for(i = 0; i < hp->size; i++) + //newNodes[i] = hp->nodes[i]; + //free(hp->nodes); + //hp->nodes = newNodes; + hp->size = newSize; +} + + +/* + * True if a node with ID exists in HP. + */ +static int +_heap_node_exist(heap * hp, int id) +{ + if ((id >= hp->last) || (id < 0) || (hp->nodes[id] == NULL)) + return 0; + return 1; +} + +/**************************************************************************** + * Printing and debug functions + ****************************************************************************/ + +/* + * Print the heap in element order, id..last. + */ +void +heap_print_inorder(heap * hp, int id) +{ + while (id < hp->last) { + printf("%d\tKey = %.04f\n", id, hp->nodes[id]->key); + id++; + } +} + +/* + * Returns 1 if HP maintians the heap property and 0 otherwise. + */ +int +verify_heap_property(heap * hp) +{ + int i = 0; + int correct = 1; + for (i = 0; i < hp->last / 2; i++) { + correct = 1; + if (_heap_node_exist(hp, Left(i))) + if (hp->nodes[i]->key > hp->nodes[Left(i)]->key) + correct = 0; + if (_heap_node_exist(hp, Right(i))) + if (hp->nodes[i]->key > hp->nodes[Right(i)]->key) + correct = 0; + if (!correct) { + printf("verifyHeap: violated at %d", i); + heap_print_inorder(hp, 0); + break; + } + } + return correct; +} + +#ifdef MEASURE_HEAP_SKEW + +/**************************************************************************** + * Heap skew computation + ****************************************************************************/ + +int +compare_heap_keys(const void *a, const void *b) +{ + heap_node **an = (heap_node **) a; + heap_node **bn = (heap_node **) b; + float cmp = (*an)->key - (*bn)->key; + if (cmp < 0) + return -1; + else + return 1; +} + +/* + * Compute the heap skew for HEAP, a measure of how out-of-order the + * elements in the heap are. The skew of a heap node is the difference + * between its current position in the heap and where it would be if the + * heap were in sorted order. To compute this we have to sort the heap. At + * the end if the flag REPLACE is non-zero the heap will be returned in + * sorted order (with skew == 0). Note: using REPLACE does not help the + * performance of the heap, so only do this if you really want to have a + * sorted heap. It is faster not to replace. + */ +float +calc_heap_skew(heap * heap, int replace) +{ + heap_node **nodes; + long id, diff, skew = 0; +#ifdef HEAP_DEBUG_SKEW + long skewsq = 0; +#endif /* HEAP_DEBUG_SKEW */ + float norm = 0; + unsigned long max; + + /* + * Lock the heap to copy it. If replacing it need to keep the heap locked + * until we are all done. + */ + mutex_lock(hp->lock); + + max = heap_nodes(heap); + + /* + * Copy the heap nodes to a new storage area for offline sorting. + */ + nodes = (heap_node **) malloc(max * sizeof(heap_node *)); + memcpy(nodes, heap->nodes, max * sizeof(heap_node *)); + + if (replace == 0) { + /* + * Unlock the heap to allow updates from other threads before the sort. + * This allows other heap operations to proceed concurrently with the + * heap skew computation on the heap at the time of the call ... + */ + mutex_unlock(hp->lock); + } + qsort(nodes, max, sizeof(heap_node *), compare_heap_keys); + + for (id = 0; id < max; id++) { + diff = id - nodes[id]->id; + skew += abs(diff); + +#ifdef HEAP_DEBUG_SKEW + skewsq += diff * diff; +#ifdef HEAP_DEBUG_ALL + printf("%d\tKey = %f, diff = %d\n", id, nodes[id]->key, diff); +#endif /* HEAP_DEBUG */ +#endif /* HEAP_DEBUG_SKEW */ + } + + if (replace != 0) { + /* + * Replace the original heap with the newly sorted heap and let it + * continue. Then compute the skew using the copy of the previous heap + * which we maintain as private data. + */ + memcpy(heap->nodes, nodes, max * sizeof(heap_node *)); + + for (id = 0; id < max; id++) { + /* + * Fix up all the ID values in the copied nodes. + */ + heap->nodes[id]->id = id; + } + + mutex_unlock(hp->lock); + } + /* + * The skew value is normalized to a range of [0..1]; the distribution + * appears to be a skewed Gaussian distribution. For random insertions + * into a heap the normalized skew will be slightly less than 0.5. The + * maximum value of skew/N^2 (for any value of N) is about 0.39 and is + * fairly stable. + */ + norm = skew * 2.56 / (max * max); + + /* + * Free the nodes array; note this is just an array of pointers, not data! + */ + free(nodes); + return norm; +} + +#endif /* MEASURE_HEAP_SKEW */