]> git.ipfire.org Git - thirdparty/knot-resolver.git/commitdiff
critbit: included small implementation of critbit tree
authorMarek Vavruša <marek.vavrusa@nic.cz>
Sat, 28 Mar 2015 16:44:59 +0000 (17:44 +0100)
committerMarek Vavruša <marek.vavrusa@nic.cz>
Sat, 28 Mar 2015 16:44:59 +0000 (17:44 +0100)
lib/generic/critbit.c [new file with mode: 0644]
lib/generic/critbit.h [new file with mode: 0644]
lib/lib.mk
tests.unit.mk
tests/test_critbit.c [new file with mode: 0644]

diff --git a/lib/generic/critbit.c b/lib/generic/critbit.c
new file mode 100644 (file)
index 0000000..15fe643
--- /dev/null
@@ -0,0 +1,311 @@
+/*
+ * critbit89 - A crit-bit tree implementation for strings in C89
+ * Written by Jonas Gehring <jonas@jgehring.net>
+ */
+
+/*
+ * The code makes the assumption that malloc returns pointers aligned to at
+ * least a two-byte boundary. Since the C standard requires that malloc return
+ * pointers that can store any type, there are no commonly-used toolchains for
+ * which this assumption is false.
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "critbit.h"
+
+#ifdef _MSC_VER /* MSVC */
+ typedef unsigned __int8 uint8_t;
+ typedef unsigned __int32 uint32_t;
+ #ifdef _WIN64
+  typedef signed __int64 intptr_t;
+ #else
+  typedef _W64 signed int intptr_t;
+ #endif
+#else /* Not MSVC */
+ #include <stdint.h>
+#endif
+
+
+typedef struct {
+       void *child[2]; /* Tagged pointers: low bit set => internal node stored as (address + 1), low bit clear => NUL-terminated key */
+       uint32_t byte; /* Index of the key byte this node discriminates on */
+       uint8_t otherbits; /* Mask with every bit set except the critical bit of that byte */
+} cb_node_t;
+
+/* Standard memory allocation functions */
+/* Default allocation hook: forwards to the C library malloc().
+ * The baton is part of the hook signature only and is ignored here. */
+static void *malloc_std(size_t size, void *baton)
+{
+       void *mem = malloc(size);
+
+       (void)baton; /* Prevent compiler warnings */
+       return mem;
+}
+
+/* Default release hook: forwards to the C library free().
+ * The baton is part of the hook signature only and is ignored here. */
+static void free_std(void *ptr, void *baton)
+{
+       free(ptr);
+       (void)baton; /* Prevent compiler warnings */
+}
+
+/* Static helper functions */
+/* Releases the whole subtree rooted at `top` through the tree's free hook.
+ * `top` is a tagged pointer: an odd address denotes an internal node stored
+ * one byte below it, an even address denotes a stored string. */
+static void cbt_traverse_delete(cb_tree_t *tree, void *top)
+{
+       cb_node_t *inner;
+
+       if (!(1 & (intptr_t)top)) {
+               /* External node: just the string itself */
+               tree->free(top, tree->baton);
+               return;
+       }
+
+       /* Internal node: release both subtrees first, then the node */
+       inner = (void *)((uint8_t *)top - 1);
+       cbt_traverse_delete(tree, inner->child[0]);
+       cbt_traverse_delete(tree, inner->child[1]);
+       tree->free(inner, tree->baton);
+}
+
+/* Visits every string below `top`, child 0 before child 1, and hands each
+ * one to `callback`. The first non-zero callback result stops the walk and
+ * is returned; 0 is returned when every callback returned 0. */
+static int cbt_traverse_prefixed(uint8_t *top,
+       int (*callback)(const char *, void *), void *baton)
+{
+       cb_node_t *inner;
+       int side;
+
+       if (!(1 & (intptr_t)top)) {
+               /* External node: report the stored string */
+               return callback((const char *)top, baton);
+       }
+
+       inner = (void *)(top - 1);
+       for (side = 0; side < 2; ++side) {
+               int rc = cbt_traverse_prefixed(inner->child[side], callback, baton);
+               if (rc != 0) {
+                       return rc;
+               }
+       }
+       return 0;
+}
+
+
+/*! Creates a new, empty critbit tree.
+ *  The returned tree has a NULL root, uses the C library malloc()/free()
+ *  as allocation hooks and carries a NULL baton. Callers may overwrite the
+ *  malloc, free and baton members to plug in their own allocator.
+ */
+cb_tree_t cb_tree_make(void)
+{
+       cb_tree_t tree;
+
+       tree.root = NULL;
+       tree.malloc = &malloc_std;
+       tree.free = &free_std;
+       tree.baton = NULL;
+       return tree;
+}
+
+/*! Returns non-zero if tree contains str.
+ *  Descends from the root by testing one critical bit of str per internal
+ *  node, then compares str against the single string reached.
+ */
+int cb_tree_contains(cb_tree_t *tree, const char *str)
+{
+       const uint8_t *key = (void *)str;
+       const size_t keylen = strlen(str);
+       uint8_t *cur = tree->root;
+
+       if (cur == NULL) {
+               return 0;
+       }
+
+       /* An odd pointer denotes an internal node stored one byte below it */
+       while (1 & (intptr_t)cur) {
+               const cb_node_t *node = (void *)(cur - 1);
+               /* Bytes past the end of str are treated as zero */
+               const uint8_t c = (node->byte < keylen) ? key[node->byte] : 0;
+
+               cur = node->child[(1 + (node->otherbits | c)) >> 8];
+       }
+
+       return strcmp(str, (const char *)cur) == 0;
+}
+
+/*! Inserts str into tree, returns 0 on success.
+ *  The tree stores its own copy of str, allocated through tree->malloc.
+ *  Returns 1 if str is already present and ENOMEM if an allocation fails;
+ *  in both cases the tree is left unchanged.
+ */
+int cb_tree_insert(cb_tree_t *tree, const char *str)
+{
+       const uint8_t *const ubytes = (void *)str;
+       const size_t ulen = strlen(str);
+       uint8_t *p = tree->root;
+       uint8_t c, *x;
+       uint32_t newbyte;
+       uint32_t newotherbits;
+       int direction, newdirection;
+       cb_node_t *newnode;
+       void **wherep;
+
+       if (p == NULL) { /* Empty tree: the copy of str becomes the root */
+               x = tree->malloc(ulen + 1, tree->baton);
+               if (x == NULL) {
+                       return ENOMEM;
+               }
+               memcpy(x, str, ulen + 1);
+               tree->root = x;
+               return 0;
+       }
+
+       while (1 & (intptr_t)p) { /* Follow the critical bits of str down to a stored string */
+               cb_node_t *q = (void *)(p - 1);
+               c = 0;
+               if (q->byte < ulen) {
+                       c = ubytes[q->byte];
+               }
+               direction = (1 + (q->otherbits | c)) >> 8;
+
+               p = q->child[direction];
+       }
+
+       for (newbyte = 0; newbyte < ulen; ++newbyte) { /* Find the first byte where str and that string differ */
+               if (p[newbyte] != ubytes[newbyte]) {
+                       newotherbits = p[newbyte] ^ ubytes[newbyte];
+                       goto different_byte_found;
+               }
+       }
+
+       if (p[newbyte] != 0) { /* str is a proper prefix of the stored string */
+               newotherbits = p[newbyte];
+               goto different_byte_found;
+       }
+       return 1; /* Identical string already stored */
+
+different_byte_found:
+       newotherbits |= newotherbits >> 1; /* Smear the highest differing bit into all lower bits... */
+       newotherbits |= newotherbits >> 2;
+       newotherbits |= newotherbits >> 4;
+       newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255; /* ...keep only that bit, then invert it into a mask */
+       c = p[newbyte];
+       newdirection = (1 + (newotherbits | c)) >> 8; /* 1 if the stored string has the critical bit set, else 0 */
+
+       newnode = tree->malloc(sizeof(cb_node_t), tree->baton);
+       if (newnode == NULL) {
+               return ENOMEM;
+       }
+
+       x = tree->malloc(ulen + 1, tree->baton);
+       if (x == NULL) {
+               tree->free(newnode, tree->baton);
+               return ENOMEM;
+       }
+
+       memcpy(x, ubytes, ulen + 1);
+       newnode->byte = newbyte;
+       newnode->otherbits = newotherbits;
+       newnode->child[1 - newdirection] = x; /* The new string takes the side opposite to the stored one */
+
+       /* Insert into tree */
+       wherep = &tree->root;
+       for (;;) {
+               cb_node_t *q;
+               p = *wherep;
+               if (!(1 & (intptr_t)p)) { /* Reached a stored string */
+                       break;
+               }
+
+               q = (void *)(p - 1);
+               if (q->byte > newbyte) { /* Existing node tests a later byte than the new node */
+                       break;
+               }
+               if (q->byte == newbyte && q->otherbits > newotherbits) { /* Same byte, but a less significant bit */
+                       break;
+               }
+
+               c = 0;
+               if (q->byte < ulen) {
+                       c = ubytes[q->byte];
+               }
+               direction = (1 + (q->otherbits | c)) >> 8;
+               wherep = q->child + direction;
+       }
+
+       newnode->child[newdirection] = *wherep; /* The displaced subtree hangs off the new node */
+       *wherep = (void *)(1 + (char *)newnode); /* Tag the pointer (+1) to mark an internal node */
+       return 0;
+}
+
+/*! Deletes str from the tree, returns 0 on success.
+ *  Returns 1 when the tree is empty or does not hold str. On success the
+ *  stored copy of str and, unless it was the only entry, its parent node
+ *  are released through tree->free.
+ */
+int cb_tree_delete(cb_tree_t *tree, const char *str)
+{
+       const uint8_t *key = (void *)str;
+       const size_t keylen = strlen(str);
+       uint8_t *cur = tree->root;
+       void **slot = NULL;        /* link that points at `cur` */
+       void **parent_slot = NULL; /* link that points at `parent` */
+       cb_node_t *parent = NULL;
+       int side = 0;
+
+       if (tree->root == NULL) {
+               return 1;
+       }
+
+       /* Descend to a stored string, remembering the last internal node */
+       slot = &tree->root;
+       while (1 & (intptr_t)cur) {
+               uint8_t c = 0;
+
+               parent_slot = slot;
+               parent = (void *)(cur - 1);
+               if (parent->byte < keylen) {
+                       c = key[parent->byte];
+               }
+               side = (1 + (parent->otherbits | c)) >> 8;
+               slot = parent->child + side;
+               cur = *slot;
+       }
+
+       if (strcmp(str, (const char *)cur) != 0) {
+               return 1;
+       }
+       tree->free(cur, tree->baton);
+
+       /* No internal node was passed: the string was the only entry */
+       if (parent_slot == NULL) {
+               tree->root = NULL;
+               return 0;
+       }
+
+       /* The sibling subtree takes the place of the parent node */
+       *parent_slot = parent->child[1 - side];
+       tree->free(parent, tree->baton);
+       return 0;
+}
+
+/*! Clears the given tree: releases every node and string and leaves the
+ *  root NULL, so the tree can be reused afterwards.
+ */
+void cb_tree_clear(cb_tree_t *tree)
+{
+       if (tree->root != NULL) {
+               cbt_traverse_delete(tree, tree->root);
+               tree->root = NULL;
+       }
+}
+
+/*! Calls callback for all strings in tree with the given prefix.
+ *  Returns 0 when the tree is empty, when no stored string starts with
+ *  prefix, or when every callback returned 0. A non-zero callback result
+ *  stops the walk and is returned as-is.
+ */
+int cb_tree_walk_prefixed(cb_tree_t *tree, const char *prefix,
+       int (*callback)(const char *, void *), void *baton)
+{
+       const uint8_t *ubytes = (void *)prefix;
+       const size_t ulen = strlen(prefix);
+       uint8_t *p = tree->root;
+       uint8_t *top = p; /* Subtree that will be walked; starts as the whole tree */
+
+       if (p == NULL) {
+               return 0;
+       }
+
+       while (1 & (intptr_t)p) { /* Descend along the bits of prefix until a stored string is reached */
+               cb_node_t *q = (void *)(p - 1);
+               uint8_t c = 0;
+               int direction;
+
+               if (q->byte < ulen) {
+                       c = ubytes[q->byte];
+               }
+               direction = (1 + (q->otherbits | c)) >> 8;
+
+               p = q->child[direction];
+               if (q->byte < ulen) { /* Node tested a byte inside prefix: narrow the subtree to walk */
+                       top = p;
+               }
+       }
+
+       if (strlen((const char *)p) < ulen || memcmp(p, prefix, ulen) != 0) { /* Check the reached string really starts with prefix */
+               /* No strings match */
+               return 0;
+       }
+
+       return cbt_traverse_prefixed(top, callback, baton);
+}
diff --git a/lib/generic/critbit.h b/lib/generic/critbit.h
new file mode 100644 (file)
index 0000000..c3b9210
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * critbit89 - A crit-bit tree implementation for strings in C89
+ * Written by Jonas Gehring <jonas@jgehring.net>
+ */
+
+
+#ifndef CRITBIT_H_
+#define CRITBIT_H_
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*! Main data structure.
+ *  Obtain an initialised instance from cb_tree_make(); the malloc, free and
+ *  baton members may then be replaced to use a custom allocator.
+ */
+typedef struct {
+       void *root; /*! NULL while the tree is empty */
+       void *(*malloc)(size_t size, void *baton); /*! Allocation hook; a NULL result makes cb_tree_insert() return ENOMEM */
+       void (*free)(void *ptr, void *baton); /*! Release hook for memory obtained from malloc */
+       void *baton; /*! Passed to malloc() and free() */
+} cb_tree_t;
+
+/*! Creates a new, empty critbit tree */
+extern cb_tree_t cb_tree_make();
+
+/*! Returns non-zero if tree contains str */
+extern int cb_tree_contains(cb_tree_t *tree, const char *str);
+
+/*! Inserts str into tree, returns 0 on success */
+extern int cb_tree_insert(cb_tree_t *tree, const char *str);
+
+/*! Deletes str from the tree, returns 0 on success */
+extern int cb_tree_delete(cb_tree_t *tree, const char *str);
+
+/*! Clears the given tree */
+extern void cb_tree_clear(cb_tree_t *tree);
+
+/*! Calls callback for all strings in tree with the given prefix  */
+extern int cb_tree_walk_prefixed(cb_tree_t *tree, const char *prefix,
+       int (*callback)(const char *, void *), void *baton);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CRITBIT_H_ */
\ No newline at end of file
index 151b1446bab83dcb27b61397fae6faa013f23290..2522efae066a7f319431c3cf848f4ef5111c9119 100644 (file)
@@ -1,4 +1,5 @@
 libkresolve_SOURCES := \
+       lib/generic/critbit.c  \
        lib/layer/iterate.c    \
        lib/layer/itercache.c  \
        lib/utils.c            \
@@ -10,6 +11,8 @@ libkresolve_SOURCES := \
        lib/cache.c
 
 libkresolve_HEADERS := \
+       lib/generic/array.h    \
+       lib/generic/critbit.h  \
        lib/layer.h            \
        lib/utils.h            \
        lib/nsrep.h            \
index 89adc7edcee6b765cd0bbebc27be83fd7f937a2d..fd67c54bd651f6698d6fbbd53354820d43ad2f34 100644 (file)
@@ -3,6 +3,7 @@
 #
 
 tests_BIN := \
+       test_critbit \
        test_generics \
        test_utils \
        test_module \
diff --git a/tests/test_critbit.c b/tests/test_critbit.c
new file mode 100644 (file)
index 0000000..e8e5482
--- /dev/null
@@ -0,0 +1,280 @@
+/*
+ * critbit89 - A crit-bit tree implementation for strings in C89
+ * Written by Jonas Gehring <jonas@jgehring.net>
+ */
+
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "critbit.h"
+
+
+/*
+ * Sample dictionary: 100 random words from /usr/share/dict/words
+ * Generated using random.org:
+ * MAX=`wc -l < /usr/share/dict/words | tr -d " "`
+ * for i in `curl "http://www.random.org/integers/?num=100&min=1&max=$MAX&col=1&base=10&format=plain&rnd=new"`; do
+ *   nl /usr/share/dict/words | grep -w $i | tr -d "0-9\t "
+ * done
+ */
+static const char *dict[] = {
+       "catagmatic", "prevaricator", "statoscope", "workhand", "benzamide",
+       "alluvia", "fanciful", "bladish", "Tarsius", "unfast", "appropriative",
+       "seraphically", "monkeypod", "deflectometer", "tanglesome", "zodiacal",
+       "physiologically", "economizer", "forcepslike", "betrumpet",
+       "Danization", "broadthroat", "randir", "usherette", "nephropyosis",
+       "hematocyanin", "chrysohermidin", "uncave", "mirksome", "podophyllum",
+       "siphonognathous", "indoor", "featheriness", "forwardation",
+       "archruler", "soricoid", "Dailamite", "carmoisin", "controllability",
+       "unpragmatical", "childless", "transumpt", "productive",
+       "thyreotoxicosis", "oversorrow", "disshadow", "osse", "roar",
+       "pantomnesia", "talcer", "hydrorrhoea", "Satyridae", "undetesting",
+       "smoothbored", "widower", "sivathere", "pendle", "saltation",
+       "autopelagic", "campfight", "unexplained", "Macrorhamphosus",
+       "absconsa", "counterflory", "interdependent", "triact", "reconcentration",
+       "oversharpness", "sarcoenchondroma", "superstimulate", "assessory",
+       "pseudepiscopacy", "telescopically", "ventriloque", "politicaster",
+       "Caesalpiniaceae", "inopportunity", "Helion", "uncompatible",
+       "cephaloclasia", "oversearch", "Mahayanistic", "quarterspace",
+       "bacillogenic", "hamartite", "polytheistical", "unescapableness",
+       "Pterophorus", "cradlemaking", "Hippoboscidae", "overindustrialize",
+       "perishless", "cupidity", "semilichen", "gadge", "detrimental",
+       "misencourage", "toparchia", "lurchingly", "apocatastasis"
+};
+
+static int tnum = 0;
+
+
+/* Insertions: adds every dictionary word and aborts on the first failure */
+static void test_insert(cb_tree_t *tree)
+{
+       const int count = sizeof(dict) / sizeof(const char *);
+       int idx = 0;
+
+       while (idx < count) {
+               const int rc = cb_tree_insert(tree, dict[idx]);
+
+               if (rc != 0) {
+                       fprintf(stderr, "Insertion failed\n");
+                       abort();
+               }
+               ++idx;
+       }
+}
+
+/* Insertion of duplicate element: re-inserting a word the tree already
+ * holds must return 1; words not currently in the tree are skipped */
+static void test_insert_dup(cb_tree_t *tree)
+{
+       const int count = sizeof(dict) / sizeof(const char *);
+       int idx;
+
+       for (idx = 0; idx < count; ++idx) {
+               const char *word = dict[idx];
+
+               if (cb_tree_contains(tree, word) && cb_tree_insert(tree, word) != 1) {
+                       fprintf(stderr, "Insertion of duplicate '%s' should fail\n", word);
+                       abort();
+               }
+       }
+}
+
+/* Searching: a stored word must be found, while an unrelated string, the
+ * empty string and a proper prefix of a stored word must not be.
+ * The lookup key is a heap copy so that it can be truncated in place. */
+static void test_contains(cb_tree_t *tree)
+{
+       char *in;
+       const char *notin = "not in tree";
+       const size_t inlen = strlen(dict[23]);
+
+       in = malloc(inlen + 1);
+       if (in == NULL) {
+               /* Without this check a failed allocation is dereferenced below */
+               fprintf(stderr, "Out of memory\n");
+               abort();
+       }
+       memcpy(in, dict[23], inlen + 1);
+
+       if (cb_tree_contains(tree, in) != 1) {
+               fprintf(stderr, "Tree should contain '%s'\n", in);
+               abort();
+       }
+       if (cb_tree_contains(tree, notin) != 0) {
+               fprintf(stderr, "Tree should not contain '%s'\n", notin);
+               abort();
+       }
+       if (cb_tree_contains(tree, "") != 0) {
+               fprintf(stderr, "Tree should not contain empty string\n");
+               abort();
+       }
+       /* Cut the word in half to obtain a proper prefix of it */
+       in[inlen / 2] = '\0';
+       if (cb_tree_contains(tree, in) != 0) {
+               fprintf(stderr, "Tree should not contain prefix of '%s'\n", in);
+               abort();
+       }
+
+       free(in);
+}
+
+/* Count number of items: walk callback that increments the int that n
+ * points to and always returns 0 so the walk continues */
+static int count_cb(const char *s, void *n)
+{
+       int *counter = n;
+
+       (void)s;
+       *counter += 1;
+       return 0;
+}
+/* Walks the whole tree (empty prefix) and aborts unless exactly n strings
+ * are visited */
+static void test_complete(cb_tree_t *tree, int n)
+{
+       int walked = 0;
+       const int rc = cb_tree_walk_prefixed(tree, "", count_cb, &walked);
+
+       if (rc != 0) {
+               fprintf(stderr, "Walking with empty prefix failed\n");
+               abort();
+       }
+       if (walked != n) {
+               fprintf(stderr, "%d items expected, but %d walked\n", n, walked);
+               abort();
+       }
+}
+
+/* Deletion: removing a stored word must return 0, removing a string that
+ * is not in the tree must return 1 */
+static void test_delete(cb_tree_t *tree)
+{
+       int rc;
+
+       rc = cb_tree_delete(tree, dict[91]);
+       if (rc != 0) {
+               fprintf(stderr, "Deletion failed\n");
+               abort();
+       }
+
+       rc = cb_tree_delete(tree, "most likely not in tree");
+       if (rc != 1) {
+               fprintf(stderr, "Deletion of item not in tree should fail\n");
+               abort();
+       }
+}
+
+/* Complete deletion: removes every dictionary word that is currently in
+ * the tree and aborts if one of those removals fails */
+static void test_delete_all(cb_tree_t *tree)
+{
+       const int count = sizeof(dict) / sizeof(const char *);
+       int idx;
+
+       for (idx = 0; idx < count; ++idx) {
+               const char *word = dict[idx];
+
+               if (cb_tree_contains(tree, word) && cb_tree_delete(tree, word) != 0) {
+                       fprintf(stderr, "Deletion of '%s' failed\n", word);
+                       abort();
+               }
+       }
+}
+
+/* Fake allocator: reports failure for every request, whatever the size */
+static void *fake_malloc(size_t s, void *b)
+{
+       (void)s;
+       (void)b;
+       return NULL;
+}
+/* Inserting into a fresh tree whose allocator always fails must report
+ * ENOMEM. The tree passed in is not used. */
+static void test_allocator(cb_tree_t *unused)
+{
+       cb_tree_t failing = cb_tree_make();
+       int rc;
+
+       (void)unused;
+       failing.malloc = fake_malloc;
+       rc = cb_tree_insert(&failing, dict[0]);
+       if (rc != ENOMEM) {
+               fprintf(stderr, "ENOMEM failure expected\n");
+               abort();
+       }
+}
+
+/* Empty tree: lookups must find nothing and deletions must fail */
+static void test_empty(cb_tree_t *tree)
+{
+       const char *word = dict[1];
+
+       if (cb_tree_contains(tree, word)) {
+               fprintf(stderr, "Empty tree expected\n");
+               abort();
+       }
+       if (!cb_tree_delete(tree, word)) {
+               fprintf(stderr, "Empty tree expected\n");
+               abort();
+       }
+}
+
+/* Prefix walking: adds four digit-prefixed strings, then checks how many
+ * strings are visited for a shared prefix, a non-matching prefix, a prefix
+ * longer than every stored string and a prefix equal to a stored string */
+static void test_prefixes(cb_tree_t *tree)
+{
+       static const char *const extra[] = { "1str", "11str2", "12str", "11str" };
+       const int extra_count = sizeof(extra) / sizeof(extra[0]);
+       int hits = 0;
+       int k;
+
+       for (k = 0; k < extra_count; ++k) {
+               if (cb_tree_insert(tree, extra[k]) != 0) {
+                       fprintf(stderr, "Insertion failed\n");
+                       abort();
+               }
+       }
+
+       /* "11" is shared by "11str" and "11str2" */
+       if (cb_tree_walk_prefixed(tree, "11", count_cb, &hits) != 0) {
+               fprintf(stderr, "Walking with prefix failed\n");
+               abort();
+       }
+       if (hits != 2) {
+               fprintf(stderr, "2 items expected, but %d walked\n", hits);
+               abort();
+       }
+
+       hits = 0;
+       if (cb_tree_walk_prefixed(tree, "13", count_cb, &hits) != 0) {
+               fprintf(stderr, "Walking with non-matching prefix failed\n");
+               abort();
+       }
+       if (hits != 0) {
+               fprintf(stderr, "0 items expected, but %d walked\n", hits);
+               abort();
+       }
+
+       hits = 0;
+       if (cb_tree_walk_prefixed(tree, "12345678", count_cb, &hits) != 0) {
+               fprintf(stderr, "Walking with long prefix failed\n");
+               abort();
+       }
+       if (hits != 0) {
+               fprintf(stderr, "0 items expected, but %d walked\n", hits);
+               abort();
+       }
+
+       /* A stored string counts as carrying itself as a prefix */
+       hits = 0;
+       if (cb_tree_walk_prefixed(tree, "11str", count_cb, &hits) != 0) {
+               fprintf(stderr, "Walking with exactly matching prefix failed\n");
+               abort();
+       }
+       if (hits != 2) {
+               fprintf(stderr, "2 items expected, but %d walked\n", hits);
+               abort();
+       }
+}
+
+
+/* Prints the number of the test group about to run and flushes it, so the
+ * last number shown identifies the group that aborted */
+static void announce_next(void)
+{
+       ++tnum;
+       printf("%d ", tnum);
+       fflush(stdout);
+}
+
+/* Program entry point: runs the test groups in a fixed order on one shared
+ * tree and prints "ok" once all of them have passed */
+int main(int argc, char **argv)
+{
+       const int dict_count = sizeof(dict) / sizeof(const char *);
+       cb_tree_t tree = cb_tree_make();
+
+       (void)argc;
+       (void)argv;
+
+       announce_next();
+       test_insert(&tree);
+
+       announce_next();
+       test_complete(&tree, dict_count);
+
+       announce_next();
+       test_insert_dup(&tree);
+
+       announce_next();
+       test_contains(&tree);
+
+       announce_next();
+       test_delete(&tree);
+
+       /* Start over from a full tree */
+       announce_next();
+       cb_tree_clear(&tree);
+       test_insert(&tree);
+       test_complete(&tree, dict_count);
+
+       announce_next();
+       test_delete_all(&tree);
+
+       announce_next();
+       test_complete(&tree, 0);
+
+       announce_next();
+       test_allocator(&tree);
+
+       announce_next();
+       cb_tree_clear(&tree);
+       test_empty(&tree);
+
+       announce_next();
+       test_insert(&tree);
+       test_prefixes(&tree);
+
+       cb_tree_clear(&tree);
+       printf("ok\n");
+       return 0;
+}
\ No newline at end of file