From: Wouter Wijngaards <wouter@nlnetlabs.nl>
Date: Wed, 7 Mar 2007 16:21:31 +0000 (+0000)
Subject: Simple thread problem detector code.
X-Git-Tag: release-0.1~6
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a43a0427923081c9e7467c31723450b44e181025;p=thirdparty%2Funbound.git

Simple thread problem detector code.


git-svn-id: file:///svn/unbound/trunk@165 be551aaa-1e26-0410-a405-d3ace91eadb9
---

diff --git a/Makefile.in b/Makefile.in
index ba622448d..313f2b3ef 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -50,7 +50,7 @@ LINTFLAGS+="-DBN_ULONG=unsigned long" -Dkrb5_int32=int "-Dkrb5_ui_4=unsigned int
 
 INSTALL=$(srcdir)/install-sh 
 
-COMMON_SRC=$(wildcard services/*.c util/*.c) util/configparser.c util/configlexer.c
+COMMON_SRC=$(wildcard services/*.c util/*.c) util/configparser.c util/configlexer.c testcode/checklocks.c
 COMMON_OBJ=$(addprefix $(BUILD),$(COMMON_SRC:.c=.o))
 COMPAT_OBJ=$(addprefix $(BUILD)compat/,$(LIBOBJS))
 UNITTEST_SRC=testcode/unitmain.c $(COMMON_SRC)
diff --git a/daemon/worker.h b/daemon/worker.h
index 72576a5f8..5121bdc0d 100644
--- a/daemon/worker.h
+++ b/daemon/worker.h
@@ -68,10 +68,10 @@ enum worker_commands {
  * Holds globally visible information.
  */
 struct worker {
+	/** the thread number (in daemon array). First in struct for debug. */
+	int thread_num;
 	/** global shared daemon structure */
 	struct daemon* daemon;
-	/** the thread number (in daemon array). */
-	int thread_num;
 	/** thread id */
 	ub_thread_t thr_id;
 	/** fd 0 of socketpair, write commands for worker to this one */
diff --git a/doc/Changelog b/doc/Changelog
index 57ac174b1..19cd2f1d0 100644
--- a/doc/Changelog
+++ b/doc/Changelog
@@ -1,3 +1,8 @@
+7 March 2007: Wouter
+	- created a wrapper around thread calls that performs some basic
+	  checking for data race and deadlock, and basic performance 
+	  contention measurement.
+
 6 March 2007: Wouter
 	- Testbed works with threading (different machines, different options).
 	- alloc work, does the special type.
diff --git a/doc/TODO b/doc/TODO
index 50e3825cc..32a8a6ada 100644
--- a/doc/TODO
+++ b/doc/TODO
@@ -1,3 +1,5 @@
 TODO items.
 o use real entropy to make random (ID, port) numbers more random.
 o in production mode, do not free memory on exit. In debug mode, test leaks.
+o profile memory allocation, and if performance issues, use special memory
+  allocator. For example, with caches per thread.
diff --git a/testcode/checklocks.c b/testcode/checklocks.c
new file mode 100644
index 000000000..917e30395
--- /dev/null
+++ b/testcode/checklocks.c
@@ -0,0 +1,554 @@
+/**
+ * testcode/checklocks.c - wrapper on locks that checks access.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ * 
+ * This software is open source.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * 
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include <signal.h>
+#include "util/locks.h"   /* include before checklocks.h */
+#include "testcode/checklocks.h"
+
+/**
+ * \file
+ * Locks that are checked.
+ *
+ * Ugly hack: uses the fact that workers are passed to thread_create to make
+ * the thread numbers here the same as those used for logging which is nice.
+ *
+ * Todo: - check global ordering of instances of locks.
+ *       - refcount statistics.
+ *	 - debug status print, of thread lock stacks, and current waiting.
+ */
+#ifdef USE_THREAD_DEBUG
+
+/** if key has been created */
+static int key_created = 0;
+/** we hide the thread debug info with this key. */
+static ub_thread_key_t thr_debug_key;
+/** the list of threads, so all threads can be examined. NULL at start. */
+static struct thr_check* thread_infos[THRDEBUG_MAX_THREADS];
+
+/** print pretty lock error and exit */
+static void lock_error(struct checked_lock* lock, 
+	const char* func, const char* file, int line, const char* err)
+{
+	log_err("lock error (description follows)");
+	log_err("Created at %s %s %d", lock->create_func, lock->create_file, lock->create_line);
+	log_err("Previously %s %s %d", lock->holder_func, lock->holder_file, lock->holder_line);
+	log_err("At %s %s %d", func, file, line);
+	log_err("Error for %s lock: %s",
+		(lock->type==check_lock_mutex)?"mutex": (
+		(lock->type==check_lock_spinlock)?"spinlock": "rwlock"), err);
+	fatal_exit("bailing out");
+}
+
+/** obtain lock on debug lock structure. This could deadlock, so the
+ * lock is taken with a timeout; failure is reported as a lock error.
+ * @param lock: on what to acquire lock.
+ * @param func: user level caller identification.
+ * @param file: user level caller identification.
+ * @param line: user level caller identification.
+ */
+static void
+acquire_locklock(struct checked_lock* lock, 
+	const char* func, const char* file, int line)
+{
+	struct timespec to;
+	int err;
+	int contend = 0;
+	/* first try; inc contention counter if not immediately */
+	if((err = pthread_mutex_trylock(&lock->lock))) {
+		if(err==EBUSY)
+			contend++;
+		else fatal_exit("error in mutex_trylock: %s", strerror(err));
+	}
+	if(!err)
+		return; /* immediate success */
+	to.tv_sec = time(NULL) + CHECK_LOCK_TIMEOUT;
+	to.tv_nsec = 0;
+	err = pthread_mutex_timedlock(&lock->lock, &to);
+	if(err) {
+		log_err("in acquiring locklock: %s", strerror(err));
+		lock_error(lock, func, file, line, "acquire locklock");
+	}
+	lock->contention_count += contend;
+}
+
+/** add protected region */
+void 
+lock_protect(struct checked_lock* lock, void* area, size_t size)
+{
+	struct protected_area* e = (struct protected_area*)calloc(1,
+		sizeof(struct protected_area));
+	if(!e)
+		fatal_exit("lock_protect: out of memory");
+	e->region = area;
+	e->size = size;
+	e->hold = calloc(1, size);
+	if(!e->hold)
+		fatal_exit("lock_protect: out of memory");
+	memcpy(e->hold, e->region, e->size);
+
+	acquire_locklock(lock, __func__, __FILE__, __LINE__);
+	e->next = lock->prot;
+	lock->prot = e;
+	LOCKRET(pthread_mutex_unlock(&lock->lock));
+}
+
+/** 
+ * Check protected memory region. Memory compare. Exit on error. 
+ * @param lock: which lock to check.
+ * @param func: location we are now (when failure is detected).
+ * @param file: location we are now (when failure is detected).
+ * @param line: location we are now (when failure is detected).
+ */
+static void 
+prot_check(struct checked_lock* lock,
+	const char* func, const char* file, int line)
+{
+	struct protected_area* p = lock->prot;
+	while(p) {
+		if(memcmp(p->hold, p->region, p->size) != 0) {
+			lock_error(lock, func, file, line, 
+				"protected area modified");
+		}
+		p = p->next;
+	}
+}
+
+/** Copy protected memory region. */
+static void 
+prot_store(struct checked_lock* lock)
+{
+	struct protected_area* p = lock->prot;
+	while(p) {
+		memcpy(p->hold, p->region, p->size);
+		p = p->next;
+	}
+}
+
+
+/** alloc struct, init lock empty */
+void 
+checklock_init(enum check_lock_type type, struct checked_lock** lock,
+        const char* func, const char* file, int line)
+{
+	struct checked_lock* e = (struct checked_lock*)calloc(1, 
+		sizeof(struct checked_lock));
+	if(!e)
+		fatal_exit("%s %s %d: out of memory", func, file, line);
+	*lock = e;
+	e->type = type;
+	e->create_func = func;
+	e->create_file = file;
+	e->create_line = line;
+	LOCKRET(pthread_mutex_init(&e->lock, NULL));
+	switch(e->type) {
+		case check_lock_mutex:
+			LOCKRET(pthread_mutex_init(&e->mutex, NULL));
+			break;
+		case check_lock_spinlock:
+			LOCKRET(pthread_spin_init(&e->spinlock, PTHREAD_PROCESS_PRIVATE));
+			break;
+		case check_lock_rwlock:
+			LOCKRET(pthread_rwlock_init(&e->rwlock, NULL));
+			break;
+		default:
+			log_assert(0);
+	}
+}
+
+/** delete prot items */
+static void prot_delete(struct checked_lock* lock)
+{
+	struct protected_area* p=lock->prot, *np;
+	while(p) {
+		np = p->next;
+		free(p->hold);
+		free(p);
+		p = np;
+	}
+}
+
+/** check if type is OK for the lock given */
+static void 
+checktype(enum check_lock_type type, struct checked_lock* lock,
+        const char* func, const char* file, int line)
+{
+	if(type != lock->type) {
+		lock_error(lock, func, file, line, "wrong lock type");
+	}
+}
+
+/** check if OK (not held, not waited on, areas unmodified), free struct */
+void
+checklock_destroy(enum check_lock_type type, struct checked_lock** lock,
+        const char* func, const char* file, int line)
+{
+	const size_t contention_interest = 10;
+	struct checked_lock* e;
+	if(!lock)
+		return;
+	e = *lock;
+	if(!e)
+		return;
+	*lock = NULL; /* use after free will fail */
+	checktype(type, e, func, file, line);
+
+	/* check if delete is OK */
+	acquire_locklock(e, func, file, line);
+	if(e->hold_count != 0)
+		lock_error(e, func, file, line, "delete while locked.");
+	if(e->wait_count != 0)
+		lock_error(e, func, file, line, "delete while waited on.");
+	prot_check(e, func, file, line);
+	LOCKRET(pthread_mutex_unlock(&e->lock));
+
+	/* contention: only report locks that were contended more than a few times */
+	if(e->contention_count > contention_interest) {
+		log_info("lock created %s %s %d has contention %u",
+			e->create_func, e->create_file, e->create_line,
+			(unsigned int)e->contention_count);
+	}
+
+	/* delete it */
+	LOCKRET(pthread_mutex_destroy(&e->lock));
+	prot_delete(e);
+	/* since nobody holds the lock - see check above, no need to unlink */
+	switch(e->type) {
+		case check_lock_mutex:
+			LOCKRET(pthread_mutex_destroy(&e->mutex));
+			break;
+		case check_lock_spinlock:
+			LOCKRET(pthread_spin_destroy(&e->spinlock));
+			break;
+		case check_lock_rwlock:
+			LOCKRET(pthread_rwlock_destroy(&e->rwlock));
+			break;
+		default:
+			log_assert(0);
+	}
+	memset(e, 0, sizeof(*e)); /* wipe the whole struct, not just pointer size */
+	free(e);
+}
+
+/** finish acquiring lock, shared between _(rd|wr||)lock() routines. */
+static void 
+finish_acquire_lock(struct thr_check* thr, struct checked_lock* lock,
+        const char* func, const char* file, int line)
+{
+	thr->waiting = NULL;
+	lock->wait_count --;
+	lock->holder = thr;
+	lock->hold_count ++;
+	lock->holder_func = func;
+	lock->holder_file = file;
+	lock->holder_line = line;
+	
+	/* insert in thread lock list, as first */
+	lock->prev_held_lock[thr->num] = NULL;
+	lock->next_held_lock[thr->num] = thr->holding_first;
+	if(thr->holding_first)
+		/* no need to lock it, since this thread already holds the
+		 * lock (since it is on this list) and we only edit thr->num
+		 * member in array. So it is safe.  */
+		thr->holding_first->prev_held_lock[thr->num] = lock;
+	else	thr->holding_last = lock;
+	thr->holding_first = lock;
+}
+
+/**
+ * Locking routine.
+ * @param type: as passed by user.
+ * @param lock: as passed by user.
+ * @param func: caller location.
+ * @param file: caller location.
+ * @param line: caller location.
+ * @param tryfunc: the pthread_mutex_trylock or similar function.
+ * @param timedfunc: the pthread_mutex_timedlock or similar function.
+ *	Uses absolute timeout value.
+ * @param arg: what to pass to tryfunc and timedlock.
+ * @param exclusive: if lock must be exclusive (only one allowed).
+ */
+static void 
+checklock_lockit(enum check_lock_type type, struct checked_lock* lock,
+        const char* func, const char* file, int line,
+	int (*tryfunc)(void*), int (*timedfunc)(void*, struct timespec*),
+	void* arg, int exclusive)
+{
+	int err;
+	int contend = 0;
+	struct thr_check *thr = (struct thr_check*)pthread_getspecific(
+		thr_debug_key);
+	checktype(type, lock, func, file, line);
+	if(!thr) lock_error(lock, func, file, line, "no thread info");
+	
+	acquire_locklock(lock, func, file, line);
+	lock->wait_count ++;
+	thr->waiting = lock;
+	if(exclusive && lock->hold_count > 0 && lock->holder == thr) 
+		lock_error(lock, func, file, line, "thread already owns lock");
+	LOCKRET(pthread_mutex_unlock(&lock->lock));
+
+	/* first try; if busy increase contention counter */
+	if((err=tryfunc(arg))) {
+		struct timespec to;
+		if(err != EBUSY) log_err("trylock: %s", strerror(err));
+		to.tv_sec = time(NULL) + CHECK_LOCK_TIMEOUT;
+		to.tv_nsec = 0;
+		if((err=timedfunc(arg, &to))) {
+			if(err == ETIMEDOUT)
+				lock_error(lock, func, file, line, 
+					"timeout, deadlock?");
+			log_err("timedlock: %s", strerror(err));
+		}
+		contend ++;
+	}
+	/* got the lock */
+
+	acquire_locklock(lock, func, file, line);
+	lock->contention_count += contend;
+	if(exclusive && lock->hold_count > 0)
+		lock_error(lock, func, file, line, "got nonexclusive lock");
+	/* check the memory areas for unauthorized changes,
+	 * between last unlock time and current lock time.
+	 * we check while holding the lock (threadsafe).
+	 */
+	prot_check(lock, func, file, line);
+	finish_acquire_lock(thr, lock, func, file, line);
+	LOCKRET(pthread_mutex_unlock(&lock->lock));
+}
+
+/** helper for rdlock: try */
+static int try_rd(void* arg)
+{ return pthread_rwlock_tryrdlock((pthread_rwlock_t*)arg); }
+/** helper for rdlock: timed */
+static int timed_rd(void* arg, struct timespec* to)
+{ return pthread_rwlock_timedrdlock((pthread_rwlock_t*)arg, to); }
+
+/** check if OK, lock */
+void 
+checklock_rdlock(enum check_lock_type type, struct checked_lock* lock,
+        const char* func, const char* file, int line)
+{
+
+	log_assert(type == check_lock_rwlock);
+	checklock_lockit(type, lock, func, file, line,
+		try_rd, timed_rd, &lock->rwlock, 0);
+}
+
+/** helper for wrlock: try */
+static int try_wr(void* arg)
+{ return pthread_rwlock_trywrlock((pthread_rwlock_t*)arg); }
+/** helper for wrlock: timed */
+static int timed_wr(void* arg, struct timespec* to)
+{ return pthread_rwlock_timedwrlock((pthread_rwlock_t*)arg, to); }
+
+/** check if OK, lock */
+void 
+checklock_wrlock(enum check_lock_type type, struct checked_lock* lock,
+        const char* func, const char* file, int line)
+{
+	log_assert(type == check_lock_rwlock);
+	checklock_lockit(type, lock, func, file, line,
+		try_wr, timed_wr, &lock->rwlock, 0);
+}
+
+/** helper for lock mutex: try */
+static int try_mutex(void* arg)
+{ return pthread_mutex_trylock((pthread_mutex_t*)arg); }
+/** helper for lock mutex: timed */
+static int timed_mutex(void* arg, struct timespec* to)
+{ return pthread_mutex_timedlock((pthread_mutex_t*)arg, to); }
+
+/** helper for lock spinlock: try */
+static int try_spinlock(void* arg)
+{ return pthread_spin_trylock((pthread_spinlock_t*)arg); }
+/** helper for lock spinlock: timed */
+static int timed_spinlock(void* arg, struct timespec* to)
+{
+	int err;
+	/* spin for 5 seconds. (ouch for the CPU, but it beats forever) */
+	while( (err=try_spinlock(arg)) == EBUSY) {
+#ifndef S_SPLINT_S
+		if(time(NULL) >= to->tv_sec)
+			return ETIMEDOUT;
+#endif
+	}
+	return err;
+}
+
+/** check if OK, lock */
+void 
+checklock_lock(enum check_lock_type type, struct checked_lock* lock,
+        const char* func, const char* file, int line)
+{
+	log_assert(type != check_lock_rwlock);
+	switch(type) {
+		case check_lock_mutex:
+			checklock_lockit(type, lock, func, file, line,
+				try_mutex, timed_mutex, &lock->mutex, 1);
+			break;
+		case check_lock_spinlock:
+			/* void* cast needed because 'volatile' on some OS */
+			checklock_lockit(type, lock, func, file, line,
+				try_spinlock, timed_spinlock, 
+				(void*)&lock->spinlock, 1);
+			break;
+		default:
+			log_assert(0);
+	}
+}
+
+/** check if OK, unlock */
+void 
+checklock_unlock(enum check_lock_type type, struct checked_lock* lock,
+        const char* func, const char* file, int line)
+{
+	struct thr_check *thr = (struct thr_check*)pthread_getspecific(
+		thr_debug_key);
+	checktype(type, lock, func, file, line);
+	if(!thr) lock_error(lock, func, file, line, "no thread info");
+
+	acquire_locklock(lock, func, file, line);
+	/* was this thread even holding this lock? */
+	if(thr->holding_first != lock &&
+		lock->prev_held_lock[thr->num] == NULL) {
+		lock_error(lock, func, file, line, "unlock nonlocked lock");
+	}
+	if(lock->hold_count <= 0)
+		lock_error(lock, func, file, line, "too many unlocks");
+
+	/* store this point as last touched by */
+	lock->holder = thr;
+	lock->hold_count --;
+	lock->holder_func = func;
+	lock->holder_file = file;
+	lock->holder_line = line;
+
+	/* delete from thread holder list */
+	/* no need to lock other lockstructs, because they are all on the
+	 * held-locks list, and this threads holds their locks.
+	 * we only touch the thr->num members, so it is safe.  */
+	if(thr->holding_first == lock)
+		thr->holding_first = lock->next_held_lock[thr->num];
+	if(thr->holding_last == lock)
+		thr->holding_last = lock->prev_held_lock[thr->num];
+	if(lock->next_held_lock[thr->num])
+		lock->next_held_lock[thr->num]->prev_held_lock[thr->num] =
+			lock->prev_held_lock[thr->num];
+	if(lock->prev_held_lock[thr->num])
+		lock->prev_held_lock[thr->num]->next_held_lock[thr->num] =
+			lock->next_held_lock[thr->num];
+	lock->next_held_lock[thr->num] = NULL;
+	lock->prev_held_lock[thr->num] = NULL;
+
+	/* store memory areas that are protected, for later checks */
+	prot_store(lock);
+	LOCKRET(pthread_mutex_unlock(&lock->lock));
+
+	/* unlock it */
+	switch(type) {
+		case check_lock_mutex:
+			LOCKRET(pthread_mutex_unlock(&lock->mutex));
+			break;
+		case check_lock_spinlock:
+			LOCKRET(pthread_spin_unlock(&lock->spinlock));
+			break;
+		case check_lock_rwlock:
+			LOCKRET(pthread_rwlock_unlock(&lock->rwlock));
+			break;
+		default:
+			log_assert(0);
+	}
+}
+
+/** checklock thread main. Inits thread structure; num must index thread_infos. */
+static void* checklock_main(void* arg)
+{
+	struct thr_check* thr = (struct thr_check*)arg;
+	void* ret;
+	thr->id = pthread_self();
+	thr->num = *(int*)(thr->arg); /* hack: same numbers as in log file */
+	if(thr->num < 0 || thr->num >= THRDEBUG_MAX_THREADS) fatal_exit("thread num out of range");
+	log_assert(thread_infos[thr->num] == NULL);
+	thread_infos[thr->num] = thr;
+	LOCKRET(pthread_setspecific(thr_debug_key, thr));
+	ret = thr->func(thr->arg);
+	thread_infos[thr->num] = NULL;
+	free(thr);
+	return ret;
+}
+
+/** allocate debug info and create thread */
+void 
+checklock_thrcreate(pthread_t* id, void* (*func)(void*), void* arg)
+{
+	struct thr_check* thr = (struct thr_check*)calloc(1, 
+		sizeof(struct thr_check));
+	if(!thr)
+		fatal_exit("thrcreate: out of memory");
+	if(!key_created) {
+		struct thr_check* thisthr = (struct thr_check*)calloc(1, 
+			sizeof(struct thr_check));
+		if(!thisthr)
+			fatal_exit("thrcreate: out of memory");
+		key_created = 1;
+		LOCKRET(pthread_key_create(&thr_debug_key, NULL));
+		LOCKRET(pthread_setspecific(thr_debug_key, thisthr));
+		thread_infos[0] = thisthr;
+	}
+	thr->func = func;
+	thr->arg = arg;
+	LOCKRET(pthread_create(id, NULL, checklock_main, thr));
+}
+
+/** signal handler for join timeout, Exits. */
+static RETSIGTYPE joinalarm(int ATTR_UNUSED(sig))
+{
+	fatal_exit("join thread timeout. hangup or deadlock.");
+}
+
+/** wait for thread with a timeout. */
+void 
+checklock_thrjoin(pthread_t thread)
+{
+	/* wait with a timeout */
+	if(signal(SIGALRM, joinalarm) == SIG_ERR)
+		fatal_exit("signal(): %s", strerror(errno));
+	(void)alarm(CHECK_JOIN_TIMEOUT);
+	LOCKRET(pthread_join(thread, NULL));
+	(void)alarm(0);
+}
+
+#endif /* USE_THREAD_DEBUG */
diff --git a/testcode/checklocks.h b/testcode/checklocks.h
new file mode 100644
index 000000000..af14fc06e
--- /dev/null
+++ b/testcode/checklocks.h
@@ -0,0 +1,304 @@
+/**
+ * testcode/checklocks.h - wrapper on locks that checks access.
+ *
+ * Copyright (c) 2007, NLnet Labs. All rights reserved.
+ * 
+ * This software is open source.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * 
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TESTCODE_CHECK_LOCKS_H
+#define TESTCODE_CHECK_LOCKS_H
+
+/**
+ * \file
+ * Locks that are checked.
+ *
+ * Holds information per lock and per thread.
+ * That information is protected by a mutex (unchecked).
+ *
+ * Checks:
+ *      o which func, file, line created the lock.
+ *      o contention count, measures amount of contention on the lock.
+ *      o the memory region(s) that the lock protects are
+ *        memcmp'ed to ascertain no race conditions.
+ *      o checks that locks are unlocked properly (before deletion).
+ *        keeps which func, file, line that locked it.
+ *
+ * Limitations:
+ *	o Detects unprotected memory access when the lock is locked or freed,
+ *	  which detects races only if they happen, and only if in protected
+ *	  memory areas.
+ *	o Detects deadlocks by timeout, so approximately, as they happen.
+ *	o Does not check order of locking.
+ *	o Uses a lot of memory.
+ *	o The checks use locks themselves, changing scheduling,
+ *	  thus affecting the races that you see.
+ *	o for rwlocks does not detect exclusive writelock, or double locking.
+ */
+
+#ifdef USE_THREAD_DEBUG
+#ifndef HAVE_PTHREAD
+/* really pretty arbitrary, since it will work with solaris threads too */
+#error "Need pthreads for checked locks"
+#endif
+/******************* THREAD DEBUG ************************/
+#include <pthread.h>
+
+/** How long to wait before lock attempt is a failure. */
+#define CHECK_LOCK_TIMEOUT 5 /* seconds */
+/** How long to wait before join attempt is a failure. */
+#define CHECK_JOIN_TIMEOUT 120 /* seconds */
+/** How many threads to allocate for */
+#define THRDEBUG_MAX_THREADS 32 /* threads */
+
+/**
+ * Protection memory area.
+ * It is copied to a holding buffer to compare against later.
+ * Note that it may encompass the lock structure.
+ */
+struct protected_area {
+	/** where the memory region starts */
+	void* region;
+	/** size of the region */
+	size_t size;
+	/** backbuffer that holds a copy, of same size. */
+	void* hold;
+	/** next protected area in list */
+	struct protected_area* next;
+};
+
+/**
+ * per thread information for locking debug wrappers. 
+ */
+struct thr_check {
+	/** thread id */
+	pthread_t id;
+	/** real thread func */
+	void* (*func)(void*);
+	/** func user arg */
+	void* arg;
+	/** number of thread in list structure */
+	int num;
+	/** 
+	 * list of locks that this thread is holding, double
+	 * linked list, with as first element the most recent lock acquired.
+	 * So it represents the stack of locks acquired. (of all types).
+	 */
+	struct checked_lock *holding_first, *holding_last;
+	/** if the thread is currently waiting for a lock, which one */
+	struct checked_lock* waiting;
+};
+
+/**
+ * One structure for all types of locks.
+ */
+struct checked_lock {
+	/** mutex for exclusive access to this structure */
+	pthread_mutex_t lock;
+	/** list of memory regions protected by this checked lock */
+	struct protected_area* prot;
+	/** where was this lock created */
+	const char* create_func, *create_file;
+	/** where was this lock created */
+	int create_line;
+	/** contention count */
+	size_t contention_count;
+	/** hold count (how many threads are holding this lock) */
+	int hold_count;
+	/** how many threads are waiting for this lock */
+	int wait_count;
+	/** who touched it last */
+	const char* holder_func, *holder_file;
+	/** who touched it last */
+	int holder_line;
+	/** who owns the lock now */
+	struct thr_check* holder;
+
+	/** next lock a thread is holding (less recent) */
+	struct checked_lock* next_held_lock[THRDEBUG_MAX_THREADS];
+	/** prev lock a thread is holding (more recent) */
+	struct checked_lock* prev_held_lock[THRDEBUG_MAX_THREADS];
+
+	/** type of lock */
+	enum check_lock_type {
+		/** basic mutex */
+		check_lock_mutex,
+		/** fast spinlock */
+		check_lock_spinlock,
+		/** rwlock */
+		check_lock_rwlock
+	} type;
+	/** the lock itself, see type to disambiguate the union */
+	union {
+		/** mutex */
+		pthread_mutex_t mutex;
+		/** spinlock */
+		pthread_spinlock_t spinlock;
+		/** rwlock */
+		pthread_rwlock_t rwlock;
+	};
+};
+
+/**
+ * Additional call for the user to specify what areas are protected
+ * @param lock: the lock that protects the area. It can be inside the area.
+ * @param area: ptr to mem.
+ * @param size: length of area.
+ * You can call it multiple times with the same lock to give several areas.
+ */
+void lock_protect(struct checked_lock* lock, void* area, size_t size);
+
+/**
+ * Init locks.
+ * @param type: what type of lock this is.
+ * @param lock: ptr to user alloced ptr structure. This is inited.
+ *     So an alloc is done and the ptr is stored as result.
+ * @param func: caller function name.
+ * @param file: caller file name.
+ * @param line: caller line number.
+ */
+void checklock_init(enum check_lock_type type, struct checked_lock** lock,
+	const char* func, const char* file, int line);
+
+/**
+ * Destroy locks. Free the structure.
+ * @param type: what type of lock this is.
+ * @param lock: ptr to user alloced structure. This is destroyed.
+ * @param func: caller function name.
+ * @param file: caller file name.
+ * @param line: caller line number.
+ */
+void checklock_destroy(enum check_lock_type type, struct checked_lock** lock,
+	const char* func, const char* file, int line);
+
+/**
+ * Acquire readlock.
+ * @param type: what type of lock this is. Had better be a rwlock.
+ * @param lock: ptr to lock.
+ * @param func: caller function name.
+ * @param file: caller file name.
+ * @param line: caller line number.
+ */
+void checklock_rdlock(enum check_lock_type type, struct checked_lock* lock,
+	const char* func, const char* file, int line);
+
+/**
+ * Acquire writelock.
+ * @param type: what type of lock this is. Had better be a rwlock.
+ * @param lock: ptr to lock.
+ * @param func: caller function name.
+ * @param file: caller file name.
+ * @param line: caller line number.
+ */
+void checklock_wrlock(enum check_lock_type type, struct checked_lock* lock,
+	const char* func, const char* file, int line);
+
+/**
+ * Locks.
+ * @param type: what type of lock this is. Had better be mutex or spinlock.
+ * @param lock: the lock.
+ * @param func: caller function name.
+ * @param file: caller file name.
+ * @param line: caller line number.
+ */
+void checklock_lock(enum check_lock_type type, struct checked_lock* lock,
+	const char* func, const char* file, int line);
+
+/**
+ * Unlocks.
+ * @param type: what type of lock this is.
+ * @param lock: the lock.
+ * @param func: caller function name.
+ * @param file: caller file name.
+ * @param line: caller line number.
+ */
+void checklock_unlock(enum check_lock_type type, struct checked_lock* lock,
+	const char* func, const char* file, int line);
+
+/**
+ * Create thread.
+ * @param thr: Thread id, where to store result.
+ * @param func: thread start function.
+ * @param arg: user argument.
+ */
+void checklock_thrcreate(pthread_t* thr, void* (*func)(void*), void* arg);
+
+/**
+ * Wait for thread to exit, with a timeout. Thread return value is discarded.
+ * @param thread: thread to wait for.
+ */
+void checklock_thrjoin(pthread_t thread);
+
+/** structures to enable compiler type checking on the locks. 
+ * Also the pointer makes it so that the lock can be part of the protected
+ * region without any possible problem (since the ptr will stay the same.)
+ */
+struct checked_lock_rw { struct checked_lock* c_rw; };
+/** structures to enable compiler type checking on the locks. */
+struct checked_lock_mutex { struct checked_lock* c_m; };
+/** structures to enable compiler type checking on the locks. */
+struct checked_lock_spl { struct checked_lock* c_spl; };
+
+/** debugging rwlock */
+typedef struct checked_lock_rw lock_rw_t;
+#define lock_rw_init(lock) checklock_init(check_lock_rwlock, &((lock)->c_rw), __func__, __FILE__, __LINE__)
+#define lock_rw_destroy(lock) checklock_destroy(check_lock_rwlock, &((lock)->c_rw), __func__, __FILE__, __LINE__)
+#define lock_rw_rdlock(lock) checklock_rdlock(check_lock_rwlock, (lock)->c_rw, __func__, __FILE__, __LINE__)
+#define lock_rw_wrlock(lock) checklock_wrlock(check_lock_rwlock, (lock)->c_rw, __func__, __FILE__, __LINE__)
+#define lock_rw_unlock(lock) checklock_unlock(check_lock_rwlock, (lock)->c_rw, __func__, __FILE__, __LINE__)
+
+/** debugging mutex */
+typedef struct checked_lock_mutex lock_basic_t;
+#define lock_basic_init(lock) checklock_init(check_lock_mutex, &((lock)->c_m), __func__, __FILE__, __LINE__)
+#define lock_basic_destroy(lock) checklock_destroy(check_lock_mutex, &((lock)->c_m), __func__, __FILE__, __LINE__)
+#define lock_basic_lock(lock) checklock_lock(check_lock_mutex, (lock)->c_m, __func__, __FILE__, __LINE__)
+#define lock_basic_unlock(lock) checklock_unlock(check_lock_mutex, (lock)->c_m, __func__, __FILE__, __LINE__)
+
+/** debugging spinlock */
+typedef struct checked_lock_spl lock_quick_t;
+#define lock_quick_init(lock) checklock_init(check_lock_spinlock, &((lock)->c_spl), __func__, __FILE__, __LINE__)
+#define lock_quick_destroy(lock) checklock_destroy(check_lock_spinlock, &((lock)->c_spl), __func__, __FILE__, __LINE__)
+#define lock_quick_lock(lock) checklock_lock(check_lock_spinlock, (lock)->c_spl, __func__, __FILE__, __LINE__)
+#define lock_quick_unlock(lock) checklock_unlock(check_lock_spinlock, (lock)->c_spl, __func__, __FILE__, __LINE__)
+
+/** we use the pthread id, our thr_check structure is kept behind the scenes */
+typedef pthread_t ub_thread_t;
+#define ub_thread_create(thr, func, arg) checklock_thrcreate(thr, func, arg)
+#define ub_thread_self() pthread_self()
+#define ub_thread_join(thread) checklock_thrjoin(thread)
+
+typedef pthread_key_t ub_thread_key_t;
+#define ub_thread_key_create(key, f) LOCKRET(pthread_key_create(key, f))
+#define ub_thread_key_set(key, v) LOCKRET(pthread_setspecific(key, v))
+#define ub_thread_key_get(key) pthread_getspecific(key)
+
+#endif /* USE_THREAD_DEBUG */
+
+#endif /* TESTCODE_CHECK_LOCKS_H */
diff --git a/util/alloc.c b/util/alloc.c
index af6066700..ff0fc9528 100644
--- a/util/alloc.c
+++ b/util/alloc.c
@@ -43,6 +43,7 @@
 #include "util/alloc.h"
 
 /** prealloc some entries in the cache. To minimize contention. 
+ * Result is 1 lock per alloc_max newly created entries.
  * @param alloc: the structure to fill up.
  */
 static void
@@ -108,10 +109,13 @@ alloc_special_obtain(struct alloc_cache* alloc)
 		alloc->quar = alloc_special_next(p);
 		alloc->num_quar--;
 		alloc->special_allocated++;
+		alloc_special_clean(p);
 		return p;
 	}
 	/* see if in global cache */
 	if(alloc->super) {
+		/* Taking several entries (e.g. alloc_max/2) per lock acquisition
+		 * is possible, but it is unclear that it would beat this code. */
 		lock_quick_lock(&alloc->super->lock);
 		if((p = alloc->super->quar)) {
 			alloc->super->quar = alloc_special_next(p);
@@ -120,6 +124,7 @@ alloc_special_obtain(struct alloc_cache* alloc)
 		lock_quick_unlock(&alloc->super->lock);
 		if(p) {
 			alloc->special_allocated++;
+			alloc_special_clean(p);
 			return p;
 		}
 	}
@@ -128,6 +133,7 @@ alloc_special_obtain(struct alloc_cache* alloc)
 	if(!(p = (alloc_special_t*)malloc(sizeof(alloc_special_t))))
 		fatal_exit("alloc_special_obtain: out of memory");
 	alloc->special_allocated++;
+	alloc_special_clean(p);
 	return p;
 }
 
@@ -148,11 +154,13 @@ pushintosuper(struct alloc_cache* alloc, alloc_special_t* mem)
 	alloc->quar = alloc_special_next(p);
 	alloc->num_quar -= ALLOC_SPECIAL_MAX/2;
 
+	/* push mem plus the detached list onto the front of the super quar list */
 	lock_quick_lock(&alloc->super->lock);
 	alloc_special_next(p) = alloc->super->quar;
 	alloc->super->quar = mem;
 	alloc->super->num_quar += ALLOC_SPECIAL_MAX/2 + 1;
 	lock_quick_unlock(&alloc->super->lock);
+	/* so one lock acquisition per ALLOC_SPECIAL_MAX/2 + 1 released entries */
 }
 
 void 
@@ -161,6 +169,7 @@ alloc_special_release(struct alloc_cache* alloc, alloc_special_t* mem)
 	log_assert(alloc);
 	if(!mem)
 		return;
+	alloc_special_clean(mem);
 	if(alloc->super && alloc->num_quar >= ALLOC_SPECIAL_MAX) {
 		/* push it to the super structure */
 		alloc->special_allocated --;
diff --git a/util/alloc.h b/util/alloc.h
index 2801c8e89..05f7930f9 100644
--- a/util/alloc.h
+++ b/util/alloc.h
@@ -43,13 +43,6 @@
  *	o The packed rrset type needs to be kept on special freelists,
  *	  so that they are reused for other packet rrset allocations.
  *
- * Design choices:
- *	o The global malloc/free is used to handle fragmentation, etc.
- *	  If freelists become very large, it is returned to the system.
- *	o Only 1k and smaller is cached, bigger uses malloc.
- *	  Because DNS fragments are mostly this size.
- *	o On startup preallocated memory can be given, so threads can
- *	  avoid contention in the startup phase.
  */
 
 #ifndef UTIL_ALLOC_H
@@ -89,13 +82,14 @@ struct alloc_cache {
  * Init alloc (zeroes the struct).
  * @param alloc: this parameter is allocated by the caller.
  * @param super: super to use (init that before with super_init).
+ *    Pass this argument NULL to init the toplevel alloc structure.
  */
 void alloc_init(struct alloc_cache* alloc, struct alloc_cache* super);
 
 /**
  * Free the alloc. Pushes all the cached items into the super structure.
- * Or deletes them if super is NULL.
- * Does not free the alloc struct itself.
+ * Or deletes them if alloc->super is NULL.
+ * Does not free the alloc struct itself (it was also allocated by caller).
  * @param alloc: is almost zeroed on exit (except some stats).
  */
 void alloc_delete(struct alloc_cache* alloc);
@@ -104,11 +98,13 @@ void alloc_delete(struct alloc_cache* alloc);
  * Get a new special_t element.
  * @param alloc: where to alloc it.
  * @return: memory block. Will not return NULL (instead fatal_exit).
+ *    The block is zeroed.
  */
 alloc_special_t* alloc_special_obtain(struct alloc_cache* alloc);
 
 /**
  * Return special_t back to pool.
+ * The block is cleaned up (zeroed) which also invalidates the ID inside.
  * @param alloc: where to alloc it.
  * @param mem: block to free.
  */
diff --git a/util/locks.h b/util/locks.h
index b0dfa08e7..408f14a33 100644
--- a/util/locks.h
+++ b/util/locks.h
@@ -69,6 +69,15 @@
 		__FILE__, __LINE__, strerror(err));	\
  	} while(0)
 
+#define USE_THREAD_DEBUG
+#ifdef USE_THREAD_DEBUG
+/******************* THREAD DEBUG ************************/
+/* (some) checking; to detect races and deadlocks. NOTE(review): USE_THREAD_DEBUG is defined unconditionally just above, so the #else branch is never compiled -- confirm this is intended. */
+#include "testcode/checklocks.h"
+
+#else /* USE_THREAD_DEBUG */
+#define lock_protect(lock, area, size) /* nop: region protection is only checked in the thread debug build */
+
 #ifdef HAVE_PTHREAD
 #include <pthread.h>
 
@@ -210,6 +219,7 @@ typedef void* ub_thread_key_t;
 
 #endif /* HAVE_SOLARIS_THREADS */
 #endif /* HAVE_PTHREAD */
+#endif /* USE_THREAD_DEBUG */
 
 /**
  * Block all signals for this thread.