From: Wouter Wijngaards Date: Wed, 7 Mar 2007 16:21:31 +0000 (+0000) Subject: Simple thread problem detector code. X-Git-Tag: release-0.1~6 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a43a0427923081c9e7467c31723450b44e181025;p=thirdparty%2Funbound.git Simple thread problem detector code. git-svn-id: file:///svn/unbound/trunk@165 be551aaa-1e26-0410-a405-d3ace91eadb9 --- diff --git a/Makefile.in b/Makefile.in index ba622448d..313f2b3ef 100644 --- a/Makefile.in +++ b/Makefile.in @@ -50,7 +50,7 @@ LINTFLAGS+="-DBN_ULONG=unsigned long" -Dkrb5_int32=int "-Dkrb5_ui_4=unsigned int INSTALL=$(srcdir)/install-sh -COMMON_SRC=$(wildcard services/*.c util/*.c) util/configparser.c util/configlexer.c +COMMON_SRC=$(wildcard services/*.c util/*.c) util/configparser.c util/configlexer.c testcode/checklocks.c COMMON_OBJ=$(addprefix $(BUILD),$(COMMON_SRC:.c=.o)) COMPAT_OBJ=$(addprefix $(BUILD)compat/,$(LIBOBJS)) UNITTEST_SRC=testcode/unitmain.c $(COMMON_SRC) diff --git a/daemon/worker.h b/daemon/worker.h index 72576a5f8..5121bdc0d 100644 --- a/daemon/worker.h +++ b/daemon/worker.h @@ -68,10 +68,10 @@ enum worker_commands { * Holds globally visible information. */ struct worker { + /** the thread number (in daemon array). First in struct for debug. */ + int thread_num; /** global shared daemon structure */ struct daemon* daemon; - /** the thread number (in daemon array). */ - int thread_num; /** thread id */ ub_thread_t thr_id; /** fd 0 of socketpair, write commands for worker to this one */ diff --git a/doc/Changelog b/doc/Changelog index 57ac174b1..19cd2f1d0 100644 --- a/doc/Changelog +++ b/doc/Changelog @@ -1,3 +1,8 @@ +7 March 2007: Wouter + - created a wrapper around thread calls that performs some basic + checking for data race and deadlock, and basic performance + contention measurement. + 6 March 2007: Wouter - Testbed works with threading (different machines, different options). - alloc work, does the special type. 
diff --git a/doc/TODO b/doc/TODO index 50e3825cc..32a8a6ada 100644 --- a/doc/TODO +++ b/doc/TODO @@ -1,3 +1,5 @@ TODO items. o use real entropy to make random (ID, port) numbers more random. o in production mode, do not free memory on exit. In debug mode, test leaks. +o profile memory allocation, and if performance issues, use special memory + allocator. For example, with caches per thread. diff --git a/testcode/checklocks.c b/testcode/checklocks.c new file mode 100644 index 000000000..917e30395 --- /dev/null +++ b/testcode/checklocks.c @@ -0,0 +1,554 @@ +/** + * testcode/checklocks.c - wrapper on locks that checks access. + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include <signal.h> +#include "util/locks.h" /* include before checklocks.h */ +#include "testcode/checklocks.h" + +/** + * \file + * Locks that are checked. + * + * Ugly hack: uses the fact that workers are passed to thread_create to make + * the thread numbers here the same as those used for logging which is nice. + * + * Todo: - check global ordering of instances of locks. + * - refcount statistics. + * - debug status print, of thread lock stacks, and current waiting. + */ +#ifdef USE_THREAD_DEBUG + +/** if key has been created */ +static int key_created = 0; +/** we hide the thread debug info with this key. */ +static ub_thread_key_t thr_debug_key; +/** the list of threads, so all threads can be examined. NULL at start. 
*/ +static struct thr_check* thread_infos[THRDEBUG_MAX_THREADS]; + +/** print pretty lock error and exit */ +static void lock_error(struct checked_lock* lock, + const char* func, const char* file, int line, const char* err) +{ + log_err("lock error (description follows)"); + log_err("Created at %s %s %d", lock->create_func, lock->create_file, lock->create_line); + log_err("Previously %s %s %d", lock->holder_func, lock->holder_file, lock->holder_line); + log_err("At %s %s %d", func, file, line); + log_err("Error for %s lock: %s", + (lock->type==check_lock_mutex)?"mutex": ( + (lock->type==check_lock_spinlock)?"spinlock": "rwlock"), err); + fatal_exit("bailing out"); +} + +/** obtain lock on debug lock structure. This could be a deadlock. + * (could it?) Anyway, check with timeouts. + * @param lock: on what to acquire lock. + * @param func: user level caller identification. + * @param file: user level caller identification. + * @param line: user level caller identification. + */ +static void +acquire_locklock(struct checked_lock* lock, + const char* func, const char* file, int line) +{ + struct timespec to; + int err; + int contend = 0; + /* first try; inc contention counter if not immediately */ + if((err = pthread_mutex_trylock(&lock->lock))) { + if(err==EBUSY) + contend++; + else fatal_exit("error in mutex_trylock: %s", strerror(err)); + } + if(!err) + return; /* immediate success */ + to.tv_sec = time(NULL) + CHECK_LOCK_TIMEOUT; + to.tv_nsec = 0; + err = pthread_mutex_timedlock(&lock->lock, &to); + if(err) { + log_err("in acquiring locklock: %s", strerror(err)); + lock_error(lock, func, file, line, "acquire locklock"); + } + lock->contention_count += contend; +} + +/** add protected region */ +void +lock_protect(struct checked_lock* lock, void* area, size_t size) +{ + struct protected_area* e = (struct protected_area*)calloc(1, + sizeof(struct protected_area)); + if(!e) + fatal_exit("lock_protect: out of memory"); + e->region = area; + e->size = size; + e->hold = 
calloc(1, size); + if(!e->hold) + fatal_exit("lock_protect: out of memory"); + memcpy(e->hold, e->region, e->size); + + acquire_locklock(lock, __func__, __FILE__, __LINE__); + e->next = lock->prot; + lock->prot = e; + LOCKRET(pthread_mutex_unlock(&lock->lock)); +} + +/** + * Check protected memory region. Memory compare. Exit on error. + * @param lock: which lock to check. + * @param func: location we are now (when failure is detected). + * @param file: location we are now (when failure is detected). + * @param line: location we are now (when failure is detected). + */ +static void +prot_check(struct checked_lock* lock, + const char* func, const char* file, int line) +{ + struct protected_area* p = lock->prot; + while(p) { + if(memcmp(p->hold, p->region, p->size) != 0) { + lock_error(lock, func, file, line, + "protected area modified"); + } + p = p->next; + } +} + +/** Copy protected memory region. */ +static void +prot_store(struct checked_lock* lock) +{ + struct protected_area* p = lock->prot; + while(p) { + memcpy(p->hold, p->region, p->size); + p = p->next; + } +} + + +/** alloc struct, init lock empty */ +void +checklock_init(enum check_lock_type type, struct checked_lock** lock, + const char* func, const char* file, int line) +{ + struct checked_lock* e = (struct checked_lock*)calloc(1, + sizeof(struct checked_lock)); + if(!e) + fatal_exit("%s %s %d: out of memory", func, file, line); + *lock = e; + e->type = type; + e->create_func = func; + e->create_file = file; + e->create_line = line; + LOCKRET(pthread_mutex_init(&e->lock, NULL)); + switch(e->type) { + case check_lock_mutex: + LOCKRET(pthread_mutex_init(&e->mutex, NULL)); + break; + case check_lock_spinlock: + LOCKRET(pthread_spin_init(&e->spinlock, PTHREAD_PROCESS_PRIVATE)); + break; + case check_lock_rwlock: + LOCKRET(pthread_rwlock_init(&e->rwlock, NULL)); + break; + default: + log_assert(0); + } +} + +/** delete prot items */ +static void prot_delete(struct checked_lock* lock) +{ + struct 
protected_area* p=lock->prot, *np; + while(p) { + np = p->next; + free(p->hold); + free(p); + p = np; + } +} + +/** check if type is OK for the lock given */ +static void +checktype(enum check_lock_type type, struct checked_lock* lock, + const char* func, const char* file, int line) +{ + if(type != lock->type) { + lock_error(lock, func, file, line, "wrong lock type"); + } +} + +/** check if OK (not held, not waited on, protected areas unmodified), free struct */ +void +checklock_destroy(enum check_lock_type type, struct checked_lock** lock, + const char* func, const char* file, int line) +{ + const size_t contention_interest = 10; + struct checked_lock* e; + if(!lock) + return; + e = *lock; + if(!e) + return; + *lock = NULL; /* use after free will fail */ + checktype(type, e, func, file, line); + + /* check if delete is OK */ + acquire_locklock(e, func, file, line); + if(e->hold_count != 0) + lock_error(e, func, file, line, "delete while locked."); + if(e->wait_count != 0) + lock_error(e, func, file, line, "delete while waited on."); + prot_check(e, func, file, line); + LOCKRET(pthread_mutex_unlock(&e->lock)); + + /* contention */ + if(e->contention_count > contention_interest) { + log_info("lock created %s %s %d has contention %u", + e->create_func, e->create_file, e->create_line, + (unsigned int)e->contention_count); + } + + /* delete it */ + LOCKRET(pthread_mutex_destroy(&e->lock)); + prot_delete(e); + /* since nobody holds the lock - see check above, no need to unlink */ + switch(e->type) { + case check_lock_mutex: + LOCKRET(pthread_mutex_destroy(&e->mutex)); + break; + case check_lock_spinlock: + LOCKRET(pthread_spin_destroy(&e->spinlock)); + break; + case check_lock_rwlock: + LOCKRET(pthread_rwlock_destroy(&e->rwlock)); + break; + default: + log_assert(0); + } + memset(e, 0, sizeof(*e)); /* wipe the whole struct; sizeof(*lock) is only pointer size */ + free(e); +} + +/** finish acquiring lock, shared between _(rd|wr||)lock() routines. 
*/ +static void +finish_acquire_lock(struct thr_check* thr, struct checked_lock* lock, + const char* func, const char* file, int line) +{ + thr->waiting = NULL; + lock->wait_count --; + lock->holder = thr; + lock->hold_count ++; + lock->holder_func = func; + lock->holder_file = file; + lock->holder_line = line; + + /* insert in thread lock list, as first */ + lock->prev_held_lock[thr->num] = NULL; + lock->next_held_lock[thr->num] = thr->holding_first; + if(thr->holding_first) + /* no need to lock it, since this thread already holds the + * lock (since it is on this list) and we only edit thr->num + * member in array. So it is safe. */ + thr->holding_first->prev_held_lock[thr->num] = lock; + else thr->holding_last = lock; + thr->holding_first = lock; +} + +/** + * Locking routine. + * @param type: as passed by user. + * @param lock: as passed by user. + * @param func: caller location. + * @param file: caller location. + * @param line: caller location. + * @param tryfunc: the pthread_mutex_trylock or similar function. + * @param timedfunc: the pthread_mutex_timedlock or similar function. + * Uses absolute timeout value. + * @param arg: what to pass to tryfunc and timedfunc. + * @param exclusive: if lock must be exclusive (only one allowed). 
+ */ +static void +checklock_lockit(enum check_lock_type type, struct checked_lock* lock, + const char* func, const char* file, int line, + int (*tryfunc)(void*), int (*timedfunc)(void*, struct timespec*), + void* arg, int exclusive) +{ + int err; + int contend = 0; + struct thr_check *thr = (struct thr_check*)pthread_getspecific( + thr_debug_key); + checktype(type, lock, func, file, line); + if(!thr) lock_error(lock, func, file, line, "no thread info"); + + acquire_locklock(lock, func, file, line); + lock->wait_count ++; + thr->waiting = lock; + if(exclusive && lock->hold_count > 0 && lock->holder == thr) + lock_error(lock, func, file, line, "thread already owns lock"); + LOCKRET(pthread_mutex_unlock(&lock->lock)); + + /* first try; if busy increase contention counter */ + if((err=tryfunc(arg))) { + struct timespec to; + if(err != EBUSY) log_err("trylock: %s", strerror(err)); + to.tv_sec = time(NULL) + CHECK_LOCK_TIMEOUT; + to.tv_nsec = 0; + if((err=timedfunc(arg, &to))) { + if(err == ETIMEDOUT) + lock_error(lock, func, file, line, + "timeout, deadlock?"); + log_err("timedlock: %s", strerror(err)); + } + contend ++; + } + /* got the lock */ + + acquire_locklock(lock, func, file, line); + lock->contention_count += contend; + if(exclusive && lock->hold_count > 0) + lock_error(lock, func, file, line, "got nonexclusive lock"); + /* check the memory areas for unauthorized changes, + * between last unlock time and current lock time. + * we check while holding the lock (threadsafe). 
+ */ + prot_check(lock, func, file, line); + finish_acquire_lock(thr, lock, func, file, line); + LOCKRET(pthread_mutex_unlock(&lock->lock)); +} + +/** helper for rdlock: try */ +static int try_rd(void* arg) +{ return pthread_rwlock_tryrdlock((pthread_rwlock_t*)arg); } +/** helper for rdlock: timed */ +static int timed_rd(void* arg, struct timespec* to) +{ return pthread_rwlock_timedrdlock((pthread_rwlock_t*)arg, to); } + +/** check if OK, lock */ +void +checklock_rdlock(enum check_lock_type type, struct checked_lock* lock, + const char* func, const char* file, int line) +{ + + log_assert(type == check_lock_rwlock); + checklock_lockit(type, lock, func, file, line, + try_rd, timed_rd, &lock->rwlock, 0); +} + +/** helper for wrlock: try */ +static int try_wr(void* arg) +{ return pthread_rwlock_trywrlock((pthread_rwlock_t*)arg); } +/** helper for wrlock: timed */ +static int timed_wr(void* arg, struct timespec* to) +{ return pthread_rwlock_timedwrlock((pthread_rwlock_t*)arg, to); } + +/** check if OK, lock */ +void +checklock_wrlock(enum check_lock_type type, struct checked_lock* lock, + const char* func, const char* file, int line) +{ + log_assert(type == check_lock_rwlock); + checklock_lockit(type, lock, func, file, line, + try_wr, timed_wr, &lock->rwlock, 0); +} + +/** helper for lock mutex: try */ +static int try_mutex(void* arg) +{ return pthread_mutex_trylock((pthread_mutex_t*)arg); } +/** helper for lock mutex: timed */ +static int timed_mutex(void* arg, struct timespec* to) +{ return pthread_mutex_timedlock((pthread_mutex_t*)arg, to); } + +/** helper for lock spinlock: try */ +static int try_spinlock(void* arg) +{ return pthread_spin_trylock((pthread_spinlock_t*)arg); } +/** helper for lock spinlock: timed */ +static int timed_spinlock(void* arg, struct timespec* to) +{ + int err; + /* spin for 5 seconds. 
(ouch for the CPU, but it beats forever) */ + while( (err=try_spinlock(arg)) == EBUSY) { +#ifndef S_SPLINT_S + if(time(NULL) >= to->tv_sec) + return ETIMEDOUT; +#endif + } + return err; +} + +/** check if OK, lock */ +void +checklock_lock(enum check_lock_type type, struct checked_lock* lock, + const char* func, const char* file, int line) +{ + log_assert(type != check_lock_rwlock); + switch(type) { + case check_lock_mutex: + checklock_lockit(type, lock, func, file, line, + try_mutex, timed_mutex, &lock->mutex, 1); + break; + case check_lock_spinlock: + /* void* cast needed because 'volatile' on some OS */ + checklock_lockit(type, lock, func, file, line, + try_spinlock, timed_spinlock, + (void*)&lock->spinlock, 1); + break; + default: + log_assert(0); + } +} + +/** check if OK, unlock */ +void +checklock_unlock(enum check_lock_type type, struct checked_lock* lock, + const char* func, const char* file, int line) +{ + struct thr_check *thr = (struct thr_check*)pthread_getspecific( + thr_debug_key); + checktype(type, lock, func, file, line); + if(!thr) lock_error(lock, func, file, line, "no thread info"); + + acquire_locklock(lock, func, file, line); + /* was this thread even holding this lock? */ + if(thr->holding_first != lock && + lock->prev_held_lock[thr->num] == NULL) { + lock_error(lock, func, file, line, "unlock nonlocked lock"); + } + if(lock->hold_count <= 0) + lock_error(lock, func, file, line, "too many unlocks"); + + /* store this point as last touched by */ + lock->holder = thr; + lock->hold_count --; + lock->holder_func = func; + lock->holder_file = file; + lock->holder_line = line; + + /* delete from thread holder list */ + /* no need to lock other lockstructs, because they are all on the + * held-locks list, and this threads holds their locks. + * we only touch the thr->num members, so it is safe. 
*/ + if(thr->holding_first == lock) + thr->holding_first = lock->next_held_lock[thr->num]; + if(thr->holding_last == lock) + thr->holding_last = lock->prev_held_lock[thr->num]; + if(lock->next_held_lock[thr->num]) + lock->next_held_lock[thr->num]->prev_held_lock[thr->num] = + lock->prev_held_lock[thr->num]; + if(lock->prev_held_lock[thr->num]) + lock->prev_held_lock[thr->num]->next_held_lock[thr->num] = + lock->next_held_lock[thr->num]; + lock->next_held_lock[thr->num] = NULL; + lock->prev_held_lock[thr->num] = NULL; + + /* store memory areas that are protected, for later checks */ + prot_store(lock); + LOCKRET(pthread_mutex_unlock(&lock->lock)); + + /* unlock it */ + switch(type) { + case check_lock_mutex: + LOCKRET(pthread_mutex_unlock(&lock->mutex)); + break; + case check_lock_spinlock: + LOCKRET(pthread_spin_unlock(&lock->spinlock)); + break; + case check_lock_rwlock: + LOCKRET(pthread_rwlock_unlock(&lock->rwlock)); + break; + default: + log_assert(0); + } +} + +/** checklock thread main, Inits thread structure. 
*/ +static void* checklock_main(void* arg) +{ + struct thr_check* thr = (struct thr_check*)arg; + void* ret; + thr->id = pthread_self(); + /* Hack to get same numbers as in log file */ + thr->num = *(int*)(thr->arg); + log_assert(thread_infos[thr->num] == NULL); + thread_infos[thr->num] = thr; + LOCKRET(pthread_setspecific(thr_debug_key, thr)); + ret = thr->func(thr->arg); + thread_infos[thr->num] = NULL; + free(thr); + return ret; +} + +/** allocate debug info and create thread */ +void +checklock_thrcreate(pthread_t* id, void* (*func)(void*), void* arg) +{ + struct thr_check* thr = (struct thr_check*)calloc(1, + sizeof(struct thr_check)); + if(!thr) + fatal_exit("thrcreate: out of memory"); + if(!key_created) { + struct thr_check* thisthr = (struct thr_check*)calloc(1, + sizeof(struct thr_check)); + if(!thisthr) + fatal_exit("thrcreate: out of memory"); + key_created = 1; + LOCKRET(pthread_key_create(&thr_debug_key, NULL)); + LOCKRET(pthread_setspecific(thr_debug_key, thisthr)); + thread_infos[0] = thisthr; + } + thr->func = func; + thr->arg = arg; + LOCKRET(pthread_create(id, NULL, checklock_main, thr)); +} + +/** signal handler for join timeout, Exits. */ +static RETSIGTYPE joinalarm(int ATTR_UNUSED(sig)) +{ + fatal_exit("join thread timeout. hangup or deadlock."); +} + +/** wait for thread with a timeout. */ +void +checklock_thrjoin(pthread_t thread) +{ + /* wait with a timeout */ + if(signal(SIGALRM, joinalarm) == SIG_ERR) + fatal_exit("signal(): %s", strerror(errno)); + (void)alarm(CHECK_JOIN_TIMEOUT); + LOCKRET(pthread_join(thread, NULL)); + (void)alarm(0); +} + +#endif /* USE_THREAD_DEBUG */ diff --git a/testcode/checklocks.h b/testcode/checklocks.h new file mode 100644 index 000000000..af14fc06e --- /dev/null +++ b/testcode/checklocks.h @@ -0,0 +1,304 @@ +/** + * testcode/checklocks.h - wrapper on locks that checks access. + * + * Copyright (c) 2007, NLnet Labs. All rights reserved. + * + * This software is open source. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * Neither the name of the NLNET LABS nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TESTCODE_CHECK_LOCKS_H +#define TESTCODE_CHECK_LOCKS_H + +/** + * \file + * Locks that are checked. + * + * Holds information per lock and per thread. + * That information is protected by a mutex (unchecked). + * + * Checks: + * o which func, file, line created the lock. + * o contention count, measures amount of contention on the lock. + * o the memory region(s) that the lock protects are + * memcmp'ed to ascertain no race conditions. 
+ * o checks that locks are unlocked properly (before deletion). + * keeps which func, file, line that locked it. + * + * Limitations: + * o Detects unprotected memory access when the lock is locked or freed, + * which detects races only if they happen, and only if in protected + * memory areas. + * o Detects deadlocks by timeout, so approximately, as they happen. + * o Does not check order of locking. + * o Uses a lot of memory. + * o The checks use locks themselves, changing scheduling, + * thus affecting the races that you see. + * o for rwlocks does not detect exclusive writelock, or double locking. + */ + +#ifdef USE_THREAD_DEBUG +#ifndef HAVE_PTHREAD +/* really pretty arbitrary, since it will work with solaris threads too */ +#error "Need pthreads for checked locks" +#endif +/******************* THREAD DEBUG ************************/ +#include + +/** How long to wait before lock attempt is a failure. */ +#define CHECK_LOCK_TIMEOUT 5 /* seconds */ +/** How long to wait before join attempt is a failure. */ +#define CHECK_JOIN_TIMEOUT 120 /* seconds */ +/** How many trheads to allocate for */ +#define THRDEBUG_MAX_THREADS 32 /* threads */ + +/** + * Protection memory area. + * It is copied to a holding buffer to compare against later. + * Note that it may encompass the lock structure. + */ +struct protected_area { + /** where the memory region starts */ + void* region; + /** size of the region */ + size_t size; + /** backbuffer that holds a copy, of same size. */ + void* hold; + /** next protected area in list */ + struct protected_area* next; +}; + +/** + * per thread information for locking debug wrappers. + */ +struct thr_check { + /** thread id */ + pthread_t id; + /** real thread func */ + void* (*func)(void*); + /** func user arg */ + void* arg; + /** number of thread in list structure */ + int num; + /** + * list of locks that this thread is holding, double + * linked list, which first element the most recent lock acquired. 
+ * So it represents the stack of locks acquired (of all types). + */ + struct checked_lock *holding_first, *holding_last; + /** if the thread is currently waiting for a lock, which one */ + struct checked_lock* waiting; +}; + +/** + * One structure for all types of locks. + */ +struct checked_lock { + /** mutex for exclusive access to this structure */ + pthread_mutex_t lock; + /** list of memory regions protected by this checked lock */ + struct protected_area* prot; + /** where was this lock created */ + const char* create_func, *create_file; + /** where was this lock created */ + int create_line; + /** contention count (lock attempts that did not succeed immediately) */ + size_t contention_count; + /** hold count (how many threads are holding this lock) */ + int hold_count; + /** how many threads are waiting for this lock */ + int wait_count; + /** who touched it last */ + const char* holder_func, *holder_file; + /** who touched it last */ + int holder_line; + /** thread that last acquired or released the lock */ + struct thr_check* holder; + + /** next lock a thread is holding (less recent) */ + struct checked_lock* next_held_lock[THRDEBUG_MAX_THREADS]; + /** prev lock a thread is holding (more recent) */ + struct checked_lock* prev_held_lock[THRDEBUG_MAX_THREADS]; + + /** type of lock */ + enum check_lock_type { + /** basic mutex */ + check_lock_mutex, + /** fast spinlock */ + check_lock_spinlock, + /** rwlock */ + check_lock_rwlock + } type; + /** the lock itself, see type to disambiguate the union */ + union { + /** mutex */ + pthread_mutex_t mutex; + /** spinlock */ + pthread_spinlock_t spinlock; + /** rwlock */ + pthread_rwlock_t rwlock; + }; +}; + +/** + * Additional call for the user to specify what areas are protected + * @param lock: the lock that protects the area. It can be inside the area. + * @param area: ptr to mem. + * @param size: length of area. + * You can call it multiple times with the same lock to give several areas. 
+ */ +void lock_protect(struct checked_lock* lock, void* area, size_t size); + +/** + * Init locks. + * @param type: what type of lock this is. + * @param lock: ptr to user alloced ptr structure. This is inited. + * So an alloc is done and the ptr is stored as result. + * @param func: caller function name. + * @param file: caller file name. + * @param line: caller line number. + */ +void checklock_init(enum check_lock_type type, struct checked_lock** lock, + const char* func, const char* file, int line); + +/** + * Destroy locks. Free the structure. + * @param type: what type of lock this is. + * @param lock: ptr to user alloced structure. This is destroyed. + * @param func: caller function name. + * @param file: caller file name. + * @param line: caller line number. + */ +void checklock_destroy(enum check_lock_type type, struct checked_lock** lock, + const char* func, const char* file, int line); + +/** + * Acquire readlock. + * @param type: what type of lock this is. Had better be a rwlock. + * @param lock: ptr to lock. + * @param func: caller function name. + * @param file: caller file name. + * @param line: caller line number. + */ +void checklock_rdlock(enum check_lock_type type, struct checked_lock* lock, + const char* func, const char* file, int line); + +/** + * Acquire writelock. + * @param type: what type of lock this is. Had better be a rwlock. + * @param lock: ptr to lock. + * @param func: caller function name. + * @param file: caller file name. + * @param line: caller line number. + */ +void checklock_wrlock(enum check_lock_type type, struct checked_lock* lock, + const char* func, const char* file, int line); + +/** + * Locks. + * @param type: what type of lock this is. Had better be mutex or spinlock. + * @param lock: the lock. + * @param func: caller function name. + * @param file: caller file name. + * @param line: caller line number. 
+ */ +void checklock_lock(enum check_lock_type type, struct checked_lock* lock, + const char* func, const char* file, int line); + +/** + * Unlocks. + * @param type: what type of lock this is. + * @param lock: the lock. + * @param func: caller function name. + * @param file: caller file name. + * @param line: caller line number. + */ +void checklock_unlock(enum check_lock_type type, struct checked_lock* lock, + const char* func, const char* file, int line); + +/** + * Create thread. + * @param thr: Thread id, where to store result. + * @param func: thread start function. + * @param arg: user argument. Must point to data that starts with an int thread number. + */ +void checklock_thrcreate(pthread_t* thr, void* (*func)(void*), void* arg); + +/** + * Wait for thread to exit. The thread return value is discarded. + * @param thread: thread to wait for. + */ +void checklock_thrjoin(pthread_t thread); + +/** structures to enable compiler type checking on the locks. + * Also the pointer makes it so that the lock can be part of the protected + * region without any possible problem (since the ptr will stay the same.) + */ +struct checked_lock_rw { struct checked_lock* c_rw; }; +/** structures to enable compiler type checking on the locks. */ +struct checked_lock_mutex { struct checked_lock* c_m; }; +/** structures to enable compiler type checking on the locks. 
*/ +struct checked_lock_spl { struct checked_lock* c_spl; }; + +/** debugging rwlock */ +typedef struct checked_lock_rw lock_rw_t; +#define lock_rw_init(lock) checklock_init(check_lock_rwlock, &((lock)->c_rw), __func__, __FILE__, __LINE__) +#define lock_rw_destroy(lock) checklock_destroy(check_lock_rwlock, &((lock)->c_rw), __func__, __FILE__, __LINE__) +#define lock_rw_rdlock(lock) checklock_rdlock(check_lock_rwlock, (lock)->c_rw, __func__, __FILE__, __LINE__) +#define lock_rw_wrlock(lock) checklock_wrlock(check_lock_rwlock, (lock)->c_rw, __func__, __FILE__, __LINE__) +#define lock_rw_unlock(lock) checklock_unlock(check_lock_rwlock, (lock)->c_rw, __func__, __FILE__, __LINE__) + +/** debugging mutex */ +typedef struct checked_lock_mutex lock_basic_t; +#define lock_basic_init(lock) checklock_init(check_lock_mutex, &((lock)->c_m), __func__, __FILE__, __LINE__) +#define lock_basic_destroy(lock) checklock_destroy(check_lock_mutex, &((lock)->c_m), __func__, __FILE__, __LINE__) +#define lock_basic_lock(lock) checklock_lock(check_lock_mutex, (lock)->c_m, __func__, __FILE__, __LINE__) +#define lock_basic_unlock(lock) checklock_unlock(check_lock_mutex, (lock)->c_m, __func__, __FILE__, __LINE__) + +/** debugging spinlock */ +typedef struct checked_lock_spl lock_quick_t; +#define lock_quick_init(lock) checklock_init(check_lock_spinlock, &((lock)->c_spl), __func__, __FILE__, __LINE__) +#define lock_quick_destroy(lock) checklock_destroy(check_lock_spinlock, &((lock)->c_spl), __func__, __FILE__, __LINE__) +#define lock_quick_lock(lock) checklock_lock(check_lock_spinlock, (lock)->c_spl, __func__, __FILE__, __LINE__) +#define lock_quick_unlock(lock) checklock_unlock(check_lock_spinlock, (lock)->c_spl, __func__, __FILE__, __LINE__) + +/** we use the pthread id, our thr_check structure is kept behind the scenes */ +typedef pthread_t ub_thread_t; +#define ub_thread_create(thr, func, arg) checklock_thrcreate(thr, func, arg) +#define ub_thread_self() pthread_self() +#define 
ub_thread_join(thread) checklock_thrjoin(thread) + +typedef pthread_key_t ub_thread_key_t; +#define ub_thread_key_create(key, f) LOCKRET(pthread_key_create(key, f)) +#define ub_thread_key_set(key, v) LOCKRET(pthread_setspecific(key, v)) +#define ub_thread_key_get(key) pthread_getspecific(key) + +#endif /* USE_THREAD_DEBUG */ + +#endif /* TESTCODE_CHECK_LOCKS_H */ diff --git a/util/alloc.c b/util/alloc.c index af6066700..ff0fc9528 100644 --- a/util/alloc.c +++ b/util/alloc.c @@ -43,6 +43,7 @@ #include "util/alloc.h" /** prealloc some entries in the cache. To minimize contention. + * Result is 1 lock per alloc_max newly created entries. * @param alloc: the structure to fill up. */ static void @@ -108,10 +109,13 @@ alloc_special_obtain(struct alloc_cache* alloc) alloc->quar = alloc_special_next(p); alloc->num_quar--; alloc->special_allocated++; + alloc_special_clean(p); return p; } /* see if in global cache */ if(alloc->super) { + /* could maybe grab alloc_max/2 entries in one go, + * but really, isn't that just as fast as this code? 
*/ lock_quick_lock(&alloc->super->lock); if((p = alloc->super->quar)) { alloc->super->quar = alloc_special_next(p); @@ -120,6 +124,7 @@ alloc_special_obtain(struct alloc_cache* alloc) lock_quick_unlock(&alloc->super->lock); if(p) { alloc->special_allocated++; + alloc_special_clean(p); return p; } } @@ -128,6 +133,7 @@ alloc_special_obtain(struct alloc_cache* alloc) if(!(p = (alloc_special_t*)malloc(sizeof(alloc_special_t)))) fatal_exit("alloc_special_obtain: out of memory"); alloc->special_allocated++; + alloc_special_clean(p); return p; } @@ -148,11 +154,13 @@ pushintosuper(struct alloc_cache* alloc, alloc_special_t* mem) alloc->quar = alloc_special_next(p); alloc->num_quar -= ALLOC_SPECIAL_MAX/2; + /* dump mem+list into the super quar list */ lock_quick_lock(&alloc->super->lock); alloc_special_next(p) = alloc->super->quar; alloc->super->quar = mem; alloc->super->num_quar += ALLOC_SPECIAL_MAX/2 + 1; lock_quick_unlock(&alloc->super->lock); + /* so 1 lock per mem+alloc/2 deletes */ } void @@ -161,6 +169,7 @@ alloc_special_release(struct alloc_cache* alloc, alloc_special_t* mem) log_assert(alloc); if(!mem) return; + alloc_special_clean(mem); if(alloc->super && alloc->num_quar >= ALLOC_SPECIAL_MAX) { /* push it to the super structure */ alloc->special_allocated --; diff --git a/util/alloc.h b/util/alloc.h index 2801c8e89..05f7930f9 100644 --- a/util/alloc.h +++ b/util/alloc.h @@ -43,13 +43,6 @@ * o The packed rrset type needs to be kept on special freelists, * so that they are reused for other packet rrset allocations. * - * Design choices: - * o The global malloc/free is used to handle fragmentation, etc. - * If freelists become very large, it is returned to the system. - * o Only 1k and smaller is cached, bigger uses malloc. - * Because DNS fragments are mostly this size. - * o On startup preallocated memory can be given, so threads can - * avoid contention in the startup phase. 
*/ #ifndef UTIL_ALLOC_H @@ -89,13 +82,14 @@ struct alloc_cache { * Init alloc (zeroes the struct). * @param alloc: this parameter is allocated by the caller. * @param super: super to use (init that before with super_init). + * Pass this argument NULL to init the toplevel alloc structure. */ void alloc_init(struct alloc_cache* alloc, struct alloc_cache* super); /** * Free the alloc. Pushes all the cached items into the super structure. - * Or deletes them if super is NULL. - * Does not free the alloc struct itself. + * Or deletes them if alloc->super is NULL. + * Does not free the alloc struct itself (it was also allocated by caller). * @param alloc: is almost zeroed on exit (except some stats). */ void alloc_delete(struct alloc_cache* alloc); @@ -104,11 +98,13 @@ void alloc_delete(struct alloc_cache* alloc); * Get a new special_t element. * @param alloc: where to alloc it. * @return: memory block. Will not return NULL (instead fatal_exit). + * The block is zeroed. */ alloc_special_t* alloc_special_obtain(struct alloc_cache* alloc); /** * Return special_t back to pool. + * The block is cleaned up (zeroed) which also invalidates the ID inside. * @param alloc: where to alloc it. * @param mem: block to free. */ diff --git a/util/locks.h b/util/locks.h index b0dfa08e7..408f14a33 100644 --- a/util/locks.h +++ b/util/locks.h @@ -69,6 +69,15 @@ __FILE__, __LINE__, strerror(err)); \ } while(0) +#define USE_THREAD_DEBUG +#ifdef USE_THREAD_DEBUG +/******************* THREAD DEBUG ************************/ +/* (some) checking; to detect races and deadlocks. */ +#include "testcode/checklocks.h" + +#else /* USE_THREAD_DEBUG */ +#define lock_protect(lock, area, size) /* nop */ + #ifdef HAVE_PTHREAD #include <pthread.h> @@ -210,6 +219,7 @@ typedef void* ub_thread_key_t; #endif /* HAVE_SOLARIS_THREADS */ #endif /* HAVE_PTHREAD */ +#endif /* USE_THREAD_DEBUG */ /** * Block all signals for this thread.