From: Jaroslav Kysela Date: Wed, 14 Nov 2018 22:15:04 +0000 (+0100) Subject: thread: add mutex watchdog X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=76dd042e0d3bb93e1102eae65c2d23aa31233274;p=thirdparty%2Ftvheadend.git thread: add mutex watchdog --- diff --git a/src/main.c b/src/main.c index 94140ae2f..3a18bc6cb 100644 --- a/src/main.c +++ b/src/main.c @@ -794,7 +794,8 @@ main(int argc, char **argv) opt_nobackup = 0, opt_nobat = 0, opt_subsystems = 0, - opt_tprofile = 0; + opt_tprofile = 0, + opt_thread_debug = 0; const char *opt_config = NULL, *opt_user = NULL, *opt_group = NULL, @@ -904,6 +905,7 @@ main(int argc, char **argv) #endif { 0, "tprofile", N_("Gather timing statistics for the code"), OPT_BOOL, &opt_tprofile }, + { 0, "thrdebug", N_("Thread debugging"), OPT_INT, &opt_thread_debug }, }; @@ -1039,6 +1041,8 @@ main(int argc, char **argv) if (opt_log_debug) log_debug = opt_log_debug; + tvh_thread_init(opt_thread_debug); + tvhlog_init(log_level, log_options, opt_logpath); tvhlog_set_debug(log_debug); tvhlog_set_trace(log_trace); @@ -1375,6 +1379,8 @@ main(int argc, char **argv) tvhftrace(LS_MAIN, config_done); tvhftrace(LS_MAIN, hts_settings_done); + tvh_thread_done(); + if(opt_fork) unlink(opt_pidpath); diff --git a/src/queue.h b/src/queue.h index 3388b46fc..70453a515 100644 --- a/src/queue.h +++ b/src/queue.h @@ -68,6 +68,16 @@ * Extra TAILQ-ops */ +#define TAILQ_SAFE_ENTRY(elm, field) \ + ((elm)->field.tqe_next == NULL && (elm)->field.tqe_prev == NULL) + +#define TAILQ_SAFE_REMOVE(head, elm, field) \ + if ((elm)->field.tqe_next != NULL || (elm)->field.tqe_prev != NULL) { \ + TAILQ_REMOVE(head, elm, field); \ + (elm)->field.tqe_next = NULL; \ + (elm)->field.tqe_prev = NULL; \ + } + #define TAILQ_INSERT_SORTED(head, elm, field, cmpfunc) do { \ if(TAILQ_FIRST(head) == NULL) { \ TAILQ_INSERT_HEAD(head, elm, field); \ diff --git a/src/settings.c b/src/settings.c index 8e3c9efe1..3bc2afb2a 100644 --- a/src/settings.c +++ 
b/src/settings.c @@ -401,10 +401,10 @@ hts_settings_remove(const char *pathfmt, ...) * */ int -hts_settings_open_file(int for_write, const char *pathfmt, ...) +hts_settings_open_file(int flags, const char *pathfmt, ...) { char path[PATH_MAX]; - int flags; + int _flags; va_list ap; /* Build path */ @@ -413,13 +413,16 @@ hts_settings_open_file(int for_write, const char *pathfmt, ...) va_end(ap); /* Create directories */ - if (for_write) + if (flags & HTS_SETTINGS_OPEN_WRITE) if (hts_settings_makedirs(path)) return -1; /* Open file */ - flags = for_write ? O_CREAT | O_TRUNC | O_WRONLY : O_RDONLY; + _flags = (flags & HTS_SETTINGS_OPEN_WRITE) ? O_CREAT | O_TRUNC | O_WRONLY : O_RDONLY; - return tvh_open(path, flags, S_IRUSR | S_IWUSR); + if (flags & HTS_SETTINGS_OPEN_DIRECT) + return open(path, _flags, S_IRUSR | S_IWUSR); + + return tvh_open(path, _flags, S_IRUSR | S_IWUSR); } /* diff --git a/src/settings.h b/src/settings.h index be2cc5ecd..4bdb0f69b 100644 --- a/src/settings.h +++ b/src/settings.h @@ -23,6 +23,9 @@ #include "htsmsg.h" +#define HTS_SETTINGS_OPEN_WRITE (1<<0) +#define HTS_SETTINGS_OPEN_DIRECT (1<<1) + void hts_settings_init(const char *confpath); void hts_settings_done(void); @@ -37,7 +40,7 @@ void hts_settings_remove(const char *pathfmt, ...); const char *hts_settings_get_root(void); -int hts_settings_open_file(int for_write, const char *pathfmt, ...); +int hts_settings_open_file(int flags, const char *pathfmt, ...); int hts_settings_buildpath(char *dst, size_t dstsize, const char *pathfmt, ...); diff --git a/src/tvh_thread.c b/src/tvh_thread.c index 3849ec5b0..7dd29759a 100644 --- a/src/tvh_thread.c +++ b/src/tvh_thread.c @@ -1,4 +1,5 @@ #define __USE_GNU +#define TVH_THREAD_C 1 #include "tvheadend.h" #include #include @@ -6,8 +7,8 @@ #include #include #include -#define TVH_THREAD_C 1 -#include "tvh_thread.h" + +#include "settings.h" #ifdef PLATFORM_LINUX #include @@ -18,6 +19,13 @@ #include #endif +int tvh_thread_debug; +static int tvhwatch_done; 
+static pthread_t thrwatch_tid; +static pthread_mutex_t thrwatch_mutex = PTHREAD_MUTEX_INITIALIZER; +static TAILQ_HEAD(, tvh_mutex) thrwatch_mutexes = TAILQ_HEAD_INITIALIZER(thrwatch_mutexes); +static int64_t tvh_thread_crash_time; + /* * thread routines */ @@ -33,8 +41,25 @@ thread_state { char name[17]; }; +static void +thread_get_name(pthread_t tid, char *buf, int len) +{ + buf[0] = '?'; + buf[1] = '\0'; +#if defined(PLATFORM_LINUX) + /* Set name */ + if (len >= 16) + prctl(PR_GET_NAME, buf); +#elif defined(PLATFORM_FREEBSD) + /* Get name of thread */ + //pthread_get_name_np(tid, buf); ??? +#elif defined(PLATFORM_DARWIN) + // ??? +#endif +} + static void * -thread_wrapper ( void *p ) +thread_wrapper(void *p) { struct thread_state *ts = p; sigset_t set; @@ -117,6 +142,7 @@ tvh_thread_renice(int value) int tvh_mutex_init(tvh_mutex_t *restrict mutex, const pthread_mutexattr_t *restrict attr) { + memset(mutex, 0, sizeof(*mutex)); return pthread_mutex_init(&mutex->mutex, attr); } @@ -125,21 +151,64 @@ int tvh_mutex_destroy(tvh_mutex_t *mutex) return pthread_mutex_destroy(&mutex->mutex); } -int tvh_mutex_lock(tvh_mutex_t *mutex) +static void tvh_mutex_add_to_list(tvh_mutex_t *mutex, const char *filename, int lineno) +{ + pthread_mutex_lock(&thrwatch_mutex); + if (filename != NULL) { + mutex->thread = pthread_self(); + mutex->filename = filename; + mutex->lineno = lineno; + } + mutex->tstamp = getfastmonoclock(); + TAILQ_SAFE_REMOVE(&thrwatch_mutexes, mutex, link); + TAILQ_INSERT_HEAD(&thrwatch_mutexes, mutex, link); + pthread_mutex_unlock(&thrwatch_mutex); +} + +static void tvh_mutex_remove_from_list(tvh_mutex_t *mutex) { - return pthread_mutex_lock(&mutex->mutex); + pthread_mutex_lock(&thrwatch_mutex); + TAILQ_SAFE_REMOVE(&thrwatch_mutexes, mutex, link); + mutex->filename = NULL; + mutex->lineno = 0; + pthread_mutex_unlock(&thrwatch_mutex); } -int tvh_mutex_trylock(tvh_mutex_t *mutex) +static void tvh_mutex_remove_from_list_keep_info(tvh_mutex_t *mutex) { - return 
pthread_mutex_trylock(&mutex->mutex); + pthread_mutex_lock(&thrwatch_mutex); + TAILQ_SAFE_REMOVE(&thrwatch_mutexes, mutex, link); + pthread_mutex_unlock(&thrwatch_mutex); } -int tvh_mutex_unlock(tvh_mutex_t *mutex) +int tvh__mutex_lock(tvh_mutex_t *mutex, const char *filename, int lineno) { - return pthread_mutex_unlock(&mutex->mutex); + int r; + tvh_mutex_add_to_list(mutex, filename, lineno); + r = pthread_mutex_lock(&mutex->mutex); + if (r) + tvh_mutex_remove_from_list(mutex); + return r; } +int tvh__mutex_trylock(tvh_mutex_t *mutex, const char *filename, int lineno) +{ + int r; + tvh_mutex_add_to_list(mutex, filename, lineno); + r = pthread_mutex_trylock(&mutex->mutex); + if (r) + tvh_mutex_remove_from_list(mutex); + return r; +} + +int tvh__mutex_unlock(tvh_mutex_t *mutex) +{ + int r; + r = pthread_mutex_unlock(&mutex->mutex); + if (r == 0) + tvh_mutex_remove_from_list(mutex); + return r; +} int tvh_mutex_timedlock @@ -208,13 +277,22 @@ int tvh_cond_wait ( tvh_cond_t *cond, tvh_mutex_t *mutex) { - return pthread_cond_wait(&cond->cond, &mutex->mutex); + int r; + + tvh_mutex_remove_from_list_keep_info(mutex); + r = pthread_cond_wait(&cond->cond, &mutex->mutex); + tvh_mutex_add_to_list(mutex, NULL, -1); + return r; } int tvh_cond_timedwait ( tvh_cond_t *cond, tvh_mutex_t *mutex, int64_t monoclock ) { + int r; + + tvh_mutex_remove_from_list_keep_info(mutex); + #if defined(PLATFORM_DARWIN) /* Use a relative timedwait implementation */ int64_t now = getmonoclock(); @@ -228,19 +306,27 @@ tvh_cond_timedwait ts.tv_nsec = (relative % MONOCLOCK_RESOLUTION) * (1000000000ULL/MONOCLOCK_RESOLUTION); - return pthread_cond_timedwait_relative_np(&cond->cond, &mutex->mutex, &ts); + r = pthread_cond_timedwait_relative_np(&cond->cond, &mutex->mutex, &ts); #else struct timespec ts; ts.tv_sec = monoclock / MONOCLOCK_RESOLUTION; ts.tv_nsec = (monoclock % MONOCLOCK_RESOLUTION) * (1000000000ULL/MONOCLOCK_RESOLUTION); - return pthread_cond_timedwait(&cond->cond, &mutex->mutex, &ts); + r 
= pthread_cond_timedwait(&cond->cond, &mutex->mutex, &ts); #endif + + tvh_mutex_add_to_list(mutex, NULL, -1); + return r; } int tvh_cond_timedwait_ts(tvh_cond_t *cond, tvh_mutex_t *mutex, struct timespec *ts) { - return pthread_cond_timedwait(&cond->cond, &mutex->mutex, ts); + int r; + + tvh_mutex_remove_from_list_keep_info(mutex); + r = pthread_cond_timedwait(&cond->cond, &mutex->mutex, ts); + tvh_mutex_add_to_list(mutex, NULL, -1); + return r; } void @@ -249,3 +335,59 @@ tvh_mutex_not_held(const char *file, int line) fprintf(stderr, "Mutex not held at %s:%d\n", file, line); abort(); } + +static void tvh_thread_mutex_deadlock(tvh_mutex_t *mutex) +{ + int fd = hts_settings_open_file(HTS_SETTINGS_OPEN_WRITE | HTS_SETTINGS_OPEN_DIRECT, "mutex-deadlock.txt"); + if (fd < 0) fd = fileno(stderr); + int sid = mutex->mutex.__data.__owner; /* unportable */ + char name[256]; + thread_get_name(mutex->thread, name, sizeof(name)); + FILE *f = fdopen(fd, "w"); + fprintf(f, "Thread %i: %s\n", sid, name); + fprintf(f, " locked in: %s:%i\n", mutex->filename, mutex->lineno); + fclose(f); + abort(); +} + +static void *tvh_thread_watch_thread(void *aux) +{ + int64_t now; + tvh_mutex_t *mutex, dmutex; + + while (!tvhwatch_done) { + pthread_mutex_lock(&thrwatch_mutex); + now = getfastmonoclock(); + mutex = TAILQ_LAST(&thrwatch_mutexes, tvh_mutex_queue); + if (mutex && mutex->tstamp + sec2mono(5) < now) { + pthread_mutex_unlock(&thrwatch_mutex); + tvh_thread_mutex_deadlock(mutex); + } + pthread_mutex_unlock(&thrwatch_mutex); + if (tvh_thread_debug == 12345678 && tvh_thread_crash_time < getfastmonoclock()) { + tvh_thread_debug--; + tvh_mutex_init(&dmutex, NULL); + tvh_mutex_lock(&dmutex); + } + tvh_usleep(1000000); + } + return NULL; +} + +void tvh_thread_init(int debug_level) +{ + tvh_thread_debug = debug_level; + tvh_thread_crash_time = getfastmonoclock() + sec2mono(15); + if (debug_level > 0) { + tvhwatch_done = 0; + tvh_thread_create(&thrwatch_tid, NULL, tvh_thread_watch_thread, NULL, 
"thrwatch"); + } +} + +void tvh_thread_done(void) +{ + if (tvh_thread_debug > 0) { + tvhwatch_done = 1; + pthread_join(thrwatch_tid, NULL); + } +} diff --git a/src/tvh_thread.h b/src/tvh_thread.h index 2fbf435a2..56c7dc448 100644 --- a/src/tvh_thread.h +++ b/src/tvh_thread.h @@ -20,14 +20,23 @@ #include #include -#include "pthread.h" +#include + +#include "queue.h" typedef struct { pthread_cond_t cond; } tvh_cond_t; -typedef struct { +TAILQ_HEAD(tvh_mutex_queue, tvh_mutex); + +typedef struct tvh_mutex { pthread_mutex_t mutex; + pthread_t thread; + const char *filename; + int lineno; + int64_t tstamp; + TAILQ_ENTRY(tvh_mutex) link; } tvh_mutex_t; /* @@ -54,6 +63,11 @@ lock_assert0(tvh_mutex_t *l, const char *file, int line) * */ +extern int tvh_thread_debug; + +void tvh_thread_init(int debug_level); +void tvh_thread_done(void); + int tvh_thread_create (pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg, @@ -64,9 +78,27 @@ int tvh_thread_renice(int value); int tvh_mutex_init(tvh_mutex_t *restrict mutex, const pthread_mutexattr_t *restrict attr); int tvh_mutex_destroy(tvh_mutex_t *mutex); -int tvh_mutex_lock(tvh_mutex_t *mutex); -int tvh_mutex_trylock(tvh_mutex_t *mutex); -int tvh_mutex_unlock(tvh_mutex_t *mutex); +int tvh__mutex_lock(tvh_mutex_t *mutex, const char *filename, int lineno); +#define tvh_mutex_lock(_mutex) \ + ({ \ + tvh_thread_debug == 0 ? \ + pthread_mutex_lock(&(_mutex)->mutex) : \ + tvh__mutex_lock((_mutex), __FILE__, __LINE__); \ + }) +int tvh__mutex_trylock(tvh_mutex_t *mutex, const char *filename, int lineno); +#define tvh_mutex_trylock(_mutex) \ + ({ \ + tvh_thread_debug == 0 ? 
\ + pthread_mutex_trylock(&(_mutex)->mutex) : /* non-debug path must not block either */ \ + tvh__mutex_trylock((_mutex), __FILE__, __LINE__); \ + }) +int tvh__mutex_unlock(tvh_mutex_t *mutex); +static inline int tvh_mutex_unlock(tvh_mutex_t *mutex) +{ + if (tvh_thread_debug == 0) + return pthread_mutex_unlock(&mutex->mutex); + return tvh__mutex_unlock(mutex); +} int tvh_mutex_timedlock(tvh_mutex_t *mutex, int64_t usec); int tvh_cond_init(tvh_cond_t *cond, int monotonic);