From: Willy Tarreau Date: Mon, 28 Apr 2025 07:42:58 +0000 (+0200) Subject: MEDIUM: threads: keep history of taken locks with DEBUG_THREAD > 0 X-Git-Tag: v3.2-dev13~66 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b8a1c2380b995841e6a8f9ef94dd534d0f83dea7;p=thirdparty%2Fhaproxy.git MEDIUM: threads: keep history of taken locks with DEBUG_THREAD > 0 By only storing a word in each thread context, we can keep the history of all taken/dropped locks by label. This is expected to be very cheap and to permit storing up to 8 consecutive lock operations in 64 bits. That should significantly help detect recursive locks as well as figure out which thread was likely to hinder another one waiting for a lock. For now we only store the final state of the lock; we don't store the attempt to get it. It's just a matter of space since we already need 4 ops (rd,sk,wr,un) which take 2 bits, leaving max 64 labels. We're already around 45. We could also multiply by 5 and still keep 8 bits total per lock; that would limit us to 51 locks max. It seems that most of the time, if we get a watchdog panic, the victim thread will anyway be perfectly located so that we don't need a specific value for this. Another benefit is that we perform a single memory write per lock. 
--- diff --git a/include/haproxy/thread.h b/include/haproxy/thread.h index f723583ab..e6c8e4b9d 100644 --- a/include/haproxy/thread.h +++ b/include/haproxy/thread.h @@ -305,35 +305,61 @@ static inline unsigned long thread_isolated() return _HA_ATOMIC_LOAD(&isolated_thread) == tid; } +/* locking levels, for history and debugging */ +#define _LK_UN 0 +#define _LK_RD 1 +#define _LK_SK 2 +#define _LK_WR 3 + +#if (DEBUG_THREAD < 1) && !defined(DEBUG_FULL) + +#define _lock_wait(_LK_, lbl, expr) do { (void)(expr); } while (0) +#define _lock_cond(_LK_, lbl, expr) ({ typeof(expr) _expr = (expr); _expr; }) + +#else + +#define _lock_wait(_LK_, lbl, expr) do { \ + (void)(expr); \ + th_ctx->lock_history = (th_ctx->lock_history << 8) + ((lbl + 1) << 2) + _LK_; \ + } while (0) +#define _lock_cond(_LK_, lbl, expr) ({ \ + typeof(expr) _expr = (expr); \ + if (!_expr) \ + th_ctx->lock_history = (th_ctx->lock_history << 8) + ((lbl + 1) << 2) + _LK_; \ + _expr; \ + }) + +#endif + #if (DEBUG_THREAD < 2) && !defined(DEBUG_FULL) /* Thread debugging is DISABLED, these are the regular locking functions */ #define HA_SPIN_INIT(l) ({ (*l) = 0; }) #define HA_SPIN_DESTROY(l) ({ (*l) = 0; }) -#define HA_SPIN_LOCK(lbl, l) pl_take_s(l) -#define HA_SPIN_TRYLOCK(lbl, l) (!pl_try_s(l)) -#define HA_SPIN_UNLOCK(lbl, l) pl_drop_s(l) +#define HA_SPIN_LOCK(lbl, l) _lock_wait(_LK_SK, lbl, pl_take_s(l)) +#define HA_SPIN_TRYLOCK(lbl, l) _lock_cond(_LK_SK, lbl, !pl_try_s(l)) +#define HA_SPIN_UNLOCK(lbl, l) _lock_wait(_LK_UN, lbl, pl_drop_s(l)) #define HA_RWLOCK_INIT(l) ({ (*l) = 0; }) #define HA_RWLOCK_DESTROY(l) ({ (*l) = 0; }) -#define HA_RWLOCK_WRLOCK(lbl,l) pl_take_w(l) -#define HA_RWLOCK_TRYWRLOCK(lbl,l) (!pl_try_w(l)) -#define HA_RWLOCK_WRUNLOCK(lbl,l) pl_drop_w(l) -#define HA_RWLOCK_RDLOCK(lbl,l) pl_take_r(l) -#define HA_RWLOCK_TRYRDLOCK(lbl,l) (!pl_try_r(l)) -#define HA_RWLOCK_RDUNLOCK(lbl,l) pl_drop_r(l) +#define HA_RWLOCK_WRLOCK(lbl,l) _lock_wait(_LK_WR, lbl, pl_take_w(l)) +#define 
HA_RWLOCK_TRYWRLOCK(lbl,l) _lock_cond(_LK_WR, lbl, !pl_try_w(l)) +#define HA_RWLOCK_WRUNLOCK(lbl,l) _lock_wait(_LK_UN, lbl, pl_drop_w(l)) +#define HA_RWLOCK_RDLOCK(lbl,l) _lock_wait(_LK_RD, lbl, pl_take_r(l)) +#define HA_RWLOCK_TRYRDLOCK(lbl,l) _lock_cond(_LK_RD, lbl, (!pl_try_r(l))) +#define HA_RWLOCK_RDUNLOCK(lbl,l) _lock_wait(_LK_UN, lbl, pl_drop_r(l)) /* rwlock upgrades via seek locks */ -#define HA_RWLOCK_SKLOCK(lbl,l) pl_take_s(l) /* N --> S */ -#define HA_RWLOCK_SKTOWR(lbl,l) pl_stow(l) /* S --> W */ -#define HA_RWLOCK_WRTOSK(lbl,l) pl_wtos(l) /* W --> S */ -#define HA_RWLOCK_SKTORD(lbl,l) pl_stor(l) /* S --> R */ -#define HA_RWLOCK_WRTORD(lbl,l) pl_wtor(l) /* W --> R */ -#define HA_RWLOCK_SKUNLOCK(lbl,l) pl_drop_s(l) /* S --> N */ -#define HA_RWLOCK_TRYSKLOCK(lbl,l) (!pl_try_s(l)) /* N -?> S */ -#define HA_RWLOCK_TRYRDTOSK(lbl,l) (!pl_try_rtos(l)) /* R -?> S */ -#define HA_RWLOCK_TRYRDTOWR(lbl, l) (!pl_try_rtow(l)) /* R -?> W */ +#define HA_RWLOCK_SKLOCK(lbl,l) _lock_wait(_LK_SK, lbl, pl_take_s(l)) /* N --> S */ +#define HA_RWLOCK_SKTOWR(lbl,l) _lock_wait(_LK_WR, lbl, pl_stow(l)) /* S --> W */ +#define HA_RWLOCK_WRTOSK(lbl,l) _lock_wait(_LK_SK, lbl, pl_wtos(l)) /* W --> S */ +#define HA_RWLOCK_SKTORD(lbl,l) _lock_wait(_LK_RD, lbl, pl_stor(l)) /* S --> R */ +#define HA_RWLOCK_WRTORD(lbl,l) _lock_wait(_LK_RD, lbl, pl_wtor(l)) /* W --> R */ +#define HA_RWLOCK_SKUNLOCK(lbl,l) _lock_wait(_LK_UN, lbl, pl_drop_s(l)) /* S --> N */ +#define HA_RWLOCK_TRYSKLOCK(lbl,l) _lock_cond(_LK_SK, lbl, !pl_try_s(l)) /* N -?> S */ +#define HA_RWLOCK_TRYRDTOSK(lbl,l) _lock_cond(_LK_SK, lbl, !pl_try_rtos(l)) /* R -?> S */ +#define HA_RWLOCK_TRYRDTOWR(lbl, l) _lock_cond(_LK_WR, lbl, !pl_try_rtow(l)) /* R -?> W */ #else /* (DEBUG_THREAD < 2) && !defined(DEBUG_FULL) */ @@ -368,28 +394,28 @@ static inline unsigned long thread_isolated() #define HA_SPIN_INIT(l) __spin_init(l) #define HA_SPIN_DESTROY(l) __spin_destroy(l) -#define HA_SPIN_LOCK(lbl, l) __spin_lock(lbl, l, __func__, 
__FILE__, __LINE__) -#define HA_SPIN_TRYLOCK(lbl, l) __spin_trylock(lbl, l, __func__, __FILE__, __LINE__) -#define HA_SPIN_UNLOCK(lbl, l) __spin_unlock(lbl, l, __func__, __FILE__, __LINE__) +#define HA_SPIN_LOCK(lbl, l) _lock_wait(_LK_SK, lbl, __spin_lock(lbl, l, __func__, __FILE__, __LINE__)) +#define HA_SPIN_TRYLOCK(lbl, l) _lock_cond(_LK_SK, lbl, __spin_trylock(lbl, l, __func__, __FILE__, __LINE__)) +#define HA_SPIN_UNLOCK(lbl, l) _lock_wait(_LK_UN, lbl, __spin_unlock(lbl, l, __func__, __FILE__, __LINE__)) #define HA_RWLOCK_INIT(l) __ha_rwlock_init((l)) #define HA_RWLOCK_DESTROY(l) __ha_rwlock_destroy((l)) -#define HA_RWLOCK_WRLOCK(lbl,l) __ha_rwlock_wrlock(lbl, l, __func__, __FILE__, __LINE__) -#define HA_RWLOCK_TRYWRLOCK(lbl,l) __ha_rwlock_trywrlock(lbl, l, __func__, __FILE__, __LINE__) -#define HA_RWLOCK_WRUNLOCK(lbl,l) __ha_rwlock_wrunlock(lbl, l, __func__, __FILE__, __LINE__) -#define HA_RWLOCK_RDLOCK(lbl,l) __ha_rwlock_rdlock(lbl, l) -#define HA_RWLOCK_TRYRDLOCK(lbl,l) __ha_rwlock_tryrdlock(lbl, l) -#define HA_RWLOCK_RDUNLOCK(lbl,l) __ha_rwlock_rdunlock(lbl, l) - -#define HA_RWLOCK_SKLOCK(lbl,l) __ha_rwlock_sklock(lbl, l, __func__, __FILE__, __LINE__) -#define HA_RWLOCK_SKTOWR(lbl,l) __ha_rwlock_sktowr(lbl, l, __func__, __FILE__, __LINE__) -#define HA_RWLOCK_WRTOSK(lbl,l) __ha_rwlock_wrtosk(lbl, l, __func__, __FILE__, __LINE__) -#define HA_RWLOCK_SKTORD(lbl,l) __ha_rwlock_sktord(lbl, l, __func__, __FILE__, __LINE__) -#define HA_RWLOCK_WRTORD(lbl,l) __ha_rwlock_wrtord(lbl, l, __func__, __FILE__, __LINE__) -#define HA_RWLOCK_SKUNLOCK(lbl,l) __ha_rwlock_skunlock(lbl, l, __func__, __FILE__, __LINE__) -#define HA_RWLOCK_TRYSKLOCK(lbl,l) __ha_rwlock_trysklock(lbl, l, __func__, __FILE__, __LINE__) -#define HA_RWLOCK_TRYRDTOSK(lbl,l) __ha_rwlock_tryrdtosk(lbl, l, __func__, __FILE__, __LINE__) -#define HA_RWLOCK_TRYRDTOWR(lbl,l) __ha_rwlock_tryrdtowr(lbl, l, __func__, __FILE__, __LINE__) +#define HA_RWLOCK_WRLOCK(lbl,l) _lock_wait(_LK_WR, lbl, 
__ha_rwlock_wrlock(lbl, l, __func__, __FILE__, __LINE__)) +#define HA_RWLOCK_TRYWRLOCK(lbl,l) _lock_cond(_LK_WR, lbl, __ha_rwlock_trywrlock(lbl, l, __func__, __FILE__, __LINE__)) +#define HA_RWLOCK_WRUNLOCK(lbl,l) _lock_wait(_LK_UN, lbl, __ha_rwlock_wrunlock(lbl, l, __func__, __FILE__, __LINE__)) +#define HA_RWLOCK_RDLOCK(lbl,l) _lock_wait(_LK_RD, lbl, __ha_rwlock_rdlock(lbl, l)) +#define HA_RWLOCK_TRYRDLOCK(lbl,l) _lock_cond(_LK_RD, lbl, __ha_rwlock_tryrdlock(lbl, l)) +#define HA_RWLOCK_RDUNLOCK(lbl,l) _lock_wait(_LK_UN, lbl, __ha_rwlock_rdunlock(lbl, l)) + +#define HA_RWLOCK_SKLOCK(lbl,l) _lock_wait(_LK_SK, lbl, __ha_rwlock_sklock(lbl, l, __func__, __FILE__, __LINE__)) +#define HA_RWLOCK_SKTOWR(lbl,l) _lock_wait(_LK_WR, lbl, __ha_rwlock_sktowr(lbl, l, __func__, __FILE__, __LINE__)) +#define HA_RWLOCK_WRTOSK(lbl,l) _lock_wait(_LK_SK, lbl, __ha_rwlock_wrtosk(lbl, l, __func__, __FILE__, __LINE__)) +#define HA_RWLOCK_SKTORD(lbl,l) _lock_wait(_LK_RD, lbl, __ha_rwlock_sktord(lbl, l, __func__, __FILE__, __LINE__)) +#define HA_RWLOCK_WRTORD(lbl,l) _lock_wait(_LK_RD, lbl, __ha_rwlock_wrtord(lbl, l, __func__, __FILE__, __LINE__)) +#define HA_RWLOCK_SKUNLOCK(lbl,l) _lock_wait(_LK_UN, lbl, __ha_rwlock_skunlock(lbl, l, __func__, __FILE__, __LINE__)) +#define HA_RWLOCK_TRYSKLOCK(lbl,l) _lock_cond(_LK_SK, lbl, __ha_rwlock_trysklock(lbl, l, __func__, __FILE__, __LINE__)) +#define HA_RWLOCK_TRYRDTOSK(lbl,l) _lock_cond(_LK_RD, lbl, __ha_rwlock_tryrdtosk(lbl, l, __func__, __FILE__, __LINE__)) +#define HA_RWLOCK_TRYRDTOWR(lbl,l) _lock_cond(_LK_WR, lbl, __ha_rwlock_tryrdtowr(lbl, l, __func__, __FILE__, __LINE__)) /* Following functions are used to collect some stats about locks. We wrap * pthread functions to known how much time we wait in a lock. 
*/ diff --git a/include/haproxy/tinfo-t.h b/include/haproxy/tinfo-t.h index f0e32534f..9e379ae5b 100644 --- a/include/haproxy/tinfo-t.h +++ b/include/haproxy/tinfo-t.h @@ -165,7 +165,7 @@ struct thread_ctx { uint64_t prev_mono_time; /* previous system wide monotonic time (leaving poll) */ uint64_t curr_mono_time; /* latest system wide monotonic time (leaving poll) */ - // around 8 bytes here for thread-local variables + ulong lock_history; /* history of used locks, see thread.h for more details */ // third cache line here on 64 bits: accessed mostly using atomic ops ALWAYS_ALIGN(64);