From: Willy Tarreau Date: Wed, 25 Oct 2023 13:42:27 +0000 (+0200) Subject: DEBUG: pools: detect that malloc_trim() is in progress X-Git-Tag: v2.9-dev9~28 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=96bb99a87da409c21364d38d2d429c9239ef08df;p=thirdparty%2Fhaproxy.git DEBUG: pools: detect that malloc_trim() is in progress Now when calling ha_panic() with a thread still under malloc_trim(), we'll set a new tainted flag to easily report it, and the output trace will report that this condition happened and will suggest to use no-memory-trimming to avoid it in the future. --- diff --git a/include/haproxy/bug.h b/include/haproxy/bug.h index fbed1203d9..c980727e7a 100644 --- a/include/haproxy/bug.h +++ b/include/haproxy/bug.h @@ -251,6 +251,7 @@ enum tainted_flags { TAINTED_PANIC = 0x00000200, /* a panic dump has started */ TAINTED_LUA_STUCK = 0x00000400, /* stuck in a Lua context */ TAINTED_LUA_STUCK_SHARED = 0x00000800, /* stuck in a shared Lua context */ + TAINTED_MEM_TRIMMING_STUCK = 0x00001000, /* stuck while trimming memory */ }; /* this is a bit field made of TAINTED_*, and is declared in haproxy.c */ diff --git a/include/haproxy/pool.h b/include/haproxy/pool.h index 133e954fc9..5375b35c91 100644 --- a/include/haproxy/pool.h +++ b/include/haproxy/pool.h @@ -100,6 +100,9 @@ /* poison each newly allocated area with this byte if >= 0 */ extern int mem_poison_byte; +/* trim() in progress */ +extern int pool_trim_in_progress; + /* set of POOL_DBG_* flags */ extern uint pool_debugging; diff --git a/src/debug.c b/src/debug.c index fbab47e2e2..9417a9613c 100644 --- a/src/debug.c +++ b/src/debug.c @@ -240,6 +240,10 @@ void ha_thread_dump_one(int thr, int from_signal) } } #endif + + if (HA_ATOMIC_LOAD(&pool_trim_in_progress)) + mark_tainted(TAINTED_MEM_TRIMMING_STUCK); + /* We only emit the backtrace for stuck threads in order not to * waste precious output buffer space with non-interesting data. * Please leave this as the last instruction in this function @@ -468,6 +472,14 @@ void ha_panic() DISGUISE(write(2, trash.area, trash.data)); } #endif + if (get_tainted() & TAINTED_MEM_TRIMMING_STUCK) { + chunk_printf(&trash, + "### Note: one thread was found stuck under malloc_trim(), which can run for a\n" + " very long time on large memory systems. You way want to disable this\n" + " memory reclaiming feature by setting 'no-memory-trimming' in the\n" + " 'global' section of your configuration to avoid this in the future.\n"); + DISGUISE(write(2, trash.area, trash.data)); + } for (;;) abort(); diff --git a/src/pool.c b/src/pool.c index 6af1af48a3..02b5c6d250 100644 --- a/src/pool.c +++ b/src/pool.c @@ -38,6 +38,7 @@ THREAD_LOCAL size_t pool_cache_count = 0; /* #cache objects */ static struct list pools __read_mostly = LIST_HEAD_INIT(pools); int mem_poison_byte __read_mostly = 'P'; +int pool_trim_in_progress = 0; uint pool_debugging __read_mostly = /* set of POOL_DBG_* flags */ #ifdef DEBUG_FAIL_ALLOC POOL_DBG_FAIL_ALLOC | @@ -218,6 +219,8 @@ int malloc_trim(size_t pad) if (disable_trim) return ret; + HA_ATOMIC_INC(&pool_trim_in_progress); + if (my_mallctl) { /* here we're on jemalloc and malloc_trim() is called either * by haproxy or another dependency (the worst case that @@ -263,6 +266,8 @@ int malloc_trim(size_t pad) } } #endif + HA_ATOMIC_DEC(&pool_trim_in_progress); + /* here we have ret=0 if nothing was release, or 1 if some were */ return ret; }