From: Willy Tarreau Date: Thu, 17 Apr 2025 06:59:45 +0000 (+0200) Subject: MINOR: debug: make ha_stuck_warning() print the whole message at once X-Git-Tag: v3.2-dev11~14 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=513397ac8215cdc1c0ab077a4a6809d7bd32f262;p=thirdparty%2Fhaproxy.git MINOR: debug: make ha_stuck_warning() print the whole message at once It has been noticed quite a few times during troubleshooting and even testing that warnings can happen in avalanches from multiple threads at the same time, and that their reporting it interleaved bacause the output is produced in small chunks. Originally, this code inspired by the panic code aimed at making sure to log whatever could be emitted in case it would crash later. But this approach was wrong since writes are atomic, and performing 5 writes in sequence in each dumping thread also means that the outputs can be mixed up at 5 different locations between multiple threads. The output of warnings is never very long, and the stack-based buffer is 4kB so let's just concatenate everything in the buffer and emit it at once using a single write(). Now there's no longer this confusion on the output. --- diff --git a/src/debug.c b/src/debug.c index aa2b47388..93372ce01 100644 --- a/src/debug.c +++ b/src/debug.c @@ -830,7 +830,7 @@ void ha_stuck_warning(void) p = HA_ATOMIC_LOAD(&th_ctx->prev_cpu_time); n = now_cpu_time(); - chunk_printf(&buf, + chunk_appendf(&buf, "\nWARNING! thread %u has stopped processing traffic for %llu milliseconds\n" " with %d streams currently blocked, prevented from making any progress.\n" " While this may occasionally happen with inefficient configurations\n" @@ -845,40 +845,37 @@ void ha_stuck_warning(void) tid + 1, (n - p) / 1000000ULL, HA_ATOMIC_LOAD(&ha_thread_ctx[tid].stream_cnt)); - DISGUISE(write(2, buf.area, buf.data)); - - chunk_reset(&buf); ha_thread_dump_one(&buf, 1); - DISGUISE(write(2, buf.area, buf.data)); #ifdef USE_LUA if (get_tainted() & TAINTED_LUA_STUCK_SHARED && global.nbthread > 1) { - chunk_printf(&buf, + chunk_appendf(&buf, "### Note: at least one thread was stuck in a Lua context loaded using the\n" " 'lua-load' directive, which is known for causing heavy contention\n" " when used with threads. Please consider using 'lua-load-per-thread'\n" " instead if your code is safe to run in parallel on multiple threads.\n"); - DISGUISE(write(2, buf.area, buf.data)); } else if (get_tainted() & TAINTED_LUA_STUCK) { - chunk_printf(&buf, + chunk_appendf(&buf, "### Note: at least one thread was stuck in a Lua context in a way that suggests\n" " heavy processing inside a dependency or a long loop that can't yield.\n" " Please make sure any external code you may rely on is safe for use in\n" " an event-driven engine.\n"); - DISGUISE(write(2, buf.area, buf.data)); } #endif if (get_tainted() & TAINTED_MEM_TRIMMING_STUCK) { - chunk_printf(&buf, + chunk_appendf(&buf, "### Note: one thread was found stuck under malloc_trim(), which can run for a\n" " very long time on large memory systems. You way want to disable this\n" " memory reclaiming feature by setting 'no-memory-trimming' in the\n" " 'global' section of your configuration to avoid this in the future.\n"); - DISGUISE(write(2, buf.area, buf.data)); } - chunk_printf(&buf, " => Trying to gracefully recover now.\n"); + chunk_appendf(&buf, " => Trying to gracefully recover now.\n"); + + /* Note: it's important to dump the whole buffer at once to avoid + * interleaved outputs from multiple threads dumping in parallel. + */ DISGUISE(write(2, buf.area, buf.data)); }