From: Ralph Boehme Date: Thu, 4 Apr 2024 10:31:05 +0000 (+0200) Subject: s3/lib: add option "serverid watch:debug = yes" to print kernel stack of hanging... X-Git-Tag: ldb-2.8.2~15 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b365e1068e47369751f7b626d9ffc429be6e2201;p=thirdparty%2Fsamba.git s3/lib: add option "serverid watch:debug = yes" to print kernel stack of hanging process We only do if sys_have_proc_fds() returns true, so it's most likely linux... Enabled by default with log level 10... BUG: https://bugzilla.samba.org/show_bug.cgi?id=15624 Pair-Programmed-With: Stefan Metzmacher Signed-off-by: Ralph Boehme Signed-off-by: Stefan Metzmacher Reviewed-by: Guenther Deschner (cherry picked from commit 5c57e840527432c4b1a7ec94894939022a9e9622) --- diff --git a/source3/lib/server_id_watch.c b/source3/lib/server_id_watch.c index 50b35f27b3e..c372ec8c431 100644 --- a/source3/lib/server_id_watch.c +++ b/source3/lib/server_id_watch.c @@ -17,17 +17,18 @@ * along with this program. If not, see . */ -#include "replace.h" -#include -#include +#include "includes.h" #include "serverid.h" #include "server_id_watch.h" +#include "lib/util/server_id.h" #include "lib/util/tevent_unix.h" struct server_id_watch_state { struct tevent_context *ev; struct server_id pid; struct timeval start; + struct timeval warn; + bool debug; }; static void server_id_watch_waited(struct tevent_req *subreq); @@ -47,6 +48,12 @@ struct tevent_req *server_id_watch_send(TALLOC_CTX *mem_ctx, state->ev = ev; state->pid = pid; state->start = tevent_timeval_current(); + state->warn = tevent_timeval_add(&state->start, 10, 0); + + state->debug = lp_parm_bool(GLOBAL_SECTION_SNUM, + "serverid watch", + "debug", + CHECK_DEBUGLVL(DBGLVL_DEBUG)); if (!serverid_exists(&state->pid)) { tevent_req_done(req); @@ -86,6 +93,55 @@ static void server_id_watch_waited(struct tevent_req *subreq) } now = tevent_timeval_current(); + + if (!state->debug) { + goto next; + } + + if (timeval_compare(&state->warn, &now) == -1) { + double duration = timeval_elapsed2(&state->start, &now); + char proc_path[64] = { 0, }; + char *kstack = NULL; + struct server_id_buf buf; + const char *pid = server_id_str_buf(state->pid, &buf); + int ret; + + state->warn = tevent_timeval_add(&now, 10, 0); + + if (!procid_is_local(&state->pid) || !sys_have_proc_fds()) { + DBG_ERR("Process %s hanging for %f seconds?\n", + pid, duration); + goto next; + } + + ret = snprintf(proc_path, + ARRAY_SIZE(proc_path), + "/proc/%" PRIu64 "/stack", + state->pid.pid); + if (ret < 0) { + DBG_ERR("Process %s hanging for %f seconds?\n" + "snprintf failed\n", + pid, duration); + goto next; + } + + become_root(); + kstack = file_load(proc_path, NULL, 0, state); + unbecome_root(); + if (kstack == NULL) { + DBG_ERR("Process %s hanging for %f seconds?\n" + "file_load [%s] failed\n", + pid, duration, proc_path); + goto next; + } + + DBG_ERR("Process %s hanging for %f seconds?\n" + "%s:\n%s", + pid, duration, proc_path, kstack); + TALLOC_FREE(kstack); + } + +next: next = tevent_timeval_add(&now, 0, 500000); subreq = tevent_wakeup_send(state, state->ev, next); if (tevent_req_nomem(subreq, req)) {