From 55890a919454a2165031a04b175ca92e3ed70e69 Mon Sep 17 00:00:00 2001 From: Andrew Dunstan Date: Mon, 6 Apr 2026 12:39:14 -0400 Subject: [PATCH] Add errdetail() with PID and UID about source of termination signal. When a backend is terminated via pg_terminate_backend() or an external SIGTERM, the error message now includes the sender's PID and UID as errdetail, making it easier to identify the source of unexpected terminations in multi-user environments. On platforms that support SA_SIGINFO (Linux, FreeBSD, and most modern Unix systems), the signal handler captures si_pid and si_uid from the siginfo_t structure. On platforms without SA_SIGINFO, the detail is simply omitted. Author: Jakub Wartak Reviewed-by: Andrew Dunstan Reviewed-by: Chao Li <1356863904@qq.com> Discussion: https://postgr.es/m/CAKZiRmyrOWovZSdixpLd3PGMQXuQL_zw2Ght5XhHCkQ1uDsxjw@mail.gmail.com --- configure | 42 +++++++++++++++++++++++++++++++ configure.ac | 18 +++++++++++++ meson.build | 4 +++ src/backend/replication/syncrep.c | 14 ++++++++++- src/backend/tcop/postgres.c | 34 +++++++++++++++++++------ src/backend/utils/init/globals.c | 2 ++ src/bin/psql/t/001_basic.pl | 7 +++--- src/include/miscadmin.h | 2 ++ src/include/pg_config.h.in | 3 +++ src/port/pqsignal.c | 35 +++++++++++++++++++++++--- 10 files changed, 146 insertions(+), 15 deletions(-) diff --git a/configure b/configure index c56ef60226d..f66c1054a7a 100755 --- a/configure +++ b/configure @@ -15797,6 +15797,48 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then fi fi +# Check for SA_SIGINFO extended signal handler availability +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SA_SIGINFO" >&5 +$as_echo_n "checking for SA_SIGINFO... " >&6; } +if ${ac_cv_have_sa_siginfo+:} false; then : + $as_echo_n "(cached) " >&6 +else + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + + #include + #include + +int +main () +{ + + struct sigaction sa; + sa.sa_flags = SA_SIGINFO; + + ; + return 0; +} + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_have_sa_siginfo=yes +else + ac_cv_have_sa_siginfo=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sa_siginfo" >&5 +$as_echo "$ac_cv_have_sa_siginfo" >&6; } + +if test "x$ac_cv_have_sa_siginfo" = "xyes"; then + +$as_echo "#define HAVE_SA_SIGINFO 1" >>confdefs.h + +fi ## ## Functions, global variables diff --git a/configure.ac b/configure.ac index ff5dd64468e..8d176bd3468 100644 --- a/configure.ac +++ b/configure.ac @@ -1817,6 +1817,24 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then fi fi +# Check for SA_SIGINFO extended signal handler availability +AC_CACHE_CHECK([for SA_SIGINFO], [ac_cv_have_sa_siginfo], [ + AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM([[ + #include + #include + ]], [[ + struct sigaction sa; + sa.sa_flags = SA_SIGINFO; + ]]) + ], + [ac_cv_have_sa_siginfo=yes], + [ac_cv_have_sa_siginfo=no]) +]) + +if test "x$ac_cv_have_sa_siginfo" = "xyes"; then + AC_DEFINE([HAVE_SA_SIGINFO], 1, [Define to 1 if you have SA_SIGINFO available.]) +fi ## ## Functions, global variables diff --git a/meson.build b/meson.build index 43d5ffc30b1..be97e986e5d 100644 --- a/meson.build +++ b/meson.build @@ -2985,6 +2985,10 @@ if cc.has_member('struct sockaddr', 'sa_len', cdata.set('HAVE_STRUCT_SOCKADDR_SA_LEN', 1) endif +if cc.has_header_symbol('signal.h', 'SA_SIGINFO') + cdata.set('HAVE_SA_SIGINFO', 1) +endif + if cc.has_member('struct tm', 'tm_zone', args: test_c_args, include_directories: postgres_inc, prefix: ''' diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c index 9cecc83ed68..46a778f0917 100644 --- a/src/backend/replication/syncrep.c +++ b/src/backend/replication/syncrep.c @@ -300,10 +300,22 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit) */ if (ProcDiePending) { + /* + * 
ProcDieSenderPid/Uid are read directly from the globals here + * rather than copied to locals first; a second SIGTERM could + * change them between reads, but that is harmless because the + * process is about to die anyway. The signal sender detail is + * inlined rather than using a separate errdetail() call because + * it must be appended to the existing detail message. + */ ereport(WARNING, (errcode(ERRCODE_ADMIN_SHUTDOWN), errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"), - errdetail("The transaction has already committed locally, but might not have been replicated to the standby."))); + errdetail("The transaction has already committed locally, but might not have been replicated to the standby.%s", + ProcDieSenderPid == 0 ? "" : + psprintf("\nSignal sent by PID %d, UID %d.", + (int) ProcDieSenderPid, + (int) ProcDieSenderUid)))); whereToSendOutput = DestNone; SyncRepCancelWait(); break; diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index cb353f2ed46..14a061599bc 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -109,6 +109,14 @@ int client_connection_check_interval = 0; /* flags for non-system relation kinds to restrict use */ int restrict_nonsystem_relation_kind; +/* + * Include signal sender PID/UID as errdetail when available (SA_SIGINFO). + * The caller must supply the (already-captured) pid and uid values. + */ +#define ERRDETAIL_SIGNAL_SENDER(pid, uid) \ + ((pid) == 0 ? 
0 : \ + errdetail("Signal sent by PID %d, UID %d.", (int) (pid), (int) (uid))) + /* ---------------- * private typedefs etc * ---------------- @@ -3347,7 +3355,12 @@ ProcessInterrupts(void) if (ProcDiePending) { + int sender_pid = ProcDieSenderPid; + int sender_uid = ProcDieSenderUid; + ProcDiePending = false; + ProcDieSenderPid = 0; + ProcDieSenderUid = 0; QueryCancelPending = false; /* ProcDie trumps QueryCancel */ LockErrorCleanup(); /* As in quickdie, don't risk sending to client during auth */ @@ -3360,15 +3373,18 @@ ProcessInterrupts(void) else if (AmAutoVacuumWorkerProcess()) ereport(FATAL, (errcode(ERRCODE_ADMIN_SHUTDOWN), - errmsg("terminating autovacuum process due to administrator command"))); + errmsg("terminating autovacuum process due to administrator command"), + ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid))); else if (IsLogicalWorker()) ereport(FATAL, (errcode(ERRCODE_ADMIN_SHUTDOWN), - errmsg("terminating logical replication worker due to administrator command"))); + errmsg("terminating logical replication worker due to administrator command"), + ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid))); else if (IsLogicalLauncher()) { ereport(DEBUG1, - (errmsg_internal("logical replication launcher shutting down"))); + (errmsg_internal("logical replication launcher shutting down"), + ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid))); /* * The logical replication launcher can be stopped at any time. 
@@ -3379,23 +3395,27 @@ ProcessInterrupts(void) else if (AmWalReceiverProcess()) ereport(FATAL, (errcode(ERRCODE_ADMIN_SHUTDOWN), - errmsg("terminating walreceiver process due to administrator command"))); + errmsg("terminating walreceiver process due to administrator command"), + ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid))); else if (AmBackgroundWorkerProcess()) ereport(FATAL, (errcode(ERRCODE_ADMIN_SHUTDOWN), errmsg("terminating background worker \"%s\" due to administrator command", - MyBgworkerEntry->bgw_type))); + MyBgworkerEntry->bgw_type), + ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid))); else if (AmIoWorkerProcess()) { ereport(DEBUG1, - (errmsg_internal("io worker shutting down due to administrator command"))); + (errmsg_internal("io worker shutting down due to administrator command"), + ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid))); proc_exit(0); } else ereport(FATAL, (errcode(ERRCODE_ADMIN_SHUTDOWN), - errmsg("terminating connection due to administrator command"))); + errmsg("terminating connection due to administrator command"), + ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid))); } if (CheckClientConnectionPending) diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 24ddb276f0c..bbd28d14d99 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -43,6 +43,8 @@ volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false; volatile uint32 InterruptHoldoffCount = 0; volatile uint32 QueryCancelHoldoffCount = 0; volatile uint32 CritSectionCount = 0; +volatile int ProcDieSenderPid = 0; +volatile int ProcDieSenderUid = 0; int MyProcPid; pg_time_t MyStartTime; diff --git a/src/bin/psql/t/001_basic.pl b/src/bin/psql/t/001_basic.pl index 6839f27cbe5..7c21204c1f2 100644 --- a/src/bin/psql/t/001_basic.pl +++ b/src/bin/psql/t/001_basic.pl @@ -142,12 +142,11 @@ my ($ret, $out, $err) = $node->psql('postgres', is($ret, 2, 'server crash: psql exit code'); like($out, qr/before/, 'server crash: 
output before crash'); unlike($out, qr/AFTER/, 'server crash: no output after crash'); -is( $err, - 'psql:<stdin>:2: FATAL: terminating connection due to administrator command -psql:<stdin>:2: server closed the connection unexpectedly +like( $err, qr/psql:<stdin>:2: FATAL: terminating connection due to administrator command +(?:DETAIL: Signal sent by PID \d+, UID \d+\.\n)?psql:<stdin>:2: server closed the connection unexpectedly This probably means the server terminated abnormally before or while processing the request. -psql:<stdin>:2: error: connection to server was lost', +psql:<stdin>:2: error: connection to server was lost/, 'server crash: error message'); # test \errverbose diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 93b7816c09c..e099b91014f 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -90,6 +90,8 @@ extern PGDLLIMPORT volatile sig_atomic_t InterruptPending; extern PGDLLIMPORT volatile sig_atomic_t QueryCancelPending; extern PGDLLIMPORT volatile sig_atomic_t ProcDiePending; +extern PGDLLIMPORT volatile int ProcDieSenderPid; +extern PGDLLIMPORT volatile int ProcDieSenderUid; extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t TransactionTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending; diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 9f6d512347e..4f8113c144b 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -354,6 +354,9 @@ /* Define to 1 if you have the `rl_variable_bind' function. */ #undef HAVE_RL_VARIABLE_BIND +/* Define to 1 if you have SA_SIGINFO available. */ +#undef HAVE_SA_SIGINFO + /* Define to 1 if you have the <security/pam_appl.h> header file. 
*/ #undef HAVE_SECURITY_PAM_APPL_H diff --git a/src/port/pqsignal.c b/src/port/pqsignal.c index fbdf9341c2f..8841464b5cb 100644 --- a/src/port/pqsignal.c +++ b/src/port/pqsignal.c @@ -82,10 +82,19 @@ static volatile pqsigfunc pqsignal_handlers[PG_NSIG]; * * This wrapper also handles restoring the value of errno. */ +#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO) +static void +wrapper_handler(int signo, siginfo_t * info, void *context) +#else static void wrapper_handler(SIGNAL_ARGS) +#endif { int save_errno = errno; +#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO) + /* SA_SIGINFO signature uses signo, not SIGNAL_ARGS macro */ + int postgres_signal_arg = signo; +#endif Assert(postgres_signal_arg > 0); Assert(postgres_signal_arg < PG_NSIG); @@ -105,6 +114,14 @@ wrapper_handler(SIGNAL_ARGS) raise(postgres_signal_arg); return; } + +#ifdef HAVE_SA_SIGINFO + if (signo == SIGTERM && info) + { + ProcDieSenderPid = info->si_pid; + ProcDieSenderUid = info->si_uid; + } +#endif #endif (*pqsignal_handlers[postgres_signal_arg]) (postgres_signal_arg); @@ -125,6 +142,7 @@ pqsignal(int signo, pqsigfunc func) #if !(defined(WIN32) && defined(FRONTEND)) struct sigaction act; #endif + bool use_wrapper = false; Assert(signo > 0); Assert(signo < PG_NSIG); @@ -132,13 +150,24 @@ pqsignal(int signo, pqsigfunc func) if (func != SIG_IGN && func != SIG_DFL) { pqsignal_handlers[signo] = func; /* assumed atomic */ - func = wrapper_handler; + use_wrapper = true; } #if !(defined(WIN32) && defined(FRONTEND)) - act.sa_handler = func; sigemptyset(&act.sa_mask); act.sa_flags = SA_RESTART; +#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO) + if (use_wrapper) + { + act.sa_sigaction = wrapper_handler; + act.sa_flags |= SA_SIGINFO; + } + else + act.sa_handler = func; +#else + act.sa_handler = use_wrapper ? 
wrapper_handler : func; +#endif + #ifdef SA_NOCLDSTOP if (signo == SIGCHLD) act.sa_flags |= SA_NOCLDSTOP; @@ -147,7 +176,7 @@ pqsignal(int signo, pqsigfunc func) Assert(false); /* probably indicates coding error */ #else /* Forward to Windows native signal system. */ - if (signal(signo, func) == SIG_ERR) + if (signal(signo, use_wrapper ? wrapper_handler : func) == SIG_ERR) Assert(false); /* probably indicates coding error */ #endif } -- 2.47.3