git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Add errdetail() with PID and UID about source of termination signal.
author: Andrew Dunstan <andrew@dunslane.net>
Mon, 6 Apr 2026 16:39:14 +0000 (12:39 -0400)
committer: Andrew Dunstan <andrew@dunslane.net>
Tue, 7 Apr 2026 14:22:33 +0000 (10:22 -0400)
When a backend is terminated via pg_terminate_backend() or an external
SIGTERM, the error message now includes the sender's PID and UID as
errdetail, making it easier to identify the source of unexpected
terminations in multi-user environments.

On platforms that support SA_SIGINFO (Linux, FreeBSD, and most modern
Unix systems), the signal handler captures si_pid and si_uid from the
siginfo_t structure.  On platforms without SA_SIGINFO, the detail is
simply omitted.

Author: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reviewed-by: Andrew Dunstan <andrew@dunslane.net>
Reviewed-by: Chao Li <1356863904@qq.com>
Discussion: https://postgr.es/m/CAKZiRmyrOWovZSdixpLd3PGMQXuQL_zw2Ght5XhHCkQ1uDsxjw@mail.gmail.com

configure
configure.ac
meson.build
src/backend/replication/syncrep.c
src/backend/tcop/postgres.c
src/backend/utils/init/globals.c
src/bin/psql/t/001_basic.pl
src/include/miscadmin.h
src/include/pg_config.h.in
src/port/pqsignal.c

index c56ef60226dc5602db5275cb1ad87c3b05b2ef0b..f66c1054a7a1eb74d8cb93da04309cd40155f3cb 100755 (executable)
--- a/configure
+++ b/configure
@@ -15797,6 +15797,48 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then
   fi
 fi
 
+# Check for SA_SIGINFO extended signal handler availability
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SA_SIGINFO" >&5
+$as_echo_n "checking for SA_SIGINFO... " >&6; }
+if ${ac_cv_have_sa_siginfo+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+            #include <signal.h>
+            #include <stddef.h>
+
+int
+main ()
+{
+
+            struct sigaction sa;
+            sa.sa_flags = SA_SIGINFO;
+
+  ;
+  return 0;
+}
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_have_sa_siginfo=yes
+else
+  ac_cv_have_sa_siginfo=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sa_siginfo" >&5
+$as_echo "$ac_cv_have_sa_siginfo" >&6; }
+
+if test "x$ac_cv_have_sa_siginfo" = "xyes"; then
+
+$as_echo "#define HAVE_SA_SIGINFO 1" >>confdefs.h
+
+fi
 
 ##
 ## Functions, global variables
index ff5dd64468edc574831a67c1b29a359d5244e30a..8d176bd3468e9eaab0fa485d2c24a9b0b88ee42f 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -1817,6 +1817,24 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then
   fi
 fi
 
+# Check for SA_SIGINFO extended signal handler availability
+AC_CACHE_CHECK([for SA_SIGINFO], [ac_cv_have_sa_siginfo], [
+    AC_COMPILE_IFELSE([
+        AC_LANG_PROGRAM([[
+            #include <signal.h>
+            #include <stddef.h>
+        ]], [[
+            struct sigaction sa;
+            sa.sa_flags = SA_SIGINFO;
+        ]])
+    ],
+    [ac_cv_have_sa_siginfo=yes],
+    [ac_cv_have_sa_siginfo=no])
+])
+
+if test "x$ac_cv_have_sa_siginfo" = "xyes"; then
+    AC_DEFINE([HAVE_SA_SIGINFO], 1, [Define to 1 if you have SA_SIGINFO available.])
+fi
 
 ##
 ## Functions, global variables
index 43d5ffc30b12906ec6a3b401bea387b79175caf3..be97e986e5d4d39a980df8c2b15b63860156ab96 100644 (file)
--- a/meson.build
+++ b/meson.build
@@ -2985,6 +2985,10 @@ if cc.has_member('struct sockaddr', 'sa_len',
   cdata.set('HAVE_STRUCT_SOCKADDR_SA_LEN', 1)
 endif
 
+if cc.has_header_symbol('signal.h', 'SA_SIGINFO')
+  cdata.set('HAVE_SA_SIGINFO', 1)
+endif
+
 if cc.has_member('struct tm', 'tm_zone',
     args: test_c_args, include_directories: postgres_inc,
     prefix: '''
index 9cecc83ed68c3fda2372592c91bec9e1aa8154f9..46a778f091797572cc542382c6c66b709fe0d3e7 100644 (file)
--- a/src/backend/replication/syncrep.c
+++ b/src/backend/replication/syncrep.c
@@ -300,10 +300,22 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
                 */
                if (ProcDiePending)
                {
+                       /*
+                        * ProcDieSenderPid/Uid are read directly from the globals here
+                        * rather than copied to locals first; a second SIGTERM could
+                        * change them between reads, but that is harmless because the
+                        * process is about to die anyway.  The signal sender detail is
+                        * inlined rather than using a separate errdetail() call because
+                        * it must be appended to the existing detail message.
+                        */
                        ereport(WARNING,
                                        (errcode(ERRCODE_ADMIN_SHUTDOWN),
                                         errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
-                                        errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
+                                        errdetail("The transaction has already committed locally, but might not have been replicated to the standby.%s",
+                                                          ProcDieSenderPid == 0 ? "" :
+                                                          psprintf("\nSignal sent by PID %d, UID %d.",
+                                                                               (int) ProcDieSenderPid,
+                                                                               (int) ProcDieSenderUid))));
                        whereToSendOutput = DestNone;
                        SyncRepCancelWait();
                        break;
index cb353f2ed462b7d2c6213076641952c3c64276a9..14a061599bce8e840afabac380ccf29262c184d6 100644 (file)
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -109,6 +109,14 @@ int                        client_connection_check_interval = 0;
 /* flags for non-system relation kinds to restrict use */
 int                    restrict_nonsystem_relation_kind;
 
+/*
+ * Add the signal sender's PID/UID as errdetail; yields 0 (no detail) if pid
+ * is 0.  The caller must supply the (already-captured) pid and uid values.
+ */
+#define ERRDETAIL_SIGNAL_SENDER(pid, uid) \
+       ((pid) == 0 ? 0 : \
+        errdetail("Signal sent by PID %d, UID %d.", (int) (pid), (int) (uid)))
+
 /* ----------------
  *             private typedefs etc
  * ----------------
@@ -3347,7 +3355,12 @@ ProcessInterrupts(void)
 
        if (ProcDiePending)
        {
+               int                     sender_pid = ProcDieSenderPid;
+               int                     sender_uid = ProcDieSenderUid;
+
                ProcDiePending = false;
+               ProcDieSenderPid = 0;
+               ProcDieSenderUid = 0;
                QueryCancelPending = false; /* ProcDie trumps QueryCancel */
                LockErrorCleanup();
                /* As in quickdie, don't risk sending to client during auth */
@@ -3360,15 +3373,18 @@ ProcessInterrupts(void)
                else if (AmAutoVacuumWorkerProcess())
                        ereport(FATAL,
                                        (errcode(ERRCODE_ADMIN_SHUTDOWN),
-                                        errmsg("terminating autovacuum process due to administrator command")));
+                                        errmsg("terminating autovacuum process due to administrator command"),
+                                        ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
                else if (IsLogicalWorker())
                        ereport(FATAL,
                                        (errcode(ERRCODE_ADMIN_SHUTDOWN),
-                                        errmsg("terminating logical replication worker due to administrator command")));
+                                        errmsg("terminating logical replication worker due to administrator command"),
+                                        ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
                else if (IsLogicalLauncher())
                {
                        ereport(DEBUG1,
-                                       (errmsg_internal("logical replication launcher shutting down")));
+                                       (errmsg_internal("logical replication launcher shutting down"),
+                                        ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
 
                        /*
                         * The logical replication launcher can be stopped at any time.
@@ -3379,23 +3395,27 @@ ProcessInterrupts(void)
                else if (AmWalReceiverProcess())
                        ereport(FATAL,
                                        (errcode(ERRCODE_ADMIN_SHUTDOWN),
-                                        errmsg("terminating walreceiver process due to administrator command")));
+                                        errmsg("terminating walreceiver process due to administrator command"),
+                                        ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
                else if (AmBackgroundWorkerProcess())
                        ereport(FATAL,
                                        (errcode(ERRCODE_ADMIN_SHUTDOWN),
                                         errmsg("terminating background worker \"%s\" due to administrator command",
-                                                       MyBgworkerEntry->bgw_type)));
+                                                       MyBgworkerEntry->bgw_type),
+                                        ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
                else if (AmIoWorkerProcess())
                {
                        ereport(DEBUG1,
-                                       (errmsg_internal("io worker shutting down due to administrator command")));
+                                       (errmsg_internal("io worker shutting down due to administrator command"),
+                                        ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
 
                        proc_exit(0);
                }
                else
                        ereport(FATAL,
                                        (errcode(ERRCODE_ADMIN_SHUTDOWN),
-                                        errmsg("terminating connection due to administrator command")));
+                                        errmsg("terminating connection due to administrator command"),
+                                        ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
        }
 
        if (CheckClientConnectionPending)
index 24ddb276f0c0f490012dc7ad68ed3007810e2ab7..bbd28d14d99484a4f658a89e37202be7ed2aca96 100644 (file)
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -43,6 +43,8 @@ volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false;
 volatile uint32 InterruptHoldoffCount = 0;
 volatile uint32 QueryCancelHoldoffCount = 0;
 volatile uint32 CritSectionCount = 0;
+volatile int ProcDieSenderPid = 0;
+volatile int ProcDieSenderUid = 0;
 
 int                    MyProcPid;
 pg_time_t      MyStartTime;
index 6839f27cbe5fda3f5ba84e626c0e833d0f1c8ffc..7c21204c1f2f8d800a25300cbed63bc239e4fc35 100644 (file)
--- a/src/bin/psql/t/001_basic.pl
+++ b/src/bin/psql/t/001_basic.pl
@@ -142,12 +142,11 @@ my ($ret, $out, $err) = $node->psql('postgres',
 is($ret, 2, 'server crash: psql exit code');
 like($out, qr/before/, 'server crash: output before crash');
 unlike($out, qr/AFTER/, 'server crash: no output after crash');
-is( $err,
-       'psql:<stdin>:2: FATAL:  terminating connection due to administrator command
-psql:<stdin>:2: server closed the connection unexpectedly
+like( $err, qr/psql:<stdin>:2: FATAL:  terminating connection due to administrator command
+(?:DETAIL:  Signal sent by PID \d+, UID \d+\.\n)?psql:<stdin>:2: server closed the connection unexpectedly
        This probably means the server terminated abnormally
        before or while processing the request.
-psql:<stdin>:2: error: connection to server was lost',
+psql:<stdin>:2: error: connection to server was lost/,
        'server crash: error message');
 
 # test \errverbose
index 93b7816c09ca87f9a160735f5a707c4b175b5882..e099b91014f9747fb0263599504fdf46f04439c9 100644 (file)
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -90,6 +90,8 @@
 extern PGDLLIMPORT volatile sig_atomic_t InterruptPending;
 extern PGDLLIMPORT volatile sig_atomic_t QueryCancelPending;
 extern PGDLLIMPORT volatile sig_atomic_t ProcDiePending;
+extern PGDLLIMPORT volatile int ProcDieSenderPid;
+extern PGDLLIMPORT volatile int ProcDieSenderUid;
 extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending;
 extern PGDLLIMPORT volatile sig_atomic_t TransactionTimeoutPending;
 extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
index 9f6d512347e7c08b66f222430382a0c21a04d44b..4f8113c144b0c0bcc1da42a4babc1c8b671b240b 100644 (file)
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
 /* Define to 1 if you have the `rl_variable_bind' function. */
 #undef HAVE_RL_VARIABLE_BIND
 
+/* Define to 1 if you have SA_SIGINFO available. */
+#undef HAVE_SA_SIGINFO
+
 /* Define to 1 if you have the <security/pam_appl.h> header file. */
 #undef HAVE_SECURITY_PAM_APPL_H
 
index fbdf9341c2f8ee7f96e391997861333a87c42350..8841464b5cb0ac2b105cce2bfd0613298f732f0b 100644 (file)
--- a/src/port/pqsignal.c
+++ b/src/port/pqsignal.c
@@ -82,10 +82,19 @@ static volatile pqsigfunc pqsignal_handlers[PG_NSIG];
  *
  * This wrapper also handles restoring the value of errno.
  */
+#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
+static void
+wrapper_handler(int signo, siginfo_t * info, void *context)
+#else
 static void
 wrapper_handler(SIGNAL_ARGS)
+#endif
 {
        int                     save_errno = errno;
+#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
+       /* SA_SIGINFO signature uses signo, not SIGNAL_ARGS macro */
+       int                     postgres_signal_arg = signo;
+#endif
 
        Assert(postgres_signal_arg > 0);
        Assert(postgres_signal_arg < PG_NSIG);
@@ -105,6 +114,14 @@ wrapper_handler(SIGNAL_ARGS)
                raise(postgres_signal_arg);
                return;
        }
+
+#ifdef HAVE_SA_SIGINFO
+       if (signo == SIGTERM && info)
+       {
+               ProcDieSenderPid = info->si_pid;
+               ProcDieSenderUid = info->si_uid;
+       }
+#endif
 #endif
 
        (*pqsignal_handlers[postgres_signal_arg]) (postgres_signal_arg);
@@ -125,6 +142,7 @@ pqsignal(int signo, pqsigfunc func)
 #if !(defined(WIN32) && defined(FRONTEND))
        struct sigaction act;
 #endif
+       bool            use_wrapper = false;
 
        Assert(signo > 0);
        Assert(signo < PG_NSIG);
@@ -132,13 +150,24 @@ pqsignal(int signo, pqsigfunc func)
        if (func != SIG_IGN && func != SIG_DFL)
        {
                pqsignal_handlers[signo] = func;        /* assumed atomic */
-               func = wrapper_handler;
+               use_wrapper = true;
        }
 
 #if !(defined(WIN32) && defined(FRONTEND))
-       act.sa_handler = func;
        sigemptyset(&act.sa_mask);
        act.sa_flags = SA_RESTART;
+#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
+       if (use_wrapper)
+       {
+               act.sa_sigaction = wrapper_handler;
+               act.sa_flags |= SA_SIGINFO;
+       }
+       else
+               act.sa_handler = func;
+#else
+       act.sa_handler = use_wrapper ? wrapper_handler : func;
+#endif
+
 #ifdef SA_NOCLDSTOP
        if (signo == SIGCHLD)
                act.sa_flags |= SA_NOCLDSTOP;
@@ -147,7 +176,7 @@ pqsignal(int signo, pqsigfunc func)
                Assert(false);                  /* probably indicates coding error */
 #else
        /* Forward to Windows native signal system. */
-       if (signal(signo, func) == SIG_ERR)
+       if (signal(signo, use_wrapper ? wrapper_handler : func) == SIG_ERR)
                Assert(false);                  /* probably indicates coding error */
 #endif
 }