From be5257725d7f65708f5955a3a4beaedaa370e45b Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Tue, 10 Feb 2026 16:23:10 +0200
Subject: [PATCH] Refactor ProcessRecoveryConflictInterrupt for readability

Two changes here:

1. Introduce a separate RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK flag to
indicate a suspected deadlock that involves a buffer pin. Previously
the startup process used the same flag for a deadlock involving just
regular locks, and to check for deadlocks involving the buffer
pin. The cases are handled separately in the startup process, but the
receiving backend had to deduce which one it was based on
HoldingBufferPinThatDelaysRecovery(). With a separate flag, the
receiver doesn't need to guess.

2. Rewrite the ProcessRecoveryConflictInterrupt() function to not rely
on fallthrough through the switch-statement. That was difficult to
read.

Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://www.postgresql.org/message-id/4cc13ba1-4248-4884-b6ba-4805349e7f39@iki.fi
---
 src/backend/storage/ipc/standby.c            |   7 +-
 src/backend/tcop/postgres.c                  | 262 +++++++++++--------
 src/backend/utils/activity/pgstat_database.c |  10 +
 src/include/storage/standby.h                |  10 +-
 4 files changed, 181 insertions(+), 108 deletions(-)

diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 0851789e8b6..d83afbfb9d6 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -859,7 +859,7 @@ ResolveRecoveryConflictWithBufferPin(void)
 		 * not be so harmful because the period that the buffer is kept pinned
 		 * is basically no so long. But we should fix this?
 		 */
-		SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+		SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK);
 	}
 
 	/*
@@ -877,7 +877,7 @@ static void
 SendRecoveryConflictWithBufferPin(RecoveryConflictReason reason)
 {
 	Assert(reason == RECOVERY_CONFLICT_BUFFERPIN ||
-		   reason == RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+		   reason == RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK);
 
 	/*
 	 * We send signal to all backends to ask them if they are holding the
@@ -1512,6 +1512,9 @@ get_recovery_conflict_desc(RecoveryConflictReason reason)
 			reasonDesc = _("recovery conflict on replication slot");
 			break;
 		case RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+			reasonDesc = _("recovery conflict on deadlock");
+			break;
+		case RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK:
 			reasonDesc = _("recovery conflict on buffer deadlock");
 			break;
 		case RECOVERY_CONFLICT_DATABASE:
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 664161886cf..21de158adbb 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -179,6 +179,9 @@ static bool IsTransactionExitStmt(Node *parsetree);
 static bool IsTransactionExitStmtList(List *pstmts);
 static bool IsTransactionStmtList(List *pstmts);
 static void drop_unnamed_stmt(void);
+static void ProcessRecoveryConflictInterrupts(void);
+static void ProcessRecoveryConflictInterrupt(RecoveryConflictReason reason);
+static void report_recovery_conflict(RecoveryConflictReason reason);
 static void log_disconnections(int code, Datum arg);
 static void enable_statement_timeout(void);
 static void disable_statement_timeout(void);
@@ -2554,6 +2557,9 @@ errdetail_recovery_conflict(RecoveryConflictReason reason)
 			errdetail("User was using a logical replication slot that must be invalidated.");
 			break;
 		case RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+			errdetail("User transaction caused deadlock with recovery.");
+			break;
+		case RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK:
 			errdetail("User transaction caused buffer deadlock with recovery.");
 			break;
 		case RECOVERY_CONFLICT_DATABASE:
@@ -3083,35 +3089,62 @@ ProcessRecoveryConflictInterrupt(RecoveryConflictReason reason)
 		case RECOVERY_CONFLICT_STARTUP_DEADLOCK:
 
 			/*
+			 * The startup process is waiting on a lock held by us, and has
+			 * requested us to check if it is a deadlock (i.e. the deadlock
+			 * timeout expired).
+			 *
 			 * If we aren't waiting for a lock we can never deadlock.
 			 */
 			if (GetAwaitedLock() == NULL)
 				return;
 
-			/* Intentional fall through to check wait for pin */
-			/* FALLTHROUGH */
+			/* Set the flag so that ProcSleep() will check for deadlocks. */
+			CheckDeadLockAlert();
+			return;
 
-		case RECOVERY_CONFLICT_BUFFERPIN:
+		case RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK:
 
 			/*
-			 * If RECOVERY_CONFLICT_BUFFERPIN is requested but we aren't
-			 * blocking the Startup process there is nothing more to do.
+			 * The startup process is waiting on a buffer pin, and has
+			 * requested us to check if there is a deadlock involving the pin.
 			 *
-			 * When RECOVERY_CONFLICT_STARTUP_DEADLOCK is requested, if we're
-			 * waiting for locks and the startup process is not waiting for
-			 * buffer pin (i.e., also waiting for locks), we set the flag so
-			 * that ProcSleep() will check for deadlocks.
+			 * If we're not waiting on a lock, there can be no deadlock.
+			 */
+			if (GetAwaitedLock() == NULL)
+				return;
+
+			/*
+			 * If we're not holding the buffer pin, also no deadlock. (The
+			 * startup process doesn't know who's holding the pin, and sends
+			 * this signal to *all* backends, so this is the common case.)
 			 */
 			if (!HoldingBufferPinThatDelaysRecovery())
-			{
-				if (reason == RECOVERY_CONFLICT_STARTUP_DEADLOCK &&
-					GetStartupBufferPinWaitBufId() < 0)
-					CheckDeadLockAlert();
 				return;
-			}
 
-			/* Intentional fall through to error handling */
-			/* FALLTHROUGH */
+			/*
+			 * Otherwise, we probably have a deadlock.  Unfortunately the
+			 * normal deadlock detector doesn't know about buffer pins, so we
+			 * cannot perform comprehensively deadlock check.  Instead, we
+			 * just assume that it is a deadlock if the above two conditions
+			 * are met.  In principle this can lead to false positives, but
+			 * it's rare in practice because sessions in a hot standby server
+			 * rarely hold locks that can block other backends.
+			 */
+			report_recovery_conflict(reason);
+			return;
+
+		case RECOVERY_CONFLICT_BUFFERPIN:
+
+			/*
+			 * Someone is holding a buffer pin that the startup process is
+			 * waiting for, and it got tired of waiting.  If that's us, error
+			 * out to release the pin.
+			 */
+			if (!HoldingBufferPinThatDelaysRecovery())
+				return;
+
+			report_recovery_conflict(reason);
+			return;
 
 		case RECOVERY_CONFLICT_LOCK:
 		case RECOVERY_CONFLICT_TABLESPACE:
@@ -3123,109 +3156,128 @@ ProcessRecoveryConflictInterrupt(RecoveryConflictReason reason)
 			if (!IsTransactionOrTransactionBlock())
 				return;
 
-			/* FALLTHROUGH */
+			report_recovery_conflict(reason);
+			return;
 
 		case RECOVERY_CONFLICT_LOGICALSLOT:
+			report_recovery_conflict(reason);
+			return;
 
-			/*
-			 * If we're not in a subtransaction then we are OK to throw an
-			 * ERROR to resolve the conflict.  Otherwise drop through to the
-			 * FATAL case.
-			 *
-			 * RECOVERY_CONFLICT_LOGICALSLOT is a special case that always
-			 * throws an ERROR (ie never promotes to FATAL), though it still
-			 * has to respect QueryCancelHoldoffCount, so it shares this code
-			 * path.  Logical decoding slots are only acquired while
-			 * performing logical decoding.  During logical decoding no user
-			 * controlled code is run.  During [sub]transaction abort, the
-			 * slot is released.  Therefore user controlled code cannot
-			 * intercept an error before the replication slot is released.
-			 *
-			 * XXX other times that we can throw just an ERROR *may* be
-			 * RECOVERY_CONFLICT_LOCK if no locks are held in parent
-			 * transactions
-			 *
-			 * RECOVERY_CONFLICT_SNAPSHOT if no snapshots are held by parent
-			 * transactions and the transaction is not transaction-snapshot
-			 * mode
-			 *
-			 * RECOVERY_CONFLICT_TABLESPACE if no temp files or cursors open
-			 * in parent transactions
-			 */
-			if (reason == RECOVERY_CONFLICT_LOGICALSLOT ||
-				!IsSubTransaction())
-			{
-				/*
-				 * If we already aborted then we no longer need to cancel.  We
-				 * do this here since we do not wish to ignore aborted
-				 * subtransactions, which must cause FATAL, currently.
-				 */
-				if (IsAbortedTransactionBlockState())
-					return;
+		case RECOVERY_CONFLICT_DATABASE:
+
+			/* The database is being dropped; terminate the session */
+			report_recovery_conflict(reason);
+			return;
+	}
+	elog(FATAL, "unrecognized conflict mode: %d", (int) reason);
+}
 
+/*
+ * This transaction or session is conflicting with recovery and needs to be
+ * killed.  Roll back the transaction, if that's sufficient, or terminate the
+ * connection, or do nothing if we're already in an aborted state.
+ */
+static void
+report_recovery_conflict(RecoveryConflictReason reason)
+{
+	bool		fatal;
+
+	if (reason == RECOVERY_CONFLICT_DATABASE)
+	{
+		/* note: no hint about reconnecting, and different errcode */
+		pgstat_report_recovery_conflict(reason);
+		ereport(FATAL,
+				(errcode(ERRCODE_DATABASE_DROPPED),
+				 errmsg("terminating connection due to conflict with recovery"),
+				 errdetail_recovery_conflict(reason)));
+	}
+	if (reason == RECOVERY_CONFLICT_LOGICALSLOT)
+	{
+		/*
+		 * RECOVERY_CONFLICT_LOGICALSLOT is a special case that always throws
+		 * an ERROR (ie never promotes to FATAL), though it still has to
+		 * respect QueryCancelHoldoffCount, so it shares this code path.
+		 * Logical decoding slots are only acquired while performing logical
+		 * decoding.  During logical decoding no user controlled code is run.
+		 * During [sub]transaction abort, the slot is released.  Therefore
+		 * user controlled code cannot intercept an error before the
+		 * replication slot is released.
+		 */
+		fatal = false;
+	}
+	else
+	{
+		fatal = IsSubTransaction();
+	}
+
+	/*
+	 * If we're not in a subtransaction then we are OK to throw an ERROR to
+	 * resolve the conflict.
+	 *
+	 * XXX other times that we can throw just an ERROR *may* be
+	 * RECOVERY_CONFLICT_LOCK if no locks are held in parent transactions
+	 *
+	 * RECOVERY_CONFLICT_SNAPSHOT if no snapshots are held by parent
+	 * transactions and the transaction is not transaction-snapshot mode
+	 *
+	 * RECOVERY_CONFLICT_TABLESPACE if no temp files or cursors open in parent
+	 * transactions
+	 */
+	if (!fatal)
+	{
+		/*
+		 * If we already aborted then we no longer need to cancel.  We do this
+		 * here since we do not wish to ignore aborted subtransactions, which
+		 * must cause FATAL, currently.
+		 */
+		if (IsAbortedTransactionBlockState())
+			return;
+
+		/*
+		 * If a recovery conflict happens while we are waiting for input from
+		 * the client, the client is presumably just sitting idle in a
+		 * transaction, preventing recovery from making progress.  We'll drop
+		 * through to the FATAL case below to dislodge it, in that case.
+		 */
+		if (!DoingCommandRead)
+		{
+			/* Avoid losing sync in the FE/BE protocol. */
+			if (QueryCancelHoldoffCount != 0)
+			{
 				/*
-				 * If a recovery conflict happens while we are waiting for
-				 * input from the client, the client is presumably just
-				 * sitting idle in a transaction, preventing recovery from
-				 * making progress.  We'll drop through to the FATAL case
-				 * below to dislodge it, in that case.
+				 * Re-arm and defer this interrupt until later.  See similar
+				 * code in ProcessInterrupts().
 				 */
-				if (!DoingCommandRead)
-				{
-					/* Avoid losing sync in the FE/BE protocol. */
-					if (QueryCancelHoldoffCount != 0)
-					{
-						/*
-						 * Re-arm and defer this interrupt until later.  See
-						 * similar code in ProcessInterrupts().
-						 */
-						(void) pg_atomic_fetch_or_u32(&MyProc->pendingRecoveryConflicts, (1 << reason));
-						InterruptPending = true;
-						return;
-					}
-
-					/*
-					 * We are cleared to throw an ERROR.  Either it's the
-					 * logical slot case, or we have a top-level transaction
-					 * that we can abort and a conflict that isn't inherently
-					 * non-retryable.
-					 */
-					LockErrorCleanup();
-					pgstat_report_recovery_conflict(reason);
-					ereport(ERROR,
-							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-							 errmsg("canceling statement due to conflict with recovery"),
-							 errdetail_recovery_conflict(reason)));
-					break;
-				}
+				(void) pg_atomic_fetch_or_u32(&MyProc->pendingRecoveryConflicts, (1 << reason));
+				InterruptPending = true;
+				return;
 			}
 
 			/*
-			 * We couldn't resolve the conflict with ERROR, so terminate the
-			 * whole session.
+			 * We are cleared to throw an ERROR.  Either it's the logical slot
+			 * case, or we have a top-level transaction that we can abort and
+			 * a conflict that isn't inherently non-retryable.
 			 */
+			LockErrorCleanup();
 			pgstat_report_recovery_conflict(reason);
-			ereport(FATAL,
+			ereport(ERROR,
 					(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-					 errmsg("terminating connection due to conflict with recovery"),
-					 errdetail_recovery_conflict(reason),
-					 errhint("In a moment you should be able to reconnect to the"
-							 " database and repeat your command.")));
-			break;
-
-		case RECOVERY_CONFLICT_DATABASE:
-
-			/* The database is being dropped; terminate the session */
-			pgstat_report_recovery_conflict(reason);
-			ereport(FATAL,
-					(errcode(ERRCODE_DATABASE_DROPPED),
-					 errmsg("terminating connection due to conflict with recovery"),
+					 errmsg("canceling statement due to conflict with recovery"),
 					 errdetail_recovery_conflict(reason)));
-			break;
-
-		default:
-			elog(FATAL, "unrecognized conflict mode: %d", (int) reason);
+		}
 	}
+
+	/*
+	 * We couldn't resolve the conflict with ERROR, so terminate the whole
+	 * session.
+	 */
+	pgstat_report_recovery_conflict(reason);
+	ereport(FATAL,
+			(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+			 errmsg("terminating connection due to conflict with recovery"),
+			 errdetail_recovery_conflict(reason),
+			 errhint("In a moment you should be able to reconnect to the"
+					 " database and repeat your command.")));
 }
 
 /*
diff --git a/src/backend/utils/activity/pgstat_database.c b/src/backend/utils/activity/pgstat_database.c
index e6759ccaa3d..6309909bcd0 100644
--- a/src/backend/utils/activity/pgstat_database.c
+++ b/src/backend/utils/activity/pgstat_database.c
@@ -115,6 +115,16 @@ pgstat_report_recovery_conflict(int reason)
 		case RECOVERY_CONFLICT_STARTUP_DEADLOCK:
 			dbentry->conflict_startup_deadlock++;
 			break;
+		case RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK:
+
+			/*
+			 * The difference between RECOVERY_CONFLICT_STARTUP_DEADLOCK and
+			 * RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK is merely whether a buffer
+			 * pin was part of the deadlock. We use the same counter for both
+			 * reasons.
+			 */
+			dbentry->conflict_startup_deadlock++;
+			break;
 	}
 }
 
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
index 65a8176785e..c63a4f2cc6a 100644
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -51,9 +51,17 @@ typedef enum
 	 * other backends instead of the startup process.
 	 */
 	RECOVERY_CONFLICT_STARTUP_DEADLOCK,
+
+	/*
+	 * Like RECOVERY_CONFLICT_STARTUP_DEADLOCK is, but the suspected deadlock
+	 * involves a buffer pin that some other backend is holding. That needs
+	 * special checking because the normal deadlock detector doesn't track the
+	 * buffer pins.
+	 */
+	RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK,
 } RecoveryConflictReason;
 
-#define NUM_RECOVERY_CONFLICT_REASONS (RECOVERY_CONFLICT_STARTUP_DEADLOCK + 1)
+#define NUM_RECOVERY_CONFLICT_REASONS (RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK + 1)
 
 extern void InitRecoveryTransactionEnvironment(void);
 extern void ShutdownRecoveryTransactionEnvironment(void);
-- 
2.47.3