Refactor ProcessRecoveryConflictInterrupt for readability

author Heikki Linnakangas <heikki.linnakangas@iki.fi>

Tue, 10 Feb 2026 14:23:10 +0000 (16:23 +0200)

committer Heikki Linnakangas <heikki.linnakangas@iki.fi>

Tue, 10 Feb 2026 14:23:10 +0000 (16:23 +0200)
author Heikki Linnakangas <heikki.linnakangas@iki.fi>
Tue, 10 Feb 2026 14:23:10 +0000 (16:23 +0200)
committer Heikki Linnakangas <heikki.linnakangas@iki.fi>
Tue, 10 Feb 2026 14:23:10 +0000 (16:23 +0200)
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c

index 0851789e8b6d7a0e294463165598be353a3b90ad..d83afbfb9d62846350bfbe308694f580e6a17736 100644 (file)
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -859,7 +859,7 @@ ResolveRecoveryConflictWithBufferPin(void)
                  * not be so harmful because the period that the buffer is kept pinned
                  * is basically no so long. But we should fix this?
                  */
-               SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+               SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK);
         }
  
         /*
@@ -877,7 +877,7 @@ static void
  SendRecoveryConflictWithBufferPin(RecoveryConflictReason reason)
  {
         Assert(reason == RECOVERY_CONFLICT_BUFFERPIN ||
-                  reason == RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+                  reason == RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK);
  
         /*
          * We send signal to all backends to ask them if they are holding the
@@ -1512,6 +1512,9 @@ get_recovery_conflict_desc(RecoveryConflictReason reason)
                         reasonDesc = _("recovery conflict on replication slot");
                         break;
                 case RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+                       reasonDesc = _("recovery conflict on deadlock");
+                       break;
+               case RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK:
                         reasonDesc = _("recovery conflict on buffer deadlock");
                         break;
                 case RECOVERY_CONFLICT_DATABASE:
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c

index 664161886cfd9a69075b3ec204adb840e2248a08..21de158adbba4d733c4c2b2b17d5515bb37bdeaa 100644 (file)
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -179,6 +179,9 @@ static bool IsTransactionExitStmt(Node *parsetree);
  static bool IsTransactionExitStmtList(List *pstmts);
  static bool IsTransactionStmtList(List *pstmts);
  static void drop_unnamed_stmt(void);
+static void ProcessRecoveryConflictInterrupts(void);
+static void ProcessRecoveryConflictInterrupt(RecoveryConflictReason reason);
+static void report_recovery_conflict(RecoveryConflictReason reason);
  static void log_disconnections(int code, Datum arg);
  static void enable_statement_timeout(void);
  static void disable_statement_timeout(void);
@@ -2554,6 +2557,9 @@ errdetail_recovery_conflict(RecoveryConflictReason reason)
                         errdetail("User was using a logical replication slot that must be invalidated.");
                         break;
                 case RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+                       errdetail("User transaction caused deadlock with recovery.");
+                       break;
+               case RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK:
                         errdetail("User transaction caused buffer deadlock with recovery.");
                         break;
                 case RECOVERY_CONFLICT_DATABASE:
@@ -3083,35 +3089,62 @@ ProcessRecoveryConflictInterrupt(RecoveryConflictReason reason)
                 case RECOVERY_CONFLICT_STARTUP_DEADLOCK:
  
                         /*
+                        * The startup process is waiting on a lock held by us, and has
+                        * requested us to check if it is a deadlock (i.e. the deadlock
+                        * timeout expired).
+                        *
                          * If we aren't waiting for a lock we can never deadlock.
                          */
                         if (GetAwaitedLock() == NULL)
                                 return;
  
-                       /* Intentional fall through to check wait for pin */
-                       /* FALLTHROUGH */
+                       /* Set the flag so that ProcSleep() will check for deadlocks. */
+                       CheckDeadLockAlert();
+                       return;
  
-               case RECOVERY_CONFLICT_BUFFERPIN:
+               case RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK:
  
                         /*
-                        * If RECOVERY_CONFLICT_BUFFERPIN is requested but we aren't
-                        * blocking the Startup process there is nothing more to do.
+                        * The startup process is waiting on a buffer pin, and has
+                        * requested us to check if there is a deadlock involving the pin.
                          *
-                        * When RECOVERY_CONFLICT_STARTUP_DEADLOCK is requested, if we're
-                        * waiting for locks and the startup process is not waiting for
-                        * buffer pin (i.e., also waiting for locks), we set the flag so
-                        * that ProcSleep() will check for deadlocks.
+                        * If we're not waiting on a lock, there can be no deadlock.
+                        */
+                       if (GetAwaitedLock() == NULL)
+                               return;
+
+                       /*
+                        * If we're not holding the buffer pin, also no deadlock. (The
+                        * startup process doesn't know who's holding the pin, and sends
+                        * this signal to *all* backends, so this is the common case.)
                          */
                         if (!HoldingBufferPinThatDelaysRecovery())
-                       {
-                               if (reason == RECOVERY_CONFLICT_STARTUP_DEADLOCK &&
-                                       GetStartupBufferPinWaitBufId() < 0)
-                                       CheckDeadLockAlert();
                                 return;
-                       }
  
-                       /* Intentional fall through to error handling */
-                       /* FALLTHROUGH */
+                       /*
+                        * Otherwise, we probably have a deadlock.  Unfortunately the
+                        * normal deadlock detector doesn't know about buffer pins, so we
+                        * cannot perform comprehensively deadlock check.  Instead, we
+                        * just assume that it is a deadlock if the above two conditions
+                        * are met.  In principle this can lead to false positives, but
+                        * it's rare in practice because sessions in a hot standby server
+                        * rarely hold locks that can block other backends.
+                        */
+                       report_recovery_conflict(reason);
+                       return;
+
+               case RECOVERY_CONFLICT_BUFFERPIN:
+
+                       /*
+                        * Someone is holding a buffer pin that the startup process is
+                        * waiting for, and it got tired of waiting.  If that's us, error
+                        * out to release the pin.
+                        */
+                       if (!HoldingBufferPinThatDelaysRecovery())
+                               return;
+
+                       report_recovery_conflict(reason);
+                       return;
  
                 case RECOVERY_CONFLICT_LOCK:
                 case RECOVERY_CONFLICT_TABLESPACE:
@@ -3123,109 +3156,128 @@ ProcessRecoveryConflictInterrupt(RecoveryConflictReason reason)
                         if (!IsTransactionOrTransactionBlock())
                                 return;
  
-                       /* FALLTHROUGH */
+                       report_recovery_conflict(reason);
+                       return;
  
                 case RECOVERY_CONFLICT_LOGICALSLOT:
+                       report_recovery_conflict(reason);
+                       return;
  
-                       /*
-                        * If we're not in a subtransaction then we are OK to throw an
-                        * ERROR to resolve the conflict.  Otherwise drop through to the
-                        * FATAL case.
-                        *
-                        * RECOVERY_CONFLICT_LOGICALSLOT is a special case that always
-                        * throws an ERROR (ie never promotes to FATAL), though it still
-                        * has to respect QueryCancelHoldoffCount, so it shares this code
-                        * path.  Logical decoding slots are only acquired while
-                        * performing logical decoding.  During logical decoding no user
-                        * controlled code is run.  During [sub]transaction abort, the
-                        * slot is released.  Therefore user controlled code cannot
-                        * intercept an error before the replication slot is released.
-                        *
-                        * XXX other times that we can throw just an ERROR *may* be
-                        * RECOVERY_CONFLICT_LOCK if no locks are held in parent
-                        * transactions
-                        *
-                        * RECOVERY_CONFLICT_SNAPSHOT if no snapshots are held by parent
-                        * transactions and the transaction is not transaction-snapshot
-                        * mode
-                        *
-                        * RECOVERY_CONFLICT_TABLESPACE if no temp files or cursors open
-                        * in parent transactions
-                        */
-                       if (reason == RECOVERY_CONFLICT_LOGICALSLOT ||
-                               !IsSubTransaction())
-                       {
-                               /*
-                                * If we already aborted then we no longer need to cancel.  We
-                                * do this here since we do not wish to ignore aborted
-                                * subtransactions, which must cause FATAL, currently.
-                                */
-                               if (IsAbortedTransactionBlockState())
-                                       return;
+               case RECOVERY_CONFLICT_DATABASE:
+
+                       /* The database is being dropped; terminate the session */
+                       report_recovery_conflict(reason);
+                       return;
+       }
+       elog(FATAL, "unrecognized conflict mode: %d", (int) reason);
+}
  
+/*
+ * This transaction or session is conflicting with recovery and needs to be
+ * killed.  Roll back the transaction, if that's sufficient, or terminate the
+ * connection, or do nothing if we're already in an aborted state.
+ */
+static void
+report_recovery_conflict(RecoveryConflictReason reason)
+{
+       bool            fatal;
+
+       if (reason == RECOVERY_CONFLICT_DATABASE)
+       {
+               /* note: no hint about reconnecting, and different errcode */
+               pgstat_report_recovery_conflict(reason);
+               ereport(FATAL,
+                               (errcode(ERRCODE_DATABASE_DROPPED),
+                                errmsg("terminating connection due to conflict with recovery"),
+                                errdetail_recovery_conflict(reason)));
+       }
+       if (reason == RECOVERY_CONFLICT_LOGICALSLOT)
+       {
+               /*
+                * RECOVERY_CONFLICT_LOGICALSLOT is a special case that always throws
+                * an ERROR (ie never promotes to FATAL), though it still has to
+                * respect QueryCancelHoldoffCount, so it shares this code path.
+                * Logical decoding slots are only acquired while performing logical
+                * decoding.  During logical decoding no user controlled code is run.
+                * During [sub]transaction abort, the slot is released.  Therefore
+                * user controlled code cannot intercept an error before the
+                * replication slot is released.
+                */
+               fatal = false;
+       }
+       else
+       {
+               fatal = IsSubTransaction();
+       }
+
+       /*
+        * If we're not in a subtransaction then we are OK to throw an ERROR to
+        * resolve the conflict.
+        *
+        * XXX other times that we can throw just an ERROR *may* be
+        * RECOVERY_CONFLICT_LOCK if no locks are held in parent transactions
+        *
+        * RECOVERY_CONFLICT_SNAPSHOT if no snapshots are held by parent
+        * transactions and the transaction is not transaction-snapshot mode
+        *
+        * RECOVERY_CONFLICT_TABLESPACE if no temp files or cursors open in parent
+        * transactions
+        */
+       if (!fatal)
+       {
+               /*
+                * If we already aborted then we no longer need to cancel.  We do this
+                * here since we do not wish to ignore aborted subtransactions, which
+                * must cause FATAL, currently.
+                */
+               if (IsAbortedTransactionBlockState())
+                       return;
+
+               /*
+                * If a recovery conflict happens while we are waiting for input from
+                * the client, the client is presumably just sitting idle in a
+                * transaction, preventing recovery from making progress.  We'll drop
+                * through to the FATAL case below to dislodge it, in that case.
+                */
+               if (!DoingCommandRead)
+               {
+                       /* Avoid losing sync in the FE/BE protocol. */
+                       if (QueryCancelHoldoffCount != 0)
+                       {
                                 /*
-                                * If a recovery conflict happens while we are waiting for
-                                * input from the client, the client is presumably just
-                                * sitting idle in a transaction, preventing recovery from
-                                * making progress.  We'll drop through to the FATAL case
-                                * below to dislodge it, in that case.
+                                * Re-arm and defer this interrupt until later.  See similar
+                                * code in ProcessInterrupts().
                                  */
-                               if (!DoingCommandRead)
-                               {
-                                       /* Avoid losing sync in the FE/BE protocol. */
-                                       if (QueryCancelHoldoffCount != 0)
-                                       {
-                                               /*
-                                                * Re-arm and defer this interrupt until later.  See
-                                                * similar code in ProcessInterrupts().
-                                                */
-                                               (void) pg_atomic_fetch_or_u32(&MyProc->pendingRecoveryConflicts, (1 << reason));
-                                               InterruptPending = true;
-                                               return;
-                                       }
-
-                                       /*
-                                        * We are cleared to throw an ERROR.  Either it's the
-                                        * logical slot case, or we have a top-level transaction
-                                        * that we can abort and a conflict that isn't inherently
-                                        * non-retryable.
-                                        */
-                                       LockErrorCleanup();
-                                       pgstat_report_recovery_conflict(reason);
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                                                        errmsg("canceling statement due to conflict with recovery"),
-                                                        errdetail_recovery_conflict(reason)));
-                                       break;
-                               }
+                               (void) pg_atomic_fetch_or_u32(&MyProc->pendingRecoveryConflicts, (1 << reason));
+                               InterruptPending = true;
+                               return;
                         }
  
                         /*
-                        * We couldn't resolve the conflict with ERROR, so terminate the
-                        * whole session.
+                        * We are cleared to throw an ERROR.  Either it's the logical slot
+                        * case, or we have a top-level transaction that we can abort and
+                        * a conflict that isn't inherently non-retryable.
                          */
+                       LockErrorCleanup();
                         pgstat_report_recovery_conflict(reason);
-                       ereport(FATAL,
+                       ereport(ERROR,
                                         (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                                        errmsg("terminating connection due to conflict with recovery"),
-                                        errdetail_recovery_conflict(reason),
-                                        errhint("In a moment you should be able to reconnect to the"
-                                                        " database and repeat your command.")));
-                       break;
-
-               case RECOVERY_CONFLICT_DATABASE:
-
-                       /* The database is being dropped; terminate the session */
-                       pgstat_report_recovery_conflict(reason);
-                       ereport(FATAL,
-                                       (errcode(ERRCODE_DATABASE_DROPPED),
-                                        errmsg("terminating connection due to conflict with recovery"),
+                                        errmsg("canceling statement due to conflict with recovery"),
                                          errdetail_recovery_conflict(reason)));
-                       break;
-
-               default:
-                       elog(FATAL, "unrecognized conflict mode: %d", (int) reason);
+               }
         }
+
+       /*
+        * We couldn't resolve the conflict with ERROR, so terminate the whole
+        * session.
+        */
+       pgstat_report_recovery_conflict(reason);
+       ereport(FATAL,
+                       (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                        errmsg("terminating connection due to conflict with recovery"),
+                        errdetail_recovery_conflict(reason),
+                        errhint("In a moment you should be able to reconnect to the"
+                                        " database and repeat your command.")));
  }
  
  /*
diff --git a/src/backend/utils/activity/pgstat_database.c b/src/backend/utils/activity/pgstat_database.c

index e6759ccaa3d2ce5e730e0bf24296756320a21fb2..6309909bcd020bf4323cd1f7c3a29b5636d25ab9 100644 (file)
--- a/src/backend/utils/activity/pgstat_database.c
+++ b/src/backend/utils/activity/pgstat_database.c
@@ -115,6 +115,16 @@ pgstat_report_recovery_conflict(int reason)
                 case RECOVERY_CONFLICT_STARTUP_DEADLOCK:
                         dbentry->conflict_startup_deadlock++;
                         break;
+               case RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK:
+
+                       /*
+                        * The difference between RECOVERY_CONFLICT_STARTUP_DEADLOCK and
+                        * RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK is merely whether a buffer
+                        * pin was part of the deadlock. We use the same counter for both
+                        * reasons.
+                        */
+                       dbentry->conflict_startup_deadlock++;
+                       break;
         }
  }
  
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h

index 65a8176785e5e129e1dc44a10dafa1b60ee413e9..c63a4f2cc6a6647ff429ff089a1bb64c9a0ff91b 100644 (file)
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -51,9 +51,17 @@ typedef enum
          * other backends instead of the startup process.
          */
         RECOVERY_CONFLICT_STARTUP_DEADLOCK,
+
+       /*
+        * Like RECOVERY_CONFLICT_STARTUP_DEADLOCK is, but the suspected deadlock
+        * involves a buffer pin that some other backend is holding. That needs
+        * special checking because the normal deadlock detector doesn't track the
+        * buffer pins.
+        */
+       RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK,
  } RecoveryConflictReason;
  
-#define NUM_RECOVERY_CONFLICT_REASONS (RECOVERY_CONFLICT_STARTUP_DEADLOCK + 1)
+#define NUM_RECOVERY_CONFLICT_REASONS (RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK + 1)
  
  extern void InitRecoveryTransactionEnvironment(void);
  extern void ShutdownRecoveryTransactionEnvironment(void);
author	Heikki Linnakangas <heikki.linnakangas@iki.fi>
	Tue, 10 Feb 2026 14:23:10 +0000 (16:23 +0200)
committer	Heikki Linnakangas <heikki.linnakangas@iki.fi>
	Tue, 10 Feb 2026 14:23:10 +0000 (16:23 +0200)
src/backend/storage/ipc/standby.c		patch \| blob \| blame \| history
src/backend/tcop/postgres.c		patch \| blob \| blame \| history
src/backend/utils/activity/pgstat_database.c		patch \| blob \| blame \| history
src/include/storage/standby.h		patch \| blob \| blame \| history