From: Heikki Linnakangas Date: Tue, 10 Feb 2026 14:23:08 +0000 (+0200) Subject: Separate RecoveryConflictReasons from procsignals X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=17f51ea818753093f929b4c235f3b89ebcc7c5fb;p=thirdparty%2Fpostgresql.git Separate RecoveryConflictReasons from procsignals Share the same PROCSIG_RECOVERY_CONFLICT flag for all recovery conflict reasons. To distinguish, have a bitmask in PGPROC to indicate the reason(s). Reviewed-by: Chao Li Discussion: https://www.postgresql.org/message-id/4cc13ba1-4248-4884-b6ba-4805349e7f39@iki.fi --- diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 87949054f26..33311760df7 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -60,6 +60,7 @@ #include "storage/lmgr.h" #include "storage/md.h" #include "storage/procarray.h" +#include "storage/procsignal.h" #include "storage/smgr.h" #include "utils/acl.h" #include "utils/builtins.h" diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index 0b064891932..3511a4ec0fd 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -70,6 +70,7 @@ #include "miscadmin.h" #include "postmaster/bgwriter.h" #include "storage/fd.h" +#include "storage/procsignal.h" #include "storage/standby.h" #include "utils/acl.h" #include "utils/builtins.h" diff --git a/src/backend/replication/logical/logicalctl.c b/src/backend/replication/logical/logicalctl.c index 9f787f3dc51..4e292951201 100644 --- a/src/backend/replication/logical/logicalctl.c +++ b/src/backend/replication/logical/logicalctl.c @@ -71,6 +71,7 @@ #include "storage/lmgr.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "storage/procsignal.h" #include "utils/injection_point.h" /* diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index d5628d62117..28c7019402b 100644 --- a/src/backend/replication/slot.c +++ 
b/src/backend/replication/slot.c @@ -2114,9 +2114,9 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes, slot_idle_secs); if (MyBackendType == B_STARTUP) - (void) SendProcSignal(active_pid, - PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT, - active_proc); + (void) SignalRecoveryConflict(GetPGProcByNumber(active_proc), + active_pid, + RECOVERY_CONFLICT_LOGICALSLOT); else (void) kill(active_pid, SIGTERM); diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 7241477cac0..d1babaff023 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -59,6 +59,7 @@ #include "storage/lmgr.h" #include "storage/proc.h" #include "storage/proclist.h" +#include "storage/procsignal.h" #include "storage/read_stream.h" #include "storage/smgr.h" #include "storage/standby.h" @@ -6570,7 +6571,7 @@ LockBufferForCleanup(Buffer buffer) * deadlock_timeout for it. */ if (logged_recovery_conflict) - LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, + LogRecoveryConflict(RECOVERY_CONFLICT_BUFFERPIN, waitStart, GetCurrentTimestamp(), NULL, false); @@ -6621,7 +6622,7 @@ LockBufferForCleanup(Buffer buffer) if (TimestampDifferenceExceeds(waitStart, now, DeadlockTimeout)) { - LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, + LogRecoveryConflict(RECOVERY_CONFLICT_BUFFERPIN, waitStart, now, NULL, true); logged_recovery_conflict = true; } diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 301f54fb5a8..40312df2cac 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -60,6 +60,7 @@ #include "port/pg_lfind.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "storage/procsignal.h" #include "utils/acl.h" #include "utils/builtins.h" #include "utils/injection_point.h" @@ -708,6 +709,8 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid) /* be sure this is cleared in abort */ proc->delayChkptFlags = 0; + 
pg_atomic_write_u32(&proc->pendingRecoveryConflicts, 0); + /* must be cleared with xid/xmin: */ /* avoid unnecessarily dirtying shared cachelines */ if (proc->statusFlags & PROC_VACUUM_STATE_MASK) @@ -748,6 +751,8 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid) /* be sure this is cleared in abort */ proc->delayChkptFlags = 0; + pg_atomic_write_u32(&proc->pendingRecoveryConflicts, 0); + /* must be cleared with xid/xmin: */ /* avoid unnecessarily dirtying shared cachelines */ if (proc->statusFlags & PROC_VACUUM_STATE_MASK) @@ -929,6 +934,7 @@ ProcArrayClearTransaction(PGPROC *proc) proc->vxid.lxid = InvalidLocalTransactionId; proc->xmin = InvalidTransactionId; + pg_atomic_write_u32(&proc->pendingRecoveryConflicts, 0); Assert(!(proc->statusFlags & PROC_VACUUM_STATE_MASK)); Assert(!proc->delayChkptFlags); @@ -3440,12 +3446,46 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid) } /* - * SignalVirtualTransaction - used in recovery conflict processing + * SignalRecoveryConflict -- signal that a process is blocking recovery * - * Returns pid of the process signaled, or 0 if not found. + * The 'pid' is redundant with 'proc', but it acts as a cross-check to + * detect that the process had exited and the PGPROC entry was reused for a + * different process. + * + * Returns true if the process was signaled, or false if not found. */ -pid_t -SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode) +bool +SignalRecoveryConflict(PGPROC *proc, pid_t pid, RecoveryConflictReason reason) +{ + bool found = false; + + LWLockAcquire(ProcArrayLock, LW_SHARED); + + /* + * Kill the pid if it's still here. If not, that's what we wanted so + * ignore any errors. 
+ */ + if (proc->pid == pid) + { + (void) pg_atomic_fetch_or_u32(&proc->pendingRecoveryConflicts, (1 << reason)); + + /* wake up the process */ + (void) SendProcSignal(pid, PROCSIG_RECOVERY_CONFLICT, GetNumberFromPGProc(proc)); + found = true; + } + + LWLockRelease(ProcArrayLock); + + return found; +} + +/* + * SignalRecoveryConflictWithVirtualXID -- signal that a VXID is blocking recovery + * + * Like SignalRecoveryConflict, but the target is identified by VXID + */ +bool +SignalRecoveryConflictWithVirtualXID(VirtualTransactionId vxid, RecoveryConflictReason reason) { ProcArrayStruct *arrayP = procArray; int index; @@ -3467,11 +3507,13 @@ SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode) pid = proc->pid; if (pid != 0) { + (void) pg_atomic_fetch_or_u32(&proc->pendingRecoveryConflicts, (1 << reason)); + /* * Kill the pid if it's still here. If not, that's what we * wanted so ignore any errors. */ - (void) SendProcSignal(pid, sigmode, vxid.procNumber); + (void) SendProcSignal(pid, PROCSIG_RECOVERY_CONFLICT, vxid.procNumber); } break; } @@ -3479,7 +3521,50 @@ SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode) LWLockRelease(ProcArrayLock); - return pid; + return pid != 0; +} + +/* + * SignalRecoveryConflictWithDatabase --- signal all backends using the specified database + * + * Like SignalRecoveryConflict, but signals all backends using the database. 
+ */ +void +SignalRecoveryConflictWithDatabase(Oid databaseid, RecoveryConflictReason reason) +{ + ProcArrayStruct *arrayP = procArray; + int index; + + /* tell all backends to die */ + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + + for (index = 0; index < arrayP->numProcs; index++) + { + int pgprocno = arrayP->pgprocnos[index]; + PGPROC *proc = &allProcs[pgprocno]; + + if (databaseid == InvalidOid || proc->databaseId == databaseid) + { + VirtualTransactionId procvxid; + pid_t pid; + + GET_VXID_FROM_PGPROC(procvxid, *proc); + + pid = proc->pid; + if (pid != 0) + { + (void) pg_atomic_fetch_or_u32(&proc->pendingRecoveryConflicts, (1 << reason)); + + /* + * Kill the pid if it's still here. If not, that's what we + * wanted so ignore any errors. + */ + (void) SendProcSignal(pid, PROCSIG_RECOVERY_CONFLICT, procvxid.procNumber); + } + } + } + + LWLockRelease(ProcArrayLock); } /* @@ -3601,45 +3686,6 @@ CountDBConnections(Oid databaseid) return count; } -/* - * CancelDBBackends --- cancel backends that are using specified database - */ -void -CancelDBBackends(Oid databaseid, ProcSignalReason sigmode) -{ - ProcArrayStruct *arrayP = procArray; - int index; - - /* tell all backends to die */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); - - for (index = 0; index < arrayP->numProcs; index++) - { - int pgprocno = arrayP->pgprocnos[index]; - PGPROC *proc = &allProcs[pgprocno]; - - if (databaseid == InvalidOid || proc->databaseId == databaseid) - { - VirtualTransactionId procvxid; - pid_t pid; - - GET_VXID_FROM_PGPROC(procvxid, *proc); - - pid = proc->pid; - if (pid != 0) - { - /* - * Kill the pid if it's still here. If not, that's what we - * wanted so ignore any errors. 
- */ - (void) SendProcSignal(pid, sigmode, procvxid.procNumber); - } - } - } - - LWLockRelease(ProcArrayLock); -} - /* * CountUserBackends --- count backends that are used by specified user * (only regular backends, not any type of background worker) diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index 8e56922dcea..5d33559926a 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -697,26 +697,8 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) if (CheckProcSignal(PROCSIG_PARALLEL_APPLY_MESSAGE)) HandleParallelApplyMessageInterrupt(); - if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_DATABASE)) - HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_DATABASE); - - if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_TABLESPACE)) - HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_TABLESPACE); - - if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_LOCK)) - HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_LOCK); - - if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT)) - HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT); - - if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT)) - HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT); - - if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK)) - HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK); - - if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) - HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT)) + HandleRecoveryConflictInterrupt(); SetLatch(MyLatch); } diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 6db803476c4..0851789e8b6 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -71,13 +71,13 @@ static volatile sig_atomic_t got_standby_delay_timeout = false; static volatile sig_atomic_t 
got_standby_lock_timeout = false; static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, - ProcSignalReason reason, + RecoveryConflictReason reason, uint32 wait_event_info, bool report_waiting); -static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason); +static void SendRecoveryConflictWithBufferPin(RecoveryConflictReason reason); static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts); static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks); -static const char *get_recovery_conflict_desc(ProcSignalReason reason); +static const char *get_recovery_conflict_desc(RecoveryConflictReason reason); /* * InitRecoveryTransactionEnvironment @@ -271,7 +271,7 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info) * to be resolved or not. */ void -LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, +LogRecoveryConflict(RecoveryConflictReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting) { @@ -358,7 +358,8 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, */ static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, - ProcSignalReason reason, uint32 wait_event_info, + RecoveryConflictReason reason, + uint32 wait_event_info, bool report_waiting) { TimestampTz waitStart = 0; @@ -384,19 +385,19 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, /* Is it time to kill it? */ if (WaitExceedsMaxStandbyDelay(wait_event_info)) { - pid_t pid; + bool signaled; /* * Now find out who to throw out of the balloon. */ Assert(VirtualTransactionIdIsValid(*waitlist)); - pid = SignalVirtualTransaction(*waitlist, reason); + signaled = SignalRecoveryConflictWithVirtualXID(*waitlist, reason); /* * Wait a little bit for it to die so that we avoid flooding * an unresponsive backend when system is heavily loaded. 
*/ - if (pid != 0) + if (signaled) pg_usleep(5000L); } @@ -489,7 +490,7 @@ ResolveRecoveryConflictWithSnapshot(TransactionId snapshotConflictHorizon, backends = GetConflictingVirtualXIDs(snapshotConflictHorizon, locator.dbOid); ResolveRecoveryConflictWithVirtualXIDs(backends, - PROCSIG_RECOVERY_CONFLICT_SNAPSHOT, + RECOVERY_CONFLICT_SNAPSHOT, WAIT_EVENT_RECOVERY_CONFLICT_SNAPSHOT, true); @@ -560,7 +561,7 @@ ResolveRecoveryConflictWithTablespace(Oid tsid) temp_file_users = GetConflictingVirtualXIDs(InvalidTransactionId, InvalidOid); ResolveRecoveryConflictWithVirtualXIDs(temp_file_users, - PROCSIG_RECOVERY_CONFLICT_TABLESPACE, + RECOVERY_CONFLICT_TABLESPACE, WAIT_EVENT_RECOVERY_CONFLICT_TABLESPACE, true); } @@ -581,7 +582,7 @@ ResolveRecoveryConflictWithDatabase(Oid dbid) */ while (CountDBBackends(dbid) > 0) { - CancelDBBackends(dbid, PROCSIG_RECOVERY_CONFLICT_DATABASE); + SignalRecoveryConflictWithDatabase(dbid, RECOVERY_CONFLICT_DATABASE); /* * Wait awhile for them to die so that we avoid flooding an @@ -665,7 +666,7 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict) * because the caller, WaitOnLock(), has already reported that. */ ResolveRecoveryConflictWithVirtualXIDs(backends, - PROCSIG_RECOVERY_CONFLICT_LOCK, + RECOVERY_CONFLICT_LOCK, PG_WAIT_LOCK | locktag.locktag_type, false); } @@ -723,8 +724,8 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict) */ while (VirtualTransactionIdIsValid(*backends)) { - SignalVirtualTransaction(*backends, - PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK); + (void) SignalRecoveryConflictWithVirtualXID(*backends, + RECOVERY_CONFLICT_STARTUP_DEADLOCK); backends++; } @@ -802,7 +803,7 @@ ResolveRecoveryConflictWithBufferPin(void) /* * We're already behind, so clear a path as quickly as possible. 
*/ - SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN); } else { @@ -842,7 +843,7 @@ ResolveRecoveryConflictWithBufferPin(void) ProcWaitForSignal(WAIT_EVENT_BUFFER_CLEANUP); if (got_standby_delay_timeout) - SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN); else if (got_standby_deadlock_timeout) { /* @@ -858,7 +859,7 @@ ResolveRecoveryConflictWithBufferPin(void) * not be so harmful because the period that the buffer is kept pinned * is basically no so long. But we should fix this? */ - SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK); + SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_STARTUP_DEADLOCK); } /* @@ -873,10 +874,10 @@ ResolveRecoveryConflictWithBufferPin(void) } static void -SendRecoveryConflictWithBufferPin(ProcSignalReason reason) +SendRecoveryConflictWithBufferPin(RecoveryConflictReason reason) { - Assert(reason == PROCSIG_RECOVERY_CONFLICT_BUFFERPIN || - reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK); + Assert(reason == RECOVERY_CONFLICT_BUFFERPIN || + reason == RECOVERY_CONFLICT_STARTUP_DEADLOCK); /* * We send signal to all backends to ask them if they are holding the @@ -884,7 +885,7 @@ SendRecoveryConflictWithBufferPin(ProcSignalReason reason) * innocent, but we let the SIGUSR1 handling in each backend decide their * own fate. 
*/ - CancelDBBackends(InvalidOid, reason); + SignalRecoveryConflictWithDatabase(InvalidOid, reason); } /* @@ -1489,35 +1490,33 @@ LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs, /* Return the description of recovery conflict */ static const char * -get_recovery_conflict_desc(ProcSignalReason reason) +get_recovery_conflict_desc(RecoveryConflictReason reason) { const char *reasonDesc = _("unknown reason"); switch (reason) { - case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN: + case RECOVERY_CONFLICT_BUFFERPIN: reasonDesc = _("recovery conflict on buffer pin"); break; - case PROCSIG_RECOVERY_CONFLICT_LOCK: + case RECOVERY_CONFLICT_LOCK: reasonDesc = _("recovery conflict on lock"); break; - case PROCSIG_RECOVERY_CONFLICT_TABLESPACE: + case RECOVERY_CONFLICT_TABLESPACE: reasonDesc = _("recovery conflict on tablespace"); break; - case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT: + case RECOVERY_CONFLICT_SNAPSHOT: reasonDesc = _("recovery conflict on snapshot"); break; - case PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT: + case RECOVERY_CONFLICT_LOGICALSLOT: reasonDesc = _("recovery conflict on replication slot"); break; - case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK: + case RECOVERY_CONFLICT_STARTUP_DEADLOCK: reasonDesc = _("recovery conflict on buffer deadlock"); break; - case PROCSIG_RECOVERY_CONFLICT_DATABASE: + case RECOVERY_CONFLICT_DATABASE: reasonDesc = _("recovery conflict on database"); break; - default: - break; } return reasonDesc; diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 8560a903bc8..31ccdb1ef89 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -504,6 +504,7 @@ InitProcess(void) Assert(dlist_is_empty(&(MyProc->myProcLocks[i]))); } #endif + pg_atomic_write_u32(&MyProc->pendingRecoveryConflicts, 0); /* Initialize fields for sync rep */ MyProc->waitLSN = InvalidXLogRecPtr; @@ -1445,7 +1446,7 @@ ProcSleep(LOCALLOCK *locallock) * because the startup process here has already waited * 
longer than deadlock_timeout. */ - LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK, + LogRecoveryConflict(RECOVERY_CONFLICT_LOCK, standbyWaitStart, now, cnt > 0 ? vxids : NULL, true); logged_recovery_conflict = true; @@ -1686,7 +1687,7 @@ ProcSleep(LOCALLOCK *locallock) * startup process waited longer than deadlock_timeout for it. */ if (InHotStandby && logged_recovery_conflict) - LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK, + LogRecoveryConflict(RECOVERY_CONFLICT_LOCK, standbyWaitStart, GetCurrentTimestamp(), NULL, false); diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 02e9aaa6bca..664161886cf 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -67,6 +67,7 @@ #include "storage/proc.h" #include "storage/procsignal.h" #include "storage/sinval.h" +#include "storage/standby.h" #include "tcop/backend_startup.h" #include "tcop/fastpath.h" #include "tcop/pquery.h" @@ -155,10 +156,6 @@ static const char *userDoption = NULL; /* -D switch */ static bool EchoQuery = false; /* -E switch */ static bool UseSemiNewlineNewline = false; /* -j switch */ -/* whether or not, and why, we were canceled by conflict with recovery */ -static volatile sig_atomic_t RecoveryConflictPending = false; -static volatile sig_atomic_t RecoveryConflictPendingReasons[NUM_PROCSIGNALS]; - /* reused buffer to pass to SendRowDescriptionMessage() */ static MemoryContext row_description_context = NULL; static StringInfoData row_description_buf; @@ -2537,34 +2534,31 @@ errdetail_params(ParamListInfo params) * Add an errdetail() line showing conflict source. 
*/ static int -errdetail_recovery_conflict(ProcSignalReason reason) +errdetail_recovery_conflict(RecoveryConflictReason reason) { switch (reason) { - case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN: + case RECOVERY_CONFLICT_BUFFERPIN: errdetail("User was holding shared buffer pin for too long."); break; - case PROCSIG_RECOVERY_CONFLICT_LOCK: + case RECOVERY_CONFLICT_LOCK: errdetail("User was holding a relation lock for too long."); break; - case PROCSIG_RECOVERY_CONFLICT_TABLESPACE: + case RECOVERY_CONFLICT_TABLESPACE: errdetail("User was or might have been using tablespace that must be dropped."); break; - case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT: + case RECOVERY_CONFLICT_SNAPSHOT: errdetail("User query might have needed to see row versions that must be removed."); break; - case PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT: + case RECOVERY_CONFLICT_LOGICALSLOT: errdetail("User was using a logical replication slot that must be invalidated."); break; - case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK: + case RECOVERY_CONFLICT_STARTUP_DEADLOCK: errdetail("User transaction caused buffer deadlock with recovery."); break; - case PROCSIG_RECOVERY_CONFLICT_DATABASE: + case RECOVERY_CONFLICT_DATABASE: errdetail("User was connected to a database that must be dropped."); break; - default: - break; - /* no errdetail */ } return 0; @@ -3067,15 +3061,14 @@ FloatExceptionHandler(SIGNAL_ARGS) } /* - * Tell the next CHECK_FOR_INTERRUPTS() to check for a particular type of - * recovery conflict. Runs in a SIGUSR1 handler. + * Tell the next CHECK_FOR_INTERRUPTS() to process recovery conflicts. Runs + * in a SIGUSR1 handler. 
*/ void -HandleRecoveryConflictInterrupt(ProcSignalReason reason) +HandleRecoveryConflictInterrupt(void) { - RecoveryConflictPendingReasons[reason] = true; - RecoveryConflictPending = true; - InterruptPending = true; + if (pg_atomic_read_u32(&MyProc->pendingRecoveryConflicts) != 0) + InterruptPending = true; /* latch will be set by procsignal_sigusr1_handler */ } @@ -3083,11 +3076,11 @@ HandleRecoveryConflictInterrupt(ProcSignalReason reason) * Check one individual conflict reason. */ static void -ProcessRecoveryConflictInterrupt(ProcSignalReason reason) +ProcessRecoveryConflictInterrupt(RecoveryConflictReason reason) { switch (reason) { - case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK: + case RECOVERY_CONFLICT_STARTUP_DEADLOCK: /* * If we aren't waiting for a lock we can never deadlock. @@ -3098,21 +3091,20 @@ ProcessRecoveryConflictInterrupt(ProcSignalReason reason) /* Intentional fall through to check wait for pin */ /* FALLTHROUGH */ - case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN: + case RECOVERY_CONFLICT_BUFFERPIN: /* - * If PROCSIG_RECOVERY_CONFLICT_BUFFERPIN is requested but we - * aren't blocking the Startup process there is nothing more to - * do. + * If RECOVERY_CONFLICT_BUFFERPIN is requested but we aren't + * blocking the Startup process there is nothing more to do. * - * When PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK is requested, - * if we're waiting for locks and the startup process is not - * waiting for buffer pin (i.e., also waiting for locks), we set - * the flag so that ProcSleep() will check for deadlocks. + * When RECOVERY_CONFLICT_STARTUP_DEADLOCK is requested, if we're + * waiting for locks and the startup process is not waiting for + * buffer pin (i.e., also waiting for locks), we set the flag so + * that ProcSleep() will check for deadlocks. 
*/ if (!HoldingBufferPinThatDelaysRecovery()) { - if (reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK && + if (reason == RECOVERY_CONFLICT_STARTUP_DEADLOCK && GetStartupBufferPinWaitBufId() < 0) CheckDeadLockAlert(); return; @@ -3121,9 +3113,9 @@ ProcessRecoveryConflictInterrupt(ProcSignalReason reason) /* Intentional fall through to error handling */ /* FALLTHROUGH */ - case PROCSIG_RECOVERY_CONFLICT_LOCK: - case PROCSIG_RECOVERY_CONFLICT_TABLESPACE: - case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT: + case RECOVERY_CONFLICT_LOCK: + case RECOVERY_CONFLICT_TABLESPACE: + case RECOVERY_CONFLICT_SNAPSHOT: /* * If we aren't in a transaction any longer then ignore. @@ -3133,34 +3125,34 @@ ProcessRecoveryConflictInterrupt(ProcSignalReason reason) /* FALLTHROUGH */ - case PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT: + case RECOVERY_CONFLICT_LOGICALSLOT: /* * If we're not in a subtransaction then we are OK to throw an * ERROR to resolve the conflict. Otherwise drop through to the * FATAL case. * - * PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT is a special case that - * always throws an ERROR (ie never promotes to FATAL), though it - * still has to respect QueryCancelHoldoffCount, so it shares this - * code path. Logical decoding slots are only acquired while + * RECOVERY_CONFLICT_LOGICALSLOT is a special case that always + * throws an ERROR (ie never promotes to FATAL), though it still + * has to respect QueryCancelHoldoffCount, so it shares this code + * path. Logical decoding slots are only acquired while * performing logical decoding. During logical decoding no user * controlled code is run. During [sub]transaction abort, the * slot is released. Therefore user controlled code cannot * intercept an error before the replication slot is released. 
* * XXX other times that we can throw just an ERROR *may* be - * PROCSIG_RECOVERY_CONFLICT_LOCK if no locks are held in parent + * RECOVERY_CONFLICT_LOCK if no locks are held in parent * transactions * - * PROCSIG_RECOVERY_CONFLICT_SNAPSHOT if no snapshots are held by - * parent transactions and the transaction is not - * transaction-snapshot mode + * RECOVERY_CONFLICT_SNAPSHOT if no snapshots are held by parent + * transactions and the transaction is not transaction-snapshot + * mode * - * PROCSIG_RECOVERY_CONFLICT_TABLESPACE if no temp files or - * cursors open in parent transactions + * RECOVERY_CONFLICT_TABLESPACE if no temp files or cursors open + * in parent transactions */ - if (reason == PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT || + if (reason == RECOVERY_CONFLICT_LOGICALSLOT || !IsSubTransaction()) { /* @@ -3187,8 +3179,7 @@ ProcessRecoveryConflictInterrupt(ProcSignalReason reason) * Re-arm and defer this interrupt until later. See * similar code in ProcessInterrupts(). */ - RecoveryConflictPendingReasons[reason] = true; - RecoveryConflictPending = true; + (void) pg_atomic_fetch_or_u32(&MyProc->pendingRecoveryConflicts, (1 << reason)); InterruptPending = true; return; } @@ -3222,7 +3213,7 @@ ProcessRecoveryConflictInterrupt(ProcSignalReason reason) " database and repeat your command."))); break; - case PROCSIG_RECOVERY_CONFLICT_DATABASE: + case RECOVERY_CONFLICT_DATABASE: /* The database is being dropped; terminate the session */ pgstat_report_recovery_conflict(reason); @@ -3243,6 +3234,8 @@ ProcessRecoveryConflictInterrupt(ProcSignalReason reason) static void ProcessRecoveryConflictInterrupts(void) { + uint32 pending; + /* * We don't need to worry about joggling the elbow of proc_exit, because * proc_exit_prepare() holds interrupts, so ProcessInterrupts() won't call @@ -3250,17 +3243,27 @@ ProcessRecoveryConflictInterrupts(void) */ Assert(!proc_exit_inprogress); Assert(InterruptHoldoffCount == 0); - Assert(RecoveryConflictPending); - RecoveryConflictPending 
= false; + /* Are any recovery conflicts pending? */ + pending = pg_atomic_read_membarrier_u32(&MyProc->pendingRecoveryConflicts); + if (pending == 0) + return; - for (ProcSignalReason reason = PROCSIG_RECOVERY_CONFLICT_FIRST; - reason <= PROCSIG_RECOVERY_CONFLICT_LAST; + /* + * Check the conflicts one by one, clearing each flag only before + * processing the particular conflict. This ensures that if multiple + * conflicts are pending, we come back here to process the remaining + * conflicts, if an error is thrown during processing one of them. + */ + for (RecoveryConflictReason reason = 0; + reason < NUM_RECOVERY_CONFLICT_REASONS; reason++) { - if (RecoveryConflictPendingReasons[reason]) + if ((pending & (1 << reason)) != 0) { - RecoveryConflictPendingReasons[reason] = false; + /* clear the flag */ + (void) pg_atomic_fetch_and_u32(&MyProc->pendingRecoveryConflicts, ~(1 << reason)); + ProcessRecoveryConflictInterrupt(reason); } } @@ -3451,7 +3454,7 @@ ProcessInterrupts(void) } } - if (RecoveryConflictPending) + if (pg_atomic_read_u32(&MyProc->pendingRecoveryConflicts) != 0) ProcessRecoveryConflictInterrupts(); if (IdleInTransactionSessionTimeoutPending) diff --git a/src/backend/utils/activity/pgstat_database.c b/src/backend/utils/activity/pgstat_database.c index d7f6d4c5ee6..e6759ccaa3d 100644 --- a/src/backend/utils/activity/pgstat_database.c +++ b/src/backend/utils/activity/pgstat_database.c @@ -17,7 +17,7 @@ #include "postgres.h" -#include "storage/procsignal.h" +#include "storage/standby.h" #include "utils/pgstat_internal.h" #include "utils/timestamp.h" @@ -88,31 +88,31 @@ pgstat_report_recovery_conflict(int reason) dbentry = pgstat_prep_database_pending(MyDatabaseId); - switch (reason) + switch ((RecoveryConflictReason) reason) { - case PROCSIG_RECOVERY_CONFLICT_DATABASE: + case RECOVERY_CONFLICT_DATABASE: /* * Since we drop the information about the database as soon as it * replicates, there is no point in counting these conflicts. 
*/ break; - case PROCSIG_RECOVERY_CONFLICT_TABLESPACE: + case RECOVERY_CONFLICT_TABLESPACE: dbentry->conflict_tablespace++; break; - case PROCSIG_RECOVERY_CONFLICT_LOCK: + case RECOVERY_CONFLICT_LOCK: dbentry->conflict_lock++; break; - case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT: + case RECOVERY_CONFLICT_SNAPSHOT: dbentry->conflict_snapshot++; break; - case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN: + case RECOVERY_CONFLICT_BUFFERPIN: dbentry->conflict_bufferpin++; break; - case PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT: + case RECOVERY_CONFLICT_LOGICALSLOT: dbentry->conflict_logicalslot++; break; - case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK: + case RECOVERY_CONFLICT_STARTUP_DEADLOCK: dbentry->conflict_startup_deadlock++; break; } diff --git a/src/backend/utils/adt/mcxtfuncs.c b/src/backend/utils/adt/mcxtfuncs.c index 12b8d4cefaf..c7f7b8bc2dd 100644 --- a/src/backend/utils/adt/mcxtfuncs.c +++ b/src/backend/utils/adt/mcxtfuncs.c @@ -19,6 +19,7 @@ #include "mb/pg_wchar.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "storage/procsignal.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/hsearch.h" diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 679f0624f92..ac0df4aeaaa 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -236,6 +236,16 @@ struct PGPROC BackendType backendType; /* what kind of process is this? */ + /* + * While in hot standby mode, shows that a conflict signal has been sent + * for the current transaction. Set/cleared while holding ProcArrayLock, + * though not required. Accessed without lock, if needed. + * + * This is a bitmask; each bit corresponds to a RecoveryConflictReason + * enum value. + */ + pg_atomic_uint32 pendingRecoveryConflicts; + /* * Info about LWLock the process is currently waiting for, if any. 
* diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index 3a8593f87ba..c5ab1574fe3 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -77,12 +77,15 @@ extern VirtualTransactionId *GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, bool allDbs, int excludeVacuum, int *nvxids); extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid); -extern pid_t SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode); + +extern bool SignalRecoveryConflict(PGPROC *proc, pid_t pid, RecoveryConflictReason reason); +extern bool SignalRecoveryConflictWithVirtualXID(VirtualTransactionId vxid, RecoveryConflictReason reason); +extern void SignalRecoveryConflictWithDatabase(Oid databaseid, RecoveryConflictReason reason); + extern bool MinimumActiveBackends(int min); extern int CountDBBackends(Oid databaseid); extern int CountDBConnections(Oid databaseid); -extern void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode); extern int CountUserBackends(Oid roleid); extern bool CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared); diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index e52b8eb7697..348fba53a93 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -36,20 +36,12 @@ typedef enum PROCSIG_BARRIER, /* global barrier interrupt */ PROCSIG_LOG_MEMORY_CONTEXT, /* ask backend to log the memory contexts */ PROCSIG_PARALLEL_APPLY_MESSAGE, /* Message from parallel apply workers */ - - /* Recovery conflict reasons */ - PROCSIG_RECOVERY_CONFLICT_FIRST, - PROCSIG_RECOVERY_CONFLICT_DATABASE = PROCSIG_RECOVERY_CONFLICT_FIRST, - PROCSIG_RECOVERY_CONFLICT_TABLESPACE, - PROCSIG_RECOVERY_CONFLICT_LOCK, - PROCSIG_RECOVERY_CONFLICT_SNAPSHOT, - PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT, - PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, - PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, - 
PROCSIG_RECOVERY_CONFLICT_LAST = PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, + PROCSIG_RECOVERY_CONFLICT, /* backend is blocking recovery, check + * PGPROC->pendingRecoveryConflicts for the + * reason */ } ProcSignalReason; -#define NUM_PROCSIGNALS (PROCSIG_RECOVERY_CONFLICT_LAST + 1) +#define NUM_PROCSIGNALS (PROCSIG_RECOVERY_CONFLICT + 1) typedef enum { diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index 7b10932635a..65a8176785e 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -16,7 +16,6 @@ #include "datatype/timestamp.h" #include "storage/lock.h" -#include "storage/procsignal.h" #include "storage/relfilelocator.h" #include "storage/standbydefs.h" @@ -25,6 +24,37 @@ extern PGDLLIMPORT int max_standby_archive_delay; extern PGDLLIMPORT int max_standby_streaming_delay; extern PGDLLIMPORT bool log_recovery_conflict_waits; +/* Recovery conflict reasons */ +typedef enum +{ + /* Backend is connected to a database that is being dropped */ + RECOVERY_CONFLICT_DATABASE, + + /* Backend is using a tablespace that is being dropped */ + RECOVERY_CONFLICT_TABLESPACE, + + /* Backend is holding a lock that is blocking recovery */ + RECOVERY_CONFLICT_LOCK, + + /* Backend is holding a snapshot that is blocking recovery */ + RECOVERY_CONFLICT_SNAPSHOT, + + /* Backend is using a logical replication slot that must be invalidated */ + RECOVERY_CONFLICT_LOGICALSLOT, + + /* Backend is holding a pin on a buffer that is blocking recovery */ + RECOVERY_CONFLICT_BUFFERPIN, + + /* + * The backend is requested to check for deadlocks. The startup process + * doesn't check for deadlock directly, because we want to kill one of the + * other backends instead of the startup process. 
+ */ + RECOVERY_CONFLICT_STARTUP_DEADLOCK, +} RecoveryConflictReason; + +#define NUM_RECOVERY_CONFLICT_REASONS (RECOVERY_CONFLICT_STARTUP_DEADLOCK + 1) + extern void InitRecoveryTransactionEnvironment(void); extern void ShutdownRecoveryTransactionEnvironment(void); @@ -43,7 +73,7 @@ extern void CheckRecoveryConflictDeadlock(void); extern void StandbyDeadLockHandler(void); extern void StandbyTimeoutHandler(void); extern void StandbyLockTimeoutHandler(void); -extern void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, +extern void LogRecoveryConflict(RecoveryConflictReason reason, TimestampTz wait_start, TimestampTz now, VirtualTransactionId *wait_list, bool still_waiting); diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h index 54ddee875ed..5bc5bcfb20d 100644 --- a/src/include/tcop/tcopprot.h +++ b/src/include/tcop/tcopprot.h @@ -74,7 +74,7 @@ extern void die(SIGNAL_ARGS); pg_noreturn extern void quickdie(SIGNAL_ARGS); extern void StatementCancelHandler(SIGNAL_ARGS); pg_noreturn extern void FloatExceptionHandler(SIGNAL_ARGS); -extern void HandleRecoveryConflictInterrupt(ProcSignalReason reason); +extern void HandleRecoveryConflictInterrupt(void); extern void ProcessClientReadInterrupt(bool blocked); extern void ProcessClientWriteInterrupt(bool blocked); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 7619845fba9..df42b78bc9d 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2489,6 +2489,7 @@ RecordCacheArrayEntry RecordCacheEntry RecordCompareData RecordIOData +RecoveryConflictReason RecoveryLockEntry RecoveryLockXidEntry RecoveryPauseState