* Reserve WAL for the currently active local slot using the specified WAL
* location (restart_lsn).
*
- * If the given WAL location has been removed, reserve WAL using the oldest
- * existing WAL segment.
+ * If the given WAL location has been removed or is at risk of removal,
+ * reserve WAL starting from the oldest location that is safe from removal.
*/
static void
reserve_wal_for_local_slot(XLogRecPtr restart_lsn)
{
- XLogSegNo oldest_segno;
+ XLogRecPtr slot_min_lsn;
+ XLogRecPtr min_safe_lsn;
XLogSegNo segno;
ReplicationSlot *slot = MyReplicationSlot;
Assert(slot != NULL);
Assert(XLogRecPtrIsInvalid(slot->data.restart_lsn));
- while (true)
- {
- SpinLockAcquire(&slot->mutex);
- slot->data.restart_lsn = restart_lsn;
- SpinLockRelease(&slot->mutex);
-
- /* Prevent WAL removal as fast as possible */
- ReplicationSlotsComputeRequiredLSN();
-
- XLByteToSeg(slot->data.restart_lsn, segno, wal_segment_size);
+ /*
+ * Acquire an exclusive lock to prevent the checkpoint process from
+ * concurrently calculating the minimum slot LSN (see
+ * CheckPointReplicationSlots), ensuring that if WAL reservation occurs
+ * first, the checkpoint must wait for the restart_lsn update before
+ * calculating the minimum LSN.
+ *
+ * Note: Unlike ReplicationSlotReserveWal(), this lock does not protect a
+ * newly synced slot from being invalidated if a concurrent checkpoint has
+ * invoked CheckPointReplicationSlots() before the WAL reservation here.
+ * This can happen because the initial restart_lsn received from the
+ * remote server can precede the redo pointer. Therefore, when selecting
+ * the initial restart_lsn, we use the smaller of the redo pointer and
+ * the minimum slot LSN when that value is greater than the remote
+ * restart_lsn, instead of relying solely on the remote value.
+ */
+ LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
- /*
- * Find the oldest existing WAL segment file.
- *
- * Normally, we can determine it by using the last removed segment
- * number. However, if no WAL segment files have been removed by a
- * checkpoint since startup, we need to search for the oldest segment
- * file from the current timeline existing in XLOGDIR.
- *
- * XXX: Currently, we are searching for the oldest segment in the
- * current timeline as there is less chance of the slot's restart_lsn
- * from being some prior timeline, and even if it happens, in the
- * worst case, we will wait to sync till the slot's restart_lsn moved
- * to the current timeline.
- */
- oldest_segno = XLogGetLastRemovedSegno() + 1;
+ /*
+ * Determine the minimum non-removable LSN by comparing the redo pointer
+ * with the minimum slot LSN.
+ *
+ * The minimum slot LSN is considered because the redo pointer advances at
+ * every checkpoint, even when replication slots are present on the
+ * standby. In such scenarios, the redo pointer can exceed the remote
+ * restart_lsn, while the WAL preceding the remote restart_lsn remains
+ * protected by a local replication slot.
+ */
+ min_safe_lsn = GetRedoRecPtr();
+ slot_min_lsn = XLogGetReplicationSlotMinimumLSN();
- if (oldest_segno == 1)
- {
- TimeLineID cur_timeline;
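+ /*
+ * The minimum slot LSN is InvalidXLogRecPtr when no local slot currently
+ * reserves WAL; in that case the redo pointer alone determines the
+ * minimum safe LSN.
+ */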
+ if (XLogRecPtrIsValid(slot_min_lsn) && min_safe_lsn > slot_min_lsn)
+ min_safe_lsn = slot_min_lsn;
- GetWalRcvFlushRecPtr(NULL, &cur_timeline);
- oldest_segno = XLogGetOldestSegno(cur_timeline);
- }
+ /*
+ * If the minimum safe LSN is greater than the given restart_lsn, use it
+ * as the initial restart_lsn for the newly synced slot. Otherwise, use
+ * the given remote restart_lsn.
+ */
+ SpinLockAcquire(&slot->mutex);
+ slot->data.restart_lsn = Max(restart_lsn, min_safe_lsn);
+ SpinLockRelease(&slot->mutex);
- elog(DEBUG1, "segno: " UINT64_FORMAT " of purposed restart_lsn for the synced slot, oldest_segno: " UINT64_FORMAT " available",
- segno, oldest_segno);
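+ /* Advertise the new restart_lsn so that the required WAL is retained */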
+ ReplicationSlotsComputeRequiredLSN();
- /*
- * If all required WAL is still there, great, otherwise retry. The
- * slot should prevent further removal of WAL, unless there's a
- * concurrent ReplicationSlotsComputeRequiredLSN() after we've written
- * the new restart_lsn above, so normally we should never need to loop
- * more than twice.
- */
- if (segno >= oldest_segno)
- break;
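+ /*
+ * Sanity check: the segment holding the chosen restart_lsn must still
+ * exist; if it has already been removed, the slot cannot be synced
+ * safely.
+ */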
+ XLByteToSeg(slot->data.restart_lsn, segno, wal_segment_size);
+ if (XLogGetLastRemovedSegno() >= segno)
+ elog(ERROR, "WAL required by replication slot %s has been removed concurrently",
+ NameStr(slot->data.name));
- /* Retry using the location of the oldest wal segment */
- XLogSegNoOffsetToRecPtr(oldest_segno, 0, wal_segment_size, restart_lsn);
- }
+ LWLockRelease(ReplicationSlotAllocationLock);
}
/*
my ($node, $result);
$node = PostgreSQL::Test::Cluster->new('mike');
-$node->init;
-$node->append_conf('postgresql.conf', "wal_level = 'logical'");
+$node->init(allows_streaming => 'logical');
$node->start;
# Check if the extension injection_points is available, as it may be
};
is($@, '', "Logical slot still valid");
+# Verify that a synchronized slot is not invalidated immediately after
+# synchronization when a concurrent checkpoint is in progress.
+my $primary = $node;
+
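+# Disable autovacuum to avoid unrelated background activity during the test.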
+$primary->append_conf('postgresql.conf', "autovacuum = off");
+$primary->reload;
+
+my $backup_name = 'backup';
+
+$primary->backup($backup_name);
+
+# Create a standby
+my $standby = PostgreSQL::Test::Cluster->new('standby');
+$standby->init_from_backup(
+ $primary, $backup_name,
+ has_streaming => 1,
+ has_restoring => 1);
+
+my $connstr_1 = $primary->connstr;
+$standby->append_conf(
+ 'postgresql.conf', qq(
+hot_standby_feedback = on
+primary_slot_name = 'phys_slot'
+primary_conninfo = '$connstr_1 dbname=postgres'
+));
+
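+# On the primary, create the failover-enabled logical slot to be synced and a
+# physical slot for the standby's primary_slot_name.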
+$primary->safe_psql('postgres',
+ q{SELECT pg_create_logical_replication_slot('failover_slot', 'test_decoding', false, false, true);
+ SELECT pg_create_physical_replication_slot('phys_slot');}
+);
+
+$standby->start;
+
+# Generate some activity and switch the WAL file on the primary
+$primary->advance_wal(1);
+$primary->safe_psql('postgres', "CHECKPOINT");
+$primary->wait_for_replay_catchup($standby);
+
+# Trigger a checkpoint on the standby from a background session and make it
+# wait at the injection point, so that it stops right before invalidating
+# replication slots.
+note('starting checkpoint');
+
+$checkpoint = $standby->background_psql('postgres');
+$checkpoint->query_safe(
+ q(select injection_points_attach('restartpoint-before-slot-invalidation','wait'))
+);
+$checkpoint->query_until(
+ qr/starting_checkpoint/,
+ q(\echo starting_checkpoint
+checkpoint;
+));
+
+# Wait until the checkpoint stops right before invalidating slots
+note('waiting for injection_point');
+$standby->wait_for_event('checkpointer', 'restartpoint-before-slot-invalidation');
+note('injection_point is reached');
+
+# Enable slot sync worker to synchronize the failover slot to the standby
+$standby->append_conf('postgresql.conf', qq(sync_replication_slots = on));
+$standby->reload;
+
+# Wait for the slot to be synced
+$standby->poll_query_until(
+ 'postgres',
+ "SELECT COUNT(*) > 0 FROM pg_replication_slots WHERE slot_name = 'failover_slot'");
+
+# Release the checkpointer
+$standby->safe_psql('postgres',
+ q{select injection_points_wakeup('restartpoint-before-slot-invalidation');
+ select injection_points_detach('restartpoint-before-slot-invalidation')});
+
+$checkpoint->quit;
+
+# Confirm that the slot is not invalidated
+is( $standby->safe_psql(
+ 'postgres',
+ q{SELECT invalidation_reason IS NULL AND synced FROM pg_replication_slots WHERE slot_name = 'failover_slot';}
+ ),
+ "t",
+ 'logical slot is not invalidated');
+
done_testing();