git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Fix memory ordering in WAIT FOR LSN wakeup mechanism
Author: Alexander Korotkov <akorotkov@postgresql.org>
Sun, 3 May 2026 13:16:30 +0000 (16:16 +0300)
Committer: Alexander Korotkov <akorotkov@postgresql.org>
Sun, 3 May 2026 13:22:02 +0000 (16:22 +0300)
WAIT FOR LSN uses a Dekker-style handshake: the waker stores an LSN
position then reads minWaitedLSN; the waiter stores its target into
minWaitedLSN then reads the position.  Without a barrier between each
side's store and load, a CPU may satisfy the load before the store
becomes globally visible, causing either side to miss a concurrent
update.  The result is a missed wakeup: the waiter sleeps indefinitely
until the next unrelated event.

Fix by embedding the required barriers into the atomic operations on
minWaitedLSN:

- In updateMinWaitedLSN(), use pg_atomic_write_membarrier_u64() so the
  waiter's preceding heap update is visible before the new minWaitedLSN
  value is published.

- In WaitLSNWakeup(), use pg_atomic_read_membarrier_u64() in the
  fast-path check so the waker's preceding position store is globally
  visible before minWaitedLSN is read.

The waiter side is also covered by the barrier semantics already present
in GetCurrentLSNForWaitType(): GetWalRcvWriteRecPtr() uses an explicit
read barrier (from patch 0001), while the remaining getters acquire a
spinlock, which implies the same ordering.

Also call ResetLatch() unconditionally after WaitLatch(), following the
standard latch loop pattern.  WaitLatch() does not guarantee that all
simultaneously true wake conditions are reported in one return, so a
timeout can race with SetLatch().  If we skip ResetLatch() on a timeout
return, the code performs further asynchronous-state checks before
consuming the latch, violating the latch API's required wait/reset
pattern.  That can leave the latch set across loop exit and cause a
later unrelated WaitLatch() in the same backend to return immediately.

Reported-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/zqbppucpmkeqecfy4s5kscnru4tbk6khp3ozqz6ad2zijz354k%40w4bdf4z3wqoz
Author: Xuneng Zhou <xunengzhou@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
src/backend/access/transam/xlogwait.c

index 2e31c0d67d7626934ab86aed2755bd3a4c8accb8..6a27183c207609ee7ad993a059eef2c46c164630 100644 (file)
@@ -92,13 +92,19 @@ StaticAssertDecl(lengthof(WaitLSNWaitEvents) == WAIT_LSN_TYPE_COUNT,
                                 "WaitLSNWaitEvents must match WaitLSNType enum");
 
 /*
- * Get the current LSN for the specified wait type.
+ * Get the current LSN for the specified wait type.  Provide memory
+ * barrier semantics before getting the value.
  */
 XLogRecPtr
 GetCurrentLSNForWaitType(WaitLSNType lsnType)
 {
        Assert(lsnType >= 0 && lsnType < WAIT_LSN_TYPE_COUNT);
 
+       /*
+        * All of the cases below provide memory barrier semantics:
+        * GetWalRcvWriteRecPtr() and GetFlushRecPtr() have explicit barriers,
+        * while GetXLogReplayRecPtr() and GetWalRcvFlushRecPtr() use spinlocks.
+        */
        switch (lsnType)
        {
                case WAIT_LSN_TYPE_STANDBY_REPLAY:
@@ -184,7 +190,8 @@ updateMinWaitedLSN(WaitLSNType lsnType)
 
                minWaitedLSN = procInfo->waitLSN;
        }
-       pg_atomic_write_u64(&waitLSNState->minWaitedLSN[i], minWaitedLSN);
+       /* Pairs with pg_atomic_read_membarrier_u64() in WaitLSNWakeup(). */
+       pg_atomic_write_membarrier_u64(&waitLSNState->minWaitedLSN[i], minWaitedLSN);
 }
 
 /*
@@ -325,10 +332,11 @@ WaitLSNWakeup(WaitLSNType lsnType, XLogRecPtr currentLSN)
 
        /*
         * Fast path check.  Skip if currentLSN is InvalidXLogRecPtr, which means
-        * "wake all waiters" (e.g., during promotion when recovery ends).
+        * "wake all waiters" (e.g., during promotion when recovery ends). Pairs
+        * with pg_atomic_write_membarrier_u64() in updateMinWaitedLSN().
         */
        if (XLogRecPtrIsValid(currentLSN) &&
-               pg_atomic_read_u64(&waitLSNState->minWaitedLSN[i]) > currentLSN)
+               pg_atomic_read_membarrier_u64(&waitLSNState->minWaitedLSN[i]) > currentLSN)
                return;
 
        wakeupWaiters(lsnType, currentLSN);
@@ -450,8 +458,7 @@ WaitForLSN(WaitLSNType lsnType, XLogRecPtr targetLSN, int64 timeout)
                                        errmsg("terminating connection due to unexpected postmaster exit"),
                                        errcontext("while waiting for LSN"));
 
-               if (rc & WL_LATCH_SET)
-                       ResetLatch(MyLatch);
+               ResetLatch(MyLatch);
        }
 
        /*