git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Improve heuristics for compressing the KnownAssignedXids array.
author: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 29 Nov 2022 20:43:17 +0000 (15:43 -0500)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 29 Nov 2022 20:43:17 +0000 (15:43 -0500)
Previously, we'd compress only when the active range of array entries
reached Max(4 * PROCARRAY_MAXPROCS, 2 * pArray->numKnownAssignedXids).
If max_connections is large, the first term could result in not
compressing for a long time, resulting in much wastage of cycles in
hot-standby backends scanning the array to take snapshots.  Get rid
of that term, and just bound it to 2 * pArray->numKnownAssignedXids.

That however creates the opposite risk, that we might spend too much
effort compressing.  Hence, consider compressing only once every 128
commit records.  (This frequency was chosen by benchmarking.  While
we only tried one benchmark scenario, the results seem stable over
a fairly wide range of frequencies.)

Also, force compression when processing RecoveryInfo WAL records
(which should be infrequent); the old code could perform compression
then, but would do so only after the same array-range check as for
the transaction-commit path.

Also, opportunistically run compression if the startup process is about
to wait for WAL, though no more often than once a second.  This should
prevent cases where we waste lots of time by leaving the array
uncompressed for long intervals due to low WAL traffic.

Lastly, add a simple check to keep us from uselessly compressing
when the array storage is already compact.

Back-patch, as the performance problem is worse in pre-v14 branches
than in HEAD.

Simon Riggs and Michail Nikolaev, with help from Tom Lane and
Andres Freund.

Discussion: https://postgr.es/m/CALdSSPgahNUD_=pB_j=1zSnDBaiOtqVfzo8Ejt5J_k7qZiU1Tw@mail.gmail.com

src/backend/access/transam/xlogrecovery.c
src/backend/storage/ipc/procarray.c
src/include/storage/procarray.h

index 65bc11d5d36fedf3950d4e769f697eeb1056c88a..b51173a8c09ddaf3c28955d25a586672c962a3d0 100644 (file)
@@ -3565,6 +3565,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
                                                elog(LOG, "waiting for WAL to become available at %X/%X",
                                                         LSN_FORMAT_ARGS(RecPtr));
 
+                                               /* Do background tasks that might benefit us later. */
+                                               KnownAssignedTransactionIdsIdleMaintenance();
+
                                                (void) WaitLatch(&XLogRecoveryCtl->recoveryWakeupLatch,
                                                                                 WL_LATCH_SET | WL_TIMEOUT |
                                                                                 WL_EXIT_ON_PM_DEATH,
@@ -3831,6 +3834,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
                                                streaming_reply_sent = true;
                                        }
 
+                                       /* Do any background tasks that might benefit us later. */
+                                       KnownAssignedTransactionIdsIdleMaintenance();
+
                                        /* Update pg_stat_recovery_prefetch before sleeping. */
                                        XLogPrefetcherComputeStats(xlogprefetcher);
 
index 0d1972104617f5ae016e52855480b926937c1332..655d11e2f94f8e6634b9e2dd6b0a25307f3340b1 100644 (file)
@@ -256,6 +256,17 @@ typedef enum GlobalVisHorizonKind
        VISHORIZON_TEMP
 } GlobalVisHorizonKind;
 
+/*
+ * Reason codes for KnownAssignedXidsCompress().
+ */
+typedef enum KAXCompressReason
+{
+       KAX_NO_SPACE,                           /* need to free up space at array end */
+       KAX_PRUNE,                                      /* we just pruned old entries */
+       KAX_TRANSACTION_END,            /* we just committed/removed some XIDs */
+       KAX_STARTUP_PROCESS_IDLE        /* startup process is about to sleep */
+} KAXCompressReason;
+
 
 static ProcArrayStruct *procArray;
 
@@ -335,7 +346,7 @@ static void DisplayXidCache(void);
 #endif                                                 /* XIDCACHE_DEBUG */
 
 /* Primitives for KnownAssignedXids array handling for standby */
-static void KnownAssignedXidsCompress(bool force);
+static void KnownAssignedXidsCompress(KAXCompressReason reason, bool haveLock);
 static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
                                                                 bool exclusive_lock);
 static bool KnownAssignedXidsSearch(TransactionId xid, bool remove);
@@ -4508,6 +4519,17 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
        LWLockRelease(ProcArrayLock);
 }
 
+/*
+ * KnownAssignedTransactionIdsIdleMaintenance
+ *             Opportunistically do maintenance work when the startup process
+ *             is about to go idle.
+ */
+void
+KnownAssignedTransactionIdsIdleMaintenance(void)
+{
+       KnownAssignedXidsCompress(KAX_STARTUP_PROCESS_IDLE, false);
+}
+
 
 /*
  * Private module functions to manipulate KnownAssignedXids
@@ -4590,7 +4612,9 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
  * so there is an optimal point for any workload mix. We use a heuristic to
  * decide when to compress the array, though trimming also helps reduce
  * frequency of compressing. The heuristic requires us to track the number of
- * currently valid XIDs in the array.
+ * currently valid XIDs in the array (N).  Except in special cases, we'll
+ * compress when S >= 2N.  Bounding S at 2N in turn bounds the time for
+ * taking a snapshot to be O(N), which it would have to be anyway.
  */
 
 
@@ -4598,42 +4622,91 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
  * Compress KnownAssignedXids by shifting valid data down to the start of the
  * array, removing any gaps.
  *
- * A compression step is forced if "force" is true, otherwise we do it
- * only if a heuristic indicates it's a good time to do it.
+ * A compression step is forced if "reason" is KAX_NO_SPACE, otherwise
+ * we do it only if a heuristic indicates it's a good time to do it.
  *
- * Caller must hold ProcArrayLock in exclusive mode.
+ * Compression requires holding ProcArrayLock in exclusive mode.
+ * Caller must pass haveLock = true if it already holds the lock.
  */
 static void
-KnownAssignedXidsCompress(bool force)
+KnownAssignedXidsCompress(KAXCompressReason reason, bool haveLock)
 {
        ProcArrayStruct *pArray = procArray;
        int                     head,
-                               tail;
+                               tail,
+                               nelements;
        int                     compress_index;
        int                     i;
 
-       /* no spinlock required since we hold ProcArrayLock exclusively */
+       /* Counters for compression heuristics */
+       static unsigned int transactionEndsCounter;
+       static TimestampTz lastCompressTs;
+
+       /* Tuning constants */
+#define KAX_COMPRESS_FREQUENCY 128     /* in transactions */
+#define KAX_COMPRESS_IDLE_INTERVAL 1000 /* in ms */
+
+       /*
+        * Since only the startup process modifies the head/tail pointers, we
+        * don't need a lock to read them here.
+        */
        head = pArray->headKnownAssignedXids;
        tail = pArray->tailKnownAssignedXids;
+       nelements = head - tail;
 
-       if (!force)
+       /*
+        * If we can choose whether to compress, use a heuristic to avoid
+        * compressing too often or not often enough.  "Compress" here simply
+        * means moving the values to the beginning of the array, so it is not as
+        * complex or costly as typical data compression algorithms.
+        */
+       if (nelements == pArray->numKnownAssignedXids)
        {
                /*
-                * If we can choose how much to compress, use a heuristic to avoid
-                * compressing too often or not often enough.
-                *
-                * Heuristic is if we have a large enough current spread and less than
-                * 50% of the elements are currently in use, then compress. This
-                * should ensure we compress fairly infrequently. We could compress
-                * less often though the virtual array would spread out more and
-                * snapshots would become more expensive.
+                * When there are no gaps between head and tail, don't bother to
+                * compress, except in the KAX_NO_SPACE case where we must compress to
+                * create some space after the head.
+                */
+               if (reason != KAX_NO_SPACE)
+                       return;
+       }
+       else if (reason == KAX_TRANSACTION_END)
+       {
+               /*
+                * Consider compressing only once every so many commits.  Frequency
+                * determined by benchmarks.
                 */
-               int                     nelements = head - tail;
+               if ((transactionEndsCounter++) % KAX_COMPRESS_FREQUENCY != 0)
+                       return;
 
-               if (nelements < 4 * PROCARRAY_MAXPROCS ||
-                       nelements < 2 * pArray->numKnownAssignedXids)
+               /*
+                * Furthermore, compress only if the used part of the array is less
+                * than 50% full (see comments above).
+                */
+               if (nelements < 2 * pArray->numKnownAssignedXids)
                        return;
        }
+       else if (reason == KAX_STARTUP_PROCESS_IDLE)
+       {
+               /*
+                * We're about to go idle for lack of new WAL, so we might as well
+                * compress.  But not too often, to avoid ProcArray lock contention
+                * with readers.
+                */
+               if (lastCompressTs != 0)
+               {
+                       TimestampTz compress_after;
+
+                       compress_after = TimestampTzPlusMilliseconds(lastCompressTs,
+                                                                                                                KAX_COMPRESS_IDLE_INTERVAL);
+                       if (GetCurrentTimestamp() < compress_after)
+                               return;
+               }
+       }
+
+       /* Need to compress, so get the lock if we don't have it. */
+       if (!haveLock)
+               LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
 
        /*
         * We compress the array by reading the valid values from tail to head,
@@ -4649,9 +4722,16 @@ KnownAssignedXidsCompress(bool force)
                        compress_index++;
                }
        }
+       Assert(compress_index == pArray->numKnownAssignedXids);
 
        pArray->tailKnownAssignedXids = 0;
        pArray->headKnownAssignedXids = compress_index;
+
+       if (!haveLock)
+               LWLockRelease(ProcArrayLock);
+
+       /* Update timestamp for maintenance.  No need to hold lock for this. */
+       lastCompressTs = GetCurrentTimestamp();
 }
 
 /*
@@ -4723,18 +4803,11 @@ KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
         */
        if (head + nxids > pArray->maxKnownAssignedXids)
        {
-               /* must hold lock to compress */
-               if (!exclusive_lock)
-                       LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
-
-               KnownAssignedXidsCompress(true);
+               KnownAssignedXidsCompress(KAX_NO_SPACE, exclusive_lock);
 
                head = pArray->headKnownAssignedXids;
                /* note: we no longer care about the tail pointer */
 
-               if (!exclusive_lock)
-                       LWLockRelease(ProcArrayLock);
-
                /*
                 * If it still won't fit then we're out of memory
                 */
@@ -4928,7 +5001,7 @@ KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
                KnownAssignedXidsRemove(subxids[i]);
 
        /* Opportunistically compress the array */
-       KnownAssignedXidsCompress(false);
+       KnownAssignedXidsCompress(KAX_TRANSACTION_END, true);
 }
 
 /*
@@ -5003,7 +5076,7 @@ KnownAssignedXidsRemovePreceding(TransactionId removeXid)
        }
 
        /* Opportunistically compress the array */
-       KnownAssignedXidsCompress(false);
+       KnownAssignedXidsCompress(KAX_PRUNE, true);
 }
 
 /*
index 1b2cfac5ad0ad608dceda07c1d770e048f2d7307..781e3f6169ffb37b57dbaaaff8f4f2f3b12e7f82 100644 (file)
@@ -39,6 +39,7 @@ extern void ExpireTreeKnownAssignedTransactionIds(TransactionId xid,
                                                                                                  TransactionId max_xid);
 extern void ExpireAllKnownAssignedTransactionIds(void);
 extern void ExpireOldKnownAssignedTransactionIds(TransactionId xid);
+extern void KnownAssignedTransactionIdsIdleMaintenance(void);
 
 extern int     GetMaxSnapshotXidCount(void);
 extern int     GetMaxSnapshotSubxidCount(void);