git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Improve heuristics for compressing the KnownAssignedXids array.
author: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 29 Nov 2022 20:43:17 +0000 (15:43 -0500)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 29 Nov 2022 20:43:17 +0000 (15:43 -0500)
Previously, we'd compress only when the active range of array entries
reached Max(4 * PROCARRAY_MAXPROCS, 2 * pArray->numKnownAssignedXids).
If max_connections is large, the first term could result in not
compressing for a long time, resulting in much wastage of cycles in
hot-standby backends scanning the array to take snapshots.  Get rid
of that term, and just bound it to 2 * pArray->numKnownAssignedXids.

That however creates the opposite risk, that we might spend too much
effort compressing.  Hence, consider compressing only once every 128
commit records.  (This frequency was chosen by benchmarking.  While
we only tried one benchmark scenario, the results seem stable over
a fairly wide range of frequencies.)

Also, force compression when processing RecoveryInfo WAL records
(which should be infrequent); the old code could perform compression
then, but would do so only after the same array-range check as for
the transaction-commit path.

Also, opportunistically run compression if the startup process is about
to wait for WAL, though no more often than once a second.  This should
prevent cases where we waste lots of time by leaving the array
not-compressed for long intervals due to low WAL traffic.

Lastly, add a simple check to keep us from uselessly compressing
when the array storage is already compact.

Back-patch, as the performance problem is worse in pre-v14 branches
than in HEAD.

Simon Riggs and Michail Nikolaev, with help from Tom Lane and
Andres Freund.

Discussion: https://postgr.es/m/CALdSSPgahNUD_=pB_j=1zSnDBaiOtqVfzo8Ejt5J_k7qZiU1Tw@mail.gmail.com

src/backend/access/transam/xlog.c
src/backend/storage/ipc/procarray.c
src/include/storage/procarray.h

index 3f5776c24606da200f252001e8e9394645fbf0ac..8ae41bb7810df71afb44a8aa1b7b820180e3c9dc 100644 (file)
@@ -12447,6 +12447,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
                                                wait_time = wal_retrieve_retry_interval -
                                                        TimestampDifferenceMilliseconds(last_fail_time, now);
 
+                                               /* Do background tasks that might benefit us later. */
+                                               KnownAssignedTransactionIdsIdleMaintenance();
+
                                                WaitLatch(&XLogCtl->recoveryWakeupLatch,
                                                                  WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
                                                                  wait_time, WAIT_EVENT_RECOVERY_WAL_STREAM);
@@ -12642,6 +12645,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
                                                streaming_reply_sent = true;
                                        }
 
+                                       /* Do any background tasks that might benefit us later. */
+                                       KnownAssignedTransactionIdsIdleMaintenance();
+
                                        /*
                                         * Wait for more WAL to arrive. Time out after 5 seconds
                                         * to react to a trigger file promptly.
index 01670fbe23a42f4dca196ba9298e44aa11b349d2..596c59ba5612da7cf8b90bb10c44bd5568c27260 100644 (file)
@@ -100,6 +100,17 @@ static ProcArrayStruct *procArray;
 static PGPROC *allProcs;
 static PGXACT *allPgXact;
 
+/*
+ * Reason codes for KnownAssignedXidsCompress().
+ */
+typedef enum KAXCompressReason
+{
+       KAX_NO_SPACE,                           /* need to free up space at array end */
+       KAX_PRUNE,                                      /* we just pruned old entries */
+       KAX_TRANSACTION_END,            /* we just committed/removed some XIDs */
+       KAX_STARTUP_PROCESS_IDLE        /* startup process is about to sleep */
+} KAXCompressReason;
+
 /*
  * Cache to reduce overhead of repeated calls to TransactionIdIsInProgress()
  */
@@ -162,7 +173,7 @@ static bool HaveVirtualXIDsDelayingChkptGuts(VirtualTransactionId *vxids,
                                                                                         int nvxids, int type);
 
 /* Primitives for KnownAssignedXids array handling for standby */
-static void KnownAssignedXidsCompress(bool force);
+static void KnownAssignedXidsCompress(KAXCompressReason reason, bool haveLock);
 static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
                                         bool exclusive_lock);
 static bool KnownAssignedXidsSearch(TransactionId xid, bool remove);
@@ -3418,6 +3429,17 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
        LWLockRelease(ProcArrayLock);
 }
 
+/*
+ * KnownAssignedTransactionIdsIdleMaintenance
+ *             Opportunistically do maintenance work when the startup process
+ *             is about to go idle.
+ */
+void
+KnownAssignedTransactionIdsIdleMaintenance(void)
+{
+       KnownAssignedXidsCompress(KAX_STARTUP_PROCESS_IDLE, false);
+}
+
 
 /*
  * Private module functions to manipulate KnownAssignedXids
@@ -3500,7 +3522,9 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
  * so there is an optimal point for any workload mix. We use a heuristic to
  * decide when to compress the array, though trimming also helps reduce
  * frequency of compressing. The heuristic requires us to track the number of
- * currently valid XIDs in the array.
+ * currently valid XIDs in the array (N).  Except in special cases, we'll
+ * compress when S >= 2N.  Bounding S at 2N in turn bounds the time for
+ * taking a snapshot to be O(N), which it would have to be anyway.
  */
 
 
@@ -3508,43 +3532,92 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
  * Compress KnownAssignedXids by shifting valid data down to the start of the
  * array, removing any gaps.
  *
- * A compression step is forced if "force" is true, otherwise we do it
- * only if a heuristic indicates it's a good time to do it.
+ * A compression step is forced if "reason" is KAX_NO_SPACE, otherwise
+ * we do it only if a heuristic indicates it's a good time to do it.
  *
- * Caller must hold ProcArrayLock in exclusive mode.
+ * Compression requires holding ProcArrayLock in exclusive mode.
+ * Caller must pass haveLock = true if it already holds the lock.
  */
 static void
-KnownAssignedXidsCompress(bool force)
+KnownAssignedXidsCompress(KAXCompressReason reason, bool haveLock)
 {
        /* use volatile pointer to prevent code rearrangement */
        volatile ProcArrayStruct *pArray = procArray;
        int                     head,
-                               tail;
+                               tail,
+                               nelements;
        int                     compress_index;
        int                     i;
 
-       /* no spinlock required since we hold ProcArrayLock exclusively */
+       /* Counters for compression heuristics */
+       static unsigned int transactionEndsCounter;
+       static TimestampTz lastCompressTs;
+
+       /* Tuning constants */
+#define KAX_COMPRESS_FREQUENCY 128     /* in transactions */
+#define KAX_COMPRESS_IDLE_INTERVAL 1000 /* in ms */
+
+       /*
+        * Since only the startup process modifies the head/tail pointers, we
+        * don't need a lock to read them here.
+        */
        head = pArray->headKnownAssignedXids;
        tail = pArray->tailKnownAssignedXids;
+       nelements = head - tail;
 
-       if (!force)
+       /*
+        * If we can choose whether to compress, use a heuristic to avoid
+        * compressing too often or not often enough.  "Compress" here simply
+        * means moving the values to the beginning of the array, so it is not as
+        * complex or costly as typical data compression algorithms.
+        */
+       if (nelements == pArray->numKnownAssignedXids)
        {
                /*
-                * If we can choose how much to compress, use a heuristic to avoid
-                * compressing too often or not often enough.
-                *
-                * Heuristic is if we have a large enough current spread and less than
-                * 50% of the elements are currently in use, then compress. This
-                * should ensure we compress fairly infrequently. We could compress
-                * less often though the virtual array would spread out more and
-                * snapshots would become more expensive.
+                * When there are no gaps between head and tail, don't bother to
+                * compress, except in the KAX_NO_SPACE case where we must compress to
+                * create some space after the head.
                 */
-               int                     nelements = head - tail;
+               if (reason != KAX_NO_SPACE)
+                       return;
+       }
+       else if (reason == KAX_TRANSACTION_END)
+       {
+               /*
+                * Consider compressing only once every so many commits.  Frequency
+                * determined by benchmarks.
+                */
+               if ((transactionEndsCounter++) % KAX_COMPRESS_FREQUENCY != 0)
+                       return;
 
-               if (nelements < 4 * PROCARRAY_MAXPROCS ||
-                       nelements < 2 * pArray->numKnownAssignedXids)
+               /*
+                * Furthermore, compress only if the used part of the array is less
+                * than 50% full (see comments above).
+                */
+               if (nelements < 2 * pArray->numKnownAssignedXids)
                        return;
        }
+       else if (reason == KAX_STARTUP_PROCESS_IDLE)
+       {
+               /*
+                * We're about to go idle for lack of new WAL, so we might as well
+                * compress.  But not too often, to avoid ProcArray lock contention
+                * with readers.
+                */
+               if (lastCompressTs != 0)
+               {
+                       TimestampTz compress_after;
+
+                       compress_after = TimestampTzPlusMilliseconds(lastCompressTs,
+                                                                                                                KAX_COMPRESS_IDLE_INTERVAL);
+                       if (GetCurrentTimestamp() < compress_after)
+                               return;
+               }
+       }
+
+       /* Need to compress, so get the lock if we don't have it. */
+       if (!haveLock)
+               LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
 
        /*
         * We compress the array by reading the valid values from tail to head,
@@ -3560,9 +3633,16 @@ KnownAssignedXidsCompress(bool force)
                        compress_index++;
                }
        }
+       Assert(compress_index == pArray->numKnownAssignedXids);
 
        pArray->tailKnownAssignedXids = 0;
        pArray->headKnownAssignedXids = compress_index;
+
+       if (!haveLock)
+               LWLockRelease(ProcArrayLock);
+
+       /* Update timestamp for maintenance.  No need to hold lock for this. */
+       lastCompressTs = GetCurrentTimestamp();
 }
 
 /*
@@ -3635,18 +3715,11 @@ KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
         */
        if (head + nxids > pArray->maxKnownAssignedXids)
        {
-               /* must hold lock to compress */
-               if (!exclusive_lock)
-                       LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
-
-               KnownAssignedXidsCompress(true);
+               KnownAssignedXidsCompress(KAX_NO_SPACE, exclusive_lock);
 
                head = pArray->headKnownAssignedXids;
                /* note: we no longer care about the tail pointer */
 
-               if (!exclusive_lock)
-                       LWLockRelease(ProcArrayLock);
-
                /*
                 * If it still won't fit then we're out of memory
                 */
@@ -3841,7 +3914,7 @@ KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
                KnownAssignedXidsRemove(subxids[i]);
 
        /* Opportunistically compress the array */
-       KnownAssignedXidsCompress(false);
+       KnownAssignedXidsCompress(KAX_TRANSACTION_END, true);
 }
 
 /*
@@ -3917,7 +3990,7 @@ KnownAssignedXidsRemovePreceding(TransactionId removeXid)
        }
 
        /* Opportunistically compress the array */
-       KnownAssignedXidsCompress(false);
+       KnownAssignedXidsCompress(KAX_PRUNE, true);
 }
 
 /*
index ea116cae21f0df1d855a23295dbe9ff1c2f3c951..1a81863768e76d017c4453da5198fac847d3c807 100644 (file)
@@ -74,6 +74,7 @@ extern void ExpireTreeKnownAssignedTransactionIds(TransactionId xid,
                                                                          TransactionId max_xid);
 extern void ExpireAllKnownAssignedTransactionIds(void);
 extern void ExpireOldKnownAssignedTransactionIds(TransactionId xid);
+extern void KnownAssignedTransactionIdsIdleMaintenance(void);
 
 extern int     GetMaxSnapshotXidCount(void);
 extern int     GetMaxSnapshotSubxidCount(void);