git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Eliminate COPY FREEZE use of XLOG_HEAP2_VISIBLE
author: Melanie Plageman <melanieplageman@gmail.com>
Thu, 9 Oct 2025 20:25:50 +0000 (16:25 -0400)
committer: Melanie Plageman <melanieplageman@gmail.com>
Thu, 9 Oct 2025 20:29:01 +0000 (16:29 -0400)
Instead of emitting a separate WAL XLOG_HEAP2_VISIBLE record for setting
bits in the VM, specify the VM block changes in the
XLOG_HEAP2_MULTI_INSERT record.

This halves the number of WAL records emitted by COPY FREEZE.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com

src/backend/access/heap/heapam.c
src/backend/access/heap/heapam_xlog.c
src/backend/access/heap/visibilitymap.c
src/backend/access/rmgrdesc/heapdesc.c
src/include/access/visibilitymap.h

index ed0c0c2dc9f48177079d744c93f86407e3a4bffe..568696333c259b643141c0df1e7b86f0db68e390 100644 (file)
@@ -2466,7 +2466,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
 
                if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
+               {
                        all_frozen_set = true;
+                       /* Lock the vmbuffer before entering the critical section */
+                       LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
+               }
 
                /* NO EREPORT(ERROR) from here till changes are logged */
                START_CRIT_SECTION();
@@ -2506,7 +2510,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                 * going to add further frozen rows to it.
                 *
                 * If we're only adding already frozen rows to a previously empty
-                * page, mark it as all-visible.
+                * page, mark it as all-frozen and update the visibility map. We're
+                * already holding a pin on the vmbuffer.
                 */
                if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
                {
@@ -2517,7 +2522,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                                                                vmbuffer, VISIBILITYMAP_VALID_BITS);
                }
                else if (all_frozen_set)
+               {
                        PageSetAllVisible(page);
+                       visibilitymap_set_vmbits(BufferGetBlockNumber(buffer),
+                                                                        vmbuffer,
+                                                                        VISIBILITYMAP_ALL_VISIBLE |
+                                                                        VISIBILITYMAP_ALL_FROZEN,
+                                                                        relation->rd_locator);
+               }
 
                /*
                 * XXX Should we set PageSetPrunable on this page ? See heap_insert()
@@ -2565,6 +2577,12 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                        xlrec->flags = 0;
                        if (all_visible_cleared)
                                xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
+
+                       /*
+                        * We don't have to worry about including a conflict xid in the
+                        * WAL record, as HEAP_INSERT_FROZEN intentionally violates
+                        * visibility rules.
+                        */
                        if (all_frozen_set)
                                xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
 
@@ -2628,6 +2646,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                        XLogBeginInsert();
                        XLogRegisterData(xlrec, tupledata - scratch.data);
                        XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
+                       if (all_frozen_set)
+                               XLogRegisterBuffer(1, vmbuffer, 0);
 
                        XLogRegisterBufData(0, tupledata, totaldatalen);
 
@@ -2637,26 +2657,17 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                        recptr = XLogInsert(RM_HEAP2_ID, info);
 
                        PageSetLSN(page, recptr);
+                       if (all_frozen_set)
+                       {
+                               Assert(BufferIsDirty(vmbuffer));
+                               PageSetLSN(BufferGetPage(vmbuffer), recptr);
+                       }
                }
 
                END_CRIT_SECTION();
 
-               /*
-                * If we've frozen everything on the page, update the visibilitymap.
-                * We're already holding pin on the vmbuffer.
-                */
                if (all_frozen_set)
-               {
-                       /*
-                        * It's fine to use InvalidTransactionId here - this is only used
-                        * when HEAP_INSERT_FROZEN is specified, which intentionally
-                        * violates visibility rules.
-                        */
-                       visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
-                                                         InvalidXLogRecPtr, vmbuffer,
-                                                         InvalidTransactionId,
-                                                         VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
-               }
+                       LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
 
                UnlockReleaseBuffer(buffer);
                ndone += nthispage;
index cf843277938de424f5a09135e9ae7ebbcb9cc39d..30e339d8fe2d65bc44fd99aec106f81c836a5ddc 100644 (file)
@@ -551,6 +551,7 @@ heap_xlog_multi_insert(XLogReaderState *record)
        int                     i;
        bool            isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
        XLogRedoAction action;
+       Buffer          vmbuffer = InvalidBuffer;
 
        /*
         * Insertion doesn't overwrite MVCC data, so no conflict processing is
@@ -571,11 +572,11 @@ heap_xlog_multi_insert(XLogReaderState *record)
        if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
        {
                Relation        reln = CreateFakeRelcacheEntry(rlocator);
-               Buffer          vmbuffer = InvalidBuffer;
 
                visibilitymap_pin(reln, blkno, &vmbuffer);
                visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
                ReleaseBuffer(vmbuffer);
+               vmbuffer = InvalidBuffer;
                FreeFakeRelcacheEntry(reln);
        }
 
@@ -662,6 +663,52 @@ heap_xlog_multi_insert(XLogReaderState *record)
        if (BufferIsValid(buffer))
                UnlockReleaseBuffer(buffer);
 
+       buffer = InvalidBuffer;
+
+       /*
+        * Read and update the visibility map (VM) block.
+        *
+        * We must always redo VM changes, even if the corresponding heap page
+        * update was skipped due to the LSN interlock. Each VM block covers
+        * multiple heap pages, so later WAL records may update other bits in the
+        * same block. If this record includes an FPI (full-page image),
+        * subsequent WAL records may depend on it to guard against torn pages.
+        *
+        * Heap page changes are replayed first to preserve the invariant:
+        * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
+        *
+        * Note that we released the heap page lock above. During normal
+        * operation, this would be unsafe — a concurrent modification could
+        * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
+        * invariant.
+        *
+        * During recovery, however, no concurrent writers exist. Therefore,
+        * updating the VM without holding the heap page lock is safe enough. This
+        * same approach is taken when replaying xl_heap_visible records (see
+        * heap_xlog_visible()).
+        */
+       if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
+               XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
+                                                                         &vmbuffer) == BLK_NEEDS_REDO)
+       {
+               Page            vmpage = BufferGetPage(vmbuffer);
+
+               /* initialize the page if it was read as zeros */
+               if (PageIsNew(vmpage))
+                       PageInit(vmpage, BLCKSZ, 0);
+
+               visibilitymap_set_vmbits(blkno,
+                                                                vmbuffer,
+                                                                VISIBILITYMAP_ALL_VISIBLE |
+                                                                VISIBILITYMAP_ALL_FROZEN,
+                                                                rlocator);
+
+               PageSetLSN(vmpage, lsn);
+       }
+
+       if (BufferIsValid(vmbuffer))
+               UnlockReleaseBuffer(vmbuffer);
+
        /*
         * If the page is running low on free space, update the FSM as well.
         * Arbitrarily, our definition of "low" is less than 20%. We can't do much
index 0414ce1945c3fd46382b278d44509cb41fa590a2..2f5e61e239220d5f569a9460c7aeeda87c1ef146 100644 (file)
@@ -14,7 +14,8 @@
  *             visibilitymap_clear  - clear bits for one page in the visibility map
  *             visibilitymap_pin        - pin a map page for setting a bit
  *             visibilitymap_pin_ok - check whether correct map page is already pinned
- *             visibilitymap_set        - set a bit in a previously pinned page
+ *             visibilitymap_set        - set bit(s) in a previously pinned page and log
+ *             visibilitymap_set_vmbits - set bit(s) in a pinned page
  *             visibilitymap_get_status - get status of bits
  *             visibilitymap_count  - count number of bits set in visibility map
  *             visibilitymap_prepare_truncate -
@@ -322,6 +323,73 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
        return status;
 }
 
+/*
+ * Set VM (visibility map) flags in the VM block in vmBuf.
+ *
+ * This function is intended for callers that log VM changes together
+ * with the heap page modifications that rendered the page all-visible.
+ * Callers that log VM changes separately should use visibilitymap_set().
+ *
+ * vmBuf must be pinned and exclusively locked, and it must cover the VM bits
+ * corresponding to heapBlk.
+ *
+ * In normal operation (not recovery), this must be called inside a critical
+ * section that also applies the necessary heap page changes and, if
+ * applicable, emits WAL.
+ *
+ * The caller is responsible for ensuring consistency between the heap page
+ * and the VM page by holding a pin and exclusive lock on the buffer
+ * containing heapBlk.
+ *
+ * rlocator is used only for debugging messages.
+ */
+uint8
+visibilitymap_set_vmbits(BlockNumber heapBlk,
+                                                Buffer vmBuf, uint8 flags,
+                                                const RelFileLocator rlocator)
+{
+       BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
+       uint32          mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
+       uint8           mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
+       Page            page;
+       uint8      *map;
+       uint8           status;
+
+#ifdef TRACE_VISIBILITYMAP
+       elog(DEBUG1, "vm_set flags 0x%02X for %s %d",
+                flags,
+                relpathbackend(rlocator, MyProcNumber, MAIN_FORKNUM).str,
+                heapBlk);
+#endif
+
+       /* Call in same critical section where WAL is emitted. */
+       Assert(InRecovery || CritSectionCount > 0);
+
+       /* Flags should be valid. Also never clear bits with this function */
+       Assert((flags & VISIBILITYMAP_VALID_BITS) == flags);
+
+       /* Must never set all_frozen bit without also setting all_visible bit */
+       Assert(flags != VISIBILITYMAP_ALL_FROZEN);
+
+       /* Check that we have the right VM page pinned */
+       if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
+               elog(ERROR, "wrong VM buffer passed to visibilitymap_set");
+
+       Assert(BufferIsLockedByMeInMode(vmBuf, BUFFER_LOCK_EXCLUSIVE));
+
+       page = BufferGetPage(vmBuf);
+       map = (uint8 *) PageGetContents(page);
+
+       status = (map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS;
+       if (flags != status)
+       {
+               map[mapByte] |= (flags << mapOffset);
+               MarkBufferDirty(vmBuf);
+       }
+
+       return status;
+}
+
 /*
  *     visibilitymap_get_status - get status of bits
  *
index 82b62c95de5744659bd087f59ebe869af20e9128..056beb24d4075d0bb7642569b529c71bee8c5a6f 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "access/heapam_xlog.h"
 #include "access/rmgrdesc_utils.h"
+#include "access/visibilitymapdefs.h"
 #include "storage/standbydefs.h"
 
 /*
@@ -354,6 +355,11 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
                appendStringInfo(buf, "ntuples: %d, flags: 0x%02X", xlrec->ntuples,
                                                 xlrec->flags);
 
+               if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
+                       appendStringInfo(buf, ", vm_flags: 0x%02X",
+                                                        VISIBILITYMAP_ALL_VISIBLE |
+                                                        VISIBILITYMAP_ALL_FROZEN);
+
                if (XLogRecHasBlockData(record, 0) && !isinit)
                {
                        appendStringInfoString(buf, ", offsets:");
index be21c6dd1a306b9dbd53a2ffda35a9f3e7a3a403..c6fa37be9682cf4607a38d138da4484f7e63eaed 100644 (file)
@@ -18,6 +18,7 @@
 #include "access/xlogdefs.h"
 #include "storage/block.h"
 #include "storage/buf.h"
+#include "storage/relfilelocator.h"
 #include "utils/relcache.h"
 
 /* Macros for visibilitymap test */
@@ -37,6 +38,9 @@ extern uint8 visibilitymap_set(Relation rel,
                                                           Buffer vmBuf,
                                                           TransactionId cutoff_xid,
                                                           uint8 flags);
+extern uint8 visibilitymap_set_vmbits(BlockNumber heapBlk,
+                                                                         Buffer vmBuf, uint8 flags,
+                                                                         const RelFileLocator rlocator);
 extern uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
 extern void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen);
 extern BlockNumber visibilitymap_prepare_truncate(Relation rel,