git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Set pd_prune_xid on insert
author: Melanie Plageman <melanieplageman@gmail.com>
Mon, 30 Mar 2026 20:07:11 +0000 (16:07 -0400)
committer: Melanie Plageman <melanieplageman@gmail.com>
Mon, 30 Mar 2026 20:07:11 +0000 (16:07 -0400)
Now that on-access pruning can update the visibility map (VM) during
read-only queries, set the page’s pd_prune_xid hint during INSERT and on
the new page during UPDATE.

This allows heap_page_prune_and_freeze() to set the VM the first time a
page is read after being filled with tuples. This may avoid I/O
amplification by setting the page all-visible when it is still in shared
buffers and allowing later vacuums to skip scanning the page. It also
enables index-only scans of newly inserted data much sooner.

As a side benefit, this addresses a long-standing note in heap_insert()
and heap_multi_insert(): aborted inserts can now be pruned on-access
rather than lingering until the next VACUUM.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com

src/backend/access/heap/heapam.c
src/backend/access/heap/heapam_xlog.c
src/backend/access/heap/pruneheap.c

index 129b01da8641bee4260e22c3ba564da11b120029..d34136d2e94b482ee9eb3f1e43a70f12d47e1585 100644 (file)
@@ -2154,6 +2154,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
        TransactionId xid = GetCurrentTransactionId();
        HeapTuple       heaptup;
        Buffer          buffer;
+       Page            page;
        Buffer          vmbuffer = InvalidBuffer;
        bool            all_visible_cleared = false;
 
@@ -2180,6 +2181,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
                                                                           &vmbuffer, NULL,
                                                                           0);
 
+       page = BufferGetPage(buffer);
+
        /*
         * We're about to do the actual insert -- but check for conflict first, to
         * avoid possibly having to roll back work we've just done.
@@ -2203,25 +2206,30 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
        RelationPutHeapTuple(relation, buffer, heaptup,
                                                 (options & HEAP_INSERT_SPECULATIVE) != 0);
 
-       if (PageIsAllVisible(BufferGetPage(buffer)))
+       if (PageIsAllVisible(page))
        {
                all_visible_cleared = true;
-               PageClearAllVisible(BufferGetPage(buffer));
+               PageClearAllVisible(page);
                visibilitymap_clear(relation,
                                                        ItemPointerGetBlockNumber(&(heaptup->t_self)),
                                                        vmbuffer, VISIBILITYMAP_VALID_BITS);
        }
 
        /*
-        * XXX Should we set PageSetPrunable on this page ?
+        * Set pd_prune_xid to trigger heap_page_prune_and_freeze() once the page
+        * is full so that we can set the page all-visible in the VM on the next
+        * page access.
         *
-        * The inserting transaction may eventually abort thus making this tuple
-        * DEAD and hence available for pruning. Though we don't want to optimize
-        * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
-        * aborted tuple will never be pruned until next vacuum is triggered.
+        * Setting pd_prune_xid is also handy if the inserting transaction
+        * eventually aborts making this tuple DEAD and hence available for
+        * pruning. If no other tuple in this page is UPDATEd/DELETEd, the aborted
+        * tuple would never otherwise be pruned until next vacuum is triggered.
         *
-        * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
+        * Don't set it if we are in bootstrap mode or we are inserting a frozen
+        * tuple, as there is no further pruning/freezing needed in those cases.
         */
+       if (TransactionIdIsNormal(xid) && !(options & HEAP_INSERT_FROZEN))
+               PageSetPrunable(page, xid);
 
        MarkBufferDirty(buffer);
 
@@ -2231,7 +2239,6 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
                xl_heap_insert xlrec;
                xl_heap_header xlhdr;
                XLogRecPtr      recptr;
-               Page            page = BufferGetPage(buffer);
                uint8           info = XLOG_HEAP_INSERT;
                int                     bufflags = 0;
 
@@ -2596,8 +2603,12 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                }
 
                /*
-                * XXX Should we set PageSetPrunable on this page ? See heap_insert()
+                * Set pd_prune_xid. See heap_insert() for more on why we do this when
+                * inserting tuples. This only makes sense if we aren't already
+                * setting the page frozen in the VM and we're not in bootstrap mode.
                 */
+               if (!all_frozen_set && TransactionIdIsNormal(xid))
+                       PageSetPrunable(page, xid);
 
                MarkBufferDirty(buffer);
 
@@ -4139,12 +4150,12 @@ l2:
         * the subsequent page pruning will be a no-op and the hint will be
         * cleared.
         *
-        * XXX Should we set hint on newbuf as well?  If the transaction aborts,
-        * there would be a prunable tuple in the newbuf; but for now we choose
-        * not to optimize for aborts.  Note that heap_xlog_update must be kept in
-        * sync if this decision changes.
+        * We set the new page prunable as well. See heap_insert() for more on why
+        * we do this when inserting tuples.
         */
        PageSetPrunable(page, xid);
+       if (newbuf != buffer)
+               PageSetPrunable(newpage, xid);
 
        if (use_hot_update)
        {
index 1302bb13e18864f18904f2e4a19dfb84a739b85d..f3f419d3dc195da32422c920540ee4e2570dff83 100644 (file)
@@ -450,6 +450,14 @@ heap_xlog_insert(XLogReaderState *record)
 
                freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
 
+               /*
+                * Set the page prunable to trigger on-access pruning later, which may
+                * set the page all-visible in the VM. See comments in heap_insert().
+                */
+               if (TransactionIdIsNormal(XLogRecGetXid(record)) &&
+                       !HeapTupleHeaderXminFrozen(htup))
+                       PageSetPrunable(page, XLogRecGetXid(record));
+
                PageSetLSN(page, lsn);
 
                if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
@@ -599,12 +607,19 @@ heap_xlog_multi_insert(XLogReaderState *record)
                if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
                        PageClearAllVisible(page);
 
-               /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
+               /*
+                * XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible. If
+                * we are not setting the page frozen, then set the page's prunable
+                * hint so that we trigger on-access pruning later which may set the
+                * page all-visible in the VM.
+                */
                if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
                {
                        PageSetAllVisible(page);
                        PageClearPrunable(page);
                }
+               else
+                       PageSetPrunable(page, XLogRecGetXid(record));
 
                MarkBufferDirty(buffer);
        }
@@ -921,6 +936,8 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
                freespace = PageGetHeapFreeSpace(npage);
 
                PageSetLSN(npage, lsn);
+               /* See heap_insert() for why we set pd_prune_xid on insert */
+               PageSetPrunable(npage, XLogRecGetXid(record));
                MarkBufferDirty(nbuffer);
        }
 
index 6a2c3513497cc059cc863aa65348f6dad9fcb5f9..74c355be2199ea165112053cd3b37ed3efec0864 100644 (file)
@@ -287,7 +287,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer,
        /*
         * First check whether there's any chance there's something to prune,
         * determining the appropriate horizon is a waste if there's no prune_xid
-        * (i.e. no updates/deletes left potentially dead tuples around).
+        * (i.e. no updates/deletes left potentially dead tuples around and no
+        * inserts inserted new tuples that may be visible to all).
         */
        prune_xid = PageGetPruneXid(page);
        if (!TransactionIdIsValid(prune_xid))
@@ -1930,17 +1931,14 @@ heap_prune_record_unchanged_lp_normal(PruneState *prstate, OffsetNumber offnum)
                        prstate->set_all_visible = false;
                        prstate->set_all_frozen = false;
 
-                       /* The page should not be marked all-visible */
-                       if (PageIsAllVisible(page))
-                               heap_page_fix_vm_corruption(prstate, offnum,
-                                                                                       VM_CORRUPT_TUPLE_VISIBILITY);
-
                        /*
-                        * If we wanted to optimize for aborts, we might consider marking
-                        * the page prunable when we see INSERT_IN_PROGRESS.  But we
-                        * don't.  See related decisions about when to mark the page
-                        * prunable in heapam.c.
+                        * Though there is nothing "prunable" on the page, we maintain
+                        * pd_prune_xid for inserts so that we have the opportunity to
+                        * mark them all-visible during the next round of pruning.
                         */
+                       heap_prune_record_prunable(prstate,
+                                                                          HeapTupleHeaderGetXmin(htup),
+                                                                          offnum);
                        break;
 
                case HEAPTUPLE_DELETE_IN_PROGRESS: