From 378a216187aea1b488ce60ed07dd1ac5c14a9984 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Mon, 30 Mar 2026 16:07:11 -0400 Subject: [PATCH] Set pd_prune_xid on insert MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Now that on-access pruning can update the visibility map (VM) during read-only queries, set the page’s pd_prune_xid hint during INSERT and on the new page during UPDATE. This allows heap_page_prune_and_freeze() to set the VM the first time a page is read after being filled with tuples. This may avoid I/O amplification by setting the page all-visible when it is still in shared buffers and allowing later vacuums to skip scanning the page. It also enables index-only scans of newly inserted data much sooner. As a side benefit, this addresses a long-standing note in heap_insert() and heap_multi_insert(): aborted inserts can now be pruned on-access rather than lingering until the next VACUUM. Author: Melanie Plageman Reviewed-by: Andres Freund Reviewed-by: Chao Li Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com --- src/backend/access/heap/heapam.c | 39 +++++++++++++++++---------- src/backend/access/heap/heapam_xlog.c | 19 ++++++++++++- src/backend/access/heap/pruneheap.c | 18 ++++++------- 3 files changed, 51 insertions(+), 25 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 129b01da864..d34136d2e94 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2154,6 +2154,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, TransactionId xid = GetCurrentTransactionId(); HeapTuple heaptup; Buffer buffer; + Page page; Buffer vmbuffer = InvalidBuffer; bool all_visible_cleared = false; @@ -2180,6 +2181,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, &vmbuffer, NULL, 0); + page = BufferGetPage(buffer); + /* * We're about to do the actual insert -- but check for conflict first, to * avoid possibly having to roll back work we've just done. @@ -2203,25 +2206,30 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, RelationPutHeapTuple(relation, buffer, heaptup, (options & HEAP_INSERT_SPECULATIVE) != 0); - if (PageIsAllVisible(BufferGetPage(buffer))) + if (PageIsAllVisible(page)) { all_visible_cleared = true; - PageClearAllVisible(BufferGetPage(buffer)); + PageClearAllVisible(page); visibilitymap_clear(relation, ItemPointerGetBlockNumber(&(heaptup->t_self)), vmbuffer, VISIBILITYMAP_VALID_BITS); } /* - * XXX Should we set PageSetPrunable on this page ? + * Set pd_prune_xid to trigger heap_page_prune_and_freeze() once the page + * is full so that we can set the page all-visible in the VM on the next + * page access. * - * The inserting transaction may eventually abort thus making this tuple - * DEAD and hence available for pruning. Though we don't want to optimize - * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the - * aborted tuple will never be pruned until next vacuum is triggered. + * Setting pd_prune_xid is also handy if the inserting transaction + * eventually aborts making this tuple DEAD and hence available for + * pruning. If no other tuple in this page is UPDATEd/DELETEd, the aborted + * tuple would never otherwise be pruned until next vacuum is triggered. * - * If you do add PageSetPrunable here, add it in heap_xlog_insert too. + * Don't set it if we are in bootstrap mode or we are inserting a frozen + * tuple, as there is no further pruning/freezing needed in those cases. */ + if (TransactionIdIsNormal(xid) && !(options & HEAP_INSERT_FROZEN)) + PageSetPrunable(page, xid); MarkBufferDirty(buffer); @@ -2231,7 +2239,6 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, xl_heap_insert xlrec; xl_heap_header xlhdr; XLogRecPtr recptr; - Page page = BufferGetPage(buffer); uint8 info = XLOG_HEAP_INSERT; int bufflags = 0; @@ -2596,8 +2603,12 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, } /* - * XXX Should we set PageSetPrunable on this page ? See heap_insert() + * Set pd_prune_xid. See heap_insert() for more on why we do this when + * inserting tuples. This only makes sense if we aren't already + * setting the page frozen in the VM and we're not in bootstrap mode. */ + if (!all_frozen_set && TransactionIdIsNormal(xid)) + PageSetPrunable(page, xid); MarkBufferDirty(buffer); @@ -4139,12 +4150,12 @@ l2: * the subsequent page pruning will be a no-op and the hint will be * cleared. * - * XXX Should we set hint on newbuf as well? If the transaction aborts, - * there would be a prunable tuple in the newbuf; but for now we choose - * not to optimize for aborts. Note that heap_xlog_update must be kept in - * sync if this decision changes. + * We set the new page prunable as well. See heap_insert() for more on why + * we do this when inserting tuples. */ PageSetPrunable(page, xid); + if (newbuf != buffer) + PageSetPrunable(newpage, xid); if (use_hot_update) { diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c index 1302bb13e18..f3f419d3dc1 100644 --- a/src/backend/access/heap/heapam_xlog.c +++ b/src/backend/access/heap/heapam_xlog.c @@ -450,6 +450,14 @@ heap_xlog_insert(XLogReaderState *record) freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ + /* + * Set the page prunable to trigger on-access pruning later, which may + * set the page all-visible in the VM. See comments in heap_insert(). + */ + if (TransactionIdIsNormal(XLogRecGetXid(record)) && + !HeapTupleHeaderXminFrozen(htup)) + PageSetPrunable(page, XLogRecGetXid(record)); + PageSetLSN(page, lsn); if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) @@ -599,12 +607,19 @@ heap_xlog_multi_insert(XLogReaderState *record) if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); - /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */ + /* + * XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible. If + * we are not setting the page frozen, then set the page's prunable + * hint so that we trigger on-access pruning later which may set the + * page all-visible in the VM. + */ if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) { PageSetAllVisible(page); PageClearPrunable(page); } + else + PageSetPrunable(page, XLogRecGetXid(record)); MarkBufferDirty(buffer); } @@ -921,6 +936,8 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) freespace = PageGetHeapFreeSpace(npage); PageSetLSN(npage, lsn); + /* See heap_insert() for why we set pd_prune_xid on insert */ + PageSetPrunable(npage, XLogRecGetXid(record)); MarkBufferDirty(nbuffer); } diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 6a2c3513497..74c355be219 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -287,7 +287,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer, /* * First check whether there's any chance there's something to prune, * determining the appropriate horizon is a waste if there's no prune_xid - * (i.e. no updates/deletes left potentially dead tuples around). + * (i.e. no updates/deletes left potentially dead tuples around and no + * inserts inserted new tuples that may be visible to all). */ prune_xid = PageGetPruneXid(page); if (!TransactionIdIsValid(prune_xid)) @@ -1930,17 +1931,14 @@ heap_prune_record_unchanged_lp_normal(PruneState *prstate, OffsetNumber offnum) prstate->set_all_visible = false; prstate->set_all_frozen = false; - /* The page should not be marked all-visible */ - if (PageIsAllVisible(page)) - heap_page_fix_vm_corruption(prstate, offnum, - VM_CORRUPT_TUPLE_VISIBILITY); - /* - * If we wanted to optimize for aborts, we might consider marking - * the page prunable when we see INSERT_IN_PROGRESS. But we - * don't. See related decisions about when to mark the page - * prunable in heapam.c. + * Though there is nothing "prunable" on the page, we maintain + * pd_prune_xid for inserts so that we have the opportunity to + * mark them all-visible during the next round of pruning. */ + heap_prune_record_prunable(prstate, + HeapTupleHeaderGetXmin(htup), + offnum); break; case HEAPTUPLE_DELETE_IN_PROGRESS: -- 2.47.3