git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Allow on-access pruning to set pages all-visible
author: Melanie Plageman <melanieplageman@gmail.com>
Mon, 30 Mar 2026 19:47:07 +0000 (15:47 -0400)
committer: Melanie Plageman <melanieplageman@gmail.com>
Mon, 30 Mar 2026 19:47:07 +0000 (15:47 -0400)
Many queries do not modify the underlying relation. For such queries, if
on-access pruning occurs during the scan, we can check whether the page
has become all-visible and update the visibility map accordingly.
Previously, only vacuum and COPY FREEZE marked pages as all-visible or
all-frozen.

This commit implements on-access VM setting for sequential scans, tid
range scans, sample scans, bitmap heap scans, and the underlying heap
relation in index scans.

Setting the visibility map on-access can avoid write amplification
caused by vacuum later needing to set the page all-visible, which could
trigger a write and potentially an FPI. It also allows more frequent
index-only scans, since they require pages to be marked all-visible in
the VM.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com

src/backend/access/heap/heapam.c
src/backend/access/heap/heapam_handler.c
src/backend/access/heap/pruneheap.c
src/backend/access/heap/vacuumlazy.c
src/include/access/heapam.h

index 4db4a2068ee5f3e997928564753488ae6d0a305e..129b01da8641bee4260e22c3ba564da11b120029 100644 (file)
@@ -633,7 +633,8 @@ heap_prepare_pagescan(TableScanDesc sscan)
        /*
         * Prune and repair fragmentation for the whole page, if possible.
         */
-       heap_page_prune_opt(scan->rs_base.rs_rd, buffer, &scan->rs_vmbuffer);
+       heap_page_prune_opt(scan->rs_base.rs_rd, buffer, &scan->rs_vmbuffer,
+                                               sscan->rs_flags & SO_HINT_REL_READ_ONLY);
 
        /*
         * We must hold share lock on the buffer content while examining tuple
index e63b12c3c6127adc39b15dc9b33d7df56fd6f60a..cdd153c6b6d7e845af09a77c8a4174706a5a98a3 100644 (file)
@@ -149,7 +149,8 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
                 */
                if (prev_buf != hscan->xs_cbuf)
                        heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf,
-                                                               &hscan->xs_vmbuffer);
+                                                               &hscan->xs_vmbuffer,
+                                                               hscan->xs_base.flags & SO_HINT_REL_READ_ONLY);
        }
 
        /* Obtain share-lock on the buffer so we can examine visibility */
@@ -2546,7 +2547,8 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
        /*
         * Prune and repair fragmentation for the whole page, if possible.
         */
-       heap_page_prune_opt(scan->rs_rd, buffer, &hscan->rs_vmbuffer);
+       heap_page_prune_opt(scan->rs_rd, buffer, &hscan->rs_vmbuffer,
+                                               scan->rs_flags & SO_HINT_REL_READ_ONLY);
 
        /*
         * We must hold share lock on the buffer content while examining tuple
index 6693af8da7fc7e2449d6f6dbe592dfc40427f9fe..6a2c3513497cc059cc863aa65348f6dad9fcb5f9 100644 (file)
@@ -44,6 +44,8 @@ typedef struct
        bool            mark_unused_now;
        /* whether to attempt freezing tuples */
        bool            attempt_freeze;
+       /* whether to attempt setting the VM */
+       bool            attempt_set_vm;
        struct VacuumCutoffs *cutoffs;
        Relation        relation;
 
@@ -75,7 +77,8 @@ typedef struct
        /*
         * set_all_visible and set_all_frozen indicate if the all-visible and
         * all-frozen bits in the visibility map can be set for this page after
-        * pruning.
+        * pruning. They are only tracked when the caller requests VM updates
+        * (attempt_set_vm); otherwise they remain false throughout.
         *
         * NOTE: set_all_visible and set_all_frozen initially don't include
         * LP_DEAD items. That's convenient for heap_page_prune_and_freeze() to
@@ -232,7 +235,8 @@ static void page_verify_redirects(Page page);
 
 static bool heap_page_will_freeze(bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune,
                                                                  PruneState *prstate);
-static bool heap_page_will_set_vm(PruneState *prstate, PruneReason reason);
+static bool heap_page_will_set_vm(PruneState *prstate, PruneReason reason,
+                                                                 bool do_prune, bool do_freeze);
 
 
 /*
@@ -251,9 +255,21 @@ static bool heap_page_will_set_vm(PruneState *prstate, PruneReason reason);
  * reuse the pin across calls, avoiding repeated pin/unpin cycles. If we find
  * VM corruption during pruning, we will fix it. Caller is responsible for
  * unpinning *vmbuffer.
+ *
+ * rel_read_only is true if we determined at plan time that the query does not
+ * modify the relation. It is counterproductive to set the VM if the query
+ * will immediately clear it.
+ *
+ * As noted in ScanRelIsReadOnly(), INSERT ... SELECT from the same table will
+ * report the scan relation as read-only. This is usually harmless in
+ * practice. It is useful to set scanned pages all-visible that won't be
+ * inserted into. Pages it does insert to will rarely meet the criteria for
+ * pruning, and those that do are likely to contain in-progress inserts which
+ * make the page not fully all-visible.
  */
 void
-heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer)
+heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer,
+                                       bool rel_read_only)
 {
        Page            page = BufferGetPage(buffer);
        TransactionId prune_xid;
@@ -336,6 +352,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer)
                         * current implementation.
                         */
                        params.options = HEAP_PAGE_PRUNE_ALLOW_FAST_PATH;
+                       if (rel_read_only)
+                               params.options |= HEAP_PAGE_PRUNE_SET_VM;
 
                        heap_page_prune_and_freeze(&params, &presult, &dummy_off_loc,
                                                                           NULL, NULL);
@@ -392,6 +410,7 @@ prune_freeze_setup(PruneFreezeParams *params,
        /* cutoffs must be provided if we will attempt freezing */
        Assert(!(params->options & HEAP_PAGE_PRUNE_FREEZE) || params->cutoffs);
        prstate->attempt_freeze = (params->options & HEAP_PAGE_PRUNE_FREEZE) != 0;
+       prstate->attempt_set_vm = (params->options & HEAP_PAGE_PRUNE_SET_VM) != 0;
        prstate->cutoffs = params->cutoffs;
        prstate->relation = params->relation;
        prstate->block = BufferGetBlockNumber(params->buffer);
@@ -461,14 +480,13 @@ prune_freeze_setup(PruneFreezeParams *params,
         * We track whether the page will be all-visible/all-frozen at the end of
         * pruning and freezing. While examining tuple visibility, we'll set
         * set_all_visible to false if there are tuples on the page not visible to
-        * all running and future transactions. set_all_visible is always
-        * maintained but only VACUUM will set the VM if the page ends up being
-        * all-visible.
+        * all running and future transactions. If setting the VM is enabled for
+        * this scan, we will do so if the page ends up being all-visible.
         *
         * We also keep track of the newest live XID, which is used to calculate
         * the snapshot conflict horizon for a WAL record setting the VM.
         */
-       prstate->set_all_visible = true;
+       prstate->set_all_visible = prstate->attempt_set_vm;
        prstate->newest_live_xid = InvalidTransactionId;
 
        /*
@@ -477,7 +495,9 @@ prune_freeze_setup(PruneFreezeParams *params,
         * caller passed HEAP_PAGE_PRUNE_FREEZE, because if they did not, we won't
         * call heap_prepare_freeze_tuple() for each tuple, and set_all_frozen
         * will never be cleared for tuples that need freezing. This would lead to
-        * incorrectly setting the visibility map all-frozen for this page.
+        * incorrectly setting the visibility map all-frozen for this page. We
+        * can't set the page all-frozen in the VM if the caller didn't pass
+        * HEAP_PAGE_PRUNE_SET_VM.
         *
         * When freezing is not required (no XIDs/MXIDs older than the freeze
         * cutoff), we may still choose to "opportunistically" freeze if doing so
@@ -494,7 +514,7 @@ prune_freeze_setup(PruneFreezeParams *params,
         * whether to freeze, but before updating the VM, to avoid setting the VM
         * bits incorrectly.
         */
-       prstate->set_all_frozen = prstate->attempt_freeze;
+       prstate->set_all_frozen = prstate->attempt_freeze && prstate->attempt_set_vm;
 }
 
 /*
@@ -920,21 +940,34 @@ heap_page_fix_vm_corruption(PruneState *prstate, OffsetNumber offnum,
  * This function does not actually set the VM bits or page-level visibility
  * hint, PD_ALL_VISIBLE.
  *
+ * This should be called only after do_freeze has been decided (and do_prune
+ * has been set), as these factor into our heuristic-based decision.
+ *
  * Returns true if one or both VM bits should be set and false otherwise.
  */
 static bool
-heap_page_will_set_vm(PruneState *prstate, PruneReason reason)
+heap_page_will_set_vm(PruneState *prstate, PruneReason reason,
+                                         bool do_prune, bool do_freeze)
 {
-       /*
-        * Though on-access pruning maintains prstate->set_all_visible, we don't
-        * set the VM on-access for now.
-        */
-       if (reason == PRUNE_ON_ACCESS)
+       if (!prstate->attempt_set_vm)
                return false;
 
        if (!prstate->set_all_visible)
                return false;
 
+       /*
+        * If this is an on-access call and we're not actually pruning, avoid
+        * setting the visibility map if it would newly dirty the heap page or, if
+        * the page is already dirty, if doing so would require including a
+        * full-page image (FPI) of the heap page in the WAL.
+        */
+       if (reason == PRUNE_ON_ACCESS && !do_prune && !do_freeze &&
+               (!BufferIsDirty(prstate->buffer) || XLogCheckBufferNeedsBackup(prstate->buffer)))
+       {
+               prstate->set_all_visible = prstate->set_all_frozen = false;
+               return false;
+       }
+
        prstate->new_vmbits = VISIBILITYMAP_ALL_VISIBLE;
 
        if (prstate->set_all_frozen)
@@ -1165,9 +1198,10 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
                prstate.set_all_visible = prstate.set_all_frozen = false;
 
        Assert(!prstate.set_all_frozen || prstate.set_all_visible);
+       Assert(!prstate.set_all_visible || prstate.attempt_set_vm);
        Assert(!prstate.set_all_visible || (prstate.lpdead_items == 0));
 
-       do_set_vm = heap_page_will_set_vm(&prstate, params->reason);
+       do_set_vm = heap_page_will_set_vm(&prstate, params->reason, do_prune, do_freeze);
 
        /*
         * new_vmbits should be 0 regardless of whether or not the page is
index f698c2d899bc3ff62027783a3b484b18fcb6a540..24001b273871f8d47139723d346d5c24f37d4c2b 100644 (file)
@@ -2021,7 +2021,7 @@ lazy_scan_prune(LVRelState *vacrel,
                .buffer = buf,
                .vmbuffer = vmbuffer,
                .reason = PRUNE_VACUUM_SCAN,
-               .options = HEAP_PAGE_PRUNE_FREEZE,
+               .options = HEAP_PAGE_PRUNE_FREEZE | HEAP_PAGE_PRUNE_SET_VM,
                .vistest = vacrel->vistest,
                .cutoffs = &vacrel->cutoffs,
        };
index 696b1f49a9d0a3d96d92723628b217f8f0a448eb..6018dacf0f7307fefe098382d2648efbf975c098 100644 (file)
@@ -43,6 +43,7 @@
 #define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW                (1 << 0)
 #define HEAP_PAGE_PRUNE_FREEZE                         (1 << 1)
 #define HEAP_PAGE_PRUNE_ALLOW_FAST_PATH                (1 << 2)
+#define HEAP_PAGE_PRUNE_SET_VM                         (1 << 3)
 
 typedef struct BulkInsertStateData *BulkInsertState;
 typedef struct GlobalVisState GlobalVisState;
@@ -431,7 +432,7 @@ extern TransactionId heap_index_delete_tuples(Relation rel,
 
 /* in heap/pruneheap.c */
 extern void heap_page_prune_opt(Relation relation, Buffer buffer,
-                                                               Buffer *vmbuffer);
+                                                               Buffer *vmbuffer, bool rel_read_only);
 extern void heap_page_prune_and_freeze(PruneFreezeParams *params,
                                                                           PruneFreezeResult *presult,
                                                                           OffsetNumber *off_loc,