From 648a7e28d7c28754ca46caa09864435bc45e543d Mon Sep 17 00:00:00 2001
From: Melanie Plageman
Date: Mon, 26 Jan 2026 17:00:13 -0500
Subject: [PATCH] Eliminate use of cached VM value in lazy_scan_prune()

lazy_scan_prune() takes a parameter from lazy_scan_heap() indicating
whether the page was marked all-visible in the VM at the time it was
last checked in find_next_unskippable_block(). This behavior is
historical, dating back to commit 608195a3a365, when we did not pin the
VM page until deciding we must read it. Now that the VM page is already
pinned, there is no meaningful benefit to relying on a cached VM status.

Removing this cached value simplifies the logic in both
lazy_scan_heap() and lazy_scan_prune(). It also simplifies future work
that will set the visibility map on-access: such code paths will not
have a cached value available, so keeping one would make the logic
harder to reason about. And eliminating it enables us to detect and
repair VM corruption on-access.

Along with removing the cached value and unconditionally checking the
visibility status of the heap page, this commit also moves the VM
corruption handling to occur first. This reordering should have no
performance impact, since the checks are inexpensive and performed only
once per page. It does, however, make the control flow easier to
understand. The new restructuring also makes it possible to set the VM
after fixing corruption (if pruning found the page all-visible).

Now that no callers of visibilitymap_set() use its return value, change
its (and visibilitymap_set_vmbits()) return type to void.

Author: Melanie Plageman
Reviewed-by: Andres Freund
Reviewed-by: Kirill Reshke
Reviewed-by: Chao Li
Reviewed-by: Andrey Borodin
Reviewed-by: Xuneng Zhou
Discussion: https://postgr.es/m/5CEAA162-67B1-44DA-B60D-8B65717E8B05%40gmail.com
---
 src/backend/access/heap/vacuumlazy.c    | 180 +++++++++++-------------
 src/backend/access/heap/visibilitymap.c |   9 +-
 src/include/access/visibilitymap.h      |  18 +--
 3 files changed, 92 insertions(+), 115 deletions(-)

diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index e0f35655a08..d5021450eb4 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -246,13 +246,6 @@ typedef enum
  */
 #define EAGER_SCAN_REGION_SIZE 4096
 
-/*
- * heap_vac_scan_next_block() sets these flags to communicate information
- * about the block it read to the caller.
- */
-#define VAC_BLK_WAS_EAGER_SCANNED (1 << 0)
-#define VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM (1 << 1)
-
 typedef struct LVRelState
 {
 	/* Target heap relation and its indexes */
@@ -358,7 +351,6 @@ typedef struct LVRelState
 	/* State maintained by heap_vac_scan_next_block() */
 	BlockNumber current_block;	/* last block returned */
 	BlockNumber next_unskippable_block; /* next unskippable block */
-	bool		next_unskippable_allvis;	/* its visibility status */
 	bool		next_unskippable_eager_scanned; /* if it was eagerly scanned */
 	Buffer		next_unskippable_vmbuffer;	/* buffer containing its VM bit */
 
@@ -432,7 +424,7 @@ static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
 								   bool sharelock, Buffer vmbuffer);
 static int	lazy_scan_prune(LVRelState *vacrel, Buffer buf,
 							BlockNumber blkno, Page page,
-							Buffer vmbuffer, bool all_visible_according_to_vm,
+							Buffer vmbuffer,
 							bool *has_lpdead_items, bool *vm_page_frozen);
 static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
 							  BlockNumber blkno, Page page,
@@ -1275,7 +1267,6 @@ lazy_scan_heap(LVRelState *vacrel)
 	/* Initialize for the first heap_vac_scan_next_block() call */
 	vacrel->current_block = InvalidBlockNumber;
 	vacrel->next_unskippable_block = InvalidBlockNumber;
-	vacrel->next_unskippable_allvis = false;
 	vacrel->next_unskippable_eager_scanned = false;
 	vacrel->next_unskippable_vmbuffer = InvalidBuffer;
 
@@ -1291,13 +1282,13 @@ lazy_scan_heap(LVRelState *vacrel)
 										MAIN_FORKNUM,
 										heap_vac_scan_next_block,
 										vacrel,
-										sizeof(uint8));
+										sizeof(bool));
 
 	while (true)
 	{
 		Buffer		buf;
 		Page		page;
-		uint8		blk_info = 0;
+		bool		was_eager_scanned = false;
 		int			ndeleted = 0;
 		bool		has_lpdead_items;
 		void	   *per_buffer_data = NULL;
@@ -1366,13 +1357,13 @@ lazy_scan_heap(LVRelState *vacrel)
 		if (!BufferIsValid(buf))
 			break;
 
-		blk_info = *((uint8 *) per_buffer_data);
+		was_eager_scanned = *((bool *) per_buffer_data);
 		CheckBufferIsPinnedOnce(buf);
 		page = BufferGetPage(buf);
 		blkno = BufferGetBlockNumber(buf);
 
 		vacrel->scanned_pages++;
-		if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
+		if (was_eager_scanned)
 			vacrel->eager_scanned_pages++;
 
 		/* Report as block scanned, update error traceback information */
@@ -1443,7 +1434,6 @@ lazy_scan_heap(LVRelState *vacrel)
 
 		if (got_cleanup_lock)
 			ndeleted = lazy_scan_prune(vacrel, buf, blkno, page, vmbuffer,
-									   blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM,
 									   &has_lpdead_items, &vm_page_frozen);
 
 		/*
@@ -1460,8 +1450,7 @@ lazy_scan_heap(LVRelState *vacrel)
 		 * exclude pages skipped due to cleanup lock contention from eager
 		 * freeze algorithm caps.
 		 */
-		if (got_cleanup_lock &&
-			(blk_info & VAC_BLK_WAS_EAGER_SCANNED))
+		if (got_cleanup_lock && was_eager_scanned)
 		{
 			/* Aggressive vacuums do not eager scan. */
 			Assert(!vacrel->aggressive);
@@ -1628,7 +1617,6 @@ heap_vac_scan_next_block(ReadStream *stream,
 {
 	BlockNumber next_block;
 	LVRelState *vacrel = callback_private_data;
-	uint8		blk_info = 0;
 
 	/* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
 	next_block = vacrel->current_block + 1;
@@ -1691,8 +1679,8 @@ heap_vac_scan_next_block(ReadStream *stream,
 		 * otherwise they would've been unskippable.
 		 */
 		vacrel->current_block = next_block;
-		blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
-		*((uint8 *) per_buffer_data) = blk_info;
+		/* Block was not eager scanned */
+		*((bool *) per_buffer_data) = false;
 		return vacrel->current_block;
 	}
 	else
@@ -1704,11 +1692,7 @@ heap_vac_scan_next_block(ReadStream *stream,
 		Assert(next_block == vacrel->next_unskippable_block);
 
 		vacrel->current_block = next_block;
-		if (vacrel->next_unskippable_allvis)
-			blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
-		if (vacrel->next_unskippable_eager_scanned)
-			blk_info |= VAC_BLK_WAS_EAGER_SCANNED;
-		*((uint8 *) per_buffer_data) = blk_info;
+		*((bool *) per_buffer_data) = vacrel->next_unskippable_eager_scanned;
 		return vacrel->current_block;
 	}
 }
@@ -1733,7 +1717,6 @@ find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
 	BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
 	Buffer		next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
 	bool		next_unskippable_eager_scanned = false;
-	bool		next_unskippable_allvis;
 
 	*skipsallvis = false;
 
@@ -1743,7 +1726,6 @@ find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
 													   next_unskippable_block,
 													   &next_unskippable_vmbuffer);
 
-		next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
 
 		/*
 		 * At the start of each eager scan region, normal vacuums with eager
@@ -1762,7 +1744,7 @@ find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
 		 * A block is unskippable if it is not all visible according to the
 		 * visibility map.
 		 */
-		if (!next_unskippable_allvis)
+		if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
 		{
 			Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
 			break;
 		}
@@ -1819,7 +1801,6 @@ find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
 
 	/* write the local variables back to vacrel */
 	vacrel->next_unskippable_block = next_unskippable_block;
-	vacrel->next_unskippable_allvis = next_unskippable_allvis;
 	vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
 	vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
 }
@@ -1980,9 +1961,7 @@ cmpOffsetNumbers(const void *a, const void *b)
  * Caller must hold pin and buffer cleanup lock on the buffer.
  *
  * vmbuffer is the buffer containing the VM block with visibility information
- * for the heap block, blkno. all_visible_according_to_vm is the saved
- * visibility status of the heap block looked up earlier by the caller. We
- * won't rely entirely on this status, as it may be out of date.
+ * for the heap block, blkno.
  *
  * *has_lpdead_items is set to true or false depending on whether, upon return
  * from this function, any LP_DEAD items are still present on the page.
@@ -1999,7 +1978,6 @@ lazy_scan_prune(LVRelState *vacrel,
 				BlockNumber blkno,
 				Page page,
 				Buffer vmbuffer,
-				bool all_visible_according_to_vm,
 				bool *has_lpdead_items,
 				bool *vm_page_frozen)
 {
@@ -2013,6 +1991,8 @@ lazy_scan_prune(LVRelState *vacrel,
 		.vistest = vacrel->vistest,
 		.cutoffs = &vacrel->cutoffs,
 	};
+	uint8		old_vmbits = 0;
+	uint8		new_vmbits = 0;
 
 	Assert(BufferGetBlockNumber(buf) == blkno);
 
@@ -2115,70 +2095,7 @@ lazy_scan_prune(LVRelState *vacrel,
 	Assert(!presult.all_visible || !(*has_lpdead_items));
 	Assert(!presult.all_frozen || presult.all_visible);
 
-	/*
-	 * Handle setting visibility map bit based on information from the VM (as
-	 * of last heap_vac_scan_next_block() call), and from all_visible and
-	 * all_frozen variables
-	 */
-	if ((presult.all_visible && !all_visible_according_to_vm) ||
-		(presult.all_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer)))
-	{
-		uint8		old_vmbits;
-		uint8		flags = VISIBILITYMAP_ALL_VISIBLE;
-
-		if (presult.all_frozen)
-			flags |= VISIBILITYMAP_ALL_FROZEN;
-
-		/*
-		 * It should never be the case that the visibility map page is set
-		 * while the page-level bit is clear, but the reverse is allowed (if
-		 * checksums are not enabled). Regardless, set both bits so that we
-		 * get back in sync.
-		 *
-		 * Even if PD_ALL_VISIBLE is already set, we don't need to worry about
-		 * unnecessarily dirtying the heap buffer. Nearly the only scenario
-		 * where PD_ALL_VISIBLE is set but the VM is not is if the VM was
-		 * removed -- and that isn't worth optimizing for. And if we add the
-		 * heap buffer to the WAL chain (without passing REGBUF_NO_CHANGES),
-		 * it must be marked dirty.
-		 */
-		PageSetAllVisible(page);
-		MarkBufferDirty(buf);
-
-		/*
-		 * If the page is being set all-frozen, we pass InvalidTransactionId
-		 * as the cutoff_xid, since a snapshot conflict horizon sufficient to
-		 * make everything safe for REDO was logged when the page's tuples
-		 * were frozen.
-		 */
-		Assert(!presult.all_frozen ||
-			   !TransactionIdIsValid(presult.vm_conflict_horizon));
-
-		old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
-									   InvalidXLogRecPtr,
-									   vmbuffer, presult.vm_conflict_horizon,
-									   flags);
-
-		/*
-		 * If the page wasn't already set all-visible and/or all-frozen in the
-		 * VM, count it as newly set for logging.
-		 */
-		if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
-		{
-			vacrel->vm_new_visible_pages++;
-			if (presult.all_frozen)
-			{
-				vacrel->vm_new_visible_frozen_pages++;
-				*vm_page_frozen = true;
-			}
-		}
-		else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
-				 presult.all_frozen)
-		{
-			vacrel->vm_new_frozen_pages++;
-			*vm_page_frozen = true;
-		}
-	}
+	old_vmbits = visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer);
 
 	/*
 	 * As of PostgreSQL 9.2, the visibility map bit should never be set if the
 	 * page-level bit is clear. However, it's possible that the bit got
 	 * cleared after heap_vac_scan_next_block() was called, so we must recheck
 	 * with buffer lock before concluding that the VM is corrupt.
 	 */
-	else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
-			 visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
+	if (!PageIsAllVisible(page) &&
+		(old_vmbits & VISIBILITYMAP_VALID_BITS) != 0)
 	{
 		ereport(WARNING,
 				(errcode(ERRCODE_DATA_CORRUPTED),
@@ -2196,6 +2113,8 @@ lazy_scan_prune(LVRelState *vacrel,
 		visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
 							VISIBILITYMAP_VALID_BITS);
+		/* VM bits are now clear */
+		old_vmbits = 0;
 	}
 
 	/*
@@ -2223,6 +2142,69 @@ lazy_scan_prune(LVRelState *vacrel,
 		MarkBufferDirty(buf);
 		visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
 							VISIBILITYMAP_VALID_BITS);
+		/* VM bits are now clear */
+		old_vmbits = 0;
+	}
+
+	if (!presult.all_visible)
+		return presult.ndeleted;
+
+	/* Set the visibility map and page visibility hint */
+	new_vmbits = VISIBILITYMAP_ALL_VISIBLE;
+
+	if (presult.all_frozen)
+		new_vmbits |= VISIBILITYMAP_ALL_FROZEN;
+
+	/* Nothing to do */
+	if (old_vmbits == new_vmbits)
+		return presult.ndeleted;
+
+	/*
+	 * It should never be the case that the visibility map page is set while
+	 * the page-level bit is clear (and if so, we cleared it above), but the
+	 * reverse is allowed (if checksums are not enabled). Regardless, set both
+	 * bits so that we get back in sync.
+	 *
+	 * The heap buffer must be marked dirty before adding it to the WAL chain
+	 * when setting the VM. We don't worry about unnecessarily dirtying the
+	 * heap buffer if PD_ALL_VISIBLE is already set, though. It is extremely
+	 * rare to have a clean heap buffer with PD_ALL_VISIBLE already set and
+	 * the VM bits clear, so there is no point in optimizing it.
+	 */
+	PageSetAllVisible(page);
+	MarkBufferDirty(buf);
+
+	/*
+	 * If the page is being set all-frozen, we pass InvalidTransactionId as
+	 * the cutoff_xid, since a snapshot conflict horizon sufficient to make
+	 * everything safe for REDO was logged when the page's tuples were frozen.
+	 */
+	Assert(!presult.all_frozen ||
+		   !TransactionIdIsValid(presult.vm_conflict_horizon));
+
+	visibilitymap_set(vacrel->rel, blkno, buf,
+					  InvalidXLogRecPtr,
+					  vmbuffer, presult.vm_conflict_horizon,
+					  new_vmbits);
+
+	/*
+	 * If the page wasn't already set all-visible and/or all-frozen in the VM,
+	 * count it as newly set for logging.
+	 */
+	if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
+	{
+		vacrel->vm_new_visible_pages++;
+		if (presult.all_frozen)
+		{
+			vacrel->vm_new_visible_frozen_pages++;
+			*vm_page_frozen = true;
+		}
+	}
+	else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
+			 presult.all_frozen)
+	{
+		vacrel->vm_new_frozen_pages++;
+		*vm_page_frozen = true;
 	}
 
 	return presult.ndeleted;
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 2382d18f72b..3047bd46def 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -240,10 +240,8 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
  * You must pass a buffer containing the correct map page to this function.
  * Call visibilitymap_pin first to pin the right one. This function doesn't do
  * any I/O.
- *
- * Returns the state of the page's VM bits before setting flags.
  */
-uint8
+void
 visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 				  XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
 				  uint8 flags)
@@ -320,7 +318,6 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 	}
 
 	LockBuffer(vmBuf, BUFFER_LOCK_UNLOCK);
-	return status;
 }
 
 /*
@@ -343,7 +340,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
  *
  * rlocator is used only for debugging messages.
  */
-uint8
+void
 visibilitymap_set_vmbits(BlockNumber heapBlk,
 						 Buffer vmBuf, uint8 flags,
 						 const RelFileLocator rlocator)
@@ -386,8 +383,6 @@ visibilitymap_set_vmbits(BlockNumber heapBlk,
 		map[mapByte] |= (flags << mapOffset);
 		MarkBufferDirty(vmBuf);
 	}
-
-	return status;
 }
 
 /*
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index 47ad489a9a7..a0166c5b410 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -32,15 +32,15 @@ extern bool visibilitymap_clear(Relation rel, BlockNumber heapBlk,
 extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
 							  Buffer *vmbuf);
 extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf);
-extern uint8 visibilitymap_set(Relation rel,
-							   BlockNumber heapBlk, Buffer heapBuf,
-							   XLogRecPtr recptr,
-							   Buffer vmBuf,
-							   TransactionId cutoff_xid,
-							   uint8 flags);
-extern uint8 visibilitymap_set_vmbits(BlockNumber heapBlk,
-									  Buffer vmBuf, uint8 flags,
-									  const RelFileLocator rlocator);
+extern void visibilitymap_set(Relation rel,
+							  BlockNumber heapBlk, Buffer heapBuf,
+							  XLogRecPtr recptr,
+							  Buffer vmBuf,
+							  TransactionId cutoff_xid,
+							  uint8 flags);
+extern void visibilitymap_set_vmbits(BlockNumber heapBlk,
+									 Buffer vmBuf, uint8 flags,
+									 const RelFileLocator rlocator);
 extern uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
 extern void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen);
 extern BlockNumber visibilitymap_prepare_truncate(Relation rel,
-- 
2.47.3