]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Separate TBM[Shared|Private]Iterator and TBMIterateResult
authorMelanie Plageman <melanieplageman@gmail.com>
Sat, 15 Mar 2025 14:10:51 +0000 (10:10 -0400)
committerMelanie Plageman <melanieplageman@gmail.com>
Sat, 15 Mar 2025 14:11:19 +0000 (10:11 -0400)
Remove the TBMIterateResult member from the TBMPrivateIterator and
TBMSharedIterator and make tbm_[shared|private_]iterate() take a
TBMIterateResult as a parameter.

This allows tidbitmap API users to manage multiple TBMIterateResults per
scan. This is required for bitmap heap scan to use the read stream API,
with which there may be multiple I/Os in flight at once, each one with a
TBMIterateResult.

Reviewed-by: Tomas Vondra <tomas@vondra.me>
Discussion: https://postgr.es/m/d4bb26c9-fe07-439e-ac53-c0e244387e01%40vondra.me

src/backend/access/gin/ginget.c
src/backend/access/gin/ginscan.c
src/backend/access/heap/heapam_handler.c
src/backend/executor/nodeBitmapHeapscan.c
src/backend/nodes/tidbitmap.c
src/include/access/gin_private.h
src/include/nodes/tidbitmap.h

index 4a56f19390dcd4cd8610020042904d0f3f4c6a20..f29ccd3c2d1ff31e2678a644a5277d09fb983f49 100644 (file)
@@ -332,8 +332,8 @@ restartScanEntry:
        entry->list = NULL;
        entry->nlist = 0;
        entry->matchBitmap = NULL;
-       entry->matchResult = NULL;
        entry->matchNtuples = -1;
+       entry->matchResult.blockno = InvalidBlockNumber;
        entry->reduceResult = false;
        entry->predictNumberResult = 0;
 
@@ -827,20 +827,19 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
                {
                        /*
                         * If we've exhausted all items on this block, move to next block
-                        * in the bitmap.
+                        * in the bitmap. tbm_private_iterate() sets matchResult.blockno
+                        * to InvalidBlockNumber when the bitmap is exhausted.
                         */
-                       while (entry->matchResult == NULL ||
-                                  (!entry->matchResult->lossy &&
+                       while ((!BlockNumberIsValid(entry->matchResult.blockno)) ||
+                                  (!entry->matchResult.lossy &&
                                        entry->offset >= entry->matchNtuples) ||
-                                  entry->matchResult->blockno < advancePastBlk ||
+                                  entry->matchResult.blockno < advancePastBlk ||
                                   (ItemPointerIsLossyPage(&advancePast) &&
-                                       entry->matchResult->blockno == advancePastBlk))
+                                       entry->matchResult.blockno == advancePastBlk))
                        {
-                               entry->matchResult =
-                                       tbm_private_iterate(entry->matchIterator);
-
-                               if (entry->matchResult == NULL)
+                               if (!tbm_private_iterate(entry->matchIterator, &entry->matchResult))
                                {
+                                       Assert(!BlockNumberIsValid(entry->matchResult.blockno));
                                        ItemPointerSetInvalid(&entry->curItem);
                                        tbm_end_private_iterate(entry->matchIterator);
                                        entry->matchIterator = NULL;
@@ -849,14 +848,14 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
                                }
 
                                /* Exact pages need their tuple offsets extracted. */
-                               if (!entry->matchResult->lossy)
-                                       entry->matchNtuples = tbm_extract_page_tuple(entry->matchResult,
+                               if (!entry->matchResult.lossy)
+                                       entry->matchNtuples = tbm_extract_page_tuple(&entry->matchResult,
                                                                                                                                 entry->matchOffsets,
                                                                                                                                 TBM_MAX_TUPLES_PER_PAGE);
 
                                /*
                                 * Reset counter to the beginning of entry->matchResult. Note:
-                                * entry->offset is still greater than entry->matchNtuples if
+                                * entry->offset is still greater than matchResult.ntuples if
                                 * matchResult is lossy.  So, on next call we will get next
                                 * result from TIDBitmap.
                                 */
@@ -869,10 +868,10 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
                         * We're now on the first page after advancePast which has any
                         * items on it. If it's a lossy result, return that.
                         */
-                       if (entry->matchResult->lossy)
+                       if (entry->matchResult.lossy)
                        {
                                ItemPointerSetLossyPage(&entry->curItem,
-                                                                               entry->matchResult->blockno);
+                                                                               entry->matchResult.blockno);
 
                                /*
                                 * We might as well fall out of the loop; we could not
@@ -889,7 +888,7 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
                        Assert(entry->matchNtuples > -1);
 
                        /* Skip over any offsets <= advancePast, and return that. */
-                       if (entry->matchResult->blockno == advancePastBlk)
+                       if (entry->matchResult.blockno == advancePastBlk)
                        {
                                Assert(entry->matchNtuples > 0);
 
@@ -910,7 +909,7 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
                        }
 
                        ItemPointerSet(&entry->curItem,
-                                                  entry->matchResult->blockno,
+                                                  entry->matchResult.blockno,
                                                   entry->matchOffsets[entry->offset]);
                        entry->offset++;
 
index f6cdd098a028a19cdafcdb190c23071256f1dace..c2d1771bd77b5ddb846e6389be18d6f9ab1c6314 100644 (file)
@@ -111,7 +111,7 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum,
        ItemPointerSetMin(&scanEntry->curItem);
        scanEntry->matchBitmap = NULL;
        scanEntry->matchIterator = NULL;
-       scanEntry->matchResult = NULL;
+       scanEntry->matchResult.blockno = InvalidBlockNumber;
        scanEntry->matchNtuples = -1;
        scanEntry->list = NULL;
        scanEntry->nlist = 0;
index eecff8ffd67d2f53ce6a694b1eea39ad26f049f4..25d26409e2cd619a8292505fb257ca83ea351322 100644 (file)
@@ -2126,7 +2126,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
        Buffer          buffer;
        Snapshot        snapshot;
        int                     ntup;
-       TBMIterateResult *tbmres;
+       TBMIterateResult tbmres;
        OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE];
        int                     noffsets = -1;
 
@@ -2142,14 +2142,12 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
        {
                CHECK_FOR_INTERRUPTS();
 
-               tbmres = tbm_iterate(&scan->st.rs_tbmiterator);
-
-               if (tbmres == NULL)
+               if (!tbm_iterate(&scan->st.rs_tbmiterator, &tbmres))
                        return false;
 
                /* Exact pages need their tuple offsets extracted. */
-               if (!tbmres->lossy)
-                       noffsets = tbm_extract_page_tuple(tbmres, offsets,
+               if (!tbmres.lossy)
+                       noffsets = tbm_extract_page_tuple(&tbmres, offsets,
                                                                                          TBM_MAX_TUPLES_PER_PAGE);
 
                /*
@@ -2161,11 +2159,11 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
                 * reachable by the index.
                 */
        } while (!IsolationIsSerializable() &&
-                        tbmres->blockno >= hscan->rs_nblocks);
+                        tbmres.blockno >= hscan->rs_nblocks);
 
        /* Got a valid block */
-       *blockno = tbmres->blockno;
-       *recheck = tbmres->recheck;
+       *blockno = tbmres.blockno;
+       *recheck = tbmres.recheck;
 
        /*
         * We can skip fetching the heap page if we don't need any fields from the
@@ -2173,11 +2171,11 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
         * page are visible to our transaction.
         */
        if (!(scan->rs_flags & SO_NEED_TUPLES) &&
-               !tbmres->recheck &&
-               VM_ALL_VISIBLE(scan->rs_rd, tbmres->blockno, &bscan->rs_vmbuffer))
+               !tbmres.recheck &&
+               VM_ALL_VISIBLE(scan->rs_rd, tbmres.blockno, &bscan->rs_vmbuffer))
        {
                /* can't be lossy in the skip_fetch case */
-               Assert(!tbmres->lossy);
+               Assert(!tbmres.lossy);
                Assert(bscan->rs_empty_tuples_pending >= 0);
                Assert(noffsets > -1);
 
@@ -2186,7 +2184,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
                return true;
        }
 
-       block = tbmres->blockno;
+       block = tbmres.blockno;
 
        /*
         * Acquire pin on the target heap page, trading in any pin we held before.
@@ -2215,7 +2213,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
        /*
         * We need two separate strategies for lossy and non-lossy cases.
         */
-       if (!tbmres->lossy)
+       if (!tbmres.lossy)
        {
                /*
                 * Bitmap is non-lossy, so we just look through the offsets listed in
@@ -2279,7 +2277,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
        Assert(ntup <= MaxHeapTuplesPerPage);
        hscan->rs_ntuples = ntup;
 
-       if (tbmres->lossy)
+       if (tbmres.lossy)
                (*lossy_pages)++;
        else
                (*exact_pages)++;
index be0d24d901b5e32fc22d36b9a816ecc2e1373d37..3b4ea0f61446e8f9f09bfaab153a89a4ce7d264e 100644 (file)
@@ -317,7 +317,7 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
 {
 #ifdef USE_PREFETCH
        ParallelBitmapHeapState *pstate = node->pstate;
-       TBMIterateResult *tbmpre;
+       TBMIterateResult tbmpre;
 
        if (pstate == NULL)
        {
@@ -330,9 +330,8 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
                }
                else if (!tbm_exhausted(prefetch_iterator))
                {
-                       tbmpre = tbm_iterate(prefetch_iterator);
-                       node->prefetch_blockno = tbmpre ? tbmpre->blockno :
-                               InvalidBlockNumber;
+                       tbm_iterate(prefetch_iterator, &tbmpre);
+                       node->prefetch_blockno = tbmpre.blockno;
                }
                return;
        }
@@ -371,9 +370,8 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
                         */
                        if (!tbm_exhausted(prefetch_iterator))
                        {
-                               tbmpre = tbm_iterate(prefetch_iterator);
-                               node->prefetch_blockno = tbmpre ? tbmpre->blockno :
-                                       InvalidBlockNumber;
+                               tbm_iterate(prefetch_iterator, &tbmpre);
+                               node->prefetch_blockno = tbmpre.blockno;
                        }
                }
        }
@@ -441,17 +439,18 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
                {
                        while (node->prefetch_pages < node->prefetch_target)
                        {
-                               TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
+                               TBMIterateResult tbmpre;
                                bool            skip_fetch;
 
-                               if (tbmpre == NULL)
+                               if (!tbm_iterate(prefetch_iterator, &tbmpre))
                                {
                                        /* No more pages to prefetch */
+                                       Assert(!BlockNumberIsValid(tbmpre.blockno));
                                        tbm_end_iterate(prefetch_iterator);
                                        break;
                                }
                                node->prefetch_pages++;
-                               node->prefetch_blockno = tbmpre->blockno;
+                               node->prefetch_blockno = tbmpre.blockno;
 
                                /*
                                 * If we expect not to have to actually read this heap page,
@@ -460,13 +459,13 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
                                 * prefetch_pages?)
                                 */
                                skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
-                                                         !tbmpre->recheck &&
+                                                         !tbmpre.recheck &&
                                                          VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-                                                                                        tbmpre->blockno,
+                                                                                        tbmpre.blockno,
                                                                                         &node->pvmbuffer));
 
                                if (!skip_fetch)
-                                       PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
+                                       PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre.blockno);
                        }
                }
 
@@ -481,7 +480,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
                {
                        while (1)
                        {
-                               TBMIterateResult *tbmpre;
+                               TBMIterateResult tbmpre;
                                bool            do_prefetch = false;
                                bool            skip_fetch;
 
@@ -500,25 +499,25 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
                                if (!do_prefetch)
                                        return;
 
-                               tbmpre = tbm_iterate(prefetch_iterator);
-                               if (tbmpre == NULL)
+                               if (!tbm_iterate(prefetch_iterator, &tbmpre))
                                {
+                                       Assert(!BlockNumberIsValid(tbmpre.blockno));
                                        /* No more pages to prefetch */
                                        tbm_end_iterate(prefetch_iterator);
                                        break;
                                }
 
-                               node->prefetch_blockno = tbmpre->blockno;
+                               node->prefetch_blockno = tbmpre.blockno;
 
                                /* As above, skip prefetch if we expect not to need page */
                                skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
-                                                         !tbmpre->recheck &&
+                                                         !tbmpre.recheck &&
                                                          VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-                                                                                        tbmpre->blockno,
+                                                                                        tbmpre.blockno,
                                                                                         &node->pvmbuffer));
 
                                if (!skip_fetch)
-                                       PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
+                                       PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre.blockno);
                        }
                }
        }
index 3d835024caa23dd86d8d792bac1c5840ad3ad096..41031aa8f2fa85dd34989ee4299a996b4eb8cb8a 100644 (file)
@@ -172,7 +172,6 @@ struct TBMPrivateIterator
        int                     spageptr;               /* next spages index */
        int                     schunkptr;              /* next schunks index */
        int                     schunkbit;              /* next bit to check in current schunk */
-       TBMIterateResult output;
 };
 
 /*
@@ -213,7 +212,6 @@ struct TBMSharedIterator
        PTEntryArray *ptbase;           /* pagetable element array */
        PTIterationArray *ptpages;      /* sorted exact page index list */
        PTIterationArray *ptchunks; /* sorted lossy page index list */
-       TBMIterateResult output;
 };
 
 /* Local function prototypes */
@@ -957,21 +955,28 @@ tbm_advance_schunkbit(PagetableEntry *chunk, int *schunkbitp)
 /*
  * tbm_private_iterate - scan through next page of a TIDBitmap
  *
- * Returns a TBMIterateResult representing one page, or NULL if there are
- * no more pages to scan.  Pages are guaranteed to be delivered in numerical
- * order.  If lossy is true, then the bitmap is "lossy" and failed to
- * remember the exact tuples to look at on this page --- the caller must
- * examine all tuples on the page and check if they meet the intended
- * condition.  result->ntuples is set to -1 when the bitmap is lossy.
- * If result->recheck is true, only the indicated tuples need
- * be examined, but the condition must be rechecked anyway.  (For ease of
- * testing, recheck is always set true when lossy is true.)
+ * Caller must pass in a TBMIterateResult to be filled.
+ *
+ * Pages are guaranteed to be delivered in numerical order.
+ *
+ * Returns false when there are no more pages to scan and true otherwise. When
+ * there are no more pages to scan, tbmres->blockno is set to
+ * InvalidBlockNumber.
+ *
+ * If lossy is true, then the bitmap is "lossy" and failed to remember
+ * the exact tuples to look at on this page --- the caller must examine all
+ * tuples on the page and check if they meet the intended condition. If lossy
+ * is false, the caller must later extract the tuple offsets from the page
+ * pointed to by internal_page with tbm_extract_page_tuple.
+ *
+ * If tbmres->recheck is true, only the indicated tuples need be examined, but
+ * the condition must be rechecked anyway.  (For ease of testing, recheck is
+ * always set true when lossy is true.)
  */
-TBMIterateResult *
-tbm_private_iterate(TBMPrivateIterator *iterator)
+bool
+tbm_private_iterate(TBMPrivateIterator *iterator, TBMIterateResult *tbmres)
 {
        TIDBitmap  *tbm = iterator->tbm;
-       TBMIterateResult *output = &(iterator->output);
 
        Assert(tbm->iterating == TBM_ITERATING_PRIVATE);
 
@@ -1009,12 +1014,12 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
                        chunk_blockno < tbm->spages[iterator->spageptr]->blockno)
                {
                        /* Return a lossy page indicator from the chunk */
-                       output->blockno = chunk_blockno;
-                       output->lossy = true;
-                       output->recheck = true;
-                       output->internal_page = NULL;
+                       tbmres->blockno = chunk_blockno;
+                       tbmres->lossy = true;
+                       tbmres->recheck = true;
+                       tbmres->internal_page = NULL;
                        iterator->schunkbit++;
-                       return output;
+                       return true;
                }
        }
 
@@ -1028,16 +1033,17 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
                else
                        page = tbm->spages[iterator->spageptr];
 
-               output->internal_page = page;
-               output->blockno = page->blockno;
-               output->lossy = false;
-               output->recheck = page->recheck;
+               tbmres->internal_page = page;
+               tbmres->blockno = page->blockno;
+               tbmres->lossy = false;
+               tbmres->recheck = page->recheck;
                iterator->spageptr++;
-               return output;
+               return true;
        }
 
        /* Nothing more in the bitmap */
-       return NULL;
+       tbmres->blockno = InvalidBlockNumber;
+       return false;
 }
 
 /*
@@ -1047,10 +1053,9 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
  *     across multiple processes.  We need to acquire the iterator LWLock,
  *     before accessing the shared members.
  */
-TBMIterateResult *
-tbm_shared_iterate(TBMSharedIterator *iterator)
+bool
+tbm_shared_iterate(TBMSharedIterator *iterator, TBMIterateResult *tbmres)
 {
-       TBMIterateResult *output = &iterator->output;
        TBMSharedIteratorState *istate = iterator->state;
        PagetableEntry *ptbase = NULL;
        int                *idxpages = NULL;
@@ -1101,14 +1106,14 @@ tbm_shared_iterate(TBMSharedIterator *iterator)
                        chunk_blockno < ptbase[idxpages[istate->spageptr]].blockno)
                {
                        /* Return a lossy page indicator from the chunk */
-                       output->blockno = chunk_blockno;
-                       output->lossy = true;
-                       output->recheck = true;
-                       output->internal_page = NULL;
+                       tbmres->blockno = chunk_blockno;
+                       tbmres->lossy = true;
+                       tbmres->recheck = true;
+                       tbmres->internal_page = NULL;
                        istate->schunkbit++;
 
                        LWLockRelease(&istate->lock);
-                       return output;
+                       return true;
                }
        }
 
@@ -1116,21 +1121,22 @@ tbm_shared_iterate(TBMSharedIterator *iterator)
        {
                PagetableEntry *page = &ptbase[idxpages[istate->spageptr]];
 
-               output->internal_page = page;
-               output->blockno = page->blockno;
-               output->lossy = false;
-               output->recheck = page->recheck;
+               tbmres->internal_page = page;
+               tbmres->blockno = page->blockno;
+               tbmres->lossy = false;
+               tbmres->recheck = page->recheck;
                istate->spageptr++;
 
                LWLockRelease(&istate->lock);
 
-               return output;
+               return true;
        }
 
        LWLockRelease(&istate->lock);
 
        /* Nothing more in the bitmap */
-       return NULL;
+       tbmres->blockno = InvalidBlockNumber;
+       return false;
 }
 
 /*
@@ -1604,15 +1610,17 @@ tbm_end_iterate(TBMIterator *iterator)
 }
 
 /*
- * Get the next TBMIterateResult from the shared or private bitmap iterator.
+ * Populate the next TBMIterateResult using the shared or private bitmap
+ * iterator. Returns false when there is nothing more to scan.
  */
-TBMIterateResult *
-tbm_iterate(TBMIterator *iterator)
+bool
+tbm_iterate(TBMIterator *iterator, TBMIterateResult *tbmres)
 {
        Assert(iterator);
+       Assert(tbmres);
 
        if (iterator->shared)
-               return tbm_shared_iterate(iterator->i.shared_iterator);
+               return tbm_shared_iterate(iterator->i.shared_iterator, tbmres);
        else
-               return tbm_private_iterate(iterator->i.private_iterator);
+               return tbm_private_iterate(iterator->i.private_iterator, tbmres);
 }
index 95d8805b66f2c9d4e3da850aeab3274ae3fcf13b..aee1f70c22eee717c8a831d839f20cb16b52b3fa 100644 (file)
@@ -354,7 +354,12 @@ typedef struct GinScanEntryData
        /* for a partial-match or full-scan query, we accumulate all TIDs here */
        TIDBitmap  *matchBitmap;
        TBMPrivateIterator *matchIterator;
-       TBMIterateResult *matchResult;
+
+       /*
+        * If blockno is InvalidBlockNumber, all of the other fields in the
+        * matchResult are meaningless.
+        */
+       TBMIterateResult matchResult;
        OffsetNumber matchOffsets[TBM_MAX_TUPLES_PER_PAGE];
        int                     matchNtuples;
 
index e185635c10be446a3e7511aa09e81e6a99c93b98..99f795ceab526925972a9a3ca4416db9111ca77e 100644 (file)
@@ -101,8 +101,8 @@ extern bool tbm_is_empty(const TIDBitmap *tbm);
 
 extern TBMPrivateIterator *tbm_begin_private_iterate(TIDBitmap *tbm);
 extern dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm);
-extern TBMIterateResult *tbm_private_iterate(TBMPrivateIterator *iterator);
-extern TBMIterateResult *tbm_shared_iterate(TBMSharedIterator *iterator);
+extern bool tbm_private_iterate(TBMPrivateIterator *iterator, TBMIterateResult *tbmres);
+extern bool tbm_shared_iterate(TBMSharedIterator *iterator, TBMIterateResult *tbmres);
 extern void tbm_end_private_iterate(TBMPrivateIterator *iterator);
 extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
 extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa,
@@ -113,7 +113,7 @@ extern TBMIterator tbm_begin_iterate(TIDBitmap *tbm,
                                                                         dsa_area *dsa, dsa_pointer dsp);
 extern void tbm_end_iterate(TBMIterator *iterator);
 
-extern TBMIterateResult *tbm_iterate(TBMIterator *iterator);
+extern bool tbm_iterate(TBMIterator *iterator, TBMIterateResult *tbmres);
 
 static inline bool
 tbm_exhausted(TBMIterator *iterator)