]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
bloom: Optimize bitmap scan path with streaming read
authorMichael Paquier <michael@paquier.xyz>
Tue, 10 Mar 2026 22:36:10 +0000 (07:36 +0900)
committerMichael Paquier <michael@paquier.xyz>
Tue, 10 Mar 2026 22:36:10 +0000 (07:36 +0900)
This commit replaces the per-page buffer read loop in blgetbitmap() with
a reading stream, to improve scan efficiency, particularly useful for
large bloom indexes.  Some benchmarking with a large number of rows has
shown a very nice improvement in terms of runtime and IO read reduction
with test cases up to 10M rows for a bloom index scan.

For the io_uring method, the author has reported a 3x improvement in
runtime, while I was at close to 7x.  For the worker method with 3
workers, the author has reported better numbers than myself in runtime,
with the reduction in IO stats being appealing for all the cases
measured.

Author: Xuneng Zhou <xunengzhou@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Reviewed-by: Nazir Bilal Yavuz <byavuz81@gmail.com>
Discussion: https://postgr.es/m/CABPTF7VrqfbcDXqGrdLQ2xaQ=K0RzExNuw6U_GGqzSJu32wfdQ@mail.gmail.com

contrib/bloom/blscan.c

index 0535d45f2d825705dd0209a75057f9356c3360d8..1a0e42021ec1e6627e423e8d73a1b456cbfff3c3 100644 (file)
@@ -18,6 +18,7 @@
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "storage/bufmgr.h"
+#include "storage/read_stream.h"
 
 /*
  * Begin scan of bloom index.
@@ -76,11 +77,13 @@ int64
 blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 {
        int64           ntids = 0;
-       BlockNumber blkno = BLOOM_HEAD_BLKNO,
+       BlockNumber blkno,
                                npages;
        int                     i;
        BufferAccessStrategy bas;
        BloomScanOpaque so = (BloomScanOpaque) scan->opaque;
+       BlockRangeReadStreamPrivate p;
+       ReadStream *stream;
 
        if (so->sign == NULL)
        {
@@ -120,14 +123,29 @@ blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
        if (scan->instrument)
                scan->instrument->nsearches++;
 
+       /* Scan all blocks except the metapage using streaming reads */
+       p.current_blocknum = BLOOM_HEAD_BLKNO;
+       p.last_exclusive = npages;
+
+       /*
+        * It is safe to use batchmode as block_range_read_stream_cb takes no
+        * locks.
+        */
+       stream = read_stream_begin_relation(READ_STREAM_FULL |
+                                                                               READ_STREAM_USE_BATCHING,
+                                                                               bas,
+                                                                               scan->indexRelation,
+                                                                               MAIN_FORKNUM,
+                                                                               block_range_read_stream_cb,
+                                                                               &p,
+                                                                               0);
+
        for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
        {
                Buffer          buffer;
                Page            page;
 
-               buffer = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM,
-                                                                       blkno, RBM_NORMAL, bas);
-
+               buffer = read_stream_next_buffer(stream, NULL);
                LockBuffer(buffer, BUFFER_LOCK_SHARE);
                page = BufferGetPage(buffer);
 
@@ -163,6 +181,9 @@ blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
                UnlockReleaseBuffer(buffer);
                CHECK_FOR_INTERRUPTS();
        }
+
+       Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
+       read_stream_end(stream);
        FreeAccessStrategy(bas);
 
        return ntids;