]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Use streaming read for VACUUM cleanup of GIN
author: Michael Paquier <michael@paquier.xyz>
Thu, 12 Mar 2026 02:48:31 +0000 (11:48 +0900)
committer: Michael Paquier <michael@paquier.xyz>
Thu, 12 Mar 2026 02:48:31 +0000 (11:48 +0900)
This commit replaces the synchronous ReadBufferExtended() loop done in
ginvacuumcleanup() with the streaming read equivalent, to improve I/O
efficiency during GIN index vacuum cleanup operations.

With dm_delay to emulate some latency and debug_io_direct=data to force
synchronous writes and force the read path to be exercised, the author
has noticed a 5x improvement in runtime, with a substantial reduction in
IO stats numbers.  I have reproduced similar numbers while running
similar tests, with improvements becoming better with more tuples and
more pages manipulated.

Author: Xuneng Zhou <xunengzhou@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Reviewed-by: Nazir Bilal Yavuz <byavuz81@gmail.com>
Discussion: https://postgr.es/m/CABPTF7VrqfbcDXqGrdLQ2xaQ=K0RzExNuw6U_GGqzSJu32wfdQ@mail.gmail.com

src/backend/access/gin/ginvacuum.c

index c9f143f6c31b5db148c69b824d4f31b15aca2c3d..d5c8bef5ceb18c9a6190cccd5a9160ba2725f163 100644 (file)
@@ -22,6 +22,7 @@
 #include "storage/indexfsm.h"
 #include "storage/lmgr.h"
 #include "storage/predicate.h"
+#include "storage/read_stream.h"
 #include "utils/memutils.h"
 
 struct GinVacuumState
@@ -693,6 +694,8 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
        BlockNumber totFreePages;
        GinState        ginstate;
        GinStatsData idxStat;
+       BlockRangeReadStreamPrivate p;
+       ReadStream *stream;
 
        /*
         * In an autovacuum analyze, we want to clean up pending insertions.
@@ -743,6 +746,24 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
        totFreePages = 0;
 
+       /* Scan all blocks starting from the root using streaming reads */
+       p.current_blocknum = GIN_ROOT_BLKNO;
+       p.last_exclusive = npages;
+
+       /*
+        * It is safe to use batchmode as block_range_read_stream_cb takes no
+        * locks.
+        */
+       stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
+                                                                               READ_STREAM_FULL |
+                                                                               READ_STREAM_USE_BATCHING,
+                                                                               info->strategy,
+                                                                               index,
+                                                                               MAIN_FORKNUM,
+                                                                               block_range_read_stream_cb,
+                                                                               &p,
+                                                                               0);
+
        for (blkno = GIN_ROOT_BLKNO; blkno < npages; blkno++)
        {
                Buffer          buffer;
@@ -750,8 +771,8 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
                vacuum_delay_point(false);
 
-               buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
-                                                                       RBM_NORMAL, info->strategy);
+               buffer = read_stream_next_buffer(stream, NULL);
+
                LockBuffer(buffer, GIN_SHARE);
                page = BufferGetPage(buffer);
 
@@ -776,6 +797,9 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
                UnlockReleaseBuffer(buffer);
        }
 
+       Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
+       read_stream_end(stream);
+
        /* Update the metapage with accurate page and entry counts */
        idxStat.nTotalPages = npages;
        ginUpdateStats(info->index, &idxStat, false);