]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Fix rare bug in read_stream.c's split IO handling. master github/master
authorThomas Munro <tmunro@postgresql.org>
Sat, 9 Aug 2025 00:33:06 +0000 (12:33 +1200)
committerThomas Munro <tmunro@postgresql.org>
Sat, 9 Aug 2025 01:04:38 +0000 (13:04 +1200)
The internal queue of buffers could become corrupted in a rare edge case
that failed to invalidate an entry, causing a stale buffer to be
"forwarded" to StartReadBuffers().  This is a simple fix for the
immediate problem.

A small API change might be able to remove this and related fragility
entirely, but that will have to wait a bit.

Defect in commit ed0b87ca.

Bug: 19006
Backpatch-through: 18
Reported-by: Alexander Lakhin <exclusion@gmail.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Reviewed-by: Xuneng Zhou <xunengzhou@gmail.com>
Discussion: https://postgr.es/m/19006-80fcaaf69000377e%40postgresql.org

src/backend/storage/aio/read_stream.c

index 0e7f5557f5cb93b5c5ac2411167d17f3ba4decc4..031fde9f4cbef1227e33ae5250915621ac158294 100644 (file)
@@ -247,12 +247,33 @@ read_stream_start_pending_read(ReadStream *stream)
        Assert(stream->pinned_buffers + stream->pending_read_nblocks <=
                   stream->max_pinned_buffers);
 
        Assert(stream->pinned_buffers + stream->pending_read_nblocks <=
                   stream->max_pinned_buffers);
 
+#ifdef USE_ASSERT_CHECKING
        /* We had better not be overwriting an existing pinned buffer. */
        if (stream->pinned_buffers > 0)
                Assert(stream->next_buffer_index != stream->oldest_buffer_index);
        else
                Assert(stream->next_buffer_index == stream->oldest_buffer_index);
 
        /* We had better not be overwriting an existing pinned buffer. */
        if (stream->pinned_buffers > 0)
                Assert(stream->next_buffer_index != stream->oldest_buffer_index);
        else
                Assert(stream->next_buffer_index == stream->oldest_buffer_index);
 
+       /*
+        * Pinned buffers forwarded by a preceding StartReadBuffers() call that
+        * had to split the operation should match the leading blocks of this
+        * following StartReadBuffers() call.
+        */
+       Assert(stream->forwarded_buffers <= stream->pending_read_nblocks);
+       for (int i = 0; i < stream->forwarded_buffers; ++i)
+               Assert(BufferGetBlockNumber(stream->buffers[stream->next_buffer_index + i]) ==
+                          stream->pending_read_blocknum + i);
+
+       /*
+        * Check that we've cleared the queue/overflow entries corresponding to
+        * the rest of the blocks covered by this read, unless it's the first go
+        * around and we haven't even initialized them yet.
+        */
+       for (int i = stream->forwarded_buffers; i < stream->pending_read_nblocks; ++i)
+               Assert(stream->next_buffer_index + i >= stream->initialized_buffers ||
+                          stream->buffers[stream->next_buffer_index + i] == InvalidBuffer);
+#endif
+
        /* Do we need to issue read-ahead advice? */
        flags = stream->read_buffers_flags;
        if (stream->advice_enabled)
        /* Do we need to issue read-ahead advice? */
        flags = stream->read_buffers_flags;
        if (stream->advice_enabled)
@@ -979,6 +1000,19 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
                stream->pending_read_nblocks == 0 &&
                stream->per_buffer_data_size == 0)
        {
                stream->pending_read_nblocks == 0 &&
                stream->per_buffer_data_size == 0)
        {
+               /*
+                * The fast path spins on one buffer entry repeatedly instead of
+                * rotating through the whole queue and clearing the entries behind
+                * it.  If the buffer it starts with happened to be forwarded between
+                * StartReadBuffers() calls and also wrapped around the circular queue
+                * partway through, then a copy also exists in the overflow zone, and
+                * it won't clear it out as the regular path would.  Do that now, so
+                * it doesn't need code for that.
+                */
+               if (stream->oldest_buffer_index < stream->io_combine_limit - 1)
+                       stream->buffers[stream->queue_size + stream->oldest_buffer_index] =
+                               InvalidBuffer;
+
                stream->fast_path = true;
        }
 #endif
                stream->fast_path = true;
        }
 #endif