From: Andres Freund Date: Thu, 26 Mar 2026 14:51:52 +0000 (-0400) Subject: bufmgr: Restructure AsyncReadBuffers() X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=8a1a1d6ab86217020a612ee2e03eae4fe00ae781;p=thirdparty%2Fpostgresql.git bufmgr: Restructure AsyncReadBuffers() Restructure AsyncReadBuffers() to use early return when the head buffer is already valid, instead of using a did_start_io flag and if/else branches. Also move around a bit of the code to be located closer to where it is used. This is a refactor only. Author: Melanie Plageman Reviewed-by: Andres Freund Discussion: https://postgr.es/m/zljergweqti7x67lg5ije2rzjusie37nslsnkjkkby4laqqbfw@3p3zu522yykv --- diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index fa85570a791..e212f6110f2 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1920,21 +1920,12 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress) void *io_pages[MAX_IO_COMBINE_LIMIT]; IOContext io_context; IOObject io_object; - bool did_start_io; - - /* - * When this IO is executed synchronously, either because the caller will - * immediately block waiting for the IO or because IOMETHOD_SYNC is used, - * the AIO subsystem needs to know. - */ - if (flags & READ_BUFFERS_SYNCHRONOUSLY) - ioh_flags |= PGAIO_HF_SYNCHRONOUS; + instr_time io_start; if (persistence == RELPERSISTENCE_TEMP) { io_context = IOCONTEXT_NORMAL; io_object = IOOBJECT_TEMP_RELATION; - ioh_flags |= PGAIO_HF_REFERENCES_LOCAL; } else { @@ -1942,6 +1933,17 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress) io_object = IOOBJECT_RELATION; } + /* + * When this IO is executed synchronously, either because the caller will + * immediately block waiting for the IO or because IOMETHOD_SYNC is used, + * the AIO subsystem needs to know. + */ + if (flags & READ_BUFFERS_SYNCHRONOUSLY) + ioh_flags |= PGAIO_HF_SYNCHRONOUS; + + if (persistence == RELPERSISTENCE_TEMP) + ioh_flags |= PGAIO_HF_REFERENCES_LOCAL; + /* * If zero_damaged_pages is enabled, add the READ_BUFFERS_ZERO_ON_ERROR * flag. The reason for that is that, hopefully, zero_damaged_pages isn't @@ -1992,7 +1994,6 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress) if (unlikely(!ioh)) { pgaio_submit_staged(); - ioh = pgaio_io_acquire(CurrentResourceOwner, &operation->io_return); } @@ -2017,91 +2018,95 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress) pgaio_io_release(ioh); pgaio_wref_clear(&operation->io_wref); - did_start_io = false; /* * Report and track this as a 'hit' for this backend, even though it * must have started out as a miss in PinBufferForBlock(). The other * backend will track this as a 'read'. */ - TrackBufferHit(io_object, io_context, operation->rel, persistence, - operation->smgr, forknum, blocknum); + TrackBufferHit(io_object, io_context, + operation->rel, operation->persistence, + operation->smgr, operation->forknum, + blocknum); + return false; } - else + + Assert(io_buffers[0] == buffers[nblocks_done]); + io_pages[0] = BufferGetBlock(buffers[nblocks_done]); + io_buffers_len = 1; + + /* + * NB: As little code as possible should be added between the + * ReadBuffersCanStartIO() above, the further ReadBuffersCanStartIO()s + * below and the smgrstartreadv(), as some of the buffers are now marked + * as IO_IN_PROGRESS and will thus cause other backends to wait. + */ + + /* + * How many neighboring-on-disk blocks can we scatter-read into other + * buffers at the same time? In this case we don't wait if we see an I/O + * already in progress (see comment above). + */ + for (int i = nblocks_done + 1; i < operation->nblocks; i++) { - instr_time io_start; + /* Must be consecutive block numbers. */ + Assert(BufferGetBlockNumber(buffers[i - 1]) == + BufferGetBlockNumber(buffers[i]) - 1); - /* We found a buffer that we need to read in. */ - Assert(io_buffers[0] == buffers[nblocks_done]); - io_pages[0] = BufferGetBlock(buffers[nblocks_done]); - io_buffers_len = 1; + if (!ReadBuffersCanStartIO(buffers[i], true)) + break; - /* - * How many neighboring-on-disk blocks can we scatter-read into other - * buffers at the same time? In this case we don't wait if we see an - * I/O already in progress. We already set BM_IO_IN_PROGRESS for the - * head block, so we should get on with that I/O as soon as possible. - */ - for (int i = nblocks_done + 1; i < operation->nblocks; i++) - { - if (!ReadBuffersCanStartIO(buffers[i], true)) - break; - /* Must be consecutive block numbers. */ - Assert(BufferGetBlockNumber(buffers[i - 1]) == - BufferGetBlockNumber(buffers[i]) - 1); - Assert(io_buffers[io_buffers_len] == buffers[i]); + Assert(io_buffers[io_buffers_len] == buffers[i]); - io_pages[io_buffers_len++] = BufferGetBlock(buffers[i]); - } + io_pages[io_buffers_len++] = BufferGetBlock(buffers[i]); + } - /* get a reference to wait for in WaitReadBuffers() */ - pgaio_io_get_wref(ioh, &operation->io_wref); + /* get a reference to wait for in WaitReadBuffers() */ + pgaio_io_get_wref(ioh, &operation->io_wref); - /* provide the list of buffers to the completion callbacks */ - pgaio_io_set_handle_data_32(ioh, (uint32 *) io_buffers, io_buffers_len); + /* provide the list of buffers to the completion callbacks */ + pgaio_io_set_handle_data_32(ioh, (uint32 *) io_buffers, io_buffers_len); - pgaio_io_register_callbacks(ioh, - persistence == RELPERSISTENCE_TEMP ? - PGAIO_HCB_LOCAL_BUFFER_READV : - PGAIO_HCB_SHARED_BUFFER_READV, - flags); + pgaio_io_register_callbacks(ioh, + persistence == RELPERSISTENCE_TEMP ? + PGAIO_HCB_LOCAL_BUFFER_READV : + PGAIO_HCB_SHARED_BUFFER_READV, + flags); - pgaio_io_set_flag(ioh, ioh_flags); + pgaio_io_set_flag(ioh, ioh_flags); - /* --- - * Even though we're trying to issue IO asynchronously, track the time - * in smgrstartreadv(): - * - if io_method == IOMETHOD_SYNC, we will always perform the IO - * immediately - * - the io method might not support the IO (e.g. worker IO for a temp - * table) - * --- - */ - io_start = pgstat_prepare_io_time(track_io_timing); - smgrstartreadv(ioh, operation->smgr, forknum, - blocknum, - io_pages, io_buffers_len); - pgstat_count_io_op_time(io_object, io_context, IOOP_READ, - io_start, 1, io_buffers_len * BLCKSZ); + /* --- + * Even though we're trying to issue IO asynchronously, track the time + * in smgrstartreadv(): + * - if io_method == IOMETHOD_SYNC, we will always perform the IO + * immediately + * - the io method might not support the IO (e.g. worker IO for a temp + * table) + * --- + */ + io_start = pgstat_prepare_io_time(track_io_timing); + smgrstartreadv(ioh, operation->smgr, forknum, + blocknum, + io_pages, io_buffers_len); + pgstat_count_io_op_time(io_object, io_context, IOOP_READ, + io_start, 1, io_buffers_len * BLCKSZ); - if (persistence == RELPERSISTENCE_TEMP) - pgBufferUsage.local_blks_read += io_buffers_len; - else - pgBufferUsage.shared_blks_read += io_buffers_len; + if (persistence == RELPERSISTENCE_TEMP) + pgBufferUsage.local_blks_read += io_buffers_len; + else + pgBufferUsage.shared_blks_read += io_buffers_len; - /* - * Track vacuum cost when issuing IO, not after waiting for it. - * Otherwise we could end up issuing a lot of IO in a short timespan, - * despite a low cost limit. - */ - if (VacuumCostActive) - VacuumCostBalance += VacuumCostPageMiss * io_buffers_len; + /* + * Track vacuum cost when issuing IO, not after waiting for it. Otherwise + * we could end up issuing a lot of IO in a short timespan, despite a low + * cost limit. + */ + if (VacuumCostActive) + VacuumCostBalance += VacuumCostPageMiss * io_buffers_len; - *nblocks_progress = io_buffers_len; - did_start_io = true; - } + *nblocks_progress = io_buffers_len; - return did_start_io; + return true; } /*