From: Michael Paquier Date: Wed, 17 Jun 2026 07:05:11 +0000 (+0900) Subject: Fix pgstat_count_io_op_time() calls passing incorrect information X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;ds=inline;p=thirdparty%2Fpostgresql.git Fix pgstat_count_io_op_time() calls passing incorrect information Several calls of pgstat_count_io_op_time() were passing the value returned by pg_pread() or pg_pwrite() as the byte count even when that value was negative; since it is cast back to uint64, an incorrect count was reported. Most of the problematic calls updated here are adjusted so that we no longer report bogus negative numbers. In xlogrecovery.c, the spot updated still counts short reads. In xlog.c, after a WAL segment initialization, I/O numbers are aggregated only after checking that the operation has succeeded. These issues were introduced by a051e71e28a1. Reported-by: Peter Eisentraut Author: Bertrand Drouvot Reviewed-by: Michael Paquier Reviewed-by: Ayush Tiwari Discussion: https://postgr.es/m/0db864e6-4477-4eba-b2be-d3523cc86564@eisentraut.org Backpatch-through: 18 --- diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 6c2304fef33..a81912b7441 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -2455,9 +2455,6 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible) written = pg_pwrite(openLogFile, from, nleft, startoffset); pgstat_report_wait_end(); - pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, - IOOP_WRITE, start, 1, written); - if (written <= 0) { char xlogfname[MAXFNAMELEN]; @@ -2475,6 +2472,9 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible) errmsg("could not write to log file \"%s\" at offset %u, length %zu: %m", xlogfname, startoffset, nleft))); } + + pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, + IOOP_WRITE, start, 1, written); nleft -= written; from += written; startoffset += written; @@ -3331,14 +3331,6 @@ XLogFileInitInternal(XLogSegNo 
logsegno, TimeLineID logtli, } pgstat_report_wait_end(); - /* - * A full segment worth of data is written when using wal_init_zero. One - * byte is written when not using it. - */ - pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_INIT, IOOP_WRITE, - io_start, 1, - wal_init_zero ? wal_segment_size : 1); - if (save_errno) { /* @@ -3355,6 +3347,14 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, errmsg("could not write to file \"%s\": %m", tmppath))); } + /* + * A full segment worth of data is written when using wal_init_zero. One + * byte is written when not using it. + */ + pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_INIT, IOOP_WRITE, + io_start, 1, + wal_init_zero ? wal_segment_size : 1); + /* Measure I/O timing when flushing segment */ io_start = pgstat_prepare_io_time(track_wal_io_timing); diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 3145c58a9b1..9d64ae34932 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -1597,9 +1597,6 @@ WALRead(XLogReaderState *state, #ifndef FRONTEND pgstat_report_wait_end(); - - pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ, - io_start, 1, readbytes); #endif if (readbytes <= 0) @@ -1612,6 +1609,11 @@ WALRead(XLogReaderState *state, return false; } +#ifndef FRONTEND + pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ, + io_start, 1, readbytes); +#endif + /* Update state for read */ recptr += readbytes; nbytes -= readbytes; diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index 73b78a83fa7..4d61795b483 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -3390,8 +3390,10 @@ retry: pgstat_report_wait_end(); - pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ, - io_start, 1, r); + /* Count I/O stats only for successful short reads */ + if (r > 0) + 
pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ, + io_start, 1, r); XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size); if (r < 0) diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index d19317703c1..05e2f690fa7 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -954,9 +954,6 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr, TimeLineID tli) byteswritten = pg_pwrite(recvFile, buf, segbytes, (pgoff_t) startoff); pgstat_report_wait_end(); - pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, - IOOP_WRITE, start, 1, byteswritten); - if (byteswritten <= 0) { char xlogfname[MAXFNAMELEN]; @@ -976,6 +973,9 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr, TimeLineID tli) xlogfname, startoff, segbytes))); } + pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, + IOOP_WRITE, start, 1, byteswritten); + /* Update state for write */ recptr += byteswritten;