git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Fix pgstat_count_io_op_time() calls passing incorrect information
author: Michael Paquier <michael@paquier.xyz>
Wed, 17 Jun 2026 07:05:11 +0000 (16:05 +0900)
committer: Michael Paquier <michael@paquier.xyz>
Wed, 17 Jun 2026 07:05:11 +0000 (16:05 +0900)
Several calls of pgstat_count_io_op_time() passed the negative values
returned by pg_pread() or pg_pwrite() as the amount of data to count.
These values were cast back to uint64, so an incorrect count was reported.

Most of the problematic calls updated here are adjusted so that we no
longer report buggy negative numbers.  In xlogrecovery.c, the spot
updated still counts short reads.  In xlog.c, after a WAL segment
initialization, I/O numbers are aggregated only after checking that the
operation has succeeded.

Issues introduced by commit a051e71e28a1.

Reported-by: Peter Eisentraut <peter@eisentraut.org>
Author: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Reviewed-by: Ayush Tiwari <ayushtiwari.slg01@gmail.com>
Discussion: https://postgr.es/m/0db864e6-4477-4eba-b2be-d3523cc86564@eisentraut.org
Backpatch-through: 18

src/backend/access/transam/xlog.c
src/backend/access/transam/xlogreader.c
src/backend/access/transam/xlogrecovery.c
src/backend/replication/walreceiver.c

index 6c2304fef33f80ff9f8d84bac351aa9c440cf635..a81912b7441eef7e07a65366b2dd16d8dc6afe56 100644 (file)
@@ -2455,9 +2455,6 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
                                written = pg_pwrite(openLogFile, from, nleft, startoffset);
                                pgstat_report_wait_end();
 
-                               pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL,
-                                                                               IOOP_WRITE, start, 1, written);
-
                                if (written <= 0)
                                {
                                        char            xlogfname[MAXFNAMELEN];
@@ -2475,6 +2472,9 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
                                                         errmsg("could not write to log file \"%s\" at offset %u, length %zu: %m",
                                                                        xlogfname, startoffset, nleft)));
                                }
+
+                               pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL,
+                                                                               IOOP_WRITE, start, 1, written);
                                nleft -= written;
                                from += written;
                                startoffset += written;
@@ -3331,14 +3331,6 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
        }
        pgstat_report_wait_end();
 
-       /*
-        * A full segment worth of data is written when using wal_init_zero. One
-        * byte is written when not using it.
-        */
-       pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_INIT, IOOP_WRITE,
-                                                       io_start, 1,
-                                                       wal_init_zero ? wal_segment_size : 1);
-
        if (save_errno)
        {
                /*
@@ -3355,6 +3347,14 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
                                 errmsg("could not write to file \"%s\": %m", tmppath)));
        }
 
+       /*
+        * A full segment worth of data is written when using wal_init_zero. One
+        * byte is written when not using it.
+        */
+       pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_INIT, IOOP_WRITE,
+                                                       io_start, 1,
+                                                       wal_init_zero ? wal_segment_size : 1);
+
        /* Measure I/O timing when flushing segment */
        io_start = pgstat_prepare_io_time(track_wal_io_timing);
 
index 3145c58a9b194085b57faf41d162ad8c51630063..9d64ae34932bd0f2d1d8081af74a4e5297195557 100644 (file)
@@ -1597,9 +1597,6 @@ WALRead(XLogReaderState *state,
 
 #ifndef FRONTEND
                pgstat_report_wait_end();
-
-               pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ,
-                                                               io_start, 1, readbytes);
 #endif
 
                if (readbytes <= 0)
@@ -1612,6 +1609,11 @@ WALRead(XLogReaderState *state,
                        return false;
                }
 
+#ifndef FRONTEND
+               pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ,
+                                                               io_start, 1, readbytes);
+#endif
+
                /* Update state for read */
                recptr += readbytes;
                nbytes -= readbytes;
index 73b78a83fa7442c81447f66fe3cc52f16de35cac..4d61795b48363d9e749f5a2128f8dafd6a2488d5 100644 (file)
@@ -3390,8 +3390,10 @@ retry:
 
                pgstat_report_wait_end();
 
-               pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ,
-                                                               io_start, 1, r);
+               /* Count I/O stats only for successful short reads */
+               if (r > 0)
+                       pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ,
+                                                                       io_start, 1, r);
 
                XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
                if (r < 0)
index d19317703c1f2f090bf21138fd16d0a53315b8d9..05e2f690fa792421241e48dbc21f0cbea5ad458b 100644 (file)
@@ -954,9 +954,6 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr, TimeLineID tli)
                byteswritten = pg_pwrite(recvFile, buf, segbytes, (pgoff_t) startoff);
                pgstat_report_wait_end();
 
-               pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL,
-                                                               IOOP_WRITE, start, 1, byteswritten);
-
                if (byteswritten <= 0)
                {
                        char            xlogfname[MAXFNAMELEN];
@@ -976,6 +973,9 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr, TimeLineID tli)
                                                        xlogfname, startoff, segbytes)));
                }
 
+               pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL,
+                                                               IOOP_WRITE, start, 1, byteswritten);
+
                /* Update state for write */
                recptr += byteswritten;