]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Address set of issues with errno handling
authorMichael Paquier <michael@paquier.xyz>
Mon, 25 Jun 2018 02:21:49 +0000 (11:21 +0900)
committerMichael Paquier <michael@paquier.xyz>
Mon, 25 Jun 2018 02:21:49 +0000 (11:21 +0900)
System calls mixed up in error code paths are causing two issues which
several code paths have not correctly handled:
1) For write() calls, sometimes the system may return less bytes than
what has been written without errno being set.  Some paths were careful
enough to consider that case, and assumed that errno should be set to
ENOSPC, other calls missed that.
2) errno generated by a system call is overwritten by other system calls
which may succeed once an error code path is taken, causing what is
reported to the user to be incorrect.

This patch uses the brute-force approach of correcting all those code
paths.  Some refactoring could happen in the future, but this is let as
future work, which is not targeted for back-branches anyway.

Author: Michael Paquier
Reviewed-by: Ashutosh Sharma
Discussion: https://postgr.es/m/20180622061535.GD5215@paquier.xyz

src/backend/access/heap/rewriteheap.c
src/backend/access/transam/twophase.c
src/backend/access/transam/xlog.c
src/backend/replication/basebackup.c
src/backend/replication/logical/origin.c
src/backend/replication/logical/reorderbuffer.c
src/backend/replication/logical/snapbuild.c
src/backend/replication/slot.c
src/bin/pg_basebackup/receivelog.c

index f29c06855f8911bab8f62470b7b9fb052b75504e..dd03543b6b37ec817fb256d81d92dc2b6ab9f34f 100644 (file)
@@ -1165,9 +1165,14 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
 
        /* write out tail end of mapping file (again) */
        if (write(fd, data, len) != len)
+       {
+               /* if write didn't set errno, assume problem is no disk space */
+               if (errno == 0)
+                       errno = ENOSPC;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not write to file \"%s\": %m", path)));
+       }
 
        /*
         * Now fsync all previously written data. We could improve things and only
index 161282b0794f1c8506e9edb2b4e692949d4c28ff..10d615068ce6b91d7d7ea1996ccb764aa008e657 100644 (file)
@@ -1249,12 +1249,17 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
         */
        if (fstat(fd, &stat))
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
                if (give_warnings)
+               {
+                       errno = save_errno;
                        ereport(WARNING,
                                        (errcode_for_file_access(),
                                         errmsg("could not stat two-phase state file \"%s\": %m",
                                                        path)));
+               }
                return NULL;
        }
 
@@ -1281,12 +1286,17 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
 
        if (read(fd, buf, stat.st_size) != stat.st_size)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
                if (give_warnings)
+               {
+                       errno = save_errno;
                        ereport(WARNING,
                                        (errcode_for_file_access(),
                                         errmsg("could not read two-phase state file \"%s\": %m",
                                                        path)));
+               }
                pfree(buf);
                return NULL;
        }
@@ -1574,14 +1584,24 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
        /* Write content and CRC */
        if (write(fd, content, len) != len)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not write two-phase state file: %m")));
        }
        if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c))
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not write two-phase state file: %m")));
@@ -1593,7 +1613,10 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
         */
        if (pg_fsync(fd) != 0)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+               errno = save_errno;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not fsync two-phase state file: %m")));
index 5d845a817897b04a6218ea72ee5ba3655628eb09..76dd767ece76a119e99e610d5c965a296547c078 100644 (file)
@@ -3034,7 +3034,10 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
 
        if (pg_fsync(fd) != 0)
        {
+               int                     save_errno = errno;
+
                close(fd);
+               errno = save_errno;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not fsync file \"%s\": %m", tmppath)));
@@ -11159,8 +11162,10 @@ retry:
        if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
        {
                char            fname[MAXFNAMELEN];
+               int                     save_errno = errno;
 
                XLogFileName(fname, curFileTLI, readSegNo);
+               errno = save_errno;
                ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
                                (errcode_for_file_access(),
                                 errmsg("could not seek in log segment %s to offset %u: %m",
@@ -11171,8 +11176,10 @@ retry:
        if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
        {
                char            fname[MAXFNAMELEN];
+               int                     save_errno = errno;
 
                XLogFileName(fname, curFileTLI, readSegNo);
+               errno = save_errno;
                ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
                                (errcode_for_file_access(),
                                 errmsg("could not read from log segment %s, offset %u: %m",
index b7d732a7d8f1db9fce2c0a2814fc10e6f7f12117..8259ec5bd89ff5a0361a4361324aa3c5ecddd6d0 100644 (file)
@@ -388,6 +388,8 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
                        fp = AllocateFile(pathbuf, "rb");
                        if (fp == NULL)
                        {
+                               int                     save_errno = errno;
+
                                /*
                                 * Most likely reason for this is that the file was already
                                 * removed by a checkpoint, so check for that to get a better
@@ -395,6 +397,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
                                 */
                                CheckXLogRemoved(segno, tli);
 
+                               errno = save_errno;
                                ereport(ERROR,
                                                (errcode_for_file_access(),
                                                 errmsg("could not open file \"%s\": %m", pathbuf)));
index ad99739971e4e6efb99a54946d9047d77fea5a2b..6fccdd8654439b8c83b61f304aa7472c8f8f50ef 100644 (file)
@@ -549,7 +549,12 @@ CheckPointReplicationOrigin(void)
        /* write magic */
        if ((write(tmpfd, &magic, sizeof(magic))) != sizeof(magic))
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(tmpfd);
+
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
                ereport(PANIC,
                                (errcode_for_file_access(),
                                 errmsg("could not write to file \"%s\": %m",
@@ -588,7 +593,12 @@ CheckPointReplicationOrigin(void)
                if ((write(tmpfd, &disk_state, sizeof(disk_state))) !=
                        sizeof(disk_state))
                {
+                       int                     save_errno = errno;
+
                        CloseTransientFile(tmpfd);
+
+                       /* if write didn't set errno, assume problem is no disk space */
+                       errno = save_errno ? save_errno : ENOSPC;
                        ereport(PANIC,
                                        (errcode_for_file_access(),
                                         errmsg("could not write to file \"%s\": %m",
@@ -604,7 +614,12 @@ CheckPointReplicationOrigin(void)
        FIN_CRC32C(crc);
        if ((write(tmpfd, &crc, sizeof(crc))) != sizeof(crc))
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(tmpfd);
+
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
                ereport(PANIC,
                                (errcode_for_file_access(),
                                 errmsg("could not write to file \"%s\": %m",
index 71fa528bbb4077d473fde85725cb3ddc389b30e8..97c6721a7d583d4d6b2a23239a7946673c0de095 100644 (file)
@@ -2241,7 +2241,9 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
                int save_errno = errno;
 
                CloseTransientFile(fd);
-               errno = save_errno;
+
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not write to data file for XID %u: %m",
index f4b1fea0f8459e8777337884a7446cd68cf96d3d..18dec24b3529a542e26c33d6dba49b28a5f7f9ce 100644 (file)
@@ -1566,7 +1566,12 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
 
        if ((write(fd, ondisk, needed_length)) != needed_length)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not write to file \"%s\": %m", tmppath)));
@@ -1582,7 +1587,10 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
         */
        if (pg_fsync(fd) != 0)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+               errno = save_errno;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not fsync file \"%s\": %m", tmppath)));
@@ -1664,7 +1672,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
        readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize);
        if (readBytes != SnapBuildOnDiskConstantSize)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+               errno = save_errno;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1690,7 +1701,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
        readBytes = read(fd, &ondisk.builder, sizeof(SnapBuild));
        if (readBytes != sizeof(SnapBuild))
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+               errno = save_errno;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1705,7 +1719,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
        readBytes = read(fd, ondisk.builder.was_running.was_xip, sz);
        if (readBytes != sz)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+               errno = save_errno;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1719,7 +1736,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
        readBytes = read(fd, ondisk.builder.committed.xip, sz);
        if (readBytes != sz)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+               errno = save_errno;
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not read file \"%s\", read %d of %d: %m",
index e37098faffb5c56b740437e0c4df773bce64f23c..a5bc452d8128f768b728a92601fd44d8448b98e6 100644 (file)
@@ -1033,7 +1033,9 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
                int                     save_errno = errno;
 
                CloseTransientFile(fd);
-               errno = save_errno;
+
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
                ereport(elevel,
                                (errcode_for_file_access(),
                                 errmsg("could not write to file \"%s\": %m",
@@ -1136,7 +1138,10 @@ RestoreSlotFromDisk(const char *name)
         */
        if (pg_fsync(fd) != 0)
        {
+               int                     save_errno = errno;
+
                CloseTransientFile(fd);
+               errno = save_errno;
                ereport(PANIC,
                                (errcode_for_file_access(),
                                 errmsg("could not fsync file \"%s\": %m",
index 406c01bfccc83fe7660bac001b095dd6e158f274..71f3fb59b7c344a378839a8dc259cc58e2615e4a 100644 (file)
@@ -159,6 +159,9 @@ open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir,
        {
                if (write(f, zerobuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
                {
+                       /* if write didn't set errno, assume problem is no disk space */
+                       if (errno == 0)
+                               errno = ENOSPC;
                        fprintf(stderr,
                                        _("%s: could not pad transaction log file \"%s\": %s\n"),
                                        progname, fn, strerror(errno));
@@ -345,7 +348,9 @@ writeTimeLineHistoryFile(char *basedir, TimeLineID tli, char *filename,
                 */
                close(fd);
                unlink(tmppath);
-               errno = save_errno;
+
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
 
                fprintf(stderr, _("%s: could not write timeline history file \"%s\": %s\n"),
                                progname, tmppath, strerror(errno));
@@ -354,7 +359,10 @@ writeTimeLineHistoryFile(char *basedir, TimeLineID tli, char *filename,
 
        if (fsync(fd) != 0)
        {
+               int                     save_errno = errno;
+
                close(fd);
+               errno = save_errno;
                fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
                                progname, tmppath, strerror(errno));
                return false;
@@ -1213,6 +1221,9 @@ ProcessXLogDataMsg(PGconn *conn, char *copybuf, int len,
                                  copybuf + hdr_len + bytes_written,
                                  bytes_to_write) != bytes_to_write)
                {
+                       /* if write didn't set errno, assume problem is no disk space */
+                       if (errno == 0)
+                               errno = ENOSPC;
                        fprintf(stderr,
                                  _("%s: could not write %u bytes to WAL file \"%s\": %s\n"),
                                        progname, bytes_to_write, current_walfile_name,