[thirdparty/systemd.git] / src / basic / fd-util.c

/* SPDX-License-Identifier: LGPL-2.1+ */
/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <unistd.h>

#include "dirent-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "macro.h"
#include "memfd-util.h"
#include "missing.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
#include "socket-util.h"
#include "stdio-util.h"
#include "util.h"

int close_nointr(int fd) {
        /* Closes 'fd', treating an interrupted close() as success.
         *
         * Returns 0 if close() succeeded or failed with EINTR, and a negative errno-style value for
         * any other failure. */

        assert(fd >= 0);

        /*
         * EINTR is swallowed on purpose instead of being retried: looping
         * on close() is the wrong thing to do on Linux.
         *
         * http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html
         * https://bugzilla.gnome.org/show_bug.cgi?id=682819
         * http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR
         * https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain
         */
        if (close(fd) < 0 && errno != EINTR)
                return -errno;

        return 0;
}

int safe_close(int fd) {
        /*
         * Variant of close_nointr() that never reports failure. errno is
         * saved on entry and restored on exit, negative fds are ignored,
         * and the result is always -1 so that callers can write:
         *
         * fd = safe_close(fd);
         */
        PROTECT_ERRNO;

        if (fd < 0)
                return -1;

        /* close() may fail with nearly any error code, yet the fd is
         * gone afterwards regardless. The only outcome we trap is
         * EBADF, because it means the fd was not valid to begin
         * with. */
        assert_se(close_nointr(fd) != -EBADF);

        return -1;
}

void safe_close_pair(int p[]) {
        bool shared;

        /* Closes both fds of a two-element pair and resets both slots to the value safe_close()
         * returns. A pair whose two slots hold the same fd is closed only once. */

        assert(p);

        /* Decide this before the first slot is overwritten */
        shared = p[0] == p[1];

        p[0] = safe_close(p[0]);
        p[1] = shared ? p[0] : safe_close(p[1]);
}

void close_many(const int fds[], unsigned n_fd) {
        const int *cur;

        /* Passes each of the first 'n_fd' entries of 'fds' to safe_close(). 'fds' may be NULL only
         * if 'n_fd' is zero. The array itself is not modified. */

        assert(fds || n_fd <= 0);

        if (n_fd == 0)
                return;

        for (cur = fds; cur != fds + n_fd; cur++)
                safe_close(*cur);
}

int fclose_nointr(FILE *f) {
        /* fclose() counterpart of close_nointr(): an fclose() that fails with EINTR counts as
         * success.
         *
         * Returns 0 on success (or EINTR), a negative errno-style value on any other failure. */

        assert(f);

        if (fclose(f) != 0 && errno != EINTR)
                return -errno;

        return 0;
}

FILE* safe_fclose(FILE *f) {

        /* Same as safe_close(), but for fclose(): errno is preserved across the call, a NULL stream
         * is ignored, and NULL is always returned so that callers can write:
         *
         * f = safe_fclose(f);
         */

        if (f) {
                PROTECT_ERRNO;

                /* fclose_nointr() reports failures as *negative* errno values, hence the check must
                 * be against -EBADF. Comparing against the positive constant could never trigger. */
                assert_se(fclose_nointr(f) != -EBADF);
        }

        return NULL;
}

DIR* safe_closedir(DIR *d) {
        /* closedir() wrapper in the spirit of safe_close(): errno is saved on entry and restored on
         * exit, a NULL stream is ignored, and NULL is always returned so that callers can write
         * "d = safe_closedir(d);". Only an EBADF failure trips the assertion. */
        PROTECT_ERRNO;

        if (!d)
                return NULL;

        assert_se(closedir(d) >= 0 || errno != EBADF);

        return NULL;
}

int fd_nonblock(int fd, bool nonblock) {
        int current, wanted;

        /* Turns O_NONBLOCK on ('nonblock' true) or off ('nonblock' false) for 'fd'.
         *
         * Returns 0 on success, including when the flag already had the requested state, and a
         * negative errno-style value if querying or updating the file status flags fails. */

        assert(fd >= 0);

        current = fcntl(fd, F_GETFL, 0);
        if (current < 0)
                return -errno;

        wanted = nonblock ? (current | O_NONBLOCK) : (current & ~O_NONBLOCK);

        /* Skip the second fcntl() call if there is nothing to change */
        if (wanted != current && fcntl(fd, F_SETFL, wanted) < 0)
                return -errno;

        return 0;
}

int fd_cloexec(int fd, bool cloexec) {
        int current, wanted;

        /* Turns FD_CLOEXEC on ('cloexec' true) or off ('cloexec' false) for 'fd'.
         *
         * Returns 0 on success, including when the flag already had the requested state, and a
         * negative errno-style value if querying or updating the descriptor flags fails. */

        assert(fd >= 0);

        current = fcntl(fd, F_GETFD, 0);
        if (current < 0)
                return -errno;

        wanted = cloexec ? (current | FD_CLOEXEC) : (current & ~FD_CLOEXEC);

        /* Skip the second fcntl() call if there is nothing to change */
        if (wanted != current && fcntl(fd, F_SETFD, wanted) < 0)
                return -errno;

        return 0;
}

_pure_ static bool fd_in_set(int fd, const int fdset[], unsigned n_fdset) {
        unsigned idx = 0;

        /* Linear search: returns true if 'fd' equals one of the first 'n_fdset' entries of 'fdset'.
         * 'fdset' may be NULL only if 'n_fdset' is zero. */

        assert(n_fdset == 0 || fdset);

        while (idx < n_fdset) {
                if (fd == fdset[idx])
                        return true;

                idx++;
        }

        return false;
}

int close_all_fds(const int except[], unsigned n_except) {
        _cleanup_closedir_ DIR *d = NULL;
        struct dirent *de;
        int r = 0;

        /* Closes all file descriptors >= 3 of the calling process, except those listed in the first
         * 'n_except' entries of 'except' ('except' may be NULL only if 'n_except' is zero).
         *
         * Returns 0 on success. If closing individual fds fails, the first error encountered is
         * returned, but the remaining fds are still processed. EBADF is never treated as an error. If
         * enumerating /proc/self/fd fails midway, the error action passed to FOREACH_DIRENT returns
         * -errno. */

        assert(n_except == 0 || except);

        d = opendir("/proc/self/fd");
        if (!d) {
                struct rlimit rl;
                int fd, end;

                /* When /proc isn't available (for example in chroots)
                 * the fallback is brute forcing through the fd
                 * table */

                assert_se(getrlimit(RLIMIT_NOFILE, &rl) >= 0);

                /* rlim_max is an rlim_t, and may be RLIM_INFINITY or otherwise exceed the range of
                 * 'int'. Casting it blindly would typically yield a negative bound, and the loop
                 * below would then not close anything at all. Clamp it to INT_MAX instead. */
                if (rl.rlim_max == RLIM_INFINITY || rl.rlim_max > (rlim_t) INT_MAX)
                        end = INT_MAX;
                else
                        end = (int) rl.rlim_max;

                for (fd = 3; fd < end; fd++) {
                        int q;

                        if (fd_in_set(fd, except, n_except))
                                continue;

                        /* Most slots will not be in use, hence EBADF is expected here */
                        q = close_nointr(fd);
                        if (q < 0 && q != -EBADF && r >= 0)
                                r = q;
                }

                return r;
        }

        FOREACH_DIRENT(de, d, return -errno) {
                int fd = -1, q;

                if (safe_atoi(de->d_name, &fd) < 0)
                        /* Let's better ignore this, just in case */
                        continue;

                /* Leave stdin/stdout/stderr alone */
                if (fd < 3)
                        continue;

                /* Don't close the directory fd we are currently iterating with */
                if (fd == dirfd(d))
                        continue;

                if (fd_in_set(fd, except, n_except))
                        continue;

                q = close_nointr(fd);
                if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */
                        r = q;
        }

        return r;
}

int same_fd(int a, int b) {
        struct stat st_a, st_b;
        int cmp, flags_a, flags_b;
        pid_t self;

        assert(a >= 0);
        assert(b >= 0);

        /* Checks whether two file descriptors are "the same". Returns > 0 (true) if so, 0 (false)
         * if not, and a negative errno-style value on failure.
         *
         * The exact meaning depends on whether kcmp() is available. With kcmp() only dup()ed file
         * descriptors compare as equal. Without it, two fds that refer to the same file but stem
         * from separate open() calls compare as equal too. As this call is mostly used to filter out
         * duplicates in the fd store, the difference hopefully doesn't matter too much. */

        if (a == b)
                return true;

        /* Preferred path: let kcmp() decide, if we have it */
        self = getpid_cached();
        cmp = kcmp(self, self, KCMP_FILE, a, b);
        if (cmp >= 0)
                return cmp == 0;
        if (errno != ENOSYS)
                return -errno;

        /* No kcmp(), fall back to comparing fstat() data */
        if (fstat(a, &st_a) < 0)
                return -errno;

        if (fstat(b, &st_b) < 0)
                return -errno;

        /* Different file types can never be the same thing */
        if ((st_a.st_mode & S_IFMT) != (st_b.st_mode & S_IFMT))
                return false;

        /* Device fds are always considered different: two of them may refer to quite different
         * device contexts even if inode and backing dev_t match. */
        if (S_ISCHR(st_a.st_mode) || S_ISBLK(st_a.st_mode))
                return false;

        if (st_a.st_dev != st_b.st_dev || st_a.st_ino != st_b.st_ino)
                return false;

        /* Same inode. Compare the file status flags as well, so that the read and the write side of
         * a pipe created with pipe() can be told apart. */
        flags_a = fcntl(a, F_GETFL);
        if (flags_a < 0)
                return -errno;

        flags_b = fcntl(b, F_GETFL);
        if (flags_b < 0)
                return -errno;

        return flags_a == flags_b;
}

void cmsg_close_all(struct msghdr *mh) {
        struct cmsghdr *cmsg;

        assert(mh);

        CMSG_FOREACH(cmsg, mh)
                if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
                        close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int));
}

bool fdname_is_valid(const char *s) {
        size_t len = 0;

        /* Checks whether 's' is acceptable as a name in $LISTEN_FDNAMES. Any ASCII character that
         * is not a control character is fine, with the exception of ":", which serves as field
         * separator in $LISTEN_FDNAMES.
         *
         * The empty string is explicitly permitted, NULL is not. Names are limited to 255
         * characters. */

        if (!s)
                return false;

        for (; s[len] != '\0'; len++) {
                char c = s[len];

                if (c < ' ' || c >= 127 || c == ':')
                        return false;
        }

        return len < 256;
}

int fd_get_path(int fd, char **ret) {
        _cleanup_close_ int dir = -1;
        char fdname[DECIMAL_STR_MAX(int)];
        int r;

        dir = open("/proc/self/fd/", O_CLOEXEC | O_DIRECTORY | O_PATH);
        if (dir < 0)
                /* /proc is not available or not set up properly, we're most likely
                 * in some chroot environment. */
                return errno == ENOENT ? -EOPNOTSUPP : -errno;

        xsprintf(fdname, "%i", fd);

        r = readlinkat_malloc(dir, fdname, ret);
        if (r == -ENOENT)
                /* If the file doesn't exist the fd is invalid */
                return -EBADF;

        return r;
}

int move_fd(int from, int to, int cloexec) {
        int r;

        /* Moves fd 'from' to position 'to' and releases the old fd. 'cloexec' controls FD_CLOEXEC on
         * the result: -1 inherits the setting of 'from', 0 turns it off, > 0 turns it on.
         *
         * Returns 'to' on success, a negative errno-style value on failure. */

        if (from < 0 || to < 0)
                return -EBADF;

        if (from == to) {
                /* Nothing to move, at most the flag needs adjusting */
                if (cloexec < 0)
                        return to;

                r = fd_cloexec(to, cloexec);
                return r < 0 ? r : to;
        }

        if (cloexec < 0) {
                int fdflags;

                /* Look up the current setting, so that it can be carried over */
                fdflags = fcntl(from, F_GETFD, 0);
                if (fdflags < 0)
                        return -errno;

                cloexec = (fdflags & FD_CLOEXEC) != 0;
        }

        r = dup3(from, to, cloexec ? O_CLOEXEC : 0);
        if (r < 0)
                return -errno;

        assert(r == to);

        safe_close(from);

        return to;
}

int acquire_data_fd(const void *data, size_t size, unsigned flags) {

        char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
        _cleanup_close_pair_ int pipefds[2] = { -1, -1 };
        char pattern[] = "/dev/shm/data-fd-XXXXXX";
        _cleanup_close_ int fd = -1;
        int isz = 0, r;
        ssize_t n;
        off_t f;

        assert(data || size == 0);

        /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
         * complex than I wish it was. But here's why:
         *
         * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
         *    read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
         *
         * b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
         *    a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
         *    clients can only bump their size to a system-wide limit, which might be quite low.
         *
         * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
         *    earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
         *    /proc/self/fd/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
         *
         * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
         *
         * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
         * figure.
         *
         * Parameters:
         *   data, size: the buffer to expose; 'data' may be NULL only if 'size' is zero.
         *   flags:      bit mask; ACQUIRE_NO_DEV_NULL, ACQUIRE_NO_MEMFD, ACQUIRE_NO_PIPE, ACQUIRE_NO_TMPFILE and
         *               ACQUIRE_NO_REGULAR each disable the corresponding strategy below.
         *
         * Returns the new fd (>= 0) on success and a negative errno-style value on failure: -EIO on a short
         * write, -E2BIG if the pipe strategy is used and 'size' does not fit into an int, -EOPNOTSUPP if no
         * strategy was left to try. Note that a failure *after* a strategy's backing object was created is returned
         * right away; only failure to create the memfd or the O_TMPFILE file, or a pipe that cannot be made
         * large enough, moves on to the next strategy. */

        if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
                /* As a special case, return /dev/null if we have been called for an empty data block */
                r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
                if (r < 0)
                        return -errno;

                return r;
        }

        /* Strategy a): memfd */
        if ((flags & ACQUIRE_NO_MEMFD) == 0) {
                fd = memfd_new("data-fd");
                if (fd < 0)
                        goto try_pipe;

                n = write(fd, data, size);
                if (n < 0)
                        return -errno;
                if ((size_t) n != size)
                        return -EIO;

                /* Rewind, so that the data is read from the start */
                f = lseek(fd, 0, SEEK_SET);
                if (f != 0)
                        return -errno;

                r = memfd_set_sealed(fd);
                if (r < 0)
                        return r;

                /* NOTE(review): TAKE_FD() presumably returns the fd and resets the variable, so that
                 * _cleanup_close_ leaves it open for the caller — confirm against its definition. */
                return TAKE_FD(fd);
        }

try_pipe:
        /* Strategy b): pipe */
        if ((flags & ACQUIRE_NO_PIPE) == 0) {
                /* The pipe is created in non-blocking mode; the read side is switched back to blocking mode
                 * further down, right before it is returned. */
                if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
                        return -errno;

                isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
                if (isz < 0)
                        return -errno;

                if ((size_t) isz < size) {
                        /* F_SETPIPE_SZ takes an int, hence make sure 'size' survives the round trip */
                        isz = (int) size;
                        if (isz < 0 || (size_t) isz != size)
                                return -E2BIG;

                        /* Try to bump the pipe size */
                        (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);

                        /* See if that worked */
                        isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
                        if (isz < 0)
                                return -errno;

                        /* Still too small: give up on the pipe and use a file in /dev/shm instead */
                        if ((size_t) isz < size)
                                goto try_dev_shm;
                }

                n = write(pipefds[1], data, size);
                if (n < 0)
                        return -errno;
                if ((size_t) n != size)
                        return -EIO;

                /* Best effort: turn O_NONBLOCK off again on the read side we hand out */
                (void) fd_nonblock(pipefds[0], false);

                return TAKE_FD(pipefds[0]);
        }

try_dev_shm:
        /* Strategy c): O_TMPFILE file in /dev/shm */
        if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
                fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
                if (fd < 0)
                        goto try_dev_shm_without_o_tmpfile;

                n = write(fd, data, size);
                if (n < 0)
                        return -errno;
                if ((size_t) n != size)
                        return -EIO;

                /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
                xsprintf(procfs_path, "/proc/self/fd/%i", fd);
                r = open(procfs_path, O_RDONLY|O_CLOEXEC);
                if (r < 0)
                        return -errno;

                return r;
        }

try_dev_shm_without_o_tmpfile:
        /* Strategy d): regular, named temporary file in /dev/shm */
        if ((flags & ACQUIRE_NO_REGULAR) == 0) {
                fd = mkostemp_safe(pattern);
                if (fd < 0)
                        return fd;

                n = write(fd, data, size);
                if (n < 0) {
                        r = -errno;
                        goto unlink_and_return;
                }
                if ((size_t) n != size) {
                        r = -EIO;
                        goto unlink_and_return;
                }

                /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
                r = open(pattern, O_RDONLY|O_CLOEXEC);
                if (r < 0)
                        r = -errno;

        unlink_and_return:
                /* Reached on success and on failure alike: the name is always removed again, and 'r' carries
                 * either the read-only fd or the error. */
                (void) unlink(pattern);
                return r;
        }

        return -EOPNOTSUPP;
}

int fd_move_above_stdio(int fd) {
        int fdflags, moved;
        PROTECT_ERRNO;

        /* If 'fd' lies within [0…2], i.e. the range of stdin/stdout/stderr, try to relocate it to a
         * number above that range, and return the new fd. If that is not possible the original fd is
         * returned. This is intended for long-lasting file descriptors we allocate in our code that
         * might get loaded into foreign code, where we want to make it unlikely that our fds end up
         * being used accidentally as stdin/stdout/stderr of unrelated code.
         *
         * This does not fix any real bugs. It merely lowers the chance that we are affected by buggy
         * code from others that mindlessly invokes 'fprintf(stderr, …' or similar in places where
         * stderr has been closed before.
         *
         * The function works in a "best-effort" and "least-impact" manner: on any error the original
         * file descriptor is returned as is, and errno is left untouched. */

        if (fd < 0 || fd > 2)
                return fd;

        fdflags = fcntl(fd, F_GETFD, 0);
        if (fdflags < 0)
                return fd;

        /* Duplicate to the lowest free fd >= 3, carrying over the close-on-exec setting */
        moved = fcntl(fd, (fdflags & FD_CLOEXEC) ? F_DUPFD_CLOEXEC : F_DUPFD, 3);
        if (moved < 0)
                return fd;

        assert(moved > 2);

        (void) close(fd);
        return moved;
}

int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd) {

        int fd[3] = { /* Put together an array of fds we work on */
                original_input_fd,
                original_output_fd,
                original_error_fd
        };

        int r, i,
                null_fd = -1,                /* if we open /dev/null, we store the fd to it here */
                copy_fd[3] = { -1, -1, -1 }; /* This contains all fds we duplicate here temporarily, and hence need to close at the end */
        bool null_readable, null_writable;

        /* Sets up stdin, stdout, stderr with the three file descriptors passed in. If any of the descriptors is
         * specified as -1 it will be connected with /dev/null instead. If any of the file descriptors is passed as
         * itself (e.g. stdin as STDIN_FILENO) it is left unmodified, but the O_CLOEXEC bit is turned off should it be
         * on.
         *
         * Note that if any of the passed file descriptors are > 2 they will be closed — both on success and on
         * failure! Thus, callers should assume that when this function returns the input fds are invalidated.
         *
         * Note that when this function fails stdin/stdout/stderr might remain half set up!
         *
         * O_CLOEXEC is turned off for all three file descriptors (which is how it should be for
         * stdin/stdout/stderr).
         *
         * Returns 0 on success, a negative errno-style value on failure. */

        /* /dev/null needs to be readable if it stands in for stdin, and writable if it stands in for stdout or
         * stderr */
        null_readable = original_input_fd < 0;
        null_writable = original_output_fd < 0 || original_error_fd < 0;

        /* First step, open /dev/null once, if we need it */
        if (null_readable || null_writable) {

                /* Let's open this with O_CLOEXEC first, and convert it to non-O_CLOEXEC when we move the fd to the final position. */
                null_fd = open("/dev/null", (null_readable && null_writable ? O_RDWR :
                                             null_readable ? O_RDONLY : O_WRONLY) | O_CLOEXEC);
                if (null_fd < 0) {
                        r = -errno;
                        goto finish;
                }

                /* If this fd is in the 0…2 range, let's move it out of it */
                if (null_fd < 3) {
                        int copy;

                        copy = fcntl(null_fd, F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
                        if (copy < 0) {
                                r = -errno;
                                goto finish;
                        }

                        safe_close(null_fd);
                        null_fd = copy;
                }
        }

        /* Let's assemble fd[] with the fds to install in place of stdin/stdout/stderr */
        for (i = 0; i < 3; i++) {

                if (fd[i] < 0)
                        fd[i] = null_fd;        /* A negative parameter means: connect this one to /dev/null */
                else if (fd[i] != i && fd[i] < 3) {
                        /* This fd is in the 0…2 territory, but not at its intended place, move it out of there, so that we can work there. */
                        copy_fd[i] = fcntl(fd[i], F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
                        if (copy_fd[i] < 0) {
                                r = -errno;
                                goto finish;
                        }

                        fd[i] = copy_fd[i];
                }
        }

        /* At this point we now have the fds to use in fd[], and each of them is either already at its intended
         * place (i.e. fd[i] == i) or above the stdio range, so that we have freedom to move them around. Let's
         * now move them to the right places. This is the point of no return. */
        for (i = 0; i < 3; i++) {

                if (fd[i] == i) {

                        /* fd is already in place, but let's make sure O_CLOEXEC is off */
                        r = fd_cloexec(i, false);
                        if (r < 0)
                                goto finish;

                } else {
                        assert(fd[i] > 2);

                        if (dup2(fd[i], i) < 0) { /* Turns off O_CLOEXEC on the new fd. */
                                r = -errno;
                                goto finish;
                        }
                }
        }

        r = 0;

finish:
        /* Close the original fds, but only if they were outside of the stdio range. Also, properly check for the same
         * fd passed in multiple times. */
        safe_close_above_stdio(original_input_fd);
        if (original_output_fd != original_input_fd)
                safe_close_above_stdio(original_output_fd);
        if (original_error_fd != original_input_fd && original_error_fd != original_output_fd)
                safe_close_above_stdio(original_error_fd);

        /* Close the copies we moved > 2 (entries that were never used are still -1, which safe_close() ignores) */
        for (i = 0; i < 3; i++)
                safe_close(copy_fd[i]);

        /* Close our null fd, if it's > 2 */
        safe_close_above_stdio(null_fd);

        return r;
}
Commit	Line	Data
53e1b683	1	/* SPDX-License-Identifier: LGPL-2.1+ */
3ffd4af2 LP	2	/***
	3	This file is part of systemd.
	4
	5	Copyright 2010 Lennart Poettering
	6
	7	systemd is free software; you can redistribute it and/or modify it
	8	under the terms of the GNU Lesser General Public License as published by
	9	the Free Software Foundation; either version 2.1 of the License, or
	10	(at your option) any later version.
	11
	12	systemd is distributed in the hope that it will be useful, but
	13	WITHOUT ANY WARRANTY; without even the implied warranty of
	14	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	15	Lesser General Public License for more details.
	16
	17	You should have received a copy of the GNU Lesser General Public License
	18	along with systemd; If not, see <http://www.gnu.org/licenses/>.
	19	***/
	20
11c3a366 TA	21	#include <errno.h>
	22	#include <fcntl.h>
	23	#include <sys/resource.h>
	24	#include <sys/socket.h>
	25	#include <sys/stat.h>
	26	#include <unistd.h>
	27
8fb3f009	28	#include "dirent-util.h"
3ffd4af2	29	#include "fd-util.h"
a548e14d	30	#include "fileio.h"
4aeb20f5	31	#include "fs-util.h"
11c3a366	32	#include "macro.h"
a548e14d	33	#include "memfd-util.h"
11c3a366	34	#include "missing.h"
93cc7779	35	#include "parse-util.h"
11c3a366	36	#include "path-util.h"
df0ff127	37	#include "process-util.h"
93cc7779	38	#include "socket-util.h"
4aeb20f5	39	#include "stdio-util.h"
3ffd4af2 LP	40	#include "util.h"
	41
	42	int close_nointr(int fd) {
	43	assert(fd >= 0);
	44
	45	if (close(fd) >= 0)
	46	return 0;
	47
	48	/*
	49	* Just ignore EINTR; a retry loop is the wrong thing to do on
	50	* Linux.
	51	*
	52	* http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html
	53	* https://bugzilla.gnome.org/show_bug.cgi?id=682819
	54	* http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR
	55	* https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain
	56	*/
	57	if (errno == EINTR)
	58	return 0;
	59
	60	return -errno;
	61	}
	62
	63	int safe_close(int fd) {
	64
	65	/*
	66	* Like close_nointr() but cannot fail. Guarantees errno is
	67	* unchanged. Is a NOP with negative fds passed, and returns
	68	* -1, so that it can be used in this syntax:
	69	*
	70	* fd = safe_close(fd);
	71	*/
	72
	73	if (fd >= 0) {
	74	PROTECT_ERRNO;
	75
	76	/* The kernel might return pretty much any error code
	77	* via close(), but the fd will be closed anyway. The
	78	* only condition we want to check for here is whether
	79	* the fd was invalid at all... */
	80
	81	assert_se(close_nointr(fd) != -EBADF);
	82	}
	83
	84	return -1;
	85	}
	86
	87	void safe_close_pair(int p[]) {
	88	assert(p);
	89
	90	if (p[0] == p[1]) {
	91	/* Special case pairs which use the same fd in both
	92	* directions... */
	93	p[0] = p[1] = safe_close(p[0]);
	94	return;
	95	}
	96
	97	p[0] = safe_close(p[0]);
	98	p[1] = safe_close(p[1]);
	99	}
	100
	101	void close_many(const int fds[], unsigned n_fd) {
	102	unsigned i;
	103
104	assert(fds \|\| n_fd <= 0);
105
106	for (i = 0; i < n_fd; i++)
107	safe_close(fds[i]);
108	}
109
110	int fclose_nointr(FILE *f) {
111	assert(f);
112
113	/* Same as close_nointr(), but for fclose() */
114
115	if (fclose(f) == 0)
116	return 0;
117
118	if (errno == EINTR)
119	return 0;
120
121	return -errno;
122	}
123
124	FILE* safe_fclose(FILE *f) {
125
126	/* Same as safe_close(), but for fclose() */
127
128	if (f) {
129	PROTECT_ERRNO;
130
131	assert_se(fclose_nointr(f) != EBADF);
132	}
133
134	return NULL;
135	}
136
137	DIR* safe_closedir(DIR *d) {
138
139	if (d) {
140	PROTECT_ERRNO;
141
142	assert_se(closedir(d) >= 0 \|\| errno != EBADF);
143	}
144
145	return NULL;
146	}
147
148	int fd_nonblock(int fd, bool nonblock) {
149	int flags, nflags;
150
151	assert(fd >= 0);
152
153	flags = fcntl(fd, F_GETFL, 0);
154	if (flags < 0)
155	return -errno;
156
157	if (nonblock)
158	nflags = flags \| O_NONBLOCK;
159	else
160	nflags = flags & ~O_NONBLOCK;
161
162	if (nflags == flags)
163	return 0;
164
165	if (fcntl(fd, F_SETFL, nflags) < 0)
166	return -errno;
167
168	return 0;
169	}
170
171	int fd_cloexec(int fd, bool cloexec) {
172	int flags, nflags;
173
174	assert(fd >= 0);
175
176	flags = fcntl(fd, F_GETFD, 0);
177	if (flags < 0)
178	return -errno;
179
180	if (cloexec)
181	nflags = flags \| FD_CLOEXEC;
182	else
183	nflags = flags & ~FD_CLOEXEC;
184
185	if (nflags == flags)
186	return 0;
187
188	if (fcntl(fd, F_SETFD, nflags) < 0)
189	return -errno;
190
191	return 0;
192	}
193
194	_pure_ static bool fd_in_set(int fd, const int fdset[], unsigned n_fdset) {
195	unsigned i;
196
197	assert(n_fdset == 0 \|\| fdset);
198
199	for (i = 0; i < n_fdset; i++)
200	if (fdset[i] == fd)
201	return true;
202
203	return false;
204	}
205
206	int close_all_fds(const int except[], unsigned n_except) {
207	_cleanup_closedir_ DIR *d = NULL;
208	struct dirent *de;
209	int r = 0;
210
211	assert(n_except == 0 \|\| except);
212
213	d = opendir("/proc/self/fd");
214	if (!d) {
215	int fd;
216	struct rlimit rl;
217
218	/* When /proc isn't available (for example in chroots)
219	* the fallback is brute forcing through the fd
220	* table */
221
222	assert_se(getrlimit(RLIMIT_NOFILE, &rl) >= 0);
223	for (fd = 3; fd < (int) rl.rlim_max; fd ++) {
e43bc9f5	224	int q;
3ffd4af2 LP	225
	226	if (fd_in_set(fd, except, n_except))
	227	continue;
	228
e43bc9f5 LP	229	q = close_nointr(fd);
	230	if (q < 0 && q != -EBADF && r >= 0)
	231	r = q;
3ffd4af2 LP	232	}
	233
	234	return r;
	235	}
	236
8fb3f009	237	FOREACH_DIRENT(de, d, return -errno) {
e43bc9f5	238	int fd = -1, q;
3ffd4af2	239
3ffd4af2 LP	240	if (safe_atoi(de->d_name, &fd) < 0)
	241	/* Let's better ignore this, just in case */
	242	continue;
	243
	244	if (fd < 3)
	245	continue;
	246
	247	if (fd == dirfd(d))
	248	continue;
	249
	250	if (fd_in_set(fd, except, n_except))
	251	continue;
	252
e43bc9f5 LP	253	q = close_nointr(fd);
	254	if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */
	255	r = q;
3ffd4af2 LP	256	}
	257
	258	return r;
	259	}
	260
	261	int same_fd(int a, int b) {
	262	struct stat sta, stb;
	263	pid_t pid;
	264	int r, fa, fb;
	265
	266	assert(a >= 0);
	267	assert(b >= 0);
	268
	269	/* Compares two file descriptors. Note that semantics are
	270	* quite different depending on whether we have kcmp() or we
	271	* don't. If we have kcmp() this will only return true for
	272	* dup()ed file descriptors, but not otherwise. If we don't
	273	* have kcmp() this will also return true for two fds of the same
	274	* file, created by separate open() calls. Since we use this
	275	* call mostly for filtering out duplicates in the fd store
	276	* this difference hopefully doesn't matter too much. */
	277
	278	if (a == b)
	279	return true;
	280
	281	/* Try to use kcmp() if we have it. */
df0ff127	282	pid = getpid_cached();
3ffd4af2 LP	283	r = kcmp(pid, pid, KCMP_FILE, a, b);
	284	if (r == 0)
	285	return true;
	286	if (r > 0)
	287	return false;
	288	if (errno != ENOSYS)
	289	return -errno;
	290
	291	/* We don't have kcmp(), use fstat() instead. */
	292	if (fstat(a, &sta) < 0)
	293	return -errno;
	294
	295	if (fstat(b, &stb) < 0)
	296	return -errno;
	297
	298	if ((sta.st_mode & S_IFMT) != (stb.st_mode & S_IFMT))
	299	return false;
	300
	301	/* We consider all device fds different, since two device fds
	302	* might refer to quite different device contexts even though
	303	* they share the same inode and backing dev_t. */
	304
	305	if (S_ISCHR(sta.st_mode) \|\| S_ISBLK(sta.st_mode))
	306	return false;
	307
	308	if (sta.st_dev != stb.st_dev \|\| sta.st_ino != stb.st_ino)
	309	return false;
	310
	311	/* The fds refer to the same inode on disk, let's also check
	312	* if they have the same fd flags. This is useful to
	313	* distinguish the read and write side of a pipe created with
	314	* pipe(). */
	315	fa = fcntl(a, F_GETFL);
	316	if (fa < 0)
	317	return -errno;
	318
	319	fb = fcntl(b, F_GETFL);
	320	if (fb < 0)
	321	return -errno;
	322
	323	return fa == fb;
	324	}
	325
	326	void cmsg_close_all(struct msghdr *mh) {
	327	struct cmsghdr *cmsg;
	328
	329	assert(mh);
	330
	331	CMSG_FOREACH(cmsg, mh)
	332	if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
	333	close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int));
	334	}
4fee3975 LP	335
	336	bool fdname_is_valid(const char *s) {
	337	const char *p;
	338
	339	/* Validates a name for $LISTEN_FDNAMES. We basically allow
	340	* everything ASCII that's not a control character. Also, as
	341	* special exception the ":" character is not allowed, as we
	342	* use that as field separator in $LISTEN_FDNAMES.
	343	*
	344	* Note that the empty string is explicitly allowed
	345	* here. However, we limit the length of the names to 255
	346	* characters. */
	347
	348	if (!s)
	349	return false;
	350
	351	for (p = s; *p; p++) {
	352	if (*p < ' ')
	353	return false;
	354	if (*p >= 127)
	355	return false;
	356	if (*p == ':')
	357	return false;
	358	}
	359
	360	return p - s < 256;
	361	}
4aeb20f5 LP	362
4aeb20f5 LP	363	int fd_get_path(int fd, char **ret) {
3ceae1bc ZJS	364	_cleanup_close_ int dir = -1;
3ceae1bc ZJS	365	char fdname[DECIMAL_STR_MAX(int)];
a0fe2a2d	366	int r;
4aeb20f5	367
3ceae1bc ZJS	368	dir = open("/proc/self/fd/", O_CLOEXEC \| O_DIRECTORY \| O_PATH);
	369	if (dir < 0)
	370	/* /proc is not available or not set up properly, we're most likely
	371	* in some chroot environment. */
	372	return errno == ENOENT ? -EOPNOTSUPP : -errno;
4aeb20f5	373
3ceae1bc	374	xsprintf(fdname, "%i", fd);
a0fe2a2d	375
3ceae1bc ZJS	376	r = readlinkat_malloc(dir, fdname, ret);
	377	if (r == -ENOENT)
	378	/* If the file doesn't exist the fd is invalid */
a0fe2a2d LP	379	return -EBADF;
	380
	381	return r;
4aeb20f5	382	}
046a82c1 LP	383
	384	int move_fd(int from, int to, int cloexec) {
	385	int r;
	386
	387	/* Move fd 'from' to 'to', make sure FD_CLOEXEC remains equal if requested, and release the old fd. If
	388	* 'cloexec' is passed as -1, the original FD_CLOEXEC is inherited for the new fd. If it is 0, it is turned
	389	* off, if it is > 0 it is turned on. */
	390
	391	if (from < 0)
	392	return -EBADF;
	393	if (to < 0)
	394	return -EBADF;
	395
	396	if (from == to) {
	397
	398	if (cloexec >= 0) {
	399	r = fd_cloexec(to, cloexec);
	400	if (r < 0)
	401	return r;
	402	}
	403
	404	return to;
	405	}
	406
	407	if (cloexec < 0) {
	408	int fl;
	409
	410	fl = fcntl(from, F_GETFD, 0);
	411	if (fl < 0)
	412	return -errno;
	413
	414	cloexec = !!(fl & FD_CLOEXEC);
	415	}
	416
	417	r = dup3(from, to, cloexec ? O_CLOEXEC : 0);
	418	if (r < 0)
	419	return -errno;
	420
	421	assert(r == to);
	422
	423	safe_close(from);
	424
	425	return to;
	426	}
a548e14d LP	427
	428	int acquire_data_fd(const void *data, size_t size, unsigned flags) {
	429
fbd0b64f	430	char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
a548e14d LP	431	_cleanup_close_pair_ int pipefds[2] = { -1, -1 };
	432	char pattern[] = "/dev/shm/data-fd-XXXXXX";
	433	_cleanup_close_ int fd = -1;
	434	int isz = 0, r;
	435	ssize_t n;
	436	off_t f;
	437
	438	assert(data \|\| size == 0);
	439
	440	/* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
	441	* complex than I wish it was. But here's why:
	442	*
	443	* a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
	444	* read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
	445	*
	446	* b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
	447	* a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
	448	* clients can only bump their size to a system-wide limit, which might be quite low.
	449	*
	450	* c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
	451	* earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
	452	* /proc/self/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
	453	*
	454	* d) Finally, we try creating a regular file in /dev/shm, which we then delete.
	455	*
	456	* It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
	457	* figure. */
	458
	459	if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
	460	/* As a special case, return /dev/null if we have been called for an empty data block */
	461	r = open("/dev/null", O_RDONLY\|O_CLOEXEC\|O_NOCTTY);
	462	if (r < 0)
	463	return -errno;
	464
	465	return r;
	466	}
	467
	468	if ((flags & ACQUIRE_NO_MEMFD) == 0) {
	469	fd = memfd_new("data-fd");
	470	if (fd < 0)
	471	goto try_pipe;
	472
	473	n = write(fd, data, size);
	474	if (n < 0)
	475	return -errno;
	476	if ((size_t) n != size)
	477	return -EIO;
	478
	479	f = lseek(fd, 0, SEEK_SET);
	480	if (f != 0)
	481	return -errno;
	482
	483	r = memfd_set_sealed(fd);
	484	if (r < 0)
	485	return r;
	486
c10d6bdb	487	return TAKE_FD(fd);
a548e14d LP	488	}
	489
	490	try_pipe:
	491	if ((flags & ACQUIRE_NO_PIPE) == 0) {
	492	if (pipe2(pipefds, O_CLOEXEC\|O_NONBLOCK) < 0)
	493	return -errno;
	494
	495	isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
	496	if (isz < 0)
	497	return -errno;
	498
	499	if ((size_t) isz < size) {
	500	isz = (int) size;
	501	if (isz < 0 \|\| (size_t) isz != size)
	502	return -E2BIG;
	503
	504	/* Try to bump the pipe size */
	505	(void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);
	506
	507	/* See if that worked */
	508	isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
	509	if (isz < 0)
	510	return -errno;
	511
	512	if ((size_t) isz < size)
	513	goto try_dev_shm;
	514	}
	515
	516	n = write(pipefds[1], data, size);
	517	if (n < 0)
	518	return -errno;
	519	if ((size_t) n != size)
	520	return -EIO;
	521
	522	(void) fd_nonblock(pipefds[0], false);
	523
c10d6bdb	524	return TAKE_FD(pipefds[0]);
a548e14d LP	525	}
	526
	527	try_dev_shm:
	528	if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
	529	fd = open("/dev/shm", O_RDWR\|O_TMPFILE\|O_CLOEXEC, 0500);
	530	if (fd < 0)
	531	goto try_dev_shm_without_o_tmpfile;
	532
	533	n = write(fd, data, size);
	534	if (n < 0)
	535	return -errno;
	536	if ((size_t) n != size)
	537	return -EIO;
	538
	539	/* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
	540	xsprintf(procfs_path, "/proc/self/fd/%i", fd);
	541	r = open(procfs_path, O_RDONLY\|O_CLOEXEC);
	542	if (r < 0)
	543	return -errno;
	544
	545	return r;
	546	}
	547
	548	try_dev_shm_without_o_tmpfile:
	549	if ((flags & ACQUIRE_NO_REGULAR) == 0) {
	550	fd = mkostemp_safe(pattern);
	551	if (fd < 0)
	552	return fd;
	553
	554	n = write(fd, data, size);
	555	if (n < 0) {
	556	r = -errno;
	557	goto unlink_and_return;
	558	}
	559	if ((size_t) n != size) {
	560	r = -EIO;
	561	goto unlink_and_return;
	562	}
	563
	564	/* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
	565	r = open(pattern, O_RDONLY\|O_CLOEXEC);
	566	if (r < 0)
	567	r = -errno;
	568
	569	unlink_and_return:
	570	(void) unlink(pattern);
	571	return r;
	572	}
	573
	574	return -EOPNOTSUPP;
	575	}
7fe2903c LP	576
	577	int fd_move_above_stdio(int fd) {
	578	int flags, copy;
	579	PROTECT_ERRNO;
	580
	581	/* Moves the specified file descriptor if possible out of the range [0…2], i.e. the range of
	582	* stdin/stdout/stderr. If it can't be moved outside of this range the original file descriptor is
	583	* returned. This call is supposed to be used for long-lasting file descriptors we allocate in our code that
	584	* might get loaded into foreign code, and where we want ensure our fds are unlikely used accidentally as
	585	* stdin/stdout/stderr of unrelated code.
	586	*
	587	* Note that this doesn't fix any real bugs, it just makes it less likely that our code will be affected by
	588	* buggy code from others that mindlessly invokes 'fprintf(stderr, …' or similar in places where stderr has
	589	* been closed before.
	590	*
	591	* This function is written in a "best-effort" and "least-impact" style. This means whenever we encounter an
	592	* error we simply return the original file descriptor, and we do not touch errno. */
	593
	594	if (fd < 0 \|\| fd > 2)
	595	return fd;
	596
	597	flags = fcntl(fd, F_GETFD, 0);
	598	if (flags < 0)
	599	return fd;
	600
	601	if (flags & FD_CLOEXEC)
	602	copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
	603	else
	604	copy = fcntl(fd, F_DUPFD, 3);
	605	if (copy < 0)
	606	return fd;
	607
	608	assert(copy > 2);
	609
	610	(void) close(fd);
	611	return copy;
	612	}
aa11e28b LP	613
	614	int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd) {
	615
	616	int fd[3] = { /* Put together an array of fds we work on */
	617	original_input_fd,
	618	original_output_fd,
	619	original_error_fd
	620	};
	621
	622	int r, i,
	623	null_fd = -1, /* if we open /dev/null, we store the fd to it here */
	624	copy_fd[3] = { -1, -1, -1 }; /* This contains all fds we duplicate here temporarily, and hence need to close at the end */
	625	bool null_readable, null_writable;
	626
	627	/* Sets up stdin, stdout, stderr with the three file descriptors passed in. If any of the descriptors is
	628	* specified as -1 it will be connected with /dev/null instead. If any of the file descriptors is passed as
	629	* itself (e.g. stdin as STDIN_FILENO) it is left unmodified, but the O_CLOEXEC bit is turned off should it be
	630	* on.
	631	*
	632	* Note that if any of the passed file descriptors are > 2 they will be closed — both on success and on
	633	* failure! Thus, callers should assume that when this function returns the input fds are invalidated.
	634	*
	635	* Note that when this function fails stdin/stdout/stderr might remain half set up!
	636	*
	637	* O_CLOEXEC is turned off for all three file descriptors (which is how it should be for
	638	* stdin/stdout/stderr). */
	639
	640	null_readable = original_input_fd < 0;
	641	null_writable = original_output_fd < 0 \|\| original_error_fd < 0;
	642
	643	/* First step, open /dev/null once, if we need it */
	644	if (null_readable \|\| null_writable) {
	645
	646	/* Let's open this with O_CLOEXEC first, and convert it to non-O_CLOEXEC when we move the fd to the final position. */
	647	null_fd = open("/dev/null", (null_readable && null_writable ? O_RDWR :
	648	null_readable ? O_RDONLY : O_WRONLY) \| O_CLOEXEC);
	649	if (null_fd < 0) {
	650	r = -errno;
	651	goto finish;
	652	}
	653
	654	/* If this fd is in the 0…2 range, let's move it out of it */
	655	if (null_fd < 3) {
	656	int copy;
	657
	658	copy = fcntl(null_fd, F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
	659	if (copy < 0) {
	660	r = -errno;
	661	goto finish;
	662	}
	663
	664	safe_close(null_fd);
	665	null_fd = copy;
	666	}
	667	}
	668
	669	/* Let's assemble fd[] with the fds to install in place of stdin/stdout/stderr */
	670	for (i = 0; i < 3; i++) {
	671
	672	if (fd[i] < 0)
	673	fd[i] = null_fd; /* A negative parameter means: connect this one to /dev/null */
	674	else if (fd[i] != i && fd[i] < 3) {
	675	/* This fd is in the 0…2 territory, but not at its intended place, move it out of there, so that we can work there. */
	676	copy_fd[i] = fcntl(fd[i], F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
677	if (copy_fd[i] < 0) {
678	r = -errno;
679	goto finish;
680	}
681
682	fd[i] = copy_fd[i];
683	}
684	}
685
686	/* At this point we now have the fds to use in fd[], and they are all above the stdio range, so that we
687	* have freedom to move them around. If the fds already were at the right places then the specific fds are
688	* -1. Let's now move them to the right places. This is the point of no return. */
689	for (i = 0; i < 3; i++) {
690
691	if (fd[i] == i) {
692
693	/* fd is already in place, but let's make sure O_CLOEXEC is off */
694	r = fd_cloexec(i, false);
695	if (r < 0)
696	goto finish;
697
698	} else {
699	assert(fd[i] > 2);
700
701	if (dup2(fd[i], i) < 0) { /* Turns off O_CLOEXEC on the new fd. */
702	r = -errno;
703	goto finish;
704	}
705	}
706	}
707
708	r = 0;
709
710	finish:
711	/* Close the original fds, but only if they were outside of the stdio range. Also, properly check for the same
712	* fd passed in multiple times. */
713	safe_close_above_stdio(original_input_fd);
714	if (original_output_fd != original_input_fd)
715	safe_close_above_stdio(original_output_fd);
716	if (original_error_fd != original_input_fd && original_error_fd != original_output_fd)
717	safe_close_above_stdio(original_error_fd);
718
719	/* Close the copies we moved > 2 */
720	for (i = 0; i < 3; i++)
721	safe_close(copy_fd[i]);
722
723	/* Close our null fd, if it's > 2 */
724	safe_close_above_stdio(null_fd);
725
726	return r;
727	}