From: Lennart Poettering Date: Tue, 23 May 2023 15:24:46 +0000 (+0200) Subject: pid1: when taking possession of passed fds check O_CLOEXEC state first X-Git-Tag: v254-rc1~404^2~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a3dff21ae882adadd946c51284bac9b14568f598;p=thirdparty%2Fsystemd.git pid1: when taking possession of passed fds check O_CLOEXEC state first So here's the thing. One library we use (libselinux) is opening fds behind our back when we initialize it and keeps it open. On the other hand we want to automatically pick up all fds passed in to us, so that we can distribute them to our services and close the rest. We pick them up very early in our code, to ensure that we don't get confused by open fds at that point. Except that libselinux insists on being initialized even earlier. So suddenly we might take possession of libselinux' fds, and then close them later when we decide no service wants them. Then during shutdown we close down selinux and selinux closes its fds, but since already closed long ago this ight close our fds instead. Hilarity ensues. I wish low-level software wouldn't do such things behind our back, but well, let's make the best of it. This changes the fd pick-up logic to only pick up fds that have O_CLOEXEC unset. O_CLOEXEC must be unset for any fds passed in to us over execve() after all. And for all our own fds we should set O_CLOEXEC since we generally don't want to litter fd tables for execve(). Also, libselinux thankfully appears to set O_CLOEXEC correctly on its fds, hence the filter works. Fixes: #27491 --- diff --git a/src/core/main.c b/src/core/main.c index 3fbfc5b7414..08d416bc149 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -2675,16 +2675,24 @@ static int collect_fds(FDSet **ret_fds, const char **ret_error_message) { assert(ret_fds); assert(ret_error_message); - r = fdset_new_fill(ret_fds); + /* Pick up all fds passed to us. We apply a filter here: we only take the fds that have O_CLOEXEC + * off. All fds passed via execve() to us must have O_CLOEXEC off, and our own code and dependencies + * should be clean enough to set O_CLOEXEC universally. Thus checking the bit should be a safe + * mechanism to distinguish passed in fds from our own. + * + * Why bother? Some subsystems we initialize early, specifically selinux might keep fds open in our + * process behind our back. We should not take possession of that (and then accidentally close + * it). SELinux thankfully sets O_CLOEXEC on its fds, so this test should work. */ + r = fdset_new_fill(/* filter_cloexec= */ 0, ret_fds); if (r < 0) { *ret_error_message = "Failed to allocate fd set"; return log_emergency_errno(r, "Failed to allocate fd set: %m"); } - fdset_cloexec(*ret_fds, true); + (void) fdset_cloexec(*ret_fds, true); - if (arg_serialization) - assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0); + /* The serialization fd should have O_CLOEXEC turned on already, let's verify that we didn't pick it up here */ + assert_se(!arg_serialization || !fdset_contains(*ret_fds, fileno(arg_serialization))); return 0; } diff --git a/src/notify/notify.c b/src/notify/notify.c index 1aecd0e7817..c15df588ef2 100644 --- a/src/notify/notify.c +++ b/src/notify/notify.c @@ -221,7 +221,7 @@ static int parse_argv(int argc, char *argv[]) { if (!passed) { /* Take possession of all passed fds */ - r = fdset_new_fill(&passed); + r = fdset_new_fill(/* filter_cloexec= */ 0, &passed); if (r < 0) return log_error_errno(r, "Failed to take possession of passed file descriptors: %m"); diff --git a/src/shared/fdset.c b/src/shared/fdset.c index 2138ffcdb92..86b3139999e 100644 --- a/src/shared/fdset.c +++ b/src/shared/fdset.c @@ -139,7 +139,9 @@ int fdset_remove(FDSet *s, int fd) { return set_remove(MAKE_SET(s), FD_TO_PTR(fd)) ? fd : -ENOENT; } -int fdset_new_fill(FDSet **ret) { +int fdset_new_fill( + int filter_cloexec, /* if < 0 takes all fds, otherwise only those with O_CLOEXEC set (1) or unset (0) */ + FDSet **ret) { _cleanup_(fdset_shallow_freep) FDSet *s = NULL; _cleanup_closedir_ DIR *d = NULL; int r; @@ -172,6 +174,20 @@ int fdset_new_fill(FDSet **ret) { if (fd == dirfd(d)) continue; + if (filter_cloexec >= 0) { + int fl; + + /* If user asked for that filter by O_CLOEXEC. This is useful so that fds that have + * been passed in can be collected and fds which have been created locally can be + * ignored, under the assumption that only the latter have O_CLOEXEC set. */ + fl = fcntl(fd, F_GETFD); + if (fl < 0) + return -errno; + + if (FLAGS_SET(fl, FD_CLOEXEC) != !!filter_cloexec) + continue; + } + r = fdset_put(s, fd); if (r < 0) return r; diff --git a/src/shared/fdset.h b/src/shared/fdset.h index 9700cdbbca8..70a764fb4d3 100644 --- a/src/shared/fdset.h +++ b/src/shared/fdset.h @@ -20,7 +20,7 @@ bool fdset_contains(FDSet *s, int fd); int fdset_remove(FDSet *s, int fd); int fdset_new_array(FDSet **ret, const int *fds, size_t n_fds); -int fdset_new_fill(FDSet **ret); +int fdset_new_fill(int filter_cloexec, FDSet **ret); int fdset_new_listen_fds(FDSet **ret, bool unset); int fdset_cloexec(FDSet *fds, bool b); diff --git a/src/test/test-fdset.c b/src/test/test-fdset.c index e2b8f948330..01e21a5599e 100644 --- a/src/test/test-fdset.c +++ b/src/test/test-fdset.c @@ -17,7 +17,7 @@ TEST(fdset_new_fill) { fd = mkostemp_safe(name); assert_se(fd >= 0); - assert_se(fdset_new_fill(&fdset) >= 0); + assert_se(fdset_new_fill(/* filter_cloexec= */ -1, &fdset) >= 0); assert_se(fdset_contains(fdset, fd)); }