From: Lennart Poettering Date: Thu, 22 Jun 2023 08:28:13 +0000 (+0200) Subject: async: stop using threads for asynchronous_close() X-Git-Tag: v254-rc1~133^2~11 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c26d7837bb08508c8d906d849dff8f1bc465063e;p=thirdparty%2Fsystemd.git async: stop using threads for asynchronous_close() Let's work towards PID1 being purely single threaded again. Let's rework asynchronous_close() on top of clone() with CLONE_FILES (so that we can manipulate PID1's fd table correctly). One less use of pthread_create() in PID 1. --- diff --git a/src/basic/async.c b/src/basic/async.c index 5e347dde8b0..c0e1641cb2b 100644 --- a/src/basic/async.c +++ b/src/basic/async.c @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include "async.h" @@ -79,29 +81,80 @@ int asynchronous_sync(pid_t *ret_pid) { return 0; } -static void *close_thread(void *p) { - (void) pthread_setname_np(pthread_self(), "close"); +/* We encode the fd to close in the userdata pointer as an unsigned value. The highest bit indicates whether + * we need to fork again */ +#define NEED_DOUBLE_FORK (1U << (sizeof(unsigned) * 8 - 1)) - assert_se(close_nointr(PTR_TO_FD(p)) != -EBADF); - return NULL; +static int close_func(void *p) { + unsigned v = PTR_TO_UINT(p); + + (void) prctl(PR_SET_NAME, (unsigned long*) "(close)"); + + /* Note: 💣 This function is invoked in a child process created via glibc's clone() wrapper. In such + * children memory allocation is not allowed, since glibc does not release malloc mutexes in + * clone() 💣 */ + + if (v & NEED_DOUBLE_FORK) { + pid_t pid; + + v &= ~NEED_DOUBLE_FORK; + + /* This inner child will be reparented to the subreaper/PID 1. Here we turn on SIGCHLD, so + * that the reaper knows when it's time to reap. */ + pid = clone_with_nested_stack(close_func, SIGCHLD|CLONE_FILES, UINT_TO_PTR(v)); + if (pid >= 0) + return 0; + } + + close((int) v); /* no assert() here, we are in the child and the result would be eaten up anyway */ + return 0; } int asynchronous_close(int fd) { + unsigned v; + pid_t pid; int r; - /* This is supposed to behave similar to safe_close(), but - * actually invoke close() asynchronously, so that it will - * never block. Ideally the kernel would have an API for this, - * but it doesn't, so we work around it, and hide this as a - * far away as we can. */ + /* This is supposed to behave similar to safe_close(), but actually invoke close() asynchronously, so + * that it will never block. Ideally the kernel would have an API for this, but it doesn't, so we + * work around it, and hide this as a far away as we can. + * + * It is important to us that we don't use threads (via glibc pthread) in PID 1, hence we'll do a + * minimal subprocess instead which shares our fd table via CLONE_FILES. */ + + if (fd < 0) + return -EBADF; /* already invalid */ - if (fd >= 0) { - PROTECT_ERRNO; + PROTECT_ERRNO; - r = asynchronous_job(close_thread, FD_TO_PTR(fd)); - if (r < 0) - assert_se(close_nointr(fd) != -EBADF); + v = (unsigned) fd; + + /* We want to fork off a process that is automatically reaped. For that we'd usually double-fork. But + * we can optimize this a bit: if we are PID 1 or a subreaper anyway (the systemd service manager + * process qualifies as this), we can avoid the double forking, since the double forked process would + * be reparented back to us anyway. */ + r = is_reaper_process(); + if (r < 0) + log_debug_errno(r, "Cannot determine if we are a reaper process, assuming we are not: %m"); + if (r <= 0) + v |= NEED_DOUBLE_FORK; + + pid = clone_with_nested_stack(close_func, CLONE_FILES | ((v & NEED_DOUBLE_FORK) ? 0 : SIGCHLD), UINT_TO_PTR(v)); + if (pid < 0) + assert_se(close_nointr(fd) != -EBADF); /* local fallback */ + else if (v & NEED_DOUBLE_FORK) { + + /* Reap the intermediate child. Key here is that we specify __WCLONE, since we didn't ask for + * any signal to be sent to us on process exit, and otherwise waitid() would refuse waiting + * then. + * + * We usually prefer calling waitid(), but before kernel 4.7 it didn't support __WCLONE while + * waitpid() did. Hence let's use waitpid() here, it's good enough for our purposes here. */ + for (;;) { + if (waitpid(pid, NULL, WEXITED|__WCLONE) >= 0 || errno != EINTR) + break; + } } - return -EBADF; + return -EBADF; /* return an invalidated fd */ } diff --git a/src/test/test-async.c b/src/test/test-async.c index 69785e47fad..c2bf8011969 100644 --- a/src/test/test-async.c +++ b/src/test/test-async.c @@ -1,12 +1,14 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #include +#include #include #include "async.h" #include "fs-util.h" -#include "tmpfile-util.h" +#include "process-util.h" #include "tests.h" +#include "tmpfile-util.h" static bool test_async = false; @@ -17,21 +19,44 @@ static void *async_func(void *arg) { } TEST(test_async) { + assert_se(asynchronous_job(async_func, NULL) >= 0); + assert_se(asynchronous_sync(NULL) >= 0); + + assert_se(test_async); +} + +TEST(asynchronous_close) { _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-asynchronous_close.XXXXXX"; - int fd; + int fd, r; fd = mkostemp_safe(name); assert_se(fd >= 0); asynchronous_close(fd); - assert_se(asynchronous_job(async_func, NULL) >= 0); - assert_se(asynchronous_sync(NULL) >= 0); - sleep(1); assert_se(fcntl(fd, F_GETFD) == -1); assert_se(errno == EBADF); - assert_se(test_async); + + r = safe_fork("(subreaper)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL); + assert(r >= 0); + + if (r == 0) { + /* child */ + + assert(prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) >= 0); + + fd = open("/dev/null", O_RDONLY|O_CLOEXEC); + assert_se(fd >= 0); + asynchronous_close(fd); + + sleep(1); + + assert_se(fcntl(fd, F_GETFD) == -1); + assert_se(errno == EBADF); + + _exit(EXIT_SUCCESS); + } } DEFINE_TEST_MAIN(LOG_DEBUG);