]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
process-util: always retry with pidfd_spawn() w/o cgroup first 34053/head
authorMike Yuan <me@yhndnzj.com>
Tue, 20 Aug 2024 18:04:46 +0000 (20:04 +0200)
committerMike Yuan <me@yhndnzj.com>
Wed, 21 Aug 2024 13:27:57 +0000 (15:27 +0200)
Follow-up for 7ac58157ca67ab001307f1fd72e0cc7c0c4e846a

With the mentioned commit, iff E2BIG we'd retry pidfd_spawn()
with POSIX_SPAWN_SETCGROUP disabled. However, the same strategy
should actually apply to EOPNOTSUPP/ENOSYS/EPERM too -
they can mean two things here: no clone3() or no CLONE_PIDFD.
Therefore, let's first try clone() + CLONE_PIDFD, and fall further back
to plain clone() (posix_spawn()) only as last resort. Plus, record
the fact so that we don't unnecessarily retry every single time
if CLONE_PIDFD is the one that's unavailable.

src/basic/process-util.c

index 711457fa7b70dff1ea504782f9597cc9c80bb2ed..f30d9117a7eb6671d13c969c12c25350e835029e 100644 (file)
@@ -2059,10 +2059,14 @@ int posix_spawn_wrapper(
         _unused_ _cleanup_(posix_spawnattr_destroyp) posix_spawnattr_t *attr_destructor = &attr;
 
 #if HAVE_PIDFD_SPAWN
-        static bool setcgroup_supported = true;
+        static enum {
+                CLONE_ONLY_PID,
+                CLONE_CAN_PIDFD,  /* 5.2 */
+                CLONE_CAN_CGROUP, /* 5.7 */
+        } clone_support = CLONE_CAN_CGROUP;
         _cleanup_close_ int cgroup_fd = -EBADF;
 
-        if (cgroup && setcgroup_supported) {
+        if (cgroup && clone_support >= CLONE_CAN_CGROUP) {
                 _cleanup_free_ char *resolved_cgroup = NULL;
 
                 r = cg_get_path_and_check(
@@ -2093,45 +2097,43 @@ int posix_spawn_wrapper(
                 return -r;
 
 #if HAVE_PIDFD_SPAWN
-        _cleanup_close_ int pidfd = -EBADF;
-
-        r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
-        if (r == E2BIG && FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP)) {
-                /* Some kernels (e.g., 5.4) support clone3 but they do not support CLONE_INTO_CGROUP.
-                 * Retry pidfd_spawn() after removing the flag. */
-                flags &= ~POSIX_SPAWN_SETCGROUP;
-                r = posix_spawnattr_setflags(&attr, flags);
-                if (r != 0)
-                        return -r;
-                r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
-                /* if pidfd_spawn was successful after removing SPAWN_CGROUP,
-                 * mark setcgroup_supported as false so that we do not retry every time */
-                if (r == 0)
-                        setcgroup_supported = false;
-        }
-        if (r == 0) {
-                r = pidref_set_pidfd_consume(ret_pidref, TAKE_FD(pidfd));
-                if (r < 0)
-                        return r;
+        if (clone_support >= CLONE_CAN_PIDFD) {
+                _cleanup_close_ int pidfd = -EBADF;
 
-                return FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP);
-        }
-        if (ERRNO_IS_NOT_SUPPORTED(r)) {
-                /* clone3() could also return EOPNOTSUPP if the target cgroup is in threaded mode. */
-                if (FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP) && cg_is_threaded(cgroup) > 0)
+                r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
+                if (ERRNO_IS_NOT_SUPPORTED(r) && FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP) &&
+                    cg_is_threaded(cgroup) > 0) /* clone3() could also return EOPNOTSUPP if the target cgroup is in threaded mode. */
                         return -EUCLEAN;
+                if ((ERRNO_IS_NOT_SUPPORTED(r) || ERRNO_IS_PRIVILEGE(r) || r == E2BIG) &&
+                    FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP)) {
+                        /* Compiled on a newer host, or seccomp&friends blocking clone3()? Fallback, but
+                         * need to disable POSIX_SPAWN_SETCGROUP, which is what redirects to clone3().
+                         * Note that we might get E2BIG here since some kernels (e.g. 5.4) support clone3()
+                         * but not CLONE_INTO_CGROUP. */
+
+                        /* CLONE_INTO_CGROUP definitely won't work, hence remember the fact so that we don't
+                         * retry every time. */
+                        assert(clone_support >= CLONE_CAN_CGROUP);
+                        clone_support = CLONE_CAN_PIDFD;
+
+                        flags &= ~POSIX_SPAWN_SETCGROUP;
+                        r = posix_spawnattr_setflags(&attr, flags);
+                        if (r != 0)
+                                return -r;
+
+                        r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
+                }
+                if (r == 0) {
+                        r = pidref_set_pidfd_consume(ret_pidref, TAKE_FD(pidfd));
+                        if (r < 0)
+                                return r;
 
-                /* clone3() not available? */
-        } else if (!ERRNO_IS_PRIVILEGE(r))
-                return -r;
-
-        /* Compiled on a newer host, or seccomp&friends blocking clone3()? Fallback, but need to change the
-         * flags to remove the cgroup one, which is what redirects to clone3() */
-        if (FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP)) {
-                flags &= ~POSIX_SPAWN_SETCGROUP;
-                r = posix_spawnattr_setflags(&attr, flags);
-                if (r != 0)
+                        return FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP);
+                }
+                if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
                         return -r;
+
+                clone_support = CLONE_ONLY_PID; /* No CLONE_PIDFD either? */
         }
 #endif