]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
Revert "core: do not leak mount for credentials directory if mount namespace is enabled"
authorYu Watanabe <watanabe.yu+github@gmail.com>
Fri, 25 Aug 2023 06:54:52 +0000 (15:54 +0900)
committerYu Watanabe <watanabe.yu+github@gmail.com>
Wed, 6 Sep 2023 15:53:28 +0000 (00:53 +0900)
This reverts commits
9ae3624889b98f75efa6fd0c5f4b4de3eaf328d4
  "test-execute: add tests for credentials directory with mount namespace"↲
94fe4cf2557d1f70f20ee02d32f4c2ae6bc1fb3f
  "core: do not leak mount for credentials directory if mount namespace is enabled",
7241b9cd72d6e6079d5140cf24c34e78d3cf43cc
  "core/credential: make setup_credentials() return path to credentials directory",
fbaf3b23ae4aa79110ebd37aada70ce6a044c692
  "core: set $CREDENTIALS_DIRECTORY only when we set up credentials"

Before the commits, credentials directory set up on ExecStart= was kept
on e.g. ExecStop=. But, with the changes, if a service requests a
private mount namespace, the credentials directory is discarded after
ExecStart= is finished.

Let's revert the change, and find better way later.

Addresses the post-merge comment
https://github.com/systemd/systemd/pull/28787#issuecomment-1690614202.

12 files changed:
src/core/credential.c
src/core/credential.h
src/core/execute.c
src/core/namespace.c
src/core/namespace.h
src/test/test-execute.c
src/test/test-namespace.c
src/test/test-ns.c
test/test-execute/exec-load-credential-with-mount-namespace.service [deleted file]
test/test-execute/exec-load-credential-with-seccomp.service [deleted file]
test/test-execute/exec-set-credential-with-mount-namespace.service [deleted file]
test/test-execute/exec-set-credential-with-seccomp.service [deleted file]

index d9a88004c5a689935554f549e1f94a2aaeab0d69..b8b8b4edaa7d6c3bb83bc2428567a16d0092776c 100644 (file)
@@ -10,7 +10,6 @@
 #include "glob-util.h"
 #include "io-util.h"
 #include "label-util.h"
-#include "missing_syscall.h"
 #include "mkdir-label.h"
 #include "mount-util.h"
 #include "mountpoint-util.h"
@@ -18,7 +17,6 @@
 #include "random-util.h"
 #include "recurse-dir.h"
 #include "rm-rf.h"
-#include "socket-util.h"
 #include "tmpfile-util.h"
 
 ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc) {
@@ -732,8 +730,7 @@ static int setup_credentials_internal(
                 bool reuse_workspace,     /* Whether to reuse any existing workspace mount if it already is a mount */
                 bool must_mount,          /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
                 uid_t uid,
-                gid_t gid,
-                int *ret_mount_fd) {
+                gid_t gid) {
 
         int r, workspace_mounted; /* negative if we don't know yet whether we have/can mount something; true
                                    * if we mounted something; false if we definitely can't mount anything */
@@ -851,27 +848,6 @@ static int setup_credentials_internal(
                         if (r < 0)
                                 return r;
 
-                        if (ret_mount_fd) {
-                                _cleanup_close_ int mount_fd = -EBADF;
-
-                                r = mount_fd = RET_NERRNO(open_tree(AT_FDCWD, workspace, OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC));
-                                if (r >= 0) {
-                                        /* The workspace is already cloned in the above, and not necessary
-                                         * anymore. Even though the workspace is unmounted when the short-lived
-                                         * child process exits, let's explicitly unmount it here for safety. */
-                                        r = umount_verbose(LOG_DEBUG, workspace, MNT_DETACH|UMOUNT_NOFOLLOW);
-                                        if (r < 0)
-                                                return r;
-
-                                        *ret_mount_fd = TAKE_FD(mount_fd);
-                                        return 0;
-                                }
-
-                                /* Old kernel? Unprivileged? */
-                                if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
-                                        return r;
-                        }
-
                         /* And mount it to the final place, read-only */
                         r = mount_nofollow_verbose(LOG_DEBUG, workspace, final, NULL, MS_MOVE, NULL);
                 } else
@@ -892,8 +868,6 @@ static int setup_credentials_internal(
                         return -errno;
         }
 
-        if (ret_mount_fd)
-                *ret_mount_fd = -EBADF;
         return 0;
 }
 
@@ -902,25 +876,16 @@ int setup_credentials(
                 const ExecParameters *params,
                 const char *unit,
                 uid_t uid,
-                gid_t gid,
-                char **ret_path,
-                int *ret_mount_fd) {
+                gid_t gid) {
 
-        _cleanup_close_pair_ int socket_pair[2] = PIPE_EBADF;
         _cleanup_free_ char *p = NULL, *q = NULL;
-        _cleanup_close_ int mount_fd = -EBADF;
         int r;
 
         assert(context);
         assert(params);
-        assert(ret_path);
-        assert(ret_mount_fd);
 
-        if (!exec_context_has_credentials(context)) {
-                *ret_path = NULL;
-                *ret_mount_fd = -EBADF;
+        if (!exec_context_has_credentials(context))
                 return 0;
-        }
 
         if (!params->prefix[EXEC_DIRECTORY_RUNTIME])
                 return -EINVAL;
@@ -943,12 +908,7 @@ int setup_credentials(
         if (r < 0 && r != -EEXIST)
                 return r;
 
-        if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, socket_pair) < 0)
-                return -errno;
-
-        r = safe_fork_full("(sd-mkdcreds)",
-                           NULL, &socket_pair[1], 1,
-                           FORK_DEATHSIG|FORK_CLOSE_ALL_FDS|FORK_WAIT|FORK_NEW_MOUNTNS|FORK_REOPEN_LOG, NULL);
+        r = safe_fork("(sd-mkdcreds)", FORK_DEATHSIG|FORK_WAIT|FORK_NEW_MOUNTNS, NULL);
         if (r < 0) {
                 _cleanup_free_ char *t = NULL, *u = NULL;
 
@@ -984,15 +944,14 @@ int setup_credentials(
                                 true,    /* reuse the workspace if it is already a mount */
                                 false,   /* it's OK to fall back to a plain directory if we can't mount anything */
                                 uid,
-                                gid,
-                                NULL);
+                                gid);
 
                 (void) rmdir(u); /* remove the workspace again if we can. */
 
                 if (r < 0)
                         return r;
 
-        } else if (r == 0) { /* child */
+        } else if (r == 0) {
 
                 /* We managed to set up a mount namespace, and are now in a child. That's great. In this case
                  * we can use the same directory for all cases, after turning off propagation. Question
@@ -1011,8 +970,6 @@ int setup_credentials(
                  * given that we do this in a privately namespaced short-lived single-threaded process that
                  * no one else sees this should be OK to do. */
 
-                _cleanup_close_ int fd = -EBADF;
-
                 /* Turn off propagation from our namespace to host */
                 r = mount_nofollow_verbose(LOG_DEBUG, NULL, "/dev", NULL, MS_SLAVE|MS_REC, NULL);
                 if (r < 0)
@@ -1027,14 +984,7 @@ int setup_credentials(
                                 false,       /* do not reuse /dev/shm if it is already a mount, under no circumstances */
                                 true,        /* insist that something is mounted, do not allow fallback to plain directory */
                                 uid,
-                                gid,
-                                &fd);
-                if (r < 0)
-                        goto child_fail;
-
-                r = send_one_fd_iov(socket_pair[1], fd,
-                                    &IOVEC_MAKE((int[]) { fd >= 0 }, sizeof(int)), 1,
-                                    MSG_DONTWAIT);
+                                gid);
                 if (r < 0)
                         goto child_fail;
 
@@ -1042,17 +992,6 @@ int setup_credentials(
 
         child_fail:
                 _exit(EXIT_FAILURE);
-
-        } else { /* parent */
-
-                int ret;
-                struct iovec iov = IOVEC_MAKE(&ret, sizeof(int));
-
-                r = receive_one_fd_iov(socket_pair[0], &iov, 1, MSG_DONTWAIT, &mount_fd);
-                if (r < 0)
-                        return r;
-                if (ret > 0 && mount_fd < 0)
-                        return -EIO;
         }
 
         /* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's
@@ -1060,8 +999,5 @@ int setup_credentials(
          * actually end up mounting anything on it. In that case we'd rather have ENOENT than EACCESS being
          * seen by users when trying access this inode. */
         (void) rmdir(p);
-
-        *ret_path = TAKE_PTR(p);
-        *ret_mount_fd = TAKE_FD(mount_fd);
         return 0;
 }
index 4b936b32d603b239ec8f7bdeeeec02b557a2fc48..54155f515bc5facdb5af5066566a0aa1cf216c50 100644 (file)
@@ -45,6 +45,4 @@ int setup_credentials(
                 const ExecParameters *params,
                 const char *unit,
                 uid_t uid,
-                gid_t gid,
-                char **ret_path,
-                int *ret_mount_fd);
+                gid_t gid);
index eb452a45ec21dbe019d5aeebbb3d3e1cee5cd4f9..14e196d4c7a6c33def1bb901cc0d710d8bb2aed0 100644 (file)
@@ -7,7 +7,6 @@
 #include <sys/file.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
-#include <sys/mount.h>
 #include <sys/personality.h>
 #include <sys/prctl.h>
 #include <sys/shm.h>
@@ -73,7 +72,6 @@
 #include "missing_fs.h"
 #include "missing_ioprio.h"
 #include "missing_prctl.h"
-#include "missing_syscall.h"
 #include "mkdir-label.h"
 #include "namespace.h"
 #include "parse-util.h"
@@ -1866,7 +1864,6 @@ static int build_environment(
                 dev_t journal_stream_dev,
                 ino_t journal_stream_ino,
                 const char *memory_pressure_path,
-                const char *creds_path,
                 char ***ret) {
 
         _cleanup_strv_free_ char **our_env = NULL;
@@ -2044,8 +2041,8 @@ static int build_environment(
                 our_env[n_env++] = x;
         }
 
-        if (creds_path) {
-                x = strjoin("CREDENTIALS_DIRECTORY=", creds_path);
+        if (exec_context_has_credentials(c) && p->prefix[EXEC_DIRECTORY_RUNTIME]) {
+                x = strjoin("CREDENTIALS_DIRECTORY=", p->prefix[EXEC_DIRECTORY_RUNTIME], "/credentials/", u->id);
                 if (!x)
                         return -ENOMEM;
 
@@ -3113,14 +3110,12 @@ static int apply_mount_namespace(
                 const ExecParameters *params,
                 ExecRuntime *runtime,
                 const char *memory_pressure_path,
-                const char *creds_path,
-                int creds_fd,
                 char **error_path) {
 
         _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
         _cleanup_strv_free_ char **empty_directories = NULL, **symlinks = NULL,
                         **read_write_paths_cleanup = NULL;
-        _cleanup_free_ char *incoming_dir = NULL, *propagate_dir = NULL,
+        _cleanup_free_ char *creds_path = NULL, *incoming_dir = NULL, *propagate_dir = NULL,
                         *extension_dir = NULL, *host_os_release_stage = NULL;
         const char *root_dir = NULL, *root_image = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
         char **read_write_paths;
@@ -3222,6 +3217,14 @@ static int apply_mount_namespace(
         if (context->mount_propagation_flag == MS_SHARED)
                 log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
 
+        if (exec_context_has_credentials(context) &&
+            params->prefix[EXEC_DIRECTORY_RUNTIME] &&
+            FLAGS_SET(params->flags, EXEC_WRITE_CREDENTIALS)) {
+                creds_path = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials", u->id);
+                if (!creds_path)
+                        return -ENOMEM;
+        }
+
         if (params->runtime_scope == RUNTIME_SCOPE_SYSTEM) {
                 propagate_dir = path_join("/run/systemd/propagate/", u->id);
                 if (!propagate_dir)
@@ -3290,7 +3293,6 @@ static int apply_mount_namespace(
                         tmp_dir,
                         var_tmp_dir,
                         creds_path,
-                        creds_fd,
                         context->log_namespace,
                         context->mount_propagation_flag,
                         &verity,
@@ -3944,7 +3946,7 @@ static int exec_child(
         int r, ngids = 0, exec_fd;
         _cleanup_free_ gid_t *supplementary_gids = NULL;
         const char *username = NULL, *groupname = NULL;
-        _cleanup_free_ char *home_buffer = NULL, *memory_pressure_path = NULL, *creds_path = NULL;
+        _cleanup_free_ char *home_buffer = NULL, *memory_pressure_path = NULL;
         const char *home = NULL, *shell = NULL;
         char **final_argv = NULL;
         dev_t journal_stream_dev = 0;
@@ -3975,7 +3977,6 @@ static int exec_child(
         int ngids_after_pam = 0;
         _cleanup_free_ int *fds = NULL;
         _cleanup_strv_free_ char **fdnames = NULL;
-        _cleanup_close_ int creds_fd = -EBADF;
 
         assert(unit);
         assert(command);
@@ -4426,7 +4427,7 @@ static int exec_child(
         }
 
         if (FLAGS_SET(params->flags, EXEC_WRITE_CREDENTIALS)) {
-                r = setup_credentials(context, params, unit->id, uid, gid, &creds_path, &creds_fd);
+                r = setup_credentials(context, params, unit->id, uid, gid);
                 if (r < 0) {
                         *exit_status = EXIT_CREDENTIALS;
                         return log_unit_error_errno(unit, r, "Failed to set up credentials: %m");
@@ -4446,7 +4447,6 @@ static int exec_child(
                         journal_stream_dev,
                         journal_stream_ino,
                         memory_pressure_path,
-                        creds_path,
                         &our_env);
         if (r < 0) {
                 *exit_status = EXIT_MEMORY;
@@ -4640,7 +4640,7 @@ static int exec_child(
         if (needs_mount_namespace) {
                 _cleanup_free_ char *error_path = NULL;
 
-                r = apply_mount_namespace(unit, command->flags, context, params, runtime, memory_pressure_path, creds_path, creds_fd, &error_path);
+                r = apply_mount_namespace(unit, command->flags, context, params, runtime, memory_pressure_path, &error_path);
                 if (r < 0) {
                         *exit_status = EXIT_NAMESPACE;
                         return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
@@ -4648,19 +4648,6 @@ static int exec_child(
                 }
         }
 
-        if (creds_fd >= 0) {
-                assert(creds_path);
-
-                /* When a mount namespace is not requested, then the target directory may not exist yet.
-                 * Here, we ignore the failure, as if it fails, the subsequent move_mount() will fail. */
-                (void) mkdir_p_label(creds_path, 0755);
-
-                if (move_mount(creds_fd, "", AT_FDCWD, creds_path, MOVE_MOUNT_F_EMPTY_PATH) < 0) {
-                        *exit_status = EXIT_CREDENTIALS;
-                        return log_unit_error_errno(unit, errno, "Failed to mount credentials directory on %s: %m", creds_path);
-                }
-        }
-
         if (needs_sandboxing) {
                 r = apply_protect_hostname(unit, context, exit_status);
                 if (r < 0)
index 2197287fd08afa3824269b02018ce6e27ec6447b..e2304f5d066da8b345bf69fa4f21786519d011b7 100644 (file)
@@ -74,8 +74,7 @@ typedef enum MountMode {
         EXTENSION_DIRECTORIES, /* Bind-mounted outside the root directory, and used by subsequent mounts */
         EXTENSION_IMAGES, /* Mounted outside the root directory, and used by subsequent mounts */
         MQUEUEFS,
-        READWRITE_IMPLICIT, /* Should have the 2nd lowest priority. */
-        MKDIR,              /* Should have the lowest priority. */
+        READWRITE_IMPLICIT, /* Should have the lowest priority. */
         _MOUNT_MODE_MAX,
 } MountMode;
 
@@ -232,7 +231,6 @@ static const char * const mount_mode_table[_MOUNT_MODE_MAX] = {
         [EXTENSION_IMAGES]      = "extension-images",
         [MQUEUEFS]              = "mqueuefs",
         [READWRITE_IMPLICIT]    = "read-write-implicit",
-        [MKDIR]                 = "mkdir",
 };
 
 /* Helper struct for naming simplicity and reusability */
@@ -1549,12 +1547,6 @@ static int apply_one_mount(
         case OVERLAY_MOUNT:
                 return mount_overlay(m);
 
-        case MKDIR:
-                r = mkdir_p_label(mount_entry_path(m), 0755);
-                if (r < 0)
-                        return r;
-                return 1;
-
         default:
                 assert_not_reached();
         }
@@ -2021,7 +2013,6 @@ int setup_namespace(
                 const char* tmp_dir,
                 const char* var_tmp_dir,
                 const char *creds_path,
-                int creds_fd,
                 const char *log_namespace,
                 unsigned long mount_propagation_flag,
                 VeritySettings *verity,
@@ -2344,22 +2335,13 @@ int setup_namespace(
                                 .flags = MS_NODEV|MS_STRICTATIME|MS_NOSUID|MS_NOEXEC,
                         };
 
-                        /* If we have mount fd for credentials directory, then it will be mounted after
-                         * namespace is set up. So, here we only create the mount point. */
-
-                        if (creds_fd < 0)
-                                *(m++) = (MountEntry) {
-                                        .path_const = creds_path,
-                                        .mode = BIND_MOUNT,
-                                        .read_only = true,
-                                        .source_const = creds_path,
-                                        .ignore = true,
-                                };
-                        else
-                                *(m++) = (MountEntry) {
-                                        .path_const = creds_path,
-                                        .mode = MKDIR,
-                                };
+                        *(m++) = (MountEntry) {
+                                .path_const = creds_path,
+                                .mode = BIND_MOUNT,
+                                .read_only = true,
+                                .source_const = creds_path,
+                                .ignore = true,
+                        };
                 } else {
                         /* If our service has no credentials store configured, then make the whole
                          * credentials tree inaccessible wholesale. */
index b8615cbd98696c10ef1020a8b412cd5915a5c3da..b6132154c5132ec0c98d83b7b3c3e82e4b1f1ba8 100644 (file)
@@ -122,7 +122,6 @@ int setup_namespace(
                 const char *tmp_dir,
                 const char *var_tmp_dir,
                 const char *creds_path,
-                int creds_fd,
                 const char *log_namespace,
                 unsigned long mount_propagation_flag,
                 VeritySettings *verity,
index 847415b9ae43ed6333f5d2c5d8cbd0d21d7bc938..0be66c2c7bf0f5649800c288d1e715977c47647e 100644 (file)
@@ -282,11 +282,7 @@ static void test_exec_cpuaffinity(Manager *m) {
 
 static void test_exec_credentials(Manager *m) {
         test(m, "exec-set-credential.service", 0, CLD_EXITED);
-        test(m, "exec-set-credential-with-mount-namespace.service", 0, CLD_EXITED);
-        test(m, "exec-set-credential-with-seccomp.service", 0, CLD_EXITED);
         test(m, "exec-load-credential.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_CREDENTIALS, CLD_EXITED);
-        test(m, "exec-load-credential-with-mount-namespace.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_CREDENTIALS, CLD_EXITED);
-        test(m, "exec-load-credential-with-seccomp.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_CREDENTIALS, CLD_EXITED);
         test(m, "exec-credentials-dir-specifier.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_CREDENTIALS, CLD_EXITED);
 }
 
index 436a784e86808c2ace8a2fb506222e8d88893277..25aafc35ca837dda241061be465256c6d6325439 100644 (file)
@@ -194,7 +194,6 @@ TEST(protect_kernel_logs) {
                                     NULL,
                                     NULL,
                                     NULL,
-                                    -EBADF,
                                     NULL,
                                     0,
                                     NULL,
index 56f3de83b6f7dc88d81053e9e7868db27b03633b..77afd2f6b9eb81f1c23701fc153674c04a26647b 100644 (file)
@@ -96,7 +96,6 @@ int main(int argc, char *argv[]) {
                             tmp_dir,
                             var_tmp_dir,
                             NULL,
-                            -EBADF,
                             NULL,
                             0,
                             NULL,
diff --git a/test/test-execute/exec-load-credential-with-mount-namespace.service b/test/test-execute/exec-load-credential-with-mount-namespace.service
deleted file mode 100644 (file)
index fd71cf6..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-# SPDX-License-Identifier: LGPL-2.1-or-later
-[Unit]
-Description=Test for LoadCredential=
-
-[Service]
-ExecStart=/bin/sh -x -c 'test "$$(cat %d/test-execute.load-credential)" = "foo"'
-Type=oneshot
-LoadCredential=test-execute.load-credential
-PrivateMounts=yes
diff --git a/test/test-execute/exec-load-credential-with-seccomp.service b/test/test-execute/exec-load-credential-with-seccomp.service
deleted file mode 100644 (file)
index 67303f2..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-# SPDX-License-Identifier: LGPL-2.1-or-later
-[Unit]
-Description=Test for LoadCredential=
-
-[Service]
-ExecStart=/bin/sh -x -c 'test "$$(cat %d/test-execute.load-credential)" = "foo"'
-Type=oneshot
-LoadCredential=test-execute.load-credential
-SystemCallFilter=~open_tree move_mount
diff --git a/test/test-execute/exec-set-credential-with-mount-namespace.service b/test/test-execute/exec-set-credential-with-mount-namespace.service
deleted file mode 100644 (file)
index 67d15e5..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-# SPDX-License-Identifier: LGPL-2.1-or-later
-[Unit]
-Description=Test for SetCredential=
-
-[Service]
-ExecStart=/bin/sh -x -c 'test "$$(cat %d/test-execute.set-credential)" = "hoge"'
-Type=oneshot
-SetCredential=test-execute.set-credential:hoge
-PrivateMounts=yes
diff --git a/test/test-execute/exec-set-credential-with-seccomp.service b/test/test-execute/exec-set-credential-with-seccomp.service
deleted file mode 100644 (file)
index 778777b..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-# SPDX-License-Identifier: LGPL-2.1-or-later
-[Unit]
-Description=Test for SetCredential=
-
-[Service]
-ExecStart=/bin/sh -x -c 'test "$$(cat %d/test-execute.set-credential)" = "hoge"'
-Type=oneshot
-SetCredential=test-execute.set-credential:hoge
-SystemCallFilter=~open_tree move_mount