From: Yu Watanabe Date: Fri, 25 Aug 2023 06:54:52 +0000 (+0900) Subject: Revert "core: do not leak mount for credentials directory if mount namespace is enabled" X-Git-Tag: v255-rc1~557^2~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=73ff4d48de888d6f9908f5383b70a4d53e8edd7a;p=thirdparty%2Fsystemd.git Revert "core: do not leak mount for credentials directory if mount namespace is enabled" This reverts commits - 9ae3624889b98f75efa6fd0c5f4b4de3eaf328d4 "test-execute: add tests for credentials directory with mount namespace"↲ - 94fe4cf2557d1f70f20ee02d32f4c2ae6bc1fb3f "core: do not leak mount for credentials directory if mount namespace is enabled", - 7241b9cd72d6e6079d5140cf24c34e78d3cf43cc "core/credential: make setup_credentials() return path to credentials directory", - fbaf3b23ae4aa79110ebd37aada70ce6a044c692 "core: set $CREDENTIALS_DIRECTORY only when we set up credentials" Before the commits, credentials directory set up on ExecStart= was kept on e.g. ExecStop=. But, with the changes, if a service requests a private mount namespace, the credentials directory is discarded after ExecStart= is finished. Let's revert the change, and find better way later. Addresses the post-merge comment https://github.com/systemd/systemd/pull/28787#issuecomment-1690614202. --- diff --git a/src/core/credential.c b/src/core/credential.c index d9a88004c5a..b8b8b4edaa7 100644 --- a/src/core/credential.c +++ b/src/core/credential.c @@ -10,7 +10,6 @@ #include "glob-util.h" #include "io-util.h" #include "label-util.h" -#include "missing_syscall.h" #include "mkdir-label.h" #include "mount-util.h" #include "mountpoint-util.h" @@ -18,7 +17,6 @@ #include "random-util.h" #include "recurse-dir.h" #include "rm-rf.h" -#include "socket-util.h" #include "tmpfile-util.h" ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc) { @@ -732,8 +730,7 @@ static int setup_credentials_internal( bool reuse_workspace, /* Whether to reuse any existing workspace mount if it already is a mount */ bool must_mount, /* Whether to require that we mount something, it's not OK to use the plain directory fall back */ uid_t uid, - gid_t gid, - int *ret_mount_fd) { + gid_t gid) { int r, workspace_mounted; /* negative if we don't know yet whether we have/can mount something; true * if we mounted something; false if we definitely can't mount anything */ @@ -851,27 +848,6 @@ static int setup_credentials_internal( if (r < 0) return r; - if (ret_mount_fd) { - _cleanup_close_ int mount_fd = -EBADF; - - r = mount_fd = RET_NERRNO(open_tree(AT_FDCWD, workspace, OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC)); - if (r >= 0) { - /* The workspace is already cloned in the above, and not necessary - * anymore. Even though the workspace is unmounted when the short-lived - * child process exits, let's explicitly unmount it here for safety. */ - r = umount_verbose(LOG_DEBUG, workspace, MNT_DETACH|UMOUNT_NOFOLLOW); - if (r < 0) - return r; - - *ret_mount_fd = TAKE_FD(mount_fd); - return 0; - } - - /* Old kernel? Unprivileged? */ - if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r)) - return r; - } - /* And mount it to the final place, read-only */ r = mount_nofollow_verbose(LOG_DEBUG, workspace, final, NULL, MS_MOVE, NULL); } else @@ -892,8 +868,6 @@ static int setup_credentials_internal( return -errno; } - if (ret_mount_fd) - *ret_mount_fd = -EBADF; return 0; } @@ -902,25 +876,16 @@ int setup_credentials( const ExecParameters *params, const char *unit, uid_t uid, - gid_t gid, - char **ret_path, - int *ret_mount_fd) { + gid_t gid) { - _cleanup_close_pair_ int socket_pair[2] = PIPE_EBADF; _cleanup_free_ char *p = NULL, *q = NULL; - _cleanup_close_ int mount_fd = -EBADF; int r; assert(context); assert(params); - assert(ret_path); - assert(ret_mount_fd); - if (!exec_context_has_credentials(context)) { - *ret_path = NULL; - *ret_mount_fd = -EBADF; + if (!exec_context_has_credentials(context)) return 0; - } if (!params->prefix[EXEC_DIRECTORY_RUNTIME]) return -EINVAL; @@ -943,12 +908,7 @@ int setup_credentials( if (r < 0 && r != -EEXIST) return r; - if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, socket_pair) < 0) - return -errno; - - r = safe_fork_full("(sd-mkdcreds)", - NULL, &socket_pair[1], 1, - FORK_DEATHSIG|FORK_CLOSE_ALL_FDS|FORK_WAIT|FORK_NEW_MOUNTNS|FORK_REOPEN_LOG, NULL); + r = safe_fork("(sd-mkdcreds)", FORK_DEATHSIG|FORK_WAIT|FORK_NEW_MOUNTNS, NULL); if (r < 0) { _cleanup_free_ char *t = NULL, *u = NULL; @@ -984,15 +944,14 @@ int setup_credentials( true, /* reuse the workspace if it is already a mount */ false, /* it's OK to fall back to a plain directory if we can't mount anything */ uid, - gid, - NULL); + gid); (void) rmdir(u); /* remove the workspace again if we can. */ if (r < 0) return r; - } else if (r == 0) { /* child */ + } else if (r == 0) { /* We managed to set up a mount namespace, and are now in a child. That's great. In this case * we can use the same directory for all cases, after turning off propagation. Question @@ -1011,8 +970,6 @@ int setup_credentials( * given that we do this in a privately namespaced short-lived single-threaded process that * no one else sees this should be OK to do. */ - _cleanup_close_ int fd = -EBADF; - /* Turn off propagation from our namespace to host */ r = mount_nofollow_verbose(LOG_DEBUG, NULL, "/dev", NULL, MS_SLAVE|MS_REC, NULL); if (r < 0) @@ -1027,14 +984,7 @@ int setup_credentials( false, /* do not reuse /dev/shm if it is already a mount, under no circumstances */ true, /* insist that something is mounted, do not allow fallback to plain directory */ uid, - gid, - &fd); - if (r < 0) - goto child_fail; - - r = send_one_fd_iov(socket_pair[1], fd, - &IOVEC_MAKE((int[]) { fd >= 0 }, sizeof(int)), 1, - MSG_DONTWAIT); + gid); if (r < 0) goto child_fail; @@ -1042,17 +992,6 @@ int setup_credentials( child_fail: _exit(EXIT_FAILURE); - - } else { /* parent */ - - int ret; - struct iovec iov = IOVEC_MAKE(&ret, sizeof(int)); - - r = receive_one_fd_iov(socket_pair[0], &iov, 1, MSG_DONTWAIT, &mount_fd); - if (r < 0) - return r; - if (ret > 0 && mount_fd < 0) - return -EIO; } /* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's @@ -1060,8 +999,5 @@ int setup_credentials( * actually end up mounting anything on it. In that case we'd rather have ENOENT than EACCESS being * seen by users when trying access this inode. */ (void) rmdir(p); - - *ret_path = TAKE_PTR(p); - *ret_mount_fd = TAKE_FD(mount_fd); return 0; } diff --git a/src/core/credential.h b/src/core/credential.h index 4b936b32d60..54155f515bc 100644 --- a/src/core/credential.h +++ b/src/core/credential.h @@ -45,6 +45,4 @@ int setup_credentials( const ExecParameters *params, const char *unit, uid_t uid, - gid_t gid, - char **ret_path, - int *ret_mount_fd); + gid_t gid); diff --git a/src/core/execute.c b/src/core/execute.c index eb452a45ec2..14e196d4c7a 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -73,7 +72,6 @@ #include "missing_fs.h" #include "missing_ioprio.h" #include "missing_prctl.h" -#include "missing_syscall.h" #include "mkdir-label.h" #include "namespace.h" #include "parse-util.h" @@ -1866,7 +1864,6 @@ static int build_environment( dev_t journal_stream_dev, ino_t journal_stream_ino, const char *memory_pressure_path, - const char *creds_path, char ***ret) { _cleanup_strv_free_ char **our_env = NULL; @@ -2044,8 +2041,8 @@ static int build_environment( our_env[n_env++] = x; } - if (creds_path) { - x = strjoin("CREDENTIALS_DIRECTORY=", creds_path); + if (exec_context_has_credentials(c) && p->prefix[EXEC_DIRECTORY_RUNTIME]) { + x = strjoin("CREDENTIALS_DIRECTORY=", p->prefix[EXEC_DIRECTORY_RUNTIME], "/credentials/", u->id); if (!x) return -ENOMEM; @@ -3113,14 +3110,12 @@ static int apply_mount_namespace( const ExecParameters *params, ExecRuntime *runtime, const char *memory_pressure_path, - const char *creds_path, - int creds_fd, char **error_path) { _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT; _cleanup_strv_free_ char **empty_directories = NULL, **symlinks = NULL, **read_write_paths_cleanup = NULL; - _cleanup_free_ char *incoming_dir = NULL, *propagate_dir = NULL, + _cleanup_free_ char *creds_path = NULL, *incoming_dir = NULL, *propagate_dir = NULL, *extension_dir = NULL, *host_os_release_stage = NULL; const char *root_dir = NULL, *root_image = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL; char **read_write_paths; @@ -3222,6 +3217,14 @@ static int apply_mount_namespace( if (context->mount_propagation_flag == MS_SHARED) log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring"); + if (exec_context_has_credentials(context) && + params->prefix[EXEC_DIRECTORY_RUNTIME] && + FLAGS_SET(params->flags, EXEC_WRITE_CREDENTIALS)) { + creds_path = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials", u->id); + if (!creds_path) + return -ENOMEM; + } + if (params->runtime_scope == RUNTIME_SCOPE_SYSTEM) { propagate_dir = path_join("/run/systemd/propagate/", u->id); if (!propagate_dir) @@ -3290,7 +3293,6 @@ static int apply_mount_namespace( tmp_dir, var_tmp_dir, creds_path, - creds_fd, context->log_namespace, context->mount_propagation_flag, &verity, @@ -3944,7 +3946,7 @@ static int exec_child( int r, ngids = 0, exec_fd; _cleanup_free_ gid_t *supplementary_gids = NULL; const char *username = NULL, *groupname = NULL; - _cleanup_free_ char *home_buffer = NULL, *memory_pressure_path = NULL, *creds_path = NULL; + _cleanup_free_ char *home_buffer = NULL, *memory_pressure_path = NULL; const char *home = NULL, *shell = NULL; char **final_argv = NULL; dev_t journal_stream_dev = 0; @@ -3975,7 +3977,6 @@ static int exec_child( int ngids_after_pam = 0; _cleanup_free_ int *fds = NULL; _cleanup_strv_free_ char **fdnames = NULL; - _cleanup_close_ int creds_fd = -EBADF; assert(unit); assert(command); @@ -4426,7 +4427,7 @@ static int exec_child( } if (FLAGS_SET(params->flags, EXEC_WRITE_CREDENTIALS)) { - r = setup_credentials(context, params, unit->id, uid, gid, &creds_path, &creds_fd); + r = setup_credentials(context, params, unit->id, uid, gid); if (r < 0) { *exit_status = EXIT_CREDENTIALS; return log_unit_error_errno(unit, r, "Failed to set up credentials: %m"); @@ -4446,7 +4447,6 @@ static int exec_child( journal_stream_dev, journal_stream_ino, memory_pressure_path, - creds_path, &our_env); if (r < 0) { *exit_status = EXIT_MEMORY; @@ -4640,7 +4640,7 @@ static int exec_child( if (needs_mount_namespace) { _cleanup_free_ char *error_path = NULL; - r = apply_mount_namespace(unit, command->flags, context, params, runtime, memory_pressure_path, creds_path, creds_fd, &error_path); + r = apply_mount_namespace(unit, command->flags, context, params, runtime, memory_pressure_path, &error_path); if (r < 0) { *exit_status = EXIT_NAMESPACE; return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m", @@ -4648,19 +4648,6 @@ static int exec_child( } } - if (creds_fd >= 0) { - assert(creds_path); - - /* When a mount namespace is not requested, then the target directory may not exist yet. - * Here, we ignore the failure, as if it fails, the subsequent move_mount() will fail. */ - (void) mkdir_p_label(creds_path, 0755); - - if (move_mount(creds_fd, "", AT_FDCWD, creds_path, MOVE_MOUNT_F_EMPTY_PATH) < 0) { - *exit_status = EXIT_CREDENTIALS; - return log_unit_error_errno(unit, errno, "Failed to mount credentials directory on %s: %m", creds_path); - } - } - if (needs_sandboxing) { r = apply_protect_hostname(unit, context, exit_status); if (r < 0) diff --git a/src/core/namespace.c b/src/core/namespace.c index 2197287fd08..e2304f5d066 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -74,8 +74,7 @@ typedef enum MountMode { EXTENSION_DIRECTORIES, /* Bind-mounted outside the root directory, and used by subsequent mounts */ EXTENSION_IMAGES, /* Mounted outside the root directory, and used by subsequent mounts */ MQUEUEFS, - READWRITE_IMPLICIT, /* Should have the 2nd lowest priority. */ - MKDIR, /* Should have the lowest priority. */ + READWRITE_IMPLICIT, /* Should have the lowest priority. */ _MOUNT_MODE_MAX, } MountMode; @@ -232,7 +231,6 @@ static const char * const mount_mode_table[_MOUNT_MODE_MAX] = { [EXTENSION_IMAGES] = "extension-images", [MQUEUEFS] = "mqueuefs", [READWRITE_IMPLICIT] = "read-write-implicit", - [MKDIR] = "mkdir", }; /* Helper struct for naming simplicity and reusability */ @@ -1549,12 +1547,6 @@ static int apply_one_mount( case OVERLAY_MOUNT: return mount_overlay(m); - case MKDIR: - r = mkdir_p_label(mount_entry_path(m), 0755); - if (r < 0) - return r; - return 1; - default: assert_not_reached(); } @@ -2021,7 +2013,6 @@ int setup_namespace( const char* tmp_dir, const char* var_tmp_dir, const char *creds_path, - int creds_fd, const char *log_namespace, unsigned long mount_propagation_flag, VeritySettings *verity, @@ -2344,22 +2335,13 @@ int setup_namespace( .flags = MS_NODEV|MS_STRICTATIME|MS_NOSUID|MS_NOEXEC, }; - /* If we have mount fd for credentials directory, then it will be mounted after - * namespace is set up. So, here we only create the mount point. */ - - if (creds_fd < 0) - *(m++) = (MountEntry) { - .path_const = creds_path, - .mode = BIND_MOUNT, - .read_only = true, - .source_const = creds_path, - .ignore = true, - }; - else - *(m++) = (MountEntry) { - .path_const = creds_path, - .mode = MKDIR, - }; + *(m++) = (MountEntry) { + .path_const = creds_path, + .mode = BIND_MOUNT, + .read_only = true, + .source_const = creds_path, + .ignore = true, + }; } else { /* If our service has no credentials store configured, then make the whole * credentials tree inaccessible wholesale. */ diff --git a/src/core/namespace.h b/src/core/namespace.h index b8615cbd986..b6132154c51 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -122,7 +122,6 @@ int setup_namespace( const char *tmp_dir, const char *var_tmp_dir, const char *creds_path, - int creds_fd, const char *log_namespace, unsigned long mount_propagation_flag, VeritySettings *verity, diff --git a/src/test/test-execute.c b/src/test/test-execute.c index 847415b9ae4..0be66c2c7bf 100644 --- a/src/test/test-execute.c +++ b/src/test/test-execute.c @@ -282,11 +282,7 @@ static void test_exec_cpuaffinity(Manager *m) { static void test_exec_credentials(Manager *m) { test(m, "exec-set-credential.service", 0, CLD_EXITED); - test(m, "exec-set-credential-with-mount-namespace.service", 0, CLD_EXITED); - test(m, "exec-set-credential-with-seccomp.service", 0, CLD_EXITED); test(m, "exec-load-credential.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_CREDENTIALS, CLD_EXITED); - test(m, "exec-load-credential-with-mount-namespace.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_CREDENTIALS, CLD_EXITED); - test(m, "exec-load-credential-with-seccomp.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_CREDENTIALS, CLD_EXITED); test(m, "exec-credentials-dir-specifier.service", MANAGER_IS_SYSTEM(m) ? 0 : EXIT_CREDENTIALS, CLD_EXITED); } diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c index 436a784e868..25aafc35ca8 100644 --- a/src/test/test-namespace.c +++ b/src/test/test-namespace.c @@ -194,7 +194,6 @@ TEST(protect_kernel_logs) { NULL, NULL, NULL, - -EBADF, NULL, 0, NULL, diff --git a/src/test/test-ns.c b/src/test/test-ns.c index 56f3de83b6f..77afd2f6b9e 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -96,7 +96,6 @@ int main(int argc, char *argv[]) { tmp_dir, var_tmp_dir, NULL, - -EBADF, NULL, 0, NULL, diff --git a/test/test-execute/exec-load-credential-with-mount-namespace.service b/test/test-execute/exec-load-credential-with-mount-namespace.service deleted file mode 100644 index fd71cf67179..00000000000 --- a/test/test-execute/exec-load-credential-with-mount-namespace.service +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-or-later -[Unit] -Description=Test for LoadCredential= - -[Service] -ExecStart=/bin/sh -x -c 'test "$$(cat %d/test-execute.load-credential)" = "foo"' -Type=oneshot -LoadCredential=test-execute.load-credential -PrivateMounts=yes diff --git a/test/test-execute/exec-load-credential-with-seccomp.service b/test/test-execute/exec-load-credential-with-seccomp.service deleted file mode 100644 index 67303f2713a..00000000000 --- a/test/test-execute/exec-load-credential-with-seccomp.service +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-or-later -[Unit] -Description=Test for LoadCredential= - -[Service] -ExecStart=/bin/sh -x -c 'test "$$(cat %d/test-execute.load-credential)" = "foo"' -Type=oneshot -LoadCredential=test-execute.load-credential -SystemCallFilter=~open_tree move_mount diff --git a/test/test-execute/exec-set-credential-with-mount-namespace.service b/test/test-execute/exec-set-credential-with-mount-namespace.service deleted file mode 100644 index 67d15e5dbb7..00000000000 --- a/test/test-execute/exec-set-credential-with-mount-namespace.service +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-or-later -[Unit] -Description=Test for SetCredential= - -[Service] -ExecStart=/bin/sh -x -c 'test "$$(cat %d/test-execute.set-credential)" = "hoge"' -Type=oneshot -SetCredential=test-execute.set-credential:hoge -PrivateMounts=yes diff --git a/test/test-execute/exec-set-credential-with-seccomp.service b/test/test-execute/exec-set-credential-with-seccomp.service deleted file mode 100644 index 778777b9478..00000000000 --- a/test/test-execute/exec-set-credential-with-seccomp.service +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-or-later -[Unit] -Description=Test for SetCredential= - -[Service] -ExecStart=/bin/sh -x -c 'test "$$(cat %d/test-execute.set-credential)" = "hoge"' -Type=oneshot -SetCredential=test-execute.set-credential:hoge -SystemCallFilter=~open_tree move_mount