#include "glob-util.h"
#include "io-util.h"
#include "label-util.h"
+#include "missing_syscall.h"
#include "mkdir-label.h"
#include "mount-util.h"
#include "mountpoint-util.h"
#include "random-util.h"
#include "recurse-dir.h"
#include "rm-rf.h"
+#include "socket-util.h"
#include "tmpfile-util.h"
ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc) {
bool reuse_workspace, /* Whether to reuse any existing workspace mount if it already is a mount */
bool must_mount, /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
uid_t uid,
- gid_t gid) {
+ gid_t gid,
+ int *ret_mount_fd) {
int r, workspace_mounted; /* negative if we don't know yet whether we have/can mount something; true
* if we mounted something; false if we definitely can't mount anything */
if (r < 0)
return r;
+ if (ret_mount_fd) {
+ _cleanup_close_ int mount_fd = -EBADF;
+
+ r = mount_fd = RET_NERRNO(open_tree(AT_FDCWD, workspace, OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC));
+ if (r >= 0) {
+ /* The workspace has already been cloned above, so it is no longer needed.
+ * Even though the workspace is unmounted when the short-lived
+ * child process exits, let's explicitly unmount it here for safety. */
+ r = umount_verbose(LOG_DEBUG, workspace, MNT_DETACH|UMOUNT_NOFOLLOW);
+ if (r < 0)
+ return r;
+
+ *ret_mount_fd = TAKE_FD(mount_fd);
+ return 0;
+ }
+
+ /* Old kernel? Unprivileged? */
+ if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
+ return r;
+ }
+
/* And mount it to the final place, read-only */
r = mount_nofollow_verbose(LOG_DEBUG, workspace, final, NULL, MS_MOVE, NULL);
} else
return -errno;
}
+ if (ret_mount_fd)
+ *ret_mount_fd = -EBADF;
return 0;
}
const char *unit,
uid_t uid,
gid_t gid,
- char **ret_path) {
+ char **ret_path,
+ int *ret_mount_fd) {
+ _cleanup_close_pair_ int socket_pair[2] = PIPE_EBADF;
_cleanup_free_ char *p = NULL, *q = NULL;
+ _cleanup_close_ int mount_fd = -EBADF;
int r;
assert(context);
assert(params);
assert(ret_path);
+ assert(ret_mount_fd);
if (!exec_context_has_credentials(context)) {
*ret_path = NULL;
+ *ret_mount_fd = -EBADF;
return 0;
}
if (r < 0 && r != -EEXIST)
return r;
- r = safe_fork("(sd-mkdcreds)", FORK_DEATHSIG|FORK_WAIT|FORK_NEW_MOUNTNS, NULL);
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, socket_pair) < 0)
+ return -errno;
+
+ r = safe_fork_full("(sd-mkdcreds)",
+ NULL, &socket_pair[1], 1,
+ FORK_DEATHSIG|FORK_CLOSE_ALL_FDS|FORK_WAIT|FORK_NEW_MOUNTNS|FORK_REOPEN_LOG, NULL);
if (r < 0) {
_cleanup_free_ char *t = NULL, *u = NULL;
true, /* reuse the workspace if it is already a mount */
false, /* it's OK to fall back to a plain directory if we can't mount anything */
uid,
- gid);
+ gid,
+ NULL);
(void) rmdir(u); /* remove the workspace again if we can. */
if (r < 0)
return r;
- } else if (r == 0) {
+ } else if (r == 0) { /* child */
/* We managed to set up a mount namespace, and are now in a child. That's great. In this case
* we can use the same directory for all cases, after turning off propagation. Question
* given that we do this in a privately namespaced short-lived single-threaded process that
* no one else sees this should be OK to do. */
+ _cleanup_close_ int fd = -EBADF;
+
/* Turn off propagation from our namespace to host */
r = mount_nofollow_verbose(LOG_DEBUG, NULL, "/dev", NULL, MS_SLAVE|MS_REC, NULL);
if (r < 0)
false, /* do not reuse /dev/shm if it is already a mount, under no circumstances */
true, /* insist that something is mounted, do not allow fallback to plain directory */
uid,
- gid);
+ gid,
+ &fd);
+ if (r < 0)
+ goto child_fail;
+
+ r = send_one_fd_iov(socket_pair[1], fd,
+ &IOVEC_MAKE((int[]) { fd >= 0 }, sizeof(int)), 1,
+ MSG_DONTWAIT);
if (r < 0)
goto child_fail;
child_fail:
_exit(EXIT_FAILURE);
+
+ } else { /* parent */
+
+ int ret;
+ struct iovec iov = IOVEC_MAKE(&ret, sizeof(int));
+
+ r = receive_one_fd_iov(socket_pair[0], &iov, 1, MSG_DONTWAIT, &mount_fd);
+ if (r < 0)
+ return r;
+ if (ret > 0 && mount_fd < 0)
+ return -EIO;
}
/* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's
(void) rmdir(p);
*ret_path = TAKE_PTR(p);
+ *ret_mount_fd = TAKE_FD(mount_fd);
return 0;
}
const char *unit,
uid_t uid,
gid_t gid,
- char **ret_path);
+ char **ret_path,
+ int *ret_mount_fd);
#include <sys/file.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
+#include <sys/mount.h>
#include <sys/personality.h>
#include <sys/prctl.h>
#include <sys/shm.h>
#include "missing_fs.h"
#include "missing_ioprio.h"
#include "missing_prctl.h"
+#include "missing_syscall.h"
#include "mkdir-label.h"
#include "namespace.h"
#include "parse-util.h"
ExecRuntime *runtime,
const char *memory_pressure_path,
const char *creds_path,
+ int creds_fd,
char **error_path) {
_cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
tmp_dir,
var_tmp_dir,
creds_path,
+ creds_fd,
context->log_namespace,
context->mount_propagation_flag,
&verity,
int ngids_after_pam = 0;
_cleanup_free_ int *fds = NULL;
_cleanup_strv_free_ char **fdnames = NULL;
+ _cleanup_close_ int creds_fd = -EBADF;
assert(unit);
assert(command);
}
if (FLAGS_SET(params->flags, EXEC_WRITE_CREDENTIALS)) {
- r = setup_credentials(context, params, unit->id, uid, gid, &creds_path);
+ r = setup_credentials(context, params, unit->id, uid, gid, &creds_path, &creds_fd);
if (r < 0) {
*exit_status = EXIT_CREDENTIALS;
return log_unit_error_errno(unit, r, "Failed to set up credentials: %m");
if (needs_mount_namespace) {
_cleanup_free_ char *error_path = NULL;
- r = apply_mount_namespace(unit, command->flags, context, params, runtime, memory_pressure_path, creds_path, &error_path);
+ r = apply_mount_namespace(unit, command->flags, context, params, runtime, memory_pressure_path, creds_path, creds_fd, &error_path);
if (r < 0) {
*exit_status = EXIT_NAMESPACE;
return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
}
}
+ if (creds_fd >= 0) {
+ assert(creds_path);
+
+ /* When no mount namespace is requested, the target directory may not exist yet.
+ * We ignore a failure here: if creating it fails, the subsequent move_mount() will fail anyway. */
+ (void) mkdir_p_label(creds_path, 0755);
+
+ if (move_mount(creds_fd, "", AT_FDCWD, creds_path, MOVE_MOUNT_F_EMPTY_PATH) < 0) {
+ *exit_status = EXIT_CREDENTIALS;
+ return log_unit_error_errno(unit, errno, "Failed to mount credentials directory on %s: %m", creds_path);
+ }
+ }
+
if (needs_sandboxing) {
r = apply_protect_hostname(unit, context, exit_status);
if (r < 0)
EXTENSION_DIRECTORIES, /* Bind-mounted outside the root directory, and used by subsequent mounts */
EXTENSION_IMAGES, /* Mounted outside the root directory, and used by subsequent mounts */
MQUEUEFS,
- READWRITE_IMPLICIT, /* Should have the lowest priority. */
+ READWRITE_IMPLICIT, /* Should have the 2nd lowest priority. */
+ MKDIR, /* Should have the lowest priority. */
_MOUNT_MODE_MAX,
} MountMode;
[EXEC] = "exec",
[NOEXEC] = "noexec",
[MQUEUEFS] = "mqueuefs",
+ [MKDIR] = "mkdir",
};
/* Helper struct for naming simplicity and reusability */
case OVERLAY_MOUNT:
return mount_overlay(m);
+ case MKDIR:
+ r = mkdir_p_label(mount_entry_path(m), 0755);
+ if (r < 0)
+ return r;
+ return 1;
+
default:
assert_not_reached();
}
const char* tmp_dir,
const char* var_tmp_dir,
const char *creds_path,
+ int creds_fd,
const char *log_namespace,
unsigned long mount_propagation_flag,
VeritySettings *verity,
.flags = MS_NODEV|MS_STRICTATIME|MS_NOSUID|MS_NOEXEC,
};
- *(m++) = (MountEntry) {
- .path_const = creds_path,
- .mode = BIND_MOUNT,
- .read_only = true,
- .source_const = creds_path,
- .ignore = true,
- };
+ /* If we have a mount fd for the credentials directory, then it will be mounted after
+ * the namespace is set up. So, here we only create the mount point. */
+
+ if (creds_fd < 0)
+ *(m++) = (MountEntry) {
+ .path_const = creds_path,
+ .mode = BIND_MOUNT,
+ .read_only = true,
+ .source_const = creds_path,
+ .ignore = true,
+ };
+ else
+ *(m++) = (MountEntry) {
+ .path_const = creds_path,
+ .mode = MKDIR,
+ };
} else {
/* If our service has no credentials store configured, then make the whole
* credentials tree inaccessible wholesale. */
const char *tmp_dir,
const char *var_tmp_dir,
const char *creds_path,
+ int creds_fd,
const char *log_namespace,
unsigned long mount_propagation_flag,
VeritySettings *verity,
NULL,
NULL,
NULL,
+ -EBADF,
NULL,
0,
NULL,
tmp_dir,
var_tmp_dir,
NULL,
+ -EBADF,
NULL,
0,
NULL,