* think autofs, NFS, FUSE, …), but let's generate useful debug messages at
* the very least. */
- q = path_is_mount_point(x, NULL, 0);
+ q = path_is_mount_point(x);
if (IN_SET(q, 0, -ENOENT)) {
/* Hmm, whaaaa? The mount point is not actually a mount point? Then
* it is either obstructed by a later mount or somebody has been
return 0;
}
+/* Convenience wrapper around bind_remount_one_with_mountinfo(): opens /proc/self/mountinfo itself and
+ * passes the resulting stream on, together with the unmodified 'path', 'new_flags' and 'flags_mask'.
+ * If the file cannot be opened, the failure is logged at debug level and the log_debug_errno() result
+ * is returned; otherwise the return value of bind_remount_one_with_mountinfo() is returned as-is. */
+int bind_remount_one(const char *path, unsigned long new_flags, unsigned long flags_mask) {
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+
+ /* "re": open read-only; the 'e' mode flag is the glibc extension requesting O_CLOEXEC */
+ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!proc_self_mountinfo)
+ return log_debug_errno(errno, "Failed to open /proc/self/mountinfo: %m");
+
+ return bind_remount_one_with_mountinfo(path, new_flags, flags_mask, proc_self_mountinfo);
+}
+
static int mount_switch_root_pivot(int fd_newroot, const char *path) {
assert(fd_newroot >= 0);
assert(path);
- /* Change into the new rootfs. */
- if (fchdir(fd_newroot) < 0)
- return log_debug_errno(errno, "Failed to chdir into new rootfs '%s': %m", path);
-
/* Let the kernel tuck the new root under the old one. */
if (pivot_root(".", ".") < 0)
return log_debug_errno(errno, "Failed to pivot root to new rootfs '%s': %m", path);
assert(fd_newroot >= 0);
assert(path);
- /* Change into the new rootfs. */
- if (fchdir(fd_newroot) < 0)
- return log_debug_errno(errno, "Failed to chdir into new rootfs '%s': %m", path);
-
/* Move the new root fs */
if (mount(".", "/", NULL, MS_MOVE, NULL) < 0)
return log_debug_errno(errno, "Failed to move new rootfs '%s': %m", path);
int mount_switch_root_full(const char *path, unsigned long mount_propagation_flag, bool force_ms_move) {
_cleanup_close_ int fd_newroot = -EBADF;
- int r;
+ int r, is_current_root;
assert(path);
assert(mount_propagation_flag_is_valid(mount_propagation_flag));
if (fd_newroot < 0)
return log_debug_errno(errno, "Failed to open new rootfs '%s': %m", path);
- if (!force_ms_move) {
- r = mount_switch_root_pivot(fd_newroot, path);
- if (r < 0) {
- log_debug_errno(r, "Failed to pivot into new rootfs '%s', will try to use MS_MOVE instead: %m", path);
- force_ms_move = true;
+ is_current_root = path_is_root_at(fd_newroot, NULL);
+ if (is_current_root < 0)
+ return log_debug_errno(is_current_root, "Failed to determine if target dir is our root already: %m");
+
+ /* Change into the new rootfs. */
+ if (fchdir(fd_newroot) < 0)
+ return log_debug_errno(errno, "Failed to chdir into new rootfs '%s': %m", path);
+
+ /* Make this a NOP if we are supposed to switch to our current root fs. After all, both pivot_root()
+ * and MS_MOVE don't like that. */
+ if (!is_current_root) {
+ if (!force_ms_move) {
+ r = mount_switch_root_pivot(fd_newroot, path);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to pivot into new rootfs '%s', will try to use MS_MOVE instead: %m", path);
+ force_ms_move = true;
+ }
+ }
+ if (force_ms_move) {
+ /* Failed to pivot_root(), fall back to MS_MOVE. For example, this may happen if the rootfs is
+ * an initramfs in which case pivot_root() isn't supported. */
+ r = mount_switch_root_move(fd_newroot, path);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to switch to new rootfs '%s' with MS_MOVE: %m", path);
}
- }
- if (force_ms_move) {
- /* Failed to pivot_root() fallback to MS_MOVE. For example, this may happen if the rootfs is
- * an initramfs in which case pivot_root() isn't supported. */
- r = mount_switch_root_move(fd_newroot, path);
- if (r < 0)
- return log_debug_errno(r, "Failed to switch to new rootfs '%s' with MS_MOVE: %m", path);
}
/* Finally, let's establish the requested propagation flags. */
if (!(ent->mask & MNT_INVERT))
mount_flags |= ent->id;
- else if (mount_flags & ent->id)
- mount_flags ^= ent->id;
+ else
+ mount_flags &= ~ent->id;
break;
}
const ImagePolicy *image_policy,
bool is_image) {
- _cleanup_close_pair_ int errno_pipe_fd[2] = PIPE_EBADF;
+ _cleanup_close_pair_ int errno_pipe_fd[2] = EBADF_PAIR;
char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p;
bool mount_slave_created = false, mount_slave_mounted = false,
mount_tmp_created = false, mount_tmp_mounted = false,
goto finish;
}
- r = namespace_fork("(sd-bindmnt)", "(sd-bindmnt-inner)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ r = namespace_fork("(sd-bindmnt)", "(sd-bindmnt-inner)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGTERM,
pidns_fd, mntns_fd, -1, -1, root_fd, &child);
if (r < 0)
goto finish;
bool is_image) {
_cleanup_(dissected_image_unrefp) DissectedImage *img = NULL;
- _cleanup_close_pair_ int errno_pipe_fd[2] = PIPE_EBADF;
+ _cleanup_close_pair_ int errno_pipe_fd[2] = EBADF_PAIR;
_cleanup_close_ int mntns_fd = -EBADF, root_fd = -EBADF, pidns_fd = -EBADF, chased_src_fd = -EBADF,
new_mount_fd = -EBADF;
_cleanup_free_ char *chased_src_path = NULL;
if (!pidref_is_set(target))
return -ESRCH;
- r = namespace_open(target->pid, &pidns_fd, &mntns_fd, NULL, NULL, &root_fd);
+ r = namespace_open(target->pid, &pidns_fd, &mntns_fd, /* ret_netns_fd = */ NULL, /* ret_userns_fd = */ NULL, &root_fd);
if (r < 0)
return log_debug_errno(r, "Failed to retrieve FDs of the target process' namespace: %m");
"(sd-bindmnt-inner)",
/* except_fds= */ NULL,
/* n_except_fds= */ 0,
- FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGTERM,
pidns_fd,
mntns_fd,
/* netns_fd= */ -1,
/* If 'path' is already a mount point, does nothing and returns 0. If it is not it makes it one, and returns 1. */
- r = path_is_mount_point(path, NULL, 0);
+ r = path_is_mount_point(path);
if (r < 0)
return log_debug_errno(r, "Failed to determine whether '%s' is a mount point: %m", path);
if (r > 0)
return 1;
}
-int make_userns(uid_t uid_shift, uid_t uid_range, uid_t owner, RemountIdmapping idmapping) {
+int make_userns(uid_t uid_shift, uid_t uid_range, uid_t source_owner, uid_t dest_owner, RemountIdmapping idmapping) {
_cleanup_close_ int userns_fd = -EBADF;
_cleanup_free_ char *line = NULL;
if (idmapping == REMOUNT_IDMAPPING_HOST_OWNER) {
/* Remap the owner of the bind mounted directory to the root user within the container. This
* way every file written by root within the container to the bind-mounted directory will
- * be owned by the original user. All other user will remain unmapped. */
- if (asprintf(&line, UID_FMT " " UID_FMT " " UID_FMT "\n", owner, uid_shift, 1u) < 0)
+ * be owned by the original user from the host. All other users will remain unmapped. */
+ if (asprintf(&line, UID_FMT " " UID_FMT " " UID_FMT "\n", source_owner, uid_shift, 1u) < 0)
+ return log_oom_debug();
+ }
+
+ if (idmapping == REMOUNT_IDMAPPING_HOST_OWNER_TO_TARGET_OWNER) {
+ /* Remap the owner of the bind mounted directory to the owner of the target directory
+ * within the container. This way every file written by target directory owner within the
+ * container to the bind-mounted directory will be owned by the original host user.
+ * All other users will remain unmapped. */
+ if (asprintf(
+ &line,
+ UID_FMT " " UID_FMT " " UID_FMT "\n",
+ source_owner, dest_owner, 1u) < 0)
return log_oom_debug();
}
}
int remount_idmap_fd(
- const char *p,
+ char **paths,
int userns_fd) {
- _cleanup_close_ int mount_fd = -EBADF;
int r;
- assert(p);
assert(userns_fd >= 0);
- /* Clone the mount point */
- mount_fd = open_tree(-1, p, OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
- if (mount_fd < 0)
- return log_debug_errno(errno, "Failed to open tree of mounted filesystem '%s': %m", p);
+ /* This remounts all specified paths with the specified userns as idmap. It will do so in the
+ * order specified in the strv: the expectation is that the top-level directories are at the
+ * beginning, and nested directories on the right, so that the tree can be built correctly from left
+ * to right.
+ *
+ * Works in three passes: (1) clone every path and set the idmap on each clone, (2) unmount the
+ * original mounts in reverse order, (3) attach the clones in forward order. Returns 0 if the list
+ * is empty or all passes succeeded; otherwise returns the result of the failing step
+ * (log_debug_errno() or umount_verbose()). */
- /* Set the user namespace mapping attribute on the cloned mount point */
- if (mount_setattr(mount_fd, "", AT_EMPTY_PATH | AT_RECURSIVE,
- &(struct mount_attr) {
- .attr_set = MOUNT_ATTR_IDMAP,
- .userns_fd = userns_fd,
- }, sizeof(struct mount_attr)) < 0)
- return log_debug_errno(errno, "Failed to change bind mount attributes for '%s': %m", p);
+ size_t n = strv_length(paths);
+ if (n == 0) /* Nothing to do? */
+ return 0;
- /* Remove the old mount point */
- r = umount_verbose(LOG_DEBUG, p, UMOUNT_NOFOLLOW);
- if (r < 0)
- return r;
+ int *mount_fds = NULL;
+ size_t n_mounts_fds = 0;
+
+ mount_fds = new(int, n);
+ if (!mount_fds)
+ return log_oom_debug();
+
+ /* NOTE(review): presumably closes the first n_mounts_fds fds and frees the array on every exit
+ * path — confirm against the CLEANUP_ARRAY()/close_many_and_free() definitions */
+ CLEANUP_ARRAY(mount_fds, n_mounts_fds, close_many_and_free);
- /* And place the cloned version in its place */
- if (move_mount(mount_fd, "", -1, p, MOVE_MOUNT_F_EMPTY_PATH) < 0)
- return log_debug_errno(errno, "Failed to attach UID mapped mount to '%s': %m", p);
+ for (size_t i = 0; i < n; i++) {
+ int mntfd;
+
+ /* Clone the mount point */
+ mntfd = mount_fds[n_mounts_fds] = open_tree(-EBADF, paths[i], OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ if (mount_fds[n_mounts_fds] < 0)
+ return log_debug_errno(errno, "Failed to open tree of mounted filesystem '%s': %m", paths[i]);
+
+ /* Count the fd only after open_tree() succeeded, so the counter never covers an invalid entry */
+ n_mounts_fds++;
+
+ /* Set the user namespace mapping attribute on the cloned mount point (this clone only:
+ * AT_RECURSIVE is not passed here) */
+ if (mount_setattr(mntfd, "", AT_EMPTY_PATH,
+ &(struct mount_attr) {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ .userns_fd = userns_fd,
+ }, sizeof(struct mount_attr)) < 0)
+ return log_debug_errno(errno, "Failed to change bind mount attributes for clone of '%s': %m", paths[i]);
+ }
+
+ for (size_t i = n; i > 0; i--) { /* Unmount the paths right-to-left */
+ /* Remove the old mount points now that we have idmapped mounts as replacements for all of them */
+ r = umount_verbose(LOG_DEBUG, paths[i-1], UMOUNT_NOFOLLOW);
+ if (r < 0)
+ return r;
+ }
+
+ for (size_t i = 0; i < n; i++) { /* Mount the replacement mounts left-to-right */
+ /* And place the cloned version in its place */
+ log_debug("Mounting idmapped fs to '%s'", paths[i]);
+ if (move_mount(mount_fds[i], "", -EBADF, paths[i], MOVE_MOUNT_F_EMPTY_PATH) < 0)
+ return log_debug_errno(errno, "Failed to attach UID mapped mount to '%s': %m", paths[i]);
+ }
return 0;
}
-int remount_idmap(const char *p, uid_t uid_shift, uid_t uid_range, uid_t owner, RemountIdmapping idmapping) {
+int remount_idmap(char **p, uid_t uid_shift, uid_t uid_range, uid_t source_owner, uid_t dest_owner,RemountIdmapping idmapping) {
_cleanup_close_ int userns_fd = -EBADF;
- userns_fd = make_userns(uid_shift, uid_range, owner, idmapping);
+ userns_fd = make_userns(uid_shift, uid_range, source_owner, dest_owner, idmapping);
if (userns_fd < 0)
return userns_fd;
if (!t)
return -ENOMEM;
- r = path_is_mount_point(t, NULL, 0);
+ r = path_is_mount_point(t);
if (r < 0) {
log_debug_errno(r, "Failed to detect if '%s' already is a mount point, ignoring: %m", t);
continue;